/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com )

   Based on original fortran 77 code from FFTPACKv4 from NETLIB,
   authored by Dr Paul Swarztrauber of NCAR, in 1985.

   As confirmed by the NCAR fftpack software curators, the following
   FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
   released under the same terms.

   FFTPACK license:

   http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html

   Copyright (c) 2004 the University Corporation for Atmospheric
   Research ("UCAR"). All rights reserved. Developed by NCAR's
   Computational and Information Systems Laboratory, UCAR,
   www.cisl.ucar.edu.

   Redistribution and use of the Software in source and binary forms,
   with or without modification, is permitted provided that the
   following conditions are met:

   - Neither the names of NCAR's Computational and Information Systems
   Laboratory, the University Corporation for Atmospheric Research,
   nor the names of its sponsors or contributors may be used to
   endorse or promote products derived from this Software without
   specific prior written permission.

   - Redistributions of source code must retain the above copyright
   notices, this list of conditions, and the disclaimer below.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions, and the disclaimer below in the
   documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
   HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
   SOFTWARE.
*/
/*
   NOTE: This file is adapted from Julien Pommier's original PFFFT,
   which works on 32 bit floating point precision using SSE instructions,
   to work with 64 bit floating point precision using AVX instructions.
   Author: Dario Mambro @ https://github.com/unevens/pffft
*/
/*
   PFFFT : a Pretty Fast FFT.

   This is basically an adaptation of the single precision fftpack
   (v4) as found on netlib taking advantage of SIMD instruction found
   on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON).

   For architectures where no SIMD instruction is available, the code
   falls back to a scalar version.

   Restrictions:

   - 1D transforms only, with 64-bit double precision.

   - supports only transforms for inputs of length N of the form
   N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128,
   144, 160, etc are all acceptable lengths). Performance is best for
   128<=N<=8192.

   - all (double*) pointers in the functions below are expected to
   have a "simd-compatible" alignment, that is 32 bytes on x86 and
   powerpc CPUs.

   You can allocate such buffers with the functions
   pffftd_aligned_malloc / pffftd_aligned_free (or with stuff like
   posix_memalign..)

*/

#ifndef PFFFT_DOUBLE_H
#define PFFFT_DOUBLE_H

#include <stddef.h> /* for size_t */

#ifdef __cplusplus
extern "C" {
#endif

/* opaque struct holding internal stuff (precomputed twiddle factors)
   this struct can be shared by many threads as it contains only
   read-only data.
*/
typedef struct PFFFTD_Setup PFFFTD_Setup;

/* The enums below are guarded by their own macro so that they are
   defined only once when another header guarded by the same
   PFFFT_COMMON_ENUMS macro is included in the same translation unit. */
#ifndef PFFFT_COMMON_ENUMS
#define PFFFT_COMMON_ENUMS

/* direction of the transform */
typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;

/* type of transform */
typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;

#endif

/*
  prepare for performing transforms of size N -- the returned
  PFFFTD_Setup structure is read-only so it can safely be shared by
  multiple concurrent threads.

  Release the setup with pffftd_destroy_setup() once it is no longer
  needed.
*/
PFFFTD_Setup *pffftd_new_setup(int N, pffft_transform_t transform);
void pffftd_destroy_setup(PFFFTD_Setup *setup);

/*
   Perform a Fourier transform. The z-domain data is stored in the
   most efficient order for transforming it back, or using it for
   convolution. If you need to have its content sorted in the
   "usual" way, that is as an array of interleaved complex numbers,
   either use pffftd_transform_ordered, or call pffftd_zreorder after
   the forward fft, and before the backward fft.

   Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
   Typically you will want to scale the backward transform by 1/N.

   The 'work' pointer should point to an area of N (2*N for complex
   fft) doubles, properly aligned. If 'work' is NULL, then stack will
   be used instead (this is probably the best strategy for small
   FFTs, say for N < 16384). Threads usually have a small stack, which
   may not provide a sufficient amount of memory, usually leading to a
   crash! Use the heap with pffftd_aligned_malloc() in this case.

   input and output may alias.
*/
void pffftd_transform(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffft_direction_t direction);

/*
   Similar to pffftd_transform, but makes sure that the output is
   ordered as expected (interleaved complex numbers). This is
   similar to calling pffftd_transform and then pffftd_zreorder.

   input and output may alias.
*/
void pffftd_transform_ordered(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffft_direction_t direction);

/*
   call pffftd_zreorder(.., PFFFT_FORWARD) after pffftd_transform(...,
   PFFFT_FORWARD) if you want to have the frequency components in
   the correct "canonical" order, as interleaved complex numbers.

   (for real transforms, both 0-frequency and half frequency
   components, which are real, are assembled in the first entry as
   F(0)+i*F(n/2+1). Note that the original fftpack did place
   F(n/2+1) at the end of the arrays).

   input and output should not alias.
*/
void pffftd_zreorder(PFFFTD_Setup *setup, const double *input, double *output, pffft_direction_t direction);

/*
   Perform a multiplication of the frequency components of dft_a and
   dft_b and accumulate them into dft_ab. The arrays should have
   been obtained with pffftd_transform(.., PFFFT_FORWARD) and should
   *not* have been reordered with pffftd_zreorder (otherwise just
   perform the operation yourself as the dft coefs are stored as
   interleaved complex numbers).

   the operation performed is: dft_ab += (dft_a * dft_b)*scaling

   The dft_a, dft_b and dft_ab pointers may alias.
*/
void pffftd_zconvolve_accumulate(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double *dft_ab, double scaling);

/*
   Perform a multiplication of the frequency components of dft_a and
   dft_b and put result in dft_ab. The arrays should have
   been obtained with pffftd_transform(.., PFFFT_FORWARD) and should
   *not* have been reordered with pffftd_zreorder (otherwise just
   perform the operation yourself as the dft coefs are stored as
   interleaved complex numbers).

   the operation performed is: dft_ab = (dft_a * dft_b)*scaling

   The dft_a, dft_b and dft_ab pointers may alias.
*/
void pffftd_zconvolve_no_accu(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double *dft_ab, double scaling);

/* return 4 or 1 depending on whether support for AVX instructions was
   enabled when building pffft-double.c */
int pffftd_simd_size(void);

/* return string identifier of used architecture (AVX/..) */
const char *pffftd_simd_arch(void);


/* following functions are identical to the pffft_ functions */

/* simple helper to get minimum possible fft size */
int pffftd_min_fft_size(pffft_transform_t transform);

/* simple helper to determine next power of 2
   - without inexact/rounding floating point operations
*/
int pffftd_next_power_of_two(int N);

/* simple helper to determine if power of 2 - returns bool */
int pffftd_is_power_of_two(int N);

/*
  the double buffers must have the correct alignment (32-byte boundary
  on intel and powerpc). This function may be used to obtain such
  correctly aligned buffers. Buffers obtained from
  pffftd_aligned_malloc are released with pffftd_aligned_free.
*/
void *pffftd_aligned_malloc(size_t nb_bytes);
void pffftd_aligned_free(void *p);

#ifdef __cplusplus
}
#endif

#endif /* PFFFT_DOUBLE_H */