• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright (c) 2013  Julien Pommier ( pommier@modartt.com )
2 
3    Based on original fortran 77 code from FFTPACKv4 from NETLIB,
4    authored by Dr Paul Swarztrauber of NCAR, in 1985.
5 
6    As confirmed by the NCAR fftpack software curators, the following
7    FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
8    released under the same terms.
9 
10    FFTPACK license:
11 
12    http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
13 
14    Copyright (c) 2004 the University Corporation for Atmospheric
15    Research ("UCAR"). All rights reserved. Developed by NCAR's
16    Computational and Information Systems Laboratory, UCAR,
17    www.cisl.ucar.edu.
18 
19    Redistribution and use of the Software in source and binary forms,
20    with or without modification, is permitted provided that the
21    following conditions are met:
22 
23    - Neither the names of NCAR's Computational and Information Systems
24    Laboratory, the University Corporation for Atmospheric Research,
25    nor the names of its sponsors or contributors may be used to
26    endorse or promote products derived from this Software without
27    specific prior written permission.
28 
29    - Redistributions of source code must retain the above copyright
30    notices, this list of conditions, and the disclaimer below.
31 
32    - Redistributions in binary form must reproduce the above copyright
33    notice, this list of conditions, and the disclaimer below in the
34    documentation and/or other materials provided with the
35    distribution.
36 
37    THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
38    EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
39    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
40    NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
41    HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
42    EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
43    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
44    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
45    SOFTWARE.
46 */
47 /*
48    NOTE: This file is adapted from Julien Pommier's original PFFFT,
49    which works on 32 bit floating point precision using SSE instructions,
50    to work with 64 bit floating point precision using AVX instructions.
51    Author: Dario Mambro @ https://github.com/unevens/pffft
52 */
53 /*
54    PFFFT : a Pretty Fast FFT.
55 
56    This is basically an adaptation of the single precision fftpack
57    (v4) as found on netlib taking advantage of SIMD instruction found
58    on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON).
59 
60    For architectures where no SIMD instruction is available, the code
61    falls back to a scalar version.
62 
63    Restrictions:
64 
65    - 1D transforms only, with 64-bit double precision.
66 
67    - supports only transforms for inputs of length N of the form
68    N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128,
69    144, 160, etc are all acceptable lengths). Performance is best for
70    128<=N<=8192.
71 
72    - all (double*) pointers in the functions below are expected to
73    have an "simd-compatible" alignment, that is 32 bytes on x86 and
74    powerpc CPUs.
75 
76    You can allocate such buffers with the functions
77    pffft_aligned_malloc / pffft_aligned_free (or with stuff like
78    posix_memalign..)
79 
80 */
81 
82 #ifndef PFFFT_DOUBLE_H
83 #define PFFFT_DOUBLE_H
84 
85 #include <stddef.h> /* for size_t */
86 
87 #ifdef __cplusplus
88 extern "C" {
89 #endif
90 
91   /* opaque struct holding internal stuff (precomputed twiddle factors)
92      this struct can be shared by many threads as it contains only
93      read-only data.
94   */
95   typedef struct PFFFTD_Setup PFFFTD_Setup;
96 
97 #ifndef PFFFT_COMMON_ENUMS
98 #define PFFFT_COMMON_ENUMS
99 
100   /* direction of the transform */
101   typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
102 
103   /* type of transform */
104   typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
105 
106 #endif
107 
108   /*
109     prepare for performing transforms of size N -- the returned
110     PFFFTD_Setup structure is read-only so it can safely be shared by
111     multiple concurrent threads.
112   */
113   PFFFTD_Setup *pffftd_new_setup(int N, pffft_transform_t transform);
114   void pffftd_destroy_setup(PFFFTD_Setup *);
115   /*
116      Perform a Fourier transform , The z-domain data is stored in the
117      most efficient order for transforming it back, or using it for
118      convolution. If you need to have its content sorted in the
119      "usual" way, that is as an array of interleaved complex numbers,
120      either use pffft_transform_ordered , or call pffft_zreorder after
121      the forward fft, and before the backward fft.
122 
123      Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
124      Typically you will want to scale the backward transform by 1/N.
125 
126      The 'work' pointer should point to an area of N (2*N for complex
127      fft) doubles, properly aligned. If 'work' is NULL, then stack will
128      be used instead (this is probably the best strategy for small
129      FFTs, say for N < 16384). Threads usually have a small stack, that
130      there's no sufficient amount of memory, usually leading to a crash!
131      Use the heap with pffft_aligned_malloc() in this case.
132 
133      input and output may alias.
134   */
135   void pffftd_transform(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffft_direction_t direction);
136 
137   /*
138      Similar to pffft_transform, but makes sure that the output is
139      ordered as expected (interleaved complex numbers).  This is
140      similar to calling pffft_transform and then pffft_zreorder.
141 
142      input and output may alias.
143   */
144   void pffftd_transform_ordered(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffft_direction_t direction);
145 
146   /*
147      call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
148      PFFFT_FORWARD) if you want to have the frequency components in
149      the correct "canonical" order, as interleaved complex numbers.
150 
151      (for real transforms, both 0-frequency and half frequency
152      components, which are real, are assembled in the first entry as
153      F(0)+i*F(n/2+1). Note that the original fftpack did place
154      F(n/2+1) at the end of the arrays).
155 
156      input and output should not alias.
157   */
158   void pffftd_zreorder(PFFFTD_Setup *setup, const double *input, double *output, pffft_direction_t direction);
159 
160   /*
161      Perform a multiplication of the frequency components of dft_a and
162      dft_b and accumulate them into dft_ab. The arrays should have
163      been obtained with pffft_transform(.., PFFFT_FORWARD) and should
164      *not* have been reordered with pffft_zreorder (otherwise just
165      perform the operation yourself as the dft coefs are stored as
166      interleaved complex numbers).
167 
168      the operation performed is: dft_ab += (dft_a * fdt_b)*scaling
169 
170      The dft_a, dft_b and dft_ab pointers may alias.
171   */
172   void pffftd_zconvolve_accumulate(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double *dft_ab, double scaling);
173 
174   /*
175      Perform a multiplication of the frequency components of dft_a and
176      dft_b and put result in dft_ab. The arrays should have
177      been obtained with pffft_transform(.., PFFFT_FORWARD) and should
178      *not* have been reordered with pffft_zreorder (otherwise just
179      perform the operation yourself as the dft coefs are stored as
180      interleaved complex numbers).
181 
182      the operation performed is: dft_ab = (dft_a * fdt_b)*scaling
183 
184      The dft_a, dft_b and dft_ab pointers may alias.
185   */
186   void pffftd_zconvolve_no_accu(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double*dft_ab, double scaling);
187 
188   /* return 4 or 1 wether support AVX instructions was enabled when building pffft-double.c */
189   int pffftd_simd_size();
190 
191   /* return string identifier of used architecture (AVX/..) */
192   const char * pffftd_simd_arch();
193 
194 
195   /* following functions are identical to the pffft_ functions */
196 
197   /* simple helper to get minimum possible fft size */
198   int pffftd_min_fft_size(pffft_transform_t transform);
199 
200   /* simple helper to determine next power of 2
201      - without inexact/rounding floating point operations
202   */
203   int pffftd_next_power_of_two(int N);
204 
205   /* simple helper to determine if power of 2 - returns bool */
206   int pffftd_is_power_of_two(int N);
207 
208   /*
209     the double buffers must have the correct alignment (32-byte boundary
210     on intel and powerpc). This function may be used to obtain such
211     correctly aligned buffers.
212   */
213   void *pffftd_aligned_malloc(size_t nb_bytes);
214   void pffftd_aligned_free(void *);
215 
216 #ifdef __cplusplus
217 }
218 #endif
219 
220 #endif /* PFFFT_DOUBLE_H */
221 
222