1 /* Copyright (c) 2020  Dario Mambro ( dario.mambro@gmail.com )
2    Copyright (c) 2020  Hayati Ayguen ( h_ayguen@web.de )
3 
4    Redistribution and use of the Software in source and binary forms,
5    with or without modification, is permitted provided that the
6    following conditions are met:
7 
8    - Neither the names of PFFFT, nor the names of its
9    sponsors or contributors may be used to endorse or promote products
10    derived from this Software without specific prior written permission.
11 
12    - Redistributions of source code must retain the above copyright
13    notices, this list of conditions, and the disclaimer below.
14 
15    - Redistributions in binary form must reproduce the above copyright
16    notice, this list of conditions, and the disclaimer below in the
17    documentation and/or other materials provided with the
18    distribution.
19 
20    THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21    EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
22    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23    NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
24    HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
25    EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
26    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
27    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
28    SOFTWARE.
29 */
30 
31 #pragma once
32 
33 #include <complex>
34 #include <vector>
35 #include <limits>
36 
37 namespace {
38 #if defined(PFFFT_ENABLE_FLOAT) || ( !defined(PFFFT_ENABLE_FLOAT) && !defined(PFFFT_ENABLE_DOUBLE) )
39 #include "pffft.h"
40 #endif
41 #if defined(PFFFT_ENABLE_DOUBLE)
42 #include "pffft_double.h"
43 #endif
44 }
45 
46 namespace pffft {
47 
48 // enum { PFFFT_REAL, PFFFT_COMPLEX }
49 typedef pffft_transform_t TransformType;
50 
51 // define 'Scalar' and 'Complex' (in namespace pffft) with template Types<>
52 // and other type specific helper functions
53 template<typename T> struct Types {};
54 #if defined(PFFFT_ENABLE_FLOAT) || ( !defined(PFFFT_ENABLE_FLOAT) && !defined(PFFFT_ENABLE_DOUBLE) )
55 template<> struct Types<float>  {
56   typedef float  Scalar;
57   typedef std::complex<Scalar> Complex;
simd_sizepffft::Types58   static int simd_size() { return pffft_simd_size(); }
simd_archpffft::Types59   static const char * simd_arch() { return pffft_simd_arch(); }
60 };
61 template<> struct Types< std::complex<float> >  {
62   typedef float  Scalar;
63   typedef std::complex<float>  Complex;
simd_sizepffft::Types64   static int simd_size() { return pffft_simd_size(); }
simd_archpffft::Types65   static const char * simd_arch() { return pffft_simd_arch(); }
66 };
67 #endif
68 #if defined(PFFFT_ENABLE_DOUBLE)
69 template<> struct Types<double> {
70   typedef double Scalar;
71   typedef std::complex<Scalar> Complex;
simd_sizepffft::Types72   static int simd_size() { return pffftd_simd_size(); }
simd_archpffft::Types73   static const char * simd_arch() { return pffftd_simd_arch(); }
74 };
75 template<> struct Types< std::complex<double> > {
76   typedef double Scalar;
77   typedef std::complex<double> Complex;
simd_sizepffft::Types78   static int simd_size() { return pffftd_simd_size(); }
simd_archpffft::Types79   static const char * simd_arch() { return pffftd_simd_arch(); }
80 };
81 #endif
82 
83 // Allocator
84 template<typename T> class PFAlloc;
85 
86 namespace {
87   template<typename T> class Setup;
88 }
89 
90 #if (__cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900))
91 
92 // define AlignedVector<T> utilizing 'using' in C++11
93 template<typename T>
94 using AlignedVector = typename std::vector< T, PFAlloc<T> >;
95 
96 #else
97 
98 // define AlignedVector<T> having to derive std::vector<>
99 template <typename T>
100 struct AlignedVector : public std::vector< T, PFAlloc<T> > {
AlignedVectorpffft::AlignedVector101   AlignedVector() : std::vector< T, PFAlloc<T> >() { }
AlignedVectorpffft::AlignedVector102   AlignedVector(int N) : std::vector< T, PFAlloc<T> >(N) { }
103 };
104 
105 #endif
106 
107 
108 // T can be float, double, std::complex<float> or std::complex<double>
109 //   define PFFFT_ENABLE_DOUBLE before include this file for double and std::complex<double>
110 template<typename T>
111 class Fft
112 {
113 public:
114 
115   // define types value_type, Scalar and Complex
116   typedef T value_type;
117   typedef typename Types<T>::Scalar  Scalar;
118   typedef typename Types<T>::Complex Complex;
119 
120   // static retrospection functions
isComplexTransform()121   static bool isComplexTransform()  { return sizeof(T) == sizeof(Complex); }
isFloatScalar()122   static bool isFloatScalar()  { return sizeof(Scalar) == sizeof(float); }
isDoubleScalar()123   static bool isDoubleScalar() { return sizeof(Scalar) == sizeof(double); }
124 
125   // simple helper to get minimum possible fft length
minFFtsize()126   static int minFFtsize() { return pffft_min_fft_size( isComplexTransform() ? PFFFT_COMPLEX : PFFFT_REAL ); }
127 
128   // simple helper to determine next power of 2 - without inexact/rounding floating point operations
nextPowerOfTwo(int N)129   static int nextPowerOfTwo(int N) { return pffft_next_power_of_two(N); }
isPowerOfTwo(int N)130   static bool isPowerOfTwo(int N) { return pffft_is_power_of_two(N) ? true : false; }
131 
simd_size()132   static int simd_size() { return Types<T>::simd_size(); }
simd_arch()133   static const char * simd_arch() { return Types<T>::simd_arch(); }
134 
135   //////////////////
136 
137   /*
138    * Contructor, with transformation length, preparing transforms.
139    *
140    * For length <= stackThresholdLen, the stack is used for the internal
141    * work memory. for bigger length', the heap is used.
142    *
143    * Using the stack is probably the best strategy for small
144    * FFTs, say for N <= 4096). Threads usually have a small stack, that
145    * there's no sufficient amount of memory, usually leading to a crash!
146    */
147   Fft( int length, int stackThresholdLen = 4096 );
148 
149   ~Fft();
150 
151   /*
152    * prepare for transformation length 'newLength'.
153    * length is identical to forward()'s input vector's size,
154    * and also equals inverse()'s output vector size.
155    * this function is no simple setter. it pre-calculates twiddle factors.
156    */
157   void prepareLength(int newLength);
158 
159   /*
160    * retrieve the transformation length.
161    */
getLength() const162   int getLength() const { return length; }
163 
164   /*
165    * retrieve size of complex spectrum vector,
166    * the output of forward()
167    */
getSpectrumSize() const168   int getSpectrumSize() const { return isComplexTransform() ? length : ( length / 2 ); }
169 
170   /*
171    * retrieve size of spectrum vector - in internal layout;
172    * the output of forwardToInternalLayout()
173    */
getInternalLayoutSize() const174   int getInternalLayoutSize() const { return isComplexTransform() ? ( 2 * length ) : length; }
175 
176 
177   ////////////////////////////////////////////
178   ////
179   //// API 1, with std::vector<> based containers,
180   ////   which free the allocated memory themselves (RAII).
181   ////
182   //// uses an Allocator for the alignment of SIMD data.
183   ////
184   ////////////////////////////////////////////
185 
186   // create suitably preallocated aligned vector for one FFT
187   AlignedVector<T>       valueVector() const;
188   AlignedVector<Complex> spectrumVector() const;
189   AlignedVector<Scalar>  internalLayoutVector() const;
190 
191   ////////////////////////////////////////////
192   // although using Vectors for output ..
193   // they need to have resize() applied before!
194 
195   // core API, having the spectrum in canonical order
196 
197   /*
198    * Perform the forward Fourier transform.
199    *
200    * Transforms are not scaled: inverse(forward(x)) = N*x.
201    * Typically you will want to scale the backward transform by 1/N.
202    *
203    * The output 'spectrum' is canonically ordered - as expected.
204    *
205    * a) for complex input isComplexTransform() == true,
206    *    and transformation length N  the output array is complex:
207    *   index k in 0 .. N/2 -1  corresponds to frequency k * Samplerate / N
208    *   index k in N/2 .. N -1  corresponds to frequency (k -N) * Samplerate / N,
209    *     resulting in negative frequencies
210    *
211    * b) for real input isComplexTransform() == false,
212    *    and transformation length N  the output array is 'mostly' complex:
213    *   index k in 1 .. N/2 -1  corresponds to frequency k * Samplerate / N
214    *   index k == 0 is a special case:
215    *     the real() part contains the result for the DC frequency 0,
216    *     the imag() part contains the result for the Nyquist frequency Samplerate/2
217    *   both 0-frequency and half frequency components, which are real,
218    *   are assembled in the first entry as  F(0)+i*F(N/2).
219    *
220    * input and output may alias - if you do nasty type conversion.
221    * return is just the given output parameter 'spectrum'.
222    */
223   AlignedVector<Complex> & forward(const AlignedVector<T> & input, AlignedVector<Complex> & spectrum);
224 
225   /*
226    * Perform the inverse Fourier transform, see forward().
227    * return is just the given output parameter 'output'.
228    */
229   AlignedVector<T> & inverse(const AlignedVector<Complex> & spectrum, AlignedVector<T> & output);
230 
231 
232   // provide additional functions with spectrum in some internal Layout.
233   // these are faster, cause the implementation omits the reordering.
234   // these are useful in special applications, like fast convolution,
235   // where inverse() is following anyway ..
236 
237   /*
238    * Perform the forward Fourier transform - similar to forward(), BUT:
239    *
240    * The z-domain data is stored in the most efficient order
241    * for transforming it back, or using it for convolution.
242    * If you need to have its content sorted in the "usual" canonical order,
243    * either use forward(), or call reorderSpectrum() after calling
244    * forwardToInternalLayout(), and before the backward fft
245    *
246    * return is just the given output parameter 'spectrum_internal_layout'.
247    */
248   AlignedVector<Scalar> & forwardToInternalLayout(
249           const AlignedVector<T> & input,
250           AlignedVector<Scalar> & spectrum_internal_layout );
251 
252   /*
253    * Perform the inverse Fourier transform, see forwardToInternalLayout()
254    *
255    * return is just the given output parameter 'output'.
256    */
257   AlignedVector<T> & inverseFromInternalLayout(
258           const AlignedVector<Scalar> & spectrum_internal_layout,
259           AlignedVector<T> & output );
260 
261   /*
262    * Reorder the spectrum from internal layout to have the
263    * frequency components in the correct "canonical" order.
264    * see forward() for a description of the canonical order.
265    *
266    * input and output should not alias.
267    */
268   void reorderSpectrum(
269           const AlignedVector<Scalar> & input,
270           AlignedVector<Complex> & output );
271 
272   /*
273    * Perform a multiplication of the frequency components of
274    * spectrum_internal_a and spectrum_internal_b
275    * into spectrum_internal_ab.
276    * The arrays should have been obtained with forwardToInternalLayout)
277    * and should *not* have been reordered with reorderSpectrum().
278    *
279    * the operation performed is:
280    *  spectrum_internal_ab = (spectrum_internal_a * spectrum_internal_b)*scaling
281    *
282    * The spectrum_internal_[a][b], pointers may alias.
283    * return is just the given output parameter 'spectrum_internal_ab'.
284    */
285   AlignedVector<Scalar> & convolve(
286           const AlignedVector<Scalar> & spectrum_internal_a,
287           const AlignedVector<Scalar> & spectrum_internal_b,
288           AlignedVector<Scalar> & spectrum_internal_ab,
289           const Scalar scaling );
290 
291   /*
292    * Perform a multiplication and accumulation of the frequency components
293    * - similar to convolve().
294    *
295    * the operation performed is:
296    *  spectrum_internal_ab += (spectrum_internal_a * spectrum_internal_b)*scaling
297    *
298    * The spectrum_internal_[a][b], pointers may alias.
299    * return is just the given output parameter 'spectrum_internal_ab'.
300    */
301   AlignedVector<Scalar> & convolveAccumulate(
302           const AlignedVector<Scalar> & spectrum_internal_a,
303           const AlignedVector<Scalar> & spectrum_internal_b,
304           AlignedVector<Scalar> & spectrum_internal_ab,
305           const Scalar scaling );
306 
307 
308   ////////////////////////////////////////////
309   ////
310   //// API 2, dealing with raw pointers,
311   //// which need to be deallocated using alignedFree()
312   ////
313   //// the special allocation is required cause SIMD
314   //// implementations require aligned memory
315   ////
316   //// Method descriptions are equal to the methods above,
317   //// having  AlignedVector<T> parameters - instead of raw pointers.
318   //// That is why following methods have no documentation.
319   ////
320   ////////////////////////////////////////////
321 
322   static void alignedFree(void* ptr);
323 
324   static T * alignedAllocType(int length);
325   static Scalar* alignedAllocScalar(int length);
326   static Complex* alignedAllocComplex(int length);
327 
328   // core API, having the spectrum in canonical order
329 
330   Complex* forward(const T* input, Complex* spectrum);
331 
332   T* inverse(const Complex* spectrum, T* output);
333 
334 
335   // provide additional functions with spectrum in some internal Layout.
336   // these are faster, cause the implementation omits the reordering.
337   // these are useful in special applications, like fast convolution,
338   // where inverse() is following anyway ..
339 
340   Scalar* forwardToInternalLayout(const T* input,
341                                 Scalar* spectrum_internal_layout);
342 
343   T* inverseFromInternalLayout(const Scalar* spectrum_internal_layout, T* output);
344 
345   void reorderSpectrum(const Scalar* input, Complex* output );
346 
347   Scalar* convolve(const Scalar* spectrum_internal_a,
348                    const Scalar* spectrum_internal_b,
349                    Scalar* spectrum_internal_ab,
350                    const Scalar scaling);
351 
352   Scalar* convolveAccumulate(const Scalar* spectrum_internal_a,
353                              const Scalar* spectrum_internal_b,
354                              Scalar* spectrum_internal_ab,
355                              const Scalar scaling);
356 
357 private:
358   Setup<T> setup;
359   Scalar* work;
360   int length;
361   int stackThresholdLen;
362 };
363 
364 
365 template<typename T>
alignedAlloc(int length)366 inline T* alignedAlloc(int length) {
367   return (T*)pffft_aligned_malloc( length * sizeof(T) );
368 }
369 
alignedFree(void * ptr)370 inline void alignedFree(void *ptr) {
371   pffft_aligned_free(ptr);
372 }
373 
374 
375 // simple helper to get minimum possible fft length
minFFtsize(pffft_transform_t transform)376 inline int minFFtsize(pffft_transform_t transform) {
377   return pffft_min_fft_size(transform);
378 }
379 
380 // simple helper to determine next power of 2 - without inexact/rounding floating point operations
nextPowerOfTwo(int N)381 inline int nextPowerOfTwo(int N) {
382   return pffft_next_power_of_two(N);
383 }
384 
isPowerOfTwo(int N)385 inline bool isPowerOfTwo(int N) {
386   return pffft_is_power_of_two(N) ? true : false;
387 }
388 
389 
390 
391 ////////////////////////////////////////////////////////////////////
392 
393 // implementation
394 
395 namespace {
396 
397 template<typename T>
398 class Setup
399 {};
400 
401 #if defined(PFFFT_ENABLE_FLOAT) || ( !defined(PFFFT_ENABLE_FLOAT) && !defined(PFFFT_ENABLE_DOUBLE) )
402 
403 template<>
404 class Setup<float>
405 {
406   PFFFT_Setup* self;
407 
408 public:
409   typedef float value_type;
410   typedef Types< value_type >::Scalar Scalar;
411 
Setup()412   Setup()
413     : self(NULL)
414   {}
415 
prepareLength(int length)416   void prepareLength(int length)
417   {
418     if (self) {
419       pffft_destroy_setup(self);
420     }
421     self = pffft_new_setup(length, PFFFT_REAL);
422   }
423 
~Setup()424   ~Setup() { pffft_destroy_setup(self); }
425 
transform_ordered(const Scalar * input,Scalar * output,Scalar * work,pffft_direction_t direction)426   void transform_ordered(const Scalar* input,
427                          Scalar* output,
428                          Scalar* work,
429                          pffft_direction_t direction)
430   {
431     pffft_transform_ordered(self, input, output, work, direction);
432   }
433 
transform(const Scalar * input,Scalar * output,Scalar * work,pffft_direction_t direction)434   void transform(const Scalar* input,
435                  Scalar* output,
436                  Scalar* work,
437                  pffft_direction_t direction)
438   {
439     pffft_transform(self, input, output, work, direction);
440   }
441 
reorder(const Scalar * input,Scalar * output,pffft_direction_t direction)442   void reorder(const Scalar* input, Scalar* output, pffft_direction_t direction)
443   {
444     pffft_zreorder(self, input, output, direction);
445   }
446 
convolveAccumulate(const Scalar * dft_a,const Scalar * dft_b,Scalar * dft_ab,const Scalar scaling)447   void convolveAccumulate(const Scalar* dft_a,
448                           const Scalar* dft_b,
449                           Scalar* dft_ab,
450                           const Scalar scaling)
451   {
452     pffft_zconvolve_accumulate(self, dft_a, dft_b, dft_ab, scaling);
453   }
454 
convolve(const Scalar * dft_a,const Scalar * dft_b,Scalar * dft_ab,const Scalar scaling)455   void convolve(const Scalar* dft_a,
456                 const Scalar* dft_b,
457                 Scalar* dft_ab,
458                 const Scalar scaling)
459   {
460     pffft_zconvolve_no_accu(self, dft_a, dft_b, dft_ab, scaling);
461   }
462 };
463 
464 template<>
465 class Setup< std::complex<float> >
466 {
467   PFFFT_Setup* self;
468 
469 public:
470   typedef std::complex<float> value_type;
471   typedef Types< value_type >::Scalar Scalar;
472 
Setup()473   Setup()
474     : self(NULL)
475   {}
476 
~Setup()477   ~Setup() { pffft_destroy_setup(self); }
478 
prepareLength(int length)479   void prepareLength(int length)
480   {
481     if (self) {
482       pffft_destroy_setup(self);
483     }
484     self = pffft_new_setup(length, PFFFT_COMPLEX);
485   }
486 
transform_ordered(const Scalar * input,Scalar * output,Scalar * work,pffft_direction_t direction)487   void transform_ordered(const Scalar* input,
488                          Scalar* output,
489                          Scalar* work,
490                          pffft_direction_t direction)
491   {
492     pffft_transform_ordered(self, input, output, work, direction);
493   }
494 
transform(const Scalar * input,Scalar * output,Scalar * work,pffft_direction_t direction)495   void transform(const Scalar* input,
496                  Scalar* output,
497                  Scalar* work,
498                  pffft_direction_t direction)
499   {
500     pffft_transform(self, input, output, work, direction);
501   }
502 
reorder(const Scalar * input,Scalar * output,pffft_direction_t direction)503   void reorder(const Scalar* input, Scalar* output, pffft_direction_t direction)
504   {
505     pffft_zreorder(self, input, output, direction);
506   }
507 
convolve(const Scalar * dft_a,const Scalar * dft_b,Scalar * dft_ab,const Scalar scaling)508   void convolve(const Scalar* dft_a,
509                 const Scalar* dft_b,
510                 Scalar* dft_ab,
511                 const Scalar scaling)
512   {
513     pffft_zconvolve_no_accu(self, dft_a, dft_b, dft_ab, scaling);
514   }
515 };
516 
517 #endif /* defined(PFFFT_ENABLE_FLOAT) || ( !defined(PFFFT_ENABLE_FLOAT) && !defined(PFFFT_ENABLE_DOUBLE) ) */
518 
519 
520 #if defined(PFFFT_ENABLE_DOUBLE)
521 
522 template<>
523 class Setup<double>
524 {
525   PFFFTD_Setup* self;
526 
527 public:
528   typedef double value_type;
529   typedef Types< value_type >::Scalar Scalar;
530 
Setup()531   Setup()
532     : self(NULL)
533   {}
534 
~Setup()535   ~Setup() { pffftd_destroy_setup(self); }
536 
prepareLength(int length)537   void prepareLength(int length)
538   {
539     if (self) {
540       pffftd_destroy_setup(self);
541       self = NULL;
542     }
543     if (length > 0) {
544       self = pffftd_new_setup(length, PFFFT_REAL);
545     }
546   }
547 
transform_ordered(const Scalar * input,Scalar * output,Scalar * work,pffft_direction_t direction)548   void transform_ordered(const Scalar* input,
549                          Scalar* output,
550                          Scalar* work,
551                          pffft_direction_t direction)
552   {
553     pffftd_transform_ordered(self, input, output, work, direction);
554   }
555 
transform(const Scalar * input,Scalar * output,Scalar * work,pffft_direction_t direction)556   void transform(const Scalar* input,
557                  Scalar* output,
558                  Scalar* work,
559                  pffft_direction_t direction)
560   {
561     pffftd_transform(self, input, output, work, direction);
562   }
563 
reorder(const Scalar * input,Scalar * output,pffft_direction_t direction)564   void reorder(const Scalar* input, Scalar* output, pffft_direction_t direction)
565   {
566     pffftd_zreorder(self, input, output, direction);
567   }
568 
convolveAccumulate(const Scalar * dft_a,const Scalar * dft_b,Scalar * dft_ab,const Scalar scaling)569   void convolveAccumulate(const Scalar* dft_a,
570                           const Scalar* dft_b,
571                           Scalar* dft_ab,
572                           const Scalar scaling)
573   {
574     pffftd_zconvolve_accumulate(self, dft_a, dft_b, dft_ab, scaling);
575   }
576 
convolve(const Scalar * dft_a,const Scalar * dft_b,Scalar * dft_ab,const Scalar scaling)577   void convolve(const Scalar* dft_a,
578                 const Scalar* dft_b,
579                 Scalar* dft_ab,
580                 const Scalar scaling)
581   {
582     pffftd_zconvolve_no_accu(self, dft_a, dft_b, dft_ab, scaling);
583   }
584 };
585 
586 template<>
587 class Setup< std::complex<double> >
588 {
589   PFFFTD_Setup* self;
590 
591 public:
592   typedef std::complex<double> value_type;
593   typedef Types< value_type >::Scalar Scalar;
594 
Setup()595   Setup()
596     : self(NULL)
597   {}
598 
~Setup()599   ~Setup() { pffftd_destroy_setup(self); }
600 
prepareLength(int length)601   void prepareLength(int length)
602   {
603     if (self) {
604       pffftd_destroy_setup(self);
605     }
606     self = pffftd_new_setup(length, PFFFT_COMPLEX);
607   }
608 
transform_ordered(const Scalar * input,Scalar * output,Scalar * work,pffft_direction_t direction)609   void transform_ordered(const Scalar* input,
610                          Scalar* output,
611                          Scalar* work,
612                          pffft_direction_t direction)
613   {
614     pffftd_transform_ordered(self, input, output, work, direction);
615   }
616 
transform(const Scalar * input,Scalar * output,Scalar * work,pffft_direction_t direction)617   void transform(const Scalar* input,
618                  Scalar* output,
619                  Scalar* work,
620                  pffft_direction_t direction)
621   {
622     pffftd_transform(self, input, output, work, direction);
623   }
624 
reorder(const Scalar * input,Scalar * output,pffft_direction_t direction)625   void reorder(const Scalar* input, Scalar* output, pffft_direction_t direction)
626   {
627     pffftd_zreorder(self, input, output, direction);
628   }
629 
convolveAccumulate(const Scalar * dft_a,const Scalar * dft_b,Scalar * dft_ab,const Scalar scaling)630   void convolveAccumulate(const Scalar* dft_a,
631                           const Scalar* dft_b,
632                           Scalar* dft_ab,
633                           const Scalar scaling)
634   {
635     pffftd_zconvolve_accumulate(self, dft_a, dft_b, dft_ab, scaling);
636   }
637 
convolve(const Scalar * dft_a,const Scalar * dft_b,Scalar * dft_ab,const Scalar scaling)638   void convolve(const Scalar* dft_a,
639                 const Scalar* dft_b,
640                 Scalar* dft_ab,
641                 const Scalar scaling)
642   {
643     pffftd_zconvolve_no_accu(self, dft_a, dft_b, dft_ab, scaling);
644   }
645 };
646 
647 #endif /* defined(PFFFT_ENABLE_DOUBLE) */
648 
649 } // end of anonymous namespace for Setup<>
650 
651 
652 template<typename T>
Fft(int length,int stackThresholdLen)653 inline Fft<T>::Fft(int length, int stackThresholdLen)
654   : length(0)
655   , stackThresholdLen(stackThresholdLen)
656   , work(NULL)
657 {
658 #if (__cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900))
659   static_assert( sizeof(Complex) == 2 * sizeof(Scalar), "pffft requires sizeof(std::complex<>) == 2 * sizeof(Scalar)" );
660 #elif defined(__GNUC__)
661   char static_assert_like[(sizeof(Complex) == 2 * sizeof(Scalar)) ? 1 : -1]; // pffft requires sizeof(std::complex<>) == 2 * sizeof(Scalar)
662 #endif
663   prepareLength(length);
664 }
665 
666 template<typename T>
~Fft()667 inline Fft<T>::~Fft()
668 {
669   alignedFree(work);
670 }
671 
672 template<typename T>
673 inline void
prepareLength(int newLength)674 Fft<T>::prepareLength(int newLength)
675 {
676   const bool wasOnHeap = ( work != NULL );
677 
678   const bool useHeap = newLength > stackThresholdLen;
679 
680   if (useHeap == wasOnHeap && newLength == length) {
681     return;
682   }
683 
684   length = newLength;
685 
686   setup.prepareLength(length);
687 
688   if (work) {
689     alignedFree(work);
690     work = NULL;
691   }
692 
693   if (useHeap) {
694     work = reinterpret_cast<Scalar*>( alignedAllocType(length) );
695   }
696 }
697 
698 
699 template<typename T>
700 inline AlignedVector<T>
valueVector() const701 Fft<T>::valueVector() const
702 {
703   return AlignedVector<T>(length);
704 }
705 
706 template<typename T>
707 inline AlignedVector< typename Fft<T>::Complex >
spectrumVector() const708 Fft<T>::spectrumVector() const
709 {
710   return AlignedVector<Complex>( getSpectrumSize() );
711 }
712 
713 template<typename T>
714 inline AlignedVector< typename Fft<T>::Scalar >
internalLayoutVector() const715 Fft<T>::internalLayoutVector() const
716 {
717   return AlignedVector<Scalar>( getInternalLayoutSize() );
718 }
719 
720 
721 template<typename T>
722 inline AlignedVector< typename Fft<T>::Complex > &
forward(const AlignedVector<T> & input,AlignedVector<Complex> & spectrum)723 Fft<T>::forward(const AlignedVector<T> & input, AlignedVector<Complex> & spectrum)
724 {
725   forward( input.data(), spectrum.data() );
726   return spectrum;
727 }
728 
729 template<typename T>
730 inline AlignedVector<T> &
inverse(const AlignedVector<Complex> & spectrum,AlignedVector<T> & output)731 Fft<T>::inverse(const AlignedVector<Complex> & spectrum, AlignedVector<T> & output)
732 {
733   inverse( spectrum.data(), output.data() );
734   return output;
735 }
736 
737 
738 template<typename T>
739 inline AlignedVector< typename Fft<T>::Scalar > &
forwardToInternalLayout(const AlignedVector<T> & input,AlignedVector<Scalar> & spectrum_internal_layout)740 Fft<T>::forwardToInternalLayout(
741     const AlignedVector<T> & input,
742     AlignedVector<Scalar> & spectrum_internal_layout )
743 {
744   forwardToInternalLayout( input.data(), spectrum_internal_layout.data() );
745   return spectrum_internal_layout;
746 }
747 
748 template<typename T>
749 inline AlignedVector<T> &
inverseFromInternalLayout(const AlignedVector<Scalar> & spectrum_internal_layout,AlignedVector<T> & output)750 Fft<T>::inverseFromInternalLayout(
751     const AlignedVector<Scalar> & spectrum_internal_layout,
752     AlignedVector<T> & output )
753 {
754   inverseFromInternalLayout( spectrum_internal_layout.data(), output.data() );
755   return output;
756 }
757 
758 template<typename T>
759 inline void
reorderSpectrum(const AlignedVector<Scalar> & input,AlignedVector<Complex> & output)760 Fft<T>::reorderSpectrum(
761     const AlignedVector<Scalar> & input,
762     AlignedVector<Complex> & output )
763 {
764   reorderSpectrum( input.data(), output.data() );
765 }
766 
767 template<typename T>
768 inline AlignedVector< typename Fft<T>::Scalar > &
convolveAccumulate(const AlignedVector<Scalar> & spectrum_internal_a,const AlignedVector<Scalar> & spectrum_internal_b,AlignedVector<Scalar> & spectrum_internal_ab,const Scalar scaling)769 Fft<T>::convolveAccumulate(
770     const AlignedVector<Scalar> & spectrum_internal_a,
771     const AlignedVector<Scalar> & spectrum_internal_b,
772     AlignedVector<Scalar> & spectrum_internal_ab,
773     const Scalar scaling )
774 {
775   convolveAccumulate( spectrum_internal_a.data(), spectrum_internal_b.data(),
776                       spectrum_internal_ab.data(), scaling );
777   return spectrum_internal_ab;
778 }
779 
780 template<typename T>
781 inline AlignedVector< typename Fft<T>::Scalar > &
convolve(const AlignedVector<Scalar> & spectrum_internal_a,const AlignedVector<Scalar> & spectrum_internal_b,AlignedVector<Scalar> & spectrum_internal_ab,const Scalar scaling)782 Fft<T>::convolve(
783     const AlignedVector<Scalar> & spectrum_internal_a,
784     const AlignedVector<Scalar> & spectrum_internal_b,
785     AlignedVector<Scalar> & spectrum_internal_ab,
786     const Scalar scaling )
787 {
788   convolve( spectrum_internal_a.data(), spectrum_internal_b.data(),
789             spectrum_internal_ab.data(), scaling );
790   return spectrum_internal_ab;
791 }
792 
793 
794 template<typename T>
795 inline typename Fft<T>::Complex *
forward(const T * input,Complex * spectrum)796 Fft<T>::forward(const T* input, Complex * spectrum)
797 {
798   setup.transform_ordered(reinterpret_cast<const Scalar*>(input),
799                           reinterpret_cast<Scalar*>(spectrum),
800                           work,
801                           PFFFT_FORWARD);
802   return spectrum;
803 }
804 
805 template<typename T>
806 inline T*
inverse(Complex const * spectrum,T * output)807 Fft<T>::inverse(Complex const* spectrum, T* output)
808 {
809   setup.transform_ordered(reinterpret_cast<const Scalar*>(spectrum),
810                           reinterpret_cast<Scalar*>(output),
811                           work,
812                           PFFFT_BACKWARD);
813   return output;
814 }
815 
816 template<typename T>
817 inline typename pffft::Fft<T>::Scalar*
forwardToInternalLayout(const T * input,Scalar * spectrum_internal_layout)818 Fft<T>::forwardToInternalLayout(const T* input, Scalar* spectrum_internal_layout)
819 {
820   setup.transform(reinterpret_cast<const Scalar*>(input),
821                   spectrum_internal_layout,
822                   work,
823                   PFFFT_FORWARD);
824   return spectrum_internal_layout;
825 }
826 
827 template<typename T>
828 inline T*
inverseFromInternalLayout(const Scalar * spectrum_internal_layout,T * output)829 Fft<T>::inverseFromInternalLayout(const Scalar* spectrum_internal_layout, T* output)
830 {
831   setup.transform(spectrum_internal_layout,
832                   reinterpret_cast<Scalar*>(output),
833                   work,
834                   PFFFT_BACKWARD);
835   return output;
836 }
837 
838 template<typename T>
839 inline void
reorderSpectrum(const Scalar * input,Complex * output)840 Fft<T>::reorderSpectrum( const Scalar* input, Complex* output )
841 {
842   setup.reorder(input, reinterpret_cast<Scalar*>(output), PFFFT_FORWARD);
843 }
844 
845 template<typename T>
846 inline typename pffft::Fft<T>::Scalar*
convolveAccumulate(const Scalar * dft_a,const Scalar * dft_b,Scalar * dft_ab,const Scalar scaling)847 Fft<T>::convolveAccumulate(const Scalar* dft_a,
848                            const Scalar* dft_b,
849                            Scalar* dft_ab,
850                            const Scalar scaling)
851 {
852   setup.convolveAccumulate(dft_a, dft_b, dft_ab, scaling);
853   return dft_ab;
854 }
855 
856 template<typename T>
857 inline typename pffft::Fft<T>::Scalar*
convolve(const Scalar * dft_a,const Scalar * dft_b,Scalar * dft_ab,const Scalar scaling)858 Fft<T>::convolve(const Scalar* dft_a,
859                  const Scalar* dft_b,
860                  Scalar* dft_ab,
861                  const Scalar scaling)
862 {
863   setup.convolve(dft_a, dft_b, dft_ab, scaling);
864   return dft_ab;
865 }
866 
867 template<typename T>
868 inline void
alignedFree(void * ptr)869 Fft<T>::alignedFree(void* ptr)
870 {
871   pffft::alignedFree(ptr);
872 }
873 
874 
875 template<typename T>
876 inline T*
alignedAllocType(int length)877 pffft::Fft<T>::alignedAllocType(int length)
878 {
879   return alignedAlloc<T>(length);
880 }
881 
882 template<typename T>
883 inline typename pffft::Fft<T>::Scalar*
alignedAllocScalar(int length)884 pffft::Fft<T>::alignedAllocScalar(int length)
885 {
886   return alignedAlloc<Scalar>(length);
887 }
888 
889 template<typename T>
890 inline typename Fft<T>::Complex *
alignedAllocComplex(int length)891 Fft<T>::alignedAllocComplex(int length)
892 {
893   return alignedAlloc<Complex>(length);
894 }
895 
896 
897 
898 ////////////////////////////////////////////////////////////////////
899 
900 // Allocator - for std::vector<>:
901 // origin: http://www.josuttis.com/cppcode/allocator.html
902 // http://www.josuttis.com/cppcode/myalloc.hpp
903 //
904 // minor renaming and utilizing of pffft (de)allocation functions
905 // are applied to Jossutis' allocator
906 
907 /* The following code example is taken from the book
908  * "The C++ Standard Library - A Tutorial and Reference"
909  * by Nicolai M. Josuttis, Addison-Wesley, 1999
910  *
911  * (C) Copyright Nicolai M. Josuttis 1999.
912  * Permission to copy, use, modify, sell and distribute this software
913  * is granted provided this copyright notice appears in all copies.
914  * This software is provided "as is" without express or implied
915  * warranty, and with no claim as to its suitability for any purpose.
916  */
917 
918 template <class T>
919 class PFAlloc {
920   public:
921     // type definitions
922     typedef T        value_type;
923     typedef T*       pointer;
924     typedef const T* const_pointer;
925     typedef T&       reference;
926     typedef const T& const_reference;
927     typedef std::size_t    size_type;
928     typedef std::ptrdiff_t difference_type;
929 
930     // rebind allocator to type U
931     template <class U>
932     struct rebind {
933         typedef PFAlloc<U> other;
934     };
935 
936     // return address of values
address(reference value) const937     pointer address (reference value) const {
938         return &value;
939     }
address(const_reference value) const940     const_pointer address (const_reference value) const {
941         return &value;
942     }
943 
944     /* constructors and destructor
945      * - nothing to do because the allocator has no state
946      */
PFAlloc()947     PFAlloc() throw() {
948     }
PFAlloc(const PFAlloc &)949     PFAlloc(const PFAlloc&) throw() {
950     }
951     template <class U>
PFAlloc(const PFAlloc<U> &)952       PFAlloc (const PFAlloc<U>&) throw() {
953     }
~PFAlloc()954     ~PFAlloc() throw() {
955     }
956 
957     // return maximum number of elements that can be allocated
max_size() const958     size_type max_size () const throw() {
959         return std::numeric_limits<std::size_t>::max() / sizeof(T);
960     }
961 
962     // allocate but don't initialize num elements of type T
allocate(size_type num,const void * =0)963     pointer allocate (size_type num, const void* = 0) {
964         pointer ret = (pointer)( alignedAlloc<T>(num) );
965         return ret;
966     }
967 
968     // initialize elements of allocated storage p with value value
construct(pointer p,const T & value)969     void construct (pointer p, const T& value) {
970         // initialize memory with placement new
971         new((void*)p)T(value);
972     }
973 
974     // destroy elements of initialized storage p
destroy(pointer p)975     void destroy (pointer p) {
976         // destroy objects by calling their destructor
977         p->~T();
978     }
979 
980     // deallocate storage p of deleted elements
deallocate(pointer p,size_type num)981     void deallocate (pointer p, size_type num) {
982         // deallocate memory with pffft
983         alignedFree( (void*)p );
984     }
985 };
986 
987 // return that all specializations of this allocator are interchangeable
988 template <class T1, class T2>
operator ==(const PFAlloc<T1> &,const PFAlloc<T2> &)989 bool operator== (const PFAlloc<T1>&,
990                  const PFAlloc<T2>&) throw() {
991     return true;
992 }
993 template <class T1, class T2>
operator !=(const PFAlloc<T1> &,const PFAlloc<T2> &)994 bool operator!= (const PFAlloc<T1>&,
995                  const PFAlloc<T2>&) throw() {
996     return false;
997 }
998 
999 
1000 } // namespace pffft
1001 
1002