1 
2 /* -----------------------------------------------------------------------------------------------------------
3 Software License for The Fraunhofer FDK AAC Codec Library for Android
4 
5 � Copyright  1995 - 2013 Fraunhofer-Gesellschaft zur F�rderung der angewandten Forschung e.V.
6   All rights reserved.
7 
8  1.    INTRODUCTION
9 The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software that implements
10 the MPEG Advanced Audio Coding ("AAC") encoding and decoding scheme for digital audio.
11 This FDK AAC Codec software is intended to be used on a wide variety of Android devices.
12 
13 AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient general perceptual
14 audio codecs. AAC-ELD is considered the best-performing full-bandwidth communications codec by
15 independent studies and is widely deployed. AAC has been standardized by ISO and IEC as part
16 of the MPEG specifications.
17 
18 Patent licenses for necessary patent claims for the FDK AAC Codec (including those of Fraunhofer)
19 may be obtained through Via Licensing (www.vialicensing.com) or through the respective patent owners
20 individually for the purpose of encoding or decoding bit streams in products that are compliant with
21 the ISO/IEC MPEG audio standards. Please note that most manufacturers of Android devices already license
22 these patent claims through Via Licensing or directly from the patent owners, and therefore FDK AAC Codec
23 software may already be covered under those patent licenses when it is used for those licensed purposes only.
24 
25 Commercially-licensed AAC software libraries, including floating-point versions with enhanced sound quality,
26 are also available from Fraunhofer. Users are encouraged to check the Fraunhofer website for additional
27 applications information and documentation.
28 
29 2.    COPYRIGHT LICENSE
30 
31 Redistribution and use in source and binary forms, with or without modification, are permitted without
32 payment of copyright license fees provided that you satisfy the following conditions:
33 
34 You must retain the complete text of this software license in redistributions of the FDK AAC Codec or
35 your modifications thereto in source code form.
36 
37 You must retain the complete text of this software license in the documentation and/or other materials
38 provided with redistributions of the FDK AAC Codec or your modifications thereto in binary form.
39 You must make available free of charge copies of the complete source code of the FDK AAC Codec and your
40 modifications thereto to recipients of copies in binary form.
41 
42 The name of Fraunhofer may not be used to endorse or promote products derived from this library without
43 prior written permission.
44 
45 You may not charge copyright license fees for anyone to use, copy or distribute the FDK AAC Codec
46 software or your modifications thereto.
47 
48 Your modified versions of the FDK AAC Codec must carry prominent notices stating that you changed the software
49 and the date of any change. For modified versions of the FDK AAC Codec, the term
50 "Fraunhofer FDK AAC Codec Library for Android" must be replaced by the term
51 "Third-Party Modified Version of the Fraunhofer FDK AAC Codec Library for Android."
52 
53 3.    NO PATENT LICENSE
54 
55 NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without limitation the patents of Fraunhofer,
56 ARE GRANTED BY THIS SOFTWARE LICENSE. Fraunhofer provides no warranty of patent non-infringement with
57 respect to this software.
58 
59 You may use this FDK AAC Codec software or modifications thereto only for purposes that are authorized
60 by appropriate patent licenses.
61 
62 4.    DISCLAIMER
63 
64 This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright holders and contributors
65 "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties
66 of merchantability and fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
67 CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages,
68 including but not limited to procurement of substitute goods or services; loss of use, data, or profits,
69 or business interruption, however caused and on any theory of liability, whether in contract, strict
70 liability, or tort (including negligence), arising in any way out of the use of this software, even if
71 advised of the possibility of such damage.
72 
73 5.    CONTACT INFORMATION
74 
75 Fraunhofer Institute for Integrated Circuits IIS
76 Attention: Audio and Multimedia Departments - FDK AAC LL
77 Am Wolfsmantel 33
78 91058 Erlangen, Germany
79 
80 www.iis.fraunhofer.de/amm
81 amm-info@iis.fraunhofer.de
82 ----------------------------------------------------------------------------------------------------------- */
83 
84 /***************************  Fraunhofer IIS FDK Tools  **********************
85 
86    Author(s):   Josef Hoepfl, DSP Solutions
87    Description: Fix point FFT
88 
89 ******************************************************************************/
90 
91 #include "fft.h"
92 
93 #include "fft_rad2.h"
94 #include "FDK_tools_rom.h"
95 
96 
97 
98 
99 
100 #define F3C(x) STC(x)
101 
102 #define     C31       (F3C(0x91261468))      /* FL2FXCONST_DBL(-0.86602540)   */
103 
104 /* Performs the FFT of length 3 according to the algorithm after winograd.
105    No scaling of the input vector because the scaling is already done in the rotation vector. */
fft3(FIXP_DBL * RESTRICT pDat)106 static FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat)
107 {
108   FIXP_DBL r1,r2;
109   FIXP_DBL s1,s2;
110   /* real part */
111   r1      = pDat[2] + pDat[4];
112   r2      = fMult((pDat[2] - pDat[4]), C31);
113   pDat[0] = pDat[0] + r1;
114   r1      = pDat[0] - r1 - (r1>>1);
115 
116   /* imaginary part */
117   s1      = pDat[3] + pDat[5];
118   s2      = fMult((pDat[3] - pDat[5]), C31);
119   pDat[1] = pDat[1] + s1;
120   s1      = pDat[1] - s1 - (s1>>1);
121 
122   /* combination */
123   pDat[2] = r1 - s2;
124   pDat[4] = r1 + s2;
125   pDat[3] = s1 + r2;
126   pDat[5] = s1 - r2;
127 }
128 
129 
130 #define F5C(x) STC(x)
131 
132 #define     C51       (F5C(0x79bc3854))      /* FL2FXCONST_DBL( 0.95105652)   */
133 #define     C52       (F5C(0x9d839db0))      /* FL2FXCONST_DBL(-1.53884180/2) */
134 #define     C53       (F5C(0xd18053ce))      /* FL2FXCONST_DBL(-0.36327126)   */
135 #define     C54       (F5C(0x478dde64))      /* FL2FXCONST_DBL( 0.55901699)   */
136 #define     C55       (F5C(0xb0000001))      /* FL2FXCONST_DBL(-1.25/2)       */
137 
138 /* performs the FFT of length 5 according to the algorithm after winograd */
fft5(FIXP_DBL * RESTRICT pDat)139 static FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat)
140 {
141   FIXP_DBL r1,r2,r3,r4;
142   FIXP_DBL s1,s2,s3,s4;
143   FIXP_DBL t;
144 
145   /* real part */
146   r1      = pDat[2] + pDat[8];
147   r4      = pDat[2] - pDat[8];
148   r3      = pDat[4] + pDat[6];
149   r2      = pDat[4] - pDat[6];
150   t       = fMult((r1-r3), C54);
151   r1      = r1 + r3;
152   pDat[0] = pDat[0] + r1;
153   /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
154      the values as fracts */
155   r1      = pDat[0] + (fMultDiv2(r1, C55) <<(2));
156   r3      = r1 - t;
157   r1      = r1 + t;
158   t       = fMult((r4 + r2), C51);
159   /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
160      the values as fracts */
161   r4      = t + (fMultDiv2(r4, C52) <<(2));
162   r2      = t + fMult(r2, C53);
163 
164   /* imaginary part */
165   s1      = pDat[3] + pDat[9];
166   s4      = pDat[3] - pDat[9];
167   s3      = pDat[5] + pDat[7];
168   s2      = pDat[5] - pDat[7];
169   t       = fMult((s1 - s3), C54);
170   s1      = s1 + s3;
171   pDat[1] = pDat[1] + s1;
172   /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
173      the values as fracts */
174   s1      = pDat[1] + (fMultDiv2(s1, C55) <<(2));
175   s3      = s1 - t;
176   s1      = s1 + t;
177   t       = fMult((s4 + s2), C51);
178   /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
179      the values as fracts */
180   s4      = t + (fMultDiv2(s4, C52) <<(2));
181   s2      = t + fMult(s2, C53);
182 
183   /* combination */
184   pDat[2] = r1 + s2;
185   pDat[8] = r1 - s2;
186   pDat[4] = r3 - s4;
187   pDat[6] = r3 + s4;
188 
189   pDat[3] = s1 - r2;
190   pDat[9] = s1 + r2;
191   pDat[5] = s3 + r4;
192   pDat[7] = s3 - r4;
193 }
194 
195 
196 
197 
198 #define N3                    3
199 #define N5                    5
200 #define N6                    6
201 #define N15                   15
202 
203 /* Performs the FFT of length 15. It is split into FFTs of length 3 and length 5. */
fft15(FIXP_DBL * pInput)204 static inline void fft15(FIXP_DBL *pInput)
205 {
206   FIXP_DBL  aDst[2*N15];
207   FIXP_DBL  aDst1[2*N15];
208   int    i,k,l;
209 
210   /* Sort input vector for fft's of length 3
211   input3(0:2)   = [input(0) input(5) input(10)];
212   input3(3:5)   = [input(3) input(8) input(13)];
213   input3(6:8)   = [input(6) input(11) input(1)];
214   input3(9:11)  = [input(9) input(14) input(4)];
215   input3(12:14) = [input(12) input(2) input(7)]; */
216   {
217     const FIXP_DBL *pSrc = pInput;
218     FIXP_DBL *RESTRICT pDst = aDst;
219     /* Merge 3 loops into one, skip call of fft3 */
220     for(i=0,l=0,k=0; i<N5; i++, k+=6)
221     {
222       pDst[k+0] = pSrc[l];
223       pDst[k+1] = pSrc[l+1];
224       l += 2*N5;
225       if (l >= (2*N15))
226           l -= (2*N15);
227 
228       pDst[k+2] = pSrc[l];
229       pDst[k+3] = pSrc[l+1];
230       l += 2*N5;
231       if (l >= (2*N15))
232           l -= (2*N15);
233       pDst[k+4] = pSrc[l];
234       pDst[k+5] = pSrc[l+1];
235       l += (2*N5) + (2*N3);
236       if (l >= (2*N15))
237           l -= (2*N15);
238 
239       /* fft3 merged with shift right by 2 loop */
240       FIXP_DBL r1,r2,r3;
241       FIXP_DBL s1,s2;
242       /* real part */
243       r1      = pDst[k+2] + pDst[k+4];
244       r2      = fMult((pDst[k+2] - pDst[k+4]), C31);
245       s1      = pDst[k+0];
246       pDst[k+0] = (s1 + r1)>>2;
247       r1      = s1 - (r1>>1);
248 
249       /* imaginary part */
250       s1      = pDst[k+3] + pDst[k+5];
251       s2      = fMult((pDst[k+3] - pDst[k+5]), C31);
252       r3      = pDst[k+1];
253       pDst[k+1] = (r3 + s1)>>2;
254       s1      = r3 - (s1>>1);
255 
256       /* combination */
257       pDst[k+2] = (r1 - s2)>>2;
258       pDst[k+4] = (r1 + s2)>>2;
259       pDst[k+3] = (s1 + r2)>>2;
260       pDst[k+5] = (s1 - r2)>>2;
261     }
262   }
263   /* Sort input vector for fft's of length 5
264   input5(0:4)   = [output3(0) output3(3) output3(6) output3(9) output3(12)];
265   input5(5:9)   = [output3(1) output3(4) output3(7) output3(10) output3(13)];
266   input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */
267   /* Merge 2 loops into one, brings about 10% */
268   {
269     const FIXP_DBL *pSrc = aDst;
270     FIXP_DBL *RESTRICT pDst = aDst1;
271     for(i=0,l=0,k=0; i<N3; i++, k+=10)
272     {
273       l = 2*i;
274       pDst[k+0] = pSrc[l+0];
275       pDst[k+1] = pSrc[l+1];
276       pDst[k+2] = pSrc[l+0+(2*N3)];
277       pDst[k+3] = pSrc[l+1+(2*N3)];
278       pDst[k+4] = pSrc[l+0+(4*N3)];
279       pDst[k+5] = pSrc[l+1+(4*N3)];
280       pDst[k+6] = pSrc[l+0+(6*N3)];
281       pDst[k+7] = pSrc[l+1+(6*N3)];
282       pDst[k+8] = pSrc[l+0+(8*N3)];
283       pDst[k+9] = pSrc[l+1+(8*N3)];
284       fft5(&pDst[k]);
285     }
286   }
287   /* Sort output vector of length 15
288   output = [out5(0)  out5(6)  out5(12) out5(3)  out5(9)
289             out5(10) out5(1)  out5(7)  out5(13) out5(4)
290             out5(5)  out5(11) out5(2)  out5(8)  out5(14)]; */
291   /* optimize clumsy loop, brings about 5% */
292   {
293     const FIXP_DBL *pSrc = aDst1;
294     FIXP_DBL *RESTRICT pDst = pInput;
295     for(i=0,l=0,k=0; i<N3; i++, k+=10)
296     {
297       pDst[k+0] = pSrc[l];
298       pDst[k+1] = pSrc[l+1];
299       l += (2*N6);
300       if (l >= (2*N15))
301           l -= (2*N15);
302       pDst[k+2] = pSrc[l];
303       pDst[k+3] = pSrc[l+1];
304       l += (2*N6);
305       if (l >= (2*N15))
306           l -= (2*N15);
307       pDst[k+4] = pSrc[l];
308       pDst[k+5] = pSrc[l+1];
309       l += (2*N6);
310       if (l >= (2*N15))
311           l -= (2*N15);
312       pDst[k+6] = pSrc[l];
313       pDst[k+7] = pSrc[l+1];
314       l += (2*N6);
315       if (l >= (2*N15))
316           l -= (2*N15);
317       pDst[k+8] = pSrc[l];
318       pDst[k+9] = pSrc[l+1];
319       l += 2;    /* no modulo check needed, it cannot occur */
320     }
321   }
322 }
323 
324 #define W_PiFOURTH STC(0x5a82799a)
325 #ifndef SUMDIFF_PIFOURTH
326 #define SUMDIFF_PIFOURTH(diff,sum,a,b) \
327   { \
328     FIXP_DBL wa, wb;\
329     wa = fMultDiv2(a, W_PiFOURTH);\
330     wb = fMultDiv2(b, W_PiFOURTH);\
331     diff = wb - wa;\
332     sum  = wb + wa;\
333   }
334 #endif
335 
336 /* This version is more overflow save, but less cycle optimal. */
337 #define SUMDIFF_EIGTH(x, y, ix, iy, vr, vi, ur, ui) \
338   vr = (x[ 0 + ix]>>1) + (x[16 + ix]>>1);  /* Re A + Re B */ \
339   vi = (x[ 8 + ix]>>1) + (x[24 + ix]>>1);     /* Re C + Re D */ \
340   ur = (x[ 1 + ix]>>1) + (x[17 + ix]>>1);  /* Im A + Im B */ \
341   ui = (x[ 9 + ix]>>1) + (x[25 + ix]>>1);     /* Im C + Im D */ \
342   y[ 0 + iy] = vr + vi;     /* Re A' = ReA + ReB +ReC + ReD */    \
343   y[ 4 + iy] = vr - vi;     /* Re C' = -(ReC+ReD) + (ReA+ReB) */  \
344   y[ 1 + iy] = ur + ui;     /* Im A' = sum of imag values */      \
345   y[ 5 + iy] = ur - ui;     /* Im C' = -Im C -Im D +Im A +Im B */ \
346   vr -= x[16 + ix];              /* Re A - Re B */ \
347   vi = vi - x[24 + ix];          /* Re C - Re D */ \
348   ur -= x[17 + ix];              /* Im A - Im B */ \
349   ui = ui - x[25 + ix];          /* Im C - Im D */ \
350   y[ 2 + iy] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */ \
351   y[ 6 + iy] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */ \
352   y[ 3 + iy] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */  \
353   y[ 7 + iy] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
354 
355 static const FIXP_STP fft16_w16[2] =  { STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d) };
356 
357 LNK_SECTION_CODE_L1
fft_16(FIXP_DBL * RESTRICT x)358 inline void fft_16(FIXP_DBL *RESTRICT x)
359 {
360   FIXP_DBL vr, vi, ur, ui;
361   FIXP_DBL y[32];
362 
363   SUMDIFF_EIGTH(x, y, 0,  0, vr, vi, ur, ui);
364   SUMDIFF_EIGTH(x, y, 4,  8, vr, vi, ur, ui);
365   SUMDIFF_EIGTH(x, y, 2, 16, vr, vi, ur, ui);
366   SUMDIFF_EIGTH(x, y, 6, 24, vr, vi, ur, ui);
367 
368 // xt1 =  0
369 // xt2 =  8
370   vr = y[ 8];
371   vi = y[ 9];
372   ur = y[ 0]>>1;
373   ui = y[ 1]>>1;
374   x[ 0] = ur + (vr>>1);
375   x[ 1] = ui + (vi>>1);
376   x[ 8] = ur - (vr>>1);
377   x[ 9] = ui - (vi>>1);
378 
379 // xt1 =  4
380 // xt2 = 12
381   vr = y[13];
382   vi = y[12];
383   ur = y[ 4]>>1;
384   ui = y[ 5]>>1;
385   x[ 4] = ur + (vr>>1);
386   x[ 5] = ui - (vi>>1);
387   x[12] = ur - (vr>>1);
388   x[13] = ui + (vi>>1);
389 
390 // xt1 = 16
391 // xt2 = 24
392   vr = y[24];
393   vi = y[25];
394   ur = y[16]>>1;
395   ui = y[17]>>1;
396   x[16] = ur + (vr>>1);
397   x[17] = ui + (vi>>1);
398   x[24] = ur - (vr>>1);
399   x[25] = ui - (vi>>1);
400 
401 // xt1 = 20
402 // xt2 = 28
403   vr = y[29];
404   vi = y[28];
405   ur = y[20]>>1;
406   ui = y[21]>>1;
407   x[20] = ur + (vr>>1);
408   x[21] = ui - (vi>>1);
409   x[28] = ur - (vr>>1);
410   x[29] = ui + (vi>>1);
411 
412   // xt1 =  2
413 // xt2 = 10
414   SUMDIFF_PIFOURTH(vi, vr, y[10], y[11])
415   //vr = fMultDiv2((y[11] + y[10]),W_PiFOURTH);
416   //vi = fMultDiv2((y[11] - y[10]),W_PiFOURTH);
417   ur = y[ 2];
418   ui = y[ 3];
419   x[ 2] = (ur>>1) + vr;
420   x[ 3] = (ui>>1) + vi;
421   x[10] = (ur>>1) - vr;
422   x[11] = (ui>>1) - vi;
423 
424 // xt1 =  6
425 // xt2 = 14
426   SUMDIFF_PIFOURTH(vr, vi, y[14], y[15])
427   ur = y[ 6];
428   ui = y[ 7];
429   x[ 6] = (ur>>1) + vr;
430   x[ 7] = (ui>>1) - vi;
431   x[14] = (ur>>1) - vr;
432   x[15] = (ui>>1) + vi;
433 
434 // xt1 = 18
435 // xt2 = 26
436   SUMDIFF_PIFOURTH(vi, vr, y[26], y[27])
437   ur = y[18];
438   ui = y[19];
439   x[18] = (ur>>1) + vr;
440   x[19] = (ui>>1) + vi;
441   x[26] = (ur>>1) - vr;
442   x[27] = (ui>>1) - vi;
443 
444 // xt1 = 22
445 // xt2 = 30
446   SUMDIFF_PIFOURTH(vr, vi, y[30], y[31])
447   ur = y[22];
448   ui = y[23];
449   x[22] = (ur>>1) + vr;
450   x[23] = (ui>>1) - vi;
451   x[30] = (ur>>1) - vr;
452   x[31] = (ui>>1) + vi;
453 
454 // xt1 =  0
455 // xt2 = 16
456   vr = x[16];
457   vi = x[17];
458   ur = x[ 0]>>1;
459   ui = x[ 1]>>1;
460   x[ 0] = ur + (vr>>1);
461   x[ 1] = ui + (vi>>1);
462   x[16] = ur - (vr>>1);
463   x[17] = ui - (vi>>1);
464 
465 // xt1 =  8
466 // xt2 = 24
467   vi = x[24];
468   vr = x[25];
469   ur = x[ 8]>>1;
470   ui = x[ 9]>>1;
471   x[ 8] = ur + (vr>>1);
472   x[ 9] = ui - (vi>>1);
473   x[24] = ur - (vr>>1);
474   x[25] = ui + (vi>>1);
475 
476 // xt1 =  2
477 // xt2 = 18
478   cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]);
479   ur = x[ 2];
480   ui = x[ 3];
481   x[ 2] = (ur>>1) + vr;
482   x[ 3] = (ui>>1) + vi;
483   x[18] = (ur>>1) - vr;
484   x[19] = (ui>>1) - vi;
485 
486 // xt1 = 10
487 // xt2 = 26
488   cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]);
489   ur = x[10];
490   ui = x[11];
491   x[10] = (ur>>1) + vr;
492   x[11] = (ui>>1) - vi;
493   x[26] = (ur>>1) - vr;
494   x[27] = (ui>>1) + vi;
495 
496 // xt1 =  4
497 // xt2 = 20
498   SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
499   ur = x[ 4];
500   ui = x[ 5];
501   x[ 4] = (ur>>1) + vr;
502   x[ 5] = (ui>>1) + vi;
503   x[20] = (ur>>1) - vr;
504   x[21] = (ui>>1) - vi;
505 
506 // xt1 = 12
507 // xt2 = 28
508   SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
509   ur = x[12];
510   ui = x[13];
511   x[12] = (ur>>1) + vr;
512   x[13] = (ui>>1) - vi;
513   x[28] = (ur>>1) - vr;
514   x[29] = (ui>>1) + vi;
515 
516 // xt1 =  6
517 // xt2 = 22
518   cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]);
519   ur = x[ 6];
520   ui = x[ 7];
521   x[ 6] = (ur>>1) + vr;
522   x[ 7] = (ui>>1) + vi;
523   x[22] = (ur>>1) - vr;
524   x[23] = (ui>>1) - vi;
525 
526 // xt1 = 14
527 // xt2 = 30
528   cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]);
529   ur = x[14];
530   ui = x[15];
531   x[14] = (ur>>1) + vr;
532   x[15] = (ui>>1) - vi;
533   x[30] = (ur>>1) - vr;
534   x[31] = (ui>>1) + vi;
535 }
536 
537 #ifndef FUNCTION_fft_32
538 static const FIXP_STP fft32_w32[6] =
539 {
540   STCP (0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d), STCP(0x7d8a5f40, 0x18f8b83c),
541   STCP (0x6a6d98a4, 0x471cece7), STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)
542 };
543 
544 LNK_SECTION_CODE_L1
fft_32(FIXP_DBL * x)545 inline void fft_32(FIXP_DBL *x)
546 {
547 
548 #define W_PiFOURTH STC(0x5a82799a)
549 
550   FIXP_DBL vr,vi,ur,ui;
551   FIXP_DBL y[64];
552 
553   /*
554    * 1+2 stage radix 4
555    */
556 
557 /////////////////////////////////////////////////////////////////////////////////////////
558 
559   // i = 0
560   vr = (x[ 0] + x[32])>>1;  /* Re A + Re B */
561   vi = (x[16] + x[48]);     /* Re C + Re D */
562   ur = (x[ 1] + x[33])>>1;  /* Im A + Im B */
563   ui = (x[17] + x[49]);     /* Im C + Im D */
564 
565   y[ 0] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
566   y[ 4] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
567   y[ 1] = ur + (ui>>1);     /* Im A' = sum of imag values */
568   y[ 5] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
569 
570   vr -= x[32];              /* Re A - Re B */
571   vi = (vi>>1) - x[48];     /* Re C - Re D */
572   ur -= x[33];              /* Im A - Im B */
573   ui = (ui>>1) - x[49];     /* Im C - Im D */
574 
575   y[ 2] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
576   y[ 6] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
577   y[ 3] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
578   y[ 7] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
579 
580   //i=8
581   vr = (x[ 8] + x[40])>>1;  /* Re A + Re B */
582   vi = (x[24] + x[56]);     /* Re C + Re D */
583   ur = (x[ 9] + x[41])>>1;  /* Im A + Im B */
584   ui = (x[25] + x[57]);     /* Im C + Im D */
585 
586   y[ 8] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
587   y[12] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
588   y[ 9] = ur + (ui>>1);     /* Im A' = sum of imag values */
589   y[13] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
590 
591   vr -= x[40];              /* Re A - Re B */
592   vi = (vi>>1) - x[56];     /* Re C - Re D */
593   ur -= x[41];              /* Im A - Im B */
594   ui = (ui>>1) - x[57];     /* Im C - Im D */
595 
596   y[10] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
597   y[14] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
598   y[11] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
599   y[15] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
600 
601   //i=16
602   vr = (x[ 4] + x[36])>>1;  /* Re A + Re B */
603   vi = (x[20] + x[52]);     /* Re C + Re D */
604   ur = (x[ 5] + x[37])>>1;  /* Im A + Im B */
605   ui = (x[21] + x[53]);     /* Im C + Im D */
606 
607   y[16] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
608   y[20] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
609   y[17] = ur + (ui>>1);     /* Im A' = sum of imag values */
610   y[21] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
611 
612   vr -= x[36];              /* Re A - Re B */
613   vi = (vi>>1) - x[52];     /* Re C - Re D */
614   ur -= x[37];              /* Im A - Im B */
615   ui = (ui>>1) - x[53];     /* Im C - Im D */
616 
617   y[18] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
618   y[22] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
619   y[19] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
620   y[23] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
621 
622   //i=24
623   vr = (x[12] + x[44])>>1;  /* Re A + Re B */
624   vi = (x[28] + x[60]);     /* Re C + Re D */
625   ur = (x[13] + x[45])>>1;  /* Im A + Im B */
626   ui = (x[29] + x[61]);     /* Im C + Im D */
627 
628   y[24] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
629   y[28] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
630   y[25] = ur + (ui>>1);     /* Im A' = sum of imag values */
631   y[29] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
632 
633   vr -= x[44];              /* Re A - Re B */
634   vi = (vi>>1) - x[60];     /* Re C - Re D */
635   ur -= x[45];              /* Im A - Im B */
636   ui = (ui>>1) - x[61];     /* Im C - Im D */
637 
638   y[26] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
639   y[30] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
640   y[27] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
641   y[31] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
642 
643   // i = 32
644   vr = (x[ 2] + x[34])>>1;  /* Re A + Re B */
645   vi = (x[18] + x[50]);     /* Re C + Re D */
646   ur = (x[ 3] + x[35])>>1;  /* Im A + Im B */
647   ui = (x[19] + x[51]);     /* Im C + Im D */
648 
649   y[32] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
650   y[36] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
651   y[33] = ur + (ui>>1);     /* Im A' = sum of imag values */
652   y[37] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
653 
654   vr -= x[34];              /* Re A - Re B */
655   vi = (vi>>1) - x[50];     /* Re C - Re D */
656   ur -= x[35];              /* Im A - Im B */
657   ui = (ui>>1) - x[51];     /* Im C - Im D */
658 
659   y[34] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
660   y[38] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
661   y[35] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
662   y[39] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
663 
664   //i=40
665   vr = (x[10] + x[42])>>1;  /* Re A + Re B */
666   vi = (x[26] + x[58]);     /* Re C + Re D */
667   ur = (x[11] + x[43])>>1;  /* Im A + Im B */
668   ui = (x[27] + x[59]);     /* Im C + Im D */
669 
670   y[40] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
671   y[44] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
672   y[41] = ur + (ui>>1);     /* Im A' = sum of imag values */
673   y[45] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
674 
675   vr -= x[42];              /* Re A - Re B */
676   vi = (vi>>1) - x[58];     /* Re C - Re D */
677   ur -= x[43];              /* Im A - Im B */
678   ui = (ui>>1) - x[59];     /* Im C - Im D */
679 
680   y[42] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
681   y[46] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
682   y[43] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
683   y[47] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
684 
685   //i=48
686   vr = (x[ 6] + x[38])>>1;  /* Re A + Re B */
687   vi = (x[22] + x[54]);     /* Re C + Re D */
688   ur = (x[ 7] + x[39])>>1;  /* Im A + Im B */
689   ui = (x[23] + x[55]);     /* Im C + Im D */
690 
691   y[48] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
692   y[52] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
693   y[49] = ur + (ui>>1);     /* Im A' = sum of imag values */
694   y[53] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
695 
696   vr -= x[38];              /* Re A - Re B */
697   vi = (vi>>1) - x[54];     /* Re C - Re D */
698   ur -= x[39];              /* Im A - Im B */
699   ui = (ui>>1) - x[55];     /* Im C - Im D */
700 
701   y[50] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
702   y[54] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
703   y[51] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
704   y[55] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
705 
706   //i=56
707   vr = (x[14] + x[46])>>1;  /* Re A + Re B */
708   vi = (x[30] + x[62]);     /* Re C + Re D */
709   ur = (x[15] + x[47])>>1;  /* Im A + Im B */
710   ui = (x[31] + x[63]);     /* Im C + Im D */
711 
712   y[56] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
713   y[60] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
714   y[57] = ur + (ui>>1);     /* Im A' = sum of imag values */
715   y[61] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
716 
717   vr -= x[46];              /* Re A - Re B */
718   vi = (vi>>1) - x[62];     /* Re C - Re D */
719   ur -= x[47];              /* Im A - Im B */
720   ui = (ui>>1) - x[63];     /* Im C - Im D */
721 
722   y[58] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
723   y[62] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
724   y[59] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
725   y[63] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
726 
727 
728   FIXP_DBL *xt = &x[0];
729   FIXP_DBL *yt = &y[0];
730 
731   int j = 4;
732   do
733   {
734     vr = yt[8];
735     vi = yt[9];
736     ur = yt[0]>>1;
737     ui = yt[1]>>1;
738     xt[ 0] = ur + (vr>>1);
739     xt[ 1] = ui + (vi>>1);
740     xt[ 8] = ur - (vr>>1);
741     xt[ 9] = ui - (vi>>1);
742 
743     vr = yt[13];
744     vi = yt[12];
745     ur = yt[4]>>1;
746     ui = yt[5]>>1;
747     xt[ 4] = ur + (vr>>1);
748     xt[ 5] = ui - (vi>>1);
749     xt[12] = ur - (vr>>1);
750     xt[13] = ui + (vi>>1);
751 
752     SUMDIFF_PIFOURTH(vi, vr, yt[10], yt[11])
753     ur = yt[2];
754     ui = yt[3];
755     xt[ 2] = (ur>>1) + vr;
756     xt[ 3] = (ui>>1) + vi;
757     xt[10] = (ur>>1) - vr;
758     xt[11] = (ui>>1) - vi;
759 
760     SUMDIFF_PIFOURTH(vr, vi, yt[14], yt[15])
761     ur = yt[6];
762     ui = yt[7];
763 
764     xt[ 6] = (ur>>1) + vr;
765     xt[ 7] = (ui>>1) - vi;
766     xt[14] = (ur>>1) - vr;
767     xt[15] = (ui>>1) + vi;
768     xt += 16;
769     yt += 16;
770   } while (--j != 0);
771 
772   vr = x[16];
773   vi = x[17];
774   ur = x[ 0]>>1;
775   ui = x[ 1]>>1;
776   x[ 0] = ur + (vr>>1);
777   x[ 1] = ui + (vi>>1);
778   x[16] = ur - (vr>>1);
779   x[17] = ui - (vi>>1);
780 
781   vi = x[24];
782   vr = x[25];
783   ur = x[ 8]>>1;
784   ui = x[ 9]>>1;
785   x[ 8] = ur + (vr>>1);
786   x[ 9] = ui - (vi>>1);
787   x[24] = ur - (vr>>1);
788   x[25] = ui + (vi>>1);
789 
790   vr = x[48];
791   vi = x[49];
792   ur = x[32]>>1;
793   ui = x[33]>>1;
794   x[32] = ur + (vr>>1);
795   x[33] = ui + (vi>>1);
796   x[48] = ur - (vr>>1);
797   x[49] = ui - (vi>>1);
798 
799   vi = x[56];
800   vr = x[57];
801   ur = x[40]>>1;
802   ui = x[41]>>1;
803   x[40] = ur + (vr>>1);
804   x[41] = ui - (vi>>1);
805   x[56] = ur - (vr>>1);
806   x[57] = ui + (vi>>1);
807 
808   cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]);
809   ur = x[ 2];
810   ui = x[ 3];
811   x[ 2] = (ur>>1) + vr;
812   x[ 3] = (ui>>1) + vi;
813   x[18] = (ur>>1) - vr;
814   x[19] = (ui>>1) - vi;
815 
816   cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]);
817   ur = x[10];
818   ui = x[11];
819   x[10] = (ur>>1) + vr;
820   x[11] = (ui>>1) - vi;
821   x[26] = (ur>>1) - vr;
822   x[27] = (ui>>1) + vi;
823 
824   cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]);
825   ur = x[34];
826   ui = x[35];
827   x[34] = (ur>>1) + vr;
828   x[35] = (ui>>1) + vi;
829   x[50] = (ur>>1) - vr;
830   x[51] = (ui>>1) - vi;
831 
832   cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]);
833   ur = x[42];
834   ui = x[43];
835   x[42] = (ur>>1) + vr;
836   x[43] = (ui>>1) - vi;
837   x[58] = (ur>>1) - vr;
838   x[59] = (ui>>1) + vi;
839 
840   SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
841   ur = x[ 4];
842   ui = x[ 5];
843   x[ 4] = (ur>>1) + vr;
844   x[ 5] = (ui>>1) + vi;
845   x[20] = (ur>>1) - vr;
846   x[21] = (ui>>1) - vi;
847 
848   SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
849   ur = x[12];
850   ui = x[13];
851   x[12] = (ur>>1) + vr;
852   x[13] = (ui>>1) - vi;
853   x[28] = (ur>>1) - vr;
854   x[29] = (ui>>1) + vi;
855 
856   SUMDIFF_PIFOURTH(vi, vr, x[52], x[53])
857   ur = x[36];
858   ui = x[37];
859   x[36] = (ur>>1) + vr;
860   x[37] = (ui>>1) + vi;
861   x[52] = (ur>>1) - vr;
862   x[53] = (ui>>1) - vi;
863 
864   SUMDIFF_PIFOURTH(vr, vi, x[60], x[61])
865   ur = x[44];
866   ui = x[45];
867   x[44] = (ur>>1) + vr;
868   x[45] = (ui>>1) - vi;
869   x[60] = (ur>>1) - vr;
870   x[61] = (ui>>1) + vi;
871 
872 
873   cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]);
874   ur = x[ 6];
875   ui = x[ 7];
876   x[ 6] = (ur>>1) + vr;
877   x[ 7] = (ui>>1) + vi;
878   x[22] = (ur>>1) - vr;
879   x[23] = (ui>>1) - vi;
880 
881   cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]);
882   ur = x[14];
883   ui = x[15];
884   x[14] = (ur>>1) + vr;
885   x[15] = (ui>>1) - vi;
886   x[30] = (ur>>1) - vr;
887   x[31] = (ui>>1) + vi;
888 
889   cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]);
890   ur = x[38];
891   ui = x[39];
892   x[38] = (ur>>1) + vr;
893   x[39] = (ui>>1) + vi;
894   x[54] = (ur>>1) - vr;
895   x[55] = (ui>>1) - vi;
896 
897   cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]);
898   ur = x[46];
899   ui = x[47];
900 
901   x[46] = (ur>>1) + vr;
902   x[47] = (ui>>1) - vi;
903   x[62] = (ur>>1) - vr;
904   x[63] = (ui>>1) + vi;
905 
906   vr = x[32];
907   vi = x[33];
908   ur = x[ 0]>>1;
909   ui = x[ 1]>>1;
910   x[ 0] = ur + (vr>>1);
911   x[ 1] = ui + (vi>>1);
912   x[32] = ur - (vr>>1);
913   x[33] = ui - (vi>>1);
914 
915   vi = x[48];
916   vr = x[49];
917   ur = x[16]>>1;
918   ui = x[17]>>1;
919   x[16] = ur + (vr>>1);
920   x[17] = ui - (vi>>1);
921   x[48] = ur - (vr>>1);
922   x[49] = ui + (vi>>1);
923 
924   cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]);
925   ur = x[ 2];
926   ui = x[ 3];
927   x[ 2] = (ur>>1) + vr;
928   x[ 3] = (ui>>1) + vi;
929   x[34] = (ur>>1) - vr;
930   x[35] = (ui>>1) - vi;
931 
932   cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]);
933   ur = x[18];
934   ui = x[19];
935   x[18] = (ur>>1) + vr;
936   x[19] = (ui>>1) - vi;
937   x[50] = (ur>>1) - vr;
938   x[51] = (ui>>1) + vi;
939 
940   cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]);
941   ur = x[ 4];
942   ui = x[ 5];
943   x[ 4] = (ur>>1) + vr;
944   x[ 5] = (ui>>1) + vi;
945   x[36] = (ur>>1) - vr;
946   x[37] = (ui>>1) - vi;
947 
948   cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]);
949   ur = x[20];
950   ui = x[21];
951   x[20] = (ur>>1) + vr;
952   x[21] = (ui>>1) - vi;
953   x[52] = (ur>>1) - vr;
954   x[53] = (ui>>1) + vi;
955 
956   cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]);
957   ur = x[ 6];
958   ui = x[ 7];
959   x[ 6] = (ur>>1) + vr;
960   x[ 7] = (ui>>1) + vi;
961   x[38] = (ur>>1) - vr;
962   x[39] = (ui>>1) - vi;
963 
964   cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]);
965   ur = x[22];
966   ui = x[23];
967   x[22] = (ur>>1) + vr;
968   x[23] = (ui>>1) - vi;
969   x[54] = (ur>>1) - vr;
970   x[55] = (ui>>1) + vi;
971 
972   SUMDIFF_PIFOURTH(vi, vr, x[40], x[41])
973   ur = x[ 8];
974   ui = x[ 9];
975   x[ 8] = (ur>>1) + vr;
976   x[ 9] = (ui>>1) + vi;
977   x[40] = (ur>>1) - vr;
978   x[41] = (ui>>1) - vi;
979 
980   SUMDIFF_PIFOURTH(vr, vi, x[56], x[57])
981   ur = x[24];
982   ui = x[25];
983   x[24] = (ur>>1) + vr;
984   x[25] = (ui>>1) - vi;
985   x[56] = (ur>>1) - vr;
986   x[57] = (ui>>1) + vi;
987 
988   cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]);
989   ur = x[10];
990   ui = x[11];
991 
992   x[10] = (ur>>1) + vr;
993   x[11] = (ui>>1) + vi;
994   x[42] = (ur>>1) - vr;
995   x[43] = (ui>>1) - vi;
996 
997   cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]);
998   ur = x[26];
999   ui = x[27];
1000   x[26] = (ur>>1) + vr;
1001   x[27] = (ui>>1) - vi;
1002   x[58] = (ur>>1) - vr;
1003   x[59] = (ui>>1) + vi;
1004 
1005   cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]);
1006   ur = x[12];
1007   ui = x[13];
1008   x[12] = (ur>>1) + vr;
1009   x[13] = (ui>>1) + vi;
1010   x[44] = (ur>>1) - vr;
1011   x[45] = (ui>>1) - vi;
1012 
1013   cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]);
1014   ur = x[28];
1015   ui = x[29];
1016   x[28] = (ur>>1) + vr;
1017   x[29] = (ui>>1) - vi;
1018   x[60] = (ur>>1) - vr;
1019   x[61] = (ui>>1) + vi;
1020 
1021   cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]);
1022   ur = x[14];
1023   ui = x[15];
1024   x[14] = (ur>>1) + vr;
1025   x[15] = (ui>>1) + vi;
1026   x[46] = (ur>>1) - vr;
1027   x[47] = (ui>>1) - vi;
1028 
1029   cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]);
1030   ur = x[30];
1031   ui = x[31];
1032   x[30] = (ur>>1) + vr;
1033   x[31] = (ui>>1) - vi;
1034   x[62] = (ur>>1) - vr;
1035   x[63] = (ui>>1) + vi;
1036 }
1037 #endif /* #ifndef FUNCTION_fft_32 */
1038 
1039 
1040 /**
1041  * \brief Apply rotation vectors to a data buffer.
1042  * \param cl length of each row of input data.
1043  * \param l total length of input data.
1044  * \param pVecRe real part of rotation ceofficient vector.
1045  * \param pVecIm imaginary part of rotation ceofficient vector.
1046  */
fft_apply_rot_vector(FIXP_DBL * RESTRICT pData,const int cl,const int l,const FIXP_STB * pVecRe,const FIXP_STB * pVecIm)1047 static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl, const int l, const FIXP_STB *pVecRe, const FIXP_STB *pVecIm)
1048 {
1049   FIXP_DBL re, im;
1050   FIXP_STB vre, vim;
1051 
1052   int i, c;
1053 
1054   for(i=0; i<cl; i++) {
1055     re  = pData[2*i];
1056     im  = pData[2*i+1];
1057 
1058     pData[2*i]   = re>>2; /* * 0.25 */
1059     pData[2*i+1] = im>>2; /* * 0.25 */
1060   }
1061   for(; i<l; i+=cl)
1062   {
1063     re  = pData[2*i];
1064     im  = pData[2*i+1];
1065 
1066     pData[2*i]   = re>>2; /* * 0.25 */
1067     pData[2*i+1] = im>>2; /* * 0.25 */
1068 
1069     for (c=i+1; c<i+cl; c++)
1070     {
1071       re  = pData[2*c]>>1;
1072       im  = pData[2*c+1]>>1;
1073       vre = *pVecRe++;
1074       vim = *pVecIm++;
1075 
1076       cplxMultDiv2(&pData[2*c+1], &pData[2*c], im, re, vre, vim);
1077     }
1078   }
1079 }
1080 
1081 #define FFT_TWO_STAGE_MACRO_ENABLE
1082 
1083 
1084 #ifdef FFT_TWO_STAGE_MACRO_ENABLE
1085 
1086 #define fftN2(pInput, length, dim1, dim2, fft_func1, fft_func2, RotVectorReal, RotVectorImag) \
1087 { \
1088   int       i, j; \
1089  \
1090   C_ALLOC_SCRATCH_START(aDst, FIXP_DBL, length*2); \
1091   C_ALLOC_SCRATCH_START(aDst2, FIXP_DBL, dim2*2); \
1092  \
1093   FDK_ASSERT(length == dim1*dim2); \
1094  \
1095   /* Perform dim2 times the fft of length dim1. The input samples are at the address of pSrc and the \
1096   output samples are at the address of pDst. The input vector for the fft of length dim1 is built \
1097   of the interleaved samples in pSrc, the output samples are stored consecutively. \
1098   */ \
1099   { \
1100     const FIXP_DBL* pSrc = pInput; \
1101     FIXP_DBL  *RESTRICT pDst = aDst; \
1102     \
1103     for(i=0; i<dim2; i++) \
1104     { \
1105       for(j=0; j<dim1; j++) \
1106       { \
1107         pDst[2*j]   = pSrc[2*j*dim2]; \
1108         pDst[2*j+1] = pSrc[2*j*dim2+1]; \
1109       } \
1110       \
1111       fft_func1(pDst); \
1112       pSrc += 2; \
1113       pDst = pDst + 2*dim1; \
1114     } \
1115   } \
1116   \
1117   /* Perform the modulation of the output of the fft of length dim1 */ \
1118   fft_apply_rot_vector(aDst, dim1, length, RotVectorReal, RotVectorImag); \
1119   \
1120   /* Perform dim1 times the fft of length dim2. The input samples are at the address of aDst and the \
1121   output samples are at the address of pInput. The input vector for the fft of length dim2 is built \
1122   of the interleaved samples in aDst, the output samples are stored consecutively at the address \
1123   of pInput. \
1124   */ \
1125   { \
1126     const FIXP_DBL* pSrc       = aDst; \
1127     FIXP_DBL *RESTRICT pDst    = aDst2; \
1128     FIXP_DBL *RESTRICT pDstOut = pInput; \
1129     \
1130     for(i=0; i<dim1; i++) \
1131     { \
1132       for(j=0; j<dim2; j++) \
1133       { \
1134         pDst[2*j]   = pSrc[2*j*dim1]; \
1135         pDst[2*j+1] = pSrc[2*j*dim1+1]; \
1136       } \
1137       \
1138       fft_func2(pDst); \
1139       \
1140       for(j=0; j<dim2; j++) \
1141       { \
1142         pDstOut[2*j*dim1]   = pDst[2*j]; \
1143         pDstOut[2*j*dim1+1] = pDst[2*j+1]; \
1144       } \
1145       pSrc += 2; \
1146       pDstOut += 2; \
1147     } \
1148   } \
1149   \
1150   C_ALLOC_SCRATCH_END(aDst2, FIXP_DBL, dim2*2); \
1151   C_ALLOC_SCRATCH_END(aDst, FIXP_DBL, length*2); \
1152 } \
1153 
1154 #else /* FFT_TWO_STAGE_MACRO_ENABLE */
1155 
1156 /* select either switch case of function pointer. */
1157 //#define FFT_TWO_STAGE_SWITCH_CASE
1158 
fftN2(FIXP_DBL * pInput,const int length,const int dim1,const int dim2,void (* const fft1)(FIXP_DBL *),void (* const fft2)(FIXP_DBL *),const FIXP_STB * RotVectorReal,const FIXP_STB * RotVectorImag)1159 static inline void fftN2(
1160         FIXP_DBL *pInput,
1161         const int length,
1162         const int dim1,
1163         const int dim2,
1164         void (* const fft1)(FIXP_DBL *),
1165         void (* const fft2)(FIXP_DBL *),
1166         const FIXP_STB *RotVectorReal,
1167         const FIXP_STB *RotVectorImag
1168         )
1169 {
1170   /* The real part of the input samples are at the addresses with even indices and the imaginary
1171   part of the input samples are at the addresses with odd indices. The output samples are stored
1172   at the address of pInput
1173   */
1174   FIXP_DBL  *pSrc, *pDst, *pDstOut;
1175   int       i, j;
1176 
1177   C_ALLOC_SCRATCH_START(aDst, FIXP_DBL, length*2);
1178   C_ALLOC_SCRATCH_START(aDst2, FIXP_DBL, dim2*2);
1179 
1180   FDK_ASSERT(length == dim1*dim2);
1181 
1182   /* Perform dim2 times the fft of length dim1. The input samples are at the address of pSrc and the
1183   output samples are at the address of pDst. The input vector for the fft of length dim1 is built
1184   of the interleaved samples in pSrc, the output samples are stored consecutively.
1185   */
1186   pSrc = pInput;
1187   pDst = aDst;
1188   for(i=0; i<length/dim1; i++)
1189   {
1190     for(j=0; j<length/dim2; j++)
1191     {
1192       pDst[2*j]   = pSrc[2*j*dim2];
1193       pDst[2*j+1] = pSrc[2*j*dim2+1];
1194     }
1195 
1196     /* fft of size dim1 */
1197 #ifndef FFT_TWO_STAGE_SWITCH_CASE
1198     fft1(pDst);
1199 #else
1200     switch (dim1) {
1201       case 3: fft3(pDst); break;
1202       case 4: fft_4(pDst); break;
1203       case 5: fft5(pDst); break;
1204       case 8: fft_8(pDst); break;
1205       case 15: fft15(pDst); break;
1206       case 16: fft_16(pDst); break;
1207       case 32: fft_32(pDst); break;
1208       /*case 64: fft_64(pDst); break;*/
1209       case 128: fft_128(pDst); break;
1210     }
1211 #endif
1212     pSrc += 2;
1213     pDst = pDst + 2*length/dim2;
1214   }
1215 
1216   /* Perform the modulation of the output of the fft of length dim1 */
1217   pSrc=aDst;
1218   fft_apply_rot_vector(pSrc, length/dim2, length, RotVectorReal, RotVectorImag);
1219 
1220   /* Perform dim1 times the fft of length dim2. The input samples are at the address of aDst and the
1221   output samples are at the address of pInput. The input vector for the fft of length dim2 is built
1222   of the interleaved samples in aDst, the output samples are stored consecutively at the address
1223   of pInput.
1224   */
1225   pSrc    = aDst;
1226   pDst    = aDst2;
1227   pDstOut = pInput;
1228   for(i=0; i<length/dim2; i++)
1229   {
1230     for(j=0; j<length/dim1; j++)
1231     {
1232       pDst[2*j]   = pSrc[2*j*dim1];
1233       pDst[2*j+1] = pSrc[2*j*dim1+1];
1234     }
1235 
1236 #ifndef FFT_TWO_STAGE_SWITCH_CASE
1237     fft2(pDst);
1238 #else
1239     switch (dim2) {
1240       case 3: fft3(pDst); break;
1241       case 4: fft_4(pDst); break;
1242       case 5: fft5(pDst); break;
1243       case 8: fft_8(pDst); break;
1244       case 15: fft15(pDst); break;
1245       case 16: fft_16(pDst); break;
1246       case 32: fft_32(pDst); break;
1247       /*case 64: fft_64(pDst); break;*/
1248       case 128: fft_128(pDst); break;
1249     }
1250 #endif
1251 
1252     for(j=0; j<length/dim1; j++)
1253     {
1254       pDstOut[2*j*dim1]   = pDst[2*j];
1255       pDstOut[2*j*dim1+1] = pDst[2*j+1];
1256     }
1257     pSrc += 2;
1258     pDstOut += 2;
1259   }
1260 
1261   C_ALLOC_SCRATCH_END(aDst2, FIXP_DBL, dim2*2);
1262   C_ALLOC_SCRATCH_END(aDst, FIXP_DBL, length*2);
1263 }
1264 
1265 #endif /* FFT_TWO_STAGE_MACRO_ENABLE */
1266 
1267 
1268 
1269 
1270 
1271 
1272 
1273 
1274 
1275 
1276 
1277 
1278 #define SCALEFACTOR60         5
1279 /**
1280 The function performs the fft of length 60. It is splittet into fft's of length 4 and fft's of
1281 length 15. Between the fft's a modolation is calculated.
1282 */
fft60(FIXP_DBL * pInput,INT * pScalefactor)1283 static inline void fft60(FIXP_DBL *pInput, INT *pScalefactor)
1284 {
1285   fftN2(
1286           pInput, 60, 4, 15,
1287           fft_4, fft15,
1288           RotVectorReal60, RotVectorImag60
1289           );
1290   *pScalefactor += SCALEFACTOR60;
1291 }
1292 
1293 
1294 
1295 /* Fallback implementation in case of no better implementation available. */
1296 
1297 #define SCALEFACTOR240        7
1298 
1299 /**
1300 The function performs the fft of length 240. It is splittet into fft's of length 16 and fft's of
1301 length 15. Between the fft's a modulation is calculated.
1302 */
fft240(FIXP_DBL * pInput,INT * pScalefactor)1303 static inline void fft240(FIXP_DBL *pInput, INT *pScalefactor)
1304 {
1305   fftN2(
1306           pInput, 240, 16, 15,
1307           fft_16, fft15,
1308           RotVectorReal240, RotVectorImag240
1309           );
1310   *pScalefactor += SCALEFACTOR240;
1311 }
1312 
1313 
1314 #define SCALEFACTOR480        8
1315 #define N32                   32
1316 #define TABLE_SIZE_16        (32/2)
1317 
1318 /**
1319 The function performs the fft of length 480. It is splittet into fft's of length 32 and fft's of
1320 length 15. Between the fft's a modulation is calculated.
1321 */
fft480(FIXP_DBL * pInput,INT * pScalefactor)1322 static inline void fft480(FIXP_DBL *pInput, INT *pScalefactor)
1323 {
1324   fftN2(
1325           pInput, 480, 32, 15,
1326           fft_32, fft15,
1327           RotVectorReal480, RotVectorImag480
1328           );
1329   *pScalefactor += SCALEFACTOR480;
1330 }
1331 
fft(int length,FIXP_DBL * pInput,INT * pScalefactor)1332 void fft(int length, FIXP_DBL *pInput, INT *pScalefactor)
1333 {
1334   if (length == 32)
1335   {
1336       fft_32(pInput);
1337       *pScalefactor += SCALEFACTOR32;
1338   }
1339   else
1340   {
1341 
1342   switch (length) {
1343     case 16:
1344       fft_16(pInput);
1345       *pScalefactor += SCALEFACTOR16;
1346       break;
1347     case 8:
1348       fft_8(pInput);
1349       *pScalefactor += SCALEFACTOR8;
1350       break;
1351     case 3:
1352       fft3(pInput);
1353       break;
1354     case 4:
1355       fft_4(pInput);
1356       *pScalefactor += SCALEFACTOR4;
1357       break;
1358     case 5:
1359       fft5(pInput);
1360       break;
1361     case 15:
1362       fft15(pInput);
1363       *pScalefactor += 2;
1364       break;
1365     case 60:
1366       fft60(pInput, pScalefactor);
1367       break;
1368     case 64:
1369       dit_fft(pInput, 6, SineTable512, 512);
1370       *pScalefactor += SCALEFACTOR64;
1371       break;
1372     case 240:
1373       fft240(pInput, pScalefactor);
1374       break;
1375     case 256:
1376       dit_fft(pInput, 8, SineTable512, 512);
1377       *pScalefactor += SCALEFACTOR256;
1378       break;
1379     case 480:
1380       fft480(pInput, pScalefactor);
1381       break;
1382     case 512:
1383       dit_fft(pInput, 9, SineTable512, 512);
1384       *pScalefactor += SCALEFACTOR512;
1385       break;
1386     default:
1387       FDK_ASSERT(0); /* FFT length not supported! */
1388       break;
1389   }
1390   }
1391 }
1392 
1393 
ifft(int length,FIXP_DBL * pInput,INT * scalefactor)1394 void ifft(int length, FIXP_DBL *pInput, INT *scalefactor)
1395 {
1396   switch (length) {
1397     default:
1398       FDK_ASSERT(0); /* IFFT length not supported! */
1399       break;
1400   }
1401 }
1402 
1403 
1404