1 
2 /* -----------------------------------------------------------------------------------------------------------
3 Software License for The Fraunhofer FDK AAC Codec Library for Android
4 
5 � Copyright  1995 - 2013 Fraunhofer-Gesellschaft zur F�rderung der angewandten Forschung e.V.
6   All rights reserved.
7 
8  1.    INTRODUCTION
9 The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software that implements
10 the MPEG Advanced Audio Coding ("AAC") encoding and decoding scheme for digital audio.
11 This FDK AAC Codec software is intended to be used on a wide variety of Android devices.
12 
13 AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient general perceptual
14 audio codecs. AAC-ELD is considered the best-performing full-bandwidth communications codec by
15 independent studies and is widely deployed. AAC has been standardized by ISO and IEC as part
16 of the MPEG specifications.
17 
18 Patent licenses for necessary patent claims for the FDK AAC Codec (including those of Fraunhofer)
19 may be obtained through Via Licensing (www.vialicensing.com) or through the respective patent owners
20 individually for the purpose of encoding or decoding bit streams in products that are compliant with
21 the ISO/IEC MPEG audio standards. Please note that most manufacturers of Android devices already license
22 these patent claims through Via Licensing or directly from the patent owners, and therefore FDK AAC Codec
23 software may already be covered under those patent licenses when it is used for those licensed purposes only.
24 
25 Commercially-licensed AAC software libraries, including floating-point versions with enhanced sound quality,
26 are also available from Fraunhofer. Users are encouraged to check the Fraunhofer website for additional
27 applications information and documentation.
28 
29 2.    COPYRIGHT LICENSE
30 
31 Redistribution and use in source and binary forms, with or without modification, are permitted without
32 payment of copyright license fees provided that you satisfy the following conditions:
33 
34 You must retain the complete text of this software license in redistributions of the FDK AAC Codec or
35 your modifications thereto in source code form.
36 
37 You must retain the complete text of this software license in the documentation and/or other materials
38 provided with redistributions of the FDK AAC Codec or your modifications thereto in binary form.
39 You must make available free of charge copies of the complete source code of the FDK AAC Codec and your
40 modifications thereto to recipients of copies in binary form.
41 
42 The name of Fraunhofer may not be used to endorse or promote products derived from this library without
43 prior written permission.
44 
45 You may not charge copyright license fees for anyone to use, copy or distribute the FDK AAC Codec
46 software or your modifications thereto.
47 
48 Your modified versions of the FDK AAC Codec must carry prominent notices stating that you changed the software
49 and the date of any change. For modified versions of the FDK AAC Codec, the term
50 "Fraunhofer FDK AAC Codec Library for Android" must be replaced by the term
51 "Third-Party Modified Version of the Fraunhofer FDK AAC Codec Library for Android."
52 
53 3.    NO PATENT LICENSE
54 
55 NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without limitation the patents of Fraunhofer,
56 ARE GRANTED BY THIS SOFTWARE LICENSE. Fraunhofer provides no warranty of patent non-infringement with
57 respect to this software.
58 
59 You may use this FDK AAC Codec software or modifications thereto only for purposes that are authorized
60 by appropriate patent licenses.
61 
62 4.    DISCLAIMER
63 
64 This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright holders and contributors
65 "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties
66 of merchantability and fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
67 CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages,
68 including but not limited to procurement of substitute goods or services; loss of use, data, or profits,
69 or business interruption, however caused and on any theory of liability, whether in contract, strict
70 liability, or tort (including negligence), arising in any way out of the use of this software, even if
71 advised of the possibility of such damage.
72 
73 5.    CONTACT INFORMATION
74 
75 Fraunhofer Institute for Integrated Circuits IIS
76 Attention: Audio and Multimedia Departments - FDK AAC LL
77 Am Wolfsmantel 33
78 91058 Erlangen, Germany
79 
80 www.iis.fraunhofer.de/amm
81 amm-info@iis.fraunhofer.de
82 ----------------------------------------------------------------------------------------------------------- */
83 
84 /*!
85   \file   dct.cpp
86   \brief  DCT Implementations
87   Library functions to calculate standard DCTs. This will most likely be replaced by hand-optimized
88   functions for the specific target processor.
89 
90   Three different implementations of the dct type II and the dct type III transforms are provided.
91 
92   By default implementations which are based on a single, standard complex FFT-kernel are used (dctII_f() and dctIII_f()).
93   These are specifically helpful in cases where optimized FFT libraries are already available. The FFT used in these
94   implementation is FFT rad2 from FDK_tools.
95 
96   Of course, one might also use DCT-libraries should they be available. The DCT and DST
97   type IV implementations are only available in a version based on a complex FFT kernel.
98 */
99 
100 #include "dct.h"
101 
102 
103 #include "FDK_tools_rom.h"
104 #include "fft.h"
105 
106 
107 #if defined(__arm__)
108 #include "arm/dct_arm.cpp"
109 #endif
110 
111 
112 #if !defined(FUNCTION_dct_III)
dct_III(FIXP_DBL * pDat,FIXP_DBL * tmp,int L,int * pDat_e)113 void dct_III(FIXP_DBL *pDat, /*!< pointer to input/output */
114              FIXP_DBL *tmp,  /*!< pointer to temporal working buffer */
115              int L,          /*!< lenght of transform */
116              int *pDat_e
117              )
118 {
119   FDK_ASSERT(L == 64 || L == 32);
120   int  i;
121   FIXP_DBL xr, accu1, accu2;
122   int inc;
123   int M = L>>1;
124   int ld_M;
125 
126   if (L == 64)  ld_M = 5;
127   else          ld_M = 4;
128 
129   /* This loop performs multiplication for index i (i*inc) */
130   inc = (64/2) >> ld_M; /* 64/L */
131 
132   FIXP_DBL *pTmp_0 = &tmp[2];
133   FIXP_DBL *pTmp_1 = &tmp[(M-1)*2];
134 
135   for(i=1; i<M>>1; i++,pTmp_0+=2,pTmp_1-=2) {
136 
137     FIXP_DBL accu3,accu4,accu5,accu6;
138 
139     cplxMultDiv2(&accu2, &accu1, pDat[L - i], pDat[i], sin_twiddle_L64[i*inc]);
140     cplxMultDiv2(&accu4, &accu3, pDat[M+i], pDat[M-i], sin_twiddle_L64[(M-i)*inc]);
141     accu3 >>= 1; accu4 >>= 1;
142 
143     /* This method is better for ARM926, that uses operand2 shifted right by 1 always */
144     cplxMultDiv2(&accu6, &accu5, (accu3 - (accu1>>1)), ((accu2>>1) + accu4), sin_twiddle_L64[(4*i)*inc]);
145     xr = (accu1>>1) + accu3;
146     pTmp_0[0] = (xr>>1) - accu5;
147     pTmp_1[0] = (xr>>1) + accu5;
148 
149     xr = (accu2>>1) - accu4;
150     pTmp_0[1] =  (xr>>1) - accu6;
151     pTmp_1[1] = -((xr>>1) + accu6);
152 
153   }
154 
155   xr     = fMultDiv2(pDat[M], sin_twiddle_L64[64/2].v.re );/* cos((PI/(2*L))*M); */
156   tmp[0] = ((pDat[0]>>1) + xr)>>1;
157   tmp[1] = ((pDat[0]>>1) - xr)>>1;
158 
159   cplxMultDiv2(&accu2, &accu1, pDat[L - (M/2)], pDat[M/2], sin_twiddle_L64[64/4]);
160   tmp[M]   = accu1>>1;
161   tmp[M+1] = accu2>>1;
162 
163   /* dit_fft expects 1 bit scaled input values */
164   fft(M, tmp, pDat_e);
165 
166   /* ARM926: 12 cycles per 2-iteration, no overhead code by compiler */
167   pTmp_1 = &tmp[L];
168   for (i = M>>1; i--;)
169   {
170     FIXP_DBL tmp1, tmp2, tmp3, tmp4;
171     tmp1 = *tmp++;
172     tmp2 = *tmp++;
173     tmp3 = *--pTmp_1;
174     tmp4 = *--pTmp_1;
175     *pDat++ = tmp1;
176     *pDat++ = tmp3;
177     *pDat++ = tmp2;
178     *pDat++ = tmp4;
179   }
180 
181   *pDat_e += 2;
182 }
183 #endif
184 
185 #if !defined(FUNCTION_dct_II)
dct_II(FIXP_DBL * pDat,FIXP_DBL * tmp,int L,int * pDat_e)186 void dct_II(FIXP_DBL *pDat, /*!< pointer to input/output */
187             FIXP_DBL *tmp,  /*!< pointer to temporal working buffer */
188             int L,          /*!< lenght of transform */
189             int *pDat_e
190             )
191 {
192     FDK_ASSERT(L == 64 || L == 32);
193     FIXP_DBL accu1,accu2;
194     FIXP_DBL *pTmp_0, *pTmp_1;
195 
196     int i;
197     int inc;
198     int M =  L>>1;
199     int ld_M;
200 
201     FDK_ASSERT(L == 64 || L == 32);
202     ld_M = 4 + (L >> 6);  /* L=64: 5,  L=32: 4 */
203 
204     inc = (64/2) >> ld_M; /* L=64: 1,  L=32: 2 */
205 
206     FIXP_DBL *pdat  = &pDat[0];
207     FIXP_DBL accu3, accu4;
208     pTmp_0 = &tmp[0];
209     pTmp_1 = &tmp[L-1];
210     for (i = M>>1; i--; )
211     {
212       accu1 = *pdat++;
213       accu2 = *pdat++;
214       accu3 = *pdat++;
215       accu4 = *pdat++;
216       accu1 >>= 1;
217       accu2 >>= 1;
218       accu3 >>= 1;
219       accu4 >>= 1;
220       *pTmp_0++ = accu1;
221       *pTmp_0++ = accu3;
222       *pTmp_1-- = accu2;
223       *pTmp_1-- = accu4;
224     }
225 
226 
227     fft(M, tmp, pDat_e);
228 
229     pTmp_0 = &tmp[2];
230     pTmp_1 = &tmp[(M-1)*2];
231 
232     for (i=1; i<M>>1; i++,pTmp_0+=2,pTmp_1-=2) {
233 
234       FIXP_DBL a1,a2;
235       FIXP_DBL accu3, accu4;
236 
237       a1 = ((pTmp_0[1]>>1) + (pTmp_1[1]>>1));
238       a2 = ((pTmp_1[0]>>1) - (pTmp_0[0]>>1));
239 
240       cplxMultDiv2(&accu1, &accu2, a2, a1, sin_twiddle_L64[(4*i)*inc]);
241       accu1<<=1; accu2<<=1;
242 
243       a1 = ((pTmp_0[0]>>1) + (pTmp_1[0]>>1));
244       a2 = ((pTmp_0[1]>>1) - (pTmp_1[1]>>1));
245 
246       cplxMultDiv2(&accu3, &accu4, (a1 + accu2), -(accu1 + a2), sin_twiddle_L64[i*inc]);
247       pDat[L - i] = accu4;
248       pDat[i]     = accu3;
249 
250       cplxMultDiv2(&accu3, &accu4, (a1 - accu2), -(accu1 - a2), sin_twiddle_L64[(M-i)*inc]);
251       pDat[M + i] = accu4;
252       pDat[M - i] = accu3;
253 
254     }
255 
256     cplxMultDiv2(&accu1, &accu2, tmp[M], tmp[M+1], sin_twiddle_L64[(M/2)*inc]);
257     pDat[L - (M/2)] = accu2;
258     pDat[M/2]       = accu1;
259 
260     pDat[0] = (tmp[0]>>1)+(tmp[1]>>1);
261     pDat[M] =  fMult(((tmp[0]>>1)-(tmp[1]>>1)), sin_twiddle_L64[64/2].v.re);/* cos((PI/(2*L))*M); */
262 
263     *pDat_e += 2;
264 }
265 #endif
266 
267 static
getTables(const FIXP_WTP ** twiddle,const FIXP_STP ** sin_twiddle,int * sin_step,int length)268 void getTables(const FIXP_WTP **twiddle, const FIXP_STP **sin_twiddle, int *sin_step, int length)
269 {
270   int ld2_length;
271 
272  /* Get ld2 of length - 2 + 1
273      -2: because first table entry is window of size 4
274      +1: because we already include +1 because of ceil(log2(length)) */
275   ld2_length = DFRACT_BITS-1-fNormz((FIXP_DBL)length) - 1;
276 
277   /* Extract sort of "eigenvalue" (the 4 left most bits) of length. */
278   switch ( (length) >> (ld2_length-1) ) {
279     case 0x4: /* radix 2 */
280       *sin_twiddle = SineTable512;
281       *sin_step = 1<<(9 - ld2_length);
282       *twiddle = windowSlopes[0][0][ld2_length-1];
283       break;
284     case 0x7: /* 10 ms */
285       *sin_twiddle = SineTable480;
286       *sin_step = 1<<(8 - ld2_length);
287       *twiddle = windowSlopes[0][1][ld2_length];
288       break;
289     default:
290       *sin_twiddle = NULL;
291       *sin_step = 0;
292       *twiddle = NULL;
293       break;
294   }
295 
296   FDK_ASSERT(*twiddle != NULL);
297 
298   FDK_ASSERT(*sin_step > 0);
299 
300 }
301 
302 #if !defined(FUNCTION_dct_IV)
303 
dct_IV(FIXP_DBL * pDat,int L,int * pDat_e)304 void dct_IV(FIXP_DBL *pDat,
305             int L,
306             int *pDat_e)
307 {
308   int sin_step = 0;
309   int M = L >> 1;
310 
311   const FIXP_WTP *twiddle;
312   const FIXP_STP *sin_twiddle;
313 
314   FDK_ASSERT(L >= 4);
315 
316   getTables(&twiddle, &sin_twiddle, &sin_step, L);
317 
318 #ifdef FUNCTION_dct_IV_func1
319   if (M>=4 && (M&3) == 0) {
320      /* ARM926: 44 cycles for 2 iterations = 22 cycles/iteration */
321     dct_IV_func1(M>>2, twiddle,  &pDat[0], &pDat[L-1]);
322   } else
323 #endif /* FUNCTION_dct_IV_func1 */
324   {
325     FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
326     FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
327     register int i;
328 
329     /* 29 cycles on ARM926 */
330     for (i = 0; i < M-1; i+=2,pDat_0+=2,pDat_1-=2)
331     {
332       register FIXP_DBL accu1,accu2,accu3,accu4;
333 
334       accu1 = pDat_1[1]; accu2 = pDat_0[0];
335       accu3 = pDat_0[1]; accu4 = pDat_1[0];
336 
337       cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
338       cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i+1]);
339 
340       pDat_0[0] = accu2; pDat_0[1] = accu1;
341       pDat_1[0] = accu4; pDat_1[1] = -accu3;
342     }
343     if (M&1)
344     {
345       register FIXP_DBL accu1,accu2;
346 
347       accu1 = pDat_1[1]; accu2 = pDat_0[0];
348 
349       cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
350 
351       pDat_0[0] = accu2; pDat_0[1] = accu1;
352     }
353   }
354 
355   fft(M, pDat, pDat_e);
356 
357 #ifdef FUNCTION_dct_IV_func2
358   if (M>=4 && (M&3) == 0) {
359      /* ARM926: 42 cycles for 2 iterations = 21 cycles/iteration */
360     dct_IV_func2(M>>2, sin_twiddle, &pDat[0], &pDat[L], sin_step);
361   } else
362 #endif /* FUNCTION_dct_IV_func2 */
363   {
364     FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
365     FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
366     register FIXP_DBL accu1,accu2,accu3,accu4;
367     int idx, i;
368 
369     /* Sin and Cos values are 0.0f and 1.0f */
370     accu1 = pDat_1[0];
371     accu2 = pDat_1[1];
372 
373     pDat_1[1] = -(pDat_0[1]>>1);
374     pDat_0[0] = (pDat_0[0]>>1);
375 
376 
377     /* 28 cycles for ARM926 */
378     for (idx = sin_step,i=1; i<(M+1)>>1; i++, idx+=sin_step)
379     {
380       FIXP_STP twd = sin_twiddle[idx];
381       cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd);
382       pDat_0[1] =  accu3;
383       pDat_1[0] =  accu4;
384 
385       pDat_0+=2;
386       pDat_1-=2;
387 
388       cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
389 
390       accu1 = pDat_1[0];
391       accu2 = pDat_1[1];
392 
393       pDat_1[1] = -accu3;
394       pDat_0[0] =  accu4;
395     }
396 
397     if ( (M&1) == 0 )
398     {
399       /* Last Sin and Cos value pair are the same */
400       accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
401       accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
402 
403       pDat_1[0] = accu1 + accu2;
404       pDat_0[1] = accu1 - accu2;
405     }
406   }
407 
408   /* Add twiddeling scale. */
409   *pDat_e += 2;
410 }
411 #endif /* defined (FUNCTION_dct_IV) */
412 
413 #if !defined(FUNCTION_dst_IV)
dst_IV(FIXP_DBL * pDat,int L,int * pDat_e)414 void dst_IV(FIXP_DBL *pDat,
415             int L,
416             int *pDat_e )
417 {
418   int sin_step = 0;
419   int M = L >> 1;
420 
421   const FIXP_WTP *twiddle;
422   const FIXP_STP *sin_twiddle;
423 
424 #ifdef DSTIV2_ENABLE
425   if (L == 2) {
426     const FIXP_STP tab = STCP(0x7641AF3D, 0x30FB9452);
427     FIXP_DBL tmp1, tmp2;
428 
429     cplxMultDiv2(&tmp2, &tmp1, pDat[0], pDat[1], tab);
430 
431     pDat[0] = tmp1;
432     pDat[1] = tmp2;
433 
434     *pDat_e += 1;
435 
436     return;
437   }
438 #else
439   FDK_ASSERT(L >= 4);
440 #endif
441 
442   getTables(&twiddle, &sin_twiddle, &sin_step, L);
443 
444 #ifdef FUNCTION_dst_IV_func1
445   if ( (M>=4) && ((M&3) == 0) ) {
446     dst_IV_func1(M, twiddle, &pDat[0], &pDat[L]);
447   } else
448 #endif
449   {
450     FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
451     FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
452 
453     register int i;
454 
455     /* 34 cycles on ARM926 */
456     for (i = 0; i < M-1; i+=2,pDat_0+=2,pDat_1-=2)
457     {
458       register FIXP_DBL accu1,accu2,accu3,accu4;
459 
460       accu1 =  pDat_1[1]; accu2 = -pDat_0[0];
461       accu3 =  pDat_0[1]; accu4 = -pDat_1[0];
462 
463       cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
464       cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i+1]);
465 
466       pDat_0[0] = accu2; pDat_0[1] = accu1;
467       pDat_1[0] = accu4; pDat_1[1] = -accu3;
468     }
469     if (M&1)
470     {
471       register FIXP_DBL accu1,accu2;
472 
473       accu1 =  pDat_1[1]; accu2 = -pDat_0[0];
474 
475       cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
476 
477       pDat_0[0] = accu2; pDat_0[1] = accu1;
478     }
479   }
480 
481   fft(M, pDat, pDat_e);
482 
483 #ifdef FUNCTION_dst_IV_func2
484   if ( (M>=4) && ((M&3) == 0) ) {
485     dst_IV_func2(M>>2, sin_twiddle + sin_step, &pDat[0], &pDat[L - 1], sin_step);
486   } else
487 #endif /* FUNCTION_dst_IV_func2 */
488   {
489     FIXP_DBL *RESTRICT pDat_0;
490     FIXP_DBL *RESTRICT pDat_1;
491     register FIXP_DBL accu1,accu2,accu3,accu4;
492     int idx, i;
493 
494     pDat_0 = &pDat[0];
495     pDat_1 = &pDat[L - 2];
496 
497     /* Sin and Cos values are 0.0f and 1.0f */
498     accu1 = pDat_1[0];
499     accu2 = pDat_1[1];
500 
501     pDat_1[1] = -(pDat_0[0]>>1);
502     pDat_0[0] = (pDat_0[1]>>1);
503 
504     for (idx = sin_step,i=1; i<(M+1)>>1; i++, idx+=sin_step)
505     {
506       FIXP_STP twd = sin_twiddle[idx];
507 
508       cplxMultDiv2(&accu3, &accu4, accu1, accu2, twd);
509       pDat_1[0] =  -accu3;
510       pDat_0[1] =  -accu4;
511 
512       pDat_0+=2;
513       pDat_1-=2;
514 
515       cplxMultDiv2(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
516 
517       accu1 = pDat_1[0];
518       accu2 = pDat_1[1];
519 
520       pDat_0[0] =  accu3;
521       pDat_1[1] = -accu4;
522     }
523 
524     if ( (M&1) == 0 )
525     {
526       /* Last Sin and Cos value pair are the same */
527       accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
528       accu2 = fMultDiv2(accu2, WTC(0x5a82799a));
529 
530       pDat_0[1] = - accu1 - accu2;
531       pDat_1[0] =   accu2 - accu1;
532     }
533   }
534 
535   /* Add twiddeling scale. */
536   *pDat_e += 2;
537 }
538 #endif /* !defined(FUNCTION_dst_IV) */
539 
540 
541