1 
2 /* -----------------------------------------------------------------------------------------------------------
3 Software License for The Fraunhofer FDK AAC Codec Library for Android
4 
5 © Copyright  1995 - 2013 Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V.
6   All rights reserved.
7 
8  1.    INTRODUCTION
9 The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software that implements
10 the MPEG Advanced Audio Coding ("AAC") encoding and decoding scheme for digital audio.
11 This FDK AAC Codec software is intended to be used on a wide variety of Android devices.
12 
13 AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient general perceptual
14 audio codecs. AAC-ELD is considered the best-performing full-bandwidth communications codec by
15 independent studies and is widely deployed. AAC has been standardized by ISO and IEC as part
16 of the MPEG specifications.
17 
18 Patent licenses for necessary patent claims for the FDK AAC Codec (including those of Fraunhofer)
19 may be obtained through Via Licensing (www.vialicensing.com) or through the respective patent owners
20 individually for the purpose of encoding or decoding bit streams in products that are compliant with
21 the ISO/IEC MPEG audio standards. Please note that most manufacturers of Android devices already license
22 these patent claims through Via Licensing or directly from the patent owners, and therefore FDK AAC Codec
23 software may already be covered under those patent licenses when it is used for those licensed purposes only.
24 
25 Commercially-licensed AAC software libraries, including floating-point versions with enhanced sound quality,
26 are also available from Fraunhofer. Users are encouraged to check the Fraunhofer website for additional
27 applications information and documentation.
28 
29 2.    COPYRIGHT LICENSE
30 
31 Redistribution and use in source and binary forms, with or without modification, are permitted without
32 payment of copyright license fees provided that you satisfy the following conditions:
33 
34 You must retain the complete text of this software license in redistributions of the FDK AAC Codec or
35 your modifications thereto in source code form.
36 
37 You must retain the complete text of this software license in the documentation and/or other materials
38 provided with redistributions of the FDK AAC Codec or your modifications thereto in binary form.
39 You must make available free of charge copies of the complete source code of the FDK AAC Codec and your
40 modifications thereto to recipients of copies in binary form.
41 
42 The name of Fraunhofer may not be used to endorse or promote products derived from this library without
43 prior written permission.
44 
45 You may not charge copyright license fees for anyone to use, copy or distribute the FDK AAC Codec
46 software or your modifications thereto.
47 
48 Your modified versions of the FDK AAC Codec must carry prominent notices stating that you changed the software
49 and the date of any change. For modified versions of the FDK AAC Codec, the term
50 "Fraunhofer FDK AAC Codec Library for Android" must be replaced by the term
51 "Third-Party Modified Version of the Fraunhofer FDK AAC Codec Library for Android."
52 
53 3.    NO PATENT LICENSE
54 
55 NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without limitation the patents of Fraunhofer,
56 ARE GRANTED BY THIS SOFTWARE LICENSE. Fraunhofer provides no warranty of patent non-infringement with
57 respect to this software.
58 
59 You may use this FDK AAC Codec software or modifications thereto only for purposes that are authorized
60 by appropriate patent licenses.
61 
62 4.    DISCLAIMER
63 
64 This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright holders and contributors
65 "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties
66 of merchantability and fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
67 CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages,
68 including but not limited to procurement of substitute goods or services; loss of use, data, or profits,
69 or business interruption, however caused and on any theory of liability, whether in contract, strict
70 liability, or tort (including negligence), arising in any way out of the use of this software, even if
71 advised of the possibility of such damage.
72 
73 5.    CONTACT INFORMATION
74 
75 Fraunhofer Institute for Integrated Circuits IIS
76 Attention: Audio and Multimedia Departments - FDK AAC LL
77 Am Wolfsmantel 33
78 91058 Erlangen, Germany
79 
80 www.iis.fraunhofer.de/amm
81 amm-info@iis.fraunhofer.de
82 ----------------------------------------------------------------------------------------------------------- */
83 
84 
85 
#ifdef FUNCTION_dct_IV_func1

/*
   dct_IV_func1() - twiddle-multiply loop, written as an embedded-assembler
   function for the ARM compiler.

   Note: This assembler routine is here, because the ARM926 compiler does
         not encode the inline assembler with optimal speed.
         With this version, we save 2 cycles per loop iteration.

   Parameters (arrive in r0-r3, see register map below):
     i        Number of passes through the loop. Must be >= 1: the counter is
              decremented and tested only after the body has executed, so 0
              would wrap around instead of skipping the loop.
     twiddle  Packed twiddle words, read upwards; four 32-bit words are
              consumed per pass.
     pDat_0   Read and rewritten upwards, four words per pass.
     pDat_1   Read and rewritten downwards, four words per pass.

   Notation used in the instruction comments:
     val_tw.l  top halfword of the twiddle word    (SMULWT / SMLAWT operand)
     val_tw.h  bottom halfword of the twiddle word (SMULWB / SMLAWB operand)
   Every product is a signed 32x16-bit multiply of which the upper 32 bits of
   the 48-bit result are kept.

   For two consecutive twiddle words tw0, tw1 the code computes, using the
   values loaded before any of the four stores:
     pDat_0[0]  = pDat_0[0] *tw0.h + pDat_1[0] *tw0.l
     pDat_0[1]  = pDat_1[0] *tw0.h - pDat_0[0] *tw0.l
     pDat_1[0]  = pDat_0[1] *tw1.l - pDat_1[-1]*tw1.h
     pDat_1[-1] = pDat_1[-1]*tw1.l + pDat_0[1] *tw1.h
   and then moves pDat_0 forward and pDat_1 backward by two words. This group
   is unrolled twice inside the loop body.
*/

__asm void dct_IV_func1(
    int i,
    const FIXP_SPK *twiddle,
    FIXP_DBL *RESTRICT pDat_0,
    FIXP_DBL *RESTRICT pDat_1)
{
    /* Register map:
       r0   i
       r1   twiddle
       r2   pDat_0
       r3   pDat_1
       r4   accu1
       r5   accu2
       r6   accu3
       r7   accu4
       r8   val_tw
       r9   accuX
    */
    PUSH    {r4-r9}         // r4-r9 are used as scratch below; restored before return

     /* 44 cycles for 2 iterations = 22 cycles/iteration */
dct_IV_loop1_start
/*  First iteration */
    LDR     r8, [r1], #4    // val_tw = *twiddle++;
    LDR     r5, [r2, #0]    // accu2 = pDat_0[0]
    LDR     r4, [r3, #0]    // accu1 = pDat_1[0]

    SMULWT  r9, r5, r8      // accuX = accu2*val_tw.l
    SMULWB  r5, r5, r8      // accu2 = accu2*val_tw.h
    RSB     r9, r9, #0      // accuX =-accu2*val_tw.l
    SMLAWT  r5, r4, r8, r5  // accu2 = accu2*val_tw.h + accu1*val_tw.l
    SMLAWB  r4, r4, r8, r9  // accu1 = accu1*val_tw.h - accu2*val_tw.l

    LDR     r8, [r1], #4    // val_tw = *twiddle++;
    LDR     r7, [r3, #-4]   // accu4 = pDat_1[-1]
    LDR     r6, [r2, #4]    // accu3 = pDat_0[1]  (loaded before pDat_0[1] is overwritten below)

    SMULWB  r9, r7, r8      // accuX = accu4*val_tw.h
    SMULWT  r7, r7, r8      // accu4 = accu4*val_tw.l
    RSB     r9, r9, #0      // accuX =-accu4*val_tw.h
    SMLAWB  r7, r6, r8, r7  // accu4 = accu4*val_tw.l+accu3*val_tw.h
    SMLAWT  r6, r6, r8, r9  // accu3 = accu3*val_tw.l-accu4*val_tw.h

    STR     r5, [r2], #4    // *pDat_0++ = accu2
    STR     r4, [r2], #4    // *pDat_0++ = accu1
    STR     r6, [r3], #-4   // *pDat_1-- = accu3
    STR     r7, [r3], #-4   // *pDat_1-- = accu4

/*  Second iteration */
    LDR     r8, [r1], #4    // val_tw = *twiddle++;
    LDR     r5, [r2, #0]    // accu2 = pDat_0[0]
    LDR     r4, [r3, #0]    // accu1 = pDat_1[0]

    SMULWT  r9, r5, r8      // accuX = accu2*val_tw.l
    SMULWB  r5, r5, r8      // accu2 = accu2*val_tw.h
    RSB     r9, r9, #0      // accuX =-accu2*val_tw.l
    SMLAWT  r5, r4, r8, r5  // accu2 = accu2*val_tw.h + accu1*val_tw.l
    SMLAWB  r4, r4, r8, r9  // accu1 = accu1*val_tw.h - accu2*val_tw.l

    LDR     r8, [r1], #4    // val_tw = *twiddle++;
    LDR     r7, [r3, #-4]   // accu4 = pDat_1[-1]
    LDR     r6, [r2, #4]    // accu3 = pDat_0[1]

    SMULWB  r9, r7, r8      // accuX = accu4*val_tw.h
    SMULWT  r7, r7, r8      // accu4 = accu4*val_tw.l
    RSB     r9, r9, #0      // accuX =-accu4*val_tw.h
    SMLAWB  r7, r6, r8, r7  // accu4 = accu4*val_tw.l+accu3*val_tw.h
    SMLAWT  r6, r6, r8, r9  // accu3 = accu3*val_tw.l-accu4*val_tw.h

    STR     r5, [r2], #4    // *pDat_0++ = accu2
    STR     r4, [r2], #4    // *pDat_0++ = accu1
    STR     r6, [r3], #-4   // *pDat_1-- = accu3
    STR     r7, [r3], #-4   // *pDat_1-- = accu4

    SUBS    r0, r0, #1      // i--; sets the flags for the branch
    BNE     dct_IV_loop1_start

    POP     {r4-r9}

    BX      lr
}

#endif /* FUNCTION_dct_IV_func1 */
177 
178 
#ifdef FUNCTION_dct_IV_func2

/*
   dct_IV_func2() - twiddle-multiply loop using the ARM compiler's inline
   assembler, with a C prologue and epilogue.

   Parameters:
     i        Loop counter. Must be >= 1: it is decremented and tested only at
              the bottom of the loop. The loop is entered at its second half,
              so that half executes i times and the first half i-1 times.
     twiddle  Packed twiddle words. The pointer is first advanced by inc, and
              after every load it is advanced by inc words again.
     pDat_0   Read and rewritten upwards.
     pDat_1   Read and rewritten downwards; elements below pDat_1 are accessed
              (pDat_1[-1], pDat_1[-2], ...).
     inc      Stride, in twiddle words, between the twiddle values used.

   Prologue: pDat_1[-2] and pDat_1[-1] are fetched into accu1/accu2, then
   pDat_1[-1] is overwritten with -(pDat_0[1]>>1) and pDat_0[0] is halved in
   place.

   Epilogue: the last accu1/accu2 pair is multiplied by WTC(0x5a82799a) via
   fMultDiv2() and stored as sum (below pDat_1) and difference (at pDat_0).

   In the assembler comments val_tw.l denotes the top halfword of the twiddle
   word (SMULWT/SMLAWT) and val_tw.h the bottom halfword (SMULWB/SMLAWB).
*/
FDK_INLINE
/* __attribute__((noinline)) */
static void dct_IV_func2(
    int i,
    const FIXP_SPK *twiddle,
    FIXP_DBL *pDat_0,
    FIXP_DBL *pDat_1,
    int inc)
{
  FIXP_DBL accu1, accu2, accu3, accu4, accuX;
  LONG val_tw;

  /* Fetch the first input pair before pDat_1[-1] is overwritten below. */
  accu1 = pDat_1[-2];
  accu2 = pDat_1[-1];

  *--pDat_1 = -(pDat_0[1]>>1);

  /* Halve pDat_0[0] in place and only then advance the pointer. Doing both in
     one expression (*pDat_0++ = pDat_0[0]>>1) reads and modifies pDat_0
     without sequencing, which is undefined behaviour. */
  *pDat_0 = (*pDat_0 >> 1);
  pDat_0++;

  twiddle += inc;

__asm
  {
    LDR     val_tw, [twiddle], inc, LSL #2    // val_tw = *twiddle; twiddle += inc
    B       dct_IV_loop2_2nd_part             // enter the loop at its second half

    /* 42 cycles for 2 iterations = 21 cycles/iteration */
dct_IV_loop2:
    SMULWT  accuX, accu2, val_tw              // accuX = accu2*val_tw.l
    SMULWB  accu2, accu2, val_tw              // accu2 = accu2*val_tw.h
    RSB     accuX, accuX, #0                  // accuX = -accu2*val_tw.l
    SMLAWB  accuX, accu1, val_tw, accuX       // accuX = accu1*val_tw.h - accu2*val_tw.l
    SMLAWT  accu2, accu1, val_tw, accu2       // accu2 = accu2*val_tw.h + accu1*val_tw.l
    STR     accuX, [pDat_0], #4               // *pDat_0++ = accuX
    STR     accu2, [pDat_1, #-4] !            // *--pDat_1 = accu2

    LDR     accu4, [pDat_0, #4]               // accu4 = pDat_0[1]
    LDR     accu3, [pDat_0]                   // accu3 = pDat_0[0]
    SMULWB  accuX, accu4, val_tw              // accuX = accu4*val_tw.h
    SMULWT  accu4, accu4, val_tw              // accu4 = accu4*val_tw.l
    RSB     accuX, accuX, #0                  // accuX = -accu4*val_tw.h
    SMLAWT  accuX, accu3, val_tw, accuX       // accuX = accu3*val_tw.l - accu4*val_tw.h
    SMLAWB  accu4, accu3, val_tw, accu4       // accu4 = accu4*val_tw.l + accu3*val_tw.h

    LDR     accu1, [pDat_1, #-8]              // accu1 = pDat_1[-2]  (next input pair,
    LDR     accu2, [pDat_1, #-4]              // accu2 = pDat_1[-1]   read before the store below)

    LDR     val_tw, [twiddle], inc, LSL #2    // val_tw = *twiddle; twiddle += inc

    STR     accuX, [pDat_1, #-4] !            // *--pDat_1 = accuX
    STR     accu4, [pDat_0], #4               // *pDat_0++ = accu4

dct_IV_loop2_2nd_part:
    SMULWT  accuX, accu2, val_tw              // accuX = accu2*val_tw.l
    SMULWB  accu2, accu2, val_tw              // accu2 = accu2*val_tw.h
    RSB     accuX, accuX, #0                  // accuX = -accu2*val_tw.l
    SMLAWB  accuX, accu1, val_tw, accuX       // accuX = accu1*val_tw.h - accu2*val_tw.l
    SMLAWT  accu2, accu1, val_tw, accu2       // accu2 = accu2*val_tw.h + accu1*val_tw.l
    STR     accuX, [pDat_0], #4               // *pDat_0++ = accuX
    STR     accu2, [pDat_1, #-4] !            // *--pDat_1 = accu2

    LDR     accu4, [pDat_0, #4]               // accu4 = pDat_0[1]
    LDR     accu3, [pDat_0]                   // accu3 = pDat_0[0]
    SMULWB  accuX, accu4, val_tw              // accuX = accu4*val_tw.h
    SMULWT  accu4, accu4, val_tw              // accu4 = accu4*val_tw.l
    RSB     accuX, accuX, #0                  // accuX = -accu4*val_tw.h
    SMLAWT  accuX, accu3, val_tw, accuX       // accuX = accu3*val_tw.l - accu4*val_tw.h
    SMLAWB  accu4, accu3, val_tw, accu4       // accu4 = accu4*val_tw.l + accu3*val_tw.h

    LDR     accu1, [pDat_1, #-8]              // accu1 = pDat_1[-2]
    LDR     accu2, [pDat_1, #-4]              // accu2 = pDat_1[-1]

    STR     accuX, [pDat_1, #-4] !            // *--pDat_1 = accuX
    STR     accu4, [pDat_0], #4               // *pDat_0++ = accu4

    LDR     val_tw, [twiddle], inc, LSL #2    // val_tw = *twiddle; twiddle += inc

    SUBS    i, i, #1
    BNE     dct_IV_loop2
  }

  /* Last Sin and Cos value pair are the same */
  accu1 = fMultDiv2(accu1, WTC(0x5a82799a));
  accu2 = fMultDiv2(accu2, WTC(0x5a82799a));

  *--pDat_1 = accu1 + accu2;
  *pDat_0++ = accu1 - accu2;
}
#endif /* FUNCTION_dct_IV_func2 */
269 
270 
#ifdef FUNCTION_dst_IV_func1

/*
   dst_IV_func1() - twiddle-multiply loop with sign inversions, written as an
   embedded-assembler function for the ARM compiler.

   Parameters (arrive in r0-r3, see register map below):
     i        Loop counter. It is decreased by 4 per pass and tested for zero
              only after the body has executed, so it must be a positive
              multiple of 4; any other value does not terminate as intended.
     twiddle  Packed twiddle words, read upwards; four 32-bit words are
              consumed per pass.
     pDat_0   Read and rewritten upwards, four words per pass
              (pDat_0[0], pDat_0[1], then the next two).
     pDat_1   Read and rewritten downwards, four words per pass
              (pDat_1[-1], pDat_1[-2], then the next two below).

   Notation used in the instruction comments:
     val_tw.l  top halfword of the twiddle word    (SMULWT / SMLAWT operand)
     val_tw.h  bottom halfword of the twiddle word (SMULWB / SMLAWB operand)
   Every product is a signed 32x16-bit multiply of which the upper 32 bits of
   the 48-bit result are kept.

   The loop body is the same instruction group unrolled twice.
*/
__asm void dst_IV_func1(
    int i,
    const FIXP_SPK *twiddle,
    FIXP_DBL *pDat_0,
    FIXP_DBL *pDat_1)
{
    /* Register map:
       r0   i
       r1   twiddle
       r2   pDat_0
       r3   pDat_1
       r4   accu1
       r5   accu2
       r6   accu3
       r7   accu4
       r8   val_tw
       r9   accuX
    */
    PUSH    {r4-r9}                    // r4-r9 are used as scratch below; restored before return

dst_IV_loop1
    LDR     r8, [r1], #4               // val_tw = *twiddle++
    LDR     r5, [r2]                   // accu2 = pDat_0[0]
    LDR     r6, [r2, #4]               // accu3 = pDat_0[1]  (read before it is overwritten below)
    RSB     r5, r5, #0                 // accu2 = -accu2
    SMULWT  r9, r5, r8                 // accuX = (-accu2)*val_tw.l
    LDR     r4, [r3, #-4]              // accu1 = pDat_1[-1]
    RSB     r9, r9, #0                 // accuX = -(-accu2)*val_tw.l
    SMLAWB  r9, r4, r8, r9             // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
    SMULWT  r4, r4, r8                 // accu1 = accu1*val_tw.l
    LDR     r7, [r3, #-8]              // accu4 = pDat_1[-2]
    SMLAWB  r5, r5, r8, r4             // accu2 = (-accu2)*val_tw.h+accu1*val_tw.l
    LDR     r8, [r1], #4               // val_tw = *twiddle++
    STR     r5, [r2], #4               // *pDat_0++ = accu2
    STR     r9, [r2], #4               // *pDat_0++ = accu1 (accuX)
    RSB     r7, r7, #0                 // accu4 = -accu4
    SMULWB  r5, r7, r8                 // accu2 = (-accu4)*val_tw.h
    SMULWB  r4, r6, r8                 // accu1 = accu3*val_tw.h
    RSB     r5, r5, #0                 // accu2 = -(-accu4)*val_tw.h
    SMLAWT  r6, r6, r8, r5             // accu3 = accu3*val_tw.l-(-accu4)*val_tw.h
    SMLAWT  r7, r7, r8, r4             // accu4 = (-accu4)*val_tw.l+accu3*val_tw.h
    STR     r6, [r3, #-4] !            // *--pDat_1 = accu3
    STR     r7, [r3, #-4] !            // *--pDat_1 = accu4

    LDR     r8, [r1], #4               // val_tw = *twiddle++
    LDR     r5, [r2]                   // accu2 = pDat_0[0]
    LDR     r6, [r2, #4]               // accu3 = pDat_0[1]
    RSB     r5, r5, #0                 // accu2 = -accu2
    SMULWT  r9, r5, r8                 // accuX = (-accu2)*val_tw.l
    LDR     r4, [r3, #-4]              // accu1 = pDat_1[-1]
    RSB     r9, r9, #0                 // accuX = -(-accu2)*val_tw.l
    SMLAWB  r9, r4, r8, r9             // accuX = accu1*val_tw.h-(-accu2)*val_tw.l
    SMULWT  r4, r4, r8                 // accu1 = accu1*val_tw.l
    LDR     r7, [r3, #-8]              // accu4 = pDat_1[-2]
    SMLAWB  r5, r5, r8, r4             // accu2 = (-accu2)*val_tw.h+accu1*val_tw.l
    LDR     r8, [r1], #4               // val_tw = *twiddle++
    STR     r5, [r2], #4               // *pDat_0++ = accu2
    STR     r9, [r2], #4               // *pDat_0++ = accu1 (accuX)
    RSB     r7, r7, #0                 // accu4 = -accu4
    SMULWB  r5, r7, r8                 // accu2 = (-accu4)*val_tw.h
    SMULWB  r4, r6, r8                 // accu1 = accu3*val_tw.h
    RSB     r5, r5, #0                 // accu2 = -(-accu4)*val_tw.h
    SMLAWT  r6, r6, r8, r5             // accu3 = accu3*val_tw.l-(-accu4)*val_tw.h
    SMLAWT  r7, r7, r8, r4             // accu4 = (-accu4)*val_tw.l+accu3*val_tw.h
    STR     r6, [r3, #-4] !            // *--pDat_1 = accu3
    STR     r7, [r3, #-4] !            // *--pDat_1 = accu4

    SUBS    r0, r0, #4                 // i-= 4
    BNE     dst_IV_loop1

    POP     {r4-r9}
    BX      lr
}
#endif /* FUNCTION_dst_IV_func1 */
347 
#ifdef FUNCTION_dst_IV_func2

/*
   dst_IV_func2() - twiddle-multiply loop with sign inversions, using the ARM
   compiler's inline assembler, with a C prologue and epilogue.

   Parameters:
     i        Loop counter. Must be >= 1: it is decremented and tested only at
              the bottom of the loop. The loop is entered at its second half,
              so that half executes i times and the first half i-1 times.
     twiddle  Packed twiddle words; after every load the pointer is advanced
              by inc words.
     pDat_0   Read and rewritten upwards.
     pDat_1   Read and rewritten downwards, starting at pDat_1[0].
     inc      Stride, in twiddle words, between the twiddle values used.

   Prologue: pDat_1[-1] and pDat_1[0] are fetched into accu1/accu2, then
   pDat_0[0] receives pDat_0[1]>>1 and pDat_1[0] receives -(pDat_0[0]>>1),
   both computed from the values read on entry.

   Epilogue: the negated last accu1/accu2 pair is multiplied by
   WTC(0x5a82799a) via fMultDiv2() and stored as sum (at pDat_0) and
   difference (at pDat_1).

   In the assembler comments val_tw.l denotes the top halfword of the twiddle
   word (SMULWT/SMLAWT) and val_tw.h the bottom halfword (SMULWB/SMLAWB).
*/
FDK_INLINE
/* __attribute__((noinline)) */
static void dst_IV_func2(
    int i,
    const FIXP_SPK *twiddle,
    FIXP_DBL *RESTRICT pDat_0,
    FIXP_DBL *RESTRICT pDat_1,
    int inc)
{
  FIXP_DBL accu1,accu2,accu3,accu4;
  LONG val_tw;

  accu4 = pDat_0[0];
  accu3 = pDat_0[1];
  accu4 >>= 1;
  accu3 >>= 1;
  accu4 = -accu4;

  /* Fetch the first input pair before pDat_1[0] is overwritten below. */
  accu1 = pDat_1[-1];
  accu2 = pDat_1[0];

  *pDat_0++ = accu3;
  *pDat_1-- = accu4;


  __asm
  {
    B       dst_IV_loop2_2nd_part             // enter the loop at its second half

    /* 50 cycles for 2 iterations = 25 cycles/iteration */

dst_IV_loop2:

    LDR     val_tw, [twiddle], inc, LSL #2    // val_tw = *twiddle; twiddle += inc

    RSB     accu2, accu2, #0                  // accu2 = -accu2
    RSB     accu1, accu1, #0                  // accu1 = -accu1
    SMULWT  accu3, accu2, val_tw              // accu3 = (-accu2)*val_tw.l
    SMULWT  accu4, accu1, val_tw              // accu4 = (-accu1)*val_tw.l
    RSB     accu3, accu3, #0                  // accu3 = -accu2*val_tw.l
    SMLAWB  accu1, accu1, val_tw, accu3       // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
    SMLAWB  accu2, accu2, val_tw, accu4       // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
    STR     accu1, [pDat_1], #-4              // *pDat_1-- = accu1
    STR     accu2, [pDat_0], #4               // *pDat_0++ = accu2

    LDR     accu4, [pDat_0]                   // accu4 = pDat_0[0]
    LDR     accu3, [pDat_0, #4]               // accu3 = pDat_0[1]

    RSB     accu4, accu4, #0                  // accu4 = -accu4
    RSB     accu3, accu3, #0                  // accu3 = -accu3

    SMULWB  accu1, accu3, val_tw              // accu1 = (-accu3)*val_tw.h
    SMULWT  accu2, accu3, val_tw              // accu2 = (-accu3)*val_tw.l
    RSB     accu1, accu1, #0                  // accu1 = -(-accu3)*val_tw.h
    SMLAWT  accu3, accu4, val_tw, accu1       // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
    SMLAWB  accu4, accu4, val_tw, accu2       // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h

    LDR     accu1, [pDat_1, #-4]              // accu1 = pDat_1[-1]
    LDR     accu2, [pDat_1]                   // accu2 = pDat_1[0]  (read before the store below)

    STR     accu3, [pDat_0], #4               // *pDat_0++ = accu3
    STR     accu4, [pDat_1], #-4              // *pDat_1-- = accu4

dst_IV_loop2_2nd_part:

    LDR     val_tw, [twiddle], inc, LSL #2    // val_tw = *twiddle; twiddle += inc

    RSB     accu2, accu2, #0                  // accu2 = -accu2
    RSB     accu1, accu1, #0                  // accu1 = -accu1
    SMULWT  accu3, accu2, val_tw              // accu3 = (-accu2)*val_tw.l
    SMULWT  accu4, accu1, val_tw              // accu4 = (-accu1)*val_tw.l
    RSB     accu3, accu3, #0                  // accu3 = -accu2*val_tw.l
    SMLAWB  accu1, accu1, val_tw, accu3       // accu1 = -accu1*val_tw.h-(-accu2)*val_tw.l
    SMLAWB  accu2, accu2, val_tw, accu4       // accu2 = (-accu1)*val_tw.l+(-accu2)*val_tw.h
    STR     accu1, [pDat_1], #-4              // *pDat_1-- = accu1
    STR     accu2, [pDat_0], #4               // *pDat_0++ = accu2

    LDR     accu4, [pDat_0]                   // accu4 = pDat_0[0]
    LDR     accu3, [pDat_0, #4]               // accu3 = pDat_0[1]

    RSB     accu4, accu4, #0                  // accu4 = -accu4
    RSB     accu3, accu3, #0                  // accu3 = -accu3

    SMULWB  accu1, accu3, val_tw              // accu1 = (-accu3)*val_tw.h
    SMULWT  accu2, accu3, val_tw              // accu2 = (-accu3)*val_tw.l
    RSB     accu1, accu1, #0                  // accu1 = -(-accu3)*val_tw.h
    SMLAWT  accu3, accu4, val_tw, accu1       // accu3 = (-accu4)*val_tw.l-(-accu3)*val_tw.h
    SMLAWB  accu4, accu4, val_tw, accu2       // accu4 = (-accu3)*val_tw.l+(-accu4)*val_tw.h

    LDR     accu1, [pDat_1, #-4]              // accu1 = pDat_1[-1]
    LDR     accu2, [pDat_1]                   // accu2 = pDat_1[0]

    STR     accu3, [pDat_0], #4               // *pDat_0++ = accu3
    STR     accu4, [pDat_1], #-4              // *pDat_1-- = accu4

    SUBS    i, i, #1
    BNE     dst_IV_loop2
  }

  /* Last Sin and Cos value pair are the same */
  accu1 = fMultDiv2(-accu1, WTC(0x5a82799a));
  accu2 = fMultDiv2(-accu2, WTC(0x5a82799a));

  *pDat_0 = accu1 + accu2;
  *pDat_1 = accu1 - accu2;
}
#endif /* FUNCTION_dst_IV_func2 */
457