1 /******************************************************************************
2  *                                                                            *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 #include <string.h>
21 #include "ixheaacd_sbr_common.h"
22 #include "ixheaacd_type_def.h"
23 
24 #include "ixheaacd_constants.h"
25 #include "ixheaacd_basic_ops32.h"
26 #include "ixheaacd_basic_ops16.h"
27 #include "ixheaacd_basic_ops40.h"
28 #include "ixheaacd_basic_ops.h"
29 
30 #include "ixheaacd_intrinsics.h"
31 #include "ixheaacd_common_rom.h"
32 #include "ixheaacd_bitbuffer.h"
33 #include "ixheaacd_sbrdecsettings.h"
34 #include "ixheaacd_sbr_scale.h"
35 #include "ixheaacd_lpp_tran.h"
36 #include "ixheaacd_env_extr_part.h"
37 #include "ixheaacd_sbr_rom.h"
38 #include "ixheaacd_hybrid.h"
39 #include "ixheaacd_ps_dec.h"
40 #include "ixheaacd_env_extr.h"
41 #include "ixheaacd_qmf_dec.h"
42 
43 #include "ixheaacd_basic_op.h"
44 #include "ixheaacd_env_calc.h"
45 
46 #include "ixheaacd_interface.h"
47 #include "ixheaacd_function_selector.h"
48 #include "ixheaacd_audioobjtypes.h"
49 
/* Transform lengths. */
#define DCT3_LEN (32)
#define DCT2_LEN (64)

/* Fixed-point scaling constants. */
#define LP_SHIFT_VAL 7 /* right shift applied to inputs in ixheaacd_dct3_32 */
#define HQ_SHIFT_VAL 4 /* right shift applied to inputs in ixheaacd_fwd_modulation */
#define HQ_SHIFT_64 4
#define RADIXSHIFT 1 /* left shift applied to rotated outputs in ixheaacd_esbr_radix4bfly */
#define ROUNDING_SPECTRA 1

/* Short local aliases for the fixed-point primitives of the basic-op headers. */
#define mult16x16_16(a, b) ixheaacd_mult16((a), (b))
#define mpy_16x16(a, b) ixheaacd_mult16x16in32((a), (b))
#define mpy_32x16(a, b) fixmuldiv2_32x16b((a), (b))
#define mpy_32x16H_n(a, b) ixheaacd_mult32x16hin32((a), (b))
#define mpy_32x32(a, b) ixheaacd_mult32((a), (b))
#define mac16x16(a, b, c) ixheaacd_mac16x16in32_sat((a), (b), (c))
#define msu16x16(a, b, c) msu16x16in32((a), (b), (c))
66 
67 VOID ixheaacd_dct3_32(WORD32 *input, WORD32 *output,
68                       const WORD16 *main_twidle_fwd, const WORD16 *post_tbl,
69                       const WORD16 *w_16, const WORD32 *p_table) {
70   WORD32 n, k;
71 
72   WORD32 temp1[6];
73   WORD32 temp2[4];
74   WORD16 twid_re, twid_im;
75   WORD32 *ptr_reverse, *ptr_forward, *p_out, *ptr_out1;
76   const WORD16 *twidle_fwd, *twidle_rev;
77 
78   ptr_forward = &input[49];
79   ptr_reverse = &input[47];
80 
81   p_out = output;
82   twidle_fwd = main_twidle_fwd;
83   twidle_fwd += 4;
84 
85   *p_out++ = input[48] >> LP_SHIFT_VAL;
86   *p_out++ = 0;
87 
88   for (n = 1; n < DCT3_LEN / 2; n++) {
89     temp1[0] = *ptr_forward++;
90     temp1[1] = *ptr_reverse--;
91     temp1[0] = ixheaacd_add32_sat(ixheaacd_shr32(temp1[0], LP_SHIFT_VAL),
92                                   ixheaacd_shr32(temp1[1], LP_SHIFT_VAL));
93 
94     temp1[2] = *(ptr_forward - 33);
95     temp1[3] = *(ptr_reverse - 31);
96     temp1[1] = ixheaacd_sub32_sat(ixheaacd_shr32(temp1[2], LP_SHIFT_VAL),
97                                   ixheaacd_shr32(temp1[3], LP_SHIFT_VAL));
98     twid_re = *twidle_fwd++;
99 
100     twid_im = *twidle_fwd;
101     twidle_fwd += 3;
102 
103     *p_out++ = ixheaacd_add32_sat(ixheaacd_mult32x16in32(temp1[0], twid_re),
104                                   ixheaacd_mult32x16in32(temp1[1], twid_im));
105     *p_out++ = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(temp1[0], twid_im),
106                                   ixheaacd_mult32x16in32(temp1[1], twid_re));
107   }
108 
109   twid_re = *twidle_fwd++;
110 
111   twid_im = *twidle_fwd;
112   twidle_fwd += 3;
113 
114   temp1[1] = *ptr_reverse--;
115   temp1[0] = *(ptr_reverse - 31);
116   temp1[1] = ixheaacd_sub32_sat(ixheaacd_shr32(temp1[1], LP_SHIFT_VAL),
117                                 ixheaacd_shr32(temp1[0], LP_SHIFT_VAL));
118 
119   temp1[0] = temp1[1];
120 
121   temp2[2] = ixheaacd_add32_sat(ixheaacd_mult32x16in32(temp1[0], twid_re),
122                                 ixheaacd_mult32x16in32(temp1[1], twid_im));
123   temp2[3] = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(temp1[0], twid_im),
124                                 ixheaacd_mult32x16in32(temp1[1], twid_re));
125 
126   ptr_forward = output;
127   ptr_reverse = &output[DCT3_LEN - 1];
128 
129   temp2[0] = *ptr_forward++;
130   temp2[1] = *ptr_forward--;
131 
132   temp1[0] = ixheaacd_negate32_sat(ixheaacd_add32_sat(temp2[1], temp2[3]));
133   temp1[1] = ixheaacd_sub32_sat(temp2[0], temp2[2]);
134   temp2[0] =
135       ixheaacd_add32_sat(ixheaacd_add32_sat(temp2[0], temp2[2]), temp1[0]);
136   temp2[1] =
137       ixheaacd_add32_sat(ixheaacd_sub32_sat(temp2[1], temp2[3]), temp1[1]);
138 
139   temp2[0] >>= 1;
140   temp2[1] >>= 1;
141 
142   *ptr_forward++ = temp2[0];
143   *ptr_forward++ = temp2[1];
144 
145   twidle_fwd = post_tbl + 2;
146   twidle_rev = post_tbl + 14;
147 
148   for (n = 1; n < DCT3_LEN / 4; n++) {
149     temp2[0] = *ptr_forward++;
150     temp2[1] = *ptr_forward--;
151     temp2[3] = *ptr_reverse--;
152     temp2[2] = *ptr_reverse++;
153 
154     twid_re = *twidle_rev;
155     twidle_rev -= 2;
156     twid_im = *twidle_fwd;
157     twidle_fwd += 2;
158 
159     temp1[0] = ixheaacd_sub32_sat(temp2[0], temp2[2]);
160     temp1[1] = ixheaacd_add32_sat(temp2[0], temp2[2]);
161 
162     temp1[2] = ixheaacd_add32_sat(temp2[1], temp2[3]);
163     temp1[3] = ixheaacd_sub32_sat(temp2[1], temp2[3]);
164 
165     temp1[4] = ixheaacd_add32_sat(ixheaacd_mult32x16in32(temp1[0], twid_re),
166                                   ixheaacd_mult32x16in32(temp1[2], twid_im));
167     temp1[5] = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(temp1[0], twid_im),
168                                   ixheaacd_mult32x16in32(temp1[2], twid_re));
169 
170     temp1[1] >>= 1;
171     temp1[3] >>= 1;
172 
173     *ptr_forward++ = ixheaacd_sub32_sat(temp1[1], temp1[4]);
174     *ptr_forward++ = ixheaacd_add32_sat(temp1[3], temp1[5]);
175 
176     *ptr_reverse-- = ixheaacd_sub32_sat(temp1[5], temp1[3]);
177     *ptr_reverse-- = ixheaacd_add32_sat(temp1[1], temp1[4]);
178   }
179   temp2[0] = *ptr_forward++;
180   temp2[1] = *ptr_forward--;
181   temp2[3] = *ptr_reverse--;
182   temp2[2] = *ptr_reverse++;
183 
184   twid_re = -*twidle_rev;
185   twidle_rev -= 2;
186   twid_im = *twidle_fwd;
187   twidle_fwd += 2;
188 
189   temp1[0] = ixheaacd_sub32_sat(temp2[0], temp2[2]);
190   temp1[1] = ixheaacd_add32_sat(temp2[0], temp2[2]);
191 
192   temp1[2] = ixheaacd_add32_sat(temp2[1], temp2[3]);
193   temp1[3] = ixheaacd_sub32_sat(temp2[1], temp2[3]);
194 
195   temp1[4] = ixheaacd_sub32_sat(ixheaacd_mult32x16in32(temp1[0], twid_re),
196                                 ixheaacd_mult32x16in32(temp1[2], twid_im));
197   temp1[5] = ixheaacd_add32_sat(ixheaacd_mult32x16in32(temp1[2], twid_re),
198                                 ixheaacd_mult32x16in32(temp1[0], twid_im));
199 
200   temp1[1] >>= 1;
201   temp1[3] >>= 1;
202   *ptr_forward++ = ixheaacd_add32_sat(temp1[1], temp1[4]);
203   *ptr_forward++ = ixheaacd_add32_sat(temp1[3], temp1[5]);
204 
205   ixheaacd_radix4bfly(w_16, output, 1, 4);
206   ixheaacd_postradixcompute4(input, output, p_table, 16);
207 
208   output[0] = input[0];
209   output[2] = input[1];
210 
211   p_out = input + 2;
212   ptr_forward = output + 1;
213   ptr_reverse = output + 30;
214   ptr_out1 = input + 18;
215 
216   for (k = (DCT3_LEN / 4) - 1; k != 0; k--) {
217     WORD32 tempre, tempim;
218 
219     tempre = *p_out++;
220     tempim = *p_out++;
221     *ptr_forward = (tempim);
222     ptr_forward += 2;
223     *ptr_forward = (tempre);
224     ptr_forward += 2;
225 
226     tempre = *ptr_out1++;
227     tempim = *ptr_out1++;
228     *ptr_reverse = (tempim);
229     ptr_reverse -= 2;
230     *ptr_reverse = (tempre);
231     ptr_reverse -= 2;
232   }
233 
234   {
235     WORD32 tempre, tempim;
236     tempre = *p_out++;
237     tempim = *p_out++;
238     *ptr_forward = (tempim);
239     ptr_forward += 2;
240     *ptr_forward = (tempre);
241     ptr_forward += 2;
242   }
243 
244   return;
245 }
246 VOID ixheaacd_dct2_64(WORD32 *x, WORD32 *X,
247                       ia_qmf_dec_tables_struct *qmf_dec_tables_ptr,
248                       WORD16 *filter_states) {
249   ixheaacd_pretwdct2(x, X);
250 
251   ixheaacd_sbr_imdct_using_fft(qmf_dec_tables_ptr->w1024, 32, X, x,
252                                qmf_dec_tables_ptr->dig_rev_table2_128,
253                                qmf_dec_tables_ptr->dig_rev_table2_128,
254                                qmf_dec_tables_ptr->dig_rev_table2_128,
255                                qmf_dec_tables_ptr->dig_rev_table2_128);
256 
257   ixheaacd_fftposttw(x, qmf_dec_tables_ptr);
258 
259   ixheaacd_posttwdct2(x, filter_states, qmf_dec_tables_ptr);
260 
261   return;
262 }
263 
264 VOID ixheaacd_cos_sin_mod(WORD32 *subband,
265                           ia_sbr_qmf_filter_bank_struct *qmf_bank,
266                           WORD16 *p_twiddle, WORD32 *p_dig_rev_tbl) {
267   WORD32 M = ixheaacd_shr32(qmf_bank->no_channels, 1);
268 
269   const WORD16 *p_sin;
270   const WORD16 *p_sin_cos = &qmf_bank->cos_twiddle[0];
271   WORD32 subband_tmp[128];
272 
273   ixheaacd_cos_sin_mod_loop1(subband, M, p_sin_cos, subband_tmp);
274 
275   if (M == 32) {
276     ixheaacd_sbr_imdct_using_fft(
277         (const WORD32 *)p_twiddle, 32, subband_tmp, subband,
278         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl,
279         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl);
280 
281     ixheaacd_sbr_imdct_using_fft(
282         (const WORD32 *)p_twiddle, 32, &subband_tmp[64], &subband[64],
283         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl,
284         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl);
285 
286   } else {
287     ixheaacd_sbr_imdct_using_fft(
288         (const WORD32 *)p_twiddle, 16, subband_tmp, subband,
289         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl,
290         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl);
291 
292     ixheaacd_sbr_imdct_using_fft(
293         (const WORD32 *)p_twiddle, 16, &subband_tmp[64], &subband[64],
294         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl,
295         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl);
296   }
297 
298   p_sin = &qmf_bank->alt_sin_twiddle[0];
299   ixheaacd_cos_sin_mod_loop2(subband, p_sin, M);
300 }
301 
302 VOID ixheaacd_fwd_modulation(const WORD32 *p_time_in1, WORD32 *real_subband,
303                              WORD32 *imag_subband,
304                              ia_sbr_qmf_filter_bank_struct *qmf_bank,
305                              ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
306   WORD32 i;
307   const WORD32 *p_time_in2 = &p_time_in1[2 * NO_ANALYSIS_CHANNELS - 1];
308   WORD32 temp1, temp2;
309   WORD32 *t_real_subband = real_subband;
310   WORD32 *t_imag_subband = imag_subband;
311   const WORD16 *tcos;
312 
313   for (i = NO_ANALYSIS_CHANNELS - 1; i >= 0; i--) {
314     temp1 = ixheaacd_shr32(*p_time_in1++, HQ_SHIFT_VAL);
315     temp2 = ixheaacd_shr32(*p_time_in2--, HQ_SHIFT_VAL);
316 
317     *t_real_subband++ = ixheaacd_sub32_sat(temp1, temp2);
318     ;
319     *t_imag_subband++ = ixheaacd_add32_sat(temp1, temp2);
320     ;
321   }
322 
323   ixheaacd_cos_sin_mod(real_subband, qmf_bank,
324                        (WORD16 *)qmf_dec_tables_ptr->w1024,
325                        (WORD32 *)qmf_dec_tables_ptr->dig_rev_table2_128);
326 
327   tcos = qmf_bank->t_cos;
328 
329   for (i = (qmf_bank->usb - qmf_bank->lsb - 1); i >= 0; i--) {
330     WORD16 cosh, sinh;
331     WORD32 re, im;
332 
333     re = *real_subband;
334     im = *imag_subband;
335     cosh = *tcos++;
336     sinh = *tcos++;
337     *real_subband++ = ixheaacd_add32_sat(ixheaacd_mult32x16in32_shl(re, cosh),
338                                          ixheaacd_mult32x16in32_shl(im, sinh));
339     *imag_subband++ = ixheaacd_sub32_sat(ixheaacd_mult32x16in32_shl(im, cosh),
340                                          ixheaacd_mult32x16in32_shl(re, sinh));
341   }
342 }
343 
344 VOID ixheaacd_cplx_anal_qmffilt(const WORD16 *time_sample_buf,
345                                 ia_sbr_scale_fact_struct *sbr_scale_factor,
346                                 WORD32 **qmf_real, WORD32 **qmf_imag,
347                                 ia_sbr_qmf_filter_bank_struct *qmf_bank,
348                                 ia_qmf_dec_tables_struct *qmf_dec_tables_ptr,
349                                 WORD32 ch_fac, WORD32 low_pow_flag,
350                                 WORD audio_object_type) {
351   WORD32 i, k;
352   WORD32 num_time_slots = qmf_bank->num_time_slots;
353 
354   WORD32 analysis_buffer[4 * NO_ANALYSIS_CHANNELS];
355   WORD16 *filter_states = qmf_bank->core_samples_buffer;
356 
357   WORD16 *fp1, *fp2, *tmp;
358 
359   WORD16 *filter_1;
360   WORD16 *filter_2;
361   WORD16 *filt_ptr;
362   if (audio_object_type != AOT_ER_AAC_ELD &&
363       audio_object_type != AOT_ER_AAC_LD) {
364     qmf_bank->filter_pos +=
365         (qmf_dec_tables_ptr->qmf_c - qmf_bank->analy_win_coeff);
366     qmf_bank->analy_win_coeff = qmf_dec_tables_ptr->qmf_c;
367   } else {
368     qmf_bank->filter_pos +=
369         (qmf_dec_tables_ptr->qmf_c_eld3 - qmf_bank->analy_win_coeff);
370     qmf_bank->analy_win_coeff = qmf_dec_tables_ptr->qmf_c_eld3;
371   }
372 
373   filter_1 = qmf_bank->filter_pos;
374 
375   if (audio_object_type != AOT_ER_AAC_ELD &&
376       audio_object_type != AOT_ER_AAC_LD) {
377     filter_2 = filter_1 + 64;
378   } else {
379     filter_2 = filter_1 + 32;
380   }
381 
382   sbr_scale_factor->st_lb_scale = 0;
383   sbr_scale_factor->lb_scale = -10;
384   if (!low_pow_flag) {
385     if (audio_object_type != AOT_ER_AAC_ELD &&
386         audio_object_type != AOT_ER_AAC_LD) {
387       sbr_scale_factor->lb_scale = -8;
388     } else {
389       sbr_scale_factor->lb_scale = -9;
390     }
391     qmf_bank->cos_twiddle =
392         (WORD16 *)qmf_dec_tables_ptr->sbr_sin_cos_twiddle_l32;
393     qmf_bank->alt_sin_twiddle =
394         (WORD16 *)qmf_dec_tables_ptr->sbr_alt_sin_twiddle_l32;
395     if (audio_object_type != AOT_ER_AAC_ELD &&
396         audio_object_type != AOT_ER_AAC_LD) {
397       qmf_bank->t_cos = (WORD16 *)qmf_dec_tables_ptr->sbr_t_cos_sin_l32;
398     } else {
399       qmf_bank->t_cos =
400           (WORD16 *)qmf_dec_tables_ptr->ixheaacd_sbr_t_cos_sin_l32_eld;
401     }
402   }
403 
404   fp1 = qmf_bank->anal_filter_states;
405   fp2 = qmf_bank->anal_filter_states + NO_ANALYSIS_CHANNELS;
406 
407   if (audio_object_type == AOT_ER_AAC_ELD ||
408       audio_object_type == AOT_ER_AAC_LD) {
409     filter_2 = qmf_bank->filter_2;
410     fp1 = qmf_bank->fp1_anal;
411     fp2 = qmf_bank->fp2_anal;
412   }
413 
414   for (i = 0; i < num_time_slots; i++) {
415     for (k = 0; k < NO_ANALYSIS_CHANNELS; k++)
416       filter_states[NO_ANALYSIS_CHANNELS - 1 - k] = time_sample_buf[ch_fac * k];
417 
418     if (audio_object_type != AOT_ER_AAC_ELD &&
419         audio_object_type != AOT_ER_AAC_LD) {
420       ixheaacd_sbr_qmfanal32_winadds(fp1, fp2, filter_1, filter_2,
421                                      analysis_buffer, filter_states,
422                                      time_sample_buf, ch_fac);
423     }
424 
425     else {
426       ixheaacd_sbr_qmfanal32_winadd_eld(fp1, fp2, filter_1, filter_2,
427                                         analysis_buffer);
428     }
429 
430     time_sample_buf += NO_ANALYSIS_CHANNELS * ch_fac;
431 
432     filter_states -= NO_ANALYSIS_CHANNELS;
433     if (filter_states < qmf_bank->anal_filter_states) {
434       filter_states = qmf_bank->anal_filter_states + 288;
435     }
436 
437     tmp = fp1;
438     fp1 = fp2;
439     fp2 = tmp;
440     if (audio_object_type != AOT_ER_AAC_ELD &&
441         audio_object_type != AOT_ER_AAC_LD) {
442       filter_1 += 64;
443       filter_2 += 64;
444     } else {
445       filter_1 += 32;
446       filter_2 += 32;
447     }
448 
449     filt_ptr = filter_1;
450     filter_1 = filter_2;
451     filter_2 = filt_ptr;
452     if (audio_object_type != AOT_ER_AAC_ELD &&
453         audio_object_type != AOT_ER_AAC_LD) {
454       if (filter_2 > (qmf_bank->analy_win_coeff + 640)) {
455         filter_1 = (WORD16 *)qmf_bank->analy_win_coeff;
456         filter_2 = (WORD16 *)qmf_bank->analy_win_coeff + 64;
457       }
458     } else {
459       if (filter_2 > (qmf_bank->analy_win_coeff + 320)) {
460         filter_1 = (WORD16 *)qmf_bank->analy_win_coeff;
461         filter_2 = (WORD16 *)qmf_bank->analy_win_coeff + 32;
462       }
463     }
464 
465     if (!low_pow_flag) {
466       ixheaacd_fwd_modulation(analysis_buffer, qmf_real[i], qmf_imag[i],
467                               qmf_bank, qmf_dec_tables_ptr);
468     } else {
469       ixheaacd_dct3_32(
470           (WORD32 *)analysis_buffer, qmf_real[i], qmf_dec_tables_ptr->dct23_tw,
471           qmf_dec_tables_ptr->post_fft_tbl, qmf_dec_tables_ptr->w_16,
472           qmf_dec_tables_ptr->dig_rev_table4_16);
473     }
474   }
475 
476   qmf_bank->filter_pos = filter_1;
477   qmf_bank->core_samples_buffer = filter_states;
478 
479   if (audio_object_type == AOT_ER_AAC_ELD || audio_object_type == AOT_ER_AAC_LD)
480 
481   {
482     qmf_bank->fp1_anal = fp1;
483     qmf_bank->fp2_anal = fp2;
484     qmf_bank->filter_2 = filter_2;
485   }
486 }
487 
488 VOID ixheaacd_inv_modulation_lp(WORD32 *qmf_real, WORD16 *filter_states,
489                                 ia_sbr_qmf_filter_bank_struct *syn_qmf,
490                                 ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
491   WORD32 L = syn_qmf->no_channels;
492   const WORD32 M = (L >> 1);
493   WORD32 *dct_in = qmf_real;
494   WORD32 time_out[2 * NO_SYNTHESIS_CHANNELS];
495 
496   WORD32 ui_rem = ((WORD64)(&time_out[0]) % 8);
497   WORD32 *ptime_out = (pVOID)((WORD8 *)&time_out[0] + 8 - ui_rem);
498 
499   if (L == 64)
500     ixheaacd_dct2_64(dct_in, ptime_out, qmf_dec_tables_ptr, filter_states + M);
501   else
502     ixheaacd_dct2_32(dct_in, time_out, qmf_dec_tables_ptr, filter_states);
503 
504   filter_states[3 * M] = 0;
505 }
506 
507 VOID ixheaacd_inv_emodulation(WORD32 *qmf_real,
508                               ia_sbr_qmf_filter_bank_struct *syn_qmf,
509                               ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
510   ixheaacd_cos_sin_mod(qmf_real, syn_qmf, (WORD16 *)qmf_dec_tables_ptr->w1024,
511                        (WORD32 *)qmf_dec_tables_ptr->dig_rev_table2_128);
512 }
513 
514 VOID ixheaacd_esbr_radix4bfly(const WORD32 *w, WORD32 *x, WORD32 index1,
515                               WORD32 index) {
516   int i;
517   WORD32 l1, l2, h2, fft_jmp;
518   WORD64 xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0;
519   WORD64 xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0;
520   WORD32 x_0, x_1, x_l1_0, x_l1_1, x_l2_0, x_l2_1;
521   WORD32 x_h2_0, x_h2_1;
522   WORD32 si10, si20, si30, co10, co20, co30;
523 
524   WORD64 mul_1, mul_2, mul_3, mul_4, mul_5, mul_6;
525   WORD64 mul_7, mul_8, mul_9, mul_10, mul_11, mul_12;
526   WORD32 *x_l1;
527   WORD32 *x_l2;
528   WORD32 *x_h2;
529   const WORD32 *w_ptr = w;
530   WORD32 i1;
531 
532   h2 = index << 1;
533   l1 = index << 2;
534   l2 = (index << 2) + (index << 1);
535 
536   x_l1 = &(x[l1]);
537   x_l2 = &(x[l2]);
538   x_h2 = &(x[h2]);
539 
540   fft_jmp = 6 * (index);
541 
542   for (i1 = 0; i1 < index1; i1++) {
543     for (i = 0; i < index; i++) {
544       si10 = (*w_ptr++);
545       co10 = (*w_ptr++);
546       si20 = (*w_ptr++);
547       co20 = (*w_ptr++);
548       si30 = (*w_ptr++);
549       co30 = (*w_ptr++);
550 
551       x_0 = x[0];
552       x_h2_0 = x[h2];
553       x_l1_0 = x[l1];
554       x_l2_0 = x[l2];
555 
556       xh0_0 = (WORD64)x_0 + (WORD64)x_l1_0;
557       xl0_0 = (WORD64)x_0 - (WORD64)x_l1_0;
558 
559       xh20_0 = (WORD64)x_h2_0 + (WORD64)x_l2_0;
560       xl20_0 = (WORD64)x_h2_0 - (WORD64)x_l2_0;
561 
562       x[0] = (WORD32)ixheaacd_add64_sat(xh0_0, xh20_0);
563       xt0_0 = (WORD64)xh0_0 - (WORD64)xh20_0;
564 
565       x_1 = x[1];
566       x_h2_1 = x[h2 + 1];
567       x_l1_1 = x[l1 + 1];
568       x_l2_1 = x[l2 + 1];
569 
570       xh1_0 = (WORD64)x_1 + (WORD64)x_l1_1;
571       xl1_0 = (WORD64)x_1 - (WORD64)x_l1_1;
572 
573       xh21_0 = (WORD64)x_h2_1 + (WORD64)x_l2_1;
574       xl21_0 = (WORD64)x_h2_1 - (WORD64)x_l2_1;
575 
576       x[1] = (WORD32)ixheaacd_add64_sat(xh1_0, xh21_0);
577       yt0_0 = (WORD64)xh1_0 - (WORD64)xh21_0;
578 
579       xt1_0 = (WORD64)xl0_0 + (WORD64)xl21_0;
580       xt2_0 = (WORD64)xl0_0 - (WORD64)xl21_0;
581 
582       yt2_0 = (WORD64)xl1_0 + (WORD64)xl20_0;
583       yt1_0 = (WORD64)xl1_0 - (WORD64)xl20_0;
584 
585       mul_11 = ixheaacd_mult64(xt2_0, co30);
586       mul_3 = ixheaacd_mult64(yt2_0, si30);
587       x[l2] = ixheaacd_sat64_32(((mul_3 + mul_11) >> 32) << RADIXSHIFT);
588 
589       mul_5 = ixheaacd_mult64(xt2_0, si30);
590       mul_9 = ixheaacd_mult64(yt2_0, co30);
591       x[l2 + 1] = ixheaacd_sat64_32(((mul_9 - mul_5) >> 32) << RADIXSHIFT);
592 
593       mul_12 = ixheaacd_mult64(xt0_0, co20);
594       mul_2 = ixheaacd_mult64(yt0_0, si20);
595       x[l1] = ixheaacd_sat64_32(((mul_2 + mul_12) >> 32) << RADIXSHIFT);
596 
597       mul_6 = ixheaacd_mult64(xt0_0, si20);
598       mul_8 = ixheaacd_mult64(yt0_0, co20);
599       x[l1 + 1] = ixheaacd_sat64_32(((mul_8 - mul_6) >> 32) << RADIXSHIFT);
600 
601       mul_4 = ixheaacd_mult64(xt1_0, co10);
602       mul_1 = ixheaacd_mult64(yt1_0, si10);
603       x[h2] = ixheaacd_sat64_32(((mul_1 + mul_4) >> 32) << RADIXSHIFT);
604 
605       mul_10 = ixheaacd_mult64(xt1_0, si10);
606       mul_7 = ixheaacd_mult64(yt1_0, co10);
607       x[h2 + 1] = ixheaacd_sat64_32(((mul_7 - mul_10) >> 32) << RADIXSHIFT);
608 
609       x += 2;
610     }
611     x += fft_jmp;
612     w_ptr = w_ptr - fft_jmp;
613   }
614 }
615 
616 VOID ixheaacd_esbr_postradixcompute2(WORD32 *ptr_y, WORD32 *ptr_x,
617                                      const WORD32 *pdig_rev_tbl,
618                                      WORD32 npoints) {
619   WORD32 i, k;
620   WORD32 h2;
621   WORD32 x_0, x_1, x_2, x_3;
622   WORD32 x_4, x_5, x_6, x_7;
623   WORD32 x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;
624   WORD32 n0, j0;
625   WORD32 *x2, *x0;
626   WORD32 *y0, *y1, *y2, *y3;
627 
628   y0 = ptr_y;
629   y2 = ptr_y + (WORD32)npoints;
630   x0 = ptr_x;
631   x2 = ptr_x + (WORD32)(npoints >> 1);
632 
633   y1 = y0 + (WORD32)(npoints >> 2);
634   y3 = y2 + (WORD32)(npoints >> 2);
635   j0 = 8;
636   n0 = npoints >> 1;
637 
638   for (k = 0; k < 2; k++) {
639     for (i = 0; i<npoints>> 1; i += 8) {
640       h2 = *pdig_rev_tbl++ >> 2;
641 
642       x_0 = *x0++;
643       x_1 = *x0++;
644       x_2 = *x0++;
645       x_3 = *x0++;
646       x_4 = *x0++;
647       x_5 = *x0++;
648       x_6 = *x0++;
649       x_7 = *x0++;
650 
651       y0[h2] = ixheaacd_add32_sat(x_0, x_2);
652       y0[h2 + 1] = ixheaacd_add32_sat(x_1, x_3);
653       y1[h2] = ixheaacd_add32_sat(x_4, x_6);
654       y1[h2 + 1] = ixheaacd_add32_sat(x_5, x_7);
655       y2[h2] = ixheaacd_sub32_sat(x_0, x_2);
656       y2[h2 + 1] = ixheaacd_sub32_sat(x_1, x_3);
657       y3[h2] = ixheaacd_sub32_sat(x_4, x_6);
658       y3[h2 + 1] = ixheaacd_sub32_sat(x_5, x_7);
659 
660       x_8 = *x2++;
661       x_9 = *x2++;
662       x_a = *x2++;
663       x_b = *x2++;
664       x_c = *x2++;
665       x_d = *x2++;
666       x_e = *x2++;
667       x_f = *x2++;
668 
669       y0[h2 + 2] = ixheaacd_add32_sat(x_8, x_a);
670       y0[h2 + 3] = ixheaacd_add32_sat(x_9, x_b);
671       y1[h2 + 2] = ixheaacd_add32_sat(x_c, x_e);
672       y1[h2 + 3] = ixheaacd_add32_sat(x_d, x_f);
673       y2[h2 + 2] = ixheaacd_sub32_sat(x_8, x_a);
674       y2[h2 + 3] = ixheaacd_sub32_sat(x_9, x_b);
675       y3[h2 + 2] = ixheaacd_sub32_sat(x_c, x_e);
676       y3[h2 + 3] = ixheaacd_sub32_sat(x_d, x_f);
677     }
678     x0 += (WORD32)npoints >> 1;
679     x2 += (WORD32)npoints >> 1;
680   }
681 }
682 
683 VOID ixheaacd_esbr_postradixcompute4(WORD32 *ptr_y, WORD32 *ptr_x,
684                                      const WORD32 *p_dig_rev_tbl,
685                                      WORD32 npoints) {
686   WORD32 i, k;
687   WORD32 h2;
688   WORD32 xh0_0, xh1_0, xl0_0, xl1_0;
689   WORD32 xh0_1, xh1_1, xl0_1, xl1_1;
690   WORD32 x_0, x_1, x_2, x_3;
691   WORD32 xh0_2, xh1_2, xl0_2, xl1_2, xh0_3, xh1_3, xl0_3, xl1_3;
692   WORD32 x_4, x_5, x_6, x_7;
693   WORD32 x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;
694   WORD32 n00, n10, n20, n30, n01, n11, n21, n31;
695   WORD32 n02, n12, n22, n32, n03, n13, n23, n33;
696   WORD32 n0, j0;
697   WORD32 *x2, *x0;
698   WORD32 *y0, *y1, *y2, *y3;
699 
700   y0 = ptr_y;
701   y2 = ptr_y + (WORD32)npoints;
702   x0 = ptr_x;
703   x2 = ptr_x + (WORD32)(npoints >> 1);
704 
705   y1 = y0 + (WORD32)(npoints >> 1);
706   y3 = y2 + (WORD32)(npoints >> 1);
707 
708   j0 = 4;
709   n0 = npoints >> 2;
710 
711   for (k = 0; k < 2; k++) {
712     for (i = 0; i<npoints>> 1; i += 8) {
713       h2 = *p_dig_rev_tbl++ >> 2;
714       x_0 = *x0++;
715       x_1 = *x0++;
716       x_2 = *x0++;
717       x_3 = *x0++;
718       x_4 = *x0++;
719       x_5 = *x0++;
720       x_6 = *x0++;
721       x_7 = *x0++;
722 
723       xh0_0 = ixheaacd_add32_sat(x_0, x_4);
724       xh1_0 = ixheaacd_add32_sat(x_1, x_5);
725       xl0_0 = ixheaacd_sub32_sat(x_0, x_4);
726       xl1_0 = ixheaacd_sub32_sat(x_1, x_5);
727       xh0_1 = ixheaacd_add32_sat(x_2, x_6);
728       xh1_1 = ixheaacd_add32_sat(x_3, x_7);
729       xl0_1 = ixheaacd_sub32_sat(x_2, x_6);
730       xl1_1 = ixheaacd_sub32_sat(x_3, x_7);
731 
732       n00 = ixheaacd_add32_sat(xh0_0, xh0_1);
733       n01 = ixheaacd_add32_sat(xh1_0, xh1_1);
734       n10 = ixheaacd_add32_sat(xl0_0, xl1_1);
735       n11 = ixheaacd_sub32_sat(xl1_0, xl0_1);
736       n20 = ixheaacd_sub32_sat(xh0_0, xh0_1);
737       n21 = ixheaacd_sub32_sat(xh1_0, xh1_1);
738       n30 = ixheaacd_sub32_sat(xl0_0, xl1_1);
739       n31 = ixheaacd_add32_sat(xl1_0, xl0_1);
740 
741       y0[h2] = n00;
742       y0[h2 + 1] = n01;
743       y1[h2] = n10;
744       y1[h2 + 1] = n11;
745       y2[h2] = n20;
746       y2[h2 + 1] = n21;
747       y3[h2] = n30;
748       y3[h2 + 1] = n31;
749 
750       x_8 = *x2++;
751       x_9 = *x2++;
752       x_a = *x2++;
753       x_b = *x2++;
754       x_c = *x2++;
755       x_d = *x2++;
756       x_e = *x2++;
757       x_f = *x2++;
758 
759       xh0_2 = ixheaacd_add32_sat(x_8, x_c);
760       xh1_2 = ixheaacd_add32_sat(x_9, x_d);
761       xl0_2 = ixheaacd_sub32_sat(x_8, x_c);
762       xl1_2 = ixheaacd_sub32_sat(x_9, x_d);
763       xh0_3 = ixheaacd_add32_sat(x_a, x_e);
764       xh1_3 = ixheaacd_add32_sat(x_b, x_f);
765       xl0_3 = ixheaacd_sub32_sat(x_a, x_e);
766       xl1_3 = ixheaacd_sub32_sat(x_b, x_f);
767 
768       n02 = ixheaacd_add32_sat(xh0_2, xh0_3);
769       n03 = ixheaacd_add32_sat(xh1_2, xh1_3);
770       n12 = ixheaacd_add32_sat(xl0_2, xl1_3);
771       n13 = ixheaacd_sub32_sat(xl1_2, xl0_3);
772       n22 = ixheaacd_sub32_sat(xh0_2, xh0_3);
773       n23 = ixheaacd_sub32_sat(xh1_2, xh1_3);
774       n32 = ixheaacd_sub32_sat(xl0_2, xl1_3);
775       n33 = ixheaacd_add32_sat(xl1_2, xl0_3);
776 
777       y0[h2 + 2] = n02;
778       y0[h2 + 3] = n03;
779       y1[h2 + 2] = n12;
780       y1[h2 + 3] = n13;
781       y2[h2 + 2] = n22;
782       y2[h2 + 3] = n23;
783       y3[h2 + 2] = n32;
784       y3[h2 + 3] = n33;
785     }
786     x0 += (WORD32)npoints >> 1;
787     x2 += (WORD32)npoints >> 1;
788   }
789 }
790 
791 VOID ixheaacd_esbr_cos_sin_mod(WORD32 *subband,
792                                ia_sbr_qmf_filter_bank_struct *qmf_bank,
793                                WORD32 *p_twiddle, WORD32 *p_dig_rev_tbl) {
794   WORD32 z;
795   WORD32 temp[128];
796   WORD32 scaleshift = 0;
797 
798   WORD32 re2, re3;
799   WORD32 wim, wre;
800 
801   WORD32 i, M_2;
802   WORD32 M = ixheaacd_shr32(qmf_bank->no_channels, 1);
803 
804   const WORD32 *p_sin;
805   const WORD32 *p_sin_cos;
806 
807   WORD32 subband_tmp[128];
808   WORD32 re;
809   WORD32 im;
810   WORD32 *psubband, *psubband1;
811   WORD32 *psubband_t, *psubband1_t;
812   WORD32 *psubband2, *psubband12;
813   WORD32 *psubband_t2, *psubband1_t2;
814 
815   M_2 = ixheaacd_shr32(M, 1);
816 
817   p_sin_cos = qmf_bank->esbr_cos_twiddle;
818 
819   psubband = &subband[0];
820   psubband1 = &subband[2 * M - 1];
821   psubband_t = subband_tmp;
822   psubband1_t = &subband_tmp[2 * M - 1];
823 
824   psubband2 = &subband[64];
825   psubband12 = &subband[2 * M - 1 + 64];
826   psubband_t2 = &subband_tmp[64];
827   psubband1_t2 = &subband_tmp[2 * M - 1 + 64];
828 
829   for (i = (M_2 >> 1) - 1; i >= 0; i--) {
830     re = *psubband++;
831     im = *psubband1--;
832 
833     wim = *p_sin_cos++;
834     wre = *p_sin_cos++;
835 
836     *psubband_t++ = (WORD32)(
837         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
838         32);
839     *psubband_t++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
840                                                  ixheaacd_mult64(re, wim))) >>
841                              32);
842 
843     re = *psubband2++;
844     im = *psubband12--;
845 
846     *psubband_t2++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
847                                                   ixheaacd_mult64(re, wre))) >>
848                               32);
849     *psubband_t2++ = (WORD32)(
850         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
851         32);
852 
853     re = *psubband1--;
854     im = *psubband++;
855 
856     wim = *p_sin_cos++;
857     wre = *p_sin_cos++;
858 
859     *psubband1_t-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
860                                                   ixheaacd_mult64(re, wim))) >>
861                               32);
862     *psubband1_t-- = (WORD32)(
863         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
864         32);
865 
866     re = *psubband12--;
867     im = *psubband2++;
868 
869     *psubband1_t2-- = (WORD32)(
870         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
871         32);
872     *psubband1_t2-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
873                                                    ixheaacd_mult64(re, wre))) >>
874                                32);
875 
876     re = *psubband++;
877     im = *psubband1--;
878 
879     wim = *p_sin_cos++;
880     wre = *p_sin_cos++;
881 
882     *psubband_t++ = (WORD32)(
883         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
884         32);
885     *psubband_t++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
886                                                  ixheaacd_mult64(re, wim))) >>
887                              32);
888 
889     re = *psubband2++;
890     im = *psubband12--;
891 
892     *psubband_t2++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
893                                                   ixheaacd_mult64(re, wre))) >>
894                               32);
895     *psubband_t2++ = (WORD32)(
896         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
897         32);
898 
899     re = *psubband1--;
900     im = *psubband++;
901     ;
902 
903     wim = *p_sin_cos++;
904     wre = *p_sin_cos++;
905 
906     *psubband1_t-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
907                                                   ixheaacd_mult64(re, wim))) >>
908                               32);
909     *psubband1_t-- = (WORD32)(
910         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
911         32);
912 
913     re = *psubband12--;
914     im = *psubband2++;
915     ;
916 
917     *psubband1_t2-- = (WORD32)(
918         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
919         32);
920     *psubband1_t2-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
921                                                    ixheaacd_mult64(re, wre))) >>
922                                32);
923   }
924 
925   if (M == 32) {
926     ixheaacd_esbr_radix4bfly(p_twiddle, subband_tmp, 1, 8);
927     ixheaacd_esbr_radix4bfly(p_twiddle + 48, subband_tmp, 4, 2);
928     ixheaacd_esbr_postradixcompute2(subband, subband_tmp, p_dig_rev_tbl, 32);
929 
930     ixheaacd_esbr_radix4bfly(p_twiddle, &subband_tmp[64], 1, 8);
931     ixheaacd_esbr_radix4bfly(p_twiddle + 48, &subband_tmp[64], 4, 2);
932     ixheaacd_esbr_postradixcompute2(&subband[64], &subband_tmp[64],
933                                     p_dig_rev_tbl, 32);
934 
935   }
936 
937   else if (M == 16) {
938     ixheaacd_esbr_radix4bfly(p_twiddle, subband_tmp, 1, 4);
939     ixheaacd_esbr_postradixcompute4(subband, subband_tmp, p_dig_rev_tbl, 16);
940 
941     ixheaacd_esbr_radix4bfly(p_twiddle, &subband_tmp[64], 1, 4);
942     ixheaacd_esbr_postradixcompute4(&subband[64], &subband_tmp[64],
943                                     p_dig_rev_tbl, 16);
944 
945   }
946 
947   else if (M == 12) {
948     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
949       temp[z] = subband_tmp[2 * z];
950       temp[12 + z] = subband_tmp[2 * z + 1];
951     }
952 
953     ixheaacd_complex_fft_p3(temp, &temp[12], 12, -1, &scaleshift);
954 
955     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
956       subband[2 * z] = temp[z];
957       subband[2 * z + 1] = temp[z + 12];
958     }
959     scaleshift = 0;
960     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
961       temp[z] = subband_tmp[64 + 2 * z];
962       temp[12 + z] = subband_tmp[64 + 2 * z + 1];
963     }
964 
965     ixheaacd_complex_fft_p3(temp, &temp[12], 12, -1, &scaleshift);
966 
967     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
968       subband[64 + 2 * z] = temp[z];
969       subband[64 + 2 * z + 1] = temp[z + 12];
970     }
971 
972   }
973 
974   else {
975     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
976       temp[z] = subband_tmp[2 * z];
977       temp[8 + z] = subband_tmp[2 * z + 1];
978     }
979 
980     (*ixheaacd_complex_fft_p2)(temp, &temp[8], 8, -1, &scaleshift);
981 
982     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
983       subband[2 * z] = ixheaacd_shl32_sat(temp[z], scaleshift);
984       subband[2 * z + 1] = ixheaacd_shl32_sat(temp[z + 8], scaleshift);
985     }
986     scaleshift = 0;
987     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
988       temp[z] = subband_tmp[64 + 2 * z];
989       temp[8 + z] = subband_tmp[64 + 2 * z + 1];
990     }
991 
992     (*ixheaacd_complex_fft_p2)(temp, &temp[8], 8, -1, &scaleshift);
993 
994     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
995       subband[64 + 2 * z] = ixheaacd_shl32_sat(temp[z], scaleshift);
996       subband[64 + 2 * z + 1] = ixheaacd_shl32_sat(temp[8 + z], scaleshift);
997     }
998   }
999 
1000   psubband = &subband[0];
1001   psubband1 = &subband[2 * M - 1];
1002 
1003   re = *psubband1;
1004 
1005   *psubband = *psubband >> 1;
1006   psubband++;
1007   *psubband1 = ixheaacd_negate32(*psubband >> 1);
1008   psubband1--;
1009 
1010   p_sin = qmf_bank->esbr_alt_sin_twiddle;
1011 
1012   wim = *p_sin++;
1013   wre = *p_sin++;
1014 
1015   im = *psubband1;
1016   ;
1017 
1018   *psubband1-- = (WORD32)(
1019       (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
1020       32);
1021   *psubband++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
1022                                              ixheaacd_mult64(re, wim))) >>
1023                          32);
1024 
1025   psubband2 = &subband[64];
1026   psubband12 = &subband[2 * M - 1 + 64];
1027 
1028   re = *psubband12;
1029   ;
1030 
1031   *psubband12-- = ixheaacd_negate32_sat(*psubband2 >> 1);
1032   ;
1033   *psubband2 = psubband2[1] >> 1;
1034   ;
1035   psubband2++;
1036 
1037   im = *psubband12;
1038   ;
1039 
1040   *psubband2++ = ixheaacd_negate32_sat((WORD32)(
1041       (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
1042       32));
1043   *psubband12-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(re, wim),
1044                                                ixheaacd_mult64(im, wre))) >>
1045                            32);
1046 
1047   for (i = (M_2 - 2); i >= 0; i--) {
1048     im = psubband[0];
1049     ;
1050     re = psubband[1];
1051     ;
1052     re2 = *psubband1;
1053     ;
1054 
1055     *psubband++ = (WORD32)(
1056         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
1057         32);
1058     *psubband1-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
1059                                                 ixheaacd_mult64(re, wre))) >>
1060                             32);
1061 
1062     im = psubband2[0];
1063     ;
1064     re = psubband2[1];
1065     ;
1066     re3 = *psubband12;
1067     ;
1068 
1069     *psubband12-- = ixheaacd_negate32_sat((WORD32)(
1070         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
1071         32));
1072     *psubband2++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(re, wre),
1073                                                 ixheaacd_mult64(im, wim))) >>
1074                             32);
1075 
1076     wim = *p_sin++;
1077     wre = *p_sin++;
1078     im = psubband1[0];
1079     ;
1080 
1081     *psubband1-- = (WORD32)(
1082         (ixheaacd_add64(ixheaacd_mult64(re2, wre), ixheaacd_mult64(im, wim))) >>
1083         32);
1084     *psubband++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
1085                                                ixheaacd_mult64(re2, wim))) >>
1086                            32);
1087 
1088     im = psubband12[0];
1089     ;
1090 
1091     *psubband2++ = ixheaacd_negate32_sat((WORD32)(
1092         (ixheaacd_add64(ixheaacd_mult64(re3, wre), ixheaacd_mult64(im, wim))) >>
1093         32));
1094     *psubband12-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(re3, wim),
1095                                                  ixheaacd_mult64(im, wre))) >>
1096                              32);
1097   }
1098 }
1099 
1100 VOID ixheaacd_esbr_fwd_modulation(
1101     const WORD32 *time_sample_buf, WORD32 *real_subband, WORD32 *imag_subband,
1102     ia_sbr_qmf_filter_bank_struct *qmf_bank,
1103     ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
1104   WORD32 i;
1105   const WORD32 *time_sample_buf1 =
1106       &time_sample_buf[2 * qmf_bank->no_channels - 1];
1107   WORD32 temp1, temp2;
1108   WORD32 *t_real_subband = real_subband;
1109   WORD32 *t_imag_subband = imag_subband;
1110   const WORD32 *tcos;
1111 
1112   for (i = qmf_bank->no_channels - 1; i >= 0; i--) {
1113     temp1 = ixheaacd_shr32(*time_sample_buf++, HQ_SHIFT_64);
1114     temp2 = ixheaacd_shr32(*time_sample_buf1--, HQ_SHIFT_64);
1115 
1116     *t_real_subband++ = ixheaacd_sub32_sat(temp1, temp2);
1117     ;
1118     *t_imag_subband++ = ixheaacd_add32_sat(temp1, temp2);
1119     ;
1120   }
1121 
1122   ixheaacd_esbr_cos_sin_mod(real_subband, qmf_bank,
1123                             qmf_dec_tables_ptr->esbr_w_16,
1124                             qmf_dec_tables_ptr->dig_rev_table4_16);
1125 
1126   tcos = qmf_bank->esbr_t_cos;
1127 
1128   for (i = (qmf_bank->usb - qmf_bank->lsb - 1); i >= 0; i--) {
1129     WORD32 cosh, sinh;
1130     WORD32 re, im;
1131 
1132     re = *real_subband;
1133     im = *imag_subband;
1134     cosh = *tcos++;
1135     sinh = *tcos++;
1136     *real_subband++ =
1137         ixheaacd_sat64_32((ixheaacd_add64(ixheaacd_mult64(re, cosh),
1138                                           ixheaacd_mult64(im, sinh))) >>
1139                           31);
1140     *imag_subband++ =
1141         ixheaacd_sat64_32((ixheaacd_sub64_sat(ixheaacd_mult64(im, cosh),
1142                                               ixheaacd_mult64(re, sinh))) >>
1143                           31);
1144   }
1145 }
1146 
1147 VOID ixheaacd_esbr_qmfsyn64_winadd(WORD32 *tmp1, WORD32 *tmp2, WORD32 *inp1,
1148                                    WORD32 *sample_buffer, WORD32 ch_fac) {
1149   WORD32 k;
1150 
1151   for (k = 0; k < 64; k++) {
1152     WORD64 syn_out = 0;
1153 
1154     syn_out =
1155         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[0 + k], inp1[k + 0]));
1156     syn_out =
1157         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[256 + k], inp1[k + 128]));
1158     syn_out =
1159         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[512 + k], inp1[k + 256]));
1160     syn_out =
1161         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[768 + k], inp1[k + 384]));
1162     syn_out =
1163         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[1024 + k], inp1[k + 512]));
1164 
1165     syn_out =
1166         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[128 + k], inp1[k + 64]));
1167     syn_out =
1168         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[384 + k], inp1[k + 192]));
1169     syn_out =
1170         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[640 + k], inp1[k + 320]));
1171     syn_out =
1172         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[896 + k], inp1[k + 448]));
1173     syn_out =
1174         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[1152 + k], inp1[k + 576]));
1175 
1176     sample_buffer[ch_fac * k] = ixheaacd_sat64_32(syn_out >> 31);
1177   }
1178 }
1179 
1180 VOID ixheaacd_shiftrountine(WORD32 *qmf_real, WORD32 *qmf_imag, WORD32 len,
1181                             WORD32 common_shift) {
1182   WORD32 treal, timag;
1183   WORD32 j;
1184 
1185   if (common_shift < 0) {
1186     WORD32 cshift = -common_shift;
1187     cshift = ixheaacd_min32(cshift, 31);
1188     for (j = len - 1; j >= 0; j--) {
1189       treal = *qmf_real;
1190       timag = *qmf_imag;
1191 
1192       treal = (ixheaacd_shr32(treal, cshift));
1193       timag = (ixheaacd_shr32(timag, cshift));
1194 
1195       *qmf_real++ = treal;
1196       *qmf_imag++ = timag;
1197     }
1198   } else {
1199     for (j = len - 1; j >= 0; j--) {
1200       treal = (ixheaacd_shl32_sat(*qmf_real, common_shift));
1201       timag = (ixheaacd_shl32_sat(*qmf_imag, common_shift));
1202       *qmf_real++ = treal;
1203       *qmf_imag++ = timag;
1204     }
1205   }
1206 }
1207 
1208 VOID ixheaacd_shiftrountine_with_rnd_hq(WORD32 *qmf_real, WORD32 *qmf_imag,
1209                                         WORD32 *filter_states, WORD32 len,
1210                                         WORD32 shift) {
1211   WORD32 *filter_states_rev = filter_states + len;
1212   WORD32 treal, timag;
1213   WORD32 j;
1214 
1215   for (j = (len - 1); j >= 0; j -= 2) {
1216     WORD32 r1, r2, i1, i2;
1217     i2 = qmf_imag[j];
1218     r2 = qmf_real[j];
1219     r1 = *qmf_real++;
1220     i1 = *qmf_imag++;
1221 
1222     timag = ixheaacd_add32_sat(i1, r1);
1223     timag = (ixheaacd_shl32_sat(timag, shift));
1224     filter_states_rev[j] = timag;
1225 
1226     treal = ixheaacd_sub32_sat(i2, r2);
1227     treal = (ixheaacd_shl32_sat(treal, shift));
1228     filter_states[j] = treal;
1229 
1230     treal = ixheaacd_sub32_sat(i1, r1);
1231     treal = (ixheaacd_shl32_sat(treal, shift));
1232     *filter_states++ = treal;
1233 
1234     timag = ixheaacd_add32_sat(i2, r2);
1235     timag = (ixheaacd_shl32_sat(timag, shift));
1236     *filter_states_rev++ = timag;
1237   }
1238 }
1239 
1240 VOID ixheaacd_radix4bfly(const WORD16 *w, WORD32 *x, WORD32 index1,
1241                          WORD32 index) {
1242   int i;
1243   WORD32 l1, l2, h2, fft_jmp;
1244   WORD32 xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0;
1245   WORD32 xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0;
1246   WORD32 x_0, x_1, x_l1_0, x_l1_1, x_l2_0, x_l2_1;
1247   WORD32 x_h2_0, x_h2_1;
1248   WORD16 si10, si20, si30, co10, co20, co30;
1249 
1250   WORD32 mul_1, mul_2, mul_3, mul_4, mul_5, mul_6;
1251   WORD32 mul_7, mul_8, mul_9, mul_10, mul_11, mul_12;
1252   WORD32 *x_l1;
1253   WORD32 *x_l2;
1254   WORD32 *x_h2;
1255   const WORD16 *w_ptr = w;
1256   WORD32 i1;
1257 
1258   h2 = index << 1;
1259   l1 = index << 2;
1260   l2 = (index << 2) + (index << 1);
1261 
1262   x_l1 = &(x[l1]);
1263   x_l2 = &(x[l2]);
1264   x_h2 = &(x[h2]);
1265 
1266   fft_jmp = 6 * (index);
1267 
1268   for (i1 = 0; i1 < index1; i1++) {
1269     for (i = 0; i < index; i++) {
1270       si10 = (*w_ptr++);
1271       co10 = (*w_ptr++);
1272       si20 = (*w_ptr++);
1273       co20 = (*w_ptr++);
1274       si30 = (*w_ptr++);
1275       co30 = (*w_ptr++);
1276 
1277       x_0 = x[0];
1278       x_h2_0 = x[h2];
1279       x_l1_0 = x[l1];
1280       x_l2_0 = x[l2];
1281 
1282       xh0_0 = ixheaacd_add32_sat(x_0, x_l1_0);
1283       xl0_0 = ixheaacd_sub32_sat(x_0, x_l1_0);
1284 
1285       xh20_0 = ixheaacd_add32_sat(x_h2_0, x_l2_0);
1286       xl20_0 = ixheaacd_sub32_sat(x_h2_0, x_l2_0);
1287 
1288       x[0] = ixheaacd_add32_sat(xh0_0, xh20_0);
1289       xt0_0 = ixheaacd_sub32_sat(xh0_0, xh20_0);
1290 
1291       x_1 = x[1];
1292       x_h2_1 = x[h2 + 1];
1293       x_l1_1 = x[l1 + 1];
1294       x_l2_1 = x[l2 + 1];
1295 
1296       xh1_0 = ixheaacd_add32_sat(x_1, x_l1_1);
1297       xl1_0 = ixheaacd_sub32_sat(x_1, x_l1_1);
1298 
1299       xh21_0 = ixheaacd_add32_sat(x_h2_1, x_l2_1);
1300       xl21_0 = ixheaacd_sub32_sat(x_h2_1, x_l2_1);
1301 
1302       x[1] = ixheaacd_add32_sat(xh1_0, xh21_0);
1303       yt0_0 = ixheaacd_sub32_sat(xh1_0, xh21_0);
1304 
1305       xt1_0 = ixheaacd_add32_sat(xl0_0, xl21_0);
1306       xt2_0 = ixheaacd_sub32_sat(xl0_0, xl21_0);
1307 
1308       yt2_0 = ixheaacd_add32_sat(xl1_0, xl20_0);
1309       yt1_0 = ixheaacd_sub32_sat(xl1_0, xl20_0);
1310 
1311       mul_11 = ixheaacd_mult32x16in32(xt2_0, co30);
1312       mul_3 = ixheaacd_mult32x16in32(yt2_0, si30);
1313       x[l2] = ixheaacd_shl32_sat((mul_3 + mul_11), RADIXSHIFT);
1314 
1315       mul_5 = ixheaacd_mult32x16in32(xt2_0, si30);
1316       mul_9 = ixheaacd_mult32x16in32(yt2_0, co30);
1317       x[l2 + 1] = ixheaacd_shl32_sat((mul_9 - mul_5), RADIXSHIFT);
1318 
1319       mul_12 = ixheaacd_mult32x16in32(xt0_0, co20);
1320       mul_2 = ixheaacd_mult32x16in32(yt0_0, si20);
1321       x[l1] = ixheaacd_shl32_sat((mul_2 + mul_12), RADIXSHIFT);
1322 
1323       mul_6 = ixheaacd_mult32x16in32(xt0_0, si20);
1324       mul_8 = ixheaacd_mult32x16in32(yt0_0, co20);
1325       x[l1 + 1] = ixheaacd_shl32_sat((mul_8 - mul_6), RADIXSHIFT);
1326 
1327       mul_4 = ixheaacd_mult32x16in32(xt1_0, co10);
1328       mul_1 = ixheaacd_mult32x16in32(yt1_0, si10);
1329       x[h2] = ixheaacd_shl32_sat((mul_1 + mul_4), RADIXSHIFT);
1330 
1331       mul_10 = ixheaacd_mult32x16in32(xt1_0, si10);
1332       mul_7 = ixheaacd_mult32x16in32(yt1_0, co10);
1333       x[h2 + 1] = ixheaacd_shl32_sat((mul_7 - mul_10), RADIXSHIFT);
1334 
1335       x += 2;
1336     }
1337     x += fft_jmp;
1338     w_ptr = w_ptr - fft_jmp;
1339   }
1340 }