1 /*
2  ** Copyright 2003-2010, VisualOn, Inc.
3  **
4  ** Licensed under the Apache License, Version 2.0 (the "License");
5  ** you may not use this file except in compliance with the License.
6  ** You may obtain a copy of the License at
7  **
8  **     http://www.apache.org/licenses/LICENSE-2.0
9  **
10  ** Unless required by applicable law or agreed to in writing, software
11  ** distributed under the License is distributed on an "AS IS" BASIS,
12  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  ** See the License for the specific language governing permissions and
14  ** limitations under the License.
15  */
16 
17 /***********************************************************************
18 *      File: c4t64fx.c                                                 *
19 *                                                                      *
20 *      Description:Performs algebraic codebook search for higher modes *
21 *                                                                      *
22 ************************************************************************/
23 
24 /************************************************************************
25 * Function: ACELP_4t64_fx()                                             *
26 *                                                                       *
27 * 20, 36, 44, 52, 64, 72, 88 bits algebraic codebook.                   *
28 * 4 tracks x 16 positions per track = 64 samples.                       *
29 *                                                                       *
30 * 20 bits --> 4 pulses in a frame of 64 samples.                        *
31 * 36 bits --> 8 pulses in a frame of 64 samples.                        *
32 * 44 bits --> 10 pulses in a frame of 64 samples.                       *
33 * 52 bits --> 12 pulses in a frame of 64 samples.                       *
34 * 64 bits --> 16 pulses in a frame of 64 samples.                       *
35 * 72 bits --> 18 pulses in a frame of 64 samples.                       *
36 * 88 bits --> 24 pulses in a frame of 64 samples.                       *
37 *                                                                       *
38 * All pulses can have two (2) possible amplitudes: +1 or -1.            *
39 * Each pulse can have sixteen (16) possible positions.                  *
40 *************************************************************************/
41 
42 #include "typedef.h"
43 #include "basic_op.h"
44 #include "math_op.h"
45 #include "acelp.h"
46 #include "cnst.h"
47 
48 #include "q_pulse.h"
49 
50 #undef LOG_TAG
51 #define LOG_TAG "amrwbenc"
52 #include "log/log.h"
53 
/* Track order used by the search: successive rotations of (0, 1, 2, 3).
 * ACELP_4t64_fx() copies nb_pulse consecutive entries, starting at
 * tipos[4 * k], into its local ipos[] for search iteration k. */
54 static Word16 tipos[36] = {
55     0, 1, 2, 3,                            /* starting point &ipos[0], 1st iter */
56     1, 2, 3, 0,                            /* starting point &ipos[4], 2nd iter */
57     2, 3, 0, 1,                            /* starting point &ipos[8], 3rd iter */
58     3, 0, 1, 2,                            /* starting point &ipos[12], 4th iter */
59     0, 1, 2, 3,
60     1, 2, 3, 0,
61     2, 3, 0, 1,
62     3, 0, 1, 2,
63     0, 1, 2, 3};                           /* end point for 24 pulses &ipos[35], 4th iter */
64 
65 #define NB_PULSE_MAX  24                  /* largest pulse count handled (88-bit mode) */
66 
67 #define L_SUBFR   64                      /* samples searched: NB_TRACK tracks x NB_POS positions */
68 #define NB_TRACK  4                       /* number of interleaved tracks */
69 #define STEP      4                       /* sample distance between two positions of one track */
70 #define NB_POS    16                      /* positions per track */
71 #define MSIZE     256                     /* entries per rrixiy[] row (NB_POS x NB_POS) */
72 #define NB_MAX    8                       /* positions preselected per track from dn2[] */
73 #define NPMAXPT   ((NB_PULSE_MAX+NB_TRACK-1)/NB_TRACK)  /* ind[] slots per track: NB_PULSE_MAX / NB_TRACK rounded up */
74 
75 /* Private functions */
/* Correlation helper called by ACELP_4t64_fx() when the first track of the
 * pulse pair is not track 3 and ASM_OPT is not defined. */
76 void cor_h_vec_012(
77         Word16 h[],                           /* (i) scaled impulse response                 */
78         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
79         Word16 track,                         /* (i) track to use                            */
80         Word16 sign[],                        /* (i) sign vector                             */
81         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
82         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
83         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
84         );
85 
/* Same argument list as cor_h_vec_012(); called in its place when ASM_OPT is
 * defined. */
86 void cor_h_vec_012_asm(
87         Word16 h[],                           /* (i) scaled impulse response                 */
88         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
89         Word16 track,                         /* (i) track to use                            */
90         Word16 sign[],                        /* (i) sign vector                             */
91         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
92         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
93         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
94         );
95 
/* Correlation helper called by ACELP_4t64_fx() when the first track of the
 * pulse pair is track 3. */
96 void cor_h_vec_30(
97         Word16 h[],                           /* (i) scaled impulse response                 */
98         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
99         Word16 track,                         /* (i) track to use                            */
100         Word16 sign[],                        /* (i) sign vector                             */
101         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
102         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
103         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
104         );
105 
/* Called by ACELP_4t64_fx() after the correlation helper to choose the
 * positions (ix, iy) of the next two pulses; updates *ps and *alp. */
106 void search_ixiy(
107         Word16 nb_pos_ix,                     /* (i) nb of pos for pulse 1 (1..8)       */
108         Word16 track_x,                       /* (i) track of pulse 1                   */
109         Word16 track_y,                       /* (i) track of pulse 2                   */
110         Word16 * ps,                          /* (i/o) correlation of all fixed pulses  */
111         Word16 * alp,                         /* (i/o) energy of all fixed pulses       */
112         Word16 * ix,                          /* (o) position of pulse 1                */
113         Word16 * iy,                          /* (o) position of pulse 2                */
114         Word16 dn[],                          /* (i) corr. between target and h[]       */
115         Word16 dn2[],                         /* (i) vector of selected positions       */
116         Word16 cor_x[],                       /* (i) corr. of pulse 1 with fixed pulses */
117         Word16 cor_y[],                       /* (i) corr. of pulse 2 with fixed pulses */
118         Word16 rrixiy[][MSIZE]                /* (i) corr. of pulse 1 with pulse 2   */
119         );
120 
121 
/*
 * ACELP_4t64_fx: algebraic codebook search over 4 interleaved tracks of 16
 * positions each (64 samples).
 *
 * Stages, in the order they appear below:
 *  1. Select nbiter, the initial alp weight, nb_pulse and the nbpos[]
 *     schedule from nbbits. An unsupported nbbits selects 0 pulses and
 *     0 iterations, so no search runs and _index[] is not written.
 *  2. Build dn2[] = k_cn*cn[] + k_dn*dn[], derive sign[] from it, and negate
 *     dn[i] and dn2[i] wherever dn2[i] < 0 (dn[] is modified in place).
 *  3. Mark the NB_MAX largest dn2[] positions of each track with negative
 *     values and remember each track's maximum in pos_max[].
 *  4. Copy H[] (shifted right by h_shift) into h[] and its negation into
 *     h_inv[], fill rrixix[][] and rrixiy[][], then fold the signs into
 *     rrixiy[][].
 *  5. Run nbiter search iterations. Each one fixes 0, 2 or 4 pulses at the
 *     pos_max[] positions, then adds pulses two at a time through the
 *     cor_h_vec helpers and search_ixiy(). The best codevector is kept in
 *     codvec[] and its filtered version in y[].
 *  6. Build code[], rescale y[], and pack the pulse positions of each track
 *     into _index[] with the quant_1p_N1 ... quant_6p_6N_2 helpers.
 */
ACELP_4t64_fx(Word16 dn[],Word16 cn[],Word16 H[],Word16 code[],Word16 y[],Word16 nbbits,Word16 ser_size,Word16 _index[])122 void ACELP_4t64_fx(
123         Word16 dn[],                          /* (i) <12b : correlation between target x[] and H[]      */
124         Word16 cn[],                          /* (i) <12b : residual after long term prediction         */
125         Word16 H[],                           /* (i) Q12: impulse response of weighted synthesis filter */
126         Word16 code[],                        /* (o) Q9 : algebraic (fixed) codebook excitation         */
127         Word16 y[],                           /* (o) Q9 : filtered fixed codebook excitation            */
128         Word16 nbbits,                        /* (i) : 20, 36, 44, 52, 64, 72 or 88 bits                */
129         Word16 ser_size,                      /* (i) : bit rate                                         */
130         Word16 _index[]                       /* (o) : index (20): 5+5+5+5 = 20 bits.                   */
131         /* (o) : index (36): 9+9+9+9 = 36 bits.                   */
132         /* (o) : index (44): 13+9+13+9 = 44 bits.                 */
133         /* (o) : index (52): 13+13+13+13 = 52 bits.               */
134         /* (o) : index (64): 2+2+2+2+14+14+14+14 = 64 bits.       */
135         /* (o) : index (72): 10+2+10+2+10+14+10+14 = 72 bits.     */
136         /* (o) : index (88): 11+11+11+11+11+11+11+11 = 88 bits.   */
137         )
138 {
139     Word32 i, j, k;
140     Word16 st, ix, iy, pos, index, track, nb_pulse, nbiter, j_temp;
141     Word16 psk, ps, alpk, alp, val, k_cn, k_dn, exp;
142     Word16 *p0, *p1, *p2, *p3, *psign;
143     Word16 *h, *h_inv, *ptr_h1, *ptr_h2, *ptr_hf, h_shift;
144     Word32 s, cor, L_tmp, L_index;
145     Word16 dn2[L_SUBFR], sign[L_SUBFR], vec[L_SUBFR];
146     Word16 ind[NPMAXPT * NB_TRACK];
147     Word16 codvec[NB_PULSE_MAX], nbpos[10];
148     Word16 cor_x[NB_POS], cor_y[NB_POS], pos_max[NB_TRACK];
149     Word16 h_buf[4 * L_SUBFR];
150     Word16 rrixix[NB_TRACK][NB_POS], rrixiy[NB_TRACK][MSIZE];
151     Word16 ipos[NB_PULSE_MAX];
152 
    /* Per-mode setup. nbpos[st] is the nb_pos_ix argument handed to
     * search_ixiy() for the st-th pulse pair of an iteration; only the
     * entries a mode actually uses are initialised. */
153     switch (nbbits)
154     {
155         case 20:                               /* 20 bits, 4 pulses, 4 tracks */
156             nbiter = 4;                          /* 4x16x16=1024 loop */
157             alp = 8192;                          /* alp = 2.0 (Q12) */
158             nb_pulse = 4;
159             nbpos[0] = 4;
160             nbpos[1] = 8;
161             break;
162         case 36:                               /* 36 bits, 8 pulses, 4 tracks */
163             nbiter = 4;                          /* 4x20x16=1280 loop */
164             alp = 4096;                          /* alp = 1.0 (Q12) */
165             nb_pulse = 8;
166             nbpos[0] = 4;
167             nbpos[1] = 8;
168             nbpos[2] = 8;
169             break;
170         case 44:                               /* 44 bits, 10 pulses, 4 tracks */
171             nbiter = 4;                          /* 4x26x16=1664 loop */
172             alp = 4096;                          /* alp = 1.0 (Q12) */
173             nb_pulse = 10;
174             nbpos[0] = 4;
175             nbpos[1] = 6;
176             nbpos[2] = 8;
177             nbpos[3] = 8;
178             break;
179         case 52:                               /* 52 bits, 12 pulses, 4 tracks */
180             nbiter = 4;                          /* 4x26x16=1664 loop */
181             alp = 4096;                          /* alp = 1.0 (Q12) */
182             nb_pulse = 12;
183             nbpos[0] = 4;
184             nbpos[1] = 6;
185             nbpos[2] = 8;
186             nbpos[3] = 8;
187             break;
188         case 64:                               /* 64 bits, 16 pulses, 4 tracks */
189             nbiter = 3;                          /* 3x36x16=1728 loop */
190             alp = 3277;                          /* alp = 0.8 (Q12) */
191             nb_pulse = 16;
192             nbpos[0] = 4;
193             nbpos[1] = 4;
194             nbpos[2] = 6;
195             nbpos[3] = 6;
196             nbpos[4] = 8;
197             nbpos[5] = 8;
198             break;
199         case 72:                               /* 72 bits, 18 pulses, 4 tracks */
200             nbiter = 3;                          /* 3x35x16=1680 loop */
201             alp = 3072;                          /* alp = 0.75 (Q12) */
202             nb_pulse = 18;
203             nbpos[0] = 2;
204             nbpos[1] = 3;
205             nbpos[2] = 4;
206             nbpos[3] = 5;
207             nbpos[4] = 6;
208             nbpos[5] = 7;
209             nbpos[6] = 8;
210             break;
211         case 88:                               /* 88 bits, 24 pulses, 4 tracks */
212             if(ser_size > 462)
213                 nbiter = 1;
214             else
215                 nbiter = 2;                    /* 2x53x16=1696 loop */
216 
217             alp = 2048;                          /* alp = 0.5 (Q12) */
218             nb_pulse = 24;
219             nbpos[0] = 2;
220             nbpos[1] = 2;
221             nbpos[2] = 3;
222             nbpos[3] = 4;
223             nbpos[4] = 5;
224             nbpos[5] = 6;
225             nbpos[6] = 7;
226             nbpos[7] = 8;
227             nbpos[8] = 8;
228             nbpos[9] = 8;
229             break;
230         default:
231             nbiter = 0;
232             alp = 0;
233             nb_pulse = 0;
234     }
235 
    /* Fallback codevector (pulse i at position i); it is overwritten below
     * whenever a search iteration is accepted as the new best. */
236     for (i = 0; i < nb_pulse; i++)
237     {
238         codvec[i] = i;
239     }
240 
241     /*----------------------------------------------------------------*
242      * Find sign for each pulse position.                             *
243      *----------------------------------------------------------------*/
244     /* calculate energy for normalization of cn[] and dn[] */
245     /* set k_cn = 32..32767 (ener_cn = 2^30..256-0) */
246 #ifdef ASM_OPT                  /* asm optimization branch */
247     s = Dot_product12_asm(cn, cn, L_SUBFR, &exp);
248 #else
249     s = Dot_product12(cn, cn, L_SUBFR, &exp);
250 #endif
251 
252     Isqrt_n(&s, &exp);
253     s = L_shl(s, (exp + 5));
254     k_cn = extract_h(L_add(s, 0x8000));
255 
256     /* set k_dn = 32..512 (ener_dn = 2^30..2^22) */
257 #ifdef ASM_OPT                      /* asm optimization branch */
258     s = Dot_product12_asm(dn, dn, L_SUBFR, &exp);
259 #else
260     s = Dot_product12(dn, dn, L_SUBFR, &exp);
261 #endif
262 
263     Isqrt_n(&s, &exp);
264     k_dn = voround(L_shl(s, (exp + 5 + 3)));    /* k_dn = 256..4096 */
265     k_dn = vo_mult_r(alp, k_dn);              /* alp in Q12 */
266 
267     /* mix normalized cn[] and dn[] */
268     p0 = cn;
269     p1 = dn;
270     p2 = dn2;
271 
    /* loop unrolled by 4: L_SUBFR/4 passes produce all L_SUBFR values of dn2[] */
272     for (i = 0; i < L_SUBFR/4; i++)
273     {
274         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
275         *p2++ = s >> 7;
276         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
277         *p2++ = s >> 7;
278         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
279         *p2++ = s >> 7;
280         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
281         *p2++ = s >> 7;
282     }
283 
284     /* set sign according to dn2[] = k_cn*cn[] + k_dn*dn[]    */
    /* NOTE(review): 32767 / -32768 look like +1 / -1 in Q15 rather than the
     * Q12 stated in the comments below - confirm against vo_mult(). */
285     for(i = 0; i < L_SUBFR; i++)
286     {
287         val = dn[i];
288         ps = dn2[i];
289         if (ps >= 0)
290         {
291             sign[i] = 32767;             /* sign = +1 (Q12) */
292             vec[i] = -32768;             /* vec[] temporarily holds the opposite sign for the rrixiy[] sign modification below */
293         } else
294         {
295             sign[i] = -32768;            /* sign = -1 (Q12) */
296             vec[i] = 32767;
297             dn[i] = -val;                /* caller's dn[] is negated in place */
298             dn2[i] = -ps;
299         }
300     }
301     /*----------------------------------------------------------------*
302      * Select NB_MAX position per track according to max of dn2[].    *
303      *----------------------------------------------------------------*/
304     pos = 0;
305     for (i = 0; i < NB_TRACK; i++)
306     {
307         for (k = 0; k < NB_MAX; k++)
308         {
309             ps = -1;                     /* already selected positions hold values <= -1 and are skipped */
310             for (j = i; j < L_SUBFR; j += STEP)
311             {
312                 if(dn2[j] > ps)
313                 {
314                     ps = dn2[j];
315                     pos = j;
316                 }
317             }
318             dn2[pos] = (k - NB_MAX);     /* dn2 < 0 when position is selected */
319             if (k == 0)
320             {
321                 pos_max[i] = pos;        /* largest dn2[] of track i */
322             }
323         }
324     }
325 
326     /*--------------------------------------------------------------*
327      * Scale h[] to avoid overflow and to get maximum of precision  *
328      * on correlation.                                              *
329      *                                                              *
330      * Maximum of h[] (h[0]) is fixed to 2048 (MAX16 / 16).         *
331      *  ==> This allows addition of 16 pulses without saturation.   *
332      *                                                              *
333      * Energy worst case (on resonant impulse response),            *
334      * - energy of h[] is approximately MAX/16.                     *
335      * - During search, the energy is divided by 8 to avoid         *
336      *   overflow on "alp". (energy of h[] = MAX/128).              *
337      *  ==> "alp" worst case detected is 22854 on sinusoidal wave.  *
338      *--------------------------------------------------------------*/
339 
340     /* impulse response buffer for fast computation */
341 
    /* h_buf layout: L_SUBFR zeros, h[], L_SUBFR zeros, h_inv[].
     * The loop below advances h and h_inv past their zero padding, so that
     * (h - ix) and (h_inv - ix), for ix in 0..L_SUBFR-1, address the
     * (negated) impulse response delayed by ix samples. */
342     h = h_buf;
343     h_inv = h_buf + (2 * L_SUBFR);
344     L_tmp = 0;
345     for (i = 0; i < L_SUBFR; i++)
346     {
347         *h++ = 0;
348         *h_inv++ = 0;
349         L_tmp = L_add(L_tmp, (H[i] * H[i]) << 1);
350     }
351     /* scale h[] down (/2) when energy of h[] is high with many pulses used */
352     val = extract_h(L_tmp);
353     h_shift = 0;
354 
355     if ((nb_pulse >= 12) && (val > 1024))
356     {
357         h_shift = 1;
358     }
359     p0 = H;
360     p1 = h;
361     p2 = h_inv;
362 
363     for (i = 0; i < L_SUBFR/4; i++)
364     {
365         *p1 = *p0++ >> h_shift;
366         *p2++ = -(*p1++);
367         *p1 = *p0++ >> h_shift;
368         *p2++ = -(*p1++);
369         *p1 = *p0++ >> h_shift;
370         *p2++ = -(*p1++);
371         *p1 = *p0++ >> h_shift;
372         *p2++ = -(*p1++);
373     }
374 
375     /*------------------------------------------------------------*
376      * Compute rrixix[][] needed for the codebook search.         *
377      * This algorithm computes impulse response energy of all     *
378      * positions (16) in each track (4).       Total = 4x16 = 64. *
379      *------------------------------------------------------------*/
380 
381     /* storage order --> i3i3, i2i2, i1i1, i0i0 */
382 
383     /* Init pointers to last position of rrixix[] */
384     p0 = &rrixix[0][NB_POS - 1];
385     p1 = &rrixix[1][NB_POS - 1];
386     p2 = &rrixix[2][NB_POS - 1];
387     p3 = &rrixix[3][NB_POS - 1];
388 
389     ptr_h1 = h;
390     cor = 0x00008000L;                             /* for rounding */
    /* running sum of squares of h[], written from the last position of each
     * row backwards */
391     for (i = 0; i < NB_POS; i++)
392     {
393         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
394         ptr_h1++;
395         *p3-- = extract_h(cor);
396         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
397         ptr_h1++;
398         *p2-- = extract_h(cor);
399         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
400         ptr_h1++;
401         *p1-- = extract_h(cor);
402         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
403         ptr_h1++;
404         *p0-- = extract_h(cor);
405     }
406 
407     /*------------------------------------------------------------*
408      * Compute rrixiy[][] needed for the codebook search.         *
409      * This algorithm computes correlation between 2 pulses       *
410      * (2 impulse responses) in 4 possible adjacent tracks.       *
411      * (track 0-1, 1-2, 2-3 and 3-0).     Total = 4x16x16 = 1024. *
412      *------------------------------------------------------------*/
413 
414     /* storage order --> i2i3, i1i2, i0i1, i3i0 */
415 
416     pos = MSIZE - 1;
417     ptr_hf = h + 1;
418 
    /* each pass of k fills one diagonal of the NB_POS x NB_POS matrices,
     * stepping back by NB_POS + 1 entries per element */
419     for (k = 0; k < NB_POS; k++)
420     {
421         p3 = &rrixiy[2][pos];
422         p2 = &rrixiy[1][pos];
423         p1 = &rrixiy[0][pos];
424         p0 = &rrixiy[3][pos - NB_POS];
425 
426         cor = 0x00008000L;                   /* for rounding */
427         ptr_h1 = h;
428         ptr_h2 = ptr_hf;
429 
430         for (i = k + 1; i < NB_POS; i++)
431         {
432             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
433             ptr_h1++;
434             ptr_h2++;
435             *p3 = extract_h(cor);
436             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
437             ptr_h1++;
438             ptr_h2++;
439             *p2 = extract_h(cor);
440             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
441             ptr_h1++;
442             ptr_h2++;
443             *p1 = extract_h(cor);
444             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
445             ptr_h1++;
446             ptr_h2++;
447             *p0 = extract_h(cor);
448 
449             p3 -= (NB_POS + 1);
450             p2 -= (NB_POS + 1);
451             p1 -= (NB_POS + 1);
452             p0 -= (NB_POS + 1);
453         }
454         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
455         ptr_h1++;
456         ptr_h2++;
457         *p3 = extract_h(cor);
458         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
459         ptr_h1++;
460         ptr_h2++;
461         *p2 = extract_h(cor);
462         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
463         ptr_h1++;
464         ptr_h2++;
465         *p1 = extract_h(cor);
466 
467         pos -= NB_POS;
468         ptr_hf += STEP;
469     }
470 
471     /* storage order --> i3i0, i2i3, i1i2, i0i1 */
472 
473     pos = MSIZE - 1;
474     ptr_hf = h + 3;
475 
476     for (k = 0; k < NB_POS; k++)
477     {
478         p3 = &rrixiy[3][pos];
479         p2 = &rrixiy[2][pos - 1];
480         p1 = &rrixiy[1][pos - 1];
481         p0 = &rrixiy[0][pos - 1];
482 
483         cor = 0x00008000L;                              /* for rounding */
484         ptr_h1 = h;
485         ptr_h2 = ptr_hf;
486 
487         for (i = k + 1; i < NB_POS; i++)
488         {
489             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
490             ptr_h1++;
491             ptr_h2++;
492             *p3 = extract_h(cor);
493             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
494             ptr_h1++;
495             ptr_h2++;
496             *p2 = extract_h(cor);
497             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
498             ptr_h1++;
499             ptr_h2++;
500             *p1 = extract_h(cor);
501             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
502             ptr_h1++;
503             ptr_h2++;
504             *p0 = extract_h(cor);
505 
506             p3 -= (NB_POS + 1);
507             p2 -= (NB_POS + 1);
508             p1 -= (NB_POS + 1);
509             p0 -= (NB_POS + 1);
510         }
511         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
512         ptr_h1++;
513         ptr_h2++;
514         *p3 = extract_h(cor);
515 
516         pos--;
517         ptr_hf += STEP;
518     }
519 
520     /*------------------------------------------------------------*
521      * Modification of rrixiy[][] to take signs into account.     *
522      *------------------------------------------------------------*/
523 
    /* p0 walks all NB_TRACK * MSIZE entries linearly. For the pulse at
     * position i, psign is sign[] when sign[i] >= 0 and vec[] (the opposite
     * signs) otherwise; every entry is multiplied by psign[j] for the
     * positions j of the next track. */
524     p0 = &rrixiy[0][0];
525 
526     for (k = 0; k < NB_TRACK; k++)
527     {
528         j_temp = (k + 1)&0x03;           /* next track, wrapping 3 -> 0 */
529         for (i = k; i < L_SUBFR; i += STEP)
530         {
531             psign = sign;
532             if (psign[i] < 0)
533             {
534                 psign = vec;
535             }
536             j = j_temp;
537             for (; j < L_SUBFR; j += STEP)
538             {
539                 *p0 = vo_mult(*p0, psign[j]);
540                 p0++;
541             }
542         }
543     }
544 
545     /*-------------------------------------------------------------------*
546      *                       Depth-first search                          *
547      *-------------------------------------------------------------------*/
548 
    /* (psk, alpk) hold the best candidate so far; each iteration's (ps, alp)
     * is compared against them by cross-multiplication at the end of the
     * iteration. */
549     psk = -1;
550     alpk = 1;
551 
552     for (k = 0; k < nbiter; k++)
553     {
554         j_temp = k<<2;                   /* iteration k reads tipos[] from offset 4*k */
555         for (i = 0; i < nb_pulse; i++)
556             ipos[i] = tipos[j_temp + i];
557 
558         if(nbbits == 20)
559         {
            /* no pulse is fixed up front: start from an empty vec[] */
560             pos = 0;
561             ps = 0;
562             alp = 0;
563             for (i = 0; i < L_SUBFR; i++)
564             {
565                 vec[i] = 0;
566             }
567         } else if ((nbbits == 36) || (nbbits == 44))
568         {
569             /* first stage: fix 2 pulses */
570             pos = 2;
571 
572             ix = ind[0] = pos_max[ipos[0]];
573             iy = ind[1] = pos_max[ipos[1]];
574             ps = dn[ix] + dn[iy];
575             i = ix >> 2;                /* ix / STEP */
576             j = iy >> 2;                /* iy / STEP */
577             s = rrixix[ipos[0]][i] << 13;
578             s += rrixix[ipos[1]][j] << 13;
579             i = (i << 4) + j;         /* (ix/STEP)*NB_POS + (iy/STEP) */
580             s += rrixiy[ipos[0]][i] << 14;
581             alp = (s + 0x8000) >> 16;
582             if (sign[ix] < 0)
583                 p0 = h_inv - ix;
584             else
585                 p0 = h - ix;
586             if (sign[iy] < 0)
587                 p1 = h_inv - iy;
588             else
589                 p1 = h - iy;
590 
591             for (i = 0; i < L_SUBFR; i++)
592             {
593                 vec[i] = (*p0++) + (*p1++);
594             }
595 
596             if(nbbits == 44)
597             {
                /* 44 bits: the last pulse pair is always searched on tracks 0 and 1 */
598                 ipos[8] = 0;
599                 ipos[9] = 1;
600             }
601         } else
602         {
603             /* first stage: fix 4 pulses */
604             pos = 4;
605 
606             ix = ind[0] = pos_max[ipos[0]];
607             iy = ind[1] = pos_max[ipos[1]];
608             i = ind[2] = pos_max[ipos[2]];
609             j = ind[3] = pos_max[ipos[3]];
610             ps = add1(add1(add1(dn[ix], dn[iy]), dn[i]), dn[j]);
611 
612             if (sign[ix] < 0)
613                 p0 = h_inv - ix;
614             else
615                 p0 = h - ix;
616 
617             if (sign[iy] < 0)
618                 p1 = h_inv - iy;
619             else
620                 p1 = h - iy;
621 
622             if (sign[i] < 0)
623                 p2 = h_inv - i;
624             else
625                 p2 = h - i;
626 
627             if (sign[j] < 0)
628                 p3 = h_inv - j;
629             else
630                 p3 = h - j;
631 
632             L_tmp = 0L;
633             for(i = 0; i < L_SUBFR; i++)
634             {
635                 Word32 vecSq2;
636                 vec[i]  = add1(add1(add1(*p0++, *p1++), *p2++), *p3++);
637                 vecSq2 = (vec[i] * vec[i]) << 1;
                /* accumulate with explicit clamping to INT_MAX / INT_MIN
                 * instead of letting L_tmp overflow */
638                 if (vecSq2 > 0 && L_tmp > INT_MAX - vecSq2) {
639                     L_tmp = INT_MAX;
640                 } else if (vecSq2 < 0 && L_tmp < INT_MIN - vecSq2) {
641                     L_tmp = INT_MIN;
642                 } else {
643                     L_tmp  += vecSq2;
644                 }
645             }
646 
647             alp = ((L_tmp >> 3) + 0x8000) >> 16;
648 
649             if(nbbits == 72)
650             {
                /* 72 bits: the last pulse pair is always searched on tracks 0 and 1 */
651                 ipos[16] = 0;
652                 ipos[17] = 1;
653             }
654         }
655 
656         /* other stages of 2 pulses */
657 
        /* j = index of the first pulse of the pair, st = index into nbpos[] */
658         for (j = pos, st = 0; j < nb_pulse; j += 2, st++)
659         {
660             /*--------------------------------------------------*
661              * Calculate correlation of all possible positions  *
662              * of the next 2 pulses with previous fixed pulses. *
663              * Each pulse can have 16 possible positions.       *
664              *--------------------------------------------------*/
665             if(ipos[j] == 3)
666             {
667                 cor_h_vec_30(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
668             }
669             else
670             {
671 #ifdef ASM_OPT                 /* asm optimization branch */
672                 cor_h_vec_012_asm(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
673 #else
674                 cor_h_vec_012(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
675 #endif
676             }
677             /*--------------------------------------------------*
678              * Find best positions of 2 pulses.                 *
679              *--------------------------------------------------*/
680             search_ixiy(nbpos[st], ipos[j], ipos[j + 1], &ps, &alp,
681                     &ix, &iy, dn, dn2, cor_x, cor_y, rrixiy);
682 
683             ind[j] = ix;
684             ind[j + 1] = iy;
685 
686             if (sign[ix] < 0)
687                 p0 = h_inv - ix;
688             else
689                 p0 = h - ix;
690             if (sign[iy] < 0)
691                 p1 = h_inv - iy;
692             else
693                 p1 = h - iy;
694 
            /* add the two new (signed, delayed) impulse responses to vec[] */
695             for (i = 0; i < L_SUBFR; i+=4)
696             {
697                 vec[i]   += add1((*p0++), (*p1++));
698                 vec[i+1] += add1((*p0++), (*p1++));
699                 vec[i+2] += add1((*p0++), (*p1++));
700                 vec[i+3] += add1((*p0++), (*p1++));
701             }
702         }
703         /* memorise the best codevector */
704         ps = vo_mult(ps, ps);
705         s = L_sub(vo_L_mult(alpk, ps), vo_L_mult(psk, alp));    /* cross-multiplied comparison of the new ps/alp against the stored psk/alpk */
706         if (s > 0)
707         {
708             psk = ps;
709             alpk = alp;
710             for (i = 0; i < nb_pulse; i++)
711             {
712                 codvec[i] = ind[i];
713             }
714             for (i = 0; i < L_SUBFR; i++)
715             {
716                 y[i] = vec[i];
717             }
718         }
719     }
720     /*-------------------------------------------------------------------*
721      * Build the codeword, the filtered codeword and index of codevector.*
722      *-------------------------------------------------------------------*/
    /* ind[] is reused from here on: NPMAXPT slots per track, -1 = empty */
723     for (i = 0; i < NPMAXPT * NB_TRACK; i++)
724     {
725         ind[i] = -1;
726     }
727     for (i = 0; i < L_SUBFR; i++)
728     {
729         code[i] = 0;
730         y[i] = vo_shr_r(y[i], 3);               /* Q12 to Q9 */
731     }
732     val = (512 >> h_shift);               /* codeword in Q9 format */
733     for (k = 0; k < nb_pulse; k++)
734     {
735         i = codvec[k];                       /* read pulse position */
736         j = sign[i];                         /* read sign           */
737         index = i >> 2;                 /* index = pos of pulse (0..15) */
738         track = (Word16) (i & 0x03);         /* track = i % NB_TRACK (0..3)  */
739 
740         if (j > 0)
741         {
742             code[i] += val;
743             codvec[k] += 128;                /* NOTE(review): codvec[k] is not read again in this function */
744         } else
745         {
746             code[i] -= val;
747             index += NB_POS;                 /* negative pulse: sign carried in the index as +NB_POS */
748         }
749 
750         i = (Word16)((vo_L_mult(track, NPMAXPT) >> 1));    /* first ind[] slot of this track; presumably track * NPMAXPT (assumes vo_L_mult doubles the product) */
751 
        /* advance to the first free slot, never past the end of ind[] */
752         while (i < NPMAXPT * NB_TRACK && ind[i] >= 0)
753         {
754             i += 1;
755         }
756         if (i < NPMAXPT * NB_TRACK) {
757             ind[i] = index;
758         } else {
759             ALOGE("b/132647222, OOB access in ind array track=%d i=%d", track, i);
760             android_errorWriteLog(0x534e4554, "132647222");
761         }
762     }
763 
764     k = 0;
765     /* Build index of codevector */
    /* k is the offset of the current track's first slot in ind[] */
766     if(nbbits == 20)
767     {
768         for (track = 0; track < NB_TRACK; track++)
769         {
770             _index[track] = (Word16)(quant_1p_N1(ind[k], 4));
771             k += NPMAXPT;
772         }
773     } else if(nbbits == 36)
774     {
775         for (track = 0; track < NB_TRACK; track++)
776         {
777             _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
778             k += NPMAXPT;
779         }
780     } else if(nbbits == 44)
781     {
        /* tracks 0-1 carry 3 pulses each, tracks 2-3 carry 2 pulses each */
782         for (track = 0; track < NB_TRACK - 2; track++)
783         {
784             _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
785             k += NPMAXPT;
786         }
787         for (track = 2; track < NB_TRACK; track++)
788         {
789             _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
790             k += NPMAXPT;
791         }
792     } else if(nbbits == 52)
793     {
794         for (track = 0; track < NB_TRACK; track++)
795         {
796             _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
797             k += NPMAXPT;
798         }
799     } else if(nbbits == 64)
800     {
        /* each track's index is split: 2 high bits in _index[track],
         * 14 low bits in _index[track + NB_TRACK] */
801         for (track = 0; track < NB_TRACK; track++)
802         {
803             L_index = quant_4p_4N(&ind[k], 4);
804             _index[track] = (Word16)((L_index >> 14) & 3);
805             _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
806             k += NPMAXPT;
807         }
808     } else if(nbbits == 72)
809     {
        /* tracks 0-1: 5 pulses, split 10 + 10 bits; tracks 2-3: 4 pulses, split 2 + 14 bits */
810         for (track = 0; track < NB_TRACK - 2; track++)
811         {
812             L_index = quant_5p_5N(&ind[k], 4);
813             _index[track] = (Word16)((L_index >> 10) & 0x03FF);
814             _index[track + NB_TRACK] = (Word16)(L_index & 0x03FF);
815             k += NPMAXPT;
816         }
817         for (track = 2; track < NB_TRACK; track++)
818         {
819             L_index = quant_4p_4N(&ind[k], 4);
820             _index[track] = (Word16)((L_index >> 14) & 3);
821             _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
822             k += NPMAXPT;
823         }
824     } else if(nbbits == 88)
825     {
        /* each track's index is split into two 11-bit halves */
826         for (track = 0; track < NB_TRACK; track++)
827         {
828             L_index = quant_6p_6N_2(&ind[k], 4);
829             _index[track] = (Word16)((L_index >> 11) & 0x07FF);
830             _index[track + NB_TRACK] = (Word16)(L_index & 0x07FF);
831             k += NPMAXPT;
832         }
833     }
834     return;
835 }
836 
837 
838 /*-------------------------------------------------------------------*
839  * Function  cor_h_vec()                                             *
840  * ~~~~~~~~~~~~~~~~~~~~~                                             *
841  * Compute correlations of h[] with vec[] for the specified track.   *
842  *-------------------------------------------------------------------*/
cor_h_vec_30(Word16 h[],Word16 vec[],Word16 track,Word16 sign[],Word16 rrixix[][NB_POS],Word16 cor_1[],Word16 cor_2[])843 void cor_h_vec_30(
844         Word16 h[],                           /* (i) scaled impulse response                 */
845         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
846         Word16 track,                         /* (i) track to use                            */
847         Word16 sign[],                        /* (i) sign vector                             */
848         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
849         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
850         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
851         )
852 {
853     Word32 i, j, pos, corr;
854     Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y;
855     Word32 L_sum1,L_sum2;
856     cor_x = cor_1;
857     cor_y = cor_2;
858     p0 = rrixix[track];
859     p3 = rrixix[0];
860     pos = track;
861 
862     for (i = 0; i < NB_POS; i+=2)
863     {
864         L_sum1 = L_sum2 = 0L;
865         p1 = h;
866         p2 = &vec[pos];
867         for (j=pos;j < L_SUBFR; j++)
868         {
869             L_sum1 = L_add(L_sum1, *p1 * *p2);
870             p2-=3;
871             L_sum2 = L_add(L_sum2, *p1++ * *p2);
872             p2+=4;
873         }
874         p2-=3;
875         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
876         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
877         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
878 
879         L_sum1 = L_shl(L_sum1, 2);
880         L_sum2 = L_shl(L_sum2, 2);
881 
882         corr = voround(L_sum1);
883         *cor_x++ = mult(corr, sign[pos]) + (*p0++);
884         corr = voround(L_sum2);
885         *cor_y++ = mult(corr, sign[pos-3]) + (*p3++);
886         pos += STEP;
887 
888         L_sum1 = L_sum2 = 0L;
889         p1 = h;
890         p2 = &vec[pos];
891         for (j=pos;j < L_SUBFR; j++)
892         {
893             L_sum1 = L_add(L_sum1, *p1 * *p2);
894             p2-=3;
895             L_sum2 = L_add(L_sum2, *p1++ * *p2);
896             p2+=4;
897         }
898         p2-=3;
899         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
900         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
901         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
902 
903         L_sum1 = L_shl(L_sum1, 2);
904         L_sum2 = L_shl(L_sum2, 2);
905 
906         corr = voround(L_sum1);
907         *cor_x++ = mult(corr, sign[pos]) + (*p0++);
908         corr = voround(L_sum2);
909         *cor_y++ = mult(corr, sign[pos-3]) + (*p3++);
910         pos += STEP;
911     }
912     return;
913 }
914 
/*-------------------------------------------------------------------*
 * Function  cor_h_vec_012()                                         *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~                                         *
 * Compute correlations of h[] with vec[] for the specified track    *
 * and, in parallel, for the following track.                        *
 *-------------------------------------------------------------------*/
/*
 * For each of the NB_POS positions pos = track, track + STEP, ... two
 * sums are accumulated with L_add():
 *   - h[k] * vec[pos + k]      for k = 0 .. 63 - pos
 *   - h[k] * vec[pos + 1 + k]  for k = 0 .. 62 - pos
 * Each sum goes through L_shl(.., 2) and voround(), is multiplied by the
 * sign at its own position (sign[pos] / sign[pos + 1]) and is offset by
 * rrixix[track][n] / rrixix[track + 1][n] before being stored in
 * cor_1[n] / cor_2[n].
 *
 * rrixix[track + 1] is read, so track must not be the last track.
 */
void cor_h_vec_012(
        Word16 h[],                           /* (i) scaled impulse response                 */
        Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
        Word16 track,                         /* (i) track to use                            */
        Word16 sign[],                        /* (i) sign vector                             */
        Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
        Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
        Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
        )
{
    const Word16 *rr_cur = rrixix[track];       /* offsets added to cor_1[] */
    const Word16 *rr_next = rrixix[track + 1];  /* offsets added to cor_2[] */
    Word32 n, k, pos, taps, corr;
    Word32 acc_cur, acc_next;

    pos = track;
    for (n = 0; n < NB_POS; n++)
    {
        acc_cur = 0L;
        acc_next = 0L;

        /* 63 is the last sample index of the 64-sample subframe. */
        taps = 63 - pos;
        for (k = 0; k < taps; k++)
        {
            acc_cur = L_add(acc_cur, h[k] * vec[pos + k]);
            acc_next = L_add(acc_next, h[k] * vec[pos + k + 1]);
        }

        /* The window starting at pos is one product longer. */
        acc_cur = L_add(acc_cur, h[k] * vec[pos + k]);

        acc_cur = L_shl(acc_cur, 2);
        acc_next = L_shl(acc_next, 2);

        corr = voround(acc_cur);
        cor_1[n] = vo_mult(corr, sign[pos]) + rr_cur[n];
        corr = voround(acc_next);
        cor_2[n] = vo_mult(corr, sign[pos + 1]) + rr_next[n];

        pos += STEP;
    }
    return;
}
974 
975 /*-------------------------------------------------------------------*
976  * Function  search_ixiy()                                           *
977  * ~~~~~~~~~~~~~~~~~~~~~~~                                           *
978  * Find the best positions of 2 pulses in a subframe.                *
979  *-------------------------------------------------------------------*/
980 
search_ixiy(Word16 nb_pos_ix,Word16 track_x,Word16 track_y,Word16 * ps,Word16 * alp,Word16 * ix,Word16 * iy,Word16 dn[],Word16 dn2[],Word16 cor_x[],Word16 cor_y[],Word16 rrixiy[][MSIZE])981 void search_ixiy(
982         Word16 nb_pos_ix,                     /* (i) nb of pos for pulse 1 (1..8)       */
983         Word16 track_x,                       /* (i) track of pulse 1                   */
984         Word16 track_y,                       /* (i) track of pulse 2                   */
985         Word16 * ps,                          /* (i/o) correlation of all fixed pulses  */
986         Word16 * alp,                         /* (i/o) energy of all fixed pulses       */
987         Word16 * ix,                          /* (o) position of pulse 1                */
988         Word16 * iy,                          /* (o) position of pulse 2                */
989         Word16 dn[],                          /* (i) corr. between target and h[]       */
990         Word16 dn2[],                         /* (i) vector of selected positions       */
991         Word16 cor_x[],                       /* (i) corr. of pulse 1 with fixed pulses */
992         Word16 cor_y[],                       /* (i) corr. of pulse 2 with fixed pulses */
993         Word16 rrixiy[][MSIZE]                /* (i) corr. of pulse 1 with pulse 2   */
994         )
995 {
996     Word32 x, y, pos, thres_ix;
997     Word16 ps1, ps2, sq, sqk;
998     Word16 alp_16, alpk;
999     Word16 *p0, *p1, *p2;
1000     Word32 s, alp0, alp1, alp2;
1001 
1002     p0 = cor_x;
1003     p1 = cor_y;
1004     p2 = rrixiy[track_x];
1005 
1006     thres_ix = nb_pos_ix - NB_MAX;
1007 
1008     alp0 = L_deposit_h(*alp);
1009     alp0 = (alp0 + 0x00008000L);       /* for rounding */
1010 
1011     sqk = -1;
1012     alpk = 1;
1013 
1014     for (x = track_x; x < L_SUBFR; x += STEP)
1015     {
1016         ps1 = *ps + dn[x];
1017         alp1 = L_add(alp0, ((*p0++)<<13));
1018 
1019         if (dn2[x] < thres_ix)
1020         {
1021             pos = -1;
1022             for (y = track_y; y < L_SUBFR; y += STEP)
1023             {
1024                 ps2 = add1(ps1, dn[y]);
1025 
1026                 alp2 = L_add(alp1, ((*p1++)<<13));
1027                 alp2 = L_add(alp2, ((*p2++)<<14));
1028                 alp_16 = extract_h(alp2);
1029                 sq = vo_mult(ps2, ps2);
1030                 s = L_sub(vo_L_mult(alpk, sq), L_mult(sqk, alp_16));
1031 
1032                 if (s > 0)
1033                 {
1034                     sqk = sq;
1035                     alpk = alp_16;
1036                     pos = y;
1037                 }
1038             }
1039             p1 -= NB_POS;
1040 
1041             if (pos >= 0)
1042             {
1043                 *ix = x;
1044                 *iy = pos;
1045             }
1046         } else
1047         {
1048             p2 += NB_POS;
1049         }
1050     }
1051 
1052     *ps = add1(*ps, add1(dn[*ix], dn[*iy]));
1053     *alp = alpk;
1054 
1055     return;
1056 }
1057 
1058 
1059 
1060 
1061