1 /*
2  ** Copyright 2003-2010, VisualOn, Inc.
3  **
4  ** Licensed under the Apache License, Version 2.0 (the "License");
5  ** you may not use this file except in compliance with the License.
6  ** You may obtain a copy of the License at
7  **
8  **     http://www.apache.org/licenses/LICENSE-2.0
9  **
10  ** Unless required by applicable law or agreed to in writing, software
11  ** distributed under the License is distributed on an "AS IS" BASIS,
12  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  ** See the License for the specific language governing permissions and
14  ** limitations under the License.
15  */
16 
17 /***********************************************************************
18 *      File: c4t64fx.c                                                 *
19 *                                                                      *
20 *      Description:Performs algebraic codebook search for higher modes *
21 *                                                                      *
22 ************************************************************************/
23 
24 /************************************************************************
25 * Function: ACELP_4t64_fx()                                             *
26 *                                                                       *
27 * 20, 36, 44, 52, 64, 72, 88 bits algebraic codebook.                   *
28 * 4 tracks x 16 positions per track = 64 samples.                       *
29 *                                                                       *
30 * 20 bits --> 4 pulses in a frame of 64 samples.                        *
31 * 36 bits --> 8 pulses in a frame of 64 samples.                        *
32 * 44 bits --> 10 pulses in a frame of 64 samples.                       *
33 * 52 bits --> 12 pulses in a frame of 64 samples.                       *
34 * 64 bits --> 16 pulses in a frame of 64 samples.                       *
35 * 72 bits --> 18 pulses in a frame of 64 samples.                       *
36 * 88 bits --> 24 pulses in a frame of 64 samples.                       *
37 *                                                                       *
38 * All pulses can have two (2) possible amplitudes: +1 or -1.            *
39 * Each pulse can have sixteen (16) possible positions.                  *
40 *************************************************************************/
41 
42 #include "typedef.h"
43 #include "basic_op.h"
44 #include "math_op.h"
45 #include "acelp.h"
46 #include "cnst.h"
47 
48 #include "q_pulse.h"
49 
50 static Word16 tipos[36] = {
51     0, 1, 2, 3,                            /* starting point &ipos[0], 1st iter */
52     1, 2, 3, 0,                            /* starting point &ipos[4], 2nd iter */
53     2, 3, 0, 1,                            /* starting point &ipos[8], 3rd iter */
54     3, 0, 1, 2,                            /* starting point &ipos[12], 4th iter */
55     0, 1, 2, 3,
56     1, 2, 3, 0,
57     2, 3, 0, 1,
58     3, 0, 1, 2,
59     0, 1, 2, 3};                           /* end point for 24 pulses &ipos[35], 4th iter */
60 
61 #define NB_PULSE_MAX  24
62 
63 #define L_SUBFR   64
64 #define NB_TRACK  4
65 #define STEP      4
66 #define NB_POS    16
67 #define MSIZE     256
68 #define NB_MAX    8
69 #define NPMAXPT   ((NB_PULSE_MAX+NB_TRACK-1)/NB_TRACK)
70 
71 /* Private functions */
72 void cor_h_vec_012(
73         Word16 h[],                           /* (i) scaled impulse response                 */
74         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
75         Word16 track,                         /* (i) track to use                            */
76         Word16 sign[],                        /* (i) sign vector                             */
77         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
78         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
79         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
80         );
81 
82 void cor_h_vec_012_asm(
83         Word16 h[],                           /* (i) scaled impulse response                 */
84         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
85         Word16 track,                         /* (i) track to use                            */
86         Word16 sign[],                        /* (i) sign vector                             */
87         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
88         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
89         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
90         );
91 
92 void cor_h_vec_30(
93         Word16 h[],                           /* (i) scaled impulse response                 */
94         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
95         Word16 track,                         /* (i) track to use                            */
96         Word16 sign[],                        /* (i) sign vector                             */
97         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
98         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
99         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
100         );
101 
102 void search_ixiy(
103         Word16 nb_pos_ix,                     /* (i) nb of pos for pulse 1 (1..8)       */
104         Word16 track_x,                       /* (i) track of pulse 1                   */
105         Word16 track_y,                       /* (i) track of pulse 2                   */
106         Word16 * ps,                          /* (i/o) correlation of all fixed pulses  */
107         Word16 * alp,                         /* (i/o) energy of all fixed pulses       */
108         Word16 * ix,                          /* (o) position of pulse 1                */
109         Word16 * iy,                          /* (o) position of pulse 2                */
110         Word16 dn[],                          /* (i) corr. between target and h[]       */
111         Word16 dn2[],                         /* (i) vector of selected positions       */
112         Word16 cor_x[],                       /* (i) corr. of pulse 1 with fixed pulses */
113         Word16 cor_y[],                       /* (i) corr. of pulse 2 with fixed pulses */
114         Word16 rrixiy[][MSIZE]                /* (i) corr. of pulse 1 with pulse 2   */
115         );
116 
117 
ACELP_4t64_fx(Word16 dn[],Word16 cn[],Word16 H[],Word16 code[],Word16 y[],Word16 nbbits,Word16 ser_size,Word16 _index[])118 void ACELP_4t64_fx(
119         Word16 dn[],                          /* (i) <12b : correlation between target x[] and H[]      */
120         Word16 cn[],                          /* (i) <12b : residual after long term prediction         */
121         Word16 H[],                           /* (i) Q12: impulse response of weighted synthesis filter */
122         Word16 code[],                        /* (o) Q9 : algebraic (fixed) codebook excitation         */
123         Word16 y[],                           /* (o) Q9 : filtered fixed codebook excitation            */
124         Word16 nbbits,                        /* (i) : 20, 36, 44, 52, 64, 72 or 88 bits                */
125         Word16 ser_size,                      /* (i) : bit rate                                         */
126         Word16 _index[]                       /* (o) : index (20): 5+5+5+5 = 20 bits.                   */
127         /* (o) : index (36): 9+9+9+9 = 36 bits.                   */
128         /* (o) : index (44): 13+9+13+9 = 44 bits.                 */
129         /* (o) : index (52): 13+13+13+13 = 52 bits.               */
130         /* (o) : index (64): 2+2+2+2+14+14+14+14 = 64 bits.       */
131         /* (o) : index (72): 10+2+10+2+10+14+10+14 = 72 bits.     */
132         /* (o) : index (88): 11+11+11+11+11+11+11+11 = 88 bits.   */
133         )
134 {
135     Word32 i, j, k;
136     Word16 st, ix, iy, pos, index, track, nb_pulse, nbiter, j_temp;
137     Word16 psk, ps, alpk, alp, val, k_cn, k_dn, exp;
138     Word16 *p0, *p1, *p2, *p3, *psign;
139     Word16 *h, *h_inv, *ptr_h1, *ptr_h2, *ptr_hf, h_shift;
140     Word32 s, cor, L_tmp, L_index;
141     Word16 dn2[L_SUBFR], sign[L_SUBFR], vec[L_SUBFR];
142     Word16 ind[NPMAXPT * NB_TRACK];
143     Word16 codvec[NB_PULSE_MAX], nbpos[10];
144     Word16 cor_x[NB_POS], cor_y[NB_POS], pos_max[NB_TRACK];
145     Word16 h_buf[4 * L_SUBFR];
146     Word16 rrixix[NB_TRACK][NB_POS], rrixiy[NB_TRACK][MSIZE];
147     Word16 ipos[NB_PULSE_MAX];
148 
149     switch (nbbits)
150     {
151         case 20:                               /* 20 bits, 4 pulses, 4 tracks */
152             nbiter = 4;                          /* 4x16x16=1024 loop */
153             alp = 8192;                          /* alp = 2.0 (Q12) */
154             nb_pulse = 4;
155             nbpos[0] = 4;
156             nbpos[1] = 8;
157             break;
158         case 36:                               /* 36 bits, 8 pulses, 4 tracks */
159             nbiter = 4;                          /* 4x20x16=1280 loop */
160             alp = 4096;                          /* alp = 1.0 (Q12) */
161             nb_pulse = 8;
162             nbpos[0] = 4;
163             nbpos[1] = 8;
164             nbpos[2] = 8;
165             break;
166         case 44:                               /* 44 bits, 10 pulses, 4 tracks */
167             nbiter = 4;                          /* 4x26x16=1664 loop */
168             alp = 4096;                          /* alp = 1.0 (Q12) */
169             nb_pulse = 10;
170             nbpos[0] = 4;
171             nbpos[1] = 6;
172             nbpos[2] = 8;
173             nbpos[3] = 8;
174             break;
175         case 52:                               /* 52 bits, 12 pulses, 4 tracks */
176             nbiter = 4;                          /* 4x26x16=1664 loop */
177             alp = 4096;                          /* alp = 1.0 (Q12) */
178             nb_pulse = 12;
179             nbpos[0] = 4;
180             nbpos[1] = 6;
181             nbpos[2] = 8;
182             nbpos[3] = 8;
183             break;
184         case 64:                               /* 64 bits, 16 pulses, 4 tracks */
185             nbiter = 3;                          /* 3x36x16=1728 loop */
186             alp = 3277;                          /* alp = 0.8 (Q12) */
187             nb_pulse = 16;
188             nbpos[0] = 4;
189             nbpos[1] = 4;
190             nbpos[2] = 6;
191             nbpos[3] = 6;
192             nbpos[4] = 8;
193             nbpos[5] = 8;
194             break;
195         case 72:                               /* 72 bits, 18 pulses, 4 tracks */
196             nbiter = 3;                          /* 3x35x16=1680 loop */
197             alp = 3072;                          /* alp = 0.75 (Q12) */
198             nb_pulse = 18;
199             nbpos[0] = 2;
200             nbpos[1] = 3;
201             nbpos[2] = 4;
202             nbpos[3] = 5;
203             nbpos[4] = 6;
204             nbpos[5] = 7;
205             nbpos[6] = 8;
206             break;
207         case 88:                               /* 88 bits, 24 pulses, 4 tracks */
208             if(ser_size > 462)
209                 nbiter = 1;
210             else
211                 nbiter = 2;                    /* 2x53x16=1696 loop */
212 
213             alp = 2048;                          /* alp = 0.5 (Q12) */
214             nb_pulse = 24;
215             nbpos[0] = 2;
216             nbpos[1] = 2;
217             nbpos[2] = 3;
218             nbpos[3] = 4;
219             nbpos[4] = 5;
220             nbpos[5] = 6;
221             nbpos[6] = 7;
222             nbpos[7] = 8;
223             nbpos[8] = 8;
224             nbpos[9] = 8;
225             break;
226         default:
227             nbiter = 0;
228             alp = 0;
229             nb_pulse = 0;
230     }
231 
232     for (i = 0; i < nb_pulse; i++)
233     {
234         codvec[i] = i;
235     }
236 
237     /*----------------------------------------------------------------*
238      * Find sign for each pulse position.                             *
239      *----------------------------------------------------------------*/
240     /* calculate energy for normalization of cn[] and dn[] */
241     /* set k_cn = 32..32767 (ener_cn = 2^30..256-0) */
242 #ifdef ASM_OPT                  /* asm optimization branch */
243     s = Dot_product12_asm(cn, cn, L_SUBFR, &exp);
244 #else
245     s = Dot_product12(cn, cn, L_SUBFR, &exp);
246 #endif
247 
248     Isqrt_n(&s, &exp);
249     s = L_shl(s, (exp + 5));
250     k_cn = extract_h(L_add(s, 0x8000));
251 
252     /* set k_dn = 32..512 (ener_dn = 2^30..2^22) */
253 #ifdef ASM_OPT                      /* asm optimization branch */
254     s = Dot_product12_asm(dn, dn, L_SUBFR, &exp);
255 #else
256     s = Dot_product12(dn, dn, L_SUBFR, &exp);
257 #endif
258 
259     Isqrt_n(&s, &exp);
260     k_dn = voround(L_shl(s, (exp + 5 + 3)));    /* k_dn = 256..4096 */
261     k_dn = vo_mult_r(alp, k_dn);              /* alp in Q12 */
262 
263     /* mix normalized cn[] and dn[] */
264     p0 = cn;
265     p1 = dn;
266     p2 = dn2;
267 
268     for (i = 0; i < L_SUBFR/4; i++)
269     {
270         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
271         *p2++ = s >> 7;
272         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
273         *p2++ = s >> 7;
274         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
275         *p2++ = s >> 7;
276         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
277         *p2++ = s >> 7;
278     }
279 
280     /* set sign according to dn2[] = k_cn*cn[] + k_dn*dn[]    */
281     for(i = 0; i < L_SUBFR; i++)
282     {
283         val = dn[i];
284         ps = dn2[i];
285         if (ps >= 0)
286         {
287             sign[i] = 32767;             /* sign = +1 (Q12) */
288             vec[i] = -32768;
289         } else
290         {
291             sign[i] = -32768;            /* sign = -1 (Q12) */
292             vec[i] = 32767;
293             dn[i] = -val;
294             dn2[i] = -ps;
295         }
296     }
297     /*----------------------------------------------------------------*
298      * Select NB_MAX position per track according to max of dn2[].    *
299      *----------------------------------------------------------------*/
300     pos = 0;
301     for (i = 0; i < NB_TRACK; i++)
302     {
303         for (k = 0; k < NB_MAX; k++)
304         {
305             ps = -1;
306             for (j = i; j < L_SUBFR; j += STEP)
307             {
308                 if(dn2[j] > ps)
309                 {
310                     ps = dn2[j];
311                     pos = j;
312                 }
313             }
314             dn2[pos] = (k - NB_MAX);     /* dn2 < 0 when position is selected */
315             if (k == 0)
316             {
317                 pos_max[i] = pos;
318             }
319         }
320     }
321 
322     /*--------------------------------------------------------------*
323      * Scale h[] to avoid overflow and to get maximum of precision  *
324      * on correlation.                                              *
325      *                                                              *
326      * Maximum of h[] (h[0]) is fixed to 2048 (MAX16 / 16).         *
327      *  ==> This allow addition of 16 pulses without saturation.    *
328      *                                                              *
329      * Energy worst case (on resonant impulse response),            *
330      * - energy of h[] is approximately MAX/16.                     *
331      * - During search, the energy is divided by 8 to avoid         *
332      *   overflow on "alp". (energy of h[] = MAX/128).              *
333      *  ==> "alp" worst case detected is 22854 on sinusoidal wave.  *
334      *--------------------------------------------------------------*/
335 
336     /* impulse response buffer for fast computation */
337 
338     h = h_buf;
339     h_inv = h_buf + (2 * L_SUBFR);
340     L_tmp = 0;
341     for (i = 0; i < L_SUBFR; i++)
342     {
343         *h++ = 0;
344         *h_inv++ = 0;
345         L_tmp = L_add(L_tmp, (H[i] * H[i]) << 1);
346     }
347     /* scale h[] down (/2) when energy of h[] is high with many pulses used */
348     val = extract_h(L_tmp);
349     h_shift = 0;
350 
351     if ((nb_pulse >= 12) && (val > 1024))
352     {
353         h_shift = 1;
354     }
355     p0 = H;
356     p1 = h;
357     p2 = h_inv;
358 
359     for (i = 0; i < L_SUBFR/4; i++)
360     {
361         *p1 = *p0++ >> h_shift;
362         *p2++ = -(*p1++);
363         *p1 = *p0++ >> h_shift;
364         *p2++ = -(*p1++);
365         *p1 = *p0++ >> h_shift;
366         *p2++ = -(*p1++);
367         *p1 = *p0++ >> h_shift;
368         *p2++ = -(*p1++);
369     }
370 
371     /*------------------------------------------------------------*
372      * Compute rrixix[][] needed for the codebook search.         *
373      * This algorithm compute impulse response energy of all      *
374      * positions (16) in each track (4).       Total = 4x16 = 64. *
375      *------------------------------------------------------------*/
376 
377     /* storage order --> i3i3, i2i2, i1i1, i0i0 */
378 
379     /* Init pointers to last position of rrixix[] */
380     p0 = &rrixix[0][NB_POS - 1];
381     p1 = &rrixix[1][NB_POS - 1];
382     p2 = &rrixix[2][NB_POS - 1];
383     p3 = &rrixix[3][NB_POS - 1];
384 
385     ptr_h1 = h;
386     cor = 0x00008000L;                             /* for rounding */
387     for (i = 0; i < NB_POS; i++)
388     {
389         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
390         ptr_h1++;
391         *p3-- = extract_h(cor);
392         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
393         ptr_h1++;
394         *p2-- = extract_h(cor);
395         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
396         ptr_h1++;
397         *p1-- = extract_h(cor);
398         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
399         ptr_h1++;
400         *p0-- = extract_h(cor);
401     }
402 
403     /*------------------------------------------------------------*
404      * Compute rrixiy[][] needed for the codebook search.         *
405      * This algorithm compute correlation between 2 pulses        *
406      * (2 impulses responses) in 4 possible adjacents tracks.     *
407      * (track 0-1, 1-2, 2-3 and 3-0).     Total = 4x16x16 = 1024. *
408      *------------------------------------------------------------*/
409 
410     /* storage order --> i2i3, i1i2, i0i1, i3i0 */
411 
412     pos = MSIZE - 1;
413     ptr_hf = h + 1;
414 
415     for (k = 0; k < NB_POS; k++)
416     {
417         p3 = &rrixiy[2][pos];
418         p2 = &rrixiy[1][pos];
419         p1 = &rrixiy[0][pos];
420         p0 = &rrixiy[3][pos - NB_POS];
421 
422         cor = 0x00008000L;                   /* for rounding */
423         ptr_h1 = h;
424         ptr_h2 = ptr_hf;
425 
426         for (i = k + 1; i < NB_POS; i++)
427         {
428             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
429             ptr_h1++;
430             ptr_h2++;
431             *p3 = extract_h(cor);
432             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
433             ptr_h1++;
434             ptr_h2++;
435             *p2 = extract_h(cor);
436             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
437             ptr_h1++;
438             ptr_h2++;
439             *p1 = extract_h(cor);
440             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
441             ptr_h1++;
442             ptr_h2++;
443             *p0 = extract_h(cor);
444 
445             p3 -= (NB_POS + 1);
446             p2 -= (NB_POS + 1);
447             p1 -= (NB_POS + 1);
448             p0 -= (NB_POS + 1);
449         }
450         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
451         ptr_h1++;
452         ptr_h2++;
453         *p3 = extract_h(cor);
454         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
455         ptr_h1++;
456         ptr_h2++;
457         *p2 = extract_h(cor);
458         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
459         ptr_h1++;
460         ptr_h2++;
461         *p1 = extract_h(cor);
462 
463         pos -= NB_POS;
464         ptr_hf += STEP;
465     }
466 
467     /* storage order --> i3i0, i2i3, i1i2, i0i1 */
468 
469     pos = MSIZE - 1;
470     ptr_hf = h + 3;
471 
472     for (k = 0; k < NB_POS; k++)
473     {
474         p3 = &rrixiy[3][pos];
475         p2 = &rrixiy[2][pos - 1];
476         p1 = &rrixiy[1][pos - 1];
477         p0 = &rrixiy[0][pos - 1];
478 
479         cor = 0x00008000L;                              /* for rounding */
480         ptr_h1 = h;
481         ptr_h2 = ptr_hf;
482 
483         for (i = k + 1; i < NB_POS; i++)
484         {
485             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
486             ptr_h1++;
487             ptr_h2++;
488             *p3 = extract_h(cor);
489             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
490             ptr_h1++;
491             ptr_h2++;
492             *p2 = extract_h(cor);
493             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
494             ptr_h1++;
495             ptr_h2++;
496             *p1 = extract_h(cor);
497             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
498             ptr_h1++;
499             ptr_h2++;
500             *p0 = extract_h(cor);
501 
502             p3 -= (NB_POS + 1);
503             p2 -= (NB_POS + 1);
504             p1 -= (NB_POS + 1);
505             p0 -= (NB_POS + 1);
506         }
507         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
508         ptr_h1++;
509         ptr_h2++;
510         *p3 = extract_h(cor);
511 
512         pos--;
513         ptr_hf += STEP;
514     }
515 
516     /*------------------------------------------------------------*
517      * Modification of rrixiy[][] to take signs into account.     *
518      *------------------------------------------------------------*/
519 
520     p0 = &rrixiy[0][0];
521 
522     for (k = 0; k < NB_TRACK; k++)
523     {
524         j_temp = (k + 1)&0x03;
525         for (i = k; i < L_SUBFR; i += STEP)
526         {
527             psign = sign;
528             if (psign[i] < 0)
529             {
530                 psign = vec;
531             }
532             j = j_temp;
533             for (; j < L_SUBFR; j += STEP)
534             {
535                 *p0 = vo_mult(*p0, psign[j]);
536                 p0++;
537             }
538         }
539     }
540 
541     /*-------------------------------------------------------------------*
542      *                       Deep first search                           *
543      *-------------------------------------------------------------------*/
544 
545     psk = -1;
546     alpk = 1;
547 
548     for (k = 0; k < nbiter; k++)
549     {
550         j_temp = k<<2;
551         for (i = 0; i < nb_pulse; i++)
552             ipos[i] = tipos[j_temp + i];
553 
554         if(nbbits == 20)
555         {
556             pos = 0;
557             ps = 0;
558             alp = 0;
559             for (i = 0; i < L_SUBFR; i++)
560             {
561                 vec[i] = 0;
562             }
563         } else if ((nbbits == 36) || (nbbits == 44))
564         {
565             /* first stage: fix 2 pulses */
566             pos = 2;
567 
568             ix = ind[0] = pos_max[ipos[0]];
569             iy = ind[1] = pos_max[ipos[1]];
570             ps = dn[ix] + dn[iy];
571             i = ix >> 2;                /* ix / STEP */
572             j = iy >> 2;                /* iy / STEP */
573             s = rrixix[ipos[0]][i] << 13;
574             s += rrixix[ipos[1]][j] << 13;
575             i = (i << 4) + j;         /* (ix/STEP)*NB_POS + (iy/STEP) */
576             s += rrixiy[ipos[0]][i] << 14;
577             alp = (s + 0x8000) >> 16;
578             if (sign[ix] < 0)
579                 p0 = h_inv - ix;
580             else
581                 p0 = h - ix;
582             if (sign[iy] < 0)
583                 p1 = h_inv - iy;
584             else
585                 p1 = h - iy;
586 
587             for (i = 0; i < L_SUBFR; i++)
588             {
589                 vec[i] = (*p0++) + (*p1++);
590             }
591 
592             if(nbbits == 44)
593             {
594                 ipos[8] = 0;
595                 ipos[9] = 1;
596             }
597         } else
598         {
599             /* first stage: fix 4 pulses */
600             pos = 4;
601 
602             ix = ind[0] = pos_max[ipos[0]];
603             iy = ind[1] = pos_max[ipos[1]];
604             i = ind[2] = pos_max[ipos[2]];
605             j = ind[3] = pos_max[ipos[3]];
606             ps = add1(add1(add1(dn[ix], dn[iy]), dn[i]), dn[j]);
607 
608             if (sign[ix] < 0)
609                 p0 = h_inv - ix;
610             else
611                 p0 = h - ix;
612 
613             if (sign[iy] < 0)
614                 p1 = h_inv - iy;
615             else
616                 p1 = h - iy;
617 
618             if (sign[i] < 0)
619                 p2 = h_inv - i;
620             else
621                 p2 = h - i;
622 
623             if (sign[j] < 0)
624                 p3 = h_inv - j;
625             else
626                 p3 = h - j;
627 
628             L_tmp = 0L;
629             for(i = 0; i < L_SUBFR; i++)
630             {
631                 Word32 vecSq2;
632                 vec[i]  = add1(add1(add1(*p0++, *p1++), *p2++), *p3++);
633                 vecSq2 = (vec[i] * vec[i]) << 1;
634                 if (vecSq2 > 0 && L_tmp > INT_MAX - vecSq2) {
635                     L_tmp = INT_MAX;
636                 } else if (vecSq2 < 0 && L_tmp < INT_MIN - vecSq2) {
637                     L_tmp = INT_MIN;
638                 } else {
639                     L_tmp  += vecSq2;
640                 }
641             }
642 
643             alp = ((L_tmp >> 3) + 0x8000) >> 16;
644 
645             if(nbbits == 72)
646             {
647                 ipos[16] = 0;
648                 ipos[17] = 1;
649             }
650         }
651 
652         /* other stages of 2 pulses */
653 
654         for (j = pos, st = 0; j < nb_pulse; j += 2, st++)
655         {
656             /*--------------------------------------------------*
657              * Calculate correlation of all possible positions  *
658              * of the next 2 pulses with previous fixed pulses. *
659              * Each pulse can have 16 possible positions.       *
660              *--------------------------------------------------*/
661             if(ipos[j] == 3)
662             {
663                 cor_h_vec_30(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
664             }
665             else
666             {
667 #ifdef ASM_OPT                 /* asm optimization branch */
668                 cor_h_vec_012_asm(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
669 #else
670                 cor_h_vec_012(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
671 #endif
672             }
673             /*--------------------------------------------------*
674              * Find best positions of 2 pulses.                 *
675              *--------------------------------------------------*/
676             search_ixiy(nbpos[st], ipos[j], ipos[j + 1], &ps, &alp,
677                     &ix, &iy, dn, dn2, cor_x, cor_y, rrixiy);
678 
679             ind[j] = ix;
680             ind[j + 1] = iy;
681 
682             if (sign[ix] < 0)
683                 p0 = h_inv - ix;
684             else
685                 p0 = h - ix;
686             if (sign[iy] < 0)
687                 p1 = h_inv - iy;
688             else
689                 p1 = h - iy;
690 
691             for (i = 0; i < L_SUBFR; i+=4)
692             {
693                 vec[i]   += add1((*p0++), (*p1++));
694                 vec[i+1] += add1((*p0++), (*p1++));
695                 vec[i+2] += add1((*p0++), (*p1++));
696                 vec[i+3] += add1((*p0++), (*p1++));
697             }
698         }
699         /* memorise the best codevector */
700         ps = vo_mult(ps, ps);
701         s = L_sub(vo_L_mult(alpk, ps), vo_L_mult(psk, alp));
702         if (s > 0)
703         {
704             psk = ps;
705             alpk = alp;
706             for (i = 0; i < nb_pulse; i++)
707             {
708                 codvec[i] = ind[i];
709             }
710             for (i = 0; i < L_SUBFR; i++)
711             {
712                 y[i] = vec[i];
713             }
714         }
715     }
716     /*-------------------------------------------------------------------*
717      * Build the codeword, the filtered codeword and index of codevector.*
718      *-------------------------------------------------------------------*/
719     for (i = 0; i < NPMAXPT * NB_TRACK; i++)
720     {
721         ind[i] = -1;
722     }
723     for (i = 0; i < L_SUBFR; i++)
724     {
725         code[i] = 0;
726         y[i] = vo_shr_r(y[i], 3);               /* Q12 to Q9 */
727     }
728     val = (512 >> h_shift);               /* codeword in Q9 format */
729     for (k = 0; k < nb_pulse; k++)
730     {
731         i = codvec[k];                       /* read pulse position */
732         j = sign[i];                         /* read sign           */
733         index = i >> 2;                 /* index = pos of pulse (0..15) */
734         track = (Word16) (i & 0x03);         /* track = i % NB_TRACK (0..3)  */
735 
736         if (j > 0)
737         {
738             code[i] += val;
739             codvec[k] += 128;
740         } else
741         {
742             code[i] -= val;
743             index += NB_POS;
744         }
745 
746         i = (Word16)((vo_L_mult(track, NPMAXPT) >> 1));
747 
748         while (ind[i] >= 0)
749         {
750             i += 1;
751         }
752         ind[i] = index;
753     }
754 
755     k = 0;
756     /* Build index of codevector */
757     if(nbbits == 20)
758     {
759         for (track = 0; track < NB_TRACK; track++)
760         {
761             _index[track] = (Word16)(quant_1p_N1(ind[k], 4));
762             k += NPMAXPT;
763         }
764     } else if(nbbits == 36)
765     {
766         for (track = 0; track < NB_TRACK; track++)
767         {
768             _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
769             k += NPMAXPT;
770         }
771     } else if(nbbits == 44)
772     {
773         for (track = 0; track < NB_TRACK - 2; track++)
774         {
775             _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
776             k += NPMAXPT;
777         }
778         for (track = 2; track < NB_TRACK; track++)
779         {
780             _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
781             k += NPMAXPT;
782         }
783     } else if(nbbits == 52)
784     {
785         for (track = 0; track < NB_TRACK; track++)
786         {
787             _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
788             k += NPMAXPT;
789         }
790     } else if(nbbits == 64)
791     {
792         for (track = 0; track < NB_TRACK; track++)
793         {
794             L_index = quant_4p_4N(&ind[k], 4);
795             _index[track] = (Word16)((L_index >> 14) & 3);
796             _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
797             k += NPMAXPT;
798         }
799     } else if(nbbits == 72)
800     {
801         for (track = 0; track < NB_TRACK - 2; track++)
802         {
803             L_index = quant_5p_5N(&ind[k], 4);
804             _index[track] = (Word16)((L_index >> 10) & 0x03FF);
805             _index[track + NB_TRACK] = (Word16)(L_index & 0x03FF);
806             k += NPMAXPT;
807         }
808         for (track = 2; track < NB_TRACK; track++)
809         {
810             L_index = quant_4p_4N(&ind[k], 4);
811             _index[track] = (Word16)((L_index >> 14) & 3);
812             _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
813             k += NPMAXPT;
814         }
815     } else if(nbbits == 88)
816     {
817         for (track = 0; track < NB_TRACK; track++)
818         {
819             L_index = quant_6p_6N_2(&ind[k], 4);
820             _index[track] = (Word16)((L_index >> 11) & 0x07FF);
821             _index[track + NB_TRACK] = (Word16)(L_index & 0x07FF);
822             k += NPMAXPT;
823         }
824     }
825     return;
826 }
827 
828 
829 /*-------------------------------------------------------------------*
830  * Function  cor_h_vec()                                             *
831  * ~~~~~~~~~~~~~~~~~~~~~                                             *
832  * Compute correlations of h[] with vec[] for the specified track.   *
833  *-------------------------------------------------------------------*/
cor_h_vec_30(Word16 h[],Word16 vec[],Word16 track,Word16 sign[],Word16 rrixix[][NB_POS],Word16 cor_1[],Word16 cor_2[])834 void cor_h_vec_30(
835         Word16 h[],                           /* (i) scaled impulse response                 */
836         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
837         Word16 track,                         /* (i) track to use                            */
838         Word16 sign[],                        /* (i) sign vector                             */
839         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
840         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
841         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
842         )
843 {
844     Word32 i, j, pos, corr;
845     Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y;
846     Word32 L_sum1,L_sum2;
847     cor_x = cor_1;
848     cor_y = cor_2;
849     p0 = rrixix[track];
850     p3 = rrixix[0];
851     pos = track;
852 
853     for (i = 0; i < NB_POS; i+=2)
854     {
855         L_sum1 = L_sum2 = 0L;
856         p1 = h;
857         p2 = &vec[pos];
858         for (j=pos;j < L_SUBFR; j++)
859         {
860             L_sum1 = L_add(L_sum1, *p1 * *p2);
861             p2-=3;
862             L_sum2 = L_add(L_sum2, *p1++ * *p2);
863             p2+=4;
864         }
865         p2-=3;
866         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
867         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
868         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
869 
870         L_sum1 = L_shl(L_sum1, 2);
871         L_sum2 = L_shl(L_sum2, 2);
872 
873         corr = voround(L_sum1);
874         *cor_x++ = mult(corr, sign[pos]) + (*p0++);
875         corr = voround(L_sum2);
876         *cor_y++ = mult(corr, sign[pos-3]) + (*p3++);
877         pos += STEP;
878 
879         L_sum1 = L_sum2 = 0L;
880         p1 = h;
881         p2 = &vec[pos];
882         for (j=pos;j < L_SUBFR; j++)
883         {
884             L_sum1 = L_add(L_sum1, *p1 * *p2);
885             p2-=3;
886             L_sum2 = L_add(L_sum2, *p1++ * *p2);
887             p2+=4;
888         }
889         p2-=3;
890         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
891         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
892         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
893 
894         L_sum1 = L_shl(L_sum1, 2);
895         L_sum2 = L_shl(L_sum2, 2);
896 
897         corr = voround(L_sum1);
898         *cor_x++ = mult(corr, sign[pos]) + (*p0++);
899         corr = voround(L_sum2);
900         *cor_y++ = mult(corr, sign[pos-3]) + (*p3++);
901         pos += STEP;
902     }
903     return;
904 }
905 
cor_h_vec_012(Word16 h[],Word16 vec[],Word16 track,Word16 sign[],Word16 rrixix[][NB_POS],Word16 cor_1[],Word16 cor_2[])906 void cor_h_vec_012(
907         Word16 h[],                           /* (i) scaled impulse response                 */
908         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
909         Word16 track,                         /* (i) track to use                            */
910         Word16 sign[],                        /* (i) sign vector                             */
911         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
912         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
913         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
914         )
915 {
916     Word32 i, j, pos, corr;
917     Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y;
918     Word32 L_sum1,L_sum2;
919     cor_x = cor_1;
920     cor_y = cor_2;
921     p0 = rrixix[track];
922     p3 = rrixix[track+1];
923     pos = track;
924 
925     for (i = 0; i < NB_POS; i+=2)
926     {
927         L_sum1 = L_sum2 = 0L;
928         p1 = h;
929         p2 = &vec[pos];
930         for (j=62-pos ;j >= 0; j--)
931         {
932             L_sum1 = L_add(L_sum1, *p1 * *p2++);
933             L_sum2 = L_add(L_sum2, *p1++ * *p2);
934         }
935         L_sum1 = L_add(L_sum1, *p1 * *p2);
936         L_sum1 = L_shl(L_sum1, 2);
937         L_sum2 = L_shl(L_sum2, 2);
938 
939         corr = voround(L_sum1);
940         cor_x[i] = vo_mult(corr, sign[pos]) + (*p0++);
941         corr = voround(L_sum2);
942         cor_y[i] = vo_mult(corr, sign[pos + 1]) + (*p3++);
943         pos += STEP;
944 
945         L_sum1 = L_sum2 = 0L;
946         p1 = h;
947         p2 = &vec[pos];
948         for (j= 62-pos;j >= 0; j--)
949         {
950             L_sum1 = L_add(L_sum1, *p1 * *p2++);
951             L_sum2 = L_add(L_sum2, *p1++ * *p2);
952         }
953         L_sum1 = L_add(L_sum1, *p1 * *p2);
954         L_sum1 = L_shl(L_sum1, 2);
955         L_sum2 = L_shl(L_sum2, 2);
956 
957         corr = voround(L_sum1);
958         cor_x[i+1] = vo_mult(corr, sign[pos]) + (*p0++);
959         corr = voround(L_sum2);
960         cor_y[i+1] = vo_mult(corr, sign[pos + 1]) + (*p3++);
961         pos += STEP;
962     }
963     return;
964 }
965 
966 /*-------------------------------------------------------------------*
967  * Function  search_ixiy()                                           *
968  * ~~~~~~~~~~~~~~~~~~~~~~~                                           *
969  * Find the best positions of 2 pulses in a subframe.                *
970  *-------------------------------------------------------------------*/
971 
search_ixiy(Word16 nb_pos_ix,Word16 track_x,Word16 track_y,Word16 * ps,Word16 * alp,Word16 * ix,Word16 * iy,Word16 dn[],Word16 dn2[],Word16 cor_x[],Word16 cor_y[],Word16 rrixiy[][MSIZE])972 void search_ixiy(
973         Word16 nb_pos_ix,                     /* (i) nb of pos for pulse 1 (1..8)       */
974         Word16 track_x,                       /* (i) track of pulse 1                   */
975         Word16 track_y,                       /* (i) track of pulse 2                   */
976         Word16 * ps,                          /* (i/o) correlation of all fixed pulses  */
977         Word16 * alp,                         /* (i/o) energy of all fixed pulses       */
978         Word16 * ix,                          /* (o) position of pulse 1                */
979         Word16 * iy,                          /* (o) position of pulse 2                */
980         Word16 dn[],                          /* (i) corr. between target and h[]       */
981         Word16 dn2[],                         /* (i) vector of selected positions       */
982         Word16 cor_x[],                       /* (i) corr. of pulse 1 with fixed pulses */
983         Word16 cor_y[],                       /* (i) corr. of pulse 2 with fixed pulses */
984         Word16 rrixiy[][MSIZE]                /* (i) corr. of pulse 1 with pulse 2   */
985         )
986 {
987     Word32 x, y, pos, thres_ix;
988     Word16 ps1, ps2, sq, sqk;
989     Word16 alp_16, alpk;
990     Word16 *p0, *p1, *p2;
991     Word32 s, alp0, alp1, alp2;
992 
993     p0 = cor_x;
994     p1 = cor_y;
995     p2 = rrixiy[track_x];
996 
997     thres_ix = nb_pos_ix - NB_MAX;
998 
999     alp0 = L_deposit_h(*alp);
1000     alp0 = (alp0 + 0x00008000L);       /* for rounding */
1001 
1002     sqk = -1;
1003     alpk = 1;
1004 
1005     for (x = track_x; x < L_SUBFR; x += STEP)
1006     {
1007         ps1 = *ps + dn[x];
1008         alp1 = L_add(alp0, ((*p0++)<<13));
1009 
1010         if (dn2[x] < thres_ix)
1011         {
1012             pos = -1;
1013             for (y = track_y; y < L_SUBFR; y += STEP)
1014             {
1015                 ps2 = add1(ps1, dn[y]);
1016 
1017                 alp2 = L_add(alp1, ((*p1++)<<13));
1018                 alp2 = L_add(alp2, ((*p2++)<<14));
1019                 alp_16 = extract_h(alp2);
1020                 sq = vo_mult(ps2, ps2);
1021                 s = L_sub(vo_L_mult(alpk, sq), L_mult(sqk, alp_16));
1022 
1023                 if (s > 0)
1024                 {
1025                     sqk = sq;
1026                     alpk = alp_16;
1027                     pos = y;
1028                 }
1029             }
1030             p1 -= NB_POS;
1031 
1032             if (pos >= 0)
1033             {
1034                 *ix = x;
1035                 *iy = pos;
1036             }
1037         } else
1038         {
1039             p2 += NB_POS;
1040         }
1041     }
1042 
1043     *ps = add1(*ps, add1(dn[*ix], dn[*iy]));
1044     *alp = alpk;
1045 
1046     return;
1047 }
1048 
1049 
1050 
1051 
1052