1 /*
2 ** Copyright 2003-2010, VisualOn, Inc.
3 **
4 ** Licensed under the Apache License, Version 2.0 (the "License");
5 ** you may not use this file except in compliance with the License.
6 ** You may obtain a copy of the License at
7 **
8 ** http://www.apache.org/licenses/LICENSE-2.0
9 **
10 ** Unless required by applicable law or agreed to in writing, software
11 ** distributed under the License is distributed on an "AS IS" BASIS,
12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ** See the License for the specific language governing permissions and
14 ** limitations under the License.
15 */
16
17 /***********************************************************************
18 * File: c4t64fx.c *
19 * *
20 * Description:Performs algebraic codebook search for higher modes *
21 * *
22 ************************************************************************/
23
24 /************************************************************************
25 * Function: ACELP_4t64_fx() *
26 * *
27 * 20, 36, 44, 52, 64, 72, 88 bits algebraic codebook. *
28 * 4 tracks x 16 positions per track = 64 samples. *
29 * *
30 * 20 bits --> 4 pulses in a frame of 64 samples. *
31 * 36 bits --> 8 pulses in a frame of 64 samples. *
32 * 44 bits --> 10 pulses in a frame of 64 samples. *
33 * 52 bits --> 12 pulses in a frame of 64 samples. *
34 * 64 bits --> 16 pulses in a frame of 64 samples. *
35 * 72 bits --> 18 pulses in a frame of 64 samples. *
36 * 88 bits --> 24 pulses in a frame of 64 samples. *
37 * *
38 * All pulses can have two (2) possible amplitudes: +1 or -1. *
39 * Each pulse can have sixteen (16) possible positions. *
40 *************************************************************************/
41
42 #include "typedef.h"
43 #include "basic_op.h"
44 #include "math_op.h"
45 #include "acelp.h"
46 #include "cnst.h"
47
48 #include "q_pulse.h"
49
50 static Word16 tipos[36] = {
51 0, 1, 2, 3, /* starting point &ipos[0], 1st iter */
52 1, 2, 3, 0, /* starting point &ipos[4], 2nd iter */
53 2, 3, 0, 1, /* starting point &ipos[8], 3rd iter */
54 3, 0, 1, 2, /* starting point &ipos[12], 4th iter */
55 0, 1, 2, 3,
56 1, 2, 3, 0,
57 2, 3, 0, 1,
58 3, 0, 1, 2,
59 0, 1, 2, 3}; /* end point for 24 pulses &ipos[35], 4th iter */
60
61 #define NB_PULSE_MAX 24
62
63 #define L_SUBFR 64
64 #define NB_TRACK 4
65 #define STEP 4
66 #define NB_POS 16
67 #define MSIZE 256
68 #define NB_MAX 8
69 #define NPMAXPT ((NB_PULSE_MAX+NB_TRACK-1)/NB_TRACK)
70
71 /* Private functions */
72 void cor_h_vec_012(
73 Word16 h[], /* (i) scaled impulse response */
74 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */
75 Word16 track, /* (i) track to use */
76 Word16 sign[], /* (i) sign vector */
77 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */
78 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */
79 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */
80 );
81
82 void cor_h_vec_012_asm(
83 Word16 h[], /* (i) scaled impulse response */
84 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */
85 Word16 track, /* (i) track to use */
86 Word16 sign[], /* (i) sign vector */
87 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */
88 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */
89 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */
90 );
91
92 void cor_h_vec_30(
93 Word16 h[], /* (i) scaled impulse response */
94 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */
95 Word16 track, /* (i) track to use */
96 Word16 sign[], /* (i) sign vector */
97 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */
98 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */
99 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */
100 );
101
102 void search_ixiy(
103 Word16 nb_pos_ix, /* (i) nb of pos for pulse 1 (1..8) */
104 Word16 track_x, /* (i) track of pulse 1 */
105 Word16 track_y, /* (i) track of pulse 2 */
106 Word16 * ps, /* (i/o) correlation of all fixed pulses */
107 Word16 * alp, /* (i/o) energy of all fixed pulses */
108 Word16 * ix, /* (o) position of pulse 1 */
109 Word16 * iy, /* (o) position of pulse 2 */
110 Word16 dn[], /* (i) corr. between target and h[] */
111 Word16 dn2[], /* (i) vector of selected positions */
112 Word16 cor_x[], /* (i) corr. of pulse 1 with fixed pulses */
113 Word16 cor_y[], /* (i) corr. of pulse 2 with fixed pulses */
114 Word16 rrixiy[][MSIZE] /* (i) corr. of pulse 1 with pulse 2 */
115 );
116
117
ACELP_4t64_fx(Word16 dn[],Word16 cn[],Word16 H[],Word16 code[],Word16 y[],Word16 nbbits,Word16 ser_size,Word16 _index[])118 void ACELP_4t64_fx(
119 Word16 dn[], /* (i) <12b : correlation between target x[] and H[] */
120 Word16 cn[], /* (i) <12b : residual after long term prediction */
121 Word16 H[], /* (i) Q12: impulse response of weighted synthesis filter */
122 Word16 code[], /* (o) Q9 : algebraic (fixed) codebook excitation */
123 Word16 y[], /* (o) Q9 : filtered fixed codebook excitation */
124 Word16 nbbits, /* (i) : 20, 36, 44, 52, 64, 72 or 88 bits */
125 Word16 ser_size, /* (i) : bit rate */
126 Word16 _index[] /* (o) : index (20): 5+5+5+5 = 20 bits. */
127 /* (o) : index (36): 9+9+9+9 = 36 bits. */
128 /* (o) : index (44): 13+9+13+9 = 44 bits. */
129 /* (o) : index (52): 13+13+13+13 = 52 bits. */
130 /* (o) : index (64): 2+2+2+2+14+14+14+14 = 64 bits. */
131 /* (o) : index (72): 10+2+10+2+10+14+10+14 = 72 bits. */
132 /* (o) : index (88): 11+11+11+11+11+11+11+11 = 88 bits. */
133 )
134 {
135 Word32 i, j, k;
136 Word16 st, ix, iy, pos, index, track, nb_pulse, nbiter, j_temp;
137 Word16 psk, ps, alpk, alp, val, k_cn, k_dn, exp;
138 Word16 *p0, *p1, *p2, *p3, *psign;
139 Word16 *h, *h_inv, *ptr_h1, *ptr_h2, *ptr_hf, h_shift;
140 Word32 s, cor, L_tmp, L_index;
141 Word16 dn2[L_SUBFR], sign[L_SUBFR], vec[L_SUBFR];
142 Word16 ind[NPMAXPT * NB_TRACK];
143 Word16 codvec[NB_PULSE_MAX], nbpos[10];
144 Word16 cor_x[NB_POS], cor_y[NB_POS], pos_max[NB_TRACK];
145 Word16 h_buf[4 * L_SUBFR];
146 Word16 rrixix[NB_TRACK][NB_POS], rrixiy[NB_TRACK][MSIZE];
147 Word16 ipos[NB_PULSE_MAX];
148
149 switch (nbbits)
150 {
151 case 20: /* 20 bits, 4 pulses, 4 tracks */
152 nbiter = 4; /* 4x16x16=1024 loop */
153 alp = 8192; /* alp = 2.0 (Q12) */
154 nb_pulse = 4;
155 nbpos[0] = 4;
156 nbpos[1] = 8;
157 break;
158 case 36: /* 36 bits, 8 pulses, 4 tracks */
159 nbiter = 4; /* 4x20x16=1280 loop */
160 alp = 4096; /* alp = 1.0 (Q12) */
161 nb_pulse = 8;
162 nbpos[0] = 4;
163 nbpos[1] = 8;
164 nbpos[2] = 8;
165 break;
166 case 44: /* 44 bits, 10 pulses, 4 tracks */
167 nbiter = 4; /* 4x26x16=1664 loop */
168 alp = 4096; /* alp = 1.0 (Q12) */
169 nb_pulse = 10;
170 nbpos[0] = 4;
171 nbpos[1] = 6;
172 nbpos[2] = 8;
173 nbpos[3] = 8;
174 break;
175 case 52: /* 52 bits, 12 pulses, 4 tracks */
176 nbiter = 4; /* 4x26x16=1664 loop */
177 alp = 4096; /* alp = 1.0 (Q12) */
178 nb_pulse = 12;
179 nbpos[0] = 4;
180 nbpos[1] = 6;
181 nbpos[2] = 8;
182 nbpos[3] = 8;
183 break;
184 case 64: /* 64 bits, 16 pulses, 4 tracks */
185 nbiter = 3; /* 3x36x16=1728 loop */
186 alp = 3277; /* alp = 0.8 (Q12) */
187 nb_pulse = 16;
188 nbpos[0] = 4;
189 nbpos[1] = 4;
190 nbpos[2] = 6;
191 nbpos[3] = 6;
192 nbpos[4] = 8;
193 nbpos[5] = 8;
194 break;
195 case 72: /* 72 bits, 18 pulses, 4 tracks */
196 nbiter = 3; /* 3x35x16=1680 loop */
197 alp = 3072; /* alp = 0.75 (Q12) */
198 nb_pulse = 18;
199 nbpos[0] = 2;
200 nbpos[1] = 3;
201 nbpos[2] = 4;
202 nbpos[3] = 5;
203 nbpos[4] = 6;
204 nbpos[5] = 7;
205 nbpos[6] = 8;
206 break;
207 case 88: /* 88 bits, 24 pulses, 4 tracks */
208 if(ser_size > 462)
209 nbiter = 1;
210 else
211 nbiter = 2; /* 2x53x16=1696 loop */
212
213 alp = 2048; /* alp = 0.5 (Q12) */
214 nb_pulse = 24;
215 nbpos[0] = 2;
216 nbpos[1] = 2;
217 nbpos[2] = 3;
218 nbpos[3] = 4;
219 nbpos[4] = 5;
220 nbpos[5] = 6;
221 nbpos[6] = 7;
222 nbpos[7] = 8;
223 nbpos[8] = 8;
224 nbpos[9] = 8;
225 break;
226 default:
227 nbiter = 0;
228 alp = 0;
229 nb_pulse = 0;
230 }
231
232 for (i = 0; i < nb_pulse; i++)
233 {
234 codvec[i] = i;
235 }
236
237 /*----------------------------------------------------------------*
238 * Find sign for each pulse position. *
239 *----------------------------------------------------------------*/
240 /* calculate energy for normalization of cn[] and dn[] */
241 /* set k_cn = 32..32767 (ener_cn = 2^30..256-0) */
242 #ifdef ASM_OPT /* asm optimization branch */
243 s = Dot_product12_asm(cn, cn, L_SUBFR, &exp);
244 #else
245 s = Dot_product12(cn, cn, L_SUBFR, &exp);
246 #endif
247
248 Isqrt_n(&s, &exp);
249 s = L_shl(s, (exp + 5));
250 k_cn = extract_h(L_add(s, 0x8000));
251
252 /* set k_dn = 32..512 (ener_dn = 2^30..2^22) */
253 #ifdef ASM_OPT /* asm optimization branch */
254 s = Dot_product12_asm(dn, dn, L_SUBFR, &exp);
255 #else
256 s = Dot_product12(dn, dn, L_SUBFR, &exp);
257 #endif
258
259 Isqrt_n(&s, &exp);
260 k_dn = voround(L_shl(s, (exp + 5 + 3))); /* k_dn = 256..4096 */
261 k_dn = vo_mult_r(alp, k_dn); /* alp in Q12 */
262
263 /* mix normalized cn[] and dn[] */
264 p0 = cn;
265 p1 = dn;
266 p2 = dn2;
267
268 for (i = 0; i < L_SUBFR/4; i++)
269 {
270 s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
271 *p2++ = s >> 7;
272 s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
273 *p2++ = s >> 7;
274 s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
275 *p2++ = s >> 7;
276 s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
277 *p2++ = s >> 7;
278 }
279
280 /* set sign according to dn2[] = k_cn*cn[] + k_dn*dn[] */
281 for(i = 0; i < L_SUBFR; i++)
282 {
283 val = dn[i];
284 ps = dn2[i];
285 if (ps >= 0)
286 {
287 sign[i] = 32767; /* sign = +1 (Q12) */
288 vec[i] = -32768;
289 } else
290 {
291 sign[i] = -32768; /* sign = -1 (Q12) */
292 vec[i] = 32767;
293 dn[i] = -val;
294 dn2[i] = -ps;
295 }
296 }
297 /*----------------------------------------------------------------*
298 * Select NB_MAX position per track according to max of dn2[]. *
299 *----------------------------------------------------------------*/
300 pos = 0;
301 for (i = 0; i < NB_TRACK; i++)
302 {
303 for (k = 0; k < NB_MAX; k++)
304 {
305 ps = -1;
306 for (j = i; j < L_SUBFR; j += STEP)
307 {
308 if(dn2[j] > ps)
309 {
310 ps = dn2[j];
311 pos = j;
312 }
313 }
314 dn2[pos] = (k - NB_MAX); /* dn2 < 0 when position is selected */
315 if (k == 0)
316 {
317 pos_max[i] = pos;
318 }
319 }
320 }
321
322 /*--------------------------------------------------------------*
323 * Scale h[] to avoid overflow and to get maximum of precision *
324 * on correlation. *
325 * *
326 * Maximum of h[] (h[0]) is fixed to 2048 (MAX16 / 16). *
327 * ==> This allow addition of 16 pulses without saturation. *
328 * *
329 * Energy worst case (on resonant impulse response), *
330 * - energy of h[] is approximately MAX/16. *
331 * - During search, the energy is divided by 8 to avoid *
332 * overflow on "alp". (energy of h[] = MAX/128). *
333 * ==> "alp" worst case detected is 22854 on sinusoidal wave. *
334 *--------------------------------------------------------------*/
335
336 /* impulse response buffer for fast computation */
337
338 h = h_buf;
339 h_inv = h_buf + (2 * L_SUBFR);
340 L_tmp = 0;
341 for (i = 0; i < L_SUBFR; i++)
342 {
343 *h++ = 0;
344 *h_inv++ = 0;
345 L_tmp = L_add(L_tmp, (H[i] * H[i]) << 1);
346 }
347 /* scale h[] down (/2) when energy of h[] is high with many pulses used */
348 val = extract_h(L_tmp);
349 h_shift = 0;
350
351 if ((nb_pulse >= 12) && (val > 1024))
352 {
353 h_shift = 1;
354 }
355 p0 = H;
356 p1 = h;
357 p2 = h_inv;
358
359 for (i = 0; i < L_SUBFR/4; i++)
360 {
361 *p1 = *p0++ >> h_shift;
362 *p2++ = -(*p1++);
363 *p1 = *p0++ >> h_shift;
364 *p2++ = -(*p1++);
365 *p1 = *p0++ >> h_shift;
366 *p2++ = -(*p1++);
367 *p1 = *p0++ >> h_shift;
368 *p2++ = -(*p1++);
369 }
370
371 /*------------------------------------------------------------*
372 * Compute rrixix[][] needed for the codebook search. *
373 * This algorithm compute impulse response energy of all *
374 * positions (16) in each track (4). Total = 4x16 = 64. *
375 *------------------------------------------------------------*/
376
377 /* storage order --> i3i3, i2i2, i1i1, i0i0 */
378
379 /* Init pointers to last position of rrixix[] */
380 p0 = &rrixix[0][NB_POS - 1];
381 p1 = &rrixix[1][NB_POS - 1];
382 p2 = &rrixix[2][NB_POS - 1];
383 p3 = &rrixix[3][NB_POS - 1];
384
385 ptr_h1 = h;
386 cor = 0x00008000L; /* for rounding */
387 for (i = 0; i < NB_POS; i++)
388 {
389 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
390 ptr_h1++;
391 *p3-- = extract_h(cor);
392 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
393 ptr_h1++;
394 *p2-- = extract_h(cor);
395 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
396 ptr_h1++;
397 *p1-- = extract_h(cor);
398 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
399 ptr_h1++;
400 *p0-- = extract_h(cor);
401 }
402
403 /*------------------------------------------------------------*
404 * Compute rrixiy[][] needed for the codebook search. *
405 * This algorithm compute correlation between 2 pulses *
406 * (2 impulses responses) in 4 possible adjacents tracks. *
407 * (track 0-1, 1-2, 2-3 and 3-0). Total = 4x16x16 = 1024. *
408 *------------------------------------------------------------*/
409
410 /* storage order --> i2i3, i1i2, i0i1, i3i0 */
411
412 pos = MSIZE - 1;
413 ptr_hf = h + 1;
414
415 for (k = 0; k < NB_POS; k++)
416 {
417 p3 = &rrixiy[2][pos];
418 p2 = &rrixiy[1][pos];
419 p1 = &rrixiy[0][pos];
420 p0 = &rrixiy[3][pos - NB_POS];
421
422 cor = 0x00008000L; /* for rounding */
423 ptr_h1 = h;
424 ptr_h2 = ptr_hf;
425
426 for (i = k + 1; i < NB_POS; i++)
427 {
428 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
429 ptr_h1++;
430 ptr_h2++;
431 *p3 = extract_h(cor);
432 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
433 ptr_h1++;
434 ptr_h2++;
435 *p2 = extract_h(cor);
436 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
437 ptr_h1++;
438 ptr_h2++;
439 *p1 = extract_h(cor);
440 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
441 ptr_h1++;
442 ptr_h2++;
443 *p0 = extract_h(cor);
444
445 p3 -= (NB_POS + 1);
446 p2 -= (NB_POS + 1);
447 p1 -= (NB_POS + 1);
448 p0 -= (NB_POS + 1);
449 }
450 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
451 ptr_h1++;
452 ptr_h2++;
453 *p3 = extract_h(cor);
454 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
455 ptr_h1++;
456 ptr_h2++;
457 *p2 = extract_h(cor);
458 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
459 ptr_h1++;
460 ptr_h2++;
461 *p1 = extract_h(cor);
462
463 pos -= NB_POS;
464 ptr_hf += STEP;
465 }
466
467 /* storage order --> i3i0, i2i3, i1i2, i0i1 */
468
469 pos = MSIZE - 1;
470 ptr_hf = h + 3;
471
472 for (k = 0; k < NB_POS; k++)
473 {
474 p3 = &rrixiy[3][pos];
475 p2 = &rrixiy[2][pos - 1];
476 p1 = &rrixiy[1][pos - 1];
477 p0 = &rrixiy[0][pos - 1];
478
479 cor = 0x00008000L; /* for rounding */
480 ptr_h1 = h;
481 ptr_h2 = ptr_hf;
482
483 for (i = k + 1; i < NB_POS; i++)
484 {
485 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
486 ptr_h1++;
487 ptr_h2++;
488 *p3 = extract_h(cor);
489 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
490 ptr_h1++;
491 ptr_h2++;
492 *p2 = extract_h(cor);
493 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
494 ptr_h1++;
495 ptr_h2++;
496 *p1 = extract_h(cor);
497 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
498 ptr_h1++;
499 ptr_h2++;
500 *p0 = extract_h(cor);
501
502 p3 -= (NB_POS + 1);
503 p2 -= (NB_POS + 1);
504 p1 -= (NB_POS + 1);
505 p0 -= (NB_POS + 1);
506 }
507 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
508 ptr_h1++;
509 ptr_h2++;
510 *p3 = extract_h(cor);
511
512 pos--;
513 ptr_hf += STEP;
514 }
515
516 /*------------------------------------------------------------*
517 * Modification of rrixiy[][] to take signs into account. *
518 *------------------------------------------------------------*/
519
520 p0 = &rrixiy[0][0];
521
522 for (k = 0; k < NB_TRACK; k++)
523 {
524 j_temp = (k + 1)&0x03;
525 for (i = k; i < L_SUBFR; i += STEP)
526 {
527 psign = sign;
528 if (psign[i] < 0)
529 {
530 psign = vec;
531 }
532 j = j_temp;
533 for (; j < L_SUBFR; j += STEP)
534 {
535 *p0 = vo_mult(*p0, psign[j]);
536 p0++;
537 }
538 }
539 }
540
541 /*-------------------------------------------------------------------*
542 * Deep first search *
543 *-------------------------------------------------------------------*/
544
545 psk = -1;
546 alpk = 1;
547
548 for (k = 0; k < nbiter; k++)
549 {
550 j_temp = k<<2;
551 for (i = 0; i < nb_pulse; i++)
552 ipos[i] = tipos[j_temp + i];
553
554 if(nbbits == 20)
555 {
556 pos = 0;
557 ps = 0;
558 alp = 0;
559 for (i = 0; i < L_SUBFR; i++)
560 {
561 vec[i] = 0;
562 }
563 } else if ((nbbits == 36) || (nbbits == 44))
564 {
565 /* first stage: fix 2 pulses */
566 pos = 2;
567
568 ix = ind[0] = pos_max[ipos[0]];
569 iy = ind[1] = pos_max[ipos[1]];
570 ps = dn[ix] + dn[iy];
571 i = ix >> 2; /* ix / STEP */
572 j = iy >> 2; /* iy / STEP */
573 s = rrixix[ipos[0]][i] << 13;
574 s += rrixix[ipos[1]][j] << 13;
575 i = (i << 4) + j; /* (ix/STEP)*NB_POS + (iy/STEP) */
576 s += rrixiy[ipos[0]][i] << 14;
577 alp = (s + 0x8000) >> 16;
578 if (sign[ix] < 0)
579 p0 = h_inv - ix;
580 else
581 p0 = h - ix;
582 if (sign[iy] < 0)
583 p1 = h_inv - iy;
584 else
585 p1 = h - iy;
586
587 for (i = 0; i < L_SUBFR; i++)
588 {
589 vec[i] = (*p0++) + (*p1++);
590 }
591
592 if(nbbits == 44)
593 {
594 ipos[8] = 0;
595 ipos[9] = 1;
596 }
597 } else
598 {
599 /* first stage: fix 4 pulses */
600 pos = 4;
601
602 ix = ind[0] = pos_max[ipos[0]];
603 iy = ind[1] = pos_max[ipos[1]];
604 i = ind[2] = pos_max[ipos[2]];
605 j = ind[3] = pos_max[ipos[3]];
606 ps = add1(add1(add1(dn[ix], dn[iy]), dn[i]), dn[j]);
607
608 if (sign[ix] < 0)
609 p0 = h_inv - ix;
610 else
611 p0 = h - ix;
612
613 if (sign[iy] < 0)
614 p1 = h_inv - iy;
615 else
616 p1 = h - iy;
617
618 if (sign[i] < 0)
619 p2 = h_inv - i;
620 else
621 p2 = h - i;
622
623 if (sign[j] < 0)
624 p3 = h_inv - j;
625 else
626 p3 = h - j;
627
628 L_tmp = 0L;
629 for(i = 0; i < L_SUBFR; i++)
630 {
631 Word32 vecSq2;
632 vec[i] = add1(add1(add1(*p0++, *p1++), *p2++), *p3++);
633 vecSq2 = (vec[i] * vec[i]) << 1;
634 if (vecSq2 > 0 && L_tmp > INT_MAX - vecSq2) {
635 L_tmp = INT_MAX;
636 } else if (vecSq2 < 0 && L_tmp < INT_MIN - vecSq2) {
637 L_tmp = INT_MIN;
638 } else {
639 L_tmp += vecSq2;
640 }
641 }
642
643 alp = ((L_tmp >> 3) + 0x8000) >> 16;
644
645 if(nbbits == 72)
646 {
647 ipos[16] = 0;
648 ipos[17] = 1;
649 }
650 }
651
652 /* other stages of 2 pulses */
653
654 for (j = pos, st = 0; j < nb_pulse; j += 2, st++)
655 {
656 /*--------------------------------------------------*
657 * Calculate correlation of all possible positions *
658 * of the next 2 pulses with previous fixed pulses. *
659 * Each pulse can have 16 possible positions. *
660 *--------------------------------------------------*/
661 if(ipos[j] == 3)
662 {
663 cor_h_vec_30(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
664 }
665 else
666 {
667 #ifdef ASM_OPT /* asm optimization branch */
668 cor_h_vec_012_asm(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
669 #else
670 cor_h_vec_012(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
671 #endif
672 }
673 /*--------------------------------------------------*
674 * Find best positions of 2 pulses. *
675 *--------------------------------------------------*/
676 search_ixiy(nbpos[st], ipos[j], ipos[j + 1], &ps, &alp,
677 &ix, &iy, dn, dn2, cor_x, cor_y, rrixiy);
678
679 ind[j] = ix;
680 ind[j + 1] = iy;
681
682 if (sign[ix] < 0)
683 p0 = h_inv - ix;
684 else
685 p0 = h - ix;
686 if (sign[iy] < 0)
687 p1 = h_inv - iy;
688 else
689 p1 = h - iy;
690
691 for (i = 0; i < L_SUBFR; i+=4)
692 {
693 vec[i] += add1((*p0++), (*p1++));
694 vec[i+1] += add1((*p0++), (*p1++));
695 vec[i+2] += add1((*p0++), (*p1++));
696 vec[i+3] += add1((*p0++), (*p1++));
697 }
698 }
699 /* memorise the best codevector */
700 ps = vo_mult(ps, ps);
701 s = L_sub(vo_L_mult(alpk, ps), vo_L_mult(psk, alp));
702 if (s > 0)
703 {
704 psk = ps;
705 alpk = alp;
706 for (i = 0; i < nb_pulse; i++)
707 {
708 codvec[i] = ind[i];
709 }
710 for (i = 0; i < L_SUBFR; i++)
711 {
712 y[i] = vec[i];
713 }
714 }
715 }
716 /*-------------------------------------------------------------------*
717 * Build the codeword, the filtered codeword and index of codevector.*
718 *-------------------------------------------------------------------*/
719 for (i = 0; i < NPMAXPT * NB_TRACK; i++)
720 {
721 ind[i] = -1;
722 }
723 for (i = 0; i < L_SUBFR; i++)
724 {
725 code[i] = 0;
726 y[i] = vo_shr_r(y[i], 3); /* Q12 to Q9 */
727 }
728 val = (512 >> h_shift); /* codeword in Q9 format */
729 for (k = 0; k < nb_pulse; k++)
730 {
731 i = codvec[k]; /* read pulse position */
732 j = sign[i]; /* read sign */
733 index = i >> 2; /* index = pos of pulse (0..15) */
734 track = (Word16) (i & 0x03); /* track = i % NB_TRACK (0..3) */
735
736 if (j > 0)
737 {
738 code[i] += val;
739 codvec[k] += 128;
740 } else
741 {
742 code[i] -= val;
743 index += NB_POS;
744 }
745
746 i = (Word16)((vo_L_mult(track, NPMAXPT) >> 1));
747
748 while (ind[i] >= 0)
749 {
750 i += 1;
751 }
752 ind[i] = index;
753 }
754
755 k = 0;
756 /* Build index of codevector */
757 if(nbbits == 20)
758 {
759 for (track = 0; track < NB_TRACK; track++)
760 {
761 _index[track] = (Word16)(quant_1p_N1(ind[k], 4));
762 k += NPMAXPT;
763 }
764 } else if(nbbits == 36)
765 {
766 for (track = 0; track < NB_TRACK; track++)
767 {
768 _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
769 k += NPMAXPT;
770 }
771 } else if(nbbits == 44)
772 {
773 for (track = 0; track < NB_TRACK - 2; track++)
774 {
775 _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
776 k += NPMAXPT;
777 }
778 for (track = 2; track < NB_TRACK; track++)
779 {
780 _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
781 k += NPMAXPT;
782 }
783 } else if(nbbits == 52)
784 {
785 for (track = 0; track < NB_TRACK; track++)
786 {
787 _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
788 k += NPMAXPT;
789 }
790 } else if(nbbits == 64)
791 {
792 for (track = 0; track < NB_TRACK; track++)
793 {
794 L_index = quant_4p_4N(&ind[k], 4);
795 _index[track] = (Word16)((L_index >> 14) & 3);
796 _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
797 k += NPMAXPT;
798 }
799 } else if(nbbits == 72)
800 {
801 for (track = 0; track < NB_TRACK - 2; track++)
802 {
803 L_index = quant_5p_5N(&ind[k], 4);
804 _index[track] = (Word16)((L_index >> 10) & 0x03FF);
805 _index[track + NB_TRACK] = (Word16)(L_index & 0x03FF);
806 k += NPMAXPT;
807 }
808 for (track = 2; track < NB_TRACK; track++)
809 {
810 L_index = quant_4p_4N(&ind[k], 4);
811 _index[track] = (Word16)((L_index >> 14) & 3);
812 _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
813 k += NPMAXPT;
814 }
815 } else if(nbbits == 88)
816 {
817 for (track = 0; track < NB_TRACK; track++)
818 {
819 L_index = quant_6p_6N_2(&ind[k], 4);
820 _index[track] = (Word16)((L_index >> 11) & 0x07FF);
821 _index[track + NB_TRACK] = (Word16)(L_index & 0x07FF);
822 k += NPMAXPT;
823 }
824 }
825 return;
826 }
827
828
829 /*-------------------------------------------------------------------*
830 * Function cor_h_vec() *
831 * ~~~~~~~~~~~~~~~~~~~~~ *
832 * Compute correlations of h[] with vec[] for the specified track. *
833 *-------------------------------------------------------------------*/
cor_h_vec_30(Word16 h[],Word16 vec[],Word16 track,Word16 sign[],Word16 rrixix[][NB_POS],Word16 cor_1[],Word16 cor_2[])834 void cor_h_vec_30(
835 Word16 h[], /* (i) scaled impulse response */
836 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */
837 Word16 track, /* (i) track to use */
838 Word16 sign[], /* (i) sign vector */
839 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */
840 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */
841 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */
842 )
843 {
844 Word32 i, j, pos, corr;
845 Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y;
846 Word32 L_sum1,L_sum2;
847 cor_x = cor_1;
848 cor_y = cor_2;
849 p0 = rrixix[track];
850 p3 = rrixix[0];
851 pos = track;
852
853 for (i = 0; i < NB_POS; i+=2)
854 {
855 L_sum1 = L_sum2 = 0L;
856 p1 = h;
857 p2 = &vec[pos];
858 for (j=pos;j < L_SUBFR; j++)
859 {
860 L_sum1 = L_add(L_sum1, *p1 * *p2);
861 p2-=3;
862 L_sum2 = L_add(L_sum2, *p1++ * *p2);
863 p2+=4;
864 }
865 p2-=3;
866 L_sum2 = L_add(L_sum2, *p1++ * *p2++);
867 L_sum2 = L_add(L_sum2, *p1++ * *p2++);
868 L_sum2 = L_add(L_sum2, *p1++ * *p2++);
869
870 L_sum1 = L_shl(L_sum1, 2);
871 L_sum2 = L_shl(L_sum2, 2);
872
873 corr = voround(L_sum1);
874 *cor_x++ = mult(corr, sign[pos]) + (*p0++);
875 corr = voround(L_sum2);
876 *cor_y++ = mult(corr, sign[pos-3]) + (*p3++);
877 pos += STEP;
878
879 L_sum1 = L_sum2 = 0L;
880 p1 = h;
881 p2 = &vec[pos];
882 for (j=pos;j < L_SUBFR; j++)
883 {
884 L_sum1 = L_add(L_sum1, *p1 * *p2);
885 p2-=3;
886 L_sum2 = L_add(L_sum2, *p1++ * *p2);
887 p2+=4;
888 }
889 p2-=3;
890 L_sum2 = L_add(L_sum2, *p1++ * *p2++);
891 L_sum2 = L_add(L_sum2, *p1++ * *p2++);
892 L_sum2 = L_add(L_sum2, *p1++ * *p2++);
893
894 L_sum1 = L_shl(L_sum1, 2);
895 L_sum2 = L_shl(L_sum2, 2);
896
897 corr = voround(L_sum1);
898 *cor_x++ = mult(corr, sign[pos]) + (*p0++);
899 corr = voround(L_sum2);
900 *cor_y++ = mult(corr, sign[pos-3]) + (*p3++);
901 pos += STEP;
902 }
903 return;
904 }
905
cor_h_vec_012(Word16 h[],Word16 vec[],Word16 track,Word16 sign[],Word16 rrixix[][NB_POS],Word16 cor_1[],Word16 cor_2[])906 void cor_h_vec_012(
907 Word16 h[], /* (i) scaled impulse response */
908 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */
909 Word16 track, /* (i) track to use */
910 Word16 sign[], /* (i) sign vector */
911 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */
912 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */
913 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */
914 )
915 {
916 Word32 i, j, pos, corr;
917 Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y;
918 Word32 L_sum1,L_sum2;
919 cor_x = cor_1;
920 cor_y = cor_2;
921 p0 = rrixix[track];
922 p3 = rrixix[track+1];
923 pos = track;
924
925 for (i = 0; i < NB_POS; i+=2)
926 {
927 L_sum1 = L_sum2 = 0L;
928 p1 = h;
929 p2 = &vec[pos];
930 for (j=62-pos ;j >= 0; j--)
931 {
932 L_sum1 = L_add(L_sum1, *p1 * *p2++);
933 L_sum2 = L_add(L_sum2, *p1++ * *p2);
934 }
935 L_sum1 = L_add(L_sum1, *p1 * *p2);
936 L_sum1 = L_shl(L_sum1, 2);
937 L_sum2 = L_shl(L_sum2, 2);
938
939 corr = voround(L_sum1);
940 cor_x[i] = vo_mult(corr, sign[pos]) + (*p0++);
941 corr = voround(L_sum2);
942 cor_y[i] = vo_mult(corr, sign[pos + 1]) + (*p3++);
943 pos += STEP;
944
945 L_sum1 = L_sum2 = 0L;
946 p1 = h;
947 p2 = &vec[pos];
948 for (j= 62-pos;j >= 0; j--)
949 {
950 L_sum1 = L_add(L_sum1, *p1 * *p2++);
951 L_sum2 = L_add(L_sum2, *p1++ * *p2);
952 }
953 L_sum1 = L_add(L_sum1, *p1 * *p2);
954 L_sum1 = L_shl(L_sum1, 2);
955 L_sum2 = L_shl(L_sum2, 2);
956
957 corr = voround(L_sum1);
958 cor_x[i+1] = vo_mult(corr, sign[pos]) + (*p0++);
959 corr = voround(L_sum2);
960 cor_y[i+1] = vo_mult(corr, sign[pos + 1]) + (*p3++);
961 pos += STEP;
962 }
963 return;
964 }
965
966 /*-------------------------------------------------------------------*
967 * Function search_ixiy() *
968 * ~~~~~~~~~~~~~~~~~~~~~~~ *
969 * Find the best positions of 2 pulses in a subframe. *
970 *-------------------------------------------------------------------*/
971
search_ixiy(Word16 nb_pos_ix,Word16 track_x,Word16 track_y,Word16 * ps,Word16 * alp,Word16 * ix,Word16 * iy,Word16 dn[],Word16 dn2[],Word16 cor_x[],Word16 cor_y[],Word16 rrixiy[][MSIZE])972 void search_ixiy(
973 Word16 nb_pos_ix, /* (i) nb of pos for pulse 1 (1..8) */
974 Word16 track_x, /* (i) track of pulse 1 */
975 Word16 track_y, /* (i) track of pulse 2 */
976 Word16 * ps, /* (i/o) correlation of all fixed pulses */
977 Word16 * alp, /* (i/o) energy of all fixed pulses */
978 Word16 * ix, /* (o) position of pulse 1 */
979 Word16 * iy, /* (o) position of pulse 2 */
980 Word16 dn[], /* (i) corr. between target and h[] */
981 Word16 dn2[], /* (i) vector of selected positions */
982 Word16 cor_x[], /* (i) corr. of pulse 1 with fixed pulses */
983 Word16 cor_y[], /* (i) corr. of pulse 2 with fixed pulses */
984 Word16 rrixiy[][MSIZE] /* (i) corr. of pulse 1 with pulse 2 */
985 )
986 {
987 Word32 x, y, pos, thres_ix;
988 Word16 ps1, ps2, sq, sqk;
989 Word16 alp_16, alpk;
990 Word16 *p0, *p1, *p2;
991 Word32 s, alp0, alp1, alp2;
992
993 p0 = cor_x;
994 p1 = cor_y;
995 p2 = rrixiy[track_x];
996
997 thres_ix = nb_pos_ix - NB_MAX;
998
999 alp0 = L_deposit_h(*alp);
1000 alp0 = (alp0 + 0x00008000L); /* for rounding */
1001
1002 sqk = -1;
1003 alpk = 1;
1004
1005 for (x = track_x; x < L_SUBFR; x += STEP)
1006 {
1007 ps1 = *ps + dn[x];
1008 alp1 = L_add(alp0, ((*p0++)<<13));
1009
1010 if (dn2[x] < thres_ix)
1011 {
1012 pos = -1;
1013 for (y = track_y; y < L_SUBFR; y += STEP)
1014 {
1015 ps2 = add1(ps1, dn[y]);
1016
1017 alp2 = L_add(alp1, ((*p1++)<<13));
1018 alp2 = L_add(alp2, ((*p2++)<<14));
1019 alp_16 = extract_h(alp2);
1020 sq = vo_mult(ps2, ps2);
1021 s = L_sub(vo_L_mult(alpk, sq), L_mult(sqk, alp_16));
1022
1023 if (s > 0)
1024 {
1025 sqk = sq;
1026 alpk = alp_16;
1027 pos = y;
1028 }
1029 }
1030 p1 -= NB_POS;
1031
1032 if (pos >= 0)
1033 {
1034 *ix = x;
1035 *iy = pos;
1036 }
1037 } else
1038 {
1039 p2 += NB_POS;
1040 }
1041 }
1042
1043 *ps = add1(*ps, add1(dn[*ix], dn[*iy]));
1044 *alp = alpk;
1045
1046 return;
1047 }
1048
1049
1050
1051
1052