1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 /****************************************************************************************
19 Portions of this file are derived from the following 3GPP standard:
20 
21     3GPP TS 26.073
22     ANSI-C code for the Adaptive Multi-Rate (AMR) speech codec
23     Available from http://www.3gpp.org
24 
25 (C) 2004, 3GPP Organizational Partners (ARIB, ATIS, CCSA, ETSI, TTA, TTC)
26 Permission to distribute, modify and use this file under the standard license
27 terms listed above has been obtained from the copyright holder.
28 ****************************************************************************************/
29 /*
30 ------------------------------------------------------------------------------
31 
32 
33 
34  Pathname: ./audio/gsm-amr/c/src/cor_h.c
35 
36      Date: 06/12/2000
37 
38 ------------------------------------------------------------------------------
39  REVISION HISTORY
40 
41  Description: Updated template used to PV coding template. First attempt at
42           optimizing C code.
43 
44  Description: Used MAX_16 and MIN_16 when checking the result of Inv_sqrt.
45           Synced up to the new template.
46 
47  Description: Added setting of Overflow flag in inlined code.
48 
49  Description: Took out cor_h_x function and put it in its own file. Sync'ed
50           up with the single_func_template.c template. Delete version
51           ID variable.
52 
53  Description: Synchronized file with UMTS version 3.2.0. Updated coding
54               template. Removed unnecessary include files.
55 
56  Description: Fixed portion of the code that builds the rr[] matrix. There
57               was an error in the original inlining of code that caused
58               the code to be not bit-exact with UMTS version 3.2.0.
59 
60  Description: Added calls to L_add() and mult() in the code to handle overflow
61               scenario. Moved cor_h.h after cnst.h in the Include section.
62               Doing this allows the unit test to build using the cnst.h in the
63               /test/include directory. Fixed initialization of the accumulator
64               in the first calculation of the sum of squares.
65 
66  Description: Made the following changes per comments from Phase 2/3 review:
67               1. Used #define value instead of hard-coded numbers in the code.
68               2. Fixed typecasting issue with TI C compiler.
69               3. Removed typecasting of 0x00008000L in the call to L_add.
70 
71  Description: Changed pOverflow from a global variable into a function
72  parameter.
73 
74  Description:
75             1. Added pointer to avoid adding offsets in every pass
76             2. Eliminate variables defined as registers
77             3. Removed extra check for overflow by doing scaling right
78                after overflow is detected.
79             4. Eliminated calls to basic operations (like extract) not
80                needed because of the nature of the number (all bounded)
81             5. Eliminated duplicate loop accessing same data
82             6. Simplified matrix addressing by use of pointers
83 
84  Description:
85               1. Eliminated unused include files.
86               2. Access twice the number of points when dealing with matrices
87                  and in the process only 3 pointers (instead of 4) are needed
88               3. Replaced array addressing (array sign[]) by pointers
89 
90  Description: Changed round function name to pv_round to avoid conflict with
91               round function in C standard library.
92 
93  Description: Using inlines from fxp_arithmetic.h .
94 
95  Description: Replacing fxp_arithmetic.h with basic_op.h.
96 
97  Description:
98 
99 ------------------------------------------------------------------------------
100 */
101 
102 /*----------------------------------------------------------------------------
103 ; INCLUDES
104 ----------------------------------------------------------------------------*/
105 #include "cnst.h"
106 #include "cor_h.h"
107 #include "basicop_malloc.h"
108 #include "inv_sqrt.h"
109 #include "basic_op.h"
110 
111 /*----------------------------------------------------------------------------
112 ; MACROS
113 ; Define module specific macros here
114 ----------------------------------------------------------------------------*/
115 
116 /*----------------------------------------------------------------------------
117 ; DEFINES
118 ; Include all pre-processor statements here. Include conditional
119 ; compile variables also.
120 ----------------------------------------------------------------------------*/
121 
122 /*----------------------------------------------------------------------------
123 ; LOCAL FUNCTION DEFINITIONS
124 ; Function Prototype declaration
125 ----------------------------------------------------------------------------*/
126 
127 /*----------------------------------------------------------------------------
128 ; LOCAL STORE/BUFFER/POINTER DEFINITIONS
129 ; Variable declaration - defined here and used outside this module
130 ----------------------------------------------------------------------------*/
131 
132 /*
133 ------------------------------------------------------------------------------
134  FUNCTION NAME: cor_h
135 ------------------------------------------------------------------------------
136  INPUT AND OUTPUT DEFINITIONS
137 
138  Inputs:
139     h = vector containing the impulse response of the weighted synthesis
140         filter; vector contents are of type Word16; vector length is
141         2 * L_SUBFR
142     sign = vector containing the sign information for the correlation
143            values; vector contents are of type Word16; vector length is
144            L_CODE
145     rr = autocorrelation matrix; matrix contents are of type Word16;
146          matrix dimension is L_CODE by L_CODE
147 
148  Outputs:
149     rr contents are the newly calculated autocorrelation values
150 
151  Returns:
152     None
153 
154  Global Variables Used:
155     None
156 
157  Local Variables Needed:
158     None
159 
160 ------------------------------------------------------------------------------
161  FUNCTION DESCRIPTION
162 
163  This function computes correlations of the impulse response (h) needed for
164  the codebook search, and includes the sign information into the correlations.
165 
166  The correlations are given by:
167     rr[i][j] = sum_{n=i}^{L-1} h[n-i] h[n-j];   i>=j; i,j=0,...,L-1
168 
169  The sign information is included by:
170     rr[i][j] = rr[i][j]*sign[i]*sign[j]
171 
172 ------------------------------------------------------------------------------
173  REQUIREMENTS
174 
175  None
176 
177 ------------------------------------------------------------------------------
178  REFERENCES
179 
180  cor_h.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001
181 
182 ------------------------------------------------------------------------------
183  PSEUDO-CODE
184 
185 void cor_h (
186     Word16 h[],         // (i) : impulse response of weighted synthesis
187                                  filter
188     Word16 sign[],      // (i) : sign of d[n]
189     Word16 rr[][L_CODE] // (o) : matrix of autocorrelation
190 )
191 {
192     Word16 i, j, k, dec, h2[L_CODE];
193     Word32 s;
194 
195     // Scaling for maximum precision
196 
197     s = 2;
198     for (i = 0; i < L_CODE; i++)
199         s = L_mac (s, h[i], h[i]);
200 
201     j = sub (extract_h (s), 32767);
202     if (j == 0)
203     {
204         for (i = 0; i < L_CODE; i++)
205         {
206             h2[i] = shr (h[i], 1);
207         }
208     }
209     else
210     {
211         s = L_shr (s, 1);
212         k = extract_h (L_shl (Inv_sqrt (s), 7));
213         k = mult (k, 32440);                     // k = 0.99*k
214 
215         for (i = 0; i < L_CODE; i++)
216         {
217             h2[i] = pv_round (L_shl (L_mult (h[i], k), 9));
218         }
219     }
220 
221     // build matrix rr[]
222     s = 0;
223     i = L_CODE - 1;
224     for (k = 0; k < L_CODE; k++, i--)
225     {
226         s = L_mac (s, h2[k], h2[k]);
227         rr[i][i] = pv_round (s);
228     }
229 
230     for (dec = 1; dec < L_CODE; dec++)
231     {
232         s = 0;
233         j = L_CODE - 1;
234         i = sub (j, dec);
235         for (k = 0; k < (L_CODE - dec); k++, i--, j--)
236         {
237             s = L_mac (s, h2[k], h2[k + dec]);
238             rr[j][i] = mult (pv_round (s), mult (sign[i], sign[j]));
239             rr[i][j] = rr[j][i];
240         }
241     }
242 }
243 
244 ---------------------------------------------------------------------------
245  RESOURCES USED [optional]
246 
247  When the code is written for a specific target processor,
248  the resources used should be documented below.
249 
250  HEAP MEMORY USED: x bytes
251 
252  STACK MEMORY USED: x bytes
253 
254  CLOCK CYCLES: (cycle count equation for this function) + (variable
255                 used to represent cycle count for each subroutine
256                 called)
257      where: (cycle count variable) = cycle count for [subroutine
258                                      name]
259 
260 ------------------------------------------------------------------------------
261  CAUTION [optional]
262  [State any special notes, constraints or cautions for users of this function]
263 
264 ------------------------------------------------------------------------------
265 */
266 
/*
 * cor_h
 *
 * Computes the correlations of the impulse response h[] that are needed by
 * the codebook search and folds the sign information into them:
 *
 *     rr[i][i] = round( sum_{k=0}^{L_CODE-1-i} h2[k] * h2[k] )
 *     rr[i][j] = rr[j][i]
 *              = round( sum_k h2[k] * h2[k + |i-j|] ) * sign[i] * sign[j]
 *
 * where h2[] is a scaled copy of the first L_CODE samples of h[].
 *
 * Parameters:
 *     h         (i) impulse response of the weighted synthesis filter; the
 *                   first L_CODE samples are read
 *     sign      (i) sign of d[n]; L_CODE entries are read
 *     rr        (o) L_CODE x L_CODE matrix; every element is written
 *     pOverflow (o) only forwarded to Inv_sqrt(); not accessed otherwise
 *
 * Returns: nothing.
 *
 * Implementation notes:
 *   - All products are accumulated without the doubling done by L_mac(),
 *     so rounding uses (x + 0x4000) >> 15 instead of (x + 0x8000) >> 16.
 *   - The matrix is addressed by index; no pointer outside rr[][] is ever
 *     formed.
 *   - Right shifts of negative intermediate values rely on the compiler
 *     implementing them as arithmetic shifts.
 */
void cor_h(
    Word16 h[],          /* (i) : impulse response of weighted synthesis
                                  filter                                  */
    Word16 sign[],       /* (i) : sign of d[n]                            */
    Word16 rr[][L_CODE], /* (o) : matrix of autocorrelation               */
    Flag  *pOverflow     /* (o) : overflow flag, passed on to Inv_sqrt()  */
)
{
    Word16 h2[L_CODE];   /* scaled copy of h[]                            */
    Word16 scale;        /* scale factor applied to h[]                   */
    Word16 corr;         /* rounded correlation                           */
    Word16 sgn;          /* product of the two sign entries               */
    Word16 val;          /* signed correlation stored into rr[][]         */
    Word32 s;            /* accumulator (energy, diagonal, lag)           */
    Word32 s2;           /* accumulator for lag + 1                       */
    int    k;
    int    lag;
    int    row;
    int    col;

    /* Scaling for maximum precision */

    /* The accumulator starts at 1 and is not doubled (the reference code  */
    /* starts at 2 and uses L_mac). Every square is in [0, 2^30], so as    */
    /* long as the running sum is below 2^30 the next addition cannot      */
    /* overflow 32 bits. Once the sum reaches 2^30 the "large energy"      */
    /* decision is already final, so the accumulation stops there instead  */
    /* of overflowing the signed accumulator.                              */
    s = 1;

    for (k = 0; k < L_CODE; k++)
    {
        s = amrnb_fxp_mac_16_by_16bb((Word32) h[k], (Word32) h[k], s);

        if (s >= (Word32) 0x40000000L)
        {
            break;
        }
    }

    if (s >= (Word32) 0x40000000L)
    {
        /* Energy too large to normalise: just halve the response. */
        for (k = 0; k < L_CODE; k++)
        {
            h2[k] = (Word16)(h[k] >> 1);
        }
    }
    else
    {
        s = Inv_sqrt(s, pOverflow);

        if (s < (Word32) 0x00ffffffL)
        {
            /* k = 0.99*k */
            scale = (Word16)(((s >> 9) * 32440) >> 15);
        }
        else
        {
            /* NOTE(review): with saturating L_shl()/mult() the reference  */
            /* pseudo-code appears to yield mult(32767, 32440) = 32439 in  */
            /* this case; 32440 is kept unchanged here - confirm against   */
            /* the conformance vectors before altering it.                 */
            scale = 32440;  /* 0.99 */
        }

        /* h2[k] = round(h[k] * scale / 64) */
        for (k = 0; k < L_CODE; k++)
        {
            h2[k] = (Word16)((amrnb_fxp_mac_16_by_16bb((Word32) h[k], (Word32) scale, 0x020L)) >> 6);
        }
    }

    /* build matrix rr[] */

    /* Main diagonal, filled from the bottom-right corner upwards with the */
    /* running energy of h2[].                                             */
    s = 0;

    for (k = 0; k < L_CODE; k++)
    {
        s = amrnb_fxp_mac_16_by_16bb((Word32) h2[k], (Word32) h2[k], s);
        rr[L_CODE - 1 - k][L_CODE - 1 - k] = (Word16)((s + 0x00004000L) >> 15);
    }

    /* Off-diagonals. Two neighbouring lags (lag and lag + 1) are handled  */
    /* per pass so that each h2[k] and sign[row] is used for both.         */
    for (lag = 1; lag < L_CODE; lag += 2)
    {
        s  = 0;                     /* running correlation at lag          */
        s2 = 0;                     /* running correlation at lag + 1      */

        row = L_CODE - 1;
        col = L_CODE - 1 - lag;

        /* lag + 1 has one term fewer than lag, hence L_CODE - 1 - lag     */
        /* passes here; inside the loop col >= 1, so col - 1 is valid.     */
        for (k = 0; k < L_CODE - 1 - lag; k++)
        {
            s  = amrnb_fxp_mac_16_by_16bb((Word32) h2[k], (Word32) h2[k + lag], s);
            s2 = amrnb_fxp_mac_16_by_16bb((Word32) h2[k], (Word32) h2[k + lag + 1], s2);

            /* element pair at distance lag from the diagonal */
            corr = (Word16)((s + 0x00004000L) >> 15);
            sgn  = (Word16)(((Word32) sign[row] * sign[col]) >> 15);
            val  = (Word16)(((Word32) corr * sgn) >> 15);
            rr[col][row] = val;
            rr[row][col] = val;

            /* element pair at distance lag + 1 from the diagonal */
            corr = (Word16)((s2 + 0x00004000L) >> 15);
            sgn  = (Word16)(((Word32) sign[row] * sign[col - 1]) >> 15);
            val  = (Word16)(((Word32) corr * sgn) >> 15);
            rr[row][col - 1] = val;
            rr[col - 1][row] = val;

            row--;
            col--;
        }

        /* Last term of the odd lag: k == L_CODE - 1 - lag, row == lag,    */
        /* col == 0.                                                       */
        s = amrnb_fxp_mac_16_by_16bb((Word32) h2[k], (Word32) h2[k + lag], s);

        corr = (Word16)((s + 0x00004000L) >> 15);
        sgn  = (Word16)(((Word32) sign[row] * sign[col]) >> 15);
        val  = (Word16)(((Word32) corr * sgn) >> 15);
        rr[row][col] = val;
        rr[col][row] = val;
    }
}
429 
430