1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18 /****************************************************************************************
19 Portions of this file are derived from the following 3GPP standard:
20
21 3GPP TS 26.073
22 ANSI-C code for the Adaptive Multi-Rate (AMR) speech codec
23 Available from http://www.3gpp.org
24
25 (C) 2004, 3GPP Organizational Partners (ARIB, ATIS, CCSA, ETSI, TTA, TTC)
26 Permission to distribute, modify and use this file under the standard license
27 terms listed above has been obtained from the copyright holder.
28 ****************************************************************************************/
29 /*
30 ------------------------------------------------------------------------------
31
32
33
34 Pathname: ./audio/gsm-amr/c/src/cor_h.c
35
36 Date: 06/12/2000
37
38 ------------------------------------------------------------------------------
39 REVISION HISTORY
40
41 Description: Updated template used to PV coding template. First attempt at
42 optimizing C code.
43
44 Description: Used MAX_16 and MIN_16 when checking the result of Inv_sqrt.
45 Synced up to the new template.
46
47 Description: Added setting of Overflow flag in inlined code.
48
49 Description: Took out cor_h_x function and put it in its own file. Sync'ed
50 up with the single_func_template.c template. Delete version
51 ID variable.
52
53 Description: Synchronized file with UMTS version 3.2.0. Updated coding
54 template. Removed unnecessary include files.
55
56 Description: Fixed portion of the code that builds the rr[] matrix. There
57 was an error in the original inlining of code that caused
58 the code to be not bit-exact with UMTS version 3.2.0.
59
60 Description: Added calls to L_add() and mult() in the code to handle overflow
61 scenario. Moved cor_h.h after cnst.h in the Include section.
62 Doing this allows the unit test to build using the cnst.h in the
63 /test/include directory. Fixed initialization of the accumulator
64 in the first calculation of the sum of squares.
65
66 Description: Made the following changes per comments from Phase 2/3 review:
67 1. Used #define value instead of hard-coded numbers in the code.
68 2. Fixed typecasting issue with TI C compiler.
69 3. Removed typecasting of 0x00008000L in the call to L_add.
70
71 Description: Changed pOverflow from a global variable into a function
72 parameter.
73
74 Description:
75 1. Added pointer to avoid adding offsets in every pass
76 2. Eliminate variables defined as registers
77 3. Removed extra check for overflow by doing scaling right
78 after overflow is detected.
79 4. Eliminated calls to basic operations (like extract) not
80 needed because of the nature of the number (all bounded)
81 5. Eliminated duplicate loop accessing same data
82 6. Simplified matrix addressing by use of pointers
83
84 Description:
85 1. Eliminated unused include files.
86 2. Access twice the number of points when dealing with matrices
87 and in the process only 3 pointers (instead of 4) are needed
88 3. Replaced array addressing (array sign[]) by pointers
89
90 Description: Changed round function name to pv_round to avoid conflict with
91 round function in C standard library.
92
93 Description: Using inlines from fxp_arithmetic.h .
94
95 Description: Replacing fxp_arithmetic.h with basic_op.h.
96
97 Description:
98
99 ------------------------------------------------------------------------------
100 */
101
102 /*----------------------------------------------------------------------------
103 ; INCLUDES
104 ----------------------------------------------------------------------------*/
105 #include "cnst.h"
106 #include "cor_h.h"
107 #include "basicop_malloc.h"
108 #include "inv_sqrt.h"
109 #include "basic_op.h"
110
111 /*----------------------------------------------------------------------------
112 ; MACROS
113 ; Define module specific macros here
114 ----------------------------------------------------------------------------*/
115
116 /*----------------------------------------------------------------------------
117 ; DEFINES
118 ; Include all pre-processor statements here. Include conditional
119 ; compile variables also.
120 ----------------------------------------------------------------------------*/
121
122 /*----------------------------------------------------------------------------
123 ; LOCAL FUNCTION DEFINITIONS
124 ; Function Prototype declaration
125 ----------------------------------------------------------------------------*/
126
127 /*----------------------------------------------------------------------------
128 ; LOCAL STORE/BUFFER/POINTER DEFINITIONS
129 ; Variable declaration - defined here and used outside this module
130 ----------------------------------------------------------------------------*/
131
132 /*
133 ------------------------------------------------------------------------------
134 FUNCTION NAME: cor_h
135 ------------------------------------------------------------------------------
136 INPUT AND OUTPUT DEFINITIONS
137
138 Inputs:
139 h = vector containing the impulse response of the weighted synthesis
140 filter; vector contents are of type Word16; vector length is
141 2 * L_SUBFR
142 sign = vector containing the sign information for the correlation
143 values; vector contents are of type Word16; vector length is
144 L_CODE
145 rr = autocorrelation matrix; matrix contents are of type Word16;
146 matrix dimension is L_CODE by L_CODE
147
148 Outputs:
149 rr contents are the newly calculated autocorrelation values
150
151 Returns:
152 None
153
154 Global Variables Used:
155 None
156
157 Local Variables Needed:
158 None
159
160 ------------------------------------------------------------------------------
161 FUNCTION DESCRIPTION
162
163 This function computes correlations of the impulse response (h) needed for
164 the codebook search, and includes the sign information into the correlations.
165
166 The correlations are given by:
167 rr[i][j] = sum_{n=i}^{L-1} h[n-i] h[n-j]; i>=j; i,j=0,...,L-1
168
169 The sign information is included by:
170 rr[i][j] = rr[i][j]*sign[i]*sign[j]
171
172 ------------------------------------------------------------------------------
173 REQUIREMENTS
174
175 None
176
177 ------------------------------------------------------------------------------
178 REFERENCES
179
180 cor_h.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001
181
182 ------------------------------------------------------------------------------
183 PSEUDO-CODE
184
185 void cor_h (
186 Word16 h[], // (i) : impulse response of weighted synthesis
187 filter
188 Word16 sign[], // (i) : sign of d[n]
189 Word16 rr[][L_CODE] // (o) : matrix of autocorrelation
190 )
191 {
192 Word16 i, j, k, dec, h2[L_CODE];
193 Word32 s;
194
195 // Scaling for maximum precision
196
197 s = 2;
198 for (i = 0; i < L_CODE; i++)
199 s = L_mac (s, h[i], h[i]);
200
201 j = sub (extract_h (s), 32767);
202 if (j == 0)
203 {
204 for (i = 0; i < L_CODE; i++)
205 {
206 h2[i] = shr (h[i], 1);
207 }
208 }
209 else
210 {
211 s = L_shr (s, 1);
212 k = extract_h (L_shl (Inv_sqrt (s), 7));
213 k = mult (k, 32440); // k = 0.99*k
214
215 for (i = 0; i < L_CODE; i++)
216 {
217 h2[i] = pv_round (L_shl (L_mult (h[i], k), 9));
218 }
219 }
220
221 // build matrix rr[]
222 s = 0;
223 i = L_CODE - 1;
224 for (k = 0; k < L_CODE; k++, i--)
225 {
226 s = L_mac (s, h2[k], h2[k]);
227 rr[i][i] = pv_round (s);
228 }
229
230 for (dec = 1; dec < L_CODE; dec++)
231 {
232 s = 0;
233 j = L_CODE - 1;
234 i = sub (j, dec);
235 for (k = 0; k < (L_CODE - dec); k++, i--, j--)
236 {
237 s = L_mac (s, h2[k], h2[k + dec]);
238 rr[j][i] = mult (pv_round (s), mult (sign[i], sign[j]));
239 rr[i][j] = rr[j][i];
240 }
241 }
242 }
243
244 ---------------------------------------------------------------------------
245 RESOURCES USED [optional]
246
247 When the code is written for a specific target processor the
248 resources used should be documented below.
249
250 HEAP MEMORY USED: x bytes
251
252 STACK MEMORY USED: x bytes
253
254 CLOCK CYCLES: (cycle count equation for this function) + (variable
255 used to represent cycle count for each subroutine
256 called)
257 where: (cycle count variable) = cycle count for [subroutine
258 name]
259
260 ------------------------------------------------------------------------------
261 CAUTION [optional]
262 [State any special notes, constraints or cautions for users of this function]
263
264 ------------------------------------------------------------------------------
265 */
266
/*
 * cor_h - build the sign-weighted autocorrelation matrix of the impulse
 *         response used by the codebook search.
 *
 *   h         (i) impulse response of the weighted synthesis filter; entries
 *                 h[0] .. h[L_CODE-1] are read
 *   sign      (i) sign information; entries sign[0] .. sign[L_CODE-1] are read
 *   rr        (o) L_CODE x L_CODE matrix, filled symmetrically
 *   pOverflow     only forwarded to Inv_sqrt(); not accessed here otherwise
 *
 * Processing steps:
 *   1. h2[] = copy of h[], either halved (large energy) or multiplied by
 *      0.99 / sqrt(energy) so the correlations use the available precision.
 *   2. Diagonal: rr[i][i] = rounded running sum of h2[k]^2, filled from the
 *      bottom-right corner upwards.
 *   3. Off-diagonals: for every lag, the rounded running correlation is
 *      multiplied by (sign[i] * sign[j]) >> 15 and stored at both rr[i][j]
 *      and rr[j][i].  Two neighbouring lags are produced per pass so each
 *      h2[k] is fetched once for both.
 *
 * The matrix is addressed with indices instead of walking raw pointers, so
 * no pointer is ever formed outside rr[][] (the pointer-walking form stepped
 * before rr[0][0] at the end of its loops).
 */
void cor_h(
    Word16 h[],          /* (i) : impulse response of weighted synthesis
                                  filter                                  */
    Word16 sign[],       /* (i) : sign of d[n]                            */
    Word16 rr[][L_CODE], /* (o) : matrix of autocorrelation               */
    Flag  *pOverflow     /*       passed through to Inv_sqrt()            */
)
{
    Word16 h2[L_CODE];   /* scaled copy of h[]                            */
    Word16 scale;        /* multiplier applied to h[] (0.99 == 32440)     */
    Word16 k;            /* tap index into h[] / h2[]                     */
    Word16 lag;          /* distance between the two correlated taps      */
    Word16 row;          /* larger matrix index of the current pair       */
    Word16 col;          /* smaller matrix index of the current pair      */
    Word16 corr;         /* rounded correlation for `lag`                 */
    Word16 corr_next;    /* rounded correlation for `lag + 1`             */
    Word16 sgn;          /* sign product for (row, col)                   */
    Word16 sgn_next;     /* sign product for (row, col - 1)               */
    Word32 s;            /* accumulator for `lag` (and for the energy)    */
    Word32 s2;           /* accumulator for `lag + 1`                     */

    /* ---- Scaling for maximum precision ---- */

    /* Accumulator starts at 1 (the reference code starts at 2 but works   */
    /* on doubled products).                                               */
    s = 1;
    for (k = 0; k < L_CODE; k++)
    {
        s = amrnb_fxp_mac_16_by_16bb((Word32) h[k], (Word32) h[k], s);
    }

    /* Would doubling the energy reach the sign bit?  Bit 30 is tested     */
    /* directly; shifting a signed value left into the sign bit is         */
    /* undefined behaviour in C.                                           */
    if (s & 0x40000000L)
    {
        for (k = 0; k < L_CODE; k++)
        {
            h2[k] = (Word16)(h[k] >> 1);
        }
    }
    else
    {
        /* The previous shift-left / shift-right pair left bit 31 cleared; */
        /* keep that so Inv_sqrt() sees the same argument in every case.   */
        s &= 0x7FFFFFFFL;

        s = Inv_sqrt(s, pOverflow);

        if (s < (Word32) 0x00ffffffL)
        {
            /* k = 0.99*k */
            scale = (Word16)(((s >> 9) * 32440) >> 15);
        }
        else
        {
            scale = 32440;  /* 0.99 */
        }

        for (k = 0; k < L_CODE; k++)
        {
            /* 0x020 is the rounding offset for the following >> 6 */
            h2[k] = (Word16)((amrnb_fxp_mac_16_by_16bb((Word32) h[k],
                              (Word32) scale, 0x020L)) >> 6);
        }
    }

    /* ---- build matrix rr[] ---- */

    /* Main diagonal, bottom-right to top-left. */
    s = 0;
    for (k = 0; k < L_CODE; k++)
    {
        s = amrnb_fxp_mac_16_by_16bb((Word32) h2[k], (Word32) h2[k], s);
        rr[L_CODE - 1 - k][L_CODE - 1 - k] = (Word16)((s + 0x00004000L) >> 15);
    }

    /* Off-diagonals, two lags (lag and lag + 1) per pass. */
    for (lag = 1; lag < L_CODE; lag += 2)
    {
        s  = 0;
        s2 = 0;

        row = L_CODE - 1;
        col = L_CODE - 1 - lag;

        /* Lag `lag + 1` has one term fewer than lag `lag`, so the shared  */
        /* loop runs L_CODE - 1 - lag times and the last term of `lag` is  */
        /* handled after it.  Inside the loop col >= 1, so col - 1 is a    */
        /* valid index.                                                    */
        for (k = 0; k < L_CODE - 1 - lag; k++)
        {
            s  = amrnb_fxp_mac_16_by_16bb((Word32) h2[k], (Word32) h2[k + lag], s);
            s2 = amrnb_fxp_mac_16_by_16bb((Word32) h2[k], (Word32) h2[k + lag + 1], s2);

            corr      = (Word16)((s  + 0x00004000L) >> 15);
            corr_next = (Word16)((s2 + 0x00004000L) >> 15);

            sgn      = (Word16)(((Word32) sign[row] * sign[col]) >> 15);
            sgn_next = (Word16)(((Word32) sign[row] * sign[col - 1]) >> 15);

            rr[col][row]     = (Word16)(((Word32) corr * sgn) >> 15);
            rr[row][col]     = rr[col][row];
            rr[row][col - 1] = (Word16)(((Word32) corr_next * sgn_next) >> 15);
            rr[col - 1][row] = rr[row][col - 1];

            row--;
            col--;
        }

        /* Last term of lag `lag`: k == L_CODE-1-lag, row == lag, col == 0. */
        s = amrnb_fxp_mac_16_by_16bb((Word32) h2[k], (Word32) h2[k + lag], s);

        corr = (Word16)((s + 0x00004000L) >> 15);
        sgn  = (Word16)(((Word32) sign[row] * sign[col]) >> 15);

        rr[row][col] = (Word16)(((Word32) corr * sgn) >> 15);
        rr[col][row] = rr[row][col];
    }

    return;
}
429
430