1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "pitch_estimator.h"
12
13 #include <math.h>
14 #include <memory.h>
15 #ifdef WEBRTC_ANDROID
16 #include <stdlib.h>
17 #endif
18
19 static const double kInterpolWin[8] = {-0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160,
20 0.60086484101160, -0.12203175715679, 0.02184247643159, -0.00067556028640};
21
22 /* interpolation filter */
IntrepolFilter(double * data_ptr,double * intrp)23 __inline static void IntrepolFilter(double *data_ptr, double *intrp)
24 {
25 *intrp = kInterpolWin[0] * data_ptr[-3];
26 *intrp += kInterpolWin[1] * data_ptr[-2];
27 *intrp += kInterpolWin[2] * data_ptr[-1];
28 *intrp += kInterpolWin[3] * data_ptr[0];
29 *intrp += kInterpolWin[4] * data_ptr[1];
30 *intrp += kInterpolWin[5] * data_ptr[2];
31 *intrp += kInterpolWin[6] * data_ptr[3];
32 *intrp += kInterpolWin[7] * data_ptr[4];
33 }
34
35
36 /* 2D parabolic interpolation */
37 /* probably some 0.5 factors can be eliminated, and the square-roots can be removed from the Cholesky fact. */
Intrpol2D(double T[3][3],double * x,double * y,double * peak_val)38 __inline static void Intrpol2D(double T[3][3], double *x, double *y, double *peak_val)
39 {
40 double c, b[2], A[2][2];
41 double t1, t2, d;
42 double delta1, delta2;
43
44
45 // double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}};
46 // should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0
47
48 c = T[1][1];
49 b[0] = 0.5 * (T[1][2] + T[2][1] - T[0][1] - T[1][0]);
50 b[1] = 0.5 * (T[1][0] + T[2][1] - T[0][1] - T[1][2]);
51 A[0][1] = -0.5 * (T[0][1] + T[2][1] - T[1][0] - T[1][2]);
52 t1 = 0.5 * (T[0][0] + T[2][2]) - c;
53 t2 = 0.5 * (T[2][0] + T[0][2]) - c;
54 d = (T[0][1] + T[1][2] + T[1][0] + T[2][1]) - 4.0 * c - t1 - t2;
55 A[0][0] = -t1 - 0.5 * d;
56 A[1][1] = -t2 - 0.5 * d;
57
58 /* deal with singularities or ill-conditioned cases */
59 if ( (A[0][0] < 1e-7) || ((A[0][0] * A[1][1] - A[0][1] * A[0][1]) < 1e-7) ) {
60 *peak_val = T[1][1];
61 return;
62 }
63
64 /* Cholesky decomposition: replace A by upper-triangular factor */
65 A[0][0] = sqrt(A[0][0]);
66 A[0][1] = A[0][1] / A[0][0];
67 A[1][1] = sqrt(A[1][1] - A[0][1] * A[0][1]);
68
69 /* compute [x; y] = -0.5 * inv(A) * b */
70 t1 = b[0] / A[0][0];
71 t2 = (b[1] - t1 * A[0][1]) / A[1][1];
72 delta2 = t2 / A[1][1];
73 delta1 = 0.5 * (t1 - delta2 * A[0][1]) / A[0][0];
74 delta2 *= 0.5;
75
76 /* limit norm */
77 t1 = delta1 * delta1 + delta2 * delta2;
78 if (t1 > 1.0) {
79 delta1 /= t1;
80 delta2 /= t1;
81 }
82
83 *peak_val = 0.5 * (b[0] * delta1 + b[1] * delta2) + c;
84
85 *x += delta1;
86 *y += delta2;
87 }
88
89
PCorr(const double * in,double * outcorr)90 static void PCorr(const double *in, double *outcorr)
91 {
92 double sum, ysum, prod;
93 const double *x, *inptr;
94 int k, n;
95
96 //ysum = 1e-6; /* use this with float (i.s.o. double)! */
97 ysum = 1e-13;
98 sum = 0.0;
99 x = in + PITCH_MAX_LAG/2 + 2;
100 for (n = 0; n < PITCH_CORR_LEN2; n++) {
101 ysum += in[n] * in[n];
102 sum += x[n] * in[n];
103 }
104
105 outcorr += PITCH_LAG_SPAN2 - 1; /* index of last element in array */
106 *outcorr = sum / sqrt(ysum);
107
108 for (k = 1; k < PITCH_LAG_SPAN2; k++) {
109 ysum -= in[k-1] * in[k-1];
110 ysum += in[PITCH_CORR_LEN2 + k - 1] * in[PITCH_CORR_LEN2 + k - 1];
111 sum = 0.0;
112 inptr = &in[k];
113 prod = x[0] * inptr[0];
114 for (n = 1; n < PITCH_CORR_LEN2; n++) {
115 sum += prod;
116 prod = x[n] * inptr[n];
117 }
118 sum += prod;
119 outcorr--;
120 *outcorr = sum / sqrt(ysum);
121 }
122 }
123
124
WebRtcIsac_InitializePitch(const double * in,const double old_lag,const double old_gain,PitchAnalysisStruct * State,double * lags)125 void WebRtcIsac_InitializePitch(const double *in,
126 const double old_lag,
127 const double old_gain,
128 PitchAnalysisStruct *State,
129 double *lags)
130 {
131 double buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2];
132 double ratio, log_lag, gain_bias;
133 double bias;
134 double corrvec1[PITCH_LAG_SPAN2];
135 double corrvec2[PITCH_LAG_SPAN2];
136 int m, k;
137 // Allocating 10 extra entries at the begining of the CorrSurf
138 double corrSurfBuff[10 + (2*PITCH_BW+3)*(PITCH_LAG_SPAN2+4)];
139 double* CorrSurf[2*PITCH_BW+3];
140 double *CorrSurfPtr1, *CorrSurfPtr2;
141 double LagWin[3] = {0.2, 0.5, 0.98};
142 int ind1, ind2, peaks_ind, peak, max_ind;
143 int peaks[PITCH_MAX_NUM_PEAKS];
144 double adj, gain_tmp;
145 double corr, corr_max;
146 double intrp_a, intrp_b, intrp_c, intrp_d;
147 double peak_vals[PITCH_MAX_NUM_PEAKS];
148 double lags1[PITCH_MAX_NUM_PEAKS];
149 double lags2[PITCH_MAX_NUM_PEAKS];
150 double T[3][3];
151 int row;
152
153 for(k = 0; k < 2*PITCH_BW+3; k++)
154 {
155 CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2+4)];
156 }
157 /* reset CorrSurf matrix */
158 memset(corrSurfBuff, 0, sizeof(double) * (10 + (2*PITCH_BW+3) * (PITCH_LAG_SPAN2+4)));
159
160 //warnings -DH
161 max_ind = 0;
162 peak = 0;
163
164 /* copy old values from state buffer */
165 memcpy(buf_dec, State->dec_buffer, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
166
167 /* decimation; put result after the old values */
168 WebRtcIsac_DecimateAllpass(in, State->decimator_state, PITCH_FRAME_LEN,
169 &buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2]);
170
171 /* low-pass filtering */
172 for (k = PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2; k < PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2; k++)
173 buf_dec[k] += 0.75 * buf_dec[k-1] - 0.25 * buf_dec[k-2];
174
175 /* copy end part back into state buffer */
176 memcpy(State->dec_buffer, buf_dec+PITCH_FRAME_LEN/2, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
177
178 /* compute correlation for first and second half of the frame */
179 PCorr(buf_dec, corrvec1);
180 PCorr(buf_dec + PITCH_CORR_STEP2, corrvec2);
181
182 /* bias towards pitch lag of previous frame */
183 log_lag = log(0.5 * old_lag);
184 gain_bias = 4.0 * old_gain * old_gain;
185 if (gain_bias > 0.8) gain_bias = 0.8;
186 for (k = 0; k < PITCH_LAG_SPAN2; k++)
187 {
188 ratio = log((double) (k + (PITCH_MIN_LAG/2-2))) - log_lag;
189 bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio);
190 corrvec1[k] *= bias;
191 }
192
193 /* taper correlation functions */
194 for (k = 0; k < 3; k++) {
195 gain_tmp = LagWin[k];
196 corrvec1[k] *= gain_tmp;
197 corrvec2[k] *= gain_tmp;
198 corrvec1[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
199 corrvec2[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
200 }
201
202 corr_max = 0.0;
203 /* fill middle row of correlation surface */
204 ind1 = 0;
205 ind2 = 0;
206 CorrSurfPtr1 = &CorrSurf[PITCH_BW][2];
207 for (k = 0; k < PITCH_LAG_SPAN2; k++) {
208 corr = corrvec1[ind1++] + corrvec2[ind2++];
209 CorrSurfPtr1[k] = corr;
210 if (corr > corr_max) {
211 corr_max = corr; /* update maximum */
212 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
213 }
214 }
215 /* fill first and last rows of correlation surface */
216 ind1 = 0;
217 ind2 = PITCH_BW;
218 CorrSurfPtr1 = &CorrSurf[0][2];
219 CorrSurfPtr2 = &CorrSurf[2*PITCH_BW][PITCH_BW+2];
220 for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW; k++) {
221 ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
222 adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
223 corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
224 CorrSurfPtr1[k] = corr;
225 if (corr > corr_max) {
226 corr_max = corr; /* update maximum */
227 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
228 }
229 corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
230 CorrSurfPtr2[k] = corr;
231 if (corr > corr_max) {
232 corr_max = corr; /* update maximum */
233 max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
234 }
235 }
236 /* fill second and next to last rows of correlation surface */
237 ind1 = 0;
238 ind2 = PITCH_BW-1;
239 CorrSurfPtr1 = &CorrSurf[1][2];
240 CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-1][PITCH_BW+1];
241 for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+1; k++) {
242 ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
243 adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
244 corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
245 CorrSurfPtr1[k] = corr;
246 if (corr > corr_max) {
247 corr_max = corr; /* update maximum */
248 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
249 }
250 corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
251 CorrSurfPtr2[k] = corr;
252 if (corr > corr_max) {
253 corr_max = corr; /* update maximum */
254 max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
255 }
256 }
257 /* fill remainder of correlation surface */
258 for (m = 2; m < PITCH_BW; m++) {
259 ind1 = 0;
260 ind2 = PITCH_BW - m; /* always larger than ind1 */
261 CorrSurfPtr1 = &CorrSurf[m][2];
262 CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-m][PITCH_BW+2-m];
263 for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+m; k++) {
264 ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
265 adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
266 corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
267 CorrSurfPtr1[k] = corr;
268 if (corr > corr_max) {
269 corr_max = corr; /* update maximum */
270 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
271 }
272 corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
273 CorrSurfPtr2[k] = corr;
274 if (corr > corr_max) {
275 corr_max = corr; /* update maximum */
276 max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
277 }
278 }
279 }
280
281 /* threshold value to qualify as a peak */
282 corr_max *= 0.6;
283
284 peaks_ind = 0;
285 /* find peaks */
286 for (m = 1; m < PITCH_BW+1; m++) {
287 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
288 CorrSurfPtr1 = &CorrSurf[m][2];
289 for (k = 2; k < PITCH_LAG_SPAN2-PITCH_BW-2+m; k++) {
290 corr = CorrSurfPtr1[k];
291 if (corr > corr_max) {
292 if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
293 if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
294 /* found a peak; store index into matrix */
295 peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
296 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
297 }
298 }
299 }
300 }
301 }
302 for (m = PITCH_BW+1; m < 2*PITCH_BW; m++) {
303 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
304 CorrSurfPtr1 = &CorrSurf[m][2];
305 for (k = 2+m-PITCH_BW; k < PITCH_LAG_SPAN2-2; k++) {
306 corr = CorrSurfPtr1[k];
307 if (corr > corr_max) {
308 if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
309 if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
310 /* found a peak; store index into matrix */
311 peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
312 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
313 }
314 }
315 }
316 }
317 }
318
319 if (peaks_ind > 0) {
320 /* examine each peak */
321 CorrSurfPtr1 = &CorrSurf[0][0];
322 for (k = 0; k < peaks_ind; k++) {
323 peak = peaks[k];
324
325 /* compute four interpolated values around current peak */
326 IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)], &intrp_a);
327 IntrepolFilter(&CorrSurfPtr1[peak - 1 ], &intrp_b);
328 IntrepolFilter(&CorrSurfPtr1[peak ], &intrp_c);
329 IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)], &intrp_d);
330
331 /* determine maximum of the interpolated values */
332 corr = CorrSurfPtr1[peak];
333 corr_max = intrp_a;
334 if (intrp_b > corr_max) corr_max = intrp_b;
335 if (intrp_c > corr_max) corr_max = intrp_c;
336 if (intrp_d > corr_max) corr_max = intrp_d;
337
338 /* determine where the peak sits and fill a 3x3 matrix around it */
339 row = peak / (PITCH_LAG_SPAN2+4);
340 lags1[k] = (double) ((peak - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
341 lags2[k] = (double) (lags1[k] + PITCH_BW - row);
342 if ( corr > corr_max ) {
343 T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
344 T[2][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
345 T[1][1] = corr;
346 T[0][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
347 T[2][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
348 T[1][0] = intrp_a;
349 T[0][1] = intrp_b;
350 T[2][1] = intrp_c;
351 T[1][2] = intrp_d;
352 } else {
353 if (intrp_a == corr_max) {
354 lags1[k] -= 0.5;
355 lags2[k] += 0.5;
356 IntrepolFilter(&CorrSurfPtr1[peak - 2*(PITCH_LAG_SPAN2+5)], &T[0][0]);
357 IntrepolFilter(&CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)], &T[2][0]);
358 T[1][1] = intrp_a;
359 T[0][2] = intrp_b;
360 T[2][2] = intrp_c;
361 T[1][0] = CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)];
362 T[0][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
363 T[2][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
364 T[1][2] = corr;
365 } else if (intrp_b == corr_max) {
366 lags1[k] -= 0.5;
367 lags2[k] -= 0.5;
368 IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+6)], &T[0][0]);
369 T[2][0] = intrp_a;
370 T[1][1] = intrp_b;
371 IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+3)], &T[0][2]);
372 T[2][2] = intrp_d;
373 T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
374 T[0][1] = CorrSurfPtr1[peak - 1];
375 T[2][1] = corr;
376 T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
377 } else if (intrp_c == corr_max) {
378 lags1[k] += 0.5;
379 lags2[k] += 0.5;
380 T[0][0] = intrp_a;
381 IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)], &T[2][0]);
382 T[1][1] = intrp_c;
383 T[0][2] = intrp_d;
384 IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)], &T[2][2]);
385 T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
386 T[0][1] = corr;
387 T[2][1] = CorrSurfPtr1[peak + 1];
388 T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
389 } else {
390 lags1[k] += 0.5;
391 lags2[k] -= 0.5;
392 T[0][0] = intrp_b;
393 T[2][0] = intrp_c;
394 T[1][1] = intrp_d;
395 IntrepolFilter(&CorrSurfPtr1[peak + 2*(PITCH_LAG_SPAN2+4)], &T[0][2]);
396 IntrepolFilter(&CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)], &T[2][2]);
397 T[1][0] = corr;
398 T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
399 T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
400 T[1][2] = CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)];
401 }
402 }
403
404 /* 2D parabolic interpolation gives more accurate lags and peak value */
405 Intrpol2D(T, &lags1[k], &lags2[k], &peak_vals[k]);
406 }
407
408 /* determine the highest peak, after applying a bias towards short lags */
409 corr_max = 0.0;
410 for (k = 0; k < peaks_ind; k++) {
411 corr = peak_vals[k] * pow(PITCH_PEAK_DECAY, log(lags1[k] + lags2[k]));
412 if (corr > corr_max) {
413 corr_max = corr;
414 peak = k;
415 }
416 }
417
418 lags1[peak] *= 2.0;
419 lags2[peak] *= 2.0;
420
421 if (lags1[peak] < (double) PITCH_MIN_LAG) lags1[peak] = (double) PITCH_MIN_LAG;
422 if (lags2[peak] < (double) PITCH_MIN_LAG) lags2[peak] = (double) PITCH_MIN_LAG;
423 if (lags1[peak] > (double) PITCH_MAX_LAG) lags1[peak] = (double) PITCH_MAX_LAG;
424 if (lags2[peak] > (double) PITCH_MAX_LAG) lags2[peak] = (double) PITCH_MAX_LAG;
425
426 /* store lags of highest peak in output array */
427 lags[0] = lags1[peak];
428 lags[1] = lags1[peak];
429 lags[2] = lags2[peak];
430 lags[3] = lags2[peak];
431 }
432 else
433 {
434 row = max_ind / (PITCH_LAG_SPAN2+4);
435 lags1[0] = (double) ((max_ind - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
436 lags2[0] = (double) (lags1[0] + PITCH_BW - row);
437
438 if (lags1[0] < (double) PITCH_MIN_LAG) lags1[0] = (double) PITCH_MIN_LAG;
439 if (lags2[0] < (double) PITCH_MIN_LAG) lags2[0] = (double) PITCH_MIN_LAG;
440 if (lags1[0] > (double) PITCH_MAX_LAG) lags1[0] = (double) PITCH_MAX_LAG;
441 if (lags2[0] > (double) PITCH_MAX_LAG) lags2[0] = (double) PITCH_MAX_LAG;
442
443 /* store lags of highest peak in output array */
444 lags[0] = lags1[0];
445 lags[1] = lags1[0];
446 lags[2] = lags2[0];
447 lags[3] = lags2[0];
448 }
449 }
450
451
452
453 /* create weighting matrix by orthogonalizing a basis of polynomials of increasing order
454 * t = (0:4)';
455 * A = [t.^0, t.^1, t.^2, t.^3, t.^4];
456 * [Q, dummy] = qr(A);
457 * P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q'; */
458 static const double kWeight[5][5] = {
459 { 0.29714285714286, -0.30857142857143, -0.05714285714286, 0.05142857142857, 0.01714285714286},
460 {-0.30857142857143, 0.67428571428571, -0.27142857142857, -0.14571428571429, 0.05142857142857},
461 {-0.05714285714286, -0.27142857142857, 0.65714285714286, -0.27142857142857, -0.05714285714286},
462 { 0.05142857142857, -0.14571428571429, -0.27142857142857, 0.67428571428571, -0.30857142857143},
463 { 0.01714285714286, 0.05142857142857, -0.05714285714286, -0.30857142857143, 0.29714285714286}
464 };
465
466
WebRtcIsac_PitchAnalysis(const double * in,double * out,PitchAnalysisStruct * State,double * lags,double * gains)467 void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN samples */
468 double *out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
469 PitchAnalysisStruct *State,
470 double *lags,
471 double *gains)
472 {
473 double HPin[PITCH_FRAME_LEN];
474 double Weighted[PITCH_FRAME_LEN];
475 double Whitened[PITCH_FRAME_LEN + QLOOKAHEAD];
476 double inbuf[PITCH_FRAME_LEN + QLOOKAHEAD];
477 double out_G[PITCH_FRAME_LEN + QLOOKAHEAD]; // could be removed by using out instead
478 double out_dG[4][PITCH_FRAME_LEN + QLOOKAHEAD];
479 double old_lag, old_gain;
480 double nrg_wht, tmp;
481 double Wnrg, Wfluct, Wgain;
482 double H[4][4];
483 double grad[4];
484 double dG[4];
485 int k, m, n, iter;
486
487 /* high pass filtering using second order pole-zero filter */
488 WebRtcIsac_Highpass(in, HPin, State->hp_state, PITCH_FRAME_LEN);
489
490 /* copy from state into buffer */
491 memcpy(Whitened, State->whitened_buf, sizeof(double) * QLOOKAHEAD);
492
493 /* compute weighted and whitened signals */
494 WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], &(State->Wghtstr));
495
496 /* copy from buffer into state */
497 memcpy(State->whitened_buf, Whitened+PITCH_FRAME_LEN, sizeof(double) * QLOOKAHEAD);
498
499 old_lag = State->PFstr_wght.oldlagp[0];
500 old_gain = State->PFstr_wght.oldgainp[0];
501
502 /* inital pitch estimate */
503 WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags);
504
505
506 /* Iterative optimization of lags - to be done */
507
508 /* compute energy of whitened signal */
509 nrg_wht = 0.0;
510 for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++)
511 nrg_wht += Whitened[k] * Whitened[k];
512
513
514 /* Iterative optimization of gains */
515
516 /* set weights for energy, gain fluctiation, and spectral gain penalty functions */
517 Wnrg = 1.0 / nrg_wht;
518 Wgain = 0.005;
519 Wfluct = 3.0;
520
521 /* set initial gains */
522 for (k = 0; k < 4; k++)
523 gains[k] = PITCH_MAX_GAIN_06;
524
525 /* two iterations should be enough */
526 for (iter = 0; iter < 2; iter++) {
527 /* compute Jacobian of pre-filter output towards gains */
528 WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, &(State->PFstr_wght), lags, gains);
529
530 /* gradient and approximate Hessian (lower triangle) for minimizing the filter's output power */
531 for (k = 0; k < 4; k++) {
532 tmp = 0.0;
533 for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
534 tmp += out_G[n] * out_dG[k][n];
535 grad[k] = tmp * Wnrg;
536 }
537 for (k = 0; k < 4; k++) {
538 for (m = 0; m <= k; m++) {
539 tmp = 0.0;
540 for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
541 tmp += out_dG[m][n] * out_dG[k][n];
542 H[k][m] = tmp * Wnrg;
543 }
544 }
545
546 /* add gradient and Hessian (lower triangle) for dampening fast gain changes */
547 for (k = 0; k < 4; k++) {
548 tmp = kWeight[k+1][0] * old_gain;
549 for (m = 0; m < 4; m++)
550 tmp += kWeight[k+1][m+1] * gains[m];
551 grad[k] += tmp * Wfluct;
552 }
553 for (k = 0; k < 4; k++) {
554 for (m = 0; m <= k; m++) {
555 H[k][m] += kWeight[k+1][m+1] * Wfluct;
556 }
557 }
558
559 /* add gradient and Hessian for dampening gain */
560 for (k = 0; k < 3; k++) {
561 tmp = 1.0 / (1 - gains[k]);
562 grad[k] += tmp * tmp * Wgain;
563 H[k][k] += 2.0 * tmp * (tmp * tmp * Wgain);
564 }
565 tmp = 1.0 / (1 - gains[3]);
566 grad[3] += 1.33 * (tmp * tmp * Wgain);
567 H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain);
568
569
570 /* compute Cholesky factorization of Hessian
571 * by overwritting the upper triangle; scale factors on diagonal
572 * (for non pc-platforms store the inverse of the diagonals seperately to minimize divisions) */
573 H[0][1] = H[1][0] / H[0][0];
574 H[0][2] = H[2][0] / H[0][0];
575 H[0][3] = H[3][0] / H[0][0];
576 H[1][1] -= H[0][0] * H[0][1] * H[0][1];
577 H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1];
578 H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1];
579 H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2];
580 H[2][3] = (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2];
581 H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + H[2][2] * H[2][3] * H[2][3];
582
583 /* Compute update as delta_gains = -inv(H) * grad */
584 /* copy and negate */
585 for (k = 0; k < 4; k++)
586 dG[k] = -grad[k];
587 /* back substitution */
588 dG[1] -= dG[0] * H[0][1];
589 dG[2] -= dG[0] * H[0][2] + dG[1] * H[1][2];
590 dG[3] -= dG[0] * H[0][3] + dG[1] * H[1][3] + dG[2] * H[2][3];
591 /* scale */
592 for (k = 0; k < 4; k++)
593 dG[k] /= H[k][k];
594 /* back substitution */
595 dG[2] -= dG[3] * H[2][3];
596 dG[1] -= dG[3] * H[1][3] + dG[2] * H[1][2];
597 dG[0] -= dG[3] * H[0][3] + dG[2] * H[0][2] + dG[1] * H[0][1];
598
599 /* update gains and check range */
600 for (k = 0; k < 4; k++) {
601 gains[k] += dG[k];
602 if (gains[k] > PITCH_MAX_GAIN)
603 gains[k] = PITCH_MAX_GAIN;
604 else if (gains[k] < 0.0)
605 gains[k] = 0.0;
606 }
607 }
608
609 /* update state for next frame */
610 WebRtcIsac_PitchfilterPre(Whitened, out, &(State->PFstr_wght), lags, gains);
611
612 /* concatenate previous input's end and current input */
613 memcpy(inbuf, State->inbuf, sizeof(double) * QLOOKAHEAD);
614 memcpy(inbuf+QLOOKAHEAD, in, sizeof(double) * PITCH_FRAME_LEN);
615
616 /* lookahead pitch filtering for masking analysis */
617 WebRtcIsac_PitchfilterPre_la(inbuf, out, &(State->PFstr), lags, gains);
618
619 /* store last part of input */
620 for (k = 0; k < QLOOKAHEAD; k++)
621 State->inbuf[k] = inbuf[k + PITCH_FRAME_LEN];
622 }
623