1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <string.h>
12 #include <math.h>
13 //#include <stdio.h>
14 #include <stdlib.h>
15 #include "noise_suppression.h"
16 #include "ns_core.h"
17 #include "windows_private.h"
18 #include "fft4g.h"
19 #include "signal_processing_library.h"
20
21 // Set Feature Extraction Parameters
22 void WebRtcNs_set_feature_extraction_parameters(NSinst_t* inst) {
23 //bin size of histogram
24 inst->featureExtractionParams.binSizeLrt = (float)0.1;
25 inst->featureExtractionParams.binSizeSpecFlat = (float)0.05;
26 inst->featureExtractionParams.binSizeSpecDiff = (float)0.1;
27
28 //range of histogram over which lrt threshold is computed
29 inst->featureExtractionParams.rangeAvgHistLrt = (float)1.0;
30
31 //scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
32 // thresholds for prior model
33 inst->featureExtractionParams.factor1ModelPars = (float)1.20; //for lrt and spectral diff
34 inst->featureExtractionParams.factor2ModelPars = (float)0.9; //for spectral_flatness:
35 // used when noise is flatter than speech
36
37 //peak limit for spectral flatness (varies between 0 and 1)
38 inst->featureExtractionParams.thresPosSpecFlat = (float)0.6;
39
40 //limit on spacing of two highest peaks in histogram: spacing determined by bin size
41 inst->featureExtractionParams.limitPeakSpacingSpecFlat =
42 2 * inst->featureExtractionParams.binSizeSpecFlat;
43 inst->featureExtractionParams.limitPeakSpacingSpecDiff =
44 2 * inst->featureExtractionParams.binSizeSpecDiff;
45
46 //limit on relevance of second peak:
47 inst->featureExtractionParams.limitPeakWeightsSpecFlat = (float)0.5;
48 inst->featureExtractionParams.limitPeakWeightsSpecDiff = (float)0.5;
49
50 // fluctuation limit of lrt feature
51 inst->featureExtractionParams.thresFluctLrt = (float)0.05;
52
53 //limit on the max and min values for the feature thresholds
54 inst->featureExtractionParams.maxLrt = (float)1.0;
55 inst->featureExtractionParams.minLrt = (float)0.20;
56
57 inst->featureExtractionParams.maxSpecFlat = (float)0.95;
58 inst->featureExtractionParams.minSpecFlat = (float)0.10;
59
60 inst->featureExtractionParams.maxSpecDiff = (float)1.0;
61 inst->featureExtractionParams.minSpecDiff = (float)0.16;
62
63 //criteria of weight of histogram peak to accept/reject feature
64 inst->featureExtractionParams.thresWeightSpecFlat = (int)(0.3
65 * (inst->modelUpdatePars[1])); //for spectral flatness
66 inst->featureExtractionParams.thresWeightSpecDiff = (int)(0.3
67 * (inst->modelUpdatePars[1])); //for spectral difference
68 }
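// Editorial usage sketch (illustrative only, not part of the original source; real
// clients normally go through the wrapper API declared in noise_suppression.h rather
// than calling the *Core functions directly):
//
//   NSinst_t ns;                                  // core state, defined in ns_core.h
//   short in[160], out[160];                      // one 10 ms frame at 16 kHz
//   if (WebRtcNs_InitCore(&ns, 16000) == 0) {
//     WebRtcNs_set_policy_core(&ns, 2);           // moderate aggressiveness
//     /* fill in[] with PCM samples, then per 10 ms frame: */
//     WebRtcNs_ProcessCore(&ns, in, NULL, out, NULL);  // HB pointers unused below 32 kHz
//   }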
69
70 // Initialize state
71 int WebRtcNs_InitCore(NSinst_t* inst, WebRtc_UWord32 fs) {
72 int i;
73 //We only support 10ms frames
74
75 //check for valid pointer
76 if (inst == NULL) {
77 return -1;
78 }
79
80 // Initialization of struct
81 if (fs == 8000 || fs == 16000 || fs == 32000) {
82 inst->fs = fs;
83 } else {
84 return -1;
85 }
86 inst->windShift = 0;
87 if (fs == 8000) {
88 // We only support 10ms frames
89 inst->blockLen = 80;
90 inst->blockLen10ms = 80;
91 inst->anaLen = 128;
92 inst->window = kBlocks80w128;
93 inst->outLen = 0;
94 } else if (fs == 16000) {
95 // We only support 10ms frames
96 inst->blockLen = 160;
97 inst->blockLen10ms = 160;
98 inst->anaLen = 256;
99 inst->window = kBlocks160w256;
100 inst->outLen = 0;
101 } else if (fs == 32000) {
102 // We only support 10ms frames
103 inst->blockLen = 160;
104 inst->blockLen10ms = 160;
105 inst->anaLen = 256;
106 inst->window = kBlocks160w256;
107 inst->outLen = 0;
108 }
109 inst->magnLen = inst->anaLen / 2 + 1; // Number of frequency bins
110
111 // Initialize fft work arrays.
112 inst->ip[0] = 0; // Setting this triggers initialization.
113 memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
114 WebRtc_rdft(inst->anaLen, 1, inst->dataBuf, inst->ip, inst->wfft);
115
116 memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
117 memset(inst->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
118
119 //for HB processing
120 memset(inst->dataBufHB, 0, sizeof(float) * ANAL_BLOCKL_MAX);
121
122 //for quantile noise estimation
123 memset(inst->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL);
124 for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
125 inst->lquantile[i] = (float)8.0;
126 inst->density[i] = (float)0.3;
127 }
128
129 for (i = 0; i < SIMULT; i++) {
130 inst->counter[i] = (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
131 }
132
133 inst->updates = 0;
134
135 // Wiener filter initialization
136 for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
137 inst->smooth[i] = (float)1.0;
138 }
139
140 // Set the aggressiveness: default
141 inst->aggrMode = 0;
142
143 //initialize variables for new method
144 inst->priorSpeechProb = (float)0.5; //prior prob for speech/noise
145 for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
146 inst->magnPrev[i] = (float)0.0; //previous mag spectrum
147 inst->noisePrev[i] = (float)0.0; //previous noise-spectrum
148 inst->logLrtTimeAvg[i] = LRT_FEATURE_THR; //smooth LR ratio (same as threshold)
149 inst->magnAvgPause[i] = (float)0.0; //conservative noise spectrum estimate
150 inst->speechProbHB[i] = (float)0.0; //for estimation of HB in second pass
151 inst->initMagnEst[i] = (float)0.0; //initial average mag spectrum
152 }
153
154 //feature quantities
155 inst->featureData[0] = SF_FEATURE_THR; //spectral flatness (start on threshold)
156 inst->featureData[1] = (float)0.0; //spectral entropy: not used in this version
157 inst->featureData[2] = (float)0.0; //spectral variance: not used in this version
158 inst->featureData[3] = LRT_FEATURE_THR; //average lrt factor (start on threshold)
159 inst->featureData[4] = SF_FEATURE_THR; //spectral template diff (start on threshold)
160 inst->featureData[5] = (float)0.0; //normalization for spectral-diff
161 inst->featureData[6] = (float)0.0; //window time-average of input magnitude spectrum
162
163 //histogram quantities: used to estimate/update thresholds for features
164 for (i = 0; i < HIST_PAR_EST; i++) {
165 inst->histLrt[i] = 0;
166 inst->histSpecFlat[i] = 0;
167 inst->histSpecDiff[i] = 0;
168 }
169
170 inst->blockInd = -1; //frame counter
171 inst->priorModelPars[0] = LRT_FEATURE_THR; //default threshold for lrt feature
172 inst->priorModelPars[1] = (float)0.5; //threshold for spectral flatness:
173 // determined on-line
174 inst->priorModelPars[2] = (float)1.0; //sgn_map par for spectral measure:
175 // 1 for flatness measure
176 inst->priorModelPars[3] = (float)0.5; //threshold for template-difference feature:
177 // determined on-line
178 inst->priorModelPars[4] = (float)1.0; //default weighting parameter for lrt feature
179 inst->priorModelPars[5] = (float)0.0; //default weighting parameter for
180 // spectral flatness feature
181 inst->priorModelPars[6] = (float)0.0; //default weighting parameter for
182 // spectral difference feature
183
184 inst->modelUpdatePars[0] = 2; //update flag for parameters:
185 // 0 no update, 1=update once, 2=update every window
186 inst->modelUpdatePars[1] = 500; //window for update
187 inst->modelUpdatePars[2] = 0; //counter for update of conservative noise spectrum
188 //countdown counter until the feature thresholds are updated within the window
189 inst->modelUpdatePars[3] = inst->modelUpdatePars[1];
190
191 inst->signalEnergy = 0.0;
192 inst->sumMagn = 0.0;
193 inst->whiteNoiseLevel = 0.0;
194 inst->pinkNoiseNumerator = 0.0;
195 inst->pinkNoiseExp = 0.0;
196
197 WebRtcNs_set_feature_extraction_parameters(inst); // Set feature configuration
198
199 //default mode
200 WebRtcNs_set_policy_core(inst, 0);
201
202
203 memset(inst->outBuf, 0, sizeof(float) * 3 * BLOCKL_MAX);
204
205 inst->initFlag = 1;
206 return 0;
207 }
208
209 int WebRtcNs_set_policy_core(NSinst_t* inst, int mode) {
210 // allow for modes:0,1,2,3
211 if (mode < 0 || mode > 3) {
212 return (-1);
213 }
214
215 inst->aggrMode = mode;
216 if (mode == 0) {
217 inst->overdrive = (float)1.0;
218 inst->denoiseBound = (float)0.5;
219 inst->gainmap = 0;
220 } else if (mode == 1) {
221 //inst->overdrive = (float)1.25;
222 inst->overdrive = (float)1.0;
223 inst->denoiseBound = (float)0.25;
224 inst->gainmap = 1;
225 } else if (mode == 2) {
226 //inst->overdrive = (float)1.25;
227 inst->overdrive = (float)1.1;
228 inst->denoiseBound = (float)0.125;
229 inst->gainmap = 1;
230 } else if (mode == 3) {
231 //inst->overdrive = (float)1.30;
232 inst->overdrive = (float)1.25;
233 inst->denoiseBound = (float)0.09;
234 inst->gainmap = 1;
235 }
236 return 0;
237 }
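// Editorial note: in the suppression filter further down, inst->overdrive acts as an
// over-subtraction factor in the Wiener-style gain,
//
//   gain[k] = snrPrior[k] / (inst->overdrive + snrPrior[k]),
//
// and inst->denoiseBound is the floor applied to that gain, so the higher modes both
// subtract more noise and allow deeper attenuation.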
238
239 // Estimate noise
240 void WebRtcNs_NoiseEstimation(NSinst_t* inst, float* magn, float* noise) {
241 int i, s, offset;
242 float lmagn[HALF_ANAL_BLOCKL], delta;
243
244 if (inst->updates < END_STARTUP_LONG) {
245 inst->updates++;
246 }
247
248 for (i = 0; i < inst->magnLen; i++) {
249 lmagn[i] = (float)log(magn[i]);
250 }
251
252 // loop over simultaneous estimates
253 for (s = 0; s < SIMULT; s++) {
254 offset = s * inst->magnLen;
255
256 // newquantest(...)
257 for (i = 0; i < inst->magnLen; i++) {
258 // compute delta
259 if (inst->density[offset + i] > 1.0) {
260 delta = FACTOR * (float)1.0 / inst->density[offset + i];
261 } else {
262 delta = FACTOR;
263 }
264
265 // update log quantile estimate
266 if (lmagn[i] > inst->lquantile[offset + i]) {
267 inst->lquantile[offset + i] += QUANTILE * delta
268 / (float)(inst->counter[s] + 1);
269 } else {
270 inst->lquantile[offset + i] -= ((float)1.0 - QUANTILE) * delta
271 / (float)(inst->counter[s] + 1);
272 }
273
274 // update density estimate
275 if (fabs(lmagn[i] - inst->lquantile[offset + i]) < WIDTH) {
276 inst->density[offset + i] = ((float)inst->counter[s] * inst->density[offset
277 + i] + (float)1.0 / ((float)2.0 * WIDTH)) / (float)(inst->counter[s] + 1);
278 }
279 } // end loop over magnitude spectrum
280
281 if (inst->counter[s] >= END_STARTUP_LONG) {
282 inst->counter[s] = 0;
283 if (inst->updates >= END_STARTUP_LONG) {
284 for (i = 0; i < inst->magnLen; i++) {
285 inst->quantile[i] = (float)exp(inst->lquantile[offset + i]);
286 }
287 }
288 }
289
290 inst->counter[s]++;
291 } // end loop over simultaneous estimates
292
293 // Sequentially update the noise during startup
294 if (inst->updates < END_STARTUP_LONG) {
295 // Use the last "s" to get noise during startup that differs from zero.
296 for (i = 0; i < inst->magnLen; i++) {
297 inst->quantile[i] = (float)exp(inst->lquantile[offset + i]);
298 }
299 }
300
301 for (i = 0; i < inst->magnLen; i++) {
302 noise[i] = inst->quantile[i];
303 }
304 }
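// Editorial sketch of the quantile tracking above: each of the SIMULT parallel
// estimators applies, per frequency bin, a stochastic-approximation update in the
// log-magnitude domain,
//
//   delta      = FACTOR / max(density, 1)
//   lquantile += QUANTILE * delta / (counter + 1)        if lmagn > lquantile
//   lquantile -= (1 - QUANTILE) * delta / (counter + 1)  otherwise
//
// and inst->quantile[] (copied into noise[]) is refreshed as noise[k] = exp(lquantile[k])
// from the estimator whose counter has just wrapped around.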
305
306 // Extract thresholds for feature parameters
307 // histograms are computed over some window_size (given by inst->modelUpdatePars[1])
308 // thresholds and weights are extracted every window
309 // flag 0 means update histogram only, flag 1 means compute the thresholds/weights
310 // threshold and weights are returned in: inst->priorModelPars
311 void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
312 int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt;
313 int maxPeak1, maxPeak2;
314 int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, weightPeak2SpecDiff;
315
316 float binMid, featureSum;
317 float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff;
318 float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl;
319
320 //3 features: lrt, flatness, difference
321 //lrt_feature = inst->featureData[3];
322 //flat_feature = inst->featureData[0];
323 //diff_feature = inst->featureData[4];
324
325 //update histograms
326 if (flag == 0) {
327 // LRT
328 if ((inst->featureData[3] < HIST_PAR_EST * inst->featureExtractionParams.binSizeLrt)
329 && (inst->featureData[3] >= 0.0)) {
330 i = (int)(inst->featureData[3] / inst->featureExtractionParams.binSizeLrt);
331 inst->histLrt[i]++;
332 }
333 // Spectral flatness
334 if ((inst->featureData[0] < HIST_PAR_EST
335 * inst->featureExtractionParams.binSizeSpecFlat)
336 && (inst->featureData[0] >= 0.0)) {
337 i = (int)(inst->featureData[0] / inst->featureExtractionParams.binSizeSpecFlat);
338 inst->histSpecFlat[i]++;
339 }
340 // Spectral difference
341 if ((inst->featureData[4] < HIST_PAR_EST
342 * inst->featureExtractionParams.binSizeSpecDiff)
343 && (inst->featureData[4] >= 0.0)) {
344 i = (int)(inst->featureData[4] / inst->featureExtractionParams.binSizeSpecDiff);
345 inst->histSpecDiff[i]++;
346 }
347 }
348
349 // extract parameters for speech/noise probability
350 if (flag == 1) {
351 //lrt feature: compute the average over inst->featureExtractionParams.rangeAvgHistLrt
352 avgHistLrt = 0.0;
353 avgHistLrtCompl = 0.0;
354 avgSquareHistLrt = 0.0;
355 numHistLrt = 0;
356 for (i = 0; i < HIST_PAR_EST; i++) {
357 binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeLrt;
358 if (binMid <= inst->featureExtractionParams.rangeAvgHistLrt) {
359 avgHistLrt += inst->histLrt[i] * binMid;
360 numHistLrt += inst->histLrt[i];
361 }
362 avgSquareHistLrt += inst->histLrt[i] * binMid * binMid;
363 avgHistLrtCompl += inst->histLrt[i] * binMid;
364 }
365 if (numHistLrt > 0) {
366 avgHistLrt = avgHistLrt / ((float)numHistLrt);
367 }
368 avgHistLrtCompl = avgHistLrtCompl / ((float)inst->modelUpdatePars[1]);
369 avgSquareHistLrt = avgSquareHistLrt / ((float)inst->modelUpdatePars[1]);
370 fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl;
371 // get threshold for lrt feature:
372 if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) {
373 //very low fluct, so likely noise
374 inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt;
375 } else {
376 inst->priorModelPars[0] = inst->featureExtractionParams.factor1ModelPars
377 * avgHistLrt;
378 // check if value is within min/max range
379 if (inst->priorModelPars[0] < inst->featureExtractionParams.minLrt) {
380 inst->priorModelPars[0] = inst->featureExtractionParams.minLrt;
381 }
382 if (inst->priorModelPars[0] > inst->featureExtractionParams.maxLrt) {
383 inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt;
384 }
385 }
386 // done with lrt feature
387
388 //
389 // for spectral flatness and spectral difference: compute the main peaks of histogram
390 maxPeak1 = 0;
391 maxPeak2 = 0;
392 posPeak1SpecFlat = 0.0;
393 posPeak2SpecFlat = 0.0;
394 weightPeak1SpecFlat = 0;
395 weightPeak2SpecFlat = 0;
396
397 // peaks for flatness
398 for (i = 0; i < HIST_PAR_EST; i++) {
399 binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecFlat;
400 if (inst->histSpecFlat[i] > maxPeak1) {
401 // Found new "first" peak
402 maxPeak2 = maxPeak1;
403 weightPeak2SpecFlat = weightPeak1SpecFlat;
404 posPeak2SpecFlat = posPeak1SpecFlat;
405
406 maxPeak1 = inst->histSpecFlat[i];
407 weightPeak1SpecFlat = inst->histSpecFlat[i];
408 posPeak1SpecFlat = binMid;
409 } else if (inst->histSpecFlat[i] > maxPeak2) {
410 // Found new "second" peak
411 maxPeak2 = inst->histSpecFlat[i];
412 weightPeak2SpecFlat = inst->histSpecFlat[i];
413 posPeak2SpecFlat = binMid;
414 }
415 }
416
417 //compute two peaks for spectral difference
418 maxPeak1 = 0;
419 maxPeak2 = 0;
420 posPeak1SpecDiff = 0.0;
421 posPeak2SpecDiff = 0.0;
422 weightPeak1SpecDiff = 0;
423 weightPeak2SpecDiff = 0;
424 // peaks for spectral difference
425 for (i = 0; i < HIST_PAR_EST; i++) {
426 binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecDiff;
427 if (inst->histSpecDiff[i] > maxPeak1) {
428 // Found new "first" peak
429 maxPeak2 = maxPeak1;
430 weightPeak2SpecDiff = weightPeak1SpecDiff;
431 posPeak2SpecDiff = posPeak1SpecDiff;
432
433 maxPeak1 = inst->histSpecDiff[i];
434 weightPeak1SpecDiff = inst->histSpecDiff[i];
435 posPeak1SpecDiff = binMid;
436 } else if (inst->histSpecDiff[i] > maxPeak2) {
437 // Found new "second" peak
438 maxPeak2 = inst->histSpecDiff[i];
439 weightPeak2SpecDiff = inst->histSpecDiff[i];
440 posPeak2SpecDiff = binMid;
441 }
442 }
443
444 // for spectrum flatness feature
445 useFeatureSpecFlat = 1;
446 // merge the two peaks if they are close
447 if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat)
448 < inst->featureExtractionParams.limitPeakSpacingSpecFlat)
449 && (weightPeak2SpecFlat
450 > inst->featureExtractionParams.limitPeakWeightsSpecFlat
451 * weightPeak1SpecFlat)) {
452 weightPeak1SpecFlat += weightPeak2SpecFlat;
453 posPeak1SpecFlat = (float)0.5 * (posPeak1SpecFlat + posPeak2SpecFlat);
454 }
455 //reject if weight of peaks is not large enough, or peak value too small
456 if (weightPeak1SpecFlat < inst->featureExtractionParams.thresWeightSpecFlat
457 || posPeak1SpecFlat < inst->featureExtractionParams.thresPosSpecFlat) {
458 useFeatureSpecFlat = 0;
459 }
460 // if selected, get the threshold
461 if (useFeatureSpecFlat == 1) {
462 // compute the threshold
463 inst->priorModelPars[1] = inst->featureExtractionParams.factor2ModelPars
464 * posPeak1SpecFlat;
465 //check if value is within min/max range
466 if (inst->priorModelPars[1] < inst->featureExtractionParams.minSpecFlat) {
467 inst->priorModelPars[1] = inst->featureExtractionParams.minSpecFlat;
468 }
469 if (inst->priorModelPars[1] > inst->featureExtractionParams.maxSpecFlat) {
470 inst->priorModelPars[1] = inst->featureExtractionParams.maxSpecFlat;
471 }
472 }
473 // done with flatness feature
474
475 // for template feature
476 useFeatureSpecDiff = 1;
477 // merge the two peaks if they are close
478 if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff)
479 < inst->featureExtractionParams.limitPeakSpacingSpecDiff)
480 && (weightPeak2SpecDiff
481 > inst->featureExtractionParams.limitPeakWeightsSpecDiff
482 * weightPeak1SpecDiff)) {
483 weightPeak1SpecDiff += weightPeak2SpecDiff;
484 posPeak1SpecDiff = (float)0.5 * (posPeak1SpecDiff + posPeak2SpecDiff);
485 }
486 // get the threshold value
487 inst->priorModelPars[3] = inst->featureExtractionParams.factor1ModelPars
488 * posPeak1SpecDiff;
489 //reject if weight of peaks is not large enough
490 if (weightPeak1SpecDiff < inst->featureExtractionParams.thresWeightSpecDiff) {
491 useFeatureSpecDiff = 0;
492 }
493 //check if value is within min/max range
494 if (inst->priorModelPars[3] < inst->featureExtractionParams.minSpecDiff) {
495 inst->priorModelPars[3] = inst->featureExtractionParams.minSpecDiff;
496 }
497 if (inst->priorModelPars[3] > inst->featureExtractionParams.maxSpecDiff) {
498 inst->priorModelPars[3] = inst->featureExtractionParams.maxSpecDiff;
499 }
500 // done with spectral difference feature
501
502 // don't use template feature if fluctuation of lrt feature is very low:
503 // most likely just noise state
504 if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) {
505 useFeatureSpecDiff = 0;
506 }
507
508 // select the weights between the features
509 // inst->priorModelPars[4] is weight for lrt: always selected
510 // inst->priorModelPars[5] is weight for spectral flatness
511 // inst->priorModelPars[6] is weight for spectral difference
512 featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff);
513 inst->priorModelPars[4] = (float)1.0 / featureSum;
514 inst->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum;
515 inst->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum;
516
517 // set hists to zero for next update
518 if (inst->modelUpdatePars[0] >= 1) {
519 for (i = 0; i < HIST_PAR_EST; i++) {
520 inst->histLrt[i] = 0;
521 inst->histSpecFlat[i] = 0;
522 inst->histSpecDiff[i] = 0;
523 }
524 }
525 } // end of flag == 1
526 }
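// Editorial summary of the flag == 1 branch above: the LRT threshold becomes
// factor1ModelPars * avgHistLrt clamped to [minLrt, maxLrt] (maxLrt directly when the
// LRT fluctuation is very small), the flatness threshold becomes factor2ModelPars *
// posPeak1SpecFlat clamped to [minSpecFlat, maxSpecFlat], and the difference threshold
// becomes factor1ModelPars * posPeak1SpecDiff clamped to [minSpecDiff, maxSpecDiff];
// the weights priorModelPars[4..6] then split 1.0 evenly over the accepted features.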
527
528 // Compute spectral flatness on input spectrum
529 // magnIn is the magnitude spectrum
530 // spectral flatness is returned in inst->featureData[0]
531 void WebRtcNs_ComputeSpectralFlatness(NSinst_t* inst, float* magnIn) {
532 int i;
533 int shiftLP = 1; //option to remove first bin(s) from spectral measures
534 float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp;
535
536 // compute spectral measures
537 // for flatness
538 avgSpectralFlatnessNum = 0.0;
539 avgSpectralFlatnessDen = inst->sumMagn;
540 for (i = 0; i < shiftLP; i++) {
541 avgSpectralFlatnessDen -= magnIn[i];
542 }
543 // compute log of ratio of the geometric to arithmetic mean: check for log(0) case
544 for (i = shiftLP; i < inst->magnLen; i++) {
545 if (magnIn[i] > 0.0) {
546 avgSpectralFlatnessNum += (float)log(magnIn[i]);
547 } else {
548 inst->featureData[0] -= SPECT_FL_TAVG * inst->featureData[0];
549 return;
550 }
551 }
552 //normalize
553 avgSpectralFlatnessDen = avgSpectralFlatnessDen / inst->magnLen;
554 avgSpectralFlatnessNum = avgSpectralFlatnessNum / inst->magnLen;
555
556 //ratio and inverse log: check for case of log(0)
557 spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen;
558
559 //time-avg update of spectral flatness feature
560 inst->featureData[0] += SPECT_FL_TAVG * (spectralTmp - inst->featureData[0]);
561 // done with flatness feature
562 }
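// Editorial note: the feature tracked above is the spectral flatness measure
//
//   SFM = exp( mean_k log(magnIn[k]) ) / mean_k magnIn[k]
//
// i.e. the ratio of the geometric to the arithmetic mean of the magnitude spectrum
// (near 1 for flat, noise-like spectra, small for peaky, speech-like spectra),
// time-smoothed into inst->featureData[0] with SPECT_FL_TAVG.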
563
564 // Compute the difference measure between input spectrum and a template/learned noise spectrum
565 // magnIn is the input spectrum
566 // the reference/template spectrum is inst->magnAvgPause[i]
567 // returns (normalized) spectral difference in inst->featureData[4]
568 void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
569 // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause)
570 int i;
571 float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn;
572
573 avgPause = 0.0;
574 avgMagn = inst->sumMagn;
575 // compute average quantities
576 for (i = 0; i < inst->magnLen; i++) {
577 //conservative smooth noise spectrum from pause frames
578 avgPause += inst->magnAvgPause[i];
579 }
580 avgPause = avgPause / ((float)inst->magnLen);
581 avgMagn = avgMagn / ((float)inst->magnLen);
582
583 covMagnPause = 0.0;
584 varPause = 0.0;
585 varMagn = 0.0;
586 // compute variance and covariance quantities
587 for (i = 0; i < inst->magnLen; i++) {
588 covMagnPause += (magnIn[i] - avgMagn) * (inst->magnAvgPause[i] - avgPause);
589 varPause += (inst->magnAvgPause[i] - avgPause) * (inst->magnAvgPause[i] - avgPause);
590 varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn);
591 }
592 covMagnPause = covMagnPause / ((float)inst->magnLen);
593 varPause = varPause / ((float)inst->magnLen);
594 varMagn = varMagn / ((float)inst->magnLen);
595 // update of average magnitude spectrum
596 inst->featureData[6] += inst->signalEnergy;
597
598 avgDiffNormMagn = varMagn - (covMagnPause * covMagnPause) / (varPause + (float)0.0001);
599 // normalize and compute time-avg update of difference feature
600 avgDiffNormMagn = (float)(avgDiffNormMagn / (inst->featureData[5] + (float)0.0001));
601 inst->featureData[4] += SPECT_DIFF_TAVG * (avgDiffNormMagn - inst->featureData[4]);
602 }
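// Editorial note: with the averages above, the quantity fed into featureData[4] is
//
//   avgDiffNormMagn = ( var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause) )
//                     / featureData[5]
//
// i.e. the input-spectrum variance left unexplained by the conservative pause/noise
// template, normalized by the running signal energy and smoothed with SPECT_DIFF_TAVG.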
603
604 // Compute speech/noise probability
605 // speech/noise probability is returned in: probSpeechFinal
606 //magn is the input magnitude spectrum
607 //noise is the noise spectrum
608 //snrLocPrior is the prior snr for each freq.
609 //snr loc_post is the post snr for each freq.
610 void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snrLocPrior,
611 float* snrLocPost) {
612 int i, sgnMap;
613 float invLrt, gainPrior, indPrior;
614 float logLrtTimeAvgKsum, besselTmp;
615 float indicator0, indicator1, indicator2;
616 float tmpFloat1, tmpFloat2;
617 float weightIndPrior0, weightIndPrior1, weightIndPrior2;
618 float threshPrior0, threshPrior1, threshPrior2;
619 float widthPrior, widthPrior0, widthPrior1, widthPrior2;
620
621 widthPrior0 = WIDTH_PR_MAP;
622 widthPrior1 = (float)2.0 * WIDTH_PR_MAP; //width for pause region:
623 // lower range, so increase width in tanh map
624 widthPrior2 = (float)2.0 * WIDTH_PR_MAP; //for spectral-difference measure
625
626 //threshold parameters for features
627 threshPrior0 = inst->priorModelPars[0];
628 threshPrior1 = inst->priorModelPars[1];
629 threshPrior2 = inst->priorModelPars[3];
630
631 //sign for flatness feature
632 sgnMap = (int)(inst->priorModelPars[2]);
633
634 //weight parameters for features
635 weightIndPrior0 = inst->priorModelPars[4];
636 weightIndPrior1 = inst->priorModelPars[5];
637 weightIndPrior2 = inst->priorModelPars[6];
638
639 // compute feature based on average LR factor
640 // this is the average over all frequencies of the smooth log lrt
641 logLrtTimeAvgKsum = 0.0;
642 for (i = 0; i < inst->magnLen; i++) {
643 tmpFloat1 = (float)1.0 + (float)2.0 * snrLocPrior[i];
644 tmpFloat2 = (float)2.0 * snrLocPrior[i] / (tmpFloat1 + (float)0.0001);
645 besselTmp = (snrLocPost[i] + (float)1.0) * tmpFloat2;
646 inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - (float)log(tmpFloat1)
647 - inst->logLrtTimeAvg[i]);
648 logLrtTimeAvgKsum += inst->logLrtTimeAvg[i];
649 }
650 logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (inst->magnLen);
651 inst->featureData[3] = logLrtTimeAvgKsum;
652 // done with computation of LR factor
653
654 //
655 //compute the indicator functions
656 //
657
658 // average lrt feature
659 widthPrior = widthPrior0;
660 //use larger width in tanh map for pause regions
661 if (logLrtTimeAvgKsum < threshPrior0) {
662 widthPrior = widthPrior1;
663 }
664 // compute indicator function: sigmoid map
665 indicator0 = (float)0.5 * ((float)tanh(widthPrior *
666 (logLrtTimeAvgKsum - threshPrior0)) + (float)1.0);
667
668 //spectral flatness feature
669 tmpFloat1 = inst->featureData[0];
670 widthPrior = widthPrior0;
671 //use larger width in tanh map for pause regions
672 if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) {
673 widthPrior = widthPrior1;
674 }
675 if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) {
676 widthPrior = widthPrior1;
677 }
678 // compute indicator function: sigmoid map
679 indicator1 = (float)0.5 * ((float)tanh((float)sgnMap *
680 widthPrior * (threshPrior1 - tmpFloat1)) + (float)1.0);
681
682 //for template spectrum-difference
683 tmpFloat1 = inst->featureData[4];
684 widthPrior = widthPrior0;
685 //use larger width in tanh map for pause regions
686 if (tmpFloat1 < threshPrior2) {
687 widthPrior = widthPrior2;
688 }
689 // compute indicator function: sigmoid map
690 indicator2 = (float)0.5 * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2))
691 + (float)1.0);
692
693 //combine the indicator function with the feature weights
694 indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2
695 * indicator2;
696 // done with computing indicator function
697
698 //compute the prior probability
699 inst->priorSpeechProb += PRIOR_UPDATE * (indPrior - inst->priorSpeechProb);
700 // make sure probabilities are within range: keep floor to 0.01
701 if (inst->priorSpeechProb > 1.0) {
702 inst->priorSpeechProb = (float)1.0;
703 }
704 if (inst->priorSpeechProb < 0.01) {
705 inst->priorSpeechProb = (float)0.01;
706 }
707
708 //final speech probability: combine prior model with LR factor:
709 gainPrior = ((float)1.0 - inst->priorSpeechProb) / (inst->priorSpeechProb + (float)0.0001);
710 for (i = 0; i < inst->magnLen; i++) {
711 invLrt = (float)exp(-inst->logLrtTimeAvg[i]);
712 invLrt = (float)gainPrior * invLrt;
713 probSpeechFinal[i] = (float)1.0 / ((float)1.0 + invLrt);
714 }
715 }
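// Editorial note: the final loop above combines the prior model with the per-bin
// likelihood ratio as
//
//   P(speech | k) = 1 / ( 1 + gainPrior * exp(-logLrtTimeAvg[k]) ),
//   gainPrior     = (1 - priorSpeechProb) / priorSpeechProb   (regularized by +0.0001),
//
// so the frame-level prior derived from the three indicator features scales the
// frequency-dependent LRT evidence.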
716
717 int WebRtcNs_ProcessCore(NSinst_t* inst,
718 short* speechFrame,
719 short* speechFrameHB,
720 short* outFrame,
721 short* outFrameHB) {
722 // main routine for noise reduction
723
724 int flagHB = 0;
725 int i;
726 const int kStartBand = 5; // Skip first frequency bins during estimation.
727 int updateParsFlag;
728
729 float energy1, energy2, gain, factor, factor1, factor2;
730 float signalEnergy, sumMagn;
731 float snrPrior, currentEstimateStsa;
732 float tmpFloat1, tmpFloat2, tmpFloat3, probSpeech, probNonSpeech;
733 float gammaNoiseTmp, gammaNoiseOld;
734 float noiseUpdateTmp, fTmp, dTmp;
735 float fin[BLOCKL_MAX], fout[BLOCKL_MAX];
736 float winData[ANAL_BLOCKL_MAX];
737 float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL];
738 float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL];
739 float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL];
740 float probSpeechFinal[HALF_ANAL_BLOCKL], previousEstimateStsa[HALF_ANAL_BLOCKL];
741 float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
742 // Variables during startup
743 float sum_log_i = 0.0;
744 float sum_log_i_square = 0.0;
745 float sum_log_magn = 0.0;
746 float sum_log_i_log_magn = 0.0;
747 float parametric_noise = 0.0;
748 float parametric_exp = 0.0;
749 float parametric_num = 0.0;
750
751 // SWB variables
752 int deltaBweHB = 1;
753 int deltaGainHB = 1;
754 float decayBweHB = 1.0;
755 float gainMapParHB = 1.0;
756 float gainTimeDomainHB = 1.0;
757 float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB;
758
759 // Check that initialization has been done
760 if (inst->initFlag != 1) {
761 return (-1);
762 }
763 // Check for valid pointers based on sampling rate
764 if (inst->fs == 32000) {
765 if (speechFrameHB == NULL) {
766 return -1;
767 }
768 flagHB = 1;
769 // range for averaging low band quantities for H band gain
770 deltaBweHB = (int)inst->magnLen / 4;
771 deltaGainHB = deltaBweHB;
772 }
773 //
774 updateParsFlag = inst->modelUpdatePars[0];
775 //
776
777 //for LB do all processing
778 // convert to float
779 for (i = 0; i < inst->blockLen10ms; i++) {
780 fin[i] = (float)speechFrame[i];
781 }
782 // update analysis buffer for L band
783 memcpy(inst->dataBuf, inst->dataBuf + inst->blockLen10ms,
784 sizeof(float) * (inst->anaLen - inst->blockLen10ms));
785 memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms, fin,
786 sizeof(float) * inst->blockLen10ms);
787
788 if (flagHB == 1) {
789 // convert to float
790 for (i = 0; i < inst->blockLen10ms; i++) {
791 fin[i] = (float)speechFrameHB[i];
792 }
793 // update analysis buffer for H band
794 memcpy(inst->dataBufHB, inst->dataBufHB + inst->blockLen10ms,
795 sizeof(float) * (inst->anaLen - inst->blockLen10ms));
796 memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms, fin,
797 sizeof(float) * inst->blockLen10ms);
798 }
799
800 // check if processing needed
801 if (inst->outLen == 0) {
802 // windowing
803 energy1 = 0.0;
804 for (i = 0; i < inst->anaLen; i++) {
805 winData[i] = inst->window[i] * inst->dataBuf[i];
806 energy1 += winData[i] * winData[i];
807 }
808 if (energy1 == 0.0) {
809 // synthesize the special case of zero input
810 // we want to avoid updating statistics in this case:
811 // Updating feature statistics when we have zeros only will cause thresholds to
812 // move towards zero signal situations. This in turn has the effect that once the
813 // signal is "turned on" (non-zero values) everything will be treated as speech
814 // and there is no noise suppression effect. Depending on the duration of the
815 // inactive signal it takes a considerable amount of time for the system to learn
816 // what is noise and what is speech.
817
818 // read out fully processed segment
819 for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) {
820 fout[i - inst->windShift] = inst->syntBuf[i];
821 }
822 // update synthesis buffer
823 memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen,
824 sizeof(float) * (inst->anaLen - inst->blockLen));
825 memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0,
826 sizeof(float) * inst->blockLen);
827
828 // out buffer
829 inst->outLen = inst->blockLen - inst->blockLen10ms;
830 if (inst->blockLen > inst->blockLen10ms) {
831 for (i = 0; i < inst->outLen; i++) {
832 inst->outBuf[i] = fout[i + inst->blockLen10ms];
833 }
834 }
835 // convert to short
836 for (i = 0; i < inst->blockLen10ms; i++) {
837 dTmp = fout[i];
838 if (dTmp < WEBRTC_SPL_WORD16_MIN) {
839 dTmp = WEBRTC_SPL_WORD16_MIN;
840 } else if (dTmp > WEBRTC_SPL_WORD16_MAX) {
841 dTmp = WEBRTC_SPL_WORD16_MAX;
842 }
843 outFrame[i] = (short)dTmp;
844 }
845
846 // for time-domain gain of HB
847 if (flagHB == 1) {
848 for (i = 0; i < inst->blockLen10ms; i++) {
849 dTmp = inst->dataBufHB[i];
850 if (dTmp < WEBRTC_SPL_WORD16_MIN) {
851 dTmp = WEBRTC_SPL_WORD16_MIN;
852 } else if (dTmp > WEBRTC_SPL_WORD16_MAX) {
853 dTmp = WEBRTC_SPL_WORD16_MAX;
854 }
855 outFrameHB[i] = (short)dTmp;
856 }
857 } // end of H band gain computation
858 //
859 return 0;
860 }
861
862 //
863 inst->blockInd++; // Update the block index only when we process a block.
864 // FFT
865 WebRtc_rdft(inst->anaLen, 1, winData, inst->ip, inst->wfft);
866
867 imag[0] = 0;
868 real[0] = winData[0];
869 magn[0] = (float)(fabs(real[0]) + 1.0f);
870 imag[inst->magnLen - 1] = 0;
871 real[inst->magnLen - 1] = winData[1];
872 magn[inst->magnLen - 1] = (float)(fabs(real[inst->magnLen - 1]) + 1.0f);
873 signalEnergy = (float)(real[0] * real[0]) +
874 (float)(real[inst->magnLen - 1] * real[inst->magnLen - 1]);
875 sumMagn = magn[0] + magn[inst->magnLen - 1];
876 if (inst->blockInd < END_STARTUP_SHORT) {
877 inst->initMagnEst[0] += magn[0];
878 inst->initMagnEst[inst->magnLen - 1] += magn[inst->magnLen - 1];
879 tmpFloat2 = log((float)(inst->magnLen - 1));
880 sum_log_i = tmpFloat2;
881 sum_log_i_square = tmpFloat2 * tmpFloat2;
882 tmpFloat1 = log(magn[inst->magnLen - 1]);
883 sum_log_magn = tmpFloat1;
884 sum_log_i_log_magn = tmpFloat2 * tmpFloat1;
885 }
886 for (i = 1; i < inst->magnLen - 1; i++) {
887 real[i] = winData[2 * i];
888 imag[i] = winData[2 * i + 1];
889 // magnitude spectrum
890 fTmp = real[i] * real[i];
891 fTmp += imag[i] * imag[i];
892 signalEnergy += fTmp;
893 magn[i] = ((float)sqrt(fTmp)) + 1.0f;
894 sumMagn += magn[i];
895 if (inst->blockInd < END_STARTUP_SHORT) {
896 inst->initMagnEst[i] += magn[i];
897 if (i >= kStartBand) {
898 tmpFloat2 = log((float)i);
899 sum_log_i += tmpFloat2;
900 sum_log_i_square += tmpFloat2 * tmpFloat2;
901 tmpFloat1 = log(magn[i]);
902 sum_log_magn += tmpFloat1;
903 sum_log_i_log_magn += tmpFloat2 * tmpFloat1;
904 }
905 }
906 }
907 signalEnergy = signalEnergy / ((float)inst->magnLen);
908 inst->signalEnergy = signalEnergy;
909 inst->sumMagn = sumMagn;
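// Editorial note on the spectrum layout assumed here and in the synthesis further
// down: the output of WebRtc_rdft is treated as a packed real-input spectrum, with
// winData[0] holding the DC bin, winData[1] the Nyquist bin, and winData[2*i] /
// winData[2*i + 1] the real / imaginary parts of bin i for 0 < i < magnLen - 1, which
// is why the first and last bins are unpacked separately when forming magn[].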
910
911 //compute spectral flatness on input spectrum
912 WebRtcNs_ComputeSpectralFlatness(inst, magn);
913 // quantile noise estimate
914 WebRtcNs_NoiseEstimation(inst, magn, noise);
915 //compute simplified noise model during startup
916 if (inst->blockInd < END_STARTUP_SHORT) {
917 // Estimate White noise
918 inst->whiteNoiseLevel += sumMagn / ((float)inst->magnLen) * inst->overdrive;
919 // Estimate Pink noise parameters
920 tmpFloat1 = sum_log_i_square * ((float)(inst->magnLen - kStartBand));
921 tmpFloat1 -= (sum_log_i * sum_log_i);
922 tmpFloat2 = (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn);
923 tmpFloat3 = tmpFloat2 / tmpFloat1;
924 // Constrain the estimated spectrum to be positive
925 if (tmpFloat3 < 0.0f) {
926 tmpFloat3 = 0.0f;
927 }
928 inst->pinkNoiseNumerator += tmpFloat3;
929 tmpFloat2 = (sum_log_i * sum_log_magn);
930 tmpFloat2 -= ((float)(inst->magnLen - kStartBand)) * sum_log_i_log_magn;
931 tmpFloat3 = tmpFloat2 / tmpFloat1;
932 // Constrain the pink noise power to be in the interval [0, 1];
933 if (tmpFloat3 < 0.0f) {
934 tmpFloat3 = 0.0f;
935 }
936 if (tmpFloat3 > 1.0f) {
937 tmpFloat3 = 1.0f;
938 }
939 inst->pinkNoiseExp += tmpFloat3;
940
941 // Calculate frequency independent parts of parametric noise estimate.
942 if (inst->pinkNoiseExp == 0.0f) {
943 // Use white noise estimate
944 parametric_noise = inst->whiteNoiseLevel;
945 } else {
946 // Use pink noise estimate
947 parametric_num = exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1));
948 parametric_num *= (float)(inst->blockInd + 1);
949 parametric_exp = inst->pinkNoiseExp / (float)(inst->blockInd + 1);
950 parametric_noise = parametric_num / pow((float)kStartBand, parametric_exp);
951 }
952 for (i = 0; i < inst->magnLen; i++) {
953 // Estimate the background noise using the white and pink noise parameters
954 if ((inst->pinkNoiseExp > 0.0f) && (i >= kStartBand)) {
955 // Use pink noise estimate
956 parametric_noise = parametric_num / pow((float)i, parametric_exp);
957 }
958 theFilterTmp[i] = (inst->initMagnEst[i] - inst->overdrive * parametric_noise);
959 theFilterTmp[i] /= (inst->initMagnEst[i] + (float)0.0001);
960 // Weight quantile noise with modeled noise
961 noise[i] *= (inst->blockInd);
962 tmpFloat2 = parametric_noise * (END_STARTUP_SHORT - inst->blockInd);
963 noise[i] += (tmpFloat2 / (float)(inst->blockInd + 1));
964 noise[i] /= END_STARTUP_SHORT;
965 }
966 }
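// Editorial sketch of the startup model above: for each startup block a power-law
// ("pink") noise model, log(magn[i]) ~= log(parametric_num) - parametric_exp * log(i),
// is fitted by least squares over bins i >= kStartBand, and the quantile estimate is
// cross-faded with it:
//
//   noise[i] = ( blockInd * noise_quantile[i]
//              + (END_STARTUP_SHORT - blockInd) * parametric_noise / (blockInd + 1) )
//              / END_STARTUP_SHORT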
967 //compute average signal during END_STARTUP_LONG time:
968 // used to normalize spectral difference measure
969 if (inst->blockInd < END_STARTUP_LONG) {
970 inst->featureData[5] *= inst->blockInd;
971 inst->featureData[5] += signalEnergy;
972 inst->featureData[5] /= (inst->blockInd + 1);
973 }
974
975 #ifdef PROCESS_FLOW_0
976 if (inst->blockInd > END_STARTUP_LONG) {
977 //option: average the quantile noise: for check with AEC2
978 for (i = 0; i < inst->magnLen; i++) {
979 noise[i] = (float)0.6 * inst->noisePrev[i] + (float)0.4 * noise[i];
980 }
981 for (i = 0; i < inst->magnLen; i++) {
982 // Wiener with over-subtraction:
983 theFilter[i] = (magn[i] - inst->overdrive * noise[i]) / (magn[i] + (float)0.0001);
984 }
985 }
986 #else
987 //start processing at frames == converged+1
988 //
989 // STEP 1: compute prior and post snr based on quantile noise est
990 //
991
992 // compute DD estimate of prior SNR: needed for new method
993 for (i = 0; i < inst->magnLen; i++) {
994 // post snr
995 snrLocPost[i] = (float)0.0;
996 if (magn[i] > noise[i]) {
997 snrLocPost[i] = magn[i] / (noise[i] + (float)0.0001) - (float)1.0;
998 }
999 // previous post snr
1000 // previous estimate: based on previous frame with gain filter
1001 previousEstimateStsa[i] = inst->magnPrev[i] / (inst->noisePrev[i] + (float)0.0001)
1002 * (inst->smooth[i]);
1003 // DD estimate is sum of two terms: current estimate and previous estimate
1004 // directed decision update of snrPrior
1005 snrLocPrior[i] = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR)
1006 * snrLocPost[i];
1007 // post and prior snr needed for step 2
1008 } // end of loop over freqs
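// Editorial sketch of the directed-decision (DD) prior SNR computed above: per bin k,
//
//   snrLocPost[k]  = max( magn[k] / noise[k] - 1, 0 )
//   snrLocPrior[k] = DD_PR_SNR * ( magnPrev[k] * smooth[k] / noisePrev[k] )
//                  + (1 - DD_PR_SNR) * snrLocPost[k]
//
// i.e. a weighted sum of the previous frame's filtered SNR estimate and the current
// instantaneous post SNR (the small +0.0001 regularizers are omitted here).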
1009 #ifdef PROCESS_FLOW_1
1010 for (i = 0; i < inst->magnLen; i++) {
1011 // gain filter
1012 tmpFloat1 = inst->overdrive + snrLocPrior[i];
1013 tmpFloat2 = (float)snrLocPrior[i] / tmpFloat1;
1014 theFilter[i] = (float)tmpFloat2;
1015 } // end of loop over freqs
1016 #endif
1017 // done with step 1: dd computation of prior and post snr
1018
1019 //
1020 //STEP 2: compute speech/noise likelihood
1021 //
1022 #ifdef PROCESS_FLOW_2
1023 // compute difference of input spectrum with learned/estimated noise spectrum
1024 WebRtcNs_ComputeSpectralDifference(inst, magn);
1025 // compute histograms for parameter decisions (thresholds and weights for features)
1026 // parameters are extracted once every window time (=inst->modelUpdatePars[1])
1027 if (updateParsFlag >= 1) {
1028 // counter update
1029 inst->modelUpdatePars[3]--;
1030 // update histogram
1031 if (inst->modelUpdatePars[3] > 0) {
1032 WebRtcNs_FeatureParameterExtraction(inst, 0);
1033 }
1034 // compute model parameters
1035 if (inst->modelUpdatePars[3] == 0) {
1036 WebRtcNs_FeatureParameterExtraction(inst, 1);
1037 inst->modelUpdatePars[3] = inst->modelUpdatePars[1];
1038 // if wish to update only once, set flag to zero
1039 if (updateParsFlag == 1) {
1040 inst->modelUpdatePars[0] = 0;
1041 } else {
1042 // update every window:
1043 // get normalization for spectral difference for next window estimate
1044 inst->featureData[6] = inst->featureData[6]
1045 / ((float)inst->modelUpdatePars[1]);
1046 inst->featureData[5] = (float)0.5 * (inst->featureData[6]
1047 + inst->featureData[5]);
1048 inst->featureData[6] = (float)0.0;
1049 }
1050 }
1051 }
1052 // compute speech/noise probability
1053 WebRtcNs_SpeechNoiseProb(inst, probSpeechFinal, snrLocPrior, snrLocPost);
1054 // time-avg parameter for noise update
1055 gammaNoiseTmp = NOISE_UPDATE;
1056 for (i = 0; i < inst->magnLen; i++) {
1057 probSpeech = probSpeechFinal[i];
1058 probNonSpeech = (float)1.0 - probSpeech;
1059 // temporary noise update:
1060 // use it for speech frames if update value is less than previous
1061 noiseUpdateTmp = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp)
1062 * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
1063 //
1064 // time-constant based on speech/noise state
1065 gammaNoiseOld = gammaNoiseTmp;
1066 gammaNoiseTmp = NOISE_UPDATE;
1067 // increase gamma (i.e., less noise update) for frame likely to be speech
1068 if (probSpeech > PROB_RANGE) {
1069 gammaNoiseTmp = SPEECH_UPDATE;
1070 }
1071 // conservative noise update
1072 if (probSpeech < PROB_RANGE) {
1073 inst->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - inst->magnAvgPause[i]);
1074 }
1075 // noise update
1076 if (gammaNoiseTmp == gammaNoiseOld) {
1077 noise[i] = noiseUpdateTmp;
1078 } else {
1079 noise[i] = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp)
1080 * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
1081 // allow for noise update downwards:
1082 // if noise update decreases the noise, it is safe, so allow it to happen
1083 if (noiseUpdateTmp < noise[i]) {
1084 noise[i] = noiseUpdateTmp;
1085 }
1086 }
1087 } // end of freq loop
1088 // done with step 2: noise update
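// Editorial sketch of the noise update in step 2: with gamma = NOISE_UPDATE for
// noise-like bins and gamma = SPEECH_UPDATE (slower adaptation) once probSpeech
// exceeds PROB_RANGE,
//
//   noise[k] = gamma * noisePrev[k]
//            + (1 - gamma) * ( probNonSpeech * magn[k] + probSpeech * noisePrev[k] ),
//
// and the faster NOISE_UPDATE result is still taken whenever it would move the
// estimate downwards, since lowering the noise estimate is considered safe.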
1089
1090 //
1091 // STEP 3: compute dd update of prior snr and post snr based on new noise estimate
1092 //
1093 for (i = 0; i < inst->magnLen; i++) {
1094 // post and prior snr
1095 currentEstimateStsa = (float)0.0;
1096 if (magn[i] > noise[i]) {
1097 currentEstimateStsa = magn[i] / (noise[i] + (float)0.0001) - (float)1.0;
1098 }
1099 // DD estimate is sum of two terms: current estimate and previous estimate
1100 // directed decision update of snrPrior
1101 snrPrior = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR)
1102 * currentEstimateStsa;
1103 // gain filter
1104 tmpFloat1 = inst->overdrive + snrPrior;
1105 tmpFloat2 = (float)snrPrior / tmpFloat1;
1106 theFilter[i] = (float)tmpFloat2;
1107 } // end of loop over freqs
1108 // done with step3
1109 #endif
1110 #endif
1111
1112 for (i = 0; i < inst->magnLen; i++) {
1113 // flooring bottom
1114 if (theFilter[i] < inst->denoiseBound) {
1115 theFilter[i] = inst->denoiseBound;
1116 }
1117 // flooring top
1118 if (theFilter[i] > (float)1.0) {
1119 theFilter[i] = 1.0;
1120 }
1121 if (inst->blockInd < END_STARTUP_SHORT) {
1122 // flooring bottom
1123 if (theFilterTmp[i] < inst->denoiseBound) {
1124 theFilterTmp[i] = inst->denoiseBound;
1125 }
1126 // flooring top
1127 if (theFilterTmp[i] > (float)1.0) {
1128 theFilterTmp[i] = 1.0;
1129 }
1130 // Weight the two suppression filters
1131 theFilter[i] *= (inst->blockInd);
1132 theFilterTmp[i] *= (END_STARTUP_SHORT - inst->blockInd);
1133 theFilter[i] += theFilterTmp[i];
1134 theFilter[i] /= (END_STARTUP_SHORT);
1135 }
1136 // smoothing
1137 #ifdef PROCESS_FLOW_0
1138 inst->smooth[i] *= SMOOTH; // value set to 0.7 in defines.h file
1139 inst->smooth[i] += ((float)1.0 - SMOOTH) * theFilter[i];
1140 #else
1141 inst->smooth[i] = theFilter[i];
1142 #endif
1143 real[i] *= inst->smooth[i];
1144 imag[i] *= inst->smooth[i];
1145 }
1146 // keep track of noise and magn spectrum for next frame
1147 for (i = 0; i < inst->magnLen; i++) {
1148 inst->noisePrev[i] = noise[i];
1149 inst->magnPrev[i] = magn[i];
1150 }
1151 // back to time domain
1152 winData[0] = real[0];
1153 winData[1] = real[inst->magnLen - 1];
1154 for (i = 1; i < inst->magnLen - 1; i++) {
1155 winData[2 * i] = real[i];
1156 winData[2 * i + 1] = imag[i];
1157 }
1158 WebRtc_rdft(inst->anaLen, -1, winData, inst->ip, inst->wfft);
1159
1160 for (i = 0; i < inst->anaLen; i++) {
1161 real[i] = 2.0f * winData[i] / inst->anaLen; // fft scaling
1162 }
1163
1164 //scale factor: only do it after END_STARTUP_LONG time
1165 factor = (float)1.0;
1166 if (inst->gainmap == 1 && inst->blockInd > END_STARTUP_LONG) {
1167 factor1 = (float)1.0;
1168 factor2 = (float)1.0;
1169
1170 energy2 = 0.0;
1171 for (i = 0; i < inst->anaLen; i++) {
1172 energy2 += (float)real[i] * (float)real[i];
1173 }
1174 gain = (float)sqrt(energy2 / (energy1 + (float)1.0));
1175
1176 #ifdef PROCESS_FLOW_2
1177 // scaling for new version
1178 if (gain > B_LIM) {
1179 factor1 = (float)1.0 + (float)1.3 * (gain - B_LIM);
1180 if (gain * factor1 > (float)1.0) {
1181 factor1 = (float)1.0 / gain;
1182 }
1183 }
1184 if (gain < B_LIM) {
1185 //don't reduce scale too much for pause regions:
1186 // attenuation here should be controlled by flooring
1187 if (gain <= inst->denoiseBound) {
1188 gain = inst->denoiseBound;
1189 }
1190 factor2 = (float)1.0 - (float)0.3 * (B_LIM - gain);
1191 }
1192 //combine both scales with speech/noise prob:
1193 // note prior (priorSpeechProb) is not frequency dependent
1194 factor = inst->priorSpeechProb * factor1 + ((float)1.0 - inst->priorSpeechProb)
1195 * factor2;
1196 #else
1197 if (gain > B_LIM) {
1198 factor = (float)1.0 + (float)1.3 * (gain - B_LIM);
1199 } else {
1200 factor = (float)1.0 + (float)2.0 * (gain - B_LIM);
1201 }
1202 if (gain * factor > (float)1.0) {
1203 factor = (float)1.0 / gain;
1204 }
1205 #endif
1206 } // out of inst->gainmap==1
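// Editorial note on the scaling above (PROCESS_FLOW_2 branch): gain =
// sqrt(energy_out / energy_in) measures how much the suppression changed the frame
// energy; factor1 boosts frames with gain above B_LIM (capped so that gain * factor1
// stays <= 1), factor2 mildly attenuates low-gain (pause-like) frames, and the two are
// blended by the frame-level speech probability before being applied in the synthesis
// below.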
1207
1208 // synthesis
1209 for (i = 0; i < inst->anaLen; i++) {
1210 inst->syntBuf[i] += factor * inst->window[i] * (float)real[i];
1211 }
1212 // read out fully processed segment
1213 for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) {
1214 fout[i - inst->windShift] = inst->syntBuf[i];
1215 }
1216 // update synthesis buffer
1217 memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen,
1218 sizeof(float) * (inst->anaLen - inst->blockLen));
1219 memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0,
1220 sizeof(float) * inst->blockLen);
1221
1222 // out buffer
1223 inst->outLen = inst->blockLen - inst->blockLen10ms;
1224 if (inst->blockLen > inst->blockLen10ms) {
1225 for (i = 0; i < inst->outLen; i++) {
1226 inst->outBuf[i] = fout[i + inst->blockLen10ms];
1227 }
1228 }
1229 } // end of if out.len==0
1230 else {
1231 for (i = 0; i < inst->blockLen10ms; i++) {
1232 fout[i] = inst->outBuf[i];
1233 }
1234 memcpy(inst->outBuf, inst->outBuf + inst->blockLen10ms,
1235 sizeof(float) * (inst->outLen - inst->blockLen10ms));
1236 memset(inst->outBuf + inst->outLen - inst->blockLen10ms, 0,
1237 sizeof(float) * inst->blockLen10ms);
1238 inst->outLen -= inst->blockLen10ms;
1239 }
1240
1241 // convert to short
1242 for (i = 0; i < inst->blockLen10ms; i++) {
1243 dTmp = fout[i];
1244 if (dTmp < WEBRTC_SPL_WORD16_MIN) {
1245 dTmp = WEBRTC_SPL_WORD16_MIN;
1246 } else if (dTmp > WEBRTC_SPL_WORD16_MAX) {
1247 dTmp = WEBRTC_SPL_WORD16_MAX;
1248 }
1249 outFrame[i] = (short)dTmp;
1250 }
1251
1252 // for time-domain gain of HB
1253 if (flagHB == 1) {
1254 for (i = 0; i < inst->magnLen; i++) {
1255 inst->speechProbHB[i] = probSpeechFinal[i];
1256 }
1257 if (inst->blockInd > END_STARTUP_LONG) {
1258 // average speech prob from low band
1259 // avg over second half (i.e., 4->8kHz) of freq. spectrum
1260 avgProbSpeechHB = 0.0;
1261 for (i = inst->magnLen - deltaBweHB - 1; i < inst->magnLen - 1; i++) {
1262 avgProbSpeechHB += inst->speechProbHB[i];
1263 }
1264 avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB);
1265 // average filter gain from low band
1266 // average over second half (i.e., 4->8kHz) of freq. spectrum
1267 avgFilterGainHB = 0.0;
1268 for (i = inst->magnLen - deltaGainHB - 1; i < inst->magnLen - 1; i++) {
1269 avgFilterGainHB += inst->smooth[i];
1270 }
1271 avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB));
1272 avgProbSpeechHBTmp = (float)2.0 * avgProbSpeechHB - (float)1.0;
1273 // gain based on speech prob:
1274 gainModHB = (float)0.5 * ((float)1.0 + (float)tanh(gainMapParHB * avgProbSpeechHBTmp));
1275 //combine gain with low band gain
1276 gainTimeDomainHB = (float)0.5 * gainModHB + (float)0.5 * avgFilterGainHB;
1277 if (avgProbSpeechHB >= (float)0.5) {
1278 gainTimeDomainHB = (float)0.25 * gainModHB + (float)0.75 * avgFilterGainHB;
1279 }
1280 gainTimeDomainHB = gainTimeDomainHB * decayBweHB;
1281 } // end of converged
1282 //make sure gain is within flooring range
1283 // flooring bottom
1284 if (gainTimeDomainHB < inst->denoiseBound) {
1285 gainTimeDomainHB = inst->denoiseBound;
1286 }
1287 // flooring top
1288 if (gainTimeDomainHB > (float)1.0) {
1289 gainTimeDomainHB = 1.0;
1290 }
1291 //apply gain
1292 for (i = 0; i < inst->blockLen10ms; i++) {
1293 dTmp = gainTimeDomainHB * inst->dataBufHB[i];
1294 if (dTmp < WEBRTC_SPL_WORD16_MIN) {
1295 dTmp = WEBRTC_SPL_WORD16_MIN;
1296 } else if (dTmp > WEBRTC_SPL_WORD16_MAX) {
1297 dTmp = WEBRTC_SPL_WORD16_MAX;
1298 }
1299 outFrameHB[i] = (short)dTmp;
1300 }
1301 } // end of H band gain computation
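// Editorial sketch of the high-band gain above: with p the average low-band speech
// probability and g the average low-band filter gain over the upper half of the
// spectrum,
//
//   gainModHB        = 0.5 * ( 1 + tanh( gainMapParHB * (2p - 1) ) )
//   gainTimeDomainHB = 0.5  * gainModHB + 0.5  * g   if p <  0.5
//   gainTimeDomainHB = 0.25 * gainModHB + 0.75 * g   if p >= 0.5
//
// floored at denoiseBound, capped at 1.0, and applied as a single time-domain gain to
// the high-band samples.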
1302 //
1303
1304 return 0;
1305 }
1306