1 /* Copyright (C) 2003 Epic Games (written by Jean-Marc Valin)
2    Copyright (C) 2004-2006 Epic Games
3 
4    File: preprocess.c
5    Preprocessor with denoising based on the algorithm by Ephraim and Malah
6 
7    Redistribution and use in source and binary forms, with or without
8    modification, are permitted provided that the following conditions are
9    met:
10 
11    1. Redistributions of source code must retain the above copyright notice,
12    this list of conditions and the following disclaimer.
13 
14    2. Redistributions in binary form must reproduce the above copyright
15    notice, this list of conditions and the following disclaimer in the
16    documentation and/or other materials provided with the distribution.
17 
18    3. The name of the author may not be used to endorse or promote products
19    derived from this software without specific prior written permission.
20 
21    THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
25    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31    POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 
35 /*
36    Recommended papers:
37 
38    Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
39    short-time spectral amplitude estimator". IEEE Transactions on Acoustics,
40    Speech and Signal Processing, vol. ASSP-32, no. 6, pp. 1109-1121, 1984.
41 
42    Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
43    log-spectral amplitude estimator". IEEE Transactions on Acoustics, Speech and
44    Signal Processing, vol. ASSP-33, no. 2, pp. 443-445, 1985.
45 
46    I. Cohen and B. Berdugo, "Speech enhancement for non-stationary noise environments".
47    Signal Processing, vol. 81, no. 2, pp. 2403-2418, 2001.
48 
49    Stefan Gustafsson, Rainer Martin, Peter Jax, and Peter Vary. "A psychoacoustic
50    approach to combined acoustic echo cancellation and noise reduction". IEEE
51    Transactions on Speech and Audio Processing, 2002.
52 
53    J.-M. Valin, J. Rouat, and F. Michaud, "Microphone array post-filter for separation
54    of simultaneous non-stationary sources". In Proceedings IEEE International
55    Conference on Acoustics, Speech, and Signal Processing, 2004.
56 */
57 
58 #ifdef HAVE_CONFIG_H
59 #include "config.h"
60 #endif
61 
62 #include <math.h>
63 #include "speex/speex_preprocess.h"
64 #include "speex/speex_echo.h"
65 #include "arch.h"
66 #include "fftwrap.h"
67 #include "filterbank.h"
68 #include "math_approx.h"
69 #include "os_support.h"
70 
71 #define LOUDNESS_EXP 5.f
72 #define AMP_SCALE .001f
73 #define AMP_SCALE_1 1000.f
74 
75 #define NB_BANDS 24
76 
77 #define SPEECH_PROB_START_DEFAULT       QCONST16(0.35f,15)
78 #define SPEECH_PROB_CONTINUE_DEFAULT    QCONST16(0.20f,15)
79 #define NOISE_SUPPRESS_DEFAULT       -15
80 #define ECHO_SUPPRESS_DEFAULT        -40
81 #define ECHO_SUPPRESS_ACTIVE_DEFAULT -15
82 
83 #ifndef NULL
84 #define NULL 0
85 #endif
86 
87 #define SQR(x) ((x)*(x))
88 #define SQR16(x) (MULT16_16((x),(x)))
89 #define SQR16_Q15(x) (MULT16_16_Q15((x),(x)))
90 
91 #ifdef FIXED_POINT
DIV32_16_Q8(spx_word32_t a,spx_word32_t b)92 static inline spx_word16_t DIV32_16_Q8(spx_word32_t a, spx_word32_t b)
93 {
94    if (SHR32(a,7) >= b)
95    {
96       return 32767;
97    } else {
98       if (b>=QCONST32(1,23))
99       {
100          a = SHR32(a,8);
101          b = SHR32(b,8);
102       }
103       if (b>=QCONST32(1,19))
104       {
105          a = SHR32(a,4);
106          b = SHR32(b,4);
107       }
108       if (b>=QCONST32(1,15))
109       {
110          a = SHR32(a,4);
111          b = SHR32(b,4);
112       }
113       a = SHL32(a,8);
114       return PDIV32_16(a,b);
115    }
116 
117 }
DIV32_16_Q15(spx_word32_t a,spx_word32_t b)118 static inline spx_word16_t DIV32_16_Q15(spx_word32_t a, spx_word32_t b)
119 {
120    if (SHR32(a,15) >= b)
121    {
122       return 32767;
123    } else {
124       if (b>=QCONST32(1,23))
125       {
126          a = SHR32(a,8);
127          b = SHR32(b,8);
128       }
129       if (b>=QCONST32(1,19))
130       {
131          a = SHR32(a,4);
132          b = SHR32(b,4);
133       }
134       if (b>=QCONST32(1,15))
135       {
136          a = SHR32(a,4);
137          b = SHR32(b,4);
138       }
139       a = SHL32(a,15)-a;
140       return DIV32_16(a,b);
141    }
142 }
143 #define SNR_SCALING 256.f
144 #define SNR_SCALING_1 0.0039062f
145 #define SNR_SHIFT 8
146 
147 #define FRAC_SCALING 32767.f
148 #define FRAC_SCALING_1 3.0518e-05
149 #define FRAC_SHIFT 1
150 
151 #define EXPIN_SCALING 2048.f
152 #define EXPIN_SCALING_1 0.00048828f
153 #define EXPIN_SHIFT 11
154 #define EXPOUT_SCALING_1 1.5259e-05
155 
156 #define NOISE_SHIFT 7
157 
158 #else
159 
160 #define DIV32_16_Q8(a,b) ((a)/(b))
161 #define DIV32_16_Q15(a,b) ((a)/(b))
162 #define SNR_SCALING 1.f
163 #define SNR_SCALING_1 1.f
164 #define SNR_SHIFT 0
165 #define FRAC_SCALING 1.f
166 #define FRAC_SCALING_1 1.f
167 #define FRAC_SHIFT 0
168 #define NOISE_SHIFT 0
169 
170 #define EXPIN_SCALING 1.f
171 #define EXPIN_SCALING_1 1.f
172 #define EXPOUT_SCALING_1 1.f
173 
174 #endif
175 
176 /** Speex pre-processor state. */
177 struct SpeexPreprocessState_ {
178    /* Basic info */
179    int    frame_size;        /**< Number of samples processed each time */
180    int    ps_size;           /**< Number of points in the power spectrum */
181    int    sampling_rate;     /**< Sampling rate of the input/output */
182    int    nbands;
183    FilterBank *bank;
184 
185    /* Parameters */
186    int    denoise_enabled;
187    int    vad_enabled;
188    int    dereverb_enabled;
189    spx_word16_t  reverb_decay;
190    spx_word16_t  reverb_level;
191    spx_word16_t speech_prob_start;
192    spx_word16_t speech_prob_continue;
193    int    noise_suppress;
194    int    echo_suppress;
195    int    echo_suppress_active;
196    SpeexEchoState *echo_state;
197 
198    spx_word16_t	speech_prob;  /**< Probability last frame was speech */
199 
200    /* DSP-related arrays */
201    spx_word16_t *frame;      /**< Processing frame (2*ps_size) */
202    spx_word16_t *ft;         /**< Processing frame in freq domain (2*ps_size) */
203    spx_word32_t *ps;         /**< Current power spectrum */
204    spx_word16_t *gain2;      /**< Adjusted gains */
205    spx_word16_t *gain_floor; /**< Minimum gain allowed */
206    spx_word16_t *window;     /**< Analysis/Synthesis window */
207    spx_word32_t *noise;      /**< Noise estimate */
208    spx_word32_t *reverb_estimate; /**< Estimate of reverb energy */
209    spx_word32_t *old_ps;     /**< Power spectrum for last frame */
210    spx_word16_t *gain;       /**< Ephraim Malah gain */
211    spx_word16_t *prior;      /**< A-priori SNR */
212    spx_word16_t *post;       /**< A-posteriori SNR */
213 
214    spx_word32_t *S;          /**< Smoothed power spectrum */
215    spx_word32_t *Smin;       /**< See Cohen paper */
216    spx_word32_t *Stmp;       /**< See Cohen paper */
217    int *update_prob;         /**< Probability of speech presence for noise update */
218 
219    spx_word16_t *zeta;       /**< Smoothed a priori SNR */
220    spx_word32_t *echo_noise;
221    spx_word32_t *residual_echo;
222 
223    /* Misc */
224    spx_word16_t *inbuf;      /**< Input buffer (overlapped analysis) */
225    spx_word16_t *outbuf;     /**< Output buffer (for overlap and add) */
226 
227    /* AGC stuff, only for floating point for now */
228 #ifndef FIXED_POINT
229    int    agc_enabled;
230    float  agc_level;
231    float  loudness_accum;
232    float *loudness_weight;   /**< Perceptual loudness curve */
233    float  loudness;          /**< Loudness estimate */
234    float  agc_gain;          /**< Current AGC gain */
235    float  max_gain;          /**< Maximum gain allowed */
236    float  max_increase_step; /**< Maximum increase in gain from one frame to another */
237    float  max_decrease_step; /**< Maximum decrease in gain from one frame to another */
238    float  prev_loudness;     /**< Loudness of previous frame */
239    float  init_max;          /**< Current gain limit during initialisation */
240 #endif
241    int    nb_adapt;          /**< Number of frames used for adaptation so far */
242    int    was_speech;
243    int    min_count;         /**< Number of frames processed so far */
244    void  *fft_lookup;        /**< Lookup table for the FFT */
245 #ifdef FIXED_POINT
246    int    frame_shift;
247 #endif
248 };
249 
250 
conj_window(spx_word16_t * w,int len)251 static void conj_window(spx_word16_t *w, int len)
252 {
253    int i;
254    for (i=0;i<len;i++)
255    {
256       spx_word16_t tmp;
257 #ifdef FIXED_POINT
258       spx_word16_t x = DIV32_16(MULT16_16(32767,i),len);
259 #else
260       spx_word16_t x = DIV32_16(MULT16_16(QCONST16(4.f,13),i),len);
261 #endif
262       int inv=0;
263       if (x<QCONST16(1.f,13))
264       {
265       } else if (x<QCONST16(2.f,13))
266       {
267          x=QCONST16(2.f,13)-x;
268          inv=1;
269       } else if (x<QCONST16(3.f,13))
270       {
271          x=x-QCONST16(2.f,13);
272          inv=1;
273       } else {
274          x=QCONST16(2.f,13)-x+QCONST16(2.f,13); /* 4 - x */
275       }
276       x = MULT16_16_Q14(QCONST16(1.271903f,14), x);
277       tmp = SQR16_Q15(QCONST16(.5f,15)-MULT16_16_P15(QCONST16(.5f,15),spx_cos_norm(SHL32(EXTEND32(x),2))));
278       if (inv)
279          tmp=SUB16(Q15_ONE,tmp);
280       w[i]=spx_sqrt(SHL32(EXTEND32(tmp),15));
281    }
282 }
283 
284 
285 #ifdef FIXED_POINT
286 /* This function approximates the gain function
287    y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
288    which multiplied by xi/(1+xi) is the optimal gain
289    in the loudness domain ( sqrt[amplitude] )
290    Input in Q11 format, output in Q15
291 */
hypergeom_gain(spx_word32_t xx)292 static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
293 {
294    int ind;
295    spx_word16_t frac;
296    /* Q13 table */
297    static const spx_word16_t table[21] = {
298        6730,  8357,  9868, 11267, 12563, 13770, 14898,
299       15959, 16961, 17911, 18816, 19682, 20512, 21311,
300       22082, 22827, 23549, 24250, 24931, 25594, 26241};
301       ind = SHR32(xx,10);
302       if (ind<0)
303          return Q15_ONE;
304       if (ind>19)
305          return ADD32(EXTEND32(Q15_ONE),EXTEND32(DIV32_16(QCONST32(.1296,23), SHR32(xx,EXPIN_SHIFT-SNR_SHIFT))));
306       frac = SHL32(xx-SHL32(ind,10),5);
307       return SHL32(DIV32_16(PSHR32(MULT16_16(Q15_ONE-frac,table[ind]) + MULT16_16(frac,table[ind+1]),7),(spx_sqrt(SHL32(xx,15)+6711))),7);
308 }
309 
qcurve(spx_word16_t x)310 static inline spx_word16_t qcurve(spx_word16_t x)
311 {
312    x = MAX16(x, 1);
313    return DIV32_16(SHL32(EXTEND32(32767),9),ADD16(512,MULT16_16_Q15(QCONST16(.60f,15),DIV32_16(32767,x))));
314 }
315 
316 /* Compute the gain floor based on different floors for the background noise and residual echo */
compute_gain_floor(int noise_suppress,int effective_echo_suppress,spx_word32_t * noise,spx_word32_t * echo,spx_word16_t * gain_floor,int len)317 static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
318 {
319    int i;
320 
321    if (noise_suppress > effective_echo_suppress)
322    {
323       spx_word16_t noise_gain, gain_ratio;
324       noise_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),noise_suppress)),1)));
325       gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),effective_echo_suppress-noise_suppress)),1)));
326 
327       /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
328       for (i=0;i<len;i++)
329          gain_floor[i] = MULT16_16_Q15(noise_gain,
330                                        spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(PSHR32(noise[i],NOISE_SHIFT) + MULT16_32_Q15(gain_ratio,echo[i]),
331                                              (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
332    } else {
333       spx_word16_t echo_gain, gain_ratio;
334       echo_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),effective_echo_suppress)),1)));
335       gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),noise_suppress-effective_echo_suppress)),1)));
336 
337       /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
338       for (i=0;i<len;i++)
339          gain_floor[i] = MULT16_16_Q15(echo_gain,
340                                        spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(MULT16_32_Q15(gain_ratio,PSHR32(noise[i],NOISE_SHIFT)) + echo[i],
341                                              (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
342    }
343 }
344 
345 #else
346 /* This function approximates the gain function
347    y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
348    which multiplied by xi/(1+xi) is the optimal gain
349    in the loudness domain ( sqrt[amplitude] )
350 */
hypergeom_gain(spx_word32_t xx)351 static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
352 {
353    int ind;
354    float integer, frac;
355    float x;
356    static const float table[21] = {
357       0.82157f, 1.02017f, 1.20461f, 1.37534f, 1.53363f, 1.68092f, 1.81865f,
358       1.94811f, 2.07038f, 2.18638f, 2.29688f, 2.40255f, 2.50391f, 2.60144f,
359       2.69551f, 2.78647f, 2.87458f, 2.96015f, 3.04333f, 3.12431f, 3.20326f};
360       x = EXPIN_SCALING_1*xx;
361       integer = floor(2*x);
362       ind = (int)integer;
363       if (ind<0)
364          return FRAC_SCALING;
365       if (ind>19)
366          return FRAC_SCALING*(1+.1296/x);
367       frac = 2*x-integer;
368       return FRAC_SCALING*((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f);
369 }
370 
qcurve(spx_word16_t x)371 static inline spx_word16_t qcurve(spx_word16_t x)
372 {
373    return 1.f/(1.f+.15f/(SNR_SCALING_1*x));
374 }
375 
compute_gain_floor(int noise_suppress,int effective_echo_suppress,spx_word32_t * noise,spx_word32_t * echo,spx_word16_t * gain_floor,int len)376 static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
377 {
378    int i;
379    float echo_floor;
380    float noise_floor;
381 
382    noise_floor = exp(.2302585f*noise_suppress);
383    echo_floor = exp(.2302585f*effective_echo_suppress);
384 
385    /* Compute the gain floor based on different floors for the background noise and residual echo */
386    for (i=0;i<len;i++)
387       gain_floor[i] = FRAC_SCALING*sqrt(noise_floor*PSHR32(noise[i],NOISE_SHIFT) + echo_floor*echo[i])/sqrt(1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]);
388 }
389 
390 #endif
speex_preprocess_state_init(int frame_size,int sampling_rate)391 EXPORT SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate)
392 {
393    int i;
394    int N, N3, N4, M;
395 
396    SpeexPreprocessState *st = (SpeexPreprocessState *)speex_alloc(sizeof(SpeexPreprocessState));
397    st->frame_size = frame_size;
398 
399    /* Round ps_size down to the nearest power of two */
400 #if 0
401    i=1;
402    st->ps_size = st->frame_size;
403    while(1)
404    {
405       if (st->ps_size & ~i)
406       {
407          st->ps_size &= ~i;
408          i<<=1;
409       } else {
410          break;
411       }
412    }
413 
414 
415    if (st->ps_size < 3*st->frame_size/4)
416       st->ps_size = st->ps_size * 3 / 2;
417 #else
418    st->ps_size = st->frame_size;
419 #endif
420 
421    N = st->ps_size;
422    N3 = 2*N - st->frame_size;
423    N4 = st->frame_size - N3;
424 
425    st->sampling_rate = sampling_rate;
426    st->denoise_enabled = 1;
427    st->vad_enabled = 0;
428    st->dereverb_enabled = 0;
429    st->reverb_decay = 0;
430    st->reverb_level = 0;
431    st->noise_suppress = NOISE_SUPPRESS_DEFAULT;
432    st->echo_suppress = ECHO_SUPPRESS_DEFAULT;
433    st->echo_suppress_active = ECHO_SUPPRESS_ACTIVE_DEFAULT;
434 
435    st->speech_prob_start = SPEECH_PROB_START_DEFAULT;
436    st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT;
437 
438    st->echo_state = NULL;
439 
440    st->nbands = NB_BANDS;
441    M = st->nbands;
442    st->bank = filterbank_new(M, sampling_rate, N, 1);
443 
444    st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
445    st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
446    st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
447 
448    st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
449    st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
450    st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
451    st->residual_echo = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
452    st->reverb_estimate = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
453    st->old_ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
454    st->prior = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
455    st->post = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
456    st->gain = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
457    st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
458    st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
459    st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
460 
461    st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
462    st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
463    st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
464    st->update_prob = (int*)speex_alloc(N*sizeof(int));
465 
466    st->inbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
467    st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
468 
469    conj_window(st->window, 2*N3);
470    for (i=2*N3;i<2*st->ps_size;i++)
471       st->window[i]=Q15_ONE;
472 
473    if (N4>0)
474    {
475       for (i=N3-1;i>=0;i--)
476       {
477          st->window[i+N3+N4]=st->window[i+N3];
478          st->window[i+N3]=1;
479       }
480    }
481    for (i=0;i<N+M;i++)
482    {
483       st->noise[i]=QCONST32(1.f,NOISE_SHIFT);
484       st->reverb_estimate[i]=0;
485       st->old_ps[i]=1;
486       st->gain[i]=Q15_ONE;
487       st->post[i]=SHL16(1, SNR_SHIFT);
488       st->prior[i]=SHL16(1, SNR_SHIFT);
489    }
490 
491    for (i=0;i<N;i++)
492       st->update_prob[i] = 1;
493    for (i=0;i<N3;i++)
494    {
495       st->inbuf[i]=0;
496       st->outbuf[i]=0;
497    }
498 #ifndef FIXED_POINT
499    st->agc_enabled = 0;
500    st->agc_level = 8000;
501    st->loudness_weight = (float*)speex_alloc(N*sizeof(float));
502    for (i=0;i<N;i++)
503    {
504       float ff=((float)i)*.5*sampling_rate/((float)N);
505       /*st->loudness_weight[i] = .5f*(1.f/(1.f+ff/8000.f))+1.f*exp(-.5f*(ff-3800.f)*(ff-3800.f)/9e5f);*/
506       st->loudness_weight[i] = .35f-.35f*ff/16000.f+.73f*exp(-.5f*(ff-3800)*(ff-3800)/9e5f);
507       if (st->loudness_weight[i]<.01f)
508          st->loudness_weight[i]=.01f;
509       st->loudness_weight[i] *= st->loudness_weight[i];
510    }
511    /*st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP);*/
512    st->loudness = 1e-15;
513    st->agc_gain = 1;
514    st->max_gain = 30;
515    st->max_increase_step = exp(0.11513f * 12.*st->frame_size / st->sampling_rate);
516    st->max_decrease_step = exp(-0.11513f * 40.*st->frame_size / st->sampling_rate);
517    st->prev_loudness = 1;
518    st->init_max = 1;
519 #endif
520    st->was_speech = 0;
521 
522    st->fft_lookup = spx_fft_init(2*N);
523 
524    st->nb_adapt=0;
525    st->min_count=0;
526    return st;
527 }
528 
speex_preprocess_state_destroy(SpeexPreprocessState * st)529 EXPORT void speex_preprocess_state_destroy(SpeexPreprocessState *st)
530 {
531    speex_free(st->frame);
532    speex_free(st->ft);
533    speex_free(st->ps);
534    speex_free(st->gain2);
535    speex_free(st->gain_floor);
536    speex_free(st->window);
537    speex_free(st->noise);
538    speex_free(st->reverb_estimate);
539    speex_free(st->old_ps);
540    speex_free(st->gain);
541    speex_free(st->prior);
542    speex_free(st->post);
543 #ifndef FIXED_POINT
544    speex_free(st->loudness_weight);
545 #endif
546    speex_free(st->echo_noise);
547    speex_free(st->residual_echo);
548 
549    speex_free(st->S);
550    speex_free(st->Smin);
551    speex_free(st->Stmp);
552    speex_free(st->update_prob);
553    speex_free(st->zeta);
554 
555    speex_free(st->inbuf);
556    speex_free(st->outbuf);
557 
558    spx_fft_destroy(st->fft_lookup);
559    filterbank_destroy(st->bank);
560    speex_free(st);
561 }
562 
563 /* FIXME: The AGC doesn't work yet with fixed-point*/
564 #ifndef FIXED_POINT
speex_compute_agc(SpeexPreprocessState * st,spx_word16_t Pframe,spx_word16_t * ft)565 static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft)
566 {
567    int i;
568    int N = st->ps_size;
569    float target_gain;
570    float loudness=1.f;
571    float rate;
572 
573    for (i=2;i<N;i++)
574    {
575       loudness += 2.f*N*st->ps[i]* st->loudness_weight[i];
576    }
577    loudness=sqrt(loudness);
578       /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) &&
579    loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/
580    if (Pframe>.3f)
581    {
582       /*rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt);*/
583       rate = .03*Pframe*Pframe;
584       st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP);
585       st->loudness_accum = (1-rate)*st->loudness_accum + rate;
586       if (st->init_max < st->max_gain && st->nb_adapt > 20)
587          st->init_max *= 1.f + .1f*Pframe*Pframe;
588    }
589    /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/
590 
591    target_gain = AMP_SCALE*st->agc_level*pow(st->loudness/(1e-4+st->loudness_accum), -1.0f/LOUDNESS_EXP);
592 
593    if ((Pframe>.5  && st->nb_adapt > 20) || target_gain < st->agc_gain)
594    {
595       if (target_gain > st->max_increase_step*st->agc_gain)
596          target_gain = st->max_increase_step*st->agc_gain;
597       if (target_gain < st->max_decrease_step*st->agc_gain && loudness < 10*st->prev_loudness)
598          target_gain = st->max_decrease_step*st->agc_gain;
599       if (target_gain > st->max_gain)
600          target_gain = st->max_gain;
601       if (target_gain > st->init_max)
602          target_gain = st->init_max;
603 
604       st->agc_gain = target_gain;
605    }
606    /*fprintf (stderr, "%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/
607 
608    for (i=0;i<2*N;i++)
609       ft[i] *= st->agc_gain;
610    st->prev_loudness = loudness;
611 }
612 #endif
613 
preprocess_analysis(SpeexPreprocessState * st,spx_int16_t * x)614 static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x)
615 {
616    int i;
617    int N = st->ps_size;
618    int N3 = 2*N - st->frame_size;
619    int N4 = st->frame_size - N3;
620    spx_word32_t *ps=st->ps;
621 
622    /* 'Build' input frame */
623    for (i=0;i<N3;i++)
624       st->frame[i]=st->inbuf[i];
625    for (i=0;i<st->frame_size;i++)
626       st->frame[N3+i]=x[i];
627 
628    /* Update inbuf */
629    for (i=0;i<N3;i++)
630       st->inbuf[i]=x[N4+i];
631 
632    /* Windowing */
633    for (i=0;i<2*N;i++)
634       st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
635 
636 #ifdef FIXED_POINT
637    {
638       spx_word16_t max_val=0;
639       for (i=0;i<2*N;i++)
640          max_val = MAX16(max_val, ABS16(st->frame[i]));
641       st->frame_shift = 14-spx_ilog2(EXTEND32(max_val));
642       for (i=0;i<2*N;i++)
643          st->frame[i] = SHL16(st->frame[i], st->frame_shift);
644    }
645 #endif
646 
647    /* Perform FFT */
648    spx_fft(st->fft_lookup, st->frame, st->ft);
649 
650    /* Power spectrum */
651    ps[0]=MULT16_16(st->ft[0],st->ft[0]);
652    for (i=1;i<N;i++)
653       ps[i]=MULT16_16(st->ft[2*i-1],st->ft[2*i-1]) + MULT16_16(st->ft[2*i],st->ft[2*i]);
654    for (i=0;i<N;i++)
655       st->ps[i] = PSHR32(st->ps[i], 2*st->frame_shift);
656 
657    filterbank_compute_bank32(st->bank, ps, ps+N);
658 }
659 
update_noise_prob(SpeexPreprocessState * st)660 static void update_noise_prob(SpeexPreprocessState *st)
661 {
662    int i;
663    int min_range;
664    int N = st->ps_size;
665 
666    for (i=1;i<N-1;i++)
667       st->S[i] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1])
668                       + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]);
669    st->S[0] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]);
670    st->S[N-1] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]);
671 
672    if (st->nb_adapt==1)
673    {
674       for (i=0;i<N;i++)
675          st->Smin[i] = st->Stmp[i] = 0;
676    }
677 
678    if (st->nb_adapt < 100)
679       min_range = 15;
680    else if (st->nb_adapt < 1000)
681       min_range = 50;
682    else if (st->nb_adapt < 10000)
683       min_range = 150;
684    else
685       min_range = 300;
686    if (st->min_count > min_range)
687    {
688       st->min_count = 0;
689       for (i=0;i<N;i++)
690       {
691          st->Smin[i] = MIN32(st->Stmp[i], st->S[i]);
692          st->Stmp[i] = st->S[i];
693       }
694    } else {
695       for (i=0;i<N;i++)
696       {
697          st->Smin[i] = MIN32(st->Smin[i], st->S[i]);
698          st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]);
699       }
700    }
701    for (i=0;i<N;i++)
702    {
703       if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > st->Smin[i])
704          st->update_prob[i] = 1;
705       else
706          st->update_prob[i] = 0;
707       /*fprintf (stderr, "%f ", st->S[i]/st->Smin[i]);*/
708       /*fprintf (stderr, "%f ", st->update_prob[i]);*/
709    }
710 
711 }
712 
713 #define NOISE_OVERCOMPENS 1.
714 
715 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
716 
speex_preprocess(SpeexPreprocessState * st,spx_int16_t * x,spx_int32_t * echo)717 EXPORT int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo)
718 {
719    return speex_preprocess_run(st, x);
720 }
721 
speex_preprocess_run(SpeexPreprocessState * st,spx_int16_t * x)722 EXPORT int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x)
723 {
724    int i;
725    int M;
726    int N = st->ps_size;
727    int N3 = 2*N - st->frame_size;
728    int N4 = st->frame_size - N3;
729    spx_word32_t *ps=st->ps;
730    spx_word32_t Zframe;
731    spx_word16_t Pframe;
732    spx_word16_t beta, beta_1;
733    spx_word16_t effective_echo_suppress;
734 
735    st->nb_adapt++;
736    if (st->nb_adapt>20000)
737       st->nb_adapt = 20000;
738    st->min_count++;
739 
740    beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt));
741    beta_1 = Q15_ONE-beta;
742    M = st->nbands;
743    /* Deal with residual echo if provided */
744    if (st->echo_state)
745    {
746       speex_echo_get_residual(st->echo_state, st->residual_echo, N);
747 #ifndef FIXED_POINT
748       /* If there are NaNs or ridiculous values, it'll show up in the DC and we just reset everything to zero */
749       if (!(st->residual_echo[0] >=0 && st->residual_echo[0]<N*1e9f))
750       {
751          for (i=0;i<N;i++)
752             st->residual_echo[i] = 0;
753       }
754 #endif
755       for (i=0;i<N;i++)
756          st->echo_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),st->echo_noise[i]), st->residual_echo[i]);
757       filterbank_compute_bank32(st->bank, st->echo_noise, st->echo_noise+N);
758    } else {
759       for (i=0;i<N+M;i++)
760          st->echo_noise[i] = 0;
761    }
762    preprocess_analysis(st, x);
763 
764    update_noise_prob(st);
765 
766    /* Noise estimation always updated for the 10 first frames */
767    /*if (st->nb_adapt<10)
768    {
769       for (i=1;i<N-1;i++)
770          st->update_prob[i] = 0;
771    }
772    */
773 
774    /* Update the noise estimate for the frequencies where it can be */
775    for (i=0;i<N;i++)
776    {
777       if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT))
778          st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT)));
779    }
780    filterbank_compute_bank32(st->bank, st->noise, st->noise+N);
781 
782    /* Special case for first frame */
783    if (st->nb_adapt==1)
784       for (i=0;i<N+M;i++)
785          st->old_ps[i] = ps[i];
786 
787    /* Compute a posteriori SNR */
788    for (i=0;i<N+M;i++)
789    {
790       spx_word16_t gamma;
791 
792       /* Total noise estimate including residual echo and reverberation */
793       spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]);
794 
795       /* A posteriori SNR = ps/noise - 1*/
796       st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT));
797       st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT));
798 
799       /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */
800       gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise))));
801 
802       /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */
803       st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15));
804       st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT));
805    }
806 
807    /*print_vec(st->post, N+M, "");*/
808 
809    /* Recursive average of the a priori SNR. A bit smoothed for the psd components */
810    st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15);
811    for (i=1;i<N-1;i++)
812       st->zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])),
813                            MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15);
814    for (i=N-1;i<N+M;i++)
815       st->zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15);
816 
817    /* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */
818    Zframe = 0;
819    for (i=N;i<N+M;i++)
820       Zframe = ADD32(Zframe, EXTEND32(st->zeta[i]));
821    Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands)));
822 
823    effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15));
824 
825    compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M);
826 
827    /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale)
828       Technically this is actually wrong because the EM gaim assumes a slightly different probability
829       distribution */
830    for (i=N;i<N+M;i++)
831    {
832       /* See EM and Cohen papers*/
833       spx_word32_t theta;
834       /* Gain from hypergeometric function */
835       spx_word32_t MM;
836       /* Weiner filter gain */
837       spx_word16_t prior_ratio;
838       /* a priority probability of speech presence based on Bark sub-band alone */
839       spx_word16_t P1;
840       /* Speech absence a priori probability (considering sub-band and frame) */
841       spx_word16_t q;
842 #ifdef FIXED_POINT
843       spx_word16_t tmp;
844 #endif
845 
846       prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
847       theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
848 
849       MM = hypergeom_gain(theta);
850       /* Gain with bound */
851       st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
852       /* Save old Bark power spectrum */
853       st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
854 
855       P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i]));
856       q = Q15_ONE-MULT16_16_Q15(Pframe,P1);
857 #ifdef FIXED_POINT
858       theta = MIN32(theta, EXTEND32(32767));
859 /*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1))));
860       tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*/
861 /*Q8*/tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8));
862       st->gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp));
863 #else
864       st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta));
865 #endif
866    }
867    /* Convert the EM gains and speech prob to linear frequency */
868    filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
869    filterbank_compute_psd16(st->bank,st->gain+N, st->gain);
870 
871    /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */
872    if (1)
873    {
874       filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor);
875 
876       /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */
877       for (i=0;i<N;i++)
878       {
879          spx_word32_t MM;
880          spx_word32_t theta;
881          spx_word16_t prior_ratio;
882          spx_word16_t tmp;
883          spx_word16_t p;
884          spx_word16_t g;
885 
886          /* Wiener filter gain */
887          prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
888          theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
889 
890          /* Optimal estimator for loudness domain */
891          MM = hypergeom_gain(theta);
892          /* EM gain with bound */
893          g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
894          /* Interpolated speech probability of presence */
895          p = st->gain2[i];
896 
897          /* Constrain the gain to be close to the Bark scale gain */
898          if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i])
899             g = MULT16_16(3,st->gain[i]);
900          st->gain[i] = g;
901 
902          /* Save old power spectrum */
903          st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
904 
905          /* Apply gain floor */
906          if (st->gain[i] < st->gain_floor[i])
907             st->gain[i] = st->gain_floor[i];
908 
909          /* Exponential decay model for reverberation (unused) */
910          /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/
911 
912          /* Take into account speech probability of presence (loudness domain MMSE estimator) */
913          /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */
914          tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
915          st->gain2[i]=SQR16_Q15(tmp);
916 
917          /* Use this if you want a log-domain MMSE estimator instead */
918          /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/
919       }
920    } else {
921       for (i=N;i<N+M;i++)
922       {
923          spx_word16_t tmp;
924          spx_word16_t p = st->gain2[i];
925          st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]);
926          tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
927          st->gain2[i]=SQR16_Q15(tmp);
928       }
929       filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
930    }
931 
932    /* If noise suppression is off, don't apply the gain (but then why call this in the first place!) */
933    if (!st->denoise_enabled)
934    {
935       for (i=0;i<N+M;i++)
936          st->gain2[i]=Q15_ONE;
937    }
938 
939    /* Apply computed gain */
940    for (i=1;i<N;i++)
941    {
942       st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]);
943       st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]);
944    }
945    st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]);
946    st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]);
947 
948    /*FIXME: This *will* not work for fixed-point */
949 #ifndef FIXED_POINT
950    if (st->agc_enabled)
951       speex_compute_agc(st, Pframe, st->ft);
952 #endif
953 
954    /* Inverse FFT with 1/N scaling */
955    spx_ifft(st->fft_lookup, st->ft, st->frame);
956    /* Scale back to original (lower) amplitude */
957    for (i=0;i<2*N;i++)
958       st->frame[i] = PSHR16(st->frame[i], st->frame_shift);
959 
960    /*FIXME: This *will* not work for fixed-point */
961 #ifndef FIXED_POINT
962    if (st->agc_enabled)
963    {
964       float max_sample=0;
965       for (i=0;i<2*N;i++)
966          if (fabs(st->frame[i])>max_sample)
967             max_sample = fabs(st->frame[i]);
968       if (max_sample>28000.f)
969       {
970          float damp = 28000.f/max_sample;
971          for (i=0;i<2*N;i++)
972             st->frame[i] *= damp;
973       }
974    }
975 #endif
976 
977    /* Synthesis window (for WOLA) */
978    for (i=0;i<2*N;i++)
979       st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
980 
981    /* Perform overlap and add */
982    for (i=0;i<N3;i++)
983       x[i] = st->outbuf[i] + st->frame[i];
984    for (i=0;i<N4;i++)
985       x[N3+i] = st->frame[N3+i];
986 
987    /* Update outbuf */
988    for (i=0;i<N3;i++)
989       st->outbuf[i] = st->frame[st->frame_size+i];
990 
991    /* FIXME: This VAD is a kludge */
992    st->speech_prob = Pframe;
993    if (st->vad_enabled)
994    {
995       if (st->speech_prob > st->speech_prob_start || (st->was_speech && st->speech_prob > st->speech_prob_continue))
996       {
997          st->was_speech=1;
998          return 1;
999       } else
1000       {
1001          st->was_speech=0;
1002          return 0;
1003       }
1004    } else {
1005       return 1;
1006    }
1007 }
1008 
speex_preprocess_estimate_update(SpeexPreprocessState * st,spx_int16_t * x)1009 EXPORT void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x)
1010 {
1011    int i;
1012    int N = st->ps_size;
1013    int N3 = 2*N - st->frame_size;
1014    int M;
1015    spx_word32_t *ps=st->ps;
1016 
1017    M = st->nbands;
1018    st->min_count++;
1019 
1020    preprocess_analysis(st, x);
1021 
1022    update_noise_prob(st);
1023 
1024    for (i=1;i<N-1;i++)
1025    {
1026       if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT))
1027       {
1028          st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT));
1029       }
1030    }
1031 
1032    for (i=0;i<N3;i++)
1033       st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]);
1034 
1035    /* Save old power spectrum */
1036    for (i=0;i<N+M;i++)
1037       st->old_ps[i] = ps[i];
1038 
1039    for (i=0;i<N;i++)
1040       st->reverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]);
1041 }
1042 
1043 
speex_preprocess_ctl(SpeexPreprocessState * state,int request,void * ptr)1044 EXPORT int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr)
1045 {
1046    int i;
1047    SpeexPreprocessState *st;
1048    st=(SpeexPreprocessState*)state;
1049    switch(request)
1050    {
1051    case SPEEX_PREPROCESS_SET_DENOISE:
1052       st->denoise_enabled = (*(spx_int32_t*)ptr);
1053       break;
1054    case SPEEX_PREPROCESS_GET_DENOISE:
1055       (*(spx_int32_t*)ptr) = st->denoise_enabled;
1056       break;
1057 #ifndef FIXED_POINT
1058    case SPEEX_PREPROCESS_SET_AGC:
1059       st->agc_enabled = (*(spx_int32_t*)ptr);
1060       break;
1061    case SPEEX_PREPROCESS_GET_AGC:
1062       (*(spx_int32_t*)ptr) = st->agc_enabled;
1063       break;
1064 #ifndef DISABLE_FLOAT_API
1065    case SPEEX_PREPROCESS_SET_AGC_LEVEL:
1066       st->agc_level = (*(float*)ptr);
1067       if (st->agc_level<1)
1068          st->agc_level=1;
1069       if (st->agc_level>32768)
1070          st->agc_level=32768;
1071       break;
1072    case SPEEX_PREPROCESS_GET_AGC_LEVEL:
1073       (*(float*)ptr) = st->agc_level;
1074       break;
1075 #endif /* #ifndef DISABLE_FLOAT_API */
1076    case SPEEX_PREPROCESS_SET_AGC_INCREMENT:
1077       st->max_increase_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
1078       break;
1079    case SPEEX_PREPROCESS_GET_AGC_INCREMENT:
1080       (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_increase_step)*st->sampling_rate/st->frame_size);
1081       break;
1082    case SPEEX_PREPROCESS_SET_AGC_DECREMENT:
1083       st->max_decrease_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
1084       break;
1085    case SPEEX_PREPROCESS_GET_AGC_DECREMENT:
1086       (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_decrease_step)*st->sampling_rate/st->frame_size);
1087       break;
1088    case SPEEX_PREPROCESS_SET_AGC_MAX_GAIN:
1089       st->max_gain = exp(0.11513f * (*(spx_int32_t*)ptr));
1090       break;
1091    case SPEEX_PREPROCESS_GET_AGC_MAX_GAIN:
1092       (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_gain));
1093       break;
1094 #endif
1095    case SPEEX_PREPROCESS_SET_VAD:
1096       speex_warning("The VAD has been replaced by a hack pending a complete rewrite");
1097       st->vad_enabled = (*(spx_int32_t*)ptr);
1098       break;
1099    case SPEEX_PREPROCESS_GET_VAD:
1100       (*(spx_int32_t*)ptr) = st->vad_enabled;
1101       break;
1102 
1103    case SPEEX_PREPROCESS_SET_DEREVERB:
1104       st->dereverb_enabled = (*(spx_int32_t*)ptr);
1105       for (i=0;i<st->ps_size;i++)
1106          st->reverb_estimate[i]=0;
1107       break;
1108    case SPEEX_PREPROCESS_GET_DEREVERB:
1109       (*(spx_int32_t*)ptr) = st->dereverb_enabled;
1110       break;
1111 
1112    case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL:
1113       /* FIXME: Re-enable when de-reverberation is actually enabled again */
1114       /*st->reverb_level = (*(float*)ptr);*/
1115       break;
1116    case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL:
1117       /* FIXME: Re-enable when de-reverberation is actually enabled again */
1118       /*(*(float*)ptr) = st->reverb_level;*/
1119       break;
1120 
1121    case SPEEX_PREPROCESS_SET_DEREVERB_DECAY:
1122       /* FIXME: Re-enable when de-reverberation is actually enabled again */
1123       /*st->reverb_decay = (*(float*)ptr);*/
1124       break;
1125    case SPEEX_PREPROCESS_GET_DEREVERB_DECAY:
1126       /* FIXME: Re-enable when de-reverberation is actually enabled again */
1127       /*(*(float*)ptr) = st->reverb_decay;*/
1128       break;
1129 
1130    case SPEEX_PREPROCESS_SET_PROB_START:
1131       *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
1132       st->speech_prob_start = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
1133       break;
1134    case SPEEX_PREPROCESS_GET_PROB_START:
1135       (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100);
1136       break;
1137 
1138    case SPEEX_PREPROCESS_SET_PROB_CONTINUE:
1139       *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
1140       st->speech_prob_continue = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
1141       break;
1142    case SPEEX_PREPROCESS_GET_PROB_CONTINUE:
1143       (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100);
1144       break;
1145 
1146    case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS:
1147       st->noise_suppress = -ABS(*(spx_int32_t*)ptr);
1148       break;
1149    case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS:
1150       (*(spx_int32_t*)ptr) = st->noise_suppress;
1151       break;
1152    case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS:
1153       st->echo_suppress = -ABS(*(spx_int32_t*)ptr);
1154       break;
1155    case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS:
1156       (*(spx_int32_t*)ptr) = st->echo_suppress;
1157       break;
1158    case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE:
1159       st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr);
1160       break;
1161    case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE:
1162       (*(spx_int32_t*)ptr) = st->echo_suppress_active;
1163       break;
1164    case SPEEX_PREPROCESS_SET_ECHO_STATE:
1165       st->echo_state = (SpeexEchoState*)ptr;
1166       break;
1167    case SPEEX_PREPROCESS_GET_ECHO_STATE:
1168       (*(SpeexEchoState**)ptr) = (SpeexEchoState*)st->echo_state;
1169       break;
1170 #ifndef FIXED_POINT
1171    case SPEEX_PREPROCESS_GET_AGC_LOUDNESS:
1172       (*(spx_int32_t*)ptr) = pow(st->loudness, 1.0/LOUDNESS_EXP);
1173       break;
1174    case SPEEX_PREPROCESS_GET_AGC_GAIN:
1175       (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->agc_gain));
1176       break;
1177 #endif
1178    case SPEEX_PREPROCESS_GET_PSD_SIZE:
1179    case SPEEX_PREPROCESS_GET_NOISE_PSD_SIZE:
1180       (*(spx_int32_t*)ptr) = st->ps_size;
1181       break;
1182    case SPEEX_PREPROCESS_GET_PSD:
1183       for(i=0;i<st->ps_size;i++)
1184       	((spx_int32_t *)ptr)[i] = (spx_int32_t) st->ps[i];
1185       break;
1186    case SPEEX_PREPROCESS_GET_NOISE_PSD:
1187       for(i=0;i<st->ps_size;i++)
1188       	((spx_int32_t *)ptr)[i] = (spx_int32_t) PSHR32(st->noise[i], NOISE_SHIFT);
1189       break;
1190    case SPEEX_PREPROCESS_GET_PROB:
1191       (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob, 100);
1192       break;
1193 #ifndef FIXED_POINT
1194    case SPEEX_PREPROCESS_SET_AGC_TARGET:
1195       st->agc_level = (*(spx_int32_t*)ptr);
1196       if (st->agc_level<1)
1197          st->agc_level=1;
1198       if (st->agc_level>32768)
1199          st->agc_level=32768;
1200       break;
1201    case SPEEX_PREPROCESS_GET_AGC_TARGET:
1202       (*(spx_int32_t*)ptr) = st->agc_level;
1203       break;
1204 #endif
1205    default:
1206       speex_warning_int("Unknown speex_preprocess_ctl request: ", request);
1207       return -1;
1208    }
1209    return 0;
1210 }
1211 
1212 #ifdef FIXED_DEBUG
1213 long long spx_mips=0;
1214 #endif
1215 
1216