1 /* Copyright (C) 2003-2008 Jean-Marc Valin
2 
3    File: mdf.c
4    Echo canceller based on the MDF algorithm (see below)
5 
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions are
8    met:
9 
10    1. Redistributions of source code must retain the above copyright notice,
11    this list of conditions and the following disclaimer.
12 
13    2. Redistributions in binary form must reproduce the above copyright
14    notice, this list of conditions and the following disclaimer in the
15    documentation and/or other materials provided with the distribution.
16 
17    3. The name of the author may not be used to endorse or promote products
18    derived from this software without specific prior written permission.
19 
20    THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30    POSSIBILITY OF SUCH DAMAGE.
31 */
32 
33 /*
34    The echo canceller is based on the MDF algorithm described in:
35 
36    J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
37    IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
38    February 1990.
39 
40    We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
41    double-talk is achieved using a variable learning rate as described in:
42 
43    Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
44    Cancellation With Double-Talk. IEEE Transactions on Audio,
45    Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007.
46    http://people.xiph.org/~jm/papers/valin_taslp2006.pdf
47 
48    There is no explicit double-talk detection, but a continuous variation
49    in the learning rate based on residual echo, double-talk and background
50    noise.
51 
52    About the fixed-point version:
53    All the signals are represented with 16-bit words. The filter weights
54    are represented with 32-bit words, but only the top 16 bits are used
55    in most cases. The lower 16 bits are completely unreliable (due to the
56    fact that the update is done only on the top bits), but help in the
57    adaptation -- probably by removing a "threshold effect" due to
58    quantization (rounding going to zero) when the gradient is small.
59 
60    Another kludge that seems to work good: when performing the weight
61    update, we only move half the way toward the "goal" this seems to
62    reduce the effect of quantization noise in the update phase. This
63    can be seen as applying a gradient descent on a "soft constraint"
64    instead of having a hard constraint.
65 
66 */
67 
68 #ifdef HAVE_CONFIG_H
69 #include "config.h"
70 #endif
71 
72 #include "arch.h"
73 #include "speex/speex_echo.h"
74 #include "fftwrap.h"
75 #include "pseudofloat.h"
76 #include "math_approx.h"
77 #include "os_support.h"
78 
79 #ifndef M_PI
80 #define M_PI 3.14159265358979323846
81 #endif
82 
83 #ifdef FIXED_POINT
84 #define WEIGHT_SHIFT 11
85 #define NORMALIZE_SCALEDOWN 5
86 #define NORMALIZE_SCALEUP 3
87 #else
88 #define WEIGHT_SHIFT 0
89 #endif
90 
91 #ifdef FIXED_POINT
92 #define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
93 #else
94 #define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))
95 #endif
96 
97 /* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk
98    and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */
99 #define TWO_PATH
100 
101 #ifdef FIXED_POINT
102 static const spx_float_t MIN_LEAK = {20972, -22};
103 
104 /* Constants for the two-path filter */
105 static const spx_float_t VAR1_SMOOTH = {23593, -16};
106 static const spx_float_t VAR2_SMOOTH = {23675, -15};
107 static const spx_float_t VAR1_UPDATE = {16384, -15};
108 static const spx_float_t VAR2_UPDATE = {16384, -16};
109 static const spx_float_t VAR_BACKTRACK = {16384, -12};
110 #define TOP16(x) ((x)>>16)
111 
112 #else
113 
114 static const spx_float_t MIN_LEAK = .005f;
115 
116 /* Constants for the two-path filter */
117 static const spx_float_t VAR1_SMOOTH = .36f;
118 static const spx_float_t VAR2_SMOOTH = .7225f;
119 static const spx_float_t VAR1_UPDATE = .5f;
120 static const spx_float_t VAR2_UPDATE = .25f;
121 static const spx_float_t VAR_BACKTRACK = 4.f;
122 #define TOP16(x) (x)
123 #endif
124 
125 
126 #define PLAYBACK_DELAY 2
127 
128 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
129 
130 
131 /** Speex echo cancellation state. */
132 struct SpeexEchoState_ {
133    int frame_size;           /**< Number of samples processed each time */
134    int window_size;
135    int M;
136    int cancel_count;
137    int adapted;
138    int saturated;
139    int screwed_up;
140    int C;                    /** Number of input channels (microphones) */
141    int K;                    /** Number of output channels (loudspeakers) */
142    spx_int32_t sampling_rate;
143    spx_word16_t spec_average;
144    spx_word16_t beta0;
145    spx_word16_t beta_max;
146    spx_word32_t sum_adapt;
147    spx_word16_t leak_estimate;
148 
149    spx_word16_t *e;      /* scratch */
150    spx_word16_t *x;      /* Far-end input buffer (2N) */
151    spx_word16_t *X;      /* Far-end buffer (M+1 frames) in frequency domain */
152    spx_word16_t *input;  /* scratch */
153    spx_word16_t *y;      /* scratch */
154    spx_word16_t *last_y;
155    spx_word16_t *Y;      /* scratch */
156    spx_word16_t *E;
157    spx_word32_t *PHI;    /* scratch */
158    spx_word32_t *W;      /* (Background) filter weights */
159 #ifdef TWO_PATH
160    spx_word16_t *foreground; /* Foreground filter weights */
161    spx_word32_t  Davg1;  /* 1st recursive average of the residual power difference */
162    spx_word32_t  Davg2;  /* 2nd recursive average of the residual power difference */
163    spx_float_t   Dvar1;  /* Estimated variance of 1st estimator */
164    spx_float_t   Dvar2;  /* Estimated variance of 2nd estimator */
165 #endif
166    spx_word32_t *power;  /* Power of the far-end signal */
167    spx_float_t  *power_1;/* Inverse power of far-end */
168    spx_word16_t *wtmp;   /* scratch */
169 #ifdef FIXED_POINT
170    spx_word16_t *wtmp2;  /* scratch */
171 #endif
172    spx_word32_t *Rf;     /* scratch */
173    spx_word32_t *Yf;     /* scratch */
174    spx_word32_t *Xf;     /* scratch */
175    spx_word32_t *Eh;
176    spx_word32_t *Yh;
177    spx_float_t   Pey;
178    spx_float_t   Pyy;
179    spx_word16_t *window;
180    spx_word16_t *prop;
181    void *fft_table;
182    spx_word16_t *memX, *memD, *memE;
183    spx_word16_t preemph;
184    spx_word16_t notch_radius;
185    spx_mem_t *notch_mem;
186 
187    /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
188    spx_int16_t *play_buf;
189    int play_buf_pos;
190    int play_buf_started;
191 };
192 
filter_dc_notch16(const spx_int16_t * in,spx_word16_t radius,spx_word16_t * out,int len,spx_mem_t * mem,int stride)193 static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
194 {
195    int i;
196    spx_word16_t den2;
197 #ifdef FIXED_POINT
198    den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius));
199 #else
200    den2 = radius*radius + .7*(1-radius)*(1-radius);
201 #endif
202    /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
203    for (i=0;i<len;i++)
204    {
205       spx_word16_t vin = in[i*stride];
206       spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
207 #ifdef FIXED_POINT
208       mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
209 #else
210       mem[0] = mem[1] + 2*(-vin + radius*vout);
211 #endif
212       mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout);
213       out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767);
214    }
215 }
216 
217 /* This inner product is slightly different from the codec version because of fixed-point */
mdf_inner_prod(const spx_word16_t * x,const spx_word16_t * y,int len)218 static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
219 {
220    spx_word32_t sum=0;
221    len >>= 1;
222    while(len--)
223    {
224       spx_word32_t part=0;
225       part = MAC16_16(part,*x++,*y++);
226       part = MAC16_16(part,*x++,*y++);
227       /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
228       sum = ADD32(sum,SHR32(part,6));
229    }
230    return sum;
231 }
232 
233 /** Compute power spectrum of a half-complex (packed) vector */
power_spectrum(const spx_word16_t * X,spx_word32_t * ps,int N)234 static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N)
235 {
236    int i, j;
237    ps[0]=MULT16_16(X[0],X[0]);
238    for (i=1,j=1;i<N-1;i+=2,j++)
239    {
240       ps[j] =  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
241    }
242    ps[j]=MULT16_16(X[i],X[i]);
243 }
244 
245 /** Compute power spectrum of a half-complex (packed) vector and accumulate */
power_spectrum_accum(const spx_word16_t * X,spx_word32_t * ps,int N)246 static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N)
247 {
248    int i, j;
249    ps[0]+=MULT16_16(X[0],X[0]);
250    for (i=1,j=1;i<N-1;i+=2,j++)
251    {
252       ps[j] +=  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
253    }
254    ps[j]+=MULT16_16(X[i],X[i]);
255 }
256 
257 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
258 #ifdef FIXED_POINT
spectral_mul_accum(const spx_word16_t * X,const spx_word32_t * Y,spx_word16_t * acc,int N,int M)259 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
260 {
261    int i,j;
262    spx_word32_t tmp1=0,tmp2=0;
263    for (j=0;j<M;j++)
264    {
265       tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N]));
266    }
267    acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
268    for (i=1;i<N-1;i+=2)
269    {
270       tmp1 = tmp2 = 0;
271       for (j=0;j<M;j++)
272       {
273          tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1])));
274          tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1]));
275       }
276       acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
277       acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
278    }
279    tmp1 = tmp2 = 0;
280    for (j=0;j<M;j++)
281    {
282       tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1]));
283    }
284    acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
285 }
spectral_mul_accum16(const spx_word16_t * X,const spx_word16_t * Y,spx_word16_t * acc,int N,int M)286 static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M)
287 {
288    int i,j;
289    spx_word32_t tmp1=0,tmp2=0;
290    for (j=0;j<M;j++)
291    {
292       tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]);
293    }
294    acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
295    for (i=1;i<N-1;i+=2)
296    {
297       tmp1 = tmp2 = 0;
298       for (j=0;j<M;j++)
299       {
300          tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1]));
301          tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]);
302       }
303       acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
304       acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
305    }
306    tmp1 = tmp2 = 0;
307    for (j=0;j<M;j++)
308    {
309       tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]);
310    }
311    acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
312 }
313 
314 #else
spectral_mul_accum(const spx_word16_t * X,const spx_word32_t * Y,spx_word16_t * acc,int N,int M)315 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
316 {
317    int i,j;
318    for (i=0;i<N;i++)
319       acc[i] = 0;
320    for (j=0;j<M;j++)
321    {
322       acc[0] += X[0]*Y[0];
323       for (i=1;i<N-1;i+=2)
324       {
325          acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]);
326          acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]);
327       }
328       acc[i] += X[i]*Y[i];
329       X += N;
330       Y += N;
331    }
332 }
333 #define spectral_mul_accum16 spectral_mul_accum
334 #endif
335 
336 /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
weighted_spectral_mul_conj(const spx_float_t * w,const spx_float_t p,const spx_word16_t * X,const spx_word16_t * Y,spx_word32_t * prod,int N)337 static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N)
338 {
339    int i, j;
340    spx_float_t W;
341    W = FLOAT_AMULT(p, w[0]);
342    prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0]));
343    for (i=1,j=1;i<N-1;i+=2,j++)
344    {
345       W = FLOAT_AMULT(p, w[j]);
346       prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1]));
347       prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1]));
348    }
349    W = FLOAT_AMULT(p, w[j]);
350    prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i]));
351 }
352 
mdf_adjust_prop(const spx_word32_t * W,int N,int M,int P,spx_word16_t * prop)353 static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop)
354 {
355    int i, j, p;
356    spx_word16_t max_sum = 1;
357    spx_word32_t prop_sum = 1;
358    for (i=0;i<M;i++)
359    {
360       spx_word32_t tmp = 1;
361       for (p=0;p<P;p++)
362          for (j=0;j<N;j++)
363             tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18)));
364 #ifdef FIXED_POINT
365       /* Just a security in case an overflow were to occur */
366       tmp = MIN32(ABS32(tmp), 536870912);
367 #endif
368       prop[i] = spx_sqrt(tmp);
369       if (prop[i] > max_sum)
370          max_sum = prop[i];
371    }
372    for (i=0;i<M;i++)
373    {
374       prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum);
375       prop_sum += EXTEND32(prop[i]);
376    }
377    for (i=0;i<M;i++)
378    {
379       prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum);
380       /*printf ("%f ", prop[i]);*/
381    }
382    /*printf ("\n");*/
383 }
384 
385 #ifdef DUMP_ECHO_CANCEL_DATA
386 #include <stdio.h>
387 static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL;
388 
dump_audio(const spx_int16_t * rec,const spx_int16_t * play,const spx_int16_t * out,int len)389 static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len)
390 {
391    if (!(rFile && pFile && oFile))
392    {
393       speex_fatal("Dump files not open");
394    }
395    fwrite(rec, sizeof(spx_int16_t), len, rFile);
396    fwrite(play, sizeof(spx_int16_t), len, pFile);
397    fwrite(out, sizeof(spx_int16_t), len, oFile);
398 }
399 #endif
400 
401 /** Creates a new echo canceller state */
speex_echo_state_init(int frame_size,int filter_length)402 EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
403 {
404    return speex_echo_state_init_mc(frame_size, filter_length, 1, 1);
405 }
406 
speex_echo_state_init_mc(int frame_size,int filter_length,int nb_mic,int nb_speakers)407 EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
408 {
409    int i,N,M, C, K;
410    SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
411 
412    st->K = nb_speakers;
413    st->C = nb_mic;
414    C=st->C;
415    K=st->K;
416 #ifdef DUMP_ECHO_CANCEL_DATA
417    if (rFile || pFile || oFile)
418       speex_fatal("Opening dump files twice");
419    rFile = fopen("aec_rec.sw", "wb");
420    pFile = fopen("aec_play.sw", "wb");
421    oFile = fopen("aec_out.sw", "wb");
422 #endif
423 
424    st->frame_size = frame_size;
425    st->window_size = 2*frame_size;
426    N = st->window_size;
427    M = st->M = (filter_length+st->frame_size-1)/frame_size;
428    st->cancel_count=0;
429    st->sum_adapt = 0;
430    st->saturated = 0;
431    st->screwed_up = 0;
432    /* This is the default sampling rate */
433    st->sampling_rate = 8000;
434    st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
435 #ifdef FIXED_POINT
436    st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
437    st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
438 #else
439    st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
440    st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
441 #endif
442    st->leak_estimate = 0;
443 
444    st->fft_table = spx_fft_init(N);
445 
446    st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
447    st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t));
448    st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t));
449    st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
450    st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
451    st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
452    st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
453    st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
454    st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
455    st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
456 
457    st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t));
458    st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
459    st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
460    st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t));
461 #ifdef TWO_PATH
462    st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t));
463 #endif
464    st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
465    st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
466    st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t));
467    st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
468    st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t));
469    st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
470 #ifdef FIXED_POINT
471    st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
472    for (i=0;i<N>>1;i++)
473    {
474       st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1));
475       st->window[N-i-1] = st->window[i];
476    }
477 #else
478    for (i=0;i<N;i++)
479       st->window[i] = .5-.5*cos(2*M_PI*i/N);
480 #endif
481    for (i=0;i<=st->frame_size;i++)
482       st->power_1[i] = FLOAT_ONE;
483    for (i=0;i<N*M*K*C;i++)
484       st->W[i] = 0;
485    {
486       spx_word32_t sum = 0;
487       /* Ratio of ~10 between adaptation rate of first and last block */
488       spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1);
489       st->prop[0] = QCONST16(.7, 15);
490       sum = EXTEND32(st->prop[0]);
491       for (i=1;i<M;i++)
492       {
493          st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay);
494          sum = ADD32(sum, EXTEND32(st->prop[i]));
495       }
496       for (i=M-1;i>=0;i--)
497       {
498          st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum);
499       }
500    }
501 
502    st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t));
503    st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
504    st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
505    st->preemph = QCONST16(.9,15);
506    if (st->sampling_rate<12000)
507       st->notch_radius = QCONST16(.9, 15);
508    else if (st->sampling_rate<24000)
509       st->notch_radius = QCONST16(.982, 15);
510    else
511       st->notch_radius = QCONST16(.992, 15);
512 
513    st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t));
514    st->adapted = 0;
515    st->Pey = st->Pyy = FLOAT_ONE;
516 
517 #ifdef TWO_PATH
518    st->Davg1 = st->Davg2 = 0;
519    st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
520 #endif
521 
522    st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
523    st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
524    st->play_buf_started = 0;
525 
526    return st;
527 }
528 
529 /** Resets echo canceller state */
speex_echo_state_reset(SpeexEchoState * st)530 EXPORT void speex_echo_state_reset(SpeexEchoState *st)
531 {
532    int i, M, N, C, K;
533    st->cancel_count=0;
534    st->screwed_up = 0;
535    N = st->window_size;
536    M = st->M;
537    C=st->C;
538    K=st->K;
539    for (i=0;i<N*M;i++)
540       st->W[i] = 0;
541 #ifdef TWO_PATH
542    for (i=0;i<N*M;i++)
543       st->foreground[i] = 0;
544 #endif
545    for (i=0;i<N*(M+1);i++)
546       st->X[i] = 0;
547    for (i=0;i<=st->frame_size;i++)
548    {
549       st->power[i] = 0;
550       st->power_1[i] = FLOAT_ONE;
551       st->Eh[i] = 0;
552       st->Yh[i] = 0;
553    }
554    for (i=0;i<st->frame_size;i++)
555    {
556       st->last_y[i] = 0;
557    }
558    for (i=0;i<N*C;i++)
559    {
560       st->E[i] = 0;
561    }
562    for (i=0;i<N*K;i++)
563    {
564       st->x[i] = 0;
565    }
566    for (i=0;i<2*C;i++)
567       st->notch_mem[i] = 0;
568    for (i=0;i<C;i++)
569       st->memD[i]=st->memE[i]=0;
570    for (i=0;i<K;i++)
571       st->memX[i]=0;
572 
573    st->saturated = 0;
574    st->adapted = 0;
575    st->sum_adapt = 0;
576    st->Pey = st->Pyy = FLOAT_ONE;
577 #ifdef TWO_PATH
578    st->Davg1 = st->Davg2 = 0;
579    st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
580 #endif
581    for (i=0;i<3*st->frame_size;i++)
582       st->play_buf[i] = 0;
583    st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
584    st->play_buf_started = 0;
585 
586 }
587 
588 /** Destroys an echo canceller state */
speex_echo_state_destroy(SpeexEchoState * st)589 EXPORT void speex_echo_state_destroy(SpeexEchoState *st)
590 {
591    spx_fft_destroy(st->fft_table);
592 
593    speex_free(st->e);
594    speex_free(st->x);
595    speex_free(st->input);
596    speex_free(st->y);
597    speex_free(st->last_y);
598    speex_free(st->Yf);
599    speex_free(st->Rf);
600    speex_free(st->Xf);
601    speex_free(st->Yh);
602    speex_free(st->Eh);
603 
604    speex_free(st->X);
605    speex_free(st->Y);
606    speex_free(st->E);
607    speex_free(st->W);
608 #ifdef TWO_PATH
609    speex_free(st->foreground);
610 #endif
611    speex_free(st->PHI);
612    speex_free(st->power);
613    speex_free(st->power_1);
614    speex_free(st->window);
615    speex_free(st->prop);
616    speex_free(st->wtmp);
617 #ifdef FIXED_POINT
618    speex_free(st->wtmp2);
619 #endif
620    speex_free(st->memX);
621    speex_free(st->memD);
622    speex_free(st->memE);
623    speex_free(st->notch_mem);
624 
625    speex_free(st->play_buf);
626    speex_free(st);
627 
628 #ifdef DUMP_ECHO_CANCEL_DATA
629    fclose(rFile);
630    fclose(pFile);
631    fclose(oFile);
632    rFile = pFile = oFile = NULL;
633 #endif
634 }
635 
speex_echo_capture(SpeexEchoState * st,const spx_int16_t * rec,spx_int16_t * out)636 EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
637 {
638    int i;
639    /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
640    st->play_buf_started = 1;
641    if (st->play_buf_pos>=st->frame_size)
642    {
643       speex_echo_cancellation(st, rec, st->play_buf, out);
644       st->play_buf_pos -= st->frame_size;
645       for (i=0;i<st->play_buf_pos;i++)
646          st->play_buf[i] = st->play_buf[i+st->frame_size];
647    } else {
648       speex_warning("No playback frame available (your application is buggy and/or got xruns)");
649       if (st->play_buf_pos!=0)
650       {
651          speex_warning("internal playback buffer corruption?");
652          st->play_buf_pos = 0;
653       }
654       for (i=0;i<st->frame_size;i++)
655          out[i] = rec[i];
656    }
657 }
658 
speex_echo_playback(SpeexEchoState * st,const spx_int16_t * play)659 EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
660 {
661    /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
662    if (!st->play_buf_started)
663    {
664       speex_warning("discarded first playback frame");
665       return;
666    }
667    if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size)
668    {
669       int i;
670       for (i=0;i<st->frame_size;i++)
671          st->play_buf[st->play_buf_pos+i] = play[i];
672       st->play_buf_pos += st->frame_size;
673       if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size)
674       {
675          speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)");
676          for (i=0;i<st->frame_size;i++)
677             st->play_buf[st->play_buf_pos+i] = play[i];
678          st->play_buf_pos += st->frame_size;
679       }
680    } else {
681       speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)");
682    }
683 }
684 
685 /** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
speex_echo_cancel(SpeexEchoState * st,const spx_int16_t * in,const spx_int16_t * far_end,spx_int16_t * out,spx_int32_t * Yout)686 EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
687 {
688    speex_echo_cancellation(st, in, far_end, out);
689 }
690 
691 /** Performs echo cancellation on a frame */
speex_echo_cancellation(SpeexEchoState * st,const spx_int16_t * in,const spx_int16_t * far_end,spx_int16_t * out)692 EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
693 {
694    int i,j, chan, speak;
695    int N,M, C, K;
696    spx_word32_t Syy,See,Sxx,Sdd, Sff;
697 #ifdef TWO_PATH
698    spx_word32_t Dbf;
699    int update_foreground;
700 #endif
701    spx_word32_t Sey;
702    spx_word16_t ss, ss_1;
703    spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE;
704    spx_float_t alpha, alpha_1;
705    spx_word16_t RER;
706    spx_word32_t tmp32;
707 
708    N = st->window_size;
709    M = st->M;
710    C = st->C;
711    K = st->K;
712 
713    st->cancel_count++;
714 #ifdef FIXED_POINT
715    ss=DIV32_16(11469,M);
716    ss_1 = SUB16(32767,ss);
717 #else
718    ss=.35/M;
719    ss_1 = 1-ss;
720 #endif
721 
722    for (chan = 0; chan < C; chan++)
723    {
724       /* Apply a notch filter to make sure DC doesn't end up causing problems */
725       filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C);
726       /* Copy input data to buffer and apply pre-emphasis */
727       /* Copy input data to buffer */
728       for (i=0;i<st->frame_size;i++)
729       {
730          spx_word32_t tmp32;
731          /* FIXME: This core has changed a bit, need to merge properly */
732          tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan])));
733 #ifdef FIXED_POINT
734          if (tmp32 > 32767)
735          {
736             tmp32 = 32767;
737             if (st->saturated == 0)
738                st->saturated = 1;
739          }
740          if (tmp32 < -32767)
741          {
742             tmp32 = -32767;
743             if (st->saturated == 0)
744                st->saturated = 1;
745          }
746 #endif
747          st->memD[chan] = st->input[chan*st->frame_size+i];
748          st->input[chan*st->frame_size+i] = EXTRACT16(tmp32);
749       }
750    }
751 
752    for (speak = 0; speak < K; speak++)
753    {
754       for (i=0;i<st->frame_size;i++)
755       {
756          spx_word32_t tmp32;
757          st->x[speak*N+i] = st->x[speak*N+i+st->frame_size];
758          tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak])));
759 #ifdef FIXED_POINT
760          /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
761          if (tmp32 > 32767)
762          {
763             tmp32 = 32767;
764             st->saturated = M+1;
765          }
766          if (tmp32 < -32767)
767          {
768             tmp32 = -32767;
769             st->saturated = M+1;
770          }
771 #endif
772          st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32);
773          st->memX[speak] = far_end[i*K+speak];
774       }
775    }
776 
777    for (speak = 0; speak < K; speak++)
778    {
779       /* Shift memory: this could be optimized eventually*/
780       for (j=M-1;j>=0;j--)
781       {
782          for (i=0;i<N;i++)
783             st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i];
784       }
785       /* Convert x (echo input) to frequency domain */
786       spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]);
787    }
788 
789    Sxx = 0;
790    for (speak = 0; speak < K; speak++)
791    {
792       Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
793       power_spectrum_accum(st->X+speak*N, st->Xf, N);
794    }
795 
796    Sff = 0;
797    for (chan = 0; chan < C; chan++)
798    {
799 #ifdef TWO_PATH
800       /* Compute foreground filter */
801       spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K);
802       spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N);
803       for (i=0;i<st->frame_size;i++)
804          st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]);
805       Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
806 #endif
807    }
808 
809    /* Adjust proportional adaption rate */
810    /* FIXME: Adjust that for C, K*/
811    if (st->adapted)
812       mdf_adjust_prop (st->W, N, M, C*K, st->prop);
813    /* Compute weight gradient */
814    if (st->saturated == 0)
815    {
816       for (chan = 0; chan < C; chan++)
817       {
818          for (speak = 0; speak < K; speak++)
819          {
820             for (j=M-1;j>=0;j--)
821             {
822                weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N);
823                for (i=0;i<N;i++)
824                   st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i];
825             }
826          }
827       }
828    } else {
829       st->saturated--;
830    }
831 
832    /* FIXME: MC conversion required */
833    /* Update weight to prevent circular convolution (MDF / AUMDF) */
834    for (chan = 0; chan < C; chan++)
835    {
836       for (speak = 0; speak < K; speak++)
837       {
838          for (j=0;j<M;j++)
839          {
840             /* This is a variant of the Alternatively Updated MDF (AUMDF) */
841             /* Remove the "if" to make this an MDF filter */
842             if (j==0 || st->cancel_count%(M-1) == j-1)
843             {
844 #ifdef FIXED_POINT
845                for (i=0;i<N;i++)
846                   st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16));
847                spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
848                for (i=0;i<st->frame_size;i++)
849                {
850                   st->wtmp[i]=0;
851                }
852                for (i=st->frame_size;i<N;i++)
853                {
854                   st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
855                }
856                spx_fft(st->fft_table, st->wtmp, st->wtmp2);
857                /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
858                for (i=0;i<N;i++)
859                   st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
860 #else
861                spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp);
862                for (i=st->frame_size;i<N;i++)
863                {
864                   st->wtmp[i]=0;
865                }
866                spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]);
867 #endif
868             }
869          }
870       }
871    }
872 
873    /* So we can use power_spectrum_accum */
874    for (i=0;i<=st->frame_size;i++)
875       st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;
876 
877    Dbf = 0;
878    See = 0;
879 #ifdef TWO_PATH
880    /* Difference in response, this is used to estimate the variance of our residual power estimate */
881    for (chan = 0; chan < C; chan++)
882    {
883       spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K);
884       spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N);
885       for (i=0;i<st->frame_size;i++)
886          st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]);
887       Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
888       for (i=0;i<st->frame_size;i++)
889          st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
890       See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
891    }
892 #endif
893 
894 #ifndef TWO_PATH
895    Sff = See;
896 #endif
897 
898 #ifdef TWO_PATH
899    /* Logic for updating the foreground filter */
900 
901    /* For two time windows, compute the mean of the energy difference, as well as the variance */
902    st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
903    st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
904    st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
905    st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
906 
907    /* Equivalent float code:
908    st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
909    st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
910    st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
911    st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
912    */
913 
914    update_foreground = 0;
915    /* Check if we have a statistically significant reduction in the residual echo */
916    /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
917    if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf)))
918       update_foreground = 1;
919    else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1))))
920       update_foreground = 1;
921    else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
922       update_foreground = 1;
923 
924    /* Do we update? */
925    if (update_foreground)
926    {
927       st->Davg1 = st->Davg2 = 0;
928       st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
929       /* Copy background filter to foreground filter */
930       for (i=0;i<N*M*C*K;i++)
931          st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
932       /* Apply a smooth transition so as to not introduce blocking artifacts */
933       for (chan = 0; chan < C; chan++)
934          for (i=0;i<st->frame_size;i++)
935             st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]);
936    } else {
937       int reset_background=0;
938       /* Otherwise, check if the background filter is significantly worse */
939       if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf))))
940          reset_background = 1;
941       if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1)))
942          reset_background = 1;
943       if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2)))
944          reset_background = 1;
945       if (reset_background)
946       {
947          /* Copy foreground filter to background filter */
948          for (i=0;i<N*M*C*K;i++)
949             st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
950          /* We also need to copy the output so as to get correct adaptation */
951          for (chan = 0; chan < C; chan++)
952          {
953             for (i=0;i<st->frame_size;i++)
954                st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size];
955             for (i=0;i<st->frame_size;i++)
956                st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
957          }
958          See = Sff;
959          st->Davg1 = st->Davg2 = 0;
960          st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
961       }
962    }
963 #endif
964 
965    Sey = Syy = Sdd = 0;
966    for (chan = 0; chan < C; chan++)
967    {
968       /* Compute error signal (for the output with de-emphasis) */
969       for (i=0;i<st->frame_size;i++)
970       {
971          spx_word32_t tmp_out;
972 #ifdef TWO_PATH
973          tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
974 #else
975          tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size]));
976 #endif
977          tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
978       /* This is an arbitrary test for saturation in the microphone signal */
979          if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
980          {
981          if (st->saturated == 0)
982             st->saturated = 1;
983          }
984          out[i*C+chan] = WORD2INT(tmp_out);
985          st->memE[chan] = tmp_out;
986       }
987 
988 #ifdef DUMP_ECHO_CANCEL_DATA
989       dump_audio(in, far_end, out, st->frame_size);
990 #endif
991 
992       /* Compute error signal (filter update version) */
993       for (i=0;i<st->frame_size;i++)
994       {
995          st->e[chan*N+i+st->frame_size] = st->e[chan*N+i];
996          st->e[chan*N+i] = 0;
997       }
998 
999       /* Compute a bunch of correlations */
1000       /* FIXME: bad merge */
1001       Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
1002       Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
1003       Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size);
1004 
1005       /* Convert error to frequency domain */
1006       spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N);
1007       for (i=0;i<st->frame_size;i++)
1008          st->y[i+chan*N] = 0;
1009       spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N);
1010 
1011       /* Compute power spectrum of echo (X), error (E) and filter response (Y) */
1012       power_spectrum_accum(st->E+chan*N, st->Rf, N);
1013       power_spectrum_accum(st->Y+chan*N, st->Yf, N);
1014 
1015    }
1016 
1017    /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
1018 
1019    /* Do some sanity check */
1020    if (!(Syy>=0 && Sxx>=0 && See >= 0)
1021 #ifndef FIXED_POINT
1022        || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9)
1023 #endif
1024       )
1025    {
1026       /* Things have gone really bad */
1027       st->screwed_up += 50;
1028       for (i=0;i<st->frame_size*C;i++)
1029          out[i] = 0;
1030    } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
1031    {
1032       /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */
1033       st->screwed_up++;
1034    } else {
1035       /* Everything's fine */
1036       st->screwed_up=0;
1037    }
1038    if (st->screwed_up>=50)
1039    {
1040       speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
1041       speex_echo_state_reset(st);
1042       return;
1043    }
1044 
1045    /* Add a small noise floor to make sure not to have problems when dividing */
1046    See = MAX32(See, SHR32(MULT16_16(N, 100),6));
1047 
1048    for (speak = 0; speak < K; speak++)
1049    {
1050       Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
1051       power_spectrum_accum(st->X+speak*N, st->Xf, N);
1052    }
1053 
1054 
1055    /* Smooth far end energy estimate over time */
1056    for (j=0;j<=st->frame_size;j++)
1057       st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
1058 
1059    /* Compute filtered spectra and (cross-)correlations */
1060    for (j=st->frame_size;j>=0;j--)
1061    {
1062       spx_float_t Eh, Yh;
1063       Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]);
1064       Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]);
1065       Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
1066       Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
1067 #ifdef FIXED_POINT
1068       st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]);
1069       st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]);
1070 #else
1071       st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j];
1072       st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
1073 #endif
1074    }
1075 
1076    Pyy = FLOAT_SQRT(Pyy);
1077    Pey = FLOAT_DIVU(Pey,Pyy);
1078 
1079    /* Compute correlation updatete rate */
1080    tmp32 = MULT16_32_Q15(st->beta0,Syy);
1081    if (tmp32 > MULT16_32_Q15(st->beta_max,See))
1082       tmp32 = MULT16_32_Q15(st->beta_max,See);
1083    alpha = FLOAT_DIV32(tmp32, See);
1084    alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
1085    /* Update correlations (recursive average) */
1086    st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey));
1087    st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy));
1088    if (FLOAT_LT(st->Pyy, FLOAT_ONE))
1089       st->Pyy = FLOAT_ONE;
1090    /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */
1091    if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy)))
1092       st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy);
1093    if (FLOAT_GT(st->Pey, st->Pyy))
1094       st->Pey = st->Pyy;
1095    /* leak_estimate is the linear regression result */
1096    st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14));
1097    /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */
1098    if (st->leak_estimate > 16383)
1099       st->leak_estimate = 32767;
1100    else
1101       st->leak_estimate = SHL16(st->leak_estimate,1);
1102    /*printf ("%f\n", st->leak_estimate);*/
1103 
1104    /* Compute Residual to Error Ratio */
1105 #ifdef FIXED_POINT
1106    tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
1107    tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
1108    /* Check for y in e (lower bound on RER) */
1109    {
1110       spx_float_t bound = PSEUDOFLOAT(Sey);
1111       bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
1112       if (FLOAT_GT(bound, PSEUDOFLOAT(See)))
1113          tmp32 = See;
1114       else if (tmp32 < FLOAT_EXTRACT32(bound))
1115          tmp32 = FLOAT_EXTRACT32(bound);
1116    }
1117    if (tmp32 > SHR32(See,1))
1118       tmp32 = SHR32(See,1);
1119    RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
1120 #else
1121    RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See;
1122    /* Check for y in e (lower bound on RER) */
1123    if (RER < Sey*Sey/(1+See*Syy))
1124       RER = Sey*Sey/(1+See*Syy);
1125    if (RER > .5)
1126       RER = .5;
1127 #endif
1128 
1129    /* We consider that the filter has had minimal adaptation if the following is true*/
1130    if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy))
1131    {
1132       st->adapted = 1;
1133    }
1134 
1135    if (st->adapted)
1136    {
1137       /* Normal learning rate calculation once we're past the minimal adaptation phase */
1138       for (i=0;i<=st->frame_size;i++)
1139       {
1140          spx_word32_t r, e;
1141          /* Compute frequency-domain adaptation mask */
1142          r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3));
1143          e = SHL32(st->Rf[i],3)+1;
1144 #ifdef FIXED_POINT
1145          if (r>SHR32(e,1))
1146             r = SHR32(e,1);
1147 #else
1148          if (r>.5*e)
1149             r = .5*e;
1150 #endif
1151          r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
1152          /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
1153          st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16);
1154       }
1155    } else {
1156       /* Temporary adaption rate if filter is not yet adapted enough */
1157       spx_word16_t adapt_rate=0;
1158 
1159       if (Sxx > SHR32(MULT16_16(N, 1000),6))
1160       {
1161          tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
1162 #ifdef FIXED_POINT
1163          if (tmp32 > SHR32(See,2))
1164             tmp32 = SHR32(See,2);
1165 #else
1166          if (tmp32 > .25*See)
1167             tmp32 = .25*See;
1168 #endif
1169          adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
1170       }
1171       for (i=0;i<=st->frame_size;i++)
1172          st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1);
1173 
1174 
1175       /* How much have we adapted so far? */
1176       st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
1177    }
1178 
1179    /* FIXME: MC conversion required */
1180       for (i=0;i<st->frame_size;i++)
1181          st->last_y[i] = st->last_y[st->frame_size+i];
1182    if (st->adapted)
1183    {
1184       /* If the filter is adapted, take the filtered echo */
1185       for (i=0;i<st->frame_size;i++)
1186          st->last_y[st->frame_size+i] = in[i]-out[i];
1187    } else {
1188       /* If filter isn't adapted yet, all we can do is take the far end signal directly */
1189       /* moved earlier: for (i=0;i<N;i++)
1190       st->last_y[i] = st->x[i];*/
1191    }
1192 
1193 }
1194 
1195 /* Compute spectrum of estimated echo for use in an echo post-filter */
speex_echo_get_residual(SpeexEchoState * st,spx_word32_t * residual_echo,int len)1196 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len)
1197 {
1198    int i;
1199    spx_word16_t leak2;
1200    int N;
1201 
1202    N = st->window_size;
1203 
1204    /* Apply hanning window (should pre-compute it)*/
1205    for (i=0;i<N;i++)
1206       st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
1207 
1208    /* Compute power spectrum of the echo */
1209    spx_fft(st->fft_table, st->y, st->Y);
1210    power_spectrum(st->Y, residual_echo, N);
1211 
1212 #ifdef FIXED_POINT
1213    if (st->leak_estimate > 16383)
1214       leak2 = 32767;
1215    else
1216       leak2 = SHL16(st->leak_estimate, 1);
1217 #else
1218    if (st->leak_estimate>.5)
1219       leak2 = 1;
1220    else
1221       leak2 = 2*st->leak_estimate;
1222 #endif
1223    /* Estimate residual echo */
1224    for (i=0;i<=st->frame_size;i++)
1225       residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]);
1226 
1227 }
1228 
speex_echo_ctl(SpeexEchoState * st,int request,void * ptr)1229 EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
1230 {
1231    switch(request)
1232    {
1233 
1234       case SPEEX_ECHO_GET_FRAME_SIZE:
1235          (*(int*)ptr) = st->frame_size;
1236          break;
1237       case SPEEX_ECHO_SET_SAMPLING_RATE:
1238          st->sampling_rate = (*(int*)ptr);
1239          st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
1240 #ifdef FIXED_POINT
1241          st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
1242          st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
1243 #else
1244          st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
1245          st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
1246 #endif
1247          if (st->sampling_rate<12000)
1248             st->notch_radius = QCONST16(.9, 15);
1249          else if (st->sampling_rate<24000)
1250             st->notch_radius = QCONST16(.982, 15);
1251          else
1252             st->notch_radius = QCONST16(.992, 15);
1253          break;
1254       case SPEEX_ECHO_GET_SAMPLING_RATE:
1255          (*(int*)ptr) = st->sampling_rate;
1256          break;
1257       case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE:
1258          /*FIXME: Implement this for multiple channels */
1259          *((spx_int32_t *)ptr) = st->M * st->frame_size;
1260          break;
1261       case SPEEX_ECHO_GET_IMPULSE_RESPONSE:
1262       {
1263          int M = st->M, N = st->window_size, n = st->frame_size, i, j;
1264          spx_int32_t *filt = (spx_int32_t *) ptr;
1265          for(j=0;j<M;j++)
1266          {
1267             /*FIXME: Implement this for multiple channels */
1268 #ifdef FIXED_POINT
1269             for (i=0;i<N;i++)
1270                st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN));
1271             spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
1272 #else
1273             spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
1274 #endif
1275             for(i=0;i<n;i++)
1276                filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN);
1277          }
1278       }
1279          break;
1280       default:
1281          speex_warning_int("Unknown speex_echo_ctl request: ", request);
1282          return -1;
1283    }
1284    return 0;
1285 }
1286