1 /* Copyright (c) 2009-2010 Xiph.Org Foundation
2    Written by Jean-Marc Valin */
3 /*
4    Redistribution and use in source and binary forms, with or without
5    modification, are permitted provided that the following conditions
6    are met:
7 
8    - Redistributions of source code must retain the above copyright
9    notice, this list of conditions and the following disclaimer.
10 
11    - Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14 
15    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 
32 #include "celt_lpc.h"
33 #include "stack_alloc.h"
34 #include "mathops.h"
35 #include "pitch.h"
36 
_celt_lpc(opus_val16 * _lpc,const opus_val32 * ac,int p)37 void _celt_lpc(
38       opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
39 const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
40 int          p
41 )
42 {
43    int i, j;
44    opus_val32 r;
45    opus_val32 error = ac[0];
46 #ifdef FIXED_POINT
47    opus_val32 lpc[LPC_ORDER];
48 #else
49    float *lpc = _lpc;
50 #endif
51 
52    for (i = 0; i < p; i++)
53       lpc[i] = 0;
54    if (ac[0] != 0)
55    {
56       for (i = 0; i < p; i++) {
57          /* Sum up this iteration's reflection coefficient */
58          opus_val32 rr = 0;
59          for (j = 0; j < i; j++)
60             rr += MULT32_32_Q31(lpc[j],ac[i - j]);
61          rr += SHR32(ac[i + 1],3);
62          r = -frac_div32(SHL32(rr,3), error);
63          /*  Update LPC coefficients and total error */
64          lpc[i] = SHR32(r,3);
65          for (j = 0; j < (i+1)>>1; j++)
66          {
67             opus_val32 tmp1, tmp2;
68             tmp1 = lpc[j];
69             tmp2 = lpc[i-1-j];
70             lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
71             lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
72          }
73 
74          error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
75          /* Bail out once we get 30 dB gain */
76 #ifdef FIXED_POINT
77          if (error<SHR32(ac[0],10))
78             break;
79 #else
80          if (error<.001f*ac[0])
81             break;
82 #endif
83       }
84    }
85 #ifdef FIXED_POINT
86    for (i=0;i<p;i++)
87       _lpc[i] = ROUND16(lpc[i],16);
88 #endif
89 }
90 
celt_fir(const opus_val16 * _x,const opus_val16 * num,opus_val16 * _y,int N,int ord,opus_val16 * mem)91 void celt_fir(const opus_val16 *_x,
92          const opus_val16 *num,
93          opus_val16 *_y,
94          int N,
95          int ord,
96          opus_val16 *mem)
97 {
98    int i,j;
99    VARDECL(opus_val16, rnum);
100    VARDECL(opus_val16, x);
101    SAVE_STACK;
102 
103    ALLOC(rnum, ord, opus_val16);
104    ALLOC(x, N+ord, opus_val16);
105    for(i=0;i<ord;i++)
106       rnum[i] = num[ord-i-1];
107    for(i=0;i<ord;i++)
108       x[i] = mem[ord-i-1];
109    for (i=0;i<N;i++)
110       x[i+ord]=_x[i];
111    for(i=0;i<ord;i++)
112       mem[i] = _x[N-i-1];
113 #ifdef SMALL_FOOTPRINT
114    for (i=0;i<N;i++)
115    {
116       opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
117       for (j=0;j<ord;j++)
118       {
119          sum = MAC16_16(sum,rnum[j],x[i+j]);
120       }
121       _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
122    }
123 #else
124    for (i=0;i<N-3;i+=4)
125    {
126       opus_val32 sum[4]={0,0,0,0};
127       xcorr_kernel(rnum, x+i, sum, ord);
128       _y[i  ] = SATURATE16(ADD32(EXTEND32(_x[i  ]), PSHR32(sum[0], SIG_SHIFT)));
129       _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
130       _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
131       _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
132    }
133    for (;i<N;i++)
134    {
135       opus_val32 sum = 0;
136       for (j=0;j<ord;j++)
137          sum = MAC16_16(sum,rnum[j],x[i+j]);
138       _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
139    }
140 #endif
141    RESTORE_STACK;
142 }
143 
celt_iir(const opus_val32 * _x,const opus_val16 * den,opus_val32 * _y,int N,int ord,opus_val16 * mem)144 void celt_iir(const opus_val32 *_x,
145          const opus_val16 *den,
146          opus_val32 *_y,
147          int N,
148          int ord,
149          opus_val16 *mem)
150 {
151 #ifdef SMALL_FOOTPRINT
152    int i,j;
153    for (i=0;i<N;i++)
154    {
155       opus_val32 sum = _x[i];
156       for (j=0;j<ord;j++)
157       {
158          sum -= MULT16_16(den[j],mem[j]);
159       }
160       for (j=ord-1;j>=1;j--)
161       {
162          mem[j]=mem[j-1];
163       }
164       mem[0] = ROUND16(sum,SIG_SHIFT);
165       _y[i] = sum;
166    }
167 #else
168    int i,j;
169    VARDECL(opus_val16, rden);
170    VARDECL(opus_val16, y);
171    SAVE_STACK;
172 
173    celt_assert((ord&3)==0);
174    ALLOC(rden, ord, opus_val16);
175    ALLOC(y, N+ord, opus_val16);
176    for(i=0;i<ord;i++)
177       rden[i] = den[ord-i-1];
178    for(i=0;i<ord;i++)
179       y[i] = -mem[ord-i-1];
180    for(;i<N+ord;i++)
181       y[i]=0;
182    for (i=0;i<N-3;i+=4)
183    {
184       /* Unroll by 4 as if it were an FIR filter */
185       opus_val32 sum[4];
186       sum[0]=_x[i];
187       sum[1]=_x[i+1];
188       sum[2]=_x[i+2];
189       sum[3]=_x[i+3];
190       xcorr_kernel(rden, y+i, sum, ord);
191 
192       /* Patch up the result to compensate for the fact that this is an IIR */
193       y[i+ord  ] = -ROUND16(sum[0],SIG_SHIFT);
194       _y[i  ] = sum[0];
195       sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
196       y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
197       _y[i+1] = sum[1];
198       sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
199       sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
200       y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
201       _y[i+2] = sum[2];
202 
203       sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
204       sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
205       sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
206       y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
207       _y[i+3] = sum[3];
208    }
209    for (;i<N;i++)
210    {
211       opus_val32 sum = _x[i];
212       for (j=0;j<ord;j++)
213          sum -= MULT16_16(rden[j],y[i+j]);
214       y[i+ord] = ROUND16(sum,SIG_SHIFT);
215       _y[i] = sum;
216    }
217    for(i=0;i<ord;i++)
218       mem[i] = _y[N-i-1];
219    RESTORE_STACK;
220 #endif
221 }
222 
_celt_autocorr(const opus_val16 * x,opus_val32 * ac,const opus_val16 * window,int overlap,int lag,int n,int arch)223 int _celt_autocorr(
224                    const opus_val16 *x,   /*  in: [0...n-1] samples x   */
225                    opus_val32       *ac,  /* out: [0...lag-1] ac values */
226                    const opus_val16       *window,
227                    int          overlap,
228                    int          lag,
229                    int          n,
230                    int          arch
231                   )
232 {
233    opus_val32 d;
234    int i, k;
235    int fastN=n-lag;
236    int shift;
237    const opus_val16 *xptr;
238    VARDECL(opus_val16, xx);
239    SAVE_STACK;
240    ALLOC(xx, n, opus_val16);
241    celt_assert(n>0);
242    celt_assert(overlap>=0);
243    if (overlap == 0)
244    {
245       xptr = x;
246    } else {
247       for (i=0;i<n;i++)
248          xx[i] = x[i];
249       for (i=0;i<overlap;i++)
250       {
251          xx[i] = MULT16_16_Q15(x[i],window[i]);
252          xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
253       }
254       xptr = xx;
255    }
256    shift=0;
257 #ifdef FIXED_POINT
258    {
259       opus_val32 ac0;
260       ac0 = 1+(n<<7);
261       if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
262       for(i=(n&1);i<n;i+=2)
263       {
264          ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
265          ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
266       }
267 
268       shift = celt_ilog2(ac0)-30+10;
269       shift = (shift)/2;
270       if (shift>0)
271       {
272          for(i=0;i<n;i++)
273             xx[i] = PSHR32(xptr[i], shift);
274          xptr = xx;
275       } else
276          shift = 0;
277    }
278 #endif
279    celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
280    for (k=0;k<=lag;k++)
281    {
282       for (i = k+fastN, d = 0; i < n; i++)
283          d = MAC16_16(d, xptr[i], xptr[i-k]);
284       ac[k] += d;
285    }
286 #ifdef FIXED_POINT
287    shift = 2*shift;
288    if (shift<=0)
289       ac[0] += SHL32((opus_int32)1, -shift);
290    if (ac[0] < 268435456)
291    {
292       int shift2 = 29 - EC_ILOG(ac[0]);
293       for (i=0;i<=lag;i++)
294          ac[i] = SHL32(ac[i], shift2);
295       shift -= shift2;
296    } else if (ac[0] >= 536870912)
297    {
298       int shift2=1;
299       if (ac[0] >= 1073741824)
300          shift2++;
301       for (i=0;i<=lag;i++)
302          ac[i] = SHR32(ac[i], shift2);
303       shift += shift2;
304    }
305 #endif
306 
307    RESTORE_STACK;
308    return shift;
309 }
310