• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3   *
4   *  Use of this source code is governed by a BSD-style license
5   *  that can be found in the LICENSE file in the root of the source
6   *  tree. An additional intellectual property rights grant can be found
7   *  in the file PATENTS.  All contributing project authors may
8   *  be found in the AUTHORS file in the root of the source tree.
9   */
10  
11  /*
12   * lattice.c
13   *
14   * Contains the normalized lattice filter routines (MA and AR) for iSAC codec
15   *
16   */
17  
18  #include "codec.h"
19  #include "settings.h"
20  
/* Approximates (a32 * b32) >> 16 for a 32-bit factor a32 that the caller has
   already split into two 16-bit halves: a32a (high part) and a32b (low part).
   The high part is multiplied in full with b32; the low part goes through
   WEBRTC_SPL_MUL_16_32_RSFT16. Note that b32 appears twice in the expansion,
   so the argument passed for it must not have side effects. */
#define LATTICE_MUL_32_32_RSFT16(a32a, a32b, b32)                  \
  ((int32_t)(WEBRTC_SPL_MUL(a32a, b32) + (WEBRTC_SPL_MUL_16_32_RSFT16(a32b, b32))))
/* This macro is FORBIDDEN to use elsewhere than in a function in this file and
   its corresponding neon version. It might give unpredictable results, since a
   general int32_t*int32_t multiplication results in a 64 bit value.
   The result is then shifted just 16 steps to the right, giving need for 48
   bits, i.e. in the general case, it will NOT fit in an int32_t. In the
   cases used in here, the int32_t will be enough, since (for a good
   reason) the involved multiplicands aren't big enough to overflow an
   int32_t after shifting right 16 bits. I have compared the result of a
   multiplication between t32 and tmp32, done in two ways:
   1) Using (int32_t) (((float)(tmp32))*((float)(tmp32b))/65536.0);
   2) Using LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b);
   By running 25 files, I haven't found any bigger diff than 64 - this was in the
   case when  method 1) gave 650235648 and 2) gave 650235712.
*/
37  
/* Function prototype: filtering ar_g_Q0[] and ar_f_Q0[] through an AR filter
   with coefficients cth_Q15[] and sth_Q15[].
   Implemented for both generic and ARMv7 platforms.

   ar_g_Q0     : g vector of the lattice, updated in place (Q0).
   ar_f_Q0     : f vector of the lattice, updated in place (Q0).
   cth_Q15     : cth[] filter coefficients in Q15.
   sth_Q15     : sth[] filter coefficients in Q15.
   order_coef  : filter order, i.e. the number of entries used in cth_Q15[]
                 and sth_Q15[] (the caller in this file passes its orderCoef).
 */
void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0,
                                int16_t* ar_f_Q0,
                                int16_t* cth_Q15,
                                int16_t* sth_Q15,
                                size_t order_coef);
47  
48  /* Inner loop used for function WebRtcIsacfix_NormLatticeFilterMa(). It does:
49     for 0 <= n < HALF_SUBFRAMELEN - 1:
50       *ptr2 = input2 * (*ptr2) + input0 * (*ptr0));
51       *ptr1 = input1 * (*ptr0) + input0 * (*ptr2);
52     Note, function WebRtcIsacfix_FilterMaLoopNeon and WebRtcIsacfix_FilterMaLoopC
53     are not bit-exact. The accuracy by the ARM Neon function is same or better.
54  */
WebRtcIsacfix_FilterMaLoopC(int16_t input0,int16_t input1,int32_t input2,int32_t * ptr0,int32_t * ptr1,int32_t * ptr2)55  void WebRtcIsacfix_FilterMaLoopC(int16_t input0,  // Filter coefficient
56                                   int16_t input1,  // Filter coefficient
57                                   int32_t input2,  // Inverse coeff. (1/input1)
58                                   int32_t* ptr0,   // Sample buffer
59                                   int32_t* ptr1,   // Sample buffer
60                                   int32_t* ptr2) { // Sample buffer
61    int n = 0;
62  
63    // Separate the 32-bit variable input2 into two 16-bit integers (high 16 and
64    // low 16 bits), for using LATTICE_MUL_32_32_RSFT16 in the loop.
65    int16_t t16a = (int16_t)(input2 >> 16);
66    int16_t t16b = (int16_t)input2;
67    if (t16b < 0) t16a++;
68  
69    // The loop filtering the samples *ptr0, *ptr1, *ptr2 with filter coefficients
70    // input0, input1, and input2.
71    for(n = 0; n < HALF_SUBFRAMELEN - 1; n++, ptr0++, ptr1++, ptr2++) {
72      int32_t tmp32a = 0;
73      int32_t tmp32b = 0;
74  
75      // Calculate *ptr2 = input2 * (*ptr2 + input0 * (*ptr0));
76      tmp32a = WEBRTC_SPL_MUL_16_32_RSFT15(input0, *ptr0); // Q15 * Q15 >> 15 = Q15
77      tmp32b = *ptr2 + tmp32a; // Q15 + Q15 = Q15
78      *ptr2 = LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b);
79  
80      // Calculate *ptr1 = input1 * (*ptr0) + input0 * (*ptr2);
81      tmp32a = WEBRTC_SPL_MUL_16_32_RSFT15(input1, *ptr0); // Q15*Q15>>15 = Q15
82      tmp32b = WEBRTC_SPL_MUL_16_32_RSFT15(input0, *ptr2); // Q15*Q15>>15 = Q15
83      *ptr1 = tmp32a + tmp32b; // Q15 + Q15 = Q15
84    }
85  }
86  
87  /* filter the signal using normalized lattice filter */
88  /* MA filter */
WebRtcIsacfix_NormLatticeFilterMa(size_t orderCoef,int32_t * stateGQ15,int16_t * lat_inQ0,int16_t * filt_coefQ15,int32_t * gain_lo_hiQ17,int16_t lo_hi,int16_t * lat_outQ9)89  void WebRtcIsacfix_NormLatticeFilterMa(size_t orderCoef,
90                                         int32_t *stateGQ15,
91                                         int16_t *lat_inQ0,
92                                         int16_t *filt_coefQ15,
93                                         int32_t *gain_lo_hiQ17,
94                                         int16_t lo_hi,
95                                         int16_t *lat_outQ9)
96  {
97    int16_t sthQ15[MAX_AR_MODEL_ORDER];
98    int16_t cthQ15[MAX_AR_MODEL_ORDER];
99  
100    int u, n;
101    size_t i, k;
102    int16_t temp2,temp3;
103    size_t ord_1 = orderCoef+1;
104    int32_t inv_cthQ16[MAX_AR_MODEL_ORDER];
105  
106    int32_t gain32, fQtmp;
107    int16_t gain16;
108    int16_t gain_sh;
109  
110    int32_t tmp32, tmp32b;
111    int32_t fQ15vec[HALF_SUBFRAMELEN];
112    int32_t gQ15[MAX_AR_MODEL_ORDER+1][HALF_SUBFRAMELEN];
113    int16_t sh;
114    int16_t t16a;
115    int16_t t16b;
116  
117    for (u=0;u<SUBFRAMES;u++)
118    {
119      int32_t temp1 = u * HALF_SUBFRAMELEN;
120  
121      /* set the Direct Form coefficients */
122      temp2 = (int16_t)(u * orderCoef);
123      temp3 = (int16_t)(2 * u + lo_hi);
124  
125      /* compute lattice filter coefficients */
126      memcpy(sthQ15, &filt_coefQ15[temp2], orderCoef * sizeof(int16_t));
127  
128      WebRtcSpl_SqrtOfOneMinusXSquared(sthQ15, orderCoef, cthQ15);
129  
130      /* compute the gain */
131      gain32 = gain_lo_hiQ17[temp3];
132      gain_sh = WebRtcSpl_NormW32(gain32);
133      gain32 <<= gain_sh;  // Q(17+gain_sh)
134  
135      for (k=0;k<orderCoef;k++)
136      {
137        gain32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[k], gain32); //Q15*Q(17+gain_sh)>>15 = Q(17+gain_sh)
138        inv_cthQ16[k] = WebRtcSpl_DivW32W16((int32_t)2147483647, cthQ15[k]); // 1/cth[k] in Q31/Q15 = Q16
139      }
140      gain16 = (int16_t)(gain32 >> 16);  // Q(1+gain_sh).
141  
142      /* normalized lattice filter */
143      /*****************************/
144  
145      /* initial conditions */
146      for (i=0;i<HALF_SUBFRAMELEN;i++)
147      {
148        fQ15vec[i] = lat_inQ0[i + temp1] << 15;  // Q15
149        gQ15[0][i] = lat_inQ0[i + temp1] << 15;  // Q15
150      }
151  
152  
153      fQtmp = fQ15vec[0];
154  
155      /* get the state of f&g for the first input, for all orders */
156      for (i=1;i<ord_1;i++)
157      {
158        // Calculate f[i][0] = inv_cth[i-1]*(f[i-1][0] + sth[i-1]*stateG[i-1]);
159        tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(sthQ15[i-1], stateGQ15[i-1]);//Q15*Q15>>15 = Q15
160        tmp32b= fQtmp + tmp32; //Q15+Q15=Q15
161        tmp32 = inv_cthQ16[i-1]; //Q16
162        t16a = (int16_t)(tmp32 >> 16);
163        t16b = (int16_t)(tmp32 - (t16a << 16));
164        if (t16b<0) t16a++;
165        tmp32 = LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b);
166        fQtmp = tmp32; // Q15
167  
168        // Calculate g[i][0] = cth[i-1]*stateG[i-1] + sth[i-1]* f[i][0];
169        tmp32  = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[i-1], stateGQ15[i-1]); //Q15*Q15>>15 = Q15
170        tmp32b = WEBRTC_SPL_MUL_16_32_RSFT15(sthQ15[i-1], fQtmp); //Q15*Q15>>15 = Q15
171        tmp32  = tmp32 + tmp32b;//Q15+Q15 = Q15
172        gQ15[i][0] = tmp32; // Q15
173      }
174  
175      /* filtering */
176      /* save the states */
177      for(k=0;k<orderCoef;k++)
178      {
179        // for 0 <= n < HALF_SUBFRAMELEN - 1:
180        //   f[k+1][n+1] = inv_cth[k]*(f[k][n+1] + sth[k]*g[k][n]);
181        //   g[k+1][n+1] = cth[k]*g[k][n] + sth[k]* f[k+1][n+1];
182        WebRtcIsacfix_FilterMaLoopFix(sthQ15[k], cthQ15[k], inv_cthQ16[k],
183                                      &gQ15[k][0], &gQ15[k+1][1], &fQ15vec[1]);
184      }
185  
186      fQ15vec[0] = fQtmp;
187  
188      for(n=0;n<HALF_SUBFRAMELEN;n++)
189      {
190        //gain32 >>= gain_sh; // Q(17+gain_sh) -> Q17
191        tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(gain16, fQ15vec[n]); //Q(1+gain_sh)*Q15>>16 = Q(gain_sh)
192        sh = 9-gain_sh; //number of needed shifts to reach Q9
193        t16a = (int16_t) WEBRTC_SPL_SHIFT_W32(tmp32, sh);
194        lat_outQ9[n + temp1] = t16a;
195      }
196  
197      /* save the states */
198      for (i=0;i<ord_1;i++)
199      {
200        stateGQ15[i] = gQ15[i][HALF_SUBFRAMELEN-1];
201      }
202      //process next frame
203    }
204  
205    return;
206  }
207  
208  
209  
210  
211  
212  /* ----------------AR filter-------------------------*/
213  /* filter the signal using normalized lattice filter */
WebRtcIsacfix_NormLatticeFilterAr(size_t orderCoef,int16_t * stateGQ0,int32_t * lat_inQ25,int16_t * filt_coefQ15,int32_t * gain_lo_hiQ17,int16_t lo_hi,int16_t * lat_outQ0)214  void WebRtcIsacfix_NormLatticeFilterAr(size_t orderCoef,
215                                         int16_t *stateGQ0,
216                                         int32_t *lat_inQ25,
217                                         int16_t *filt_coefQ15,
218                                         int32_t *gain_lo_hiQ17,
219                                         int16_t lo_hi,
220                                         int16_t *lat_outQ0)
221  {
222    size_t ii, k, i;
223    int n, u;
224    int16_t sthQ15[MAX_AR_MODEL_ORDER];
225    int16_t cthQ15[MAX_AR_MODEL_ORDER];
226    int32_t tmp32;
227  
228  
229    int16_t tmpAR;
230    int16_t ARfQ0vec[HALF_SUBFRAMELEN];
231    int16_t ARgQ0vec[MAX_AR_MODEL_ORDER+1];
232  
233    int32_t inv_gain32;
234    int16_t inv_gain16;
235    int16_t den16;
236    int16_t sh;
237  
238    int16_t temp2,temp3;
239    size_t ord_1 = orderCoef+1;
240  
241    for (u=0;u<SUBFRAMES;u++)
242    {
243      int32_t temp1 = u * HALF_SUBFRAMELEN;
244  
245      //set the denominator and numerator of the Direct Form
246      temp2 = (int16_t)(u * orderCoef);
247      temp3 = (int16_t)(2 * u + lo_hi);
248  
249      for (ii=0; ii<orderCoef; ii++) {
250        sthQ15[ii] = filt_coefQ15[temp2+ii];
251      }
252  
253      WebRtcSpl_SqrtOfOneMinusXSquared(sthQ15, orderCoef, cthQ15);
254  
255      /* Simulation of the 25 files shows that maximum value in
256         the vector gain_lo_hiQ17[] is 441344, which means that
257         it is log2((2^31)/441344) = 12.2 shifting bits from
258         saturation. Therefore, it should be safe to use Q27 instead
259         of Q17. */
260  
261      tmp32 = gain_lo_hiQ17[temp3] << 10;  // Q27
262  
263      for (k=0;k<orderCoef;k++) {
264        tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[k], tmp32); // Q15*Q27>>15 = Q27
265      }
266  
267      sh = WebRtcSpl_NormW32(tmp32); // tmp32 is the gain
268      den16 = (int16_t) WEBRTC_SPL_SHIFT_W32(tmp32, sh-16); //Q(27+sh-16) = Q(sh+11) (all 16 bits are value bits)
269      inv_gain32 = WebRtcSpl_DivW32W16((int32_t)2147483647, den16); // 1/gain in Q31/Q(sh+11) = Q(20-sh)
270  
271      //initial conditions
272      inv_gain16 = (int16_t)(inv_gain32 >> 2);  // 1/gain in Q(20-sh-2) = Q(18-sh)
273  
274      for (i=0;i<HALF_SUBFRAMELEN;i++)
275      {
276  
277        tmp32 = lat_inQ25[i + temp1] << 1;  // Q25->Q26
278        tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(inv_gain16, tmp32); //lat_in[]*inv_gain in (Q(18-sh)*Q26)>>16 = Q(28-sh)
279        tmp32 = WEBRTC_SPL_SHIFT_W32(tmp32, -(28-sh)); // lat_in[]*inv_gain in Q0
280  
281        ARfQ0vec[i] = (int16_t)WebRtcSpl_SatW32ToW16(tmp32); // Q0
282      }
283  
284      // Get the state of f & g for the first input, for all orders.
285      for (i = orderCoef; i > 0; i--)
286      {
287        tmp32 = (cthQ15[i - 1] * ARfQ0vec[0] - sthQ15[i - 1] * stateGQ0[i - 1] +
288                 16384) >> 15;
289        tmpAR = (int16_t)WebRtcSpl_SatW32ToW16(tmp32); // Q0
290  
291        tmp32 = (sthQ15[i - 1] * ARfQ0vec[0] + cthQ15[i - 1] * stateGQ0[i - 1] +
292                 16384) >> 15;
293        ARgQ0vec[i] = (int16_t)WebRtcSpl_SatW32ToW16(tmp32); // Q0
294        ARfQ0vec[0] = tmpAR;
295      }
296      ARgQ0vec[0] = ARfQ0vec[0];
297  
298      // Filter ARgQ0vec[] and ARfQ0vec[] through coefficients cthQ15[] and sthQ15[].
299      WebRtcIsacfix_FilterArLoop(ARgQ0vec, ARfQ0vec, cthQ15, sthQ15, orderCoef);
300  
301      for(n=0;n<HALF_SUBFRAMELEN;n++)
302      {
303        lat_outQ0[n + temp1] = ARfQ0vec[n];
304      }
305  
306  
307      /* cannot use memcpy in the following */
308  
309      for (i=0;i<ord_1;i++)
310      {
311        stateGQ0[i] = ARgQ0vec[i];
312      }
313    }
314  
315    return;
316  }
317