/* Copyright (C) 2005 Analog Devices */
/**
   @file filters_bfin.h
   @brief Various analysis/synthesis filters (Blackfin version)
*/
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
35  #define OVERRIDE_NORMALIZE16
normalize16(const spx_sig_t * x,spx_word16_t * y,spx_sig_t max_scale,int len)36  int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
37  {
38     spx_sig_t max_val=1;
39     int sig_shift;
40     __asm__
41     (
42     "%0 = 0;\n\t"
43     "I0 = %1;\n\t"
44     "L0 = 0;\n\t"
45     "R1 = [I0++];\n\t"
46     "LOOP norm_max%= LC0 = %2;\n\t"
47     "LOOP_BEGIN norm_max%=;\n\t"
48        "R2 = ABS R1 || R1 = [I0++];\n\t"
49        "%0 = MAX(%0, R2);\n\t"
50     "LOOP_END norm_max%=;\n\t"
51     : "=&d" (max_val)
52     : "a" (x), "a" (len)
53     : "R1", "R2"
54     );
55  
56     sig_shift=0;
57     while (max_val>max_scale)
58     {
59        sig_shift++;
60        max_val >>= 1;
61     }
62  
63     __asm__ __volatile__
64     (
65     "I0 = %0;\n\t"
66     "L0 = 0;\n\t"
67     "P1 = %1;\n\t"
68     "R0 = [I0++];\n\t"
69     "LOOP norm_shift%= LC0 = %3;\n\t"
70     "LOOP_BEGIN norm_shift%=;\n\t"
71        "R1 = ASHIFT R0 by %2.L || R0 = [I0++];\n\t"
72        "W[P1++] = R1;\n\t"
73     "LOOP_END norm_shift%=;\n\t"
74     "R1 = ASHIFT R0 by %2.L;\n\t"
75     "W[P1++] = R1;\n\t"
76     : : "a" (x), "a" (y), "d" (-sig_shift), "a" (len-1)
77     : "I0", "L0", "P1", "R0", "R1", "memory"
78     );
79     return sig_shift;
80  }
84  #define OVERRIDE_FILTER_MEM16
filter_mem16(const spx_word16_t * _x,const spx_coef_t * num,const spx_coef_t * den,spx_word16_t * _y,int N,int ord,spx_mem_t * mem,char * stack)85  void filter_mem16(const spx_word16_t *_x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *_y, int N, int ord, spx_mem_t *mem, char *stack)
86  {
87     VARDECL(spx_word32_t *xy2);
88     VARDECL(spx_word32_t *numden_a);
89     spx_word32_t *xy;
90     spx_word16_t *numden;
91     int i;
92  
93     ALLOC(xy2, (N+1), spx_word32_t);
94     ALLOC(numden_a, (2*ord+2), spx_word32_t);
95     xy = xy2+1;
96     numden = (spx_word16_t*) numden_a;
97  
98     for (i=0;i<ord;i++)
99     {
100        numden[2*i] = num[i];
101        numden[2*i+1] = den[i];
102     }
103     __asm__ __volatile__
104     (
105     /* Register setup */
106     "R0 = %5;\n\t"      /*ord */
107  
108     "P0 = %3;\n\t"
109     "I0 = P0;\n\t"
110     "B0 = P0;\n\t" /* numden */
111     "L0 = 0;\n\t"
112  
113     "P2 = %0;\n\t" /* Fused xy */
114     "I2 = P2;\n\t"
115     "L2 = 0;\n\t"
116  
117     "P4 = %6;\n\t" /* mem */
118     "P0 = %1;\n\t" /* _x */
119     "P1 = %2;\n\t" /* _y */
120  
121     /* First sample */
122     "R1 = [P4++];\n\t"
123     "R1 <<= 3;\n\t" /* shift mem */
124     "R1.L = R1 (RND);\n\t"
125     "R2 = W[P0++];\n\t" /* load x[0] */
126     "R1.L = R1.L + R2.L;\n\t"
127     "W[P1++] = R1;\n\t" /* store y[0] */
128     "R2 = PACK(R1.L, R2.L);\n\t" /* pack x16 and y16 */
129     "[P2] = R2;\n\t"
130  
131     /* Samples 1 to ord-1 (using memory) */
132     "R0 += -1;\n\t"
133     "R3 = 0;\n\t"
134     "LC0 = R0;\n\t"
135     "LOOP filter_start%= LC0;\n\t"
136     "LOOP_BEGIN filter_start%=;\n\t"
137        "R3 += 1;\n\t"
138        "LC1 = R3;\n\t"
139  
140        "R1 = [P4++];\n\t"
141        "A1 = R1;\n\t"
142        "A0 = 0;\n\t"
143        "I0 = B0;\n\t"
144        "I2 = P2;\n\t"
145        "P2 += 4;\n\t"
146        "R4 = [I0++] || R5 = [I2--];\n\t"
147        "LOOP filter_start_inner%= LC1;\n\t"
148        "LOOP_BEGIN filter_start_inner%=;\n\t"
149           "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t"
150        "LOOP_END filter_start_inner%=;\n\t"
151        "A0 += A1;\n\t"
152        "R4 = A0;\n\t"
153        "R4 <<= 3;\n\t" /* shift mem */
154        "R4.L = R4 (RND);\n\t"
155        "R2 = W[P0++];\n\t" /* load x */
156        "R4.L = R4.L + R2.L;\n\t"
157        "W[P1++] = R4;\n\t" /* store y */
158        //"R4 <<= 2;\n\t"
159        //"R2 <<= 2;\n\t"
160        "R2 = PACK(R4.L, R2.L);\n\t" /* pack x16 and y16 */
161        "[P2] = R2;\n\t"
162  
163     "LOOP_END filter_start%=;\n\t"
164  
165     /* Samples ord to N*/
166     "R0 = %5;\n\t"
167     "R0 <<= 1;\n\t"
168     "I0 = B0;\n\t" /* numden */
169     "R0 <<= 1;\n\t"
170     "L0 = R0;\n\t"
171  
172     "R0 = %5;\n\t" /* org */
173     "R2 = %4;\n\t" /* N */
174     "R2 = R2 - R0;\n\t"
175     "R4 = [I0++];\n\t" /* numden */
176     "LC0 = R2;\n\t"
177     "P3 = R0;\n\t"
178     "R0 <<= 2;\n\t"
179     "R0 += 8;\n\t"
180     "I2 = P2;\n\t"
181     "M0 = R0;\n\t"
182     "A1 = A0 = 0;\n\t"
183     "R5 = [I2--];\n\t" /* load xy */
184     "LOOP filter_mid%= LC0;\n\t"
185     "LOOP_BEGIN filter_mid%=;\n\t"
186        "LOOP filter_mid_inner%= LC1=P3;\n\t"
187        "LOOP_BEGIN filter_mid_inner%=;\n\t"
188           "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t"
189        "LOOP_END filter_mid_inner%=;\n\t"
190        "R0 = (A0 += A1) || I2 += M0;\n\t"
191        "R0 = R0 << 3 || R5 = W[P0++];\n\t" /* load x */
192        "R0.L = R0 (RND);\n\t"
193        "R0.L = R0.L + R5.L;\n\t"
194        "R5 = PACK(R0.L, R5.L) || W[P1++] = R0;\n\t" /* shift y | store y */
195        "A1 = A0 = 0 || [I2--] = R5\n\t"
196        "LOOP_END filter_mid%=;\n\t"
197     "I2 += 4;\n\t"
198     "P2 = I2;\n\t"
199     /* Update memory */
200     "P4 = %6;\n\t"
201     "R0 = %5;\n\t"
202     "LC0 = R0;\n\t"
203     "P0 = B0;\n\t"
204     "A1 = A0 = 0;\n\t"
205     "LOOP mem_update%= LC0;\n\t"
206     "LOOP_BEGIN mem_update%=;\n\t"
207        "I2 = P2;\n\t"
208        "I0 = P0;\n\t"
209        "P0 += 4;\n\t"
210        "R0 = LC0;\n\t"
211        "LC1 = R0;\n\t"
212        "R5 = [I2--] || R4 = [I0++];\n\t"
213        "LOOP mem_accum%= LC1;\n\t"
214        "LOOP_BEGIN mem_accum%=;\n\t"
215           "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t"
216        "LOOP_END mem_accum%=;\n\t"
217        "R0 = (A0 += A1);\n\t"
218        "A1 = A0 = 0 || [P4++] = R0;\n\t"
219     "LOOP_END mem_update%=;\n\t"
220     "L0 = 0;\n\t"
221     : : "m" (xy), "m" (_x), "m" (_y), "m" (numden), "m" (N), "m" (ord), "m" (mem)
222     : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B0", "I0", "I2", "L0", "L2", "M0", "memory"
223     );
224  
225  }
229  #define OVERRIDE_IIR_MEM16
iir_mem16(const spx_word16_t * _x,const spx_coef_t * den,spx_word16_t * _y,int N,int ord,spx_mem_t * mem,char * stack)230  void iir_mem16(const spx_word16_t *_x, const spx_coef_t *den, spx_word16_t *_y, int N, int ord, spx_mem_t *mem, char *stack)
231  {
232     VARDECL(spx_word16_t *y);
233     spx_word16_t *yy;
234  
235     ALLOC(y, (N+2), spx_word16_t);
236     yy = y+2;
237  
238     __asm__ __volatile__
239     (
240     /* Register setup */
241     "R0 = %5;\n\t"      /*ord */
242  
243     "P1 = %3;\n\t"
244     "I1 = P1;\n\t"
245     "B1 = P1;\n\t"
246     "L1 = 0;\n\t"
247  
248     "P3 = %0;\n\t"
249     "I3 = P3;\n\t"
250     "L3 = 0;\n\t"
251  
252     "P4 = %6;\n\t"
253     "P0 = %1;\n\t"
254     "P1 = %2;\n\t"
255  
256     /* First sample */
257     "R1 = [P4++];\n\t"
258     "R1 = R1 << 3 (S);\n\t"
259     "R1.L = R1 (RND);\n\t"
260     "R2 = W[P0++];\n\t"
261     "R1 = R1 + R2;\n\t"
262     "W[P1++] = R1;\n\t"
263     "W[P3] = R1;\n\t"
264  
265     /* Samples 1 to ord-1 (using memory) */
266     "R0 += -1;\n\t"
267     "R3 = 0;\n\t"
268     "LC0 = R0;\n\t"
269     "LOOP filter_start%= LC0;\n\t"
270     "LOOP_BEGIN filter_start%=;\n\t"
271        "R3 += 1;\n\t"
272        "LC1 = R3;\n\t"
273  
274        "R1 = [P4++];\n\t"
275        "A1 = R1;\n\t"
276        "I1 = B1;\n\t"
277        "I3 = P3;\n\t"
278        "P3 += 2;\n\t"
279        "LOOP filter_start_inner%= LC1;\n\t"
280        "LOOP_BEGIN filter_start_inner%=;\n\t"
281           "R4.L = W[I1++];\n\t"
282           "R5.L = W[I3--];\n\t"
283           "A1 -= R4.L*R5.L (IS);\n\t"
284        "LOOP_END filter_start_inner%=;\n\t"
285  
286        "R1 = A1;\n\t"
287        "R1 <<= 3;\n\t"
288        "R1.L = R1 (RND);\n\t"
289        "R2 = W[P0++];\n\t"
290        "R1 = R1 + R2;\n\t"
291        "W[P1++] = R1;\n\t"
292        "W[P3] = R1;\n\t"
293     "LOOP_END filter_start%=;\n\t"
294  
295     /* Samples ord to N*/
296     "R0 = %5;\n\t"
297     "R0 <<= 1;\n\t"
298     "I1 = B1;\n\t"
299     "L1 = R0;\n\t"
300  
301     "R0 = %5;\n\t"
302     "R2 = %4;\n\t"
303     "R2 = R2 - R0;\n\t"
304     "R4.L = W[I1++];\n\t"
305     "LC0 = R2;\n\t"
306     "LOOP filter_mid%= LC0;\n\t"
307     "LOOP_BEGIN filter_mid%=;\n\t"
308        "LC1 = R0;\n\t"
309        "A1 = 0;\n\t"
310        "I3 = P3;\n\t"
311        "P3 += 2;\n\t"
312        "R5.L = W[I3--];\n\t"
313        "LOOP filter_mid_inner%= LC1;\n\t"
314        "LOOP_BEGIN filter_mid_inner%=;\n\t"
315           "A1 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t"
316        "LOOP_END filter_mid_inner%=;\n\t"
317        "R1 = A1;\n\t"
318        "R1 = R1 << 3 || R2 = W[P0++];\n\t"
319        "R1.L = R1 (RND);\n\t"
320        "R1 = R1 + R2;\n\t"
321        "W[P1++] = R1;\n\t"
322        "W[P3] = R1;\n\t"
323     "LOOP_END filter_mid%=;\n\t"
324  
325     /* Update memory */
326     "P4 = %6;\n\t"
327     "R0 = %5;\n\t"
328     "LC0 = R0;\n\t"
329     "P1 = B1;\n\t"
330     "LOOP mem_update%= LC0;\n\t"
331     "LOOP_BEGIN mem_update%=;\n\t"
332        "A0 = 0;\n\t"
333        "I3 = P3;\n\t"
334        "I1 = P1;\n\t"
335        "P1 += 2;\n\t"
336        "R0 = LC0;\n\t"
337        "LC1=R0;\n\t"
338        "R5.L = W[I3--] || R4.L = W[I1++];\n\t"
339        "LOOP mem_accum%= LC1;\n\t"
340        "LOOP_BEGIN mem_accum%=;\n\t"
341           "A0 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t"
342        "LOOP_END mem_accum%=;\n\t"
343        "R0 = A0;\n\t"
344        "[P4++] = R0;\n\t"
345     "LOOP_END mem_update%=;\n\t"
346     "L1 = 0;\n\t"
347     : : "m" (yy), "m" (_x), "m" (_y), "m" (den), "m" (N), "m" (ord), "m" (mem)
348     : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B1", "I1", "I3", "L1", "L3", "memory"
349     );
350  
351  }
354  #define OVERRIDE_FIR_MEM16
fir_mem16(const spx_word16_t * x,const spx_coef_t * num,spx_word16_t * y,int N,int ord,spx_mem_t * mem,char * stack)355  void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
356  {
357     int i;
358     spx_coef_t den2[12];
359     spx_coef_t *den;
360     den = (spx_coef_t*)((((int)den2)+4)&0xfffffffc);
361     for (i=0;i<10;i++)
362        den[i] = 0;
363     filter_mem16(x, num, den, y, N, ord, mem, stack);
364  }
367  #define OVERRIDE_COMPUTE_IMPULSE_RESPONSE
compute_impulse_response(const spx_coef_t * ak,const spx_coef_t * awk1,const spx_coef_t * awk2,spx_word16_t * y,int N,int ord,char * stack)368  void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
369  {
370     int i;
371     VARDECL(spx_word16_t *ytmp);
372     ALLOC(ytmp, N, spx_word16_t);
373     spx_word16_t *ytmp2 = ytmp;
374     y[0] = LPC_SCALING;
375     for (i=0;i<ord;i++)
376        y[i+1] = awk1[i];
377     i++;
378     for (;i<N;i++)
379        y[i] = 0;
380  
381     N-=1;
382     __asm__ __volatile__
383     (
384           "I0 = %0;\n\t"
385           "I1 = %1;\n\t"
386           "L0 = 0;\n\t"
387           "L1 = 0;\n\t"
388           "L2 = 0;\n\t"
389           "L3 = 0;\n\t"
390           "R0 = 1;\n\t"
391           "R0 <<= 13;\n\t"
392           "W[I0] = R0.L;\n\t"
393           "R0 <<= 1;\n\t"
394           "W[I1] = R0.L;\n\t"
395           "R0 = %5;\n\t"
396           "LC0 = R0;\n\t"
397           "R2 = 0;\n\t"
398           "LOOP samples%= LC0;\n\t"
399           "LOOP_BEGIN samples%=;\n\t"
400              "R2 += 1;\n\t"
401              "R2 = MIN(R2, %4);\n\t"
402              "I0 = %0;\n\t"
403              "I1 = %1;\n\t"
404              "I2 = %2;\n\t"
405              "I3 = %3;\n\t"
406              "%0 += 2;\n\t"
407              "%1 += 2;\n\t"
408              "A1 = A0 = 0;\n\t"
409              "R0.L = W[I0--] || R1.L = W[I2++];\n\t"
410              "LC1 = R2;\n\t"
411              "LOOP filter%= LC1;\n\t"
412              "LOOP_BEGIN filter%=;\n\t"
413                 "A0 -= R0.L*R1.L (IS) || R0.L = W[I1--] || R1.L = W[I3++];\n\t"
414                 "A1 -= R0.L*R1.L (IS) || R0.L = W[I0--] || R1.L = W[I2++];\n\t"
415              "LOOP_END filter%=;\n\t"
416              "R0 = A0, R1 = A1;\n\t"
417              "R3 = W[%1] (X);\n\t"
418              "R3 <<= 13;\n\t"
419              "R0 = R0 + R3;\n\t"
420              "R3 = R0 >>> 13;\n\t"
421              "W[%0] = R3.L;\n\t"
422              "R0 <<= 1;\n\t"
423              "R1 = R1 + R0;\n\t"
424              "R1 >>>= 13;\n\t"
425              "W[%1] = R1.L;\n\t"
426           "LOOP_END samples%=;\n\t"
427     : "=a" (ytmp2), "=a" (y)
428     : "a" (awk2), "a" (ak), "d" (ord), "m" (N), "0" (ytmp2), "1" (y)
429     : "A0", "A1", "R0", "R1", "R2", "R3", "I0", "I1", "I2", "I3", "L0", "L1", "L2", "L3", "A0", "A1"
430     );
431  }
#if 0 /* Equivalent C function for filter_mem2 and compute_impulse_response */
#define min(a,b) ((a)<(b) ? (a):(b))

/* Plain C reference for compute_impulse_response: seeds y with LPC_SCALING
   and awk1, then for every sample runs the awk2 recursion on ytmp and the
   ak recursion on y, using at most min(i,ord) past samples. */
void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
{
   int i,j;
   VARDECL(spx_word16_t *ytmp);
   ALLOC(ytmp, N, spx_word16_t);

   y[0] = LPC_SCALING;
   for (i=0;i<ord;i++)
      y[i+1] = awk1[i];
   i++;
   for (;i<N;i++)
      y[i] = 0;

   for (i=0;i<N;i++)
   {
      spx_word32_t yi = SHL32(EXTEND32(y[i]),LPC_SHIFT);
      spx_word32_t yi2 = 0;
      for (j=0;j<min(i,ord);j++)
      {
         yi = MAC16_16(yi, awk2[j], -ytmp[i-j-1]);
         yi2 = MAC16_16(yi2, ak[j], -y[i-j-1]);
      }
      ytmp[i] = EXTRACT16(SHR32(yi,LPC_SHIFT));
      yi2 = ADD32(yi2,SHL32(yi,1));
      y[i] = EXTRACT16(SHR32(yi2,LPC_SHIFT));
   }

}


/* Plain C reference for a pole-zero filter with memory on 32-bit signals:
   works on 16-bit copies of the input and output (shifted down by
   SIG_SHIFT), uses mem[] for the first ord samples and recomputes mem[]
   from the last ord inputs and outputs at the end. */
void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *_y, int N, int ord, spx_mem_t *mem)
{
   int i,j;
   spx_word16_t x[N],y[N];
   for (i=0;i<N;i++)
   {
      x[i] = EXTRACT16(SHR32(_x[i],SIG_SHIFT));
   }

   /* Samples 0 to ord-1: history is incomplete, start from the saved state. */
   for (i=0;i<ord;i++)
   {
      spx_word32_t yi = mem[i];
      for (j=0;j<i;j++)
      {
         yi = MAC16_16(yi, num[j], x[i-j-1]);
         yi = MAC16_16(yi, den[j], -y[i-j-1]);
      }
      _y[i] = ADD32(_x[i],SHL32(yi,1));
      y[i] = EXTRACT16(SHR32(_y[i],SIG_SHIFT));
   }
   /* Samples ord to N-1: full history available. */
   for (i=ord;i<N;i++)
   {
      spx_word32_t yi = 0;
      for (j=0;j<ord;j++)
      {
         yi = MAC16_16(yi, num[j], x[i-j-1]);
         yi = MAC16_16(yi, den[j], -y[i-j-1]);
      }
      _y[i] = ADD32(_x[i],SHL32(yi,1));
      y[i] = EXTRACT16(SHR32(_y[i],SIG_SHIFT));
   }

   /* Rebuild the state from the most recent inputs and outputs. */
   for (i=0;i<ord;i++)
   {
      spx_mem_t m = 0;
      for (j=0;j<ord-i;j++)
      {
         m = MAC16_16(m, x[N-1-j], num[j+i]);
         m = MAC16_16(m, -y[N-1-j], den[j+i]);
      }
      mem[i] = m;
   }
}
#endif