1 /*
2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <stddef.h>
12 
13 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
14 #include "webrtc/typedefs.h"
15 
16 // Filter ar_g_Q0[] and ar_f_Q0[] through an AR filter with coefficients
17 // cth_Q15[] and sth_Q15[].
WebRtcIsacfix_FilterArLoop(int16_t * ar_g_Q0,int16_t * ar_f_Q0,int16_t * cth_Q15,int16_t * sth_Q15,size_t order_coef)18 void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0,     // Input samples
19                                 int16_t* ar_f_Q0,     // Input samples
20                                 int16_t* cth_Q15,     // Filter coefficients
21                                 int16_t* sth_Q15,     // Filter coefficients
22                                 size_t order_coef) { // order of the filter
23   int n = 0;
24 
25   for (n = 0; n < HALF_SUBFRAMELEN - 1; n++) {
26     int count = (int)(order_coef - 1);
27     int offset;
28 #if !defined(MIPS_DSP_R1_LE)
29     int16_t* tmp_cth;
30     int16_t* tmp_sth;
31     int16_t* tmp_arg;
32     int32_t max_q16 = 0x7fff;
33     int32_t min_q16 = 0xffff8000;
34 #endif
35     // Declare variables used as temporary registers.
36     int32_t r0, r1, r2, t0, t1, t2, t_ar;
37 
38     __asm __volatile (
39       ".set          push                                                \n\t"
40       ".set          noreorder                                           \n\t"
41       "bltz          %[count],     2f                                    \n\t"
42       " lh           %[t_ar],      0(%[tmp])                             \n\t"
43       // Inner loop
44      "1:                                                                 \n\t"
45       "sll           %[offset],    %[count],               1             \n\t"
46 #if defined(MIPS_DSP_R1_LE)
47       "lhx           %[r0],        %[offset](%[cth_Q15])                 \n\t"
48       "lhx           %[r1],        %[offset](%[sth_Q15])                 \n\t"
49       "lhx           %[r2],        %[offset](%[ar_g_Q0])                 \n\t"
50 #else
51       "addu          %[tmp_cth],   %[cth_Q15],             %[offset]     \n\t"
52       "addu          %[tmp_sth],   %[sth_Q15],             %[offset]     \n\t"
53       "addu          %[tmp_arg],   %[ar_g_Q0],             %[offset]     \n\t"
54       "lh            %[r0],        0(%[tmp_cth])                         \n\t"
55       "lh            %[r1],        0(%[tmp_sth])                         \n\t"
56       "lh            %[r2],        0(%[tmp_arg])                         \n\t"
57 #endif
58       "mul           %[t0],        %[r0],                  %[t_ar]       \n\t"
59       "mul           %[t1],        %[r1],                  %[t_ar]       \n\t"
60       "mul           %[t2],        %[r1],                  %[r2]         \n\t"
61       "mul           %[r0],        %[r0],                  %[r2]         \n\t"
62       "subu          %[t0],        %[t0],                  %[t2]         \n\t"
63       "addu          %[t1],        %[t1],                  %[r0]         \n\t"
64 #if defined(MIPS_DSP_R1_LE)
65       "shra_r.w      %[t1],        %[t1],                  15            \n\t"
66       "shra_r.w      %[t0],        %[t0],                  15            \n\t"
67 #else
68       "addiu         %[t1],        %[t1],                  0x4000        \n\t"
69       "sra           %[t1],        %[t1],                  15            \n\t"
70       "addiu         %[t0],        %[t0],                  0x4000        \n\t"
71       "sra           %[t0],        %[t0],                  15            \n\t"
72 #endif
73       "addiu         %[offset],    %[offset],              2             \n\t"
74 #if defined(MIPS_DSP_R1_LE)
75       "shll_s.w      %[t1],        %[t1],                  16            \n\t"
76       "shll_s.w      %[t_ar],      %[t0],                  16            \n\t"
77 #else
78       "slt           %[r0],        %[t1],                  %[max_q16]    \n\t"
79       "slt           %[r1],        %[t0],                  %[max_q16]    \n\t"
80       "movz          %[t1],        %[max_q16],             %[r0]         \n\t"
81       "movz          %[t0],        %[max_q16],             %[r1]         \n\t"
82 #endif
83       "addu          %[offset],    %[offset],              %[ar_g_Q0]    \n\t"
84 #if defined(MIPS_DSP_R1_LE)
85       "sra           %[t1],        %[t1],                  16            \n\t"
86       "sra           %[t_ar],      %[t_ar],                16            \n\t"
87 #else
88       "slt           %[r0],        %[t1],                  %[min_q16]    \n\t"
89       "slt           %[r1],        %[t0],                  %[min_q16]    \n\t"
90       "movn          %[t1],        %[min_q16],             %[r0]         \n\t"
91       "movn          %[t0],        %[min_q16],             %[r1]         \n\t"
92       "addu          %[t_ar],      $zero,                  %[t0]         \n\t"
93 #endif
94       "sh            %[t1],        0(%[offset])                          \n\t"
95       "bgtz          %[count],     1b                                    \n\t"
96       " addiu        %[count],     %[count],               -1            \n\t"
97      "2:                                                                 \n\t"
98       "sh            %[t_ar],      0(%[tmp])                             \n\t"
99       "sh            %[t_ar],      0(%[ar_g_Q0])                         \n\t"
100       ".set          pop                                                 \n\t"
101       : [t_ar] "=&r" (t_ar), [count] "+r" (count), [offset] "=&r" (offset),
102         [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [t0] "=&r" (t0),
103 #if !defined(MIPS_DSP_R1_LE)
104         [tmp_cth] "=&r" (tmp_cth), [tmp_sth] "=&r" (tmp_sth),
105         [tmp_arg] "=&r" (tmp_arg),
106 #endif
107         [t1] "=&r" (t1), [t2] "=&r" (t2)
108       : [tmp] "r" (&ar_f_Q0[n+1]), [cth_Q15] "r" (cth_Q15),
109 #if !defined(MIPS_DSP_R1_LE)
110         [max_q16] "r" (max_q16), [min_q16] "r" (min_q16),
111 #endif
112         [sth_Q15] "r" (sth_Q15), [ar_g_Q0] "r" (ar_g_Q0)
113       : "memory", "hi", "lo"
114     );
115   }
116 }
117 
118 // MIPS optimization of the inner loop used for function
119 // WebRtcIsacfix_NormLatticeFilterMa(). It does:
120 //
121 // for 0 <= n < HALF_SUBFRAMELEN - 1:
//   *ptr2 = input2 * (*ptr2 + input0 * (*ptr0));
123 //   *ptr1 = input1 * (*ptr0) + input0 * (*ptr2);
124 //
125 // Note, function WebRtcIsacfix_FilterMaLoopMIPS and WebRtcIsacfix_FilterMaLoopC
126 // are not bit-exact. The accuracy of the MIPS function is same or better.
WebRtcIsacfix_FilterMaLoopMIPS(int16_t input0,int16_t input1,int32_t input2,int32_t * ptr0,int32_t * ptr1,int32_t * ptr2)127 void WebRtcIsacfix_FilterMaLoopMIPS(int16_t input0,  // Filter coefficient
128                                     int16_t input1,  // Filter coefficient
129                                     int32_t input2,  // Inverse coeff (1/input1)
130                                     int32_t* ptr0,   // Sample buffer
131                                     int32_t* ptr1,   // Sample buffer
132                                     int32_t* ptr2) { // Sample buffer
133 #if defined(MIPS_DSP_R2_LE)
134   // MIPS DSPR2 version. 4 available accumulators allows loop unrolling 4 times.
135   // This variant is not bit-exact with WebRtcIsacfix_FilterMaLoopC, since we
136   // are exploiting 64-bit accumulators. The accuracy of the MIPS DSPR2 function
137   // is same or better.
138   int n = (HALF_SUBFRAMELEN - 1) >> 2;
139   int m = (HALF_SUBFRAMELEN - 1) & 3;
140 
141   int r0, r1, r2, r3;
142   int t0, t1, t2, t3;
143   int s0, s1, s2, s3;
144 
145   __asm __volatile (
146     ".set          push                                      \n\t"
147     ".set          noreorder                                 \n\t"
148    "1:                                                       \n\t"
149     "lw            %[r0],        0(%[ptr0])                  \n\t"
150     "lw            %[r1],        4(%[ptr0])                  \n\t"
151     "lw            %[r2],        8(%[ptr0])                  \n\t"
152     "lw            %[r3],        12(%[ptr0])                 \n\t"
153     "mult          $ac0,         %[r0],        %[input0]     \n\t"
154     "mult          $ac1,         %[r1],        %[input0]     \n\t"
155     "mult          $ac2,         %[r2],        %[input0]     \n\t"
156     "mult          $ac3,         %[r3],        %[input0]     \n\t"
157     "lw            %[t0],        0(%[ptr2])                  \n\t"
158     "extr_rs.w     %[s0],        $ac0,         15            \n\t"
159     "extr_rs.w     %[s1],        $ac1,         15            \n\t"
160     "extr_rs.w     %[s2],        $ac2,         15            \n\t"
161     "extr_rs.w     %[s3],        $ac3,         15            \n\t"
162     "lw            %[t1],        4(%[ptr2])                  \n\t"
163     "lw            %[t2],        8(%[ptr2])                  \n\t"
164     "lw            %[t3],        12(%[ptr2])                 \n\t"
165     "addu          %[t0],        %[t0],        %[s0]         \n\t"
166     "addu          %[t1],        %[t1],        %[s1]         \n\t"
167     "addu          %[t2],        %[t2],        %[s2]         \n\t"
168     "addu          %[t3],        %[t3],        %[s3]         \n\t"
169     "mult          $ac0,         %[t0],        %[input2]     \n\t"
170     "mult          $ac1,         %[t1],        %[input2]     \n\t"
171     "mult          $ac2,         %[t2],        %[input2]     \n\t"
172     "mult          $ac3,         %[t3],        %[input2]     \n\t"
173     "addiu         %[ptr0],      %[ptr0],      16            \n\t"
174     "extr_rs.w     %[t0],        $ac0,         16            \n\t"
175     "extr_rs.w     %[t1],        $ac1,         16            \n\t"
176     "extr_rs.w     %[t2],        $ac2,         16            \n\t"
177     "extr_rs.w     %[t3],        $ac3,         16            \n\t"
178     "addiu         %[n],         %[n],         -1            \n\t"
179     "mult          $ac0,         %[r0],        %[input1]     \n\t"
180     "mult          $ac1,         %[r1],        %[input1]     \n\t"
181     "mult          $ac2,         %[r2],        %[input1]     \n\t"
182     "mult          $ac3,         %[r3],        %[input1]     \n\t"
183     "sw            %[t0],        0(%[ptr2])                  \n\t"
184     "extr_rs.w     %[s0],        $ac0,         15            \n\t"
185     "extr_rs.w     %[s1],        $ac1,         15            \n\t"
186     "extr_rs.w     %[s2],        $ac2,         15            \n\t"
187     "extr_rs.w     %[s3],        $ac3,         15            \n\t"
188     "sw            %[t1],        4(%[ptr2])                  \n\t"
189     "sw            %[t2],        8(%[ptr2])                  \n\t"
190     "sw            %[t3],        12(%[ptr2])                 \n\t"
191     "mult          $ac0,         %[t0],        %[input0]     \n\t"
192     "mult          $ac1,         %[t1],        %[input0]     \n\t"
193     "mult          $ac2,         %[t2],        %[input0]     \n\t"
194     "mult          $ac3,         %[t3],        %[input0]     \n\t"
195     "addiu         %[ptr2],      %[ptr2],      16            \n\t"
196     "extr_rs.w     %[t0],        $ac0,         15            \n\t"
197     "extr_rs.w     %[t1],        $ac1,         15            \n\t"
198     "extr_rs.w     %[t2],        $ac2,         15            \n\t"
199     "extr_rs.w     %[t3],        $ac3,         15            \n\t"
200     "addu          %[t0],        %[t0],        %[s0]         \n\t"
201     "addu          %[t1],        %[t1],        %[s1]         \n\t"
202     "addu          %[t2],        %[t2],        %[s2]         \n\t"
203     "addu          %[t3],        %[t3],        %[s3]         \n\t"
204     "sw            %[t0],        0(%[ptr1])                  \n\t"
205     "sw            %[t1],        4(%[ptr1])                  \n\t"
206     "sw            %[t2],        8(%[ptr1])                  \n\t"
207     "sw            %[t3],        12(%[ptr1])                 \n\t"
208     "bgtz          %[n],         1b                          \n\t"
209     " addiu        %[ptr1],      %[ptr1],      16            \n\t"
210     "beq           %[m],         %0,           3f            \n\t"
211     " nop                                                    \n\t"
212    "2:                                                       \n\t"
213     "lw            %[r0],        0(%[ptr0])                  \n\t"
214     "lw            %[t0],        0(%[ptr2])                  \n\t"
215     "addiu         %[ptr0],      %[ptr0],      4             \n\t"
216     "mult          $ac0,         %[r0],        %[input0]     \n\t"
217     "mult          $ac1,         %[r0],        %[input1]     \n\t"
218     "extr_rs.w     %[r1],        $ac0,         15            \n\t"
219     "extr_rs.w     %[t1],        $ac1,         15            \n\t"
220     "addu          %[t0],        %[t0],        %[r1]         \n\t"
221     "mult          $ac0,         %[t0],        %[input2]     \n\t"
222     "extr_rs.w     %[t0],        $ac0,         16            \n\t"
223     "sw            %[t0],        0(%[ptr2])                  \n\t"
224     "mult          $ac0,         %[t0],        %[input0]     \n\t"
225     "addiu         %[ptr2],      %[ptr2],      4             \n\t"
226     "addiu         %[m],         %[m],         -1            \n\t"
227     "extr_rs.w     %[t0],        $ac0,         15            \n\t"
228     "addu          %[t0],        %[t0],        %[t1]         \n\t"
229     "sw            %[t0],        0(%[ptr1])                  \n\t"
230     "bgtz          %[m],         2b                          \n\t"
231     " addiu        %[ptr1],      %[ptr1],      4             \n\t"
232    "3:                                                       \n\t"
233     ".set          pop                                       \n\t"
234     : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
235       [r3] "=&r" (r3), [t0] "=&r" (t0), [t1] "=&r" (t1),
236       [t2] "=&r" (t2), [t3] "=&r" (t3), [s0] "=&r" (s0),
237       [s1] "=&r" (s1), [s2] "=&r" (s2), [s3] "=&r" (s3),
238       [ptr0] "+r" (ptr0), [ptr1] "+r" (ptr1), [m] "+r" (m),
239       [ptr2] "+r" (ptr2), [n] "+r" (n)
240     : [input0] "r" (input0), [input1] "r" (input1),
241       [input2] "r" (input2)
242     : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi",
243       "$ac2lo", "$ac3hi", "$ac3lo"
244   );
245 #else
246   // Non-DSPR2 version of the function. Avoiding the accumulator usage due to
247   // large latencies. This variant is bit-exact with C code.
248   int n = HALF_SUBFRAMELEN - 1;
249   int32_t t16a, t16b;
250   int32_t r0, r1, r2, r3, r4;
251 
252   __asm __volatile (
253     ".set          push                                      \n\t"
254     ".set          noreorder                                 \n\t"
255     "sra           %[t16a],      %[input2],     16           \n\t"
256     "andi          %[t16b],      %[input2],     0xFFFF       \n\t"
257 #if defined(MIPS32R2_LE)
258     "seh           %[t16b],      %[t16b]                     \n\t"
259     "seh           %[input0],    %[input0]                   \n\t"
260     "seh           %[input1],    %[input1]                   \n\t"
261 #else
262     "sll           %[t16b],      %[t16b],       16           \n\t"
263     "sra           %[t16b],      %[t16b],       16           \n\t"
264     "sll           %[input0],    %[input0],     16           \n\t"
265     "sra           %[input0],    %[input0],     16           \n\t"
266     "sll           %[input1],    %[input1],     16           \n\t"
267     "sra           %[input1],    %[input1],     16           \n\t"
268 #endif
269     "addiu         %[r0],        %[t16a],       1            \n\t"
270     "slt           %[r1],        %[t16b],       $zero        \n\t"
271     "movn          %[t16a],      %[r0],         %[r1]        \n\t"
272    "1:                                                       \n\t"
273     "lw            %[r0],        0(%[ptr0])                  \n\t"
274     "lw            %[r1],        0(%[ptr2])                  \n\t"
275     "addiu         %[ptr0],      %[ptr0],       4            \n\t"
276     "sra           %[r2],        %[r0],         16           \n\t"
277     "andi          %[r0],        %[r0],         0xFFFF       \n\t"
278     "mul           %[r3],        %[r2],         %[input0]    \n\t"
279     "mul           %[r4],        %[r0],         %[input0]    \n\t"
280     "mul           %[r2],        %[r2],         %[input1]    \n\t"
281     "mul           %[r0],        %[r0],         %[input1]    \n\t"
282     "addiu         %[ptr2],      %[ptr2],       4            \n\t"
283     "sll           %[r3],        %[r3],         1            \n\t"
284     "sra           %[r4],        %[r4],         1            \n\t"
285     "addiu         %[r4],        %[r4],         0x2000       \n\t"
286     "sra           %[r4],        %[r4],         14           \n\t"
287     "addu          %[r3],        %[r3],         %[r4]        \n\t"
288     "addu          %[r1],        %[r1],         %[r3]        \n\t"
289     "sra           %[r3],        %[r1],         16           \n\t"
290     "andi          %[r4],        %[r1],         0xFFFF       \n\t"
291     "sra           %[r4],        %[r4],         1            \n\t"
292     "mul           %[r1],        %[r1],         %[t16a]      \n\t"
293     "mul           %[r3],        %[r3],         %[t16b]      \n\t"
294     "mul           %[r4],        %[r4],         %[t16b]      \n\t"
295     "sll           %[r2],        %[r2],         1            \n\t"
296     "sra           %[r0],        %[r0],         1            \n\t"
297     "addiu         %[r0],        %[r0],         0x2000       \n\t"
298     "sra           %[r0],        %[r0],         14           \n\t"
299     "addu          %[r0],        %[r0],         %[r2]        \n\t"
300     "addiu         %[n],         %[n],          -1           \n\t"
301     "addu          %[r1],        %[r1],         %[r3]        \n\t"
302     "addiu         %[r4],        %[r4],         0x4000       \n\t"
303     "sra           %[r4],        %[r4],         15           \n\t"
304     "addu          %[r1],        %[r1],         %[r4]        \n\t"
305     "sra           %[r2],        %[r1],         16           \n\t"
306     "andi          %[r3],        %[r1],         0xFFFF       \n\t"
307     "mul           %[r3],        %[r3],         %[input0]    \n\t"
308     "mul           %[r2],        %[r2],         %[input0]    \n\t"
309     "sw            %[r1],        -4(%[ptr2])                 \n\t"
310     "sra           %[r3],        %[r3],         1            \n\t"
311     "addiu         %[r3],        %[r3],         0x2000       \n\t"
312     "sra           %[r3],        %[r3],         14           \n\t"
313     "addu          %[r0],        %[r0],         %[r3]        \n\t"
314     "sll           %[r2],        %[r2],         1            \n\t"
315     "addu          %[r0],        %[r0],         %[r2]        \n\t"
316     "sw            %[r0],        0(%[ptr1])                  \n\t"
317     "bgtz          %[n],         1b                          \n\t"
318     " addiu        %[ptr1],      %[ptr1],       4            \n\t"
319     ".set          pop                                       \n\t"
320     : [t16a] "=&r" (t16a), [t16b] "=&r" (t16b), [r0] "=&r" (r0),
321       [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
322       [r4] "=&r" (r4), [ptr0] "+r" (ptr0), [ptr1] "+r" (ptr1),
323       [ptr2] "+r" (ptr2), [n] "+r" (n)
324     : [input0] "r" (input0), [input1] "r" (input1),
325       [input2] "r" (input2)
326     : "hi", "lo", "memory"
327   );
328 #endif
329 }
330