/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h"

13 // WebRtcIsacfix_AllpassFilter2FixDec16 function optimized for MIPSDSP platform.
14 // Bit-exact with WebRtcIsacfix_AllpassFilter2FixDec16C from filterbanks.c.
WebRtcIsacfix_AllpassFilter2FixDec16MIPS(int16_t * data_ch1,int16_t * data_ch2,const int16_t * factor_ch1,const int16_t * factor_ch2,const int length,int32_t * filter_state_ch1,int32_t * filter_state_ch2)15 void WebRtcIsacfix_AllpassFilter2FixDec16MIPS(
16     int16_t* data_ch1,            // Input and output in channel 1, in Q0.
17     int16_t* data_ch2,            // Input and output in channel 2, in Q0.
18     const int16_t* factor_ch1,    // Scaling factor for channel 1, in Q15.
19     const int16_t* factor_ch2,    // Scaling factor for channel 2, in Q15.
20     const int length,             // Length of the data buffers.
21     int32_t* filter_state_ch1,    // Filter state for channel 1, in Q16.
22     int32_t* filter_state_ch2) {  // Filter state for channel 2, in Q16.
23 
24   int32_t st0_ch1, st1_ch1;                // channel1 state variables.
25   int32_t st0_ch2, st1_ch2;                // channel2 state variables.
26   int32_t f_ch10, f_ch11, f_ch20, f_ch21;  // factor variables.
27   int32_t r0, r1, r2, r3, r4, r5;          // temporary register variables.
28 
29   __asm __volatile (
30     ".set           push                                                  \n\t"
31     ".set           noreorder                                             \n\t"
32     // Load all the state and factor variables.
33     "lh             %[f_ch10],      0(%[factor_ch1])                      \n\t"
34     "lh             %[f_ch20],      0(%[factor_ch2])                      \n\t"
35     "lh             %[f_ch11],      2(%[factor_ch1])                      \n\t"
36     "lh             %[f_ch21],      2(%[factor_ch2])                      \n\t"
37     "lw             %[st0_ch1],     0(%[filter_state_ch1])                \n\t"
38     "lw             %[st1_ch1],     4(%[filter_state_ch1])                \n\t"
39     "lw             %[st0_ch2],     0(%[filter_state_ch2])                \n\t"
40     "lw             %[st1_ch2],     4(%[filter_state_ch2])                \n\t"
41     // Allpass filtering loop.
42    "1:                                                                    \n\t"
43     "lh             %[r0],          0(%[data_ch1])                        \n\t"
44     "lh             %[r1],          0(%[data_ch2])                        \n\t"
45     "addiu          %[length],      %[length],              -1            \n\t"
46     "mul            %[r2],          %[r0],                  %[f_ch10]     \n\t"
47     "mul            %[r3],          %[r1],                  %[f_ch20]     \n\t"
48     "sll            %[r0],          %[r0],                  16            \n\t"
49     "sll            %[r1],          %[r1],                  16            \n\t"
50     "sll            %[r2],          %[r2],                  1             \n\t"
51     "addq_s.w       %[r2],          %[r2],                  %[st0_ch1]    \n\t"
52     "sll            %[r3],          %[r3],                  1             \n\t"
53     "addq_s.w       %[r3],          %[r3],                  %[st0_ch2]    \n\t"
54     "sra            %[r2],          %[r2],                  16            \n\t"
55     "mul            %[st0_ch1],     %[f_ch10],              %[r2]         \n\t"
56     "sra            %[r3],          %[r3],                  16            \n\t"
57     "mul            %[st0_ch2],     %[f_ch20],              %[r3]         \n\t"
58     "mul            %[r4],          %[r2],                  %[f_ch11]     \n\t"
59     "mul            %[r5],          %[r3],                  %[f_ch21]     \n\t"
60     "sll            %[st0_ch1],     %[st0_ch1],             1             \n\t"
61     "subq_s.w       %[st0_ch1],     %[r0],                  %[st0_ch1]    \n\t"
62     "sll            %[st0_ch2],     %[st0_ch2],             1             \n\t"
63     "subq_s.w       %[st0_ch2],     %[r1],                  %[st0_ch2]    \n\t"
64     "sll            %[r4],          %[r4],                  1             \n\t"
65     "addq_s.w       %[r4],          %[r4],                  %[st1_ch1]    \n\t"
66     "sll            %[r5],          %[r5],                  1             \n\t"
67     "addq_s.w       %[r5],          %[r5],                  %[st1_ch2]    \n\t"
68     "sra            %[r4],          %[r4],                  16            \n\t"
69     "mul            %[r0],          %[r4],                  %[f_ch11]     \n\t"
70     "sra            %[r5],          %[r5],                  16            \n\t"
71     "mul            %[r1],          %[r5],                  %[f_ch21]     \n\t"
72     "sh             %[r4],          0(%[data_ch1])                        \n\t"
73     "sh             %[r5],          0(%[data_ch2])                        \n\t"
74     "addiu          %[data_ch1],    %[data_ch1],            2             \n\t"
75     "sll            %[r2],          %[r2],                  16            \n\t"
76     "sll            %[r0],          %[r0],                  1             \n\t"
77     "subq_s.w       %[st1_ch1],     %[r2],                  %[r0]         \n\t"
78     "sll            %[r3],          %[r3],                  16            \n\t"
79     "sll            %[r1],          %[r1],                  1             \n\t"
80     "subq_s.w       %[st1_ch2],     %[r3],                  %[r1]         \n\t"
81     "bgtz           %[length],      1b                                    \n\t"
82     " addiu         %[data_ch2],    %[data_ch2],            2             \n\t"
83     // Store channel states.
84     "sw             %[st0_ch1],     0(%[filter_state_ch1])                \n\t"
85     "sw             %[st1_ch1],     4(%[filter_state_ch1])                \n\t"
86     "sw             %[st0_ch2],     0(%[filter_state_ch2])                \n\t"
87     "sw             %[st1_ch2],     4(%[filter_state_ch2])                \n\t"
88     ".set           pop                                                   \n\t"
89     : [f_ch10] "=&r" (f_ch10), [f_ch20] "=&r" (f_ch20),
90       [f_ch11] "=&r" (f_ch11), [f_ch21] "=&r" (f_ch21),
91       [st0_ch1] "=&r" (st0_ch1), [st1_ch1] "=&r" (st1_ch1),
92       [st0_ch2] "=&r" (st0_ch2), [st1_ch2] "=&r" (st1_ch2),
93       [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
94       [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5)
95     : [factor_ch1] "r" (factor_ch1), [factor_ch2] "r" (factor_ch2),
96       [filter_state_ch1] "r" (filter_state_ch1),
97       [filter_state_ch2] "r" (filter_state_ch2),
98       [data_ch1] "r" (data_ch1), [data_ch2] "r" (data_ch2),
99       [length] "r" (length)
100     : "memory", "hi", "lo"
101   );
102 }
103 
104 // WebRtcIsacfix_HighpassFilterFixDec32 function optimized for MIPSDSP platform.
105 // Bit-exact with WebRtcIsacfix_HighpassFilterFixDec32C from filterbanks.c.
WebRtcIsacfix_HighpassFilterFixDec32MIPS(int16_t * io,int16_t len,const int16_t * coefficient,int32_t * state)106 void WebRtcIsacfix_HighpassFilterFixDec32MIPS(int16_t* io,
107                                               int16_t len,
108                                               const int16_t* coefficient,
109                                               int32_t* state) {
110   int k;
111   int32_t a1, a2, b1, b2, in;
112   int32_t state0 = state[0];
113   int32_t state1 = state[1];
114 
115   int32_t c0, c1, c2, c3;
116   int32_t c4, c5, c6, c7;
117   int32_t state0_lo, state0_hi;
118   int32_t state1_lo, state1_hi;
119   int32_t t0, t1, t2, t3, t4, t5;
120 
121   __asm  __volatile (
122     "lh         %[c0],         0(%[coeff_ptr])            \n\t"
123     "lh         %[c1],         2(%[coeff_ptr])            \n\t"
124     "lh         %[c2],         4(%[coeff_ptr])            \n\t"
125     "lh         %[c3],         6(%[coeff_ptr])            \n\t"
126     "sra        %[state0_hi],  %[state0],        16       \n\t"
127     "sra        %[state1_hi],  %[state1],        16       \n\t"
128     "andi       %[state0_lo],  %[state0],        0xFFFF   \n\t"
129     "andi       %[state1_lo],  %[state1],        0xFFFF   \n\t"
130     "lh         %[c4],         8(%[coeff_ptr])            \n\t"
131     "lh         %[c5],         10(%[coeff_ptr])           \n\t"
132     "lh         %[c6],         12(%[coeff_ptr])           \n\t"
133     "lh         %[c7],         14(%[coeff_ptr])           \n\t"
134     "sra        %[state0_lo],  %[state0_lo],     1        \n\t"
135     "sra        %[state1_lo],  %[state1_lo],     1        \n\t"
136     : [c0] "=&r" (c0), [c1] "=&r" (c1), [c2] "=&r" (c2), [c3] "=&r" (c3),
137       [c4] "=&r" (c4), [c5] "=&r" (c5), [c6] "=&r" (c6), [c7] "=&r" (c7),
138       [state0_hi] "=&r" (state0_hi), [state0_lo] "=&r" (state0_lo),
139       [state1_hi] "=&r" (state1_hi), [state1_lo] "=&r" (state1_lo)
140     : [coeff_ptr] "r" (coefficient), [state0] "r" (state0),
141       [state1] "r" (state1)
142     : "memory"
143   );
144 
145   for (k = 0; k < len; k++) {
146     in = (int32_t)io[k];
147 
148     __asm __volatile (
149       ".set      push                                      \n\t"
150       ".set      noreorder                                 \n\t"
151       "mul       %[t2],        %[c4],        %[state0_lo]  \n\t"
152       "mul       %[t0],        %[c5],        %[state0_lo]  \n\t"
153       "mul       %[t1],        %[c4],        %[state0_hi]  \n\t"
154       "mul       %[a1],        %[c5],        %[state0_hi]  \n\t"
155       "mul       %[t5],        %[c6],        %[state1_lo]  \n\t"
156       "mul       %[t3],        %[c7],        %[state1_lo]  \n\t"
157       "mul       %[t4],        %[c6],        %[state1_hi]  \n\t"
158       "mul       %[b1],        %[c7],        %[state1_hi]  \n\t"
159       "shra_r.w  %[t2],        %[t2],        15            \n\t"
160       "shra_r.w  %[t0],        %[t0],        15            \n\t"
161       "addu      %[t1],        %[t1],        %[t2]         \n\t"
162       "addu      %[a1],        %[a1],        %[t0]         \n\t"
163       "sra       %[t1],        %[t1],        16            \n\t"
164       "addu      %[a1],        %[a1],        %[t1]         \n\t"
165       "shra_r.w  %[t5],        %[t5],        15            \n\t"
166       "shra_r.w  %[t3],        %[t3],        15            \n\t"
167       "addu      %[t4],        %[t4],        %[t5]         \n\t"
168       "addu      %[b1],        %[b1],        %[t3]         \n\t"
169       "sra       %[t4],        %[t4],        16            \n\t"
170       "addu      %[b1],        %[b1],        %[t4]         \n\t"
171       "mul       %[t2],        %[c0],        %[state0_lo]  \n\t"
172       "mul       %[t0],        %[c1],        %[state0_lo]  \n\t"
173       "mul       %[t1],        %[c0],        %[state0_hi]  \n\t"
174       "mul       %[a2],        %[c1],        %[state0_hi]  \n\t"
175       "mul       %[t5],        %[c2],        %[state1_lo]  \n\t"
176       "mul       %[t3],        %[c3],        %[state1_lo]  \n\t"
177       "mul       %[t4],        %[c2],        %[state1_hi]  \n\t"
178       "mul       %[b2],        %[c3],        %[state1_hi]  \n\t"
179       "shra_r.w  %[t2],        %[t2],        15            \n\t"
180       "shra_r.w  %[t0],        %[t0],        15            \n\t"
181       "addu      %[t1],        %[t1],        %[t2]         \n\t"
182       "addu      %[a2],        %[a2],        %[t0]         \n\t"
183       "sra       %[t1],        %[t1],        16            \n\t"
184       "addu      %[a2],        %[a2],        %[t1]         \n\t"
185       "shra_r.w  %[t5],        %[t5],        15            \n\t"
186       "shra_r.w  %[t3],        %[t3],        15            \n\t"
187       "addu      %[t4],        %[t4],        %[t5]         \n\t"
188       "addu      %[b2],        %[b2],        %[t3]         \n\t"
189       "sra       %[t4],        %[t4],        16            \n\t"
190       "addu      %[b2],        %[b2],        %[t4]         \n\t"
191       "addu      %[a1],        %[a1],        %[b1]         \n\t"
192       "sra       %[a1],        %[a1],        7             \n\t"
193       "addu      %[a1],        %[a1],        %[in]         \n\t"
194       "sll       %[t0],        %[in],        2             \n\t"
195       "addu      %[a2],        %[a2],        %[b2]         \n\t"
196       "subu      %[t0],        %[t0],        %[a2]         \n\t"
197       "shll_s.w  %[a1],        %[a1],        16            \n\t"
198       "shll_s.w  %[t0],        %[t0],        2             \n\t"
199       "sra       %[a1],        %[a1],        16            \n\t"
200       "addu      %[state1_hi], %[state0_hi], $0            \n\t"
201       "addu      %[state1_lo], %[state0_lo], $0            \n\t"
202       "sra       %[state0_hi], %[t0],        16            \n\t"
203       "andi      %[state0_lo], %[t0],        0xFFFF        \n\t"
204       "sra       %[state0_lo], %[state0_lo], 1             \n\t"
205       ".set      pop                                       \n\t"
206       : [a1] "=&r" (a1), [b1] "=&r" (b1), [a2] "=&r" (a2), [b2] "=&r" (b2),
207         [state0_hi] "+r" (state0_hi), [state0_lo] "+r" (state0_lo),
208         [state1_hi] "+r" (state1_hi), [state1_lo] "+r" (state1_lo),
209         [t0] "=&r" (t0), [t1] "=&r" (t1), [t2] "=&r" (t2),
210         [t3] "=&r" (t3), [t4] "=&r" (t4), [t5] "=&r" (t5)
211       : [c0] "r" (c0), [c1] "r" (c1), [c2] "r" (c2), [c3] "r" (c3),
212         [c4] "r" (c4), [c5] "r" (c5), [c6] "r" (c6), [c7] "r" (c7),
213         [in] "r" (in)
214       : "hi", "lo"
215     );
216     io[k] = (int16_t)a1;
217   }
218   __asm __volatile (
219     ".set            push                                            \n\t"
220     ".set            noreorder                                       \n\t"
221 #if !defined(MIPS_DSP_R2_LE)
222     "sll             %[state0_hi],   %[state0_hi],   16              \n\t"
223     "sll             %[state0_lo],   %[state0_lo],   1               \n\t"
224     "sll             %[state1_hi],   %[state1_hi],   16              \n\t"
225     "sll             %[state1_lo],   %[state1_lo],   1               \n\t"
226     "or              %[state0_hi],   %[state0_hi],   %[state0_lo]    \n\t"
227     "or              %[state1_hi],   %[state1_hi],   %[state1_lo]    \n\t"
228 #else
229     "sll             %[state0_lo],   %[state0_lo],   1               \n\t"
230     "sll             %[state1_lo],   %[state1_lo],   1               \n\t"
231     "precr_sra.ph.w  %[state0_hi],   %[state0_lo],   0               \n\t"
232     "precr_sra.ph.w  %[state1_hi],   %[state1_lo],   0               \n\t"
233 #endif
234     "sw              %[state0_hi],   0(%[state])                     \n\t"
235     "sw              %[state1_hi],   4(%[state])                     \n\t"
236     ".set            pop                                             \n\t"
237     : [state0_hi] "+r" (state0_hi), [state0_lo] "+r" (state0_lo),
238       [state1_hi] "+r" (state1_hi), [state1_lo] "+r" (state1_lo)
239     : [state] "r" (state)
240     : "memory"
241   );
242 }
243