1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
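/*
 * This file contains the MIPS-specific implementations of the functions
 * WebRtcSpl_MaxAbsValueW16_mips()
 * WebRtcSpl_MaxAbsValueW32_mips()
 * WebRtcSpl_MaxValueW16_mips()
 * WebRtcSpl_MaxValueW32_mips()
 * WebRtcSpl_MinValueW16_mips()
 * WebRtcSpl_MinValueW32_mips()
 *
 * The description header can be found in signal_processing_library.h.
 *
 */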
18 
19 #include <assert.h>
20 
21 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
22 
23 // Maximum absolute value of word16 vector.
WebRtcSpl_MaxAbsValueW16_mips(const int16_t * vector,size_t length)24 int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length) {
25   int32_t totMax = 0;
26   int32_t tmp32_0, tmp32_1, tmp32_2, tmp32_3;
27   size_t i, loop_size;
28 
29   assert(length > 0);
30 
31 #if defined(MIPS_DSP_R1)
32   const int32_t* tmpvec32 = (int32_t*)vector;
33   loop_size = length >> 4;
34 
35   for (i = 0; i < loop_size; i++) {
36     __asm__ volatile (
37       "lw         %[tmp32_0],     0(%[tmpvec32])              \n\t"
38       "lw         %[tmp32_1],     4(%[tmpvec32])              \n\t"
39       "lw         %[tmp32_2],     8(%[tmpvec32])              \n\t"
40       "lw         %[tmp32_3],     12(%[tmpvec32])             \n\t"
41 
42       "absq_s.ph  %[tmp32_0],     %[tmp32_0]                  \n\t"
43       "absq_s.ph  %[tmp32_1],     %[tmp32_1]                  \n\t"
44       "cmp.lt.ph  %[totMax],      %[tmp32_0]                  \n\t"
45       "pick.ph    %[totMax],      %[tmp32_0],     %[totMax]   \n\t"
46 
47       "lw         %[tmp32_0],     16(%[tmpvec32])             \n\t"
48       "absq_s.ph  %[tmp32_2],     %[tmp32_2]                  \n\t"
49       "cmp.lt.ph  %[totMax],      %[tmp32_1]                  \n\t"
50       "pick.ph    %[totMax],      %[tmp32_1],     %[totMax]   \n\t"
51 
52       "lw         %[tmp32_1],     20(%[tmpvec32])             \n\t"
53       "absq_s.ph  %[tmp32_3],     %[tmp32_3]                  \n\t"
54       "cmp.lt.ph  %[totMax],      %[tmp32_2]                  \n\t"
55       "pick.ph    %[totMax],      %[tmp32_2],     %[totMax]   \n\t"
56 
57       "lw         %[tmp32_2],     24(%[tmpvec32])             \n\t"
58       "cmp.lt.ph  %[totMax],      %[tmp32_3]                  \n\t"
59       "pick.ph    %[totMax],      %[tmp32_3],     %[totMax]   \n\t"
60 
61       "lw         %[tmp32_3],     28(%[tmpvec32])             \n\t"
62       "absq_s.ph  %[tmp32_0],     %[tmp32_0]                  \n\t"
63       "absq_s.ph  %[tmp32_1],     %[tmp32_1]                  \n\t"
64       "cmp.lt.ph  %[totMax],      %[tmp32_0]                  \n\t"
65       "pick.ph    %[totMax],      %[tmp32_0],     %[totMax]   \n\t"
66 
67       "absq_s.ph  %[tmp32_2],     %[tmp32_2]                  \n\t"
68       "cmp.lt.ph  %[totMax],      %[tmp32_1]                  \n\t"
69       "pick.ph    %[totMax],      %[tmp32_1],     %[totMax]   \n\t"
70       "absq_s.ph  %[tmp32_3],     %[tmp32_3]                  \n\t"
71       "cmp.lt.ph  %[totMax],      %[tmp32_2]                  \n\t"
72       "pick.ph    %[totMax],      %[tmp32_2],     %[totMax]   \n\t"
73 
74       "cmp.lt.ph  %[totMax],      %[tmp32_3]                  \n\t"
75       "pick.ph    %[totMax],      %[tmp32_3],     %[totMax]   \n\t"
76 
77       "addiu      %[tmpvec32],    %[tmpvec32],    32          \n\t"
78       : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
79         [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
80         [totMax] "+r" (totMax), [tmpvec32] "+r" (tmpvec32)
81       :
82       : "memory"
83     );
84   }
85   __asm__ volatile (
86     "rotr       %[tmp32_0],     %[totMax],      16          \n\t"
87     "cmp.lt.ph  %[totMax],      %[tmp32_0]                  \n\t"
88     "pick.ph    %[totMax],      %[tmp32_0],     %[totMax]   \n\t"
89     "packrl.ph  %[totMax],      $0,             %[totMax]   \n\t"
90     : [tmp32_0] "=&r" (tmp32_0), [totMax] "+r" (totMax)
91     :
92   );
93   loop_size = length & 0xf;
94   for (i = 0; i < loop_size; i++) {
95     __asm__ volatile (
96       "lh         %[tmp32_0],     0(%[tmpvec32])              \n\t"
97       "addiu      %[tmpvec32],    %[tmpvec32],     2          \n\t"
98       "absq_s.w   %[tmp32_0],     %[tmp32_0]                  \n\t"
99       "slt        %[tmp32_1],     %[totMax],       %[tmp32_0] \n\t"
100       "movn       %[totMax],      %[tmp32_0],      %[tmp32_1] \n\t"
101       : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
102         [tmpvec32] "+r" (tmpvec32), [totMax] "+r" (totMax)
103       :
104       : "memory"
105     );
106   }
107 #else  // #if defined(MIPS_DSP_R1)
108   int32_t v16MaxMax = WEBRTC_SPL_WORD16_MAX;
109   int32_t r, r1, r2, r3;
110   const int16_t* tmpvector = vector;
111   loop_size = length >> 4;
112   for (i = 0; i < loop_size; i++) {
113     __asm__ volatile (
114       "lh     %[tmp32_0],     0(%[tmpvector])                 \n\t"
115       "lh     %[tmp32_1],     2(%[tmpvector])                 \n\t"
116       "lh     %[tmp32_2],     4(%[tmpvector])                 \n\t"
117       "lh     %[tmp32_3],     6(%[tmpvector])                 \n\t"
118 
119       "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
120       "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
121       "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
122       "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
123 
124       "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
125       "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
126       "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
127       "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
128       "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
129       "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
130       "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
131       "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
132 
133       "lh     %[tmp32_0],     8(%[tmpvector])                 \n\t"
134       "lh     %[tmp32_1],     10(%[tmpvector])                \n\t"
135       "lh     %[tmp32_2],     12(%[tmpvector])                \n\t"
136       "lh     %[tmp32_3],     14(%[tmpvector])                \n\t"
137 
138       "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
139       "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
140       "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
141       "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
142 
143       "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
144       "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
145       "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
146       "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
147       "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
148       "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
149       "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
150       "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
151 
152       "lh     %[tmp32_0],     16(%[tmpvector])                \n\t"
153       "lh     %[tmp32_1],     18(%[tmpvector])                \n\t"
154       "lh     %[tmp32_2],     20(%[tmpvector])                \n\t"
155       "lh     %[tmp32_3],     22(%[tmpvector])                \n\t"
156 
157       "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
158       "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
159       "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
160       "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
161 
162       "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
163       "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
164       "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
165       "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
166       "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
167       "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
168       "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
169       "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
170 
171       "lh     %[tmp32_0],     24(%[tmpvector])                \n\t"
172       "lh     %[tmp32_1],     26(%[tmpvector])                \n\t"
173       "lh     %[tmp32_2],     28(%[tmpvector])                \n\t"
174       "lh     %[tmp32_3],     30(%[tmpvector])                \n\t"
175 
176       "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
177       "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
178       "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
179       "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
180 
181       "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
182       "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
183       "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
184       "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
185       "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
186       "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
187       "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
188       "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
189 
190       "addiu  %[tmpvector],   %[tmpvector],   32              \n\t"
191       : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
192         [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
193         [totMax] "+r" (totMax), [r] "=&r" (r), [tmpvector] "+r" (tmpvector),
194         [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3)
195       :
196       : "memory"
197     );
198   }
199   loop_size = length & 0xf;
200   for (i = 0; i < loop_size; i++) {
201     __asm__ volatile (
202       "lh         %[tmp32_0],     0(%[tmpvector])             \n\t"
203       "addiu      %[tmpvector],   %[tmpvector],    2          \n\t"
204       "abs        %[tmp32_0],     %[tmp32_0]                  \n\t"
205       "slt        %[tmp32_1],     %[totMax],       %[tmp32_0] \n\t"
206       "movn       %[totMax],      %[tmp32_0],      %[tmp32_1] \n\t"
207       : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
208         [tmpvector] "+r" (tmpvector), [totMax] "+r" (totMax)
209       :
210       : "memory"
211     );
212   }
213 
214   __asm__ volatile (
215     "slt    %[r],       %[v16MaxMax],   %[totMax]   \n\t"
216     "movn   %[totMax],  %[v16MaxMax],   %[r]        \n\t"
217     : [totMax] "+r" (totMax), [r] "=&r" (r)
218     : [v16MaxMax] "r" (v16MaxMax)
219   );
220 #endif  // #if defined(MIPS_DSP_R1)
221   return (int16_t)totMax;
222 }
223 
224 #if defined(MIPS_DSP_R1_LE)
// Maximum absolute value of word32 vector. Version for MIPS platform.
//
// Walks the first `length` words of `vector` and returns the largest absolute
// value found, limited to 0x7fffffff. `length` must be greater than zero
// (checked with assert() only); the loop body always runs at least once.
int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length) {
  // The locals are kept as uint32_t, as in the generic implementation where
  // abs(0x80000000) is 0x80000000. Here the absolute value is produced by the
  // saturating absq_s.w instruction and the result is additionally limited to
  // max_value before it is returned.
  uint32_t absolute = 0, maximum = 0;
  int tmp1 = 0, max_value = 0x7fffffff;

  assert(length > 0);

  // `vector` and `length` are advanced/decremented inside the asm, so they are
  // declared as read-write ("+r") operands; GCC does not allow an asm to
  // modify input-only operands.
  // The instruction following bgtz sits in its branch delay slot (noreorder),
  // so the pointer increment executes on every iteration.
  __asm__ volatile (
    ".set push                                                        \n\t"
    ".set noreorder                                                   \n\t"

   "1:                                                                \n\t"
    "lw         %[absolute],      0(%[vector])                        \n\t"
    "absq_s.w   %[absolute],      %[absolute]                         \n\t"
    "addiu      %[length],        %[length],          -1              \n\t"
    "slt        %[tmp1],          %[maximum],         %[absolute]     \n\t"
    "movn       %[maximum],       %[absolute],        %[tmp1]         \n\t"
    "bgtz       %[length],        1b                                  \n\t"
    " addiu     %[vector],        %[vector],          4               \n\t"
    "slt        %[tmp1],          %[max_value],       %[maximum]      \n\t"
    "movn       %[maximum],       %[max_value],       %[tmp1]         \n\t"

    ".set pop                                                         \n\t"

    : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [absolute] "+r" (absolute),
      [vector] "+r" (vector), [length] "+r" (length)
    : [max_value] "r" (max_value)
    : "memory"
  );

  return (int32_t)maximum;
}
259 #endif  // #if defined(MIPS_DSP_R1_LE)
260 
261 // Maximum value of word16 vector. Version for MIPS platform.
WebRtcSpl_MaxValueW16_mips(const int16_t * vector,size_t length)262 int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length) {
263   int16_t maximum = WEBRTC_SPL_WORD16_MIN;
264   int tmp1;
265   int16_t value;
266 
267   assert(length > 0);
268 
269   __asm__ volatile (
270     ".set push                                                        \n\t"
271     ".set noreorder                                                   \n\t"
272 
273    "1:                                                                \n\t"
274     "lh         %[value],         0(%[vector])                        \n\t"
275     "addiu      %[length],        %[length],          -1              \n\t"
276     "slt        %[tmp1],          %[maximum],         %[value]        \n\t"
277     "movn       %[maximum],       %[value],           %[tmp1]         \n\t"
278     "bgtz       %[length],        1b                                  \n\t"
279     " addiu     %[vector],        %[vector],          2               \n\t"
280     ".set pop                                                         \n\t"
281 
282     : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value)
283     : [vector] "r" (vector), [length] "r" (length)
284     : "memory"
285   );
286 
287   return maximum;
288 }
289 
290 // Maximum value of word32 vector. Version for MIPS platform.
WebRtcSpl_MaxValueW32_mips(const int32_t * vector,size_t length)291 int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length) {
292   int32_t maximum = WEBRTC_SPL_WORD32_MIN;
293   int tmp1, value;
294 
295   assert(length > 0);
296 
297   __asm__ volatile (
298     ".set push                                                        \n\t"
299     ".set noreorder                                                   \n\t"
300 
301    "1:                                                                \n\t"
302     "lw         %[value],         0(%[vector])                        \n\t"
303     "addiu      %[length],        %[length],          -1              \n\t"
304     "slt        %[tmp1],          %[maximum],         %[value]        \n\t"
305     "movn       %[maximum],       %[value],           %[tmp1]         \n\t"
306     "bgtz       %[length],        1b                                  \n\t"
307     " addiu     %[vector],        %[vector],          4               \n\t"
308 
309     ".set pop                                                         \n\t"
310 
311     : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value)
312     : [vector] "r" (vector), [length] "r" (length)
313     : "memory"
314   );
315 
316   return maximum;
317 }
318 
319 // Minimum value of word16 vector. Version for MIPS platform.
WebRtcSpl_MinValueW16_mips(const int16_t * vector,size_t length)320 int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length) {
321   int16_t minimum = WEBRTC_SPL_WORD16_MAX;
322   int tmp1;
323   int16_t value;
324 
325   assert(length > 0);
326 
327   __asm__ volatile (
328     ".set push                                                        \n\t"
329     ".set noreorder                                                   \n\t"
330 
331    "1:                                                                \n\t"
332     "lh         %[value],         0(%[vector])                        \n\t"
333     "addiu      %[length],        %[length],          -1              \n\t"
334     "slt        %[tmp1],          %[value],           %[minimum]      \n\t"
335     "movn       %[minimum],       %[value],           %[tmp1]         \n\t"
336     "bgtz       %[length],        1b                                  \n\t"
337     " addiu     %[vector],        %[vector],          2               \n\t"
338 
339     ".set pop                                                         \n\t"
340 
341     : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value)
342     : [vector] "r" (vector), [length] "r" (length)
343     : "memory"
344   );
345 
346   return minimum;
347 }
348 
349 // Minimum value of word32 vector. Version for MIPS platform.
WebRtcSpl_MinValueW32_mips(const int32_t * vector,size_t length)350 int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length) {
351   int32_t minimum = WEBRTC_SPL_WORD32_MAX;
352   int tmp1, value;
353 
354   assert(length > 0);
355 
356   __asm__ volatile (
357     ".set push                                                        \n\t"
358     ".set noreorder                                                   \n\t"
359 
360    "1:                                                                \n\t"
361     "lw         %[value],         0(%[vector])                        \n\t"
362     "addiu      %[length],        %[length],          -1              \n\t"
363     "slt        %[tmp1],          %[value],           %[minimum]      \n\t"
364     "movn       %[minimum],       %[value],           %[tmp1]         \n\t"
365     "bgtz       %[length],        1b                                  \n\t"
366     " addiu     %[vector],        %[vector],          4               \n\t"
367 
368     ".set pop                                                         \n\t"
369 
370     : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value)
371     : [vector] "r" (vector), [length] "r" (length)
372     : "memory"
373   );
374 
375   return minimum;
376 }
377