1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
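/*
 * This file contains the MIPS implementations of the functions
 * WebRtcSpl_MaxAbsValueW16_mips()
 * WebRtcSpl_MaxAbsValueW32_mips()
 * WebRtcSpl_MaxValueW16_mips()
 * WebRtcSpl_MaxValueW32_mips()
 * WebRtcSpl_MinValueW16_mips()
 * WebRtcSpl_MinValueW32_mips()
 *
 * The description header can be found in signal_processing_library.h.
 *
 */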
18 
19 #include "rtc_base/checks.h"
20 #include "common_audio/signal_processing/include/signal_processing_library.h"
21 
22 // Maximum absolute value of word16 vector.
WebRtcSpl_MaxAbsValueW16_mips(const int16_t * vector,size_t length)23 int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length) {
24   int32_t totMax = 0;
25   int32_t tmp32_0, tmp32_1, tmp32_2, tmp32_3;
26   size_t i, loop_size;
27 
28   RTC_DCHECK_GT(length, 0);
29 
30 #if defined(MIPS_DSP_R1)
31   const int32_t* tmpvec32 = (int32_t*)vector;
32   loop_size = length >> 4;
33 
34   for (i = 0; i < loop_size; i++) {
35     __asm__ volatile (
36       "lw         %[tmp32_0],     0(%[tmpvec32])              \n\t"
37       "lw         %[tmp32_1],     4(%[tmpvec32])              \n\t"
38       "lw         %[tmp32_2],     8(%[tmpvec32])              \n\t"
39       "lw         %[tmp32_3],     12(%[tmpvec32])             \n\t"
40 
41       "absq_s.ph  %[tmp32_0],     %[tmp32_0]                  \n\t"
42       "absq_s.ph  %[tmp32_1],     %[tmp32_1]                  \n\t"
43       "cmp.lt.ph  %[totMax],      %[tmp32_0]                  \n\t"
44       "pick.ph    %[totMax],      %[tmp32_0],     %[totMax]   \n\t"
45 
46       "lw         %[tmp32_0],     16(%[tmpvec32])             \n\t"
47       "absq_s.ph  %[tmp32_2],     %[tmp32_2]                  \n\t"
48       "cmp.lt.ph  %[totMax],      %[tmp32_1]                  \n\t"
49       "pick.ph    %[totMax],      %[tmp32_1],     %[totMax]   \n\t"
50 
51       "lw         %[tmp32_1],     20(%[tmpvec32])             \n\t"
52       "absq_s.ph  %[tmp32_3],     %[tmp32_3]                  \n\t"
53       "cmp.lt.ph  %[totMax],      %[tmp32_2]                  \n\t"
54       "pick.ph    %[totMax],      %[tmp32_2],     %[totMax]   \n\t"
55 
56       "lw         %[tmp32_2],     24(%[tmpvec32])             \n\t"
57       "cmp.lt.ph  %[totMax],      %[tmp32_3]                  \n\t"
58       "pick.ph    %[totMax],      %[tmp32_3],     %[totMax]   \n\t"
59 
60       "lw         %[tmp32_3],     28(%[tmpvec32])             \n\t"
61       "absq_s.ph  %[tmp32_0],     %[tmp32_0]                  \n\t"
62       "absq_s.ph  %[tmp32_1],     %[tmp32_1]                  \n\t"
63       "cmp.lt.ph  %[totMax],      %[tmp32_0]                  \n\t"
64       "pick.ph    %[totMax],      %[tmp32_0],     %[totMax]   \n\t"
65 
66       "absq_s.ph  %[tmp32_2],     %[tmp32_2]                  \n\t"
67       "cmp.lt.ph  %[totMax],      %[tmp32_1]                  \n\t"
68       "pick.ph    %[totMax],      %[tmp32_1],     %[totMax]   \n\t"
69       "absq_s.ph  %[tmp32_3],     %[tmp32_3]                  \n\t"
70       "cmp.lt.ph  %[totMax],      %[tmp32_2]                  \n\t"
71       "pick.ph    %[totMax],      %[tmp32_2],     %[totMax]   \n\t"
72 
73       "cmp.lt.ph  %[totMax],      %[tmp32_3]                  \n\t"
74       "pick.ph    %[totMax],      %[tmp32_3],     %[totMax]   \n\t"
75 
76       "addiu      %[tmpvec32],    %[tmpvec32],    32          \n\t"
77       : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
78         [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
79         [totMax] "+r" (totMax), [tmpvec32] "+r" (tmpvec32)
80       :
81       : "memory"
82     );
83   }
84   __asm__ volatile (
85     "rotr       %[tmp32_0],     %[totMax],      16          \n\t"
86     "cmp.lt.ph  %[totMax],      %[tmp32_0]                  \n\t"
87     "pick.ph    %[totMax],      %[tmp32_0],     %[totMax]   \n\t"
88     "packrl.ph  %[totMax],      $0,             %[totMax]   \n\t"
89     : [tmp32_0] "=&r" (tmp32_0), [totMax] "+r" (totMax)
90     :
91   );
92   loop_size = length & 0xf;
93   for (i = 0; i < loop_size; i++) {
94     __asm__ volatile (
95       "lh         %[tmp32_0],     0(%[tmpvec32])              \n\t"
96       "addiu      %[tmpvec32],    %[tmpvec32],     2          \n\t"
97       "absq_s.w   %[tmp32_0],     %[tmp32_0]                  \n\t"
98       "slt        %[tmp32_1],     %[totMax],       %[tmp32_0] \n\t"
99       "movn       %[totMax],      %[tmp32_0],      %[tmp32_1] \n\t"
100       : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
101         [tmpvec32] "+r" (tmpvec32), [totMax] "+r" (totMax)
102       :
103       : "memory"
104     );
105   }
106 #else  // #if defined(MIPS_DSP_R1)
107   int32_t v16MaxMax = WEBRTC_SPL_WORD16_MAX;
108   int32_t r, r1, r2, r3;
109   const int16_t* tmpvector = vector;
110   loop_size = length >> 4;
111   for (i = 0; i < loop_size; i++) {
112     __asm__ volatile (
113       "lh     %[tmp32_0],     0(%[tmpvector])                 \n\t"
114       "lh     %[tmp32_1],     2(%[tmpvector])                 \n\t"
115       "lh     %[tmp32_2],     4(%[tmpvector])                 \n\t"
116       "lh     %[tmp32_3],     6(%[tmpvector])                 \n\t"
117 
118       "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
119       "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
120       "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
121       "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
122 
123       "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
124       "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
125       "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
126       "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
127       "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
128       "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
129       "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
130       "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
131 
132       "lh     %[tmp32_0],     8(%[tmpvector])                 \n\t"
133       "lh     %[tmp32_1],     10(%[tmpvector])                \n\t"
134       "lh     %[tmp32_2],     12(%[tmpvector])                \n\t"
135       "lh     %[tmp32_3],     14(%[tmpvector])                \n\t"
136 
137       "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
138       "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
139       "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
140       "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
141 
142       "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
143       "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
144       "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
145       "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
146       "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
147       "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
148       "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
149       "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
150 
151       "lh     %[tmp32_0],     16(%[tmpvector])                \n\t"
152       "lh     %[tmp32_1],     18(%[tmpvector])                \n\t"
153       "lh     %[tmp32_2],     20(%[tmpvector])                \n\t"
154       "lh     %[tmp32_3],     22(%[tmpvector])                \n\t"
155 
156       "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
157       "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
158       "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
159       "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
160 
161       "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
162       "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
163       "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
164       "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
165       "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
166       "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
167       "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
168       "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
169 
170       "lh     %[tmp32_0],     24(%[tmpvector])                \n\t"
171       "lh     %[tmp32_1],     26(%[tmpvector])                \n\t"
172       "lh     %[tmp32_2],     28(%[tmpvector])                \n\t"
173       "lh     %[tmp32_3],     30(%[tmpvector])                \n\t"
174 
175       "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
176       "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
177       "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
178       "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
179 
180       "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
181       "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
182       "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
183       "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
184       "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
185       "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
186       "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
187       "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
188 
189       "addiu  %[tmpvector],   %[tmpvector],   32              \n\t"
190       : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
191         [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
192         [totMax] "+r" (totMax), [r] "=&r" (r), [tmpvector] "+r" (tmpvector),
193         [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3)
194       :
195       : "memory"
196     );
197   }
198   loop_size = length & 0xf;
199   for (i = 0; i < loop_size; i++) {
200     __asm__ volatile (
201       "lh         %[tmp32_0],     0(%[tmpvector])             \n\t"
202       "addiu      %[tmpvector],   %[tmpvector],    2          \n\t"
203       "abs        %[tmp32_0],     %[tmp32_0]                  \n\t"
204       "slt        %[tmp32_1],     %[totMax],       %[tmp32_0] \n\t"
205       "movn       %[totMax],      %[tmp32_0],      %[tmp32_1] \n\t"
206       : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
207         [tmpvector] "+r" (tmpvector), [totMax] "+r" (totMax)
208       :
209       : "memory"
210     );
211   }
212 
213   __asm__ volatile (
214     "slt    %[r],       %[v16MaxMax],   %[totMax]   \n\t"
215     "movn   %[totMax],  %[v16MaxMax],   %[r]        \n\t"
216     : [totMax] "+r" (totMax), [r] "=&r" (r)
217     : [v16MaxMax] "r" (v16MaxMax)
218   );
219 #endif  // #if defined(MIPS_DSP_R1)
220   return (int16_t)totMax;
221 }
222 
223 #if defined(MIPS_DSP_R1_LE)
// Maximum absolute value of word32 vector. Version for MIPS platform.
//
// Scans `length` 32-bit samples starting at `vector` and returns the largest
// absolute value found, limited to 0x7fffffff.
//
// `length` must be greater than zero (checked with RTC_DCHECK_GT); the loop
// body runs once before the count is tested.
int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length) {
  // absq_s.w is the saturating absolute value, so abs(0x80000000) comes out
  // as 0x7fffffff. The slt/movn pair after the loop additionally limits the
  // result to max_value.

  uint32_t absolute = 0, maximum = 0;
  int tmp1 = 0, max_value = 0x7fffffff;

  RTC_DCHECK_GT(length, 0);

  // The loop runs under ".set noreorder", so the addiu that follows bgtz sits
  // in the branch delay slot and advances the pointer on every iteration.
  //
  // `vector` and `length` are updated by the assembly, so they are declared
  // as read-write ("+r") operands; input-only operands must not be modified.
  __asm__ volatile (
    ".set push                                                        \n\t"
    ".set noreorder                                                   \n\t"

   "1:                                                                \n\t"
    "lw         %[absolute],      0(%[vector])                        \n\t"
    "absq_s.w   %[absolute],      %[absolute]                         \n\t"
    "addiu      %[length],        %[length],          -1              \n\t"
    "slt        %[tmp1],          %[maximum],         %[absolute]     \n\t"
    "movn       %[maximum],       %[absolute],        %[tmp1]         \n\t"
    "bgtz       %[length],        1b                                  \n\t"
    " addiu     %[vector],        %[vector],          4               \n\t"
    "slt        %[tmp1],          %[max_value],       %[maximum]      \n\t"
    "movn       %[maximum],       %[max_value],       %[tmp1]         \n\t"

    ".set pop                                                         \n\t"

    : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum),
      [absolute] "+r" (absolute), [vector] "+r" (vector),
      [length] "+r" (length)
    : [max_value] "r" (max_value)
    : "memory"
  );

  return (int32_t)maximum;
}
258 #endif  // #if defined(MIPS_DSP_R1_LE)
259 
260 // Maximum value of word16 vector. Version for MIPS platform.
WebRtcSpl_MaxValueW16_mips(const int16_t * vector,size_t length)261 int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length) {
262   int16_t maximum = WEBRTC_SPL_WORD16_MIN;
263   int tmp1;
264   int16_t value;
265 
266   RTC_DCHECK_GT(length, 0);
267 
268   __asm__ volatile (
269     ".set push                                                        \n\t"
270     ".set noreorder                                                   \n\t"
271 
272    "1:                                                                \n\t"
273     "lh         %[value],         0(%[vector])                        \n\t"
274     "addiu      %[length],        %[length],          -1              \n\t"
275     "slt        %[tmp1],          %[maximum],         %[value]        \n\t"
276     "movn       %[maximum],       %[value],           %[tmp1]         \n\t"
277     "bgtz       %[length],        1b                                  \n\t"
278     " addiu     %[vector],        %[vector],          2               \n\t"
279     ".set pop                                                         \n\t"
280 
281     : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value)
282     : [vector] "r" (vector), [length] "r" (length)
283     : "memory"
284   );
285 
286   return maximum;
287 }
288 
289 // Maximum value of word32 vector. Version for MIPS platform.
WebRtcSpl_MaxValueW32_mips(const int32_t * vector,size_t length)290 int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length) {
291   int32_t maximum = WEBRTC_SPL_WORD32_MIN;
292   int tmp1, value;
293 
294   RTC_DCHECK_GT(length, 0);
295 
296   __asm__ volatile (
297     ".set push                                                        \n\t"
298     ".set noreorder                                                   \n\t"
299 
300    "1:                                                                \n\t"
301     "lw         %[value],         0(%[vector])                        \n\t"
302     "addiu      %[length],        %[length],          -1              \n\t"
303     "slt        %[tmp1],          %[maximum],         %[value]        \n\t"
304     "movn       %[maximum],       %[value],           %[tmp1]         \n\t"
305     "bgtz       %[length],        1b                                  \n\t"
306     " addiu     %[vector],        %[vector],          4               \n\t"
307 
308     ".set pop                                                         \n\t"
309 
310     : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value)
311     : [vector] "r" (vector), [length] "r" (length)
312     : "memory"
313   );
314 
315   return maximum;
316 }
317 
318 // Minimum value of word16 vector. Version for MIPS platform.
WebRtcSpl_MinValueW16_mips(const int16_t * vector,size_t length)319 int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length) {
320   int16_t minimum = WEBRTC_SPL_WORD16_MAX;
321   int tmp1;
322   int16_t value;
323 
324   RTC_DCHECK_GT(length, 0);
325 
326   __asm__ volatile (
327     ".set push                                                        \n\t"
328     ".set noreorder                                                   \n\t"
329 
330    "1:                                                                \n\t"
331     "lh         %[value],         0(%[vector])                        \n\t"
332     "addiu      %[length],        %[length],          -1              \n\t"
333     "slt        %[tmp1],          %[value],           %[minimum]      \n\t"
334     "movn       %[minimum],       %[value],           %[tmp1]         \n\t"
335     "bgtz       %[length],        1b                                  \n\t"
336     " addiu     %[vector],        %[vector],          2               \n\t"
337 
338     ".set pop                                                         \n\t"
339 
340     : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value)
341     : [vector] "r" (vector), [length] "r" (length)
342     : "memory"
343   );
344 
345   return minimum;
346 }
347 
348 // Minimum value of word32 vector. Version for MIPS platform.
WebRtcSpl_MinValueW32_mips(const int32_t * vector,size_t length)349 int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length) {
350   int32_t minimum = WEBRTC_SPL_WORD32_MAX;
351   int tmp1, value;
352 
353   RTC_DCHECK_GT(length, 0);
354 
355   __asm__ volatile (
356     ".set push                                                        \n\t"
357     ".set noreorder                                                   \n\t"
358 
359    "1:                                                                \n\t"
360     "lw         %[value],         0(%[vector])                        \n\t"
361     "addiu      %[length],        %[length],          -1              \n\t"
362     "slt        %[tmp1],          %[value],           %[minimum]      \n\t"
363     "movn       %[minimum],       %[value],           %[tmp1]         \n\t"
364     "bgtz       %[length],        1b                                  \n\t"
365     " addiu     %[vector],        %[vector],          4               \n\t"
366 
367     ".set pop                                                         \n\t"
368 
369     : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value)
370     : [vector] "r" (vector), [length] "r" (length)
371     : "memory"
372   );
373 
374   return minimum;
375 }
376