/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h"

// MIPS DSPR2 optimization for function WebRtcIsacfix_CalculateResidualEnergy
// Bit-exact with WebRtcIsacfix_CalculateResidualEnergyC from file
// lpc_masking_model.c
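//
// For reference, the quantity accumulated below is (in sketch form; the
// bit-exact fixed-point reference is the C version named above):
//
//   int64_t sum = 0;
//   for (i = 0; i <= lpc_order; i++) {
//     for (j = i; j <= lpc_order; j++) {
//       sum += (int64_t)a_polynomial[j] * corr_coeffs[i] *
//              a_polynomial[j - i] * ((i == 0) ? 1 : 2);
//     }
//   }
//
// with the running 64-bit sum shifted right (shift_internal) whenever an
// addition would overflow, and the result finally normalized to 32 bits.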
int32_t WebRtcIsacfix_CalculateResidualEnergyMIPS(int lpc_order,
                                                  int32_t q_val_corr,
                                                  int q_val_polynomial,
                                                  int16_t* a_polynomial,
                                                  int32_t* corr_coeffs,
                                                  int* q_val_residual_energy) {

  int i = 0, j = 0;
  int shift_internal = 0, shift_norm = 0;
  int32_t tmp32 = 0, word32_high = 0, word32_low = 0, residual_energy = 0;
  int32_t tmp_corr_c = corr_coeffs[0];
  int16_t* tmp_a_poly = &a_polynomial[0];
  int32_t sum64_hi = 0;
  int32_t sum64_lo = 0;

  for (j = 0; j <= lpc_order; j++) {
    // For the case of i == 0:
    //   residual_energy +=
    //     a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i];
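    // The DSPR2 block squares a_polynomial[j], multiplies by corr_coeffs[0]
    // into accumulator $ac0 (shifted by shift_internal via shilov), and reads
    // the 64-bit product out through mfhi/mflo. sign_3 is nonzero when the
    // product and the running sum have opposite signs, so adding them cannot
    // overflow.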

    int32_t tmp2, tmp3;
    int16_t sign_1;
    int16_t sign_2;
    int16_t sign_3;

    __asm __volatile (
      ".set      push                                                \n\t"
      ".set      noreorder                                           \n\t"
      "lh        %[tmp2],         0(%[tmp_a_poly])                   \n\t"
      "mul       %[tmp32],        %[tmp2],            %[tmp2]        \n\t"
      "addiu     %[tmp_a_poly],   %[tmp_a_poly],      2              \n\t"
      "sra       %[sign_2],       %[sum64_hi],        31             \n\t"
      "mult      $ac0,            %[tmp32],           %[tmp_corr_c]  \n\t"
      "shilov    $ac0,            %[shift_internal]                  \n\t"
      "mfhi      %[tmp2],         $ac0                               \n\t"
      "mflo      %[tmp3],         $ac0                               \n\t"
      "sra       %[sign_1],       %[tmp2],            31             \n\t"
      "xor       %[sign_3],       %[sign_1],          %[sign_2]      \n\t"
      ".set      pop                                                 \n\t"
      : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
        [tmp_a_poly] "+r" (tmp_a_poly), [sign_1] "=&r" (sign_1),
        [sign_3] "=&r" (sign_3), [sign_2] "=&r" (sign_2),
        [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
      : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
      : "hi", "lo", "memory"
    );

    if (sign_3 != 0) {
      __asm __volatile (
        ".set      push                                      \n\t"
        ".set      noreorder                                 \n\t"
        "addsc     %[sum64_lo],   %[sum64_lo],    %[tmp3]    \n\t"
        "addwc     %[sum64_hi],   %[sum64_hi],    %[tmp2]    \n\t"
        ".set      pop                                       \n\t"
        : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
        : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
        : "hi", "lo", "memory"
      );
    } else {
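      // Equal signs: the 32-bit high-word addition can overflow, so test for
      // positive overflow (both operands non-negative) and negative overflow
      // (both operands negative) before summing.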
      if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
          ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
        // Shift right for overflow.
        __asm __volatile (
          ".set      push                                             \n\t"
          ".set      noreorder                                        \n\t"
          "addiu     %[shift_internal], %[shift_internal],  1         \n\t"
          "prepend   %[sum64_lo],       %[sum64_hi],        1         \n\t"
          "sra       %[sum64_hi],       %[sum64_hi],        1         \n\t"
          "prepend   %[tmp3],           %[tmp2],            1         \n\t"
          "sra       %[tmp2],           %[tmp2],            1         \n\t"
          "addsc     %[sum64_lo],       %[sum64_lo],        %[tmp3]   \n\t"
          "addwc     %[sum64_hi],       %[sum64_hi],        %[tmp2]   \n\t"
          ".set      pop                                              \n\t"
          : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
            [shift_internal] "+r" (shift_internal),
            [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
          :
          : "hi", "lo", "memory"
        );
      } else {
        __asm __volatile (
          ".set      push                                      \n\t"
          ".set      noreorder                                 \n\t"
          "addsc     %[sum64_lo],   %[sum64_lo],    %[tmp3]    \n\t"
          "addwc     %[sum64_hi],   %[sum64_hi],    %[tmp2]    \n\t"
          ".set      pop                                       \n\t"
          : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
          : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
          : "hi", "lo", "memory"
        );
      }
    }
  }

  for (i = 1; i <= lpc_order; i++) {
    tmp_corr_c = corr_coeffs[i];
    int16_t* tmp_a_poly_j = &a_polynomial[i];
    int16_t* tmp_a_poly_j_i = &a_polynomial[0];
    for (j = i; j <= lpc_order; j++) {
      // For the case of i = 1 .. lpc_order:
      //   residual_energy +=
      //     a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i] * 2;
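      // The doubling is applied with the "sll ... 1" on the 32-bit product
      // before it enters the 64-bit multiply; otherwise this block mirrors
      // the i == 0 case above, with two walking pointers for a[j] and a[j-i].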

      int32_t tmp2, tmp3;
      int16_t sign_1;
      int16_t sign_2;
      int16_t sign_3;

      __asm __volatile (
        ".set      push                                                   \n\t"
        ".set      noreorder                                              \n\t"
        "lh        %[tmp3],           0(%[tmp_a_poly_j])                  \n\t"
        "lh        %[tmp2],           0(%[tmp_a_poly_j_i])                \n\t"
        "addiu     %[tmp_a_poly_j],   %[tmp_a_poly_j],    2               \n\t"
        "addiu     %[tmp_a_poly_j_i], %[tmp_a_poly_j_i],  2               \n\t"
        "mul       %[tmp32],          %[tmp3],            %[tmp2]         \n\t"
        "sll       %[tmp32],          %[tmp32],           1               \n\t"
        "mult      $ac0,              %[tmp32],           %[tmp_corr_c]   \n\t"
        "shilov    $ac0,              %[shift_internal]                   \n\t"
        "mfhi      %[tmp2],           $ac0                                \n\t"
        "mflo      %[tmp3],           $ac0                                \n\t"
        "sra       %[sign_1],         %[tmp2],            31              \n\t"
        "sra       %[sign_2],         %[sum64_hi],        31              \n\t"
        "xor       %[sign_3],         %[sign_1],          %[sign_2]       \n\t"
        ".set      pop                                                    \n\t"
        : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
          [tmp_a_poly_j] "+r" (tmp_a_poly_j), [sign_1] "=&r" (sign_1),
          [tmp_a_poly_j_i] "+r" (tmp_a_poly_j_i), [sign_2] "=&r" (sign_2),
          [sign_3] "=&r" (sign_3), [sum64_hi] "+r" (sum64_hi),
          [sum64_lo] "+r" (sum64_lo)
        : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
        : "hi", "lo", "memory"
      );
      if (sign_3 != 0) {
        __asm __volatile (
          ".set      push                                     \n\t"
          ".set      noreorder                                \n\t"
          "addsc     %[sum64_lo],   %[sum64_lo],   %[tmp3]    \n\t"
          "addwc     %[sum64_hi],   %[sum64_hi],   %[tmp2]    \n\t"
          ".set      pop                                      \n\t"
          : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3), [sum64_hi] "+r" (sum64_hi),
            [sum64_lo] "+r" (sum64_lo)
          :
          : "memory"
        );
      } else {
        // Test overflow and sum the result.
        if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
            ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
          // Shift right for overflow.
          __asm __volatile (
            ".set      push                                              \n\t"
            ".set      noreorder                                         \n\t"
            "addiu     %[shift_internal],  %[shift_internal],  1         \n\t"
            "prepend   %[sum64_lo],        %[sum64_hi],        1         \n\t"
            "sra       %[sum64_hi],        %[sum64_hi],        1         \n\t"
            "prepend   %[tmp3],            %[tmp2],            1         \n\t"
            "sra       %[tmp2],            %[tmp2],            1         \n\t"
            "addsc     %[sum64_lo],        %[sum64_lo],        %[tmp3]   \n\t"
            "addwc     %[sum64_hi],        %[sum64_hi],        %[tmp2]   \n\t"
            ".set      pop                                               \n\t"
            : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
              [shift_internal] "+r" (shift_internal),
              [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
            :
            : "hi", "lo", "memory"
          );
        } else {
          __asm __volatile (
            ".set      push                                      \n\t"
            ".set      noreorder                                 \n\t"
            "addsc     %[sum64_lo],    %[sum64_lo],   %[tmp3]    \n\t"
            "addwc     %[sum64_hi],    %[sum64_hi],   %[tmp2]    \n\t"
            ".set      pop                                       \n\t"
            : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
              [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
            :
            : "hi", "lo", "memory"
          );
        }
      }
    }
  }
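
  // At this point sum64_hi:sum64_lo holds the 64-bit sum of all terms,
  // already right-shifted by shift_internal.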
  word32_high = sum64_hi;
  word32_low = sum64_lo;

  // Calculate the value of shifting (shift_norm) for the 64-bit sum.
  if (word32_high != 0) {
    shift_norm = 32 - WebRtcSpl_NormW32(word32_high);
    int tmp1;
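    // Compose the top 32 significant bits of the 64-bit value:
    //   residual_energy = (sum64_lo >> shift_norm) |
    //                     (sum64_hi << (32 - shift_norm)).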
    __asm __volatile (
      ".set    push                                                     \n\t"
      ".set    noreorder                                                \n\t"
      "srl     %[residual_energy],  %[sum64_lo],         %[shift_norm]  \n\t"
      "li      %[tmp1],             32                                  \n\t"
      "subu    %[tmp1],             %[tmp1],             %[shift_norm]  \n\t"
      "sll     %[tmp1],             %[sum64_hi],         %[tmp1]        \n\t"
      "or      %[residual_energy],  %[residual_energy],  %[tmp1]        \n\t"
      ".set    pop                                                      \n\t"
      : [residual_energy] "=&r" (residual_energy), [tmp1] "=&r" (tmp1),
        [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
      : [shift_norm] "r" (shift_norm)
      : "memory"
    );
  } else {
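    // High word is zero: derive the normalization from the low word alone.
    // A negative shift_norm denotes a left shift when the Q value is
    // adjusted below.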
    if ((word32_low & 0x80000000) != 0) {
      shift_norm = 1;
      residual_energy = (uint32_t)word32_low >> 1;
    } else {
      shift_norm = WebRtcSpl_NormW32(word32_low);
      residual_energy = word32_low << shift_norm;
      shift_norm = -shift_norm;
    }
  }

  // Q(q_val_polynomial * 2) * Q(q_val_corr) >> shift_internal >> shift_norm
  //   = Q(q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2)
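  // Example (hypothetical values): q_val_corr = 30, q_val_polynomial = 12,
  // shift_internal = 3, shift_norm = 5 gives 30 - 3 - 5 + 24 = 46.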
  *q_val_residual_energy =
      q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2;

  return residual_energy;
}