/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h"

// MIPS DSPR2 optimization for function WebRtcIsacfix_CalculateResidualEnergy
// Bit-exact with WebRtcIsacfix_CalculateResidualEnergyC from file
// lpc_masking_model.c
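//
// For reference, a sketch of the quantity being computed (not the bit-exact
// C reference itself):
//   residual_energy =
//       sum_{i=0..lpc_order} sum_{j=i..lpc_order}
//           a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i] *
//           (i == 0 ? 1 : 2),
// accumulated in 64 bits and renormalized below so the result fits in the
// 32-bit return value.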
int32_t WebRtcIsacfix_CalculateResidualEnergyMIPS(int lpc_order,
                                                  int32_t q_val_corr,
                                                  int q_val_polynomial,
                                                  int16_t* a_polynomial,
                                                  int32_t* corr_coeffs,
                                                  int* q_val_residual_energy) {

  int i = 0, j = 0;
  int shift_internal = 0, shift_norm = 0;
  int32_t tmp32 = 0, word32_high = 0, word32_low = 0, residual_energy = 0;
  int32_t tmp_corr_c = corr_coeffs[0];
  int16_t* tmp_a_poly = &a_polynomial[0];
  int32_t sum64_hi = 0;
  int32_t sum64_lo = 0;
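  // sum64_hi:sum64_lo form the running 64-bit sum, and shift_internal counts
  // how many times that sum has been shifted right by one bit to avoid
  // overflow.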

  for (j = 0; j <= lpc_order; j++) {
    // For the case of i == 0:
    //   residual_energy +=
    //       a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i];

    int32_t tmp2, tmp3;
    int16_t sign_1;
    int16_t sign_2;
    int16_t sign_3;

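    // Square a_polynomial[j], multiply by corr_coeffs[0] into DSP accumulator
    // $ac0, and align the product with the running sum: shilov shifts the
    // 64-bit accumulator right by shift_internal bits (the amount already
    // applied to the running sum) before hi/lo are read back.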
    __asm __volatile (
      ".set push                                            \n\t"
      ".set noreorder                                       \n\t"
      "lh      %[tmp2],        0(%[tmp_a_poly])             \n\t"
      "mul     %[tmp32],       %[tmp2],        %[tmp2]      \n\t"
      "addiu   %[tmp_a_poly],  %[tmp_a_poly],  2            \n\t"
      "sra     %[sign_2],      %[sum64_hi],    31           \n\t"
      "mult    $ac0,           %[tmp32],       %[tmp_corr_c] \n\t"
      "shilov  $ac0,           %[shift_internal]            \n\t"
      "mfhi    %[tmp2],        $ac0                         \n\t"
      "mflo    %[tmp3],        $ac0                         \n\t"
      "sra     %[sign_1],      %[tmp2],        31           \n\t"
      "xor     %[sign_3],      %[sign_1],      %[sign_2]    \n\t"
      ".set pop                                             \n\t"
      : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
        [tmp_a_poly] "+r" (tmp_a_poly), [sign_1] "=&r" (sign_1),
        [sign_3] "=&r" (sign_3), [sign_2] "=&r" (sign_2),
        [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
      : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
      : "hi", "lo", "memory"
    );

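    // sign_3 != 0 means the new term and the running sum have opposite signs,
    // so their 64-bit addition cannot overflow and is performed directly.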
    if (sign_3 != 0) {
      __asm __volatile (
        ".set push                                          \n\t"
        ".set noreorder                                     \n\t"
        "addsc   %[sum64_lo],  %[sum64_lo],  %[tmp3]        \n\t"
        "addwc   %[sum64_hi],  %[sum64_hi],  %[tmp2]        \n\t"
        ".set pop                                           \n\t"
        : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
        : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
        : "hi", "lo", "memory"
      );
    } else {
      if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
          ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
        // Shift right for overflow.
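        // prepend shifts its first operand right by one bit and inserts the
        // low bit of its second operand at the top, so the 64-bit sum and
        // the 64-bit term are each halved before adding; shift_internal
        // records the extra shift.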
        __asm __volatile (
          ".set push                                          \n\t"
          ".set noreorder                                     \n\t"
          "addiu   %[shift_internal], %[shift_internal], 1    \n\t"
          "prepend %[sum64_lo],  %[sum64_hi],  1              \n\t"
          "sra     %[sum64_hi],  %[sum64_hi],  1              \n\t"
          "prepend %[tmp3],      %[tmp2],      1              \n\t"
          "sra     %[tmp2],      %[tmp2],      1              \n\t"
          "addsc   %[sum64_lo],  %[sum64_lo],  %[tmp3]        \n\t"
          "addwc   %[sum64_hi],  %[sum64_hi],  %[tmp2]        \n\t"
          ".set pop                                           \n\t"
          : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
            [shift_internal] "+r" (shift_internal),
            [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
          :
          : "hi", "lo", "memory"
        );
      } else {
        __asm __volatile (
          ".set push                                          \n\t"
          ".set noreorder                                     \n\t"
          "addsc   %[sum64_lo],  %[sum64_lo],  %[tmp3]        \n\t"
          "addwc   %[sum64_hi],  %[sum64_hi],  %[tmp2]        \n\t"
          ".set pop                                           \n\t"
          : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
          : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
          : "hi", "lo", "memory"
        );
      }
    }
  }

  for (i = 1; i <= lpc_order; i++) {
    tmp_corr_c = corr_coeffs[i];
    int16_t* tmp_a_poly_j = &a_polynomial[i];
    int16_t* tmp_a_poly_j_i = &a_polynomial[0];
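    // tmp_a_poly_j walks a_polynomial[j] and tmp_a_poly_j_i walks
    // a_polynomial[j - i] as j runs from i to lpc_order.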
    for (j = i; j <= lpc_order; j++) {
      // For the case of i = 1 .. lpc_order:
      //   residual_energy +=
      //       a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i] * 2;

      int32_t tmp2, tmp3;
      int16_t sign_1;
      int16_t sign_2;
      int16_t sign_3;

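      // Same scheme as the i == 0 loop, except the product is doubled (sll
      // by one) because each off-diagonal term appears twice in the
      // quadratic form.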
      __asm __volatile (
        ".set push                                            \n\t"
        ".set noreorder                                       \n\t"
        "lh      %[tmp3],           0(%[tmp_a_poly_j])        \n\t"
        "lh      %[tmp2],           0(%[tmp_a_poly_j_i])      \n\t"
        "addiu   %[tmp_a_poly_j],   %[tmp_a_poly_j],   2      \n\t"
        "addiu   %[tmp_a_poly_j_i], %[tmp_a_poly_j_i], 2      \n\t"
        "mul     %[tmp32],          %[tmp3],     %[tmp2]      \n\t"
        "sll     %[tmp32],          %[tmp32],    1            \n\t"
        "mult    $ac0,              %[tmp32],    %[tmp_corr_c] \n\t"
        "shilov  $ac0,              %[shift_internal]         \n\t"
        "mfhi    %[tmp2],           $ac0                      \n\t"
        "mflo    %[tmp3],           $ac0                      \n\t"
        "sra     %[sign_1],         %[tmp2],     31           \n\t"
        "sra     %[sign_2],         %[sum64_hi], 31           \n\t"
        "xor     %[sign_3],         %[sign_1],   %[sign_2]    \n\t"
        ".set pop                                             \n\t"
        : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
          [tmp_a_poly_j] "+r" (tmp_a_poly_j), [sign_1] "=&r" (sign_1),
          [tmp_a_poly_j_i] "+r" (tmp_a_poly_j_i), [sign_2] "=&r" (sign_2),
          [sign_3] "=&r" (sign_3), [sum64_hi] "+r" (sum64_hi),
          [sum64_lo] "+r" (sum64_lo)
        : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
        : "hi", "lo", "memory"
      );
      if (sign_3 != 0) {
        __asm __volatile (
          ".set push                                          \n\t"
          ".set noreorder                                     \n\t"
          "addsc   %[sum64_lo],  %[sum64_lo],  %[tmp3]        \n\t"
          "addwc   %[sum64_hi],  %[sum64_hi],  %[tmp2]        \n\t"
          ".set pop                                           \n\t"
          : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
            [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
          :
          : "memory"
        );
      } else {
        // Test overflow and sum the result.
        if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
            ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
          // Shift right for overflow.
          __asm __volatile (
            ".set push                                          \n\t"
            ".set noreorder                                     \n\t"
            "addiu   %[shift_internal], %[shift_internal], 1    \n\t"
            "prepend %[sum64_lo],  %[sum64_hi],  1              \n\t"
            "sra     %[sum64_hi],  %[sum64_hi],  1              \n\t"
            "prepend %[tmp3],      %[tmp2],      1              \n\t"
            "sra     %[tmp2],      %[tmp2],      1              \n\t"
            "addsc   %[sum64_lo],  %[sum64_lo],  %[tmp3]        \n\t"
            "addwc   %[sum64_hi],  %[sum64_hi],  %[tmp2]        \n\t"
            ".set pop                                           \n\t"
            : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
              [shift_internal] "+r" (shift_internal),
              [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
            :
            : "hi", "lo", "memory"
          );
        } else {
          __asm __volatile (
            ".set push                                          \n\t"
            ".set noreorder                                     \n\t"
            "addsc   %[sum64_lo],  %[sum64_lo],  %[tmp3]        \n\t"
            "addwc   %[sum64_hi],  %[sum64_hi],  %[tmp2]        \n\t"
            ".set pop                                           \n\t"
            : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
              [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
            :
            : "hi", "lo", "memory"
          );
        }
      }
    }
  }
  word32_high = sum64_hi;
  word32_low = sum64_lo;

  // Calculate the value of shifting (shift_norm) for the 64-bit sum.
  if (word32_high != 0) {
    shift_norm = 32 - WebRtcSpl_NormW32(word32_high);
    int tmp1;
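    // Extract the top 32 significant bits of the sum; in effect
    //   residual_energy = (sum64_hi << (32 - shift_norm)) |
    //                     ((uint32_t)sum64_lo >> shift_norm).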
    __asm __volatile (
      ".set push                                                    \n\t"
      ".set noreorder                                               \n\t"
      "srl     %[residual_energy], %[sum64_lo],         %[shift_norm] \n\t"
      "li      %[tmp1],            32                               \n\t"
      "subu    %[tmp1],            %[tmp1],             %[shift_norm] \n\t"
      "sll     %[tmp1],            %[sum64_hi],         %[tmp1]     \n\t"
      "or      %[residual_energy], %[residual_energy],  %[tmp1]     \n\t"
      ".set pop                                                     \n\t"
      : [residual_energy] "=&r" (residual_energy), [tmp1] "=&r" (tmp1),
        [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
      : [shift_norm] "r" (shift_norm)
      : "memory"
    );
  } else {
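    // The sum fits in 32 bits. Normalize with a left shift (shift_norm is
    // then negative), except when bit 31 is set, where a single right shift
    // keeps the value representable as a positive int32_t.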
    if ((word32_low & 0x80000000) != 0) {
      shift_norm = 1;
      residual_energy = (uint32_t)word32_low >> 1;
    } else {
      shift_norm = WebRtcSpl_NormW32(word32_low);
      residual_energy = word32_low << shift_norm;
      shift_norm = -shift_norm;
    }
  }

  // Q(q_val_polynomial * 2) * Q(q_val_corr) >> shift_internal >> shift_norm
  //   = Q(q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2)
  *q_val_residual_energy =
      q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2;

  return residual_energy;
}