1 /**
2 * Copyright (C) 2022 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 /*------------------------------------------------------------------------------
17 *
18 * This file includes convolution functions required for the Qmf.
19 *
20 *----------------------------------------------------------------------------*/
21
22 #include "Qmf.h"
23
/**
 * 16-tap QMF convolution over two delay lines of 16-bit samples.
 *
 * Each coefficient coeffPtr[k] (k = 0..15) is multiplied with
 * p1dl_buffPtr[-k] (first delay line, walked backwards) and with
 * p2dl_buffPtr[k] (second delay line, walked forwards); the products are
 * accumulated in 64 bits, one accumulator per delay line.
 *
 * Each accumulator is then scaled down by 2^15 with rounding to nearest.
 * When the 15 discarded bits are exactly one half and the lowest retained
 * bit is clear, the round-up is undone, so ties land on the even value.
 * The scaled value is clamped to the signed 24-bit range.
 *
 * @param p1dl_buffPtr  Points at the newest sample of the first delay line;
 *                      elements [0] down to [-15] are read.
 * @param p2dl_buffPtr  Points at the first sample of the second delay line;
 *                      elements [0] up to [15] are read.
 * @param coeffPtr      16 filter coefficients.
 * @param convSumDiff   Output: [0] receives the clamped sum of the two phase
 *                      results, [2] receives the clamped difference
 *                      (second phase minus first phase). [1] is not written.
 */
void AsmQmfConvO(const int16_t* p1dl_buffPtr, const int16_t* p2dl_buffPtr,
                 const int32_t* coeffPtr, int32_t* convSumDiff) {
  /* The tap count is fixed: callers must use a delay line length of 16. */
  enum { kNumTaps = 16, kNumPhases = 2 };
  const int32_t kSatMax = 8388607;  /* 2^23 - 1 */
  const int32_t kSatMin = -8388608; /* -2^23 */

  int64_t phaseAcc[kNumPhases];
  int32_t phaseOut[kNumPhases];
  int32_t sumOut;
  int32_t diffOut;
  int tap;
  int phase;

  phaseAcc[0] = 0;
  phaseAcc[1] = 0;

  /* Multiply-accumulate: one coefficient load feeds both delay lines. */
  for (tap = 0; tap < kNumTaps; tap++) {
    const int64_t coeff = (int64_t)coeffPtr[tap];

    phaseAcc[0] += coeff * (int64_t)p1dl_buffPtr[-tap];
    phaseAcc[1] += coeff * (int64_t)p2dl_buffPtr[tap];
  }

  /* Scale by 2^-15, round (ties to even) and clamp each phase. */
  for (phase = 0; phase < kNumPhases; phase++) {
    /* Low 16 bits before rounding: bit 15 is the retained LSB, bits 14..0
     * are the fraction that is about to be discarded. */
    const int32_t lowBits = (int32_t)((int32_t)phaseAcc[phase] & 0x00FFFFL);
    int32_t scaled;

    phaseAcc[phase] += 0x004000L;
    scaled = (int32_t)(phaseAcc[phase] >> 15);

    /* Exact half with an even retained LSB: cancel the round-up. */
    if (lowBits == 0x004000L) {
      scaled -= 1;
    }

    if (scaled > kSatMax) {
      scaled = kSatMax;
    }
    if (scaled < kSatMin) {
      scaled = kSatMin;
    }

    phaseOut[phase] = scaled;
  }

  sumOut = phaseOut[1] + phaseOut[0];
  if (sumOut > kSatMax) {
    sumOut = kSatMax;
  }
  if (sumOut < kSatMin) {
    sumOut = kSatMin;
  }

  diffOut = phaseOut[1] - phaseOut[0];
  if (diffOut > kSatMax) {
    diffOut = kSatMax;
  }
  if (diffOut < kSatMin) {
    diffOut = kSatMin;
  }

  convSumDiff[0] = sumOut;
  convSumDiff[2] = diffOut;
}
193
/**
 * 16-tap QMF convolution over two delay lines of 32-bit samples.
 *
 * Each coefficient coeffPtr[k] (k = 0..15) is multiplied with
 * p1dl_buffPtr[-k] (first delay line, walked backwards) and with
 * p2dl_buffPtr[k] (second delay line, walked forwards); the products are
 * accumulated in 64 bits, one accumulator per delay line.
 *
 * Each accumulator is then scaled down by 2^23 with rounding to nearest.
 * When the 23 discarded bits are exactly one half and the lowest retained
 * bit is clear, the round-up is undone, so ties land on the even value.
 * The scaled value is narrowed to 32 bits and clamped to the signed 24-bit
 * range.
 *
 * NOTE: the clamp is applied after the narrowing to int32_t, exactly as the
 * original code did, so it only protects inputs whose scaled accumulator
 * already fits in 32 bits.
 *
 * @param p1dl_buffPtr   Points at the newest sample of the first delay line;
 *                       elements [0] down to [-15] are read.
 * @param p2dl_buffPtr   Points at the first sample of the second delay line;
 *                       elements [0] up to [15] are read.
 * @param coeffPtr       16 filter coefficients.
 * @param filterOutputs  Output: [0] receives the clamped sum of the two phase
 *                       results, [1] receives the clamped difference
 *                       (second phase minus first phase).
 */
void AsmQmfConvI(const int32_t* p1dl_buffPtr, const int32_t* p2dl_buffPtr,
                 const int32_t* coeffPtr, int32_t* filterOutputs) {
  /* The tap count is fixed: callers must use a delay line length of 16. */
  enum { kNumTaps = 16, kNumPhases = 2 };
  const int32_t kSatMax = 8388607;  /* 2^23 - 1 */
  const int32_t kSatMin = -8388608; /* -2^23 */

  int64_t phaseAcc[kNumPhases];
  int32_t phaseOut[kNumPhases];
  int32_t sumOut;
  int32_t diffOut;
  int tap;
  int phase;

  phaseAcc[0] = 0;
  phaseAcc[1] = 0;

  /* Multiply-accumulate: one coefficient load feeds both delay lines. */
  for (tap = 0; tap < kNumTaps; tap++) {
    const int64_t coeff = (int64_t)coeffPtr[tap];

    phaseAcc[0] += coeff * (int64_t)p1dl_buffPtr[-tap];
    phaseAcc[1] += coeff * (int64_t)p2dl_buffPtr[tap];
  }

  /* Scale by 2^-23, round (ties to even) and clamp each phase. */
  for (phase = 0; phase < kNumPhases; phase++) {
    /* Low 24 bits before rounding: bit 23 is the retained LSB, bits 22..0
     * are the fraction that is about to be discarded.
     *
     * The previous implementation tested this with
     * `((int32_t)acc << 8) ^ 0x40000000`, which left-shifts a signed value
     * that is frequently negative or overflows -- undefined behaviour in C.
     * Masking the accumulator as an unsigned value yields the same answer
     * with fully defined semantics. */
    const uint64_t lowBits = (uint64_t)phaseAcc[phase] & 0x00FFFFFFu;
    int32_t scaled;

    phaseAcc[phase] += 0x00400000L;
    scaled = (int32_t)(phaseAcc[phase] >> 23);

    /* Exact half with an even retained LSB: cancel the round-up. */
    if (lowBits == 0x00400000u) {
      scaled -= 1;
    }

    if (scaled > kSatMax) {
      scaled = kSatMax;
    }
    if (scaled < kSatMin) {
      scaled = kSatMin;
    }

    phaseOut[phase] = scaled;
  }

  sumOut = phaseOut[1] + phaseOut[0];
  if (sumOut > kSatMax) {
    sumOut = kSatMax;
  }
  if (sumOut < kSatMin) {
    sumOut = kSatMin;
  }

  diffOut = phaseOut[1] - phaseOut[0];
  if (diffOut > kSatMax) {
    diffOut = kSatMax;
  }
  if (diffOut < kSatMin) {
    diffOut = kSatMin;
  }

  filterOutputs[0] = sumOut;
  filterOutputs[1] = diffOut;
}
361