1 /**
2 * Copyright (C) 2022 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 /*------------------------------------------------------------------------------
17 *
18 * This file includes convolution functions required for the Qmf.
19 *
20 *----------------------------------------------------------------------------*/
21
22 #include "Qmf.h"
23
/**
 * QMF convolution, "O" variant (HD).
 *
 * Computes two 16-tap dot products that share one coefficient set:
 *   phase 0: sum of coeffPtr[k] * p1dl_buffPtr[-k]  (delay line read backwards)
 *   phase 1: sum of coeffPtr[k] * p2dl_buffPtr[+k]  (delay line read forwards)
 * for k = 0..15, accumulated in 64 bits.
 *
 * Each accumulator is then scaled down by 2^23 with round-half-to-even and
 * saturated to the signed 24-bit range [-8388608, 8388607]. The saturated
 * sum (phase1 + phase0) and difference (phase1 - phase0) of the two phases
 * are written to the output.
 *
 * @param p1dl_buffPtr Points at the newest phase-0 sample; elements
 *                     [-15 .. 0] relative to this pointer are read.
 * @param p2dl_buffPtr Points at the first phase-1 sample; elements
 *                     [0 .. 15] are read.
 * @param coeffPtr     16 filter coefficients, elements [0 .. 15] are read.
 * @param convSumDiff  Output. Element [0] receives the sum and element [2]
 *                     receives the difference; element [1] is not touched.
 *
 * NOTE: the tap count is fixed at 16 (the original code documents this as
 * requiring m_qmfDelayLineLength == 16).
 */
void AsmQmfConvO_HD(const int32_t* p1dl_buffPtr, const int32_t* p2dl_buffPtr,
                    const int32_t* coeffPtr, int32_t* convSumDiff) {
  enum { kNumTaps = 16, kNumPhases = 2, kOutShift = 23 };
  const int64_t kRoundBias = 0x00400000; /* half of one output LSB (2^22) */
  const uint32_t kTieMask = 0x00FFFFFFu; /* low 24 bits of the accumulator */
  const uint32_t kTiePattern = 0x00400000u;
  const int32_t kMax24 = 8388607;
  const int32_t kMin24 = -8388608;

  int64_t phaseAcc[kNumPhases] = {0, 0};
  int32_t phaseConv[kNumPhases];
  int32_t convSum;
  int32_t convDiff;
  int tap;
  int phase;

  /* Multiply-accumulate: one coefficient load feeds both phases. */
  for (tap = 0; tap < kNumTaps; tap++) {
    const int64_t coeff = coeffPtr[tap];

    phaseAcc[0] += coeff * p1dl_buffPtr[-tap];
    phaseAcc[1] += coeff * p2dl_buffPtr[tap];
  }

  for (phase = 0; phase < kNumPhases; phase++) {
    /* Inspect the low bits through an unsigned type: left-shifting the
     * signed 32-bit value, as was done previously, is undefined behaviour
     * for negative or large inputs. The bit pattern tested is the same.
     */
    const uint32_t lowWord = (uint32_t)phaseAcc[phase];
    /* Relies on >> of a negative int64_t being an arithmetic shift. */
    int32_t acc = (int32_t)((phaseAcc[phase] + kRoundBias) >> kOutShift);

    /* Exact half with an even integer part: adding the bias rounded up to
     * an odd value, so step back down (round half to even).
     */
    if ((lowWord & kTieMask) == kTiePattern) {
      acc--;
    }

    if (acc > kMax24) {
      acc = kMax24;
    }
    if (acc < kMin24) {
      acc = kMin24;
    }

    phaseConv[phase] = acc;
  }

  convSum = phaseConv[1] + phaseConv[0];
  if (convSum > kMax24) {
    convSum = kMax24;
  }
  if (convSum < kMin24) {
    convSum = kMin24;
  }

  convDiff = phaseConv[1] - phaseConv[0];
  if (convDiff > kMax24) {
    convDiff = kMax24;
  }
  if (convDiff < kMin24) {
    convDiff = kMin24;
  }

  convSumDiff[0] = convSum;
  convSumDiff[2] = convDiff;
}
198
/**
 * QMF convolution, "I" variant (HD).
 *
 * Computes two 16-tap dot products that share one coefficient set:
 *   phase 0: sum of coeffPtr[k] * p1dl_buffPtr[-k]  (delay line read backwards)
 *   phase 1: sum of coeffPtr[k] * p2dl_buffPtr[+k]  (delay line read forwards)
 * for k = 0..15, accumulated in 64 bits.
 *
 * Each accumulator is then scaled down by 2^23 with round-half-to-even and
 * saturated to the signed 24-bit range [-8388608, 8388607]. The saturated
 * sum (phase1 + phase0) and difference (phase1 - phase0) of the two phases
 * are written to the output.
 *
 * @param p1dl_buffPtr  Points at the newest phase-0 sample; elements
 *                      [-15 .. 0] relative to this pointer are read.
 * @param p2dl_buffPtr  Points at the first phase-1 sample; elements
 *                      [0 .. 15] are read.
 * @param coeffPtr      16 filter coefficients, elements [0 .. 15] are read.
 * @param filterOutputs Output. Element [0] receives the sum and element [1]
 *                      receives the difference.
 *
 * NOTE: the tap count is fixed at 16 (the original code documents this as
 * requiring m_qmfDelayLineLength == 16).
 */
void AsmQmfConvI_HD(const int32_t* p1dl_buffPtr, const int32_t* p2dl_buffPtr,
                    const int32_t* coeffPtr, int32_t* filterOutputs) {
  enum { kNumTaps = 16, kNumPhases = 2, kOutShift = 23 };
  const int64_t kRoundBias = 0x00400000; /* half of one output LSB (2^22) */
  const uint32_t kTieMask = 0x00FFFFFFu; /* low 24 bits of the accumulator */
  const uint32_t kTiePattern = 0x00400000u;
  const int32_t kMax24 = 8388607;
  const int32_t kMin24 = -8388608;

  int64_t phaseAcc[kNumPhases] = {0, 0};
  int32_t phaseConv[kNumPhases];
  int32_t convSum;
  int32_t convDiff;
  int tap;
  int phase;

  /* Multiply-accumulate: one coefficient load feeds both phases. */
  for (tap = 0; tap < kNumTaps; tap++) {
    const int64_t coeff = coeffPtr[tap];

    phaseAcc[0] += coeff * p1dl_buffPtr[-tap];
    phaseAcc[1] += coeff * p2dl_buffPtr[tap];
  }

  for (phase = 0; phase < kNumPhases; phase++) {
    /* Inspect the low bits through an unsigned type: left-shifting the
     * signed 32-bit value, as was done previously, is undefined behaviour
     * for negative or large inputs. The bit pattern tested is the same.
     */
    const uint32_t lowWord = (uint32_t)phaseAcc[phase];
    /* Relies on >> of a negative int64_t being an arithmetic shift. */
    int32_t acc = (int32_t)((phaseAcc[phase] + kRoundBias) >> kOutShift);

    /* Exact half with an even integer part: adding the bias rounded up to
     * an odd value, so step back down (round half to even).
     */
    if ((lowWord & kTieMask) == kTiePattern) {
      acc--;
    }

    if (acc > kMax24) {
      acc = kMax24;
    }
    if (acc < kMin24) {
      acc = kMin24;
    }

    phaseConv[phase] = acc;
  }

  convSum = phaseConv[1] + phaseConv[0];
  if (convSum > kMax24) {
    convSum = kMax24;
  }
  if (convSum < kMin24) {
    convSum = kMin24;
  }

  convDiff = phaseConv[1] - phaseConv[0];
  if (convDiff > kMax24) {
    convDiff = kMax24;
  }
  if (convDiff < kMin24) {
    convDiff = kMin24;
  }

  filterOutputs[0] = convSum;
  filterOutputs[1] = convDiff;
}
368