1 /**
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /*------------------------------------------------------------------------------
17  *
18  *  This file includes convolution functions required for the Qmf.
19  *
20  *----------------------------------------------------------------------------*/
21 
22 #include "Qmf.h"
23 
/**
 * Runs the two 16-tap QMF phase convolutions and stores their saturated sum
 * and difference.
 *
 * Phase 0 accumulates coeffPtr[k] * p1dl_buffPtr[-k] (the first delay line is
 * walked backwards) and phase 1 accumulates coeffPtr[k] * p2dl_buffPtr[k]
 * (the second delay line is walked forwards), for k = 0..15, in 64-bit
 * arithmetic. Each accumulator is then scaled down by 2^23, with exact ties
 * rounded to the even value, and clamped to [-8388608, 8388607].
 *
 * @param p1dl_buffPtr  Tap-0 sample of the first delay line; elements
 *                      [-15..0] are read.
 * @param p2dl_buffPtr  Tap-0 sample of the second delay line; elements
 *                      [0..15] are read.
 * @param coeffPtr      Filter coefficients; elements [0..15] are read.
 * @param convSumDiff   Output. convSumDiff[0] receives the clamped
 *                      (phase1 + phase0) and convSumDiff[2] receives the
 *                      clamped (phase1 - phase0). convSumDiff[1] is not
 *                      written.
 */
void AsmQmfConvO_HD(const int32_t* p1dl_buffPtr, const int32_t* p2dl_buffPtr,
                    const int32_t* coeffPtr, int32_t* convSumDiff) {
  /* The tap count is hard-coded: the hand-unrolled code this loop replaces
   * carried a warning that it assumes m_qmfDelayLineLength == 16. */
  enum { kNumTaps = 16, kNumPhases = 2, kSatMax = 8388607, kSatMin = -8388608 };

  int64_t phaseAcc[2] = {0, 0};
  int32_t phaseConv[2];
  int32_t convSum;
  int32_t convDiff;

  for (int tap = 0; tap < kNumTaps; tap++) {
    const int64_t coeff = coeffPtr[tap];
    phaseAcc[0] += coeff * (int64_t)p1dl_buffPtr[-tap];
    phaseAcc[1] += coeff * (int64_t)p2dl_buffPtr[tap];
  }

  for (int phase = 0; phase < kNumPhases; phase++) {
    /* Low 24 bits of the un-rounded accumulator. They are extracted with an
     * unsigned mask because left-shifting a negative (or overflowing) signed
     * value, as "(int32_t)acc << 8" would, is undefined behaviour in C. */
    const uint32_t lowBits = (uint32_t)phaseAcc[phase] & 0x00FFFFFFu;

    /* Add half an output LSB, then drop the 23 fractional bits.
     * NOTE: relies on >> of a negative int64_t being an arithmetic shift. */
    int32_t acc = (int32_t)((phaseAcc[phase] + (int64_t)0x00400000) >> 23);

    /* Exact tie whose rounded-up result is odd: step back to the even value. */
    if (lowBits == 0x00400000u) {
      acc--;
    }

    if (acc > kSatMax) {
      acc = kSatMax;
    }
    if (acc < kSatMin) {
      acc = kSatMin;
    }
    phaseConv[phase] = acc;
  }

  /* Both phases are already clamped to 24 bits, so the sum and difference
   * below cannot overflow int32_t before they are clamped themselves. */
  convSum = phaseConv[1] + phaseConv[0];
  if (convSum > kSatMax) {
    convSum = kSatMax;
  }
  if (convSum < kSatMin) {
    convSum = kSatMin;
  }

  convDiff = phaseConv[1] - phaseConv[0];
  if (convDiff > kSatMax) {
    convDiff = kSatMax;
  }
  if (convDiff < kSatMin) {
    convDiff = kSatMin;
  }

  convSumDiff[0] = convSum;
  convSumDiff[2] = convDiff;
}
198 
/**
 * Runs the two 16-tap QMF phase convolutions and stores their saturated sum
 * and difference in adjacent output slots.
 *
 * Phase 0 accumulates coeffPtr[k] * p1dl_buffPtr[-k] (the first delay line is
 * walked backwards) and phase 1 accumulates coeffPtr[k] * p2dl_buffPtr[k]
 * (the second delay line is walked forwards), for k = 0..15, in 64-bit
 * arithmetic. Each accumulator is then scaled down by 2^23, with exact ties
 * rounded to the even value, and clamped to [-8388608, 8388607].
 *
 * @param p1dl_buffPtr   Tap-0 sample of the first delay line; elements
 *                       [-15..0] are read.
 * @param p2dl_buffPtr   Tap-0 sample of the second delay line; elements
 *                       [0..15] are read.
 * @param coeffPtr       Filter coefficients; elements [0..15] are read.
 * @param filterOutputs  Output. filterOutputs[0] receives the clamped
 *                       (phase1 + phase0) and filterOutputs[1] receives the
 *                       clamped (phase1 - phase0).
 */
void AsmQmfConvI_HD(const int32_t* p1dl_buffPtr, const int32_t* p2dl_buffPtr,
                    const int32_t* coeffPtr, int32_t* filterOutputs) {
  /* The tap count is hard-coded: the hand-unrolled code this loop replaces
   * carried a warning that it assumes m_qmfDelayLineLength == 16. */
  enum { kNumTaps = 16, kNumPhases = 2, kSatMax = 8388607, kSatMin = -8388608 };

  int64_t phaseAcc[2] = {0, 0};
  int32_t phaseConv[2];
  int32_t convSum;
  int32_t convDiff;

  for (int tap = 0; tap < kNumTaps; tap++) {
    const int64_t coeff = coeffPtr[tap];
    phaseAcc[0] += coeff * (int64_t)p1dl_buffPtr[-tap];
    phaseAcc[1] += coeff * (int64_t)p2dl_buffPtr[tap];
  }

  for (int phase = 0; phase < kNumPhases; phase++) {
    /* Low 24 bits of the un-rounded accumulator. They are extracted with an
     * unsigned mask because left-shifting a negative (or overflowing) signed
     * value, as "(int32_t)acc << 8" would, is undefined behaviour in C. */
    const uint32_t lowBits = (uint32_t)phaseAcc[phase] & 0x00FFFFFFu;

    /* Add half an output LSB, then drop the 23 fractional bits.
     * NOTE: relies on >> of a negative int64_t being an arithmetic shift. */
    int32_t acc = (int32_t)((phaseAcc[phase] + (int64_t)0x00400000) >> 23);

    /* Exact tie whose rounded-up result is odd: step back to the even value. */
    if (lowBits == 0x00400000u) {
      acc--;
    }

    if (acc > kSatMax) {
      acc = kSatMax;
    }
    if (acc < kSatMin) {
      acc = kSatMin;
    }
    phaseConv[phase] = acc;
  }

  /* Both phases are already clamped to 24 bits, so the sum and difference
   * below cannot overflow int32_t before they are clamped themselves. */
  convSum = phaseConv[1] + phaseConv[0];
  if (convSum > kSatMax) {
    convSum = kSatMax;
  }
  if (convSum < kSatMin) {
    convSum = kSatMin;
  }

  convDiff = phaseConv[1] - phaseConv[0];
  if (convDiff > kSatMax) {
    convDiff = kSatMax;
  }
  if (convDiff < kSatMin) {
    convDiff = kSatMin;
  }

  filterOutputs[0] = convSum;
  filterOutputs[1] = convDiff;
}
368