1 /**
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /*------------------------------------------------------------------------------
17  *
18  *  This file includes convolution functions required for the Qmf.
19  *
20  *----------------------------------------------------------------------------*/
21 
22 #include "Qmf.h"
23 
/*
 * QMF convolution kernel for 16-bit delay-line samples.
 *
 * Two 16-tap dot products are formed with the same coefficient set:
 *   - phase 0 walks p1dl_buffPtr backwards (offsets 0 down to -15),
 *   - phase 1 walks p2dl_buffPtr forwards  (offsets 0 up to +15).
 * Each 64-bit sum is scaled down by 15 bits with rounding (exact halves go
 * to the even result) and saturated to the signed 24-bit range. The
 * saturated sum (phase1 + phase0) is stored in convSumDiff[0] and the
 * saturated difference (phase1 - phase0) in convSumDiff[2];
 * convSumDiff[1] is not written.
 *
 * p1dl_buffPtr  must have 15 readable samples before the addressed one.
 * p2dl_buffPtr  must have 15 readable samples after the addressed one.
 * coeffPtr      16 filter coefficients.
 * convSumDiff   output; elements 0 and 2 are written.
 *
 * NOTE: the tap count is fixed at 16 (the original code tied this to
 * m_qmfDelayLineLength == 16).
 */
void AsmQmfConvO(const int16_t* p1dl_buffPtr, const int16_t* p2dl_buffPtr,
                 const int32_t* coeffPtr, int32_t* convSumDiff) {
  enum { kTapCount = 16, kPhaseCount = 2 };
  const int32_t kMax24 = 8388607;
  const int32_t kMin24 = -8388608;

  int64_t sums[kPhaseCount] = {0, 0};
  int32_t phase[kPhaseCount];
  int32_t sum;
  int32_t diff;
  int tap;
  int ph;

  /* Multiply-accumulate: same coefficient, opposite walking directions. */
  for (tap = 0; tap < kTapCount; ++tap) {
    const int64_t coeff = (int64_t)coeffPtr[tap];

    sums[0] += coeff * (int64_t)p1dl_buffPtr[-tap];
    sums[1] += coeff * (int64_t)p2dl_buffPtr[tap];
  }

  /* Scale each accumulator by 2^-15, round and saturate to 24 bits. */
  for (ph = 0; ph < kPhaseCount; ++ph) {
    /* Low 16 bits: result LSB (bit 15) plus the 15 discarded bits. */
    const int32_t lowBits = (int32_t)sums[ph] & 0x00FFFFL;
    int32_t rounded = (int32_t)((sums[ph] + 0x004000L) >> 15);

    /* Exactly one half above an even value: undo the round-up. */
    if (lowBits == 0x004000L) {
      rounded -= 1;
    }

    if (rounded > kMax24) {
      rounded = kMax24;
    } else if (rounded < kMin24) {
      rounded = kMin24;
    }
    phase[ph] = rounded;
  }

  sum = phase[1] + phase[0];
  if (sum > kMax24) {
    sum = kMax24;
  } else if (sum < kMin24) {
    sum = kMin24;
  }

  diff = phase[1] - phase[0];
  if (diff > kMax24) {
    diff = kMax24;
  } else if (diff < kMin24) {
    diff = kMin24;
  }

  convSumDiff[0] = sum;
  convSumDiff[2] = diff;
}
193 
/*
 * QMF convolution kernel for 32-bit delay-line samples.
 *
 * Two 16-tap dot products are formed with the same coefficient set:
 *   - phase 0 walks p1dl_buffPtr backwards (offsets 0 down to -15),
 *   - phase 1 walks p2dl_buffPtr forwards  (offsets 0 up to +15).
 * Each 64-bit sum is scaled down by 23 bits with rounding (exact halves go
 * to the even result) and saturated to the signed 24-bit range. The
 * saturated sum (phase1 + phase0) is stored in filterOutputs[0] and the
 * saturated difference (phase1 - phase0) in filterOutputs[1].
 *
 * p1dl_buffPtr   must have 15 readable samples before the addressed one.
 * p2dl_buffPtr   must have 15 readable samples after the addressed one.
 * coeffPtr       16 filter coefficients.
 * filterOutputs  output; elements 0 and 1 are written.
 *
 * NOTE: the tap count is fixed at 16 (the original code tied this to
 * m_qmfDelayLineLength == 16).
 */
void AsmQmfConvI(const int32_t* p1dl_buffPtr, const int32_t* p2dl_buffPtr,
                 const int32_t* coeffPtr, int32_t* filterOutputs) {
  enum { kTapCount = 16, kPhaseCount = 2 };
  const int32_t kMax24 = 8388607;
  const int32_t kMin24 = -8388608;

  int64_t sums[kPhaseCount] = {0, 0};
  int32_t phase[kPhaseCount];
  int32_t convSum;
  int32_t convDiff;
  int tap;
  int ph;

  /* Multiply-accumulate: same coefficient, opposite walking directions. */
  for (tap = 0; tap < kTapCount; ++tap) {
    const int64_t coeff = (int64_t)coeffPtr[tap];

    sums[0] += coeff * (int64_t)p1dl_buffPtr[-tap];
    sums[1] += coeff * (int64_t)p2dl_buffPtr[tap];
  }

  /* Scale each accumulator by 2^-23, round and saturate to 24 bits. */
  for (ph = 0; ph < kPhaseCount; ++ph) {
    /*
     * Low 24 bits: result LSB (bit 23) plus the 23 discarded bits. The
     * test is done on an unsigned value; the previous form left-shifted
     * a signed 32-bit value, which is undefined for negative or large
     * accumulators.
     */
    const uint32_t lowBits = (uint32_t)sums[ph] & 0x00FFFFFFu;
    int32_t rounded = (int32_t)((sums[ph] + 0x00400000L) >> 23);

    /* Exactly one half above an even value: undo the round-up. */
    if (lowBits == 0x00400000u) {
      rounded -= 1;
    }

    if (rounded > kMax24) {
      rounded = kMax24;
    } else if (rounded < kMin24) {
      rounded = kMin24;
    }
    phase[ph] = rounded;
  }

  convSum = phase[1] + phase[0];
  if (convSum > kMax24) {
    convSum = kMax24;
  } else if (convSum < kMin24) {
    convSum = kMin24;
  }

  convDiff = phase[1] - phase[0];
  if (convDiff > kMax24) {
    convDiff = kMax24;
  } else if (convDiff < kMin24) {
    convDiff = kMin24;
  }

  filterOutputs[0] = convSum;
  filterOutputs[1] = convDiff;
}
361