1 /**
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /*------------------------------------------------------------------------------
17  *
18  *  Subband processing consists of:
19  *  inverse quantisation (defined in a separate file),
20  *  predictor coefficient update (Pole and Zero Coeff update),
21  *  predictor filtering.
22  *
23  *----------------------------------------------------------------------------*/
24 
25 #ifndef SUBBANDFUNCTIONSCOMMON_H
26 #define SUBBANDFUNCTIONSCOMMON_H
27 
28 enum reg64_reg { reg64_H = 1, reg64_L = 0 };
29 
30 void processSubband_HD(const int32_t qCode, const int32_t ditherVal,
31                        Subband_data* SubbandDataPt, IQuantiser_data* iqDataPt);
32 void processSubband_HDLL(const int32_t qCode, const int32_t ditherVal,
33                          Subband_data* SubbandDataPt,
34                          IQuantiser_data* iqDataPt);
35 void processSubband_HDHL(const int32_t qCode, const int32_t ditherVal,
36                          Subband_data* SubbandDataPt,
37                          IQuantiser_data* iqDataPt);
38 
39 /* Function to carry out inverse quantisation for a subband */
invertQuantisation(const int32_t qCode,const int32_t ditherVal,IQuantiser_data * iqdata_pt)40 XBT_INLINE_ void invertQuantisation(const int32_t qCode,
41                                     const int32_t ditherVal,
42                                     IQuantiser_data* iqdata_pt) {
43   int32_t invQ;
44   int32_t index;
45   int32_t acc;
46   reg64_t tmp_r64;
47   int64_t tmp_acc;
48   int32_t tmp_accL;
49   int32_t tmp_accH;
50   uint32_t tmp_round0;
51   uint32_t tmp_round1;
52   unsigned u16t;
53   /* log delta leak value (Q23) */
54   const uint32_t logDeltaLeakVal = 0x7F6CL;
55 
56   /* Turn the quantised code back into an index into the threshold table. This
57    * involves bitwise inversion of the code (if -ve) and adding 1 (phantom
58    * element at table base). Then set invQ to be +/- the threshold value,
59    * depending on the code sign. */
60   index = qCode;
61   if (qCode < 0) {
62     index = (~index);
63   }
64   index = index + 1;
65   invQ = iqdata_pt->thresholdTablePtr_sl1[index];
66   if (qCode < 0) {
67     invQ = -invQ;
68   }
69 
70   /* Load invQ into the accumulator. Add the product of the dither value times
71    * the indexed dither table value. Then get the result back from the
72    * accumulator as an updated invQ. */
73   tmp_r64.s64 = ((int64_t)ditherVal * iqdata_pt->ditherTablePtr_sf1[index]);
74   tmp_r64.s32.h += invQ >> 1;
75 
76   acc = tmp_r64.s32.h;
77 
78   tmp_round1 = tmp_r64.s32.h & 0x00000001L;
79   if (tmp_r64.u32.l >= 0x80000000) {
80     acc++;
81   }
82   if (tmp_round1 == 0 && tmp_r64.s32.l == (int32_t)0x80000000L) {
83     acc--;
84   }
85   acc = ssat24(acc);
86 
87   invQ = acc;
88 
89   /* Scale invQ by the current delta value. Left-shift the result (in the
90    * accumulator) by 4 positions for the delta scaling. Get the updated invQ
91    * back from the accumulator. */
92   u16t = iqdata_pt->logDelta;
93   tmp_acc = ((int64_t)invQ * iqdata_pt->delta);
94   tmp_accL = u16t * logDeltaLeakVal;
95   tmp_accH = iqdata_pt->incrTablePtr[index];
96   acc = (int32_t)(tmp_acc >> (23 - deltaScale));
97   invQ = ssat24(acc);
98 
99   /* Now update the value of logDelta. Load the accumulator with the index
100    * value of the logDelta increment table. Add the product of the current
101    * logDelta scaled by a leaky coefficient (16310 in Q14). Get the value back
102    * from the accumulator. */
103   tmp_accH += tmp_accL >> (32 - 17);
104 
105   acc = tmp_accH;
106 
107   tmp_r64.u32.l = ((uint32_t)tmp_accL << 17);
108   tmp_r64.s32.h = tmp_accH;
109 
110   tmp_round0 = tmp_r64.u32.l;
111   tmp_round1 = (int32_t)(tmp_r64.u64 >> 1);
112   if (tmp_round0 >= 0x80000000L) {
113     acc++;
114   }
115   if (tmp_round1 == 0x40000000L) {
116     acc--;
117   }
118 
119   /* Limit the updated logDelta between 0 and its subband-specific maximum. */
120   if (acc < 0) {
121     acc = 0;
122   }
123   if (acc > iqdata_pt->maxLogDelta) {
124     acc = iqdata_pt->maxLogDelta;
125   }
126 
127   iqdata_pt->logDelta = (uint16_t)acc;
128 
129   /* The updated value of delta is the logTable output (indexed by 5 bits from
130    * the updated logDelta) shifted by a value involving the logDelta minimum
131    * and the updated logDelta itself. */
132   iqdata_pt->delta = iqdata_pt->iquantTableLogPtr[(acc >> 3) & 0x1f] >>
133                      (22 - 25 - iqdata_pt->minLogDelta - (acc >> 8));
134 
135   iqdata_pt->invQ = invQ;
136 }
137 
invertQuantisationHL(const int32_t qCode,const int32_t ditherVal,IQuantiser_data * iqdata_pt)138 XBT_INLINE_ void invertQuantisationHL(const int32_t qCode,
139                                       const int32_t ditherVal,
140                                       IQuantiser_data* iqdata_pt) {
141   int32_t invQ;
142   int32_t index;
143   int32_t acc;
144   reg64_t tmp_r64;
145   int64_t tmp_acc;
146   int32_t tmp_accL;
147   int32_t tmp_accH;
148   uint32_t tmp_round0;
149   uint32_t tmp_round1;
150   unsigned u16t;
151   /* log delta leak value (Q23) */
152   const uint32_t logDeltaLeakVal = 0x7F6CL;
153 
154   /* Turn the quantised code back into an index into the threshold table. This
155    * involves bitwise inversion of the code (if -ve) and adding 1 (phantom
156    * element at table base). Then set invQ to be +/- the threshold value,
157    * depending on the code sign. */
158   index = qCode;
159   if (qCode < 0) {
160     index = (~index);
161   }
162   index = index + 1;
163   invQ = iqdata_pt->thresholdTablePtr_sl1[index];
164   if (qCode < 0) {
165     invQ = -invQ;
166   }
167 
168   /* Load invQ into the accumulator. Add the product of the dither value times
169    * the indexed dither table value. Then get the result back from the
170    * accumulator as an updated invQ. */
171   tmp_r64.s64 = ((int64_t)ditherVal * iqdata_pt->ditherTablePtr_sf1[index]);
172   tmp_r64.s32.h += invQ >> 1;
173 
174   acc = tmp_r64.s32.h;
175 
176   tmp_round1 = tmp_r64.s32.h & 0x00000001L;
177   if (tmp_r64.u32.l >= 0x80000000) {
178     acc++;
179   }
180   if (tmp_round1 == 0 && tmp_r64.u32.l == 0x80000000L) {
181     acc--;
182   }
183   acc = ssat24(acc);
184 
185   invQ = acc;
186 
187   /* Scale invQ by the current delta value. Left-shift the result (in the
188    * accumulator) by 4 positions for the delta scaling. Get the updated invQ
189    * back from the accumulator. */
190   u16t = iqdata_pt->logDelta;
191   tmp_acc = ((int64_t)invQ * iqdata_pt->delta);
192   tmp_accL = u16t * logDeltaLeakVal;
193   tmp_accH = iqdata_pt->incrTablePtr[index];
194   acc = (int32_t)(tmp_acc >> (23 - deltaScale));
195   invQ = ssat24(acc);
196 
197   /* Now update the value of logDelta. Load the accumulator with the index
198    * value of the logDelta increment table. Add the product of the current
199    * logDelta scaled by a leaky coefficient (16310 in Q14). Get the value back
200    * from the accumulator. */
201   tmp_accH += tmp_accL >> (32 - 17);
202 
203   acc = tmp_accH;
204 
205   tmp_r64.u32.l = ((uint32_t)tmp_accL << 17);
206   tmp_r64.s32.h = tmp_accH;
207 
208   tmp_round0 = tmp_r64.u32.l;
209   tmp_round1 = (int32_t)(tmp_r64.u64 >> 1);
210   if (tmp_round0 >= 0x80000000L) {
211     acc++;
212   }
213   if (tmp_round1 == 0x40000000L) {
214     acc--;
215   }
216 
217   /* Limit the updated logDelta between 0 and its subband-specific maximum. */
218   if (acc < 0) {
219     acc = 0;
220   }
221   if (acc > iqdata_pt->maxLogDelta) {
222     acc = iqdata_pt->maxLogDelta;
223   }
224 
225   iqdata_pt->logDelta = (uint16_t)acc;
226 
227   /* The updated value of delta is the logTable output (indexed by 5 bits from
228    * the updated logDelta) shifted by a value involving the logDelta minimum
229    * and the updated logDelta itself. */
230   iqdata_pt->delta = iqdata_pt->iquantTableLogPtr[(acc >> 3) & 0x1f] >>
231                      (22 - 25 - iqdata_pt->minLogDelta - (acc >> 8));
232 
233   iqdata_pt->invQ = invQ;
234 }
235 
236 /* Function to carry out prediction ARMA filtering for the current subband
237  * performPredictionFiltering should only be used for HH and LH subband! */
performPredictionFiltering(const int32_t invQ,Subband_data * SubbandDataPt)238 XBT_INLINE_ void performPredictionFiltering(const int32_t invQ,
239                                             Subband_data* SubbandDataPt) {
240   int32_t poleVal;
241   int32_t acc;
242   int64_t accL;
243   uint32_t pointer;
244   int32_t poleDelayLine;
245   int32_t predVal;
246   int32_t* zeroCoeffPt = SubbandDataPt->m_ZeroCoeffData.m_zeroCoeff;
247   int32_t* poleCoeff = SubbandDataPt->m_PoleCoeffData.m_poleCoeff;
248   int32_t* cbuf_pt;
249   int32_t invQincr_pos;
250   int32_t invQincr_neg;
251   int32_t k;
252   int32_t oldZData;
253   /* Pole coefficient and data indices */
254   enum { a1 = 0, a2 = 1 };
255 
256   /* Write the newest pole input sample to the pole delay line.
257    * Ensure the sum of the current dequantised error and the previous
258    * predictor output is saturated if necessary. */
259   poleDelayLine = invQ + SubbandDataPt->m_predData.m_predVal;
260 
261   poleDelayLine = ssat24(poleDelayLine);
262 
263   /* Pole filter convolution. Shift convolution result 1 place to the left
264    * before retrieving it, since the pole coefficients are Q22 (data is Q23)
265    * and we want a Q23 result */
266   accL = ((int64_t)poleCoeff[a2] *
267           (int64_t)SubbandDataPt->m_predData.m_poleDelayLine[a2]);
268   /* Update the pole delay line for the next pass by writing the new input
269    * sample into the 2nd element */
270   SubbandDataPt->m_predData.m_poleDelayLine[a2] = poleDelayLine;
271   accL += ((int64_t)poleCoeff[a1] * (int64_t)poleDelayLine);
272   poleVal = (int32_t)(accL >> 22);
273   poleVal = ssat24(poleVal);
274 
275   /* Create (2^(-7)) * sgn(invQ) in Q22 format. */
276   if (invQ == 0) {
277     invQincr_pos = 0L;
278   } else {
279     invQincr_pos = 0x800000;
280   }
281   if (invQ < 0L) {
282     invQincr_pos = -invQincr_pos;
283   }
284 
285   invQincr_neg = 0x0080 - invQincr_pos;
286   invQincr_pos += 0x0080;
287 
288   pointer = (SubbandDataPt->m_predData.m_zeroDelayLine.pointer++) + 12;
289   cbuf_pt = &SubbandDataPt->m_predData.m_zeroDelayLine.buffer[pointer];
290   /* partial manual unrolling to improve performance */
291   if (SubbandDataPt->m_predData.m_zeroDelayLine.pointer >= 12) {
292     SubbandDataPt->m_predData.m_zeroDelayLine.pointer = 0;
293   }
294 
295   SubbandDataPt->m_predData.m_zeroDelayLine.modulo = invQ;
296 
297   /* Iterate over the number of coefficients for this subband */
298   oldZData = invQ;
299   accL = 0;
300   for (k = 0; k < 12; k++) {
301     uint32_t tmp_round0;
302     int32_t coeffValue;
303     int32_t zData0;
304 
305     /* ------------------------------------------------------------------*/
306     zData0 = (*(cbuf_pt--));
307     coeffValue = *(zeroCoeffPt + k);
308     if (zData0 < 0L) {
309       acc = invQincr_neg - coeffValue;
310     } else {
311       acc = invQincr_pos - coeffValue;
312     }
313     tmp_round0 = acc;
314     acc = (acc >> 8) + coeffValue;
315     if (((tmp_round0 << 23) ^ 0x80000000) == 0) {
316       acc--;
317     }
318     accL += (int64_t)acc * (int64_t)(oldZData);
319     oldZData = zData0;
320     *(zeroCoeffPt + k) = acc;
321   }
322 
323   acc = (int32_t)(accL >> 22);
324   acc = ssat24(acc);
325 
326   /* Predictor output is the sum of the pole and zero filter outputs. Ensure
327    * this is saturated, if necessary. */
328   predVal = acc + poleVal;
329   predVal = ssat24(predVal);
330   SubbandDataPt->m_predData.m_zeroVal = acc;
331   SubbandDataPt->m_predData.m_predVal = predVal;
332 
333   /* Update the zero filter delay line by writing the new input sample to the
334    * circular buffer. */
335   SubbandDataPt->m_predData.m_zeroDelayLine
336       .buffer[SubbandDataPt->m_predData.m_zeroDelayLine.pointer] =
337       SubbandDataPt->m_predData.m_zeroDelayLine.modulo;
338   SubbandDataPt->m_predData.m_zeroDelayLine
339       .buffer[SubbandDataPt->m_predData.m_zeroDelayLine.pointer + 12] =
340       SubbandDataPt->m_predData.m_zeroDelayLine.modulo;
341 }
342 
performPredictionFilteringLL(const int32_t invQ,Subband_data * SubbandDataPt)343 XBT_INLINE_ void performPredictionFilteringLL(const int32_t invQ,
344                                               Subband_data* SubbandDataPt) {
345   int32_t poleVal;
346   int32_t acc;
347   int64_t accL;
348   uint32_t pointer;
349   int32_t poleDelayLine;
350   int32_t predVal;
351   int32_t* zeroCoeffPt = SubbandDataPt->m_ZeroCoeffData.m_zeroCoeff;
352   int32_t* poleCoeff = SubbandDataPt->m_PoleCoeffData.m_poleCoeff;
353   int32_t* cbuf_pt;
354   int32_t invQincr_pos;
355   int32_t invQincr_neg;
356   int32_t k;
357   int32_t oldZData;
358   /* Pole coefficient and data indices */
359   enum { a1 = 0, a2 = 1 };
360 
361   /* Write the newest pole input sample to the pole delay line.
362    * Ensure the sum of the current dequantised error and the previous
363    * predictor output is saturated if necessary. */
364   poleDelayLine = invQ + SubbandDataPt->m_predData.m_predVal;
365 
366   poleDelayLine = ssat24(poleDelayLine);
367 
368   /* Pole filter convolution. Shift convolution result 1 place to the left
369    * before retrieving it, since the pole coefficients are Q22 (data is Q23)
370    * and we want a Q23 result */
371   accL = ((int64_t)poleCoeff[a2] *
372           (int64_t)SubbandDataPt->m_predData.m_poleDelayLine[a2]);
373   /* Update the pole delay line for the next pass by writing the new input
374    * sample into the 2nd element */
375   SubbandDataPt->m_predData.m_poleDelayLine[a2] = poleDelayLine;
376   accL += ((int64_t)poleCoeff[a1] * (int64_t)poleDelayLine);
377   poleVal = (int32_t)(accL >> 22);
378   poleVal = ssat24(poleVal);
379   /* store poleVal to free one register. */
380   SubbandDataPt->m_predData.m_predVal = poleVal;
381 
382   /* Create (2^(-7)) * sgn(invQ) in Q22 format. */
383   if (invQ == 0) {
384     invQincr_pos = 0L;
385   } else {
386     invQincr_pos = 0x800000;
387   }
388   if (invQ < 0L) {
389     invQincr_pos = -invQincr_pos;
390   }
391 
392   invQincr_neg = 0x0080 - invQincr_pos;
393   invQincr_pos += 0x0080;
394 
395   pointer = (SubbandDataPt->m_predData.m_zeroDelayLine.pointer++) + 24;
396   cbuf_pt = &SubbandDataPt->m_predData.m_zeroDelayLine.buffer[pointer];
397   /* partial manual unrolling to improve performance */
398   if (SubbandDataPt->m_predData.m_zeroDelayLine.pointer >= 24) {
399     SubbandDataPt->m_predData.m_zeroDelayLine.pointer = 0;
400   }
401 
402   SubbandDataPt->m_predData.m_zeroDelayLine.modulo = invQ;
403 
404   /* Iterate over the number of coefficients for this subband */
405   oldZData = invQ;
406   accL = 0;
407   for (k = 0; k < 24; k++) {
408     int32_t zData0;
409     int32_t coeffValue;
410 
411     zData0 = (*(cbuf_pt--));
412     coeffValue = *(zeroCoeffPt + k);
413     if (zData0 < 0L) {
414       acc = invQincr_neg - coeffValue;
415     } else {
416       acc = invQincr_pos - coeffValue;
417     }
418     if (((acc << 23) ^ 0x80000000) == 0) {
419       coeffValue--;
420     }
421     acc = (acc >> 8) + coeffValue;
422     accL += (int64_t)acc * (int64_t)(oldZData);
423     oldZData = zData0;
424     *(zeroCoeffPt + k) = acc;
425   }
426 
427   acc = (int32_t)(accL >> 22);
428   acc = ssat24(acc);
429 
430   /* Predictor output is the sum of the pole and zero filter outputs. Ensure
431    * this is saturated, if necessary.
432    * recover value of PoleVal stored at beginning of routine... */
433   predVal = acc + SubbandDataPt->m_predData.m_predVal;
434   predVal = ssat24(predVal);
435   SubbandDataPt->m_predData.m_zeroVal = acc;
436   SubbandDataPt->m_predData.m_predVal = predVal;
437 
438   /* Update the zero filter delay line by writing the new input sample to the
439    * circular buffer. */
440   SubbandDataPt->m_predData.m_zeroDelayLine
441       .buffer[SubbandDataPt->m_predData.m_zeroDelayLine.pointer] =
442       SubbandDataPt->m_predData.m_zeroDelayLine.modulo;
443   SubbandDataPt->m_predData.m_zeroDelayLine
444       .buffer[SubbandDataPt->m_predData.m_zeroDelayLine.pointer + 24] =
445       SubbandDataPt->m_predData.m_zeroDelayLine.modulo;
446 }
447 
performPredictionFilteringHL(const int32_t invQ,Subband_data * SubbandDataPt)448 XBT_INLINE_ void performPredictionFilteringHL(const int32_t invQ,
449                                               Subband_data* SubbandDataPt) {
450   int32_t poleVal;
451   int32_t acc;
452   int64_t accL;
453   uint32_t pointer;
454   int32_t poleDelayLine;
455   int32_t predVal;
456   int32_t* zeroCoeffPt = SubbandDataPt->m_ZeroCoeffData.m_zeroCoeff;
457   int32_t* poleCoeff = SubbandDataPt->m_PoleCoeffData.m_poleCoeff;
458   int32_t* cbuf_pt;
459   int32_t invQincr_pos;
460   int32_t invQincr_neg;
461   int32_t k;
462   int32_t oldZData;
463   const int32_t roundCte = 0x80000000;
464   /* Pole coefficient and data indices */
465   enum { a1 = 0, a2 = 1 };
466 
467   /* Write the newest pole input sample to the pole delay line.
468    * Ensure the sum of the current dequantised error and the previous
469    * predictor output is saturated if necessary. */
470   poleDelayLine = invQ + SubbandDataPt->m_predData.m_predVal;
471 
472   poleDelayLine = ssat24(poleDelayLine);
473 
474   /* Pole filter convolution. Shift convolution result 1 place to the left
475    * before retrieving it, since the pole coefficients are Q22 (data is Q23)
476    * and we want a Q23 result */
477   accL = ((int64_t)poleCoeff[a2] *
478           (int64_t)SubbandDataPt->m_predData.m_poleDelayLine[a2]);
479   /* Update the pole delay line for the next pass by writing the new input
480    * sample into the 2nd element */
481   SubbandDataPt->m_predData.m_poleDelayLine[a2] = poleDelayLine;
482   accL += ((int64_t)poleCoeff[a1] * (int64_t)poleDelayLine);
483   poleVal = (int32_t)(accL >> 22);
484   poleVal = ssat24(poleVal);
485 
486   /* Create (2^(-7)) * sgn(invQ) in Q22 format. */
487   invQincr_pos = 0L;
488   if (invQ != 0) {
489     invQincr_pos = 0x800000;
490   }
491   if (invQ < 0L) {
492     invQincr_pos = -invQincr_pos;
493   }
494 
495   invQincr_neg = 0x0080 - invQincr_pos;
496   invQincr_pos += 0x0080;
497 
498   pointer = (SubbandDataPt->m_predData.m_zeroDelayLine.pointer++) + 6;
499   cbuf_pt = &SubbandDataPt->m_predData.m_zeroDelayLine.buffer[pointer];
500   /* partial manual unrolling to improve performance */
501   if (SubbandDataPt->m_predData.m_zeroDelayLine.pointer >= 6) {
502     SubbandDataPt->m_predData.m_zeroDelayLine.pointer = 0;
503   }
504 
505   SubbandDataPt->m_predData.m_zeroDelayLine.modulo = invQ;
506 
507   /* Iterate over the number of coefficients for this subband */
508   oldZData = invQ;
509   accL = 0;
510 
511   for (k = 0; k < 6; k++) {
512     uint32_t tmp_round0;
513     int32_t coeffValue;
514     int32_t zData0;
515 
516     /* ------------------------------------------------------------------*/
517     zData0 = (*(cbuf_pt--));
518     coeffValue = *(zeroCoeffPt + k);
519     if (zData0 < 0L) {
520       acc = invQincr_neg - coeffValue;
521     } else {
522       acc = invQincr_pos - coeffValue;
523     }
524     tmp_round0 = acc;
525     acc = (acc >> 8) + coeffValue;
526     if (((tmp_round0 << 23) ^ roundCte) == 0) {
527       acc--;
528     }
529     accL += (int64_t)acc * (int64_t)(oldZData);
530     oldZData = zData0;
531     *(zeroCoeffPt + k) = acc;
532   }
533 
534   acc = (int32_t)(accL >> 22);
535   acc = ssat24(acc);
536 
537   /* Predictor output is the sum of the pole and zero filter outputs. Ensure
538    * this is saturated, if necessary. */
539   predVal = acc + poleVal;
540   predVal = ssat24(predVal);
541   SubbandDataPt->m_predData.m_zeroVal = acc;
542   SubbandDataPt->m_predData.m_predVal = predVal;
543 
544   /* Update the zero filter delay line by writing the new input sample to the
545    * circular buffer. */
546   SubbandDataPt->m_predData.m_zeroDelayLine
547       .buffer[SubbandDataPt->m_predData.m_zeroDelayLine.pointer] =
548       SubbandDataPt->m_predData.m_zeroDelayLine.modulo;
549   SubbandDataPt->m_predData.m_zeroDelayLine
550       .buffer[SubbandDataPt->m_predData.m_zeroDelayLine.pointer + 6] =
551       SubbandDataPt->m_predData.m_zeroDelayLine.modulo;
552 }
553 
554 #endif  // SUBBANDFUNCTIONSCOMMON_H
555