1 /**
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "AptxParameters.h"
17 #include "AptxTables.h"
18 #include "Quantiser.h"
19 
BsearchLL(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)20 XBT_INLINE_ int32_t BsearchLL(const int32_t absDiffSignalShifted,
21                               const int32_t delta,
22                               const int32_t* dqbitTablePrt) {
23   int32_t qCode;
24   reg64_t tmp_acc;
25   int32_t tmp;
26   int32_t lc_delta = delta << 8;
27 
28   qCode = 0;
29 
30   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[32];
31   tmp_acc.s32.h -= absDiffSignalShifted;
32   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
33   if (tmp <= 0) {
34     qCode = 32;
35   }
36 
37   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 16];
38   tmp_acc.s32.h -= absDiffSignalShifted;
39   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
40   if (tmp <= 0) {
41     qCode += 16;
42   }
43   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 8];
44   tmp_acc.s32.h -= absDiffSignalShifted;
45   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
46   if (tmp <= 0) {
47     qCode += 8;
48   }
49 
50   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 4];
51   tmp_acc.s32.h -= absDiffSignalShifted;
52   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
53   if (tmp <= 0) {
54     qCode += 4;
55   }
56 
57   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
58   tmp_acc.s32.h -= absDiffSignalShifted;
59   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
60   if (tmp <= 0) {
61     qCode += 2;
62   }
63 
64   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
65   tmp_acc.s32.h -= absDiffSignalShifted;
66   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
67   if (tmp <= 0) {
68     qCode++;
69   }
70 
71   return (qCode);
72 }
73 
BsearchHL(const int32_t absDiffSignalShifted,const int32_t delta)74 XBT_INLINE_ int32_t BsearchHL(const int32_t absDiffSignalShifted,
75                               const int32_t delta) {
76   reg64_t tmp_acc;
77   int32_t lc_delta = delta << 8;
78 
79   /* first iteration */
80   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)(97040 << 1);
81   tmp_acc.s32.h -= absDiffSignalShifted;
82   return (tmp_acc.s64 <= 0);
83 }
84 
BsearchHH(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)85 XBT_INLINE_ int32_t BsearchHH(const int32_t absDiffSignalShifted,
86                               const int32_t delta,
87                               const int32_t* dqbitTablePrt) {
88   int32_t qCode;
89   reg64_t tmp_acc;
90   int32_t tmp;
91   int32_t lc_delta = delta << 8;
92   qCode = 0;
93 
94   /* first iteration */
95   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[2];
96   tmp_acc.s32.h -= absDiffSignalShifted;
97   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
98   if (tmp <= 0) {
99     qCode += 2;
100   }
101   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
102   tmp_acc.s32.h -= absDiffSignalShifted;
103   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
104   if (tmp <= 0) {
105     qCode++;
106   }
107 
108   return (qCode);
109 }
110 
BsearchLH(const int32_t absDiffSignalShifted,const int32_t delta,const int32_t * dqbitTablePrt)111 XBT_INLINE_ int32_t BsearchLH(const int32_t absDiffSignalShifted,
112                               const int32_t delta,
113                               const int32_t* dqbitTablePrt) {
114   int32_t qCode;
115   reg64_t tmp_acc;
116   int32_t tmp;
117   int32_t lc_delta = delta << 8;
118 
119   /* first iteration */
120   qCode = 0;
121 
122   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[4];
123   tmp_acc.s32.h -= absDiffSignalShifted;
124   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
125   if (tmp <= 0) {
126     qCode = 4;
127   }
128 
129   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 2];
130   tmp_acc.s32.h -= absDiffSignalShifted;
131   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
132   if (tmp <= 0) {
133     qCode += 2;
134   }
135   tmp_acc.s64 = (int64_t)lc_delta * (int64_t)dqbitTablePrt[qCode + 1];
136   tmp_acc.s32.h -= absDiffSignalShifted;
137   tmp = tmp_acc.s32.h | (tmp_acc.u32.l >> 1);
138   if (tmp <= 0) {
139     qCode++;
140   }
141 
142   return (qCode);
143 }
144 
/**
 * Quantise one difference-signal sample using BsearchHL and compute the
 * dither-dependent distance penalty.
 *
 * Reads qdata_pt->minusLambdaDTable and qdata_pt->thresholdTablePtr_sl1;
 * writes qdata_pt->qCode, qdata_pt->altQcode and qdata_pt->distPenalty.
 *
 * reg64_t is declared elsewhere; `.s32.h` / `.u32.l` are used here as the
 * upper / lower 32-bit words of `.s64` (presumed from usage -- confirm
 * against its definition).
 *
 * Every left shift of a value that may be negative is performed on uint32_t.
 * Left-shifting a negative signed integer is undefined behaviour in C; the
 * unsigned shift produces the bit pattern the fixed-point arithmetic expects.
 *
 * @param diffSignal Difference signal to quantise. Its sign is restored on
 *                   the output codes by bitwise inversion.
 * @param ditherVal  Dither value applied to the decision threshold.
 * @param delta      Quantiser step size; scaled by 2^8 before use.
 * @param qdata_pt   Quantiser data: tables are read, codes and penalty are
 *                   written.
 */
void quantiseDifferenceHL(const int32_t diffSignal, const int32_t ditherVal,
                          const int32_t delta, Quantiser_data* qdata_pt) {
  int32_t absDiffSignal;
  int32_t absDiffSignalShifted;
  int32_t index;
  int32_t dithSquared;
  int32_t minusLambdaD;
  int32_t acc;
  int32_t threshDiff;
  reg64_t tmp_acc;
  reg64_t tmp_reg64;
  int32_t tmp_accL;
  int32_t tmp_qCode;
  int32_t tmp_altQcode;
  uint32_t tmp_round0;
  int32_t negScaledDelta;

  /* Form the absolute value of the difference signal and maintain a version
   * that is right-shifted by deltaScale for delta scaling. */
  absDiffSignal = -diffSignal;
  if (diffSignal >= 0) {
    absDiffSignal = diffSignal;
  }
  absDiffSignal = ssat24(absDiffSignal);
  absDiffSignalShifted = absDiffSignal >> deltaScale;
  absDiffSignalShifted = ssat24(absDiffSignalShifted);

  /* Search for the quantised code. This search terminates with the table
   * index of the LARGEST threshold table value for which
   * absDiffSignalShifted >= (delta * threshold)
   */
  index = BsearchHL(absDiffSignalShifted, delta);

  /* We actually wanted the SMALLEST magnitude quantised code for which
   * absDiffSignalShifted < (delta * threshold)
   * i.e. the code with the next highest magnitude than the one we actually
   * found. We could add +1 to the code magnitude to do this, but we need to
   * subtract 1 from the code magnitude to compensate for the "phantom
   * element" at the base of the quantisation table. These two effects cancel
   * out, so the code value is stored unchanged. */
  qdata_pt->qCode = index;

  /* Square the dither and take the rounded, saturated result from the upper
   * word of the 64-bit product. */
  tmp_acc.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);

  acc = tmp_acc.s32.h;

  tmp_round0 = (uint32_t)acc << 8;

  acc = (acc >> 6) + 1;
  acc >>= 1;
  /* Tie correction: undo the round-up when the examined bits are exactly
   * 0x40000000 (looks like convergent rounding -- TODO confirm against the
   * reference implementation). */
  if (tmp_round0 == 0x40000000L) {
    acc--;
  }

  acc = ssat24(acc);

  dithSquared = acc;

  /* Scale the minusLambdaD table entry for this index by
   * ((1 << 23) - dithSquared) and load the accumulator with half of the
   * rounded product. */
  minusLambdaD = qdata_pt->minusLambdaDTable[index];

  tmp_accL = (1 << 23) - dithSquared;
  tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;

  /* Unsigned shifts: the product words may be negative. */
  tmp_round0 = (uint32_t)tmp_acc.s32.l << 8;

  acc = (int32_t)((tmp_acc.u32.l >> 22) | ((uint32_t)tmp_acc.s32.h << 10));
  acc++;
  acc >>= 1;
  if (tmp_round0 == 0x40000000L) {
    acc--;
  }

  // worst case value for acc = 0x000d3e08
  // no saturation required

  /* Add half of the threshold table values at index + 1 and index to the
   * accumulated value. */
  acc += qdata_pt->thresholdTablePtr_sl1[index + 1] >> 1;
  //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
  acc += qdata_pt->thresholdTablePtr_sl1[index] >> 1;
  //// worst case value for acc = 0x511FE3 + 0x362FEC = 874FCF

  /* Form the threshold table difference between index + 1 and index. */
  threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] -
               qdata_pt->thresholdTablePtr_sl1[index];

  /* Based on the sign of the difference signal, either add or subtract the
   * dither-scaled threshold difference from the accumulated value. Recover
   * the final accumulated value (saturated/rounded). */
  if (diffSignal < 0) {
    threshDiff = -threshDiff;
  }
  tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);

  tmp_reg64.s32.h += acc;
  acc = tmp_reg64.s32.h;

  /* Round up when the lower word is at least one half. */
  if (tmp_reg64.u32.l >= 0x80000000) {
    acc++;
  }
  tmp_round0 =
      (tmp_reg64.u32.l >> 1) | ((uint32_t)tmp_reg64.s32.h << 31);

  acc = ssat24(acc);

  if (tmp_round0 == 0x40000000L) {
    acc--;
  }

  /* -(delta * 2^8), shifted as unsigned because -delta is negative for a
   * positive step size. */
  negScaledDelta = (int32_t)((uint32_t)(-delta) << 8);

  acc = (int32_t)((uint32_t)acc << 4);

  /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
   * signal used to determine if dithering alters the quantised code value or
   * not. */
  // worst case value for delta is 0x7d400
  tmp_reg64.s64 = ((int64_t)acc * (int64_t)negScaledDelta);
  tmp_reg64.s32.h += absDiffSignal;
  tmp_round0 =
      (tmp_reg64.u32.l >> 4) | ((uint32_t)tmp_reg64.s32.h << 28);
  acc = tmp_reg64.s32.h + (1 << 2);
  acc >>= 3;
  if (tmp_round0 == 0x40000000L) {
    acc--;
  }

  tmp_qCode = qdata_pt->qCode;
  tmp_altQcode = tmp_qCode - 1;
  /* Check the sign of the distance penalty. Get the sign from the
   * full-precision accumulator, as done in the Kalimba code. */
  if (tmp_reg64.s32.h < 0) {
    /* The distance is -ve. The optimum code is decremented by 1 and the
     * alternative code is 1 greater than this. Negate the rounded distance
     * penalty (form distance magnitude) before writing the value out. */
    tmp_qCode = tmp_altQcode;
    tmp_altQcode++;
    acc = -acc;
  }

  qdata_pt->distPenalty = acc;
  /* If the difference signal is negative, bitwise invert the codes (restores
   * sign to the magnitude). */
  if (diffSignal < 0) {
    tmp_qCode = ~tmp_qCode;
    tmp_altQcode = ~tmp_altQcode;
  }
  qdata_pt->altQcode = tmp_altQcode;
  qdata_pt->qCode = tmp_qCode;
}
301 
/**
 * Quantise one difference-signal sample using BsearchHH and compute the
 * dither-dependent distance penalty.
 *
 * Reads qdata_pt->minusLambdaDTable and qdata_pt->thresholdTablePtr_sl1;
 * writes qdata_pt->qCode, qdata_pt->altQcode and qdata_pt->distPenalty.
 *
 * reg64_t is declared elsewhere; `.s32.h` / `.u32.l` are used here as the
 * upper / lower 32-bit words of `.s64` (presumed from usage -- confirm
 * against its definition).
 *
 * Every left shift of a value that may be negative is performed on uint32_t.
 * Left-shifting a negative signed integer is undefined behaviour in C; the
 * unsigned shift produces the bit pattern the fixed-point arithmetic expects.
 *
 * @param diffSignal Difference signal to quantise. Its sign is restored on
 *                   the output codes by bitwise inversion.
 * @param ditherVal  Dither value applied to the decision threshold.
 * @param delta      Quantiser step size; scaled by 2^8 before use.
 * @param qdata_pt   Quantiser data: tables are read, codes and penalty are
 *                   written.
 */
void quantiseDifferenceHH(const int32_t diffSignal, const int32_t ditherVal,
                          const int32_t delta, Quantiser_data* qdata_pt) {
  int32_t absDiffSignal;
  int32_t absDiffSignalShifted;
  int32_t index;
  int32_t dithSquared;
  int32_t minusLambdaD;
  int32_t acc;
  int32_t threshDiff;
  reg64_t tmp_acc;
  reg64_t tmp_reg64;
  int32_t tmp_accL;
  int32_t tmp_qCode;
  int32_t tmp_altQcode;
  uint32_t tmp_round0;
  int32_t negScaledDelta;

  /* Form the absolute value of the difference signal and maintain a version
   * that is right-shifted by deltaScale for delta scaling. */
  absDiffSignal = -diffSignal;
  if (diffSignal >= 0) {
    absDiffSignal = diffSignal;
  }
  absDiffSignal = ssat24(absDiffSignal);
  absDiffSignalShifted = absDiffSignal >> deltaScale;
  absDiffSignalShifted = ssat24(absDiffSignalShifted);

  /* Binary search for the quantised code. This search terminates with the
   * table index of the LARGEST threshold table value for which
   * absDiffSignalShifted >= (delta * threshold)
   */
  index =
      BsearchHH(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);

  /* We actually wanted the SMALLEST magnitude quantised code for which
   * absDiffSignalShifted < (delta * threshold)
   * i.e. the code with the next highest magnitude than the one we actually
   * found. We could add +1 to the code magnitude to do this, but we need to
   * subtract 1 from the code magnitude to compensate for the "phantom
   * element" at the base of the quantisation table. These two effects cancel
   * out, so the code value is stored unchanged. */
  qdata_pt->qCode = index;

  /* Square the dither and take the rounded, saturated result from the upper
   * word of the 64-bit product. */
  tmp_acc.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);

  acc = tmp_acc.s32.h;

  tmp_round0 = (uint32_t)acc << 8;

  acc = (acc >> 6) + 1;
  acc >>= 1;
  /* Tie correction: undo the round-up when the examined bits are exactly
   * 0x40000000 (looks like convergent rounding -- TODO confirm against the
   * reference implementation). */
  if (tmp_round0 == 0x40000000L) {
    acc--;
  }

  acc = ssat24(acc);

  dithSquared = acc;

  /* Scale the minusLambdaD table entry for this index by
   * ((1 << 23) - dithSquared) and load the accumulator with half of the
   * rounded product. */
  minusLambdaD = qdata_pt->minusLambdaDTable[index];

  tmp_accL = (1 << 23) - dithSquared;
  tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;

  /* Unsigned shifts: the product words may be negative. */
  tmp_round0 = (uint32_t)tmp_acc.s32.l << 8;

  acc = (int32_t)((tmp_acc.u32.l >> 22) | ((uint32_t)tmp_acc.s32.h << 10));
  acc++;
  acc >>= 1;
  if (tmp_round0 == 0x40000000L) {
    acc--;
  }

  // worst case value for acc = 0x000d3e08
  // no saturation required

  /* Add half of the threshold table values at index + 1 and index to the
   * accumulated value. */
  acc += qdata_pt->thresholdTablePtr_sl1[index + 1] >> 1;
  //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
  acc += qdata_pt->thresholdTablePtr_sl1[index] >> 1;
  //// worst case value for acc = 0x511FE3 + 0x362FEC = 874FCF

  /* Form the threshold table difference between index + 1 and index. */
  threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] -
               qdata_pt->thresholdTablePtr_sl1[index];

  /* Based on the sign of the difference signal, either add or subtract the
   * dither-scaled threshold difference from the accumulated value. Recover
   * the final accumulated value (saturated/rounded). */
  if (diffSignal < 0) {
    threshDiff = -threshDiff;
  }
  tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
  tmp_reg64.s32.h += acc;
  acc = tmp_reg64.s32.h;

  /* Round up when the lower word is at least one half. */
  if (tmp_reg64.u32.l >= 0x80000000) {
    acc++;
  }
  tmp_round0 =
      (tmp_reg64.u32.l >> 1) | ((uint32_t)tmp_reg64.s32.h << 31);

  acc = ssat24(acc);

  if (tmp_round0 == 0x40000000L) {
    acc--;
  }

  /* -(delta * 2^8), shifted as unsigned because -delta is negative for a
   * positive step size. */
  negScaledDelta = (int32_t)((uint32_t)(-delta) << 8);

  acc = (int32_t)((uint32_t)acc << 4);

  /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
   * signal used to determine if dithering alters the quantised code value or
   * not. */
  // worst case value for delta is 0x7d400
  tmp_reg64.s64 = ((int64_t)acc * (int64_t)negScaledDelta);
  tmp_reg64.s32.h += absDiffSignal;
  tmp_round0 =
      (tmp_reg64.u32.l >> 4) | ((uint32_t)tmp_reg64.s32.h << 28);
  acc = tmp_reg64.s32.h + (1 << 2);
  acc >>= 3;
  if (tmp_round0 == 0x40000000L) {
    acc--;
  }

  tmp_qCode = qdata_pt->qCode;
  tmp_altQcode = tmp_qCode - 1;
  /* Check the sign of the distance penalty. Get the sign from the
   * full-precision accumulator, as done in the Kalimba code. */
  if (tmp_reg64.s32.h < 0) {
    /* The distance is -ve. The optimum code is decremented by 1 and the
     * alternative code is 1 greater than this. Negate the rounded distance
     * penalty (form distance magnitude) before writing the value out. */
    tmp_qCode = tmp_altQcode;
    tmp_altQcode++;
    acc = -acc;
  }

  qdata_pt->distPenalty = acc;
  /* If the difference signal is negative, bitwise invert the codes (restores
   * sign to the magnitude). */
  if (diffSignal < 0) {
    tmp_qCode = ~tmp_qCode;
    tmp_altQcode = ~tmp_altQcode;
  }
  qdata_pt->altQcode = tmp_altQcode;
  qdata_pt->qCode = tmp_qCode;
}
458 
/**
 * Quantise one difference-signal sample using BsearchLL and compute the
 * dither-dependent distance penalty.
 *
 * Reads qdata_pt->minusLambdaDTable and qdata_pt->thresholdTablePtr_sl1;
 * writes qdata_pt->qCode, qdata_pt->altQcode and qdata_pt->distPenalty.
 *
 * reg64_t is declared elsewhere; `.s32.h` / `.s32.l` / `.u32.l` are used
 * here as the upper / lower 32-bit words of `.s64` (presumed from usage --
 * confirm against its definition).
 *
 * Every left shift of a value that may be negative is performed on uint32_t.
 * Left-shifting a negative signed integer is undefined behaviour in C; the
 * unsigned shift produces the bit pattern the fixed-point arithmetic expects.
 *
 * @param diffSignal Difference signal to quantise. Its sign is restored on
 *                   the output codes by bitwise inversion.
 * @param ditherVal  Dither value applied to the decision threshold.
 * @param delta      Quantiser step size; scaled by 2^8 before use.
 * @param qdata_pt   Quantiser data: tables are read, codes and penalty are
 *                   written.
 */
void quantiseDifferenceLL(const int32_t diffSignal, const int32_t ditherVal,
                          const int32_t delta, Quantiser_data* qdata_pt) {
  int32_t absDiffSignal;
  int32_t absDiffSignalShifted;
  int32_t index;
  int32_t dithSquared;
  int32_t minusLambdaD;
  int32_t acc;
  int32_t threshDiff;
  reg64_t tmp_acc;
  reg64_t tmp_reg64;
  int32_t tmp_accL;
  int32_t tmp_qCode;
  int32_t tmp_altQcode;
  uint32_t tmp_round0;
  int32_t negScaledDelta;

  /* Form the absolute value of the difference signal and maintain a version
   * that is right-shifted by deltaScale for delta scaling. */
  absDiffSignal = -diffSignal;
  if (diffSignal >= 0) {
    absDiffSignal = diffSignal;
  }
  absDiffSignal = ssat24(absDiffSignal);
  absDiffSignalShifted = absDiffSignal >> deltaScale;

  /* Binary search for the quantised code. This search terminates with the
   * table index of the LARGEST threshold table value for which
   * absDiffSignalShifted >= (delta * threshold)
   */
  index =
      BsearchLL(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);

  /* We actually wanted the SMALLEST magnitude quantised code for which
   * absDiffSignalShifted < (delta * threshold)
   * i.e. the code with the next highest magnitude than the one we actually
   * found. We could add +1 to the code magnitude to do this, but we need to
   * subtract 1 from the code magnitude to compensate for the "phantom
   * element" at the base of the quantisation table. These two effects cancel
   * out, so the code value is stored unchanged. */
  qdata_pt->qCode = index;

  /* Square the dither and take the rounded, saturated result from the upper
   * word of the 64-bit product. */
  tmp_acc.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);

  acc = tmp_acc.s32.h;

  tmp_round0 = (uint32_t)acc << 8;

  acc = (acc >> 6) + 1;
  acc >>= 1;
  /* Tie correction: undo the round-up when the examined bits are exactly
   * 0x40000000 (looks like convergent rounding -- TODO confirm against the
   * reference implementation). */
  if (tmp_round0 == 0x40000000L) {
    acc--;
  }

  acc = ssat24(acc);

  dithSquared = acc;

  /* Scale the minusLambdaD table entry for this index by
   * ((1 << 23) - dithSquared) and load the accumulator with half of the
   * rounded product. */
  minusLambdaD = qdata_pt->minusLambdaDTable[index];

  tmp_accL = (1 << 23) - dithSquared;
  tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;

  /* Unsigned shift: the lower product word may read as negative. */
  tmp_round0 = (uint32_t)tmp_acc.s32.l << 8;

  /* Drop 22 fractional bits of the full product, then round the last bit. */
  tmp_acc.s64 >>= 22;
  acc = tmp_acc.s32.l;
  acc++;
  acc >>= 1;
  if (tmp_round0 == 0x40000000L) {
    acc--;
  }

  // worst case value for acc = 0x000d3e08
  // no saturation required

  /* Add half of the threshold table values at index + 1 and index to the
   * accumulated value. */
  acc += qdata_pt->thresholdTablePtr_sl1[index + 1] >> 1;
  //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
  acc += qdata_pt->thresholdTablePtr_sl1[index] >> 1;
  //// worst case value for acc = 0x511FE3 + 0x362FEC = 874FCF

  /* Form the threshold table difference between index + 1 and index. */
  threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] -
               qdata_pt->thresholdTablePtr_sl1[index];

  /* Based on the sign of the difference signal, either add or subtract the
   * dither-scaled threshold difference from the accumulated value. Recover
   * the final accumulated value (saturated/rounded). */
  if (diffSignal < 0) {
    threshDiff = -threshDiff;
  }
  tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);
  tmp_reg64.s32.h += acc;
  acc = tmp_reg64.s32.h;

  /* Round up when the lower word is at least one half. */
  if (tmp_reg64.u32.l >= 0x80000000) {
    acc++;
  }
  tmp_round0 =
      (tmp_reg64.u32.l >> 1) | ((uint32_t)tmp_reg64.s32.h << 31);

  acc = ssat24(acc);

  if (tmp_round0 == 0x40000000L) {
    acc--;
  }

  /* -(delta * 2^8), shifted as unsigned because -delta is negative for a
   * positive step size. */
  negScaledDelta = (int32_t)((uint32_t)(-delta) << 8);

  acc = (int32_t)((uint32_t)acc << 4);

  /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
   * signal used to determine if dithering alters the quantised code value or
   * not. */
  // worst case value for delta is 0x7d400

  tmp_reg64.s64 = ((int64_t)acc * (int64_t)negScaledDelta);
  tmp_reg64.s32.h += absDiffSignal;
  tmp_round0 =
      (tmp_reg64.u32.l >> 4) | ((uint32_t)tmp_reg64.s32.h << 28);
  acc = tmp_reg64.s32.h + (1 << 2);
  acc >>= 3;
  if (tmp_round0 == 0x40000000L) {
    acc--;
  }

  tmp_qCode = qdata_pt->qCode;
  tmp_altQcode = tmp_qCode - 1;
  /* Check the sign of the distance penalty. Get the sign from the
   * full-precision accumulator, as done in the Kalimba code. */
  if (tmp_reg64.s32.h < 0) {
    /* The distance is -ve. The optimum code is decremented by 1 and the
     * alternative code is 1 greater than this. Negate the rounded distance
     * penalty (form distance magnitude) before writing the value out. */
    tmp_qCode = tmp_altQcode;
    tmp_altQcode++;
    acc = -acc;
  }

  qdata_pt->distPenalty = acc;
  /* If the difference signal is negative, bitwise invert the codes (restores
   * sign to the magnitude). */
  if (diffSignal < 0) {
    tmp_qCode = ~tmp_qCode;
    tmp_altQcode = ~tmp_altQcode;
  }
  qdata_pt->altQcode = tmp_altQcode;
  qdata_pt->qCode = tmp_qCode;
}
616 
/**
 * Quantise one difference-signal sample using BsearchLH and compute the
 * dither-dependent distance penalty.
 *
 * Reads qdata_pt->minusLambdaDTable and qdata_pt->thresholdTablePtr_sl1;
 * writes qdata_pt->qCode, qdata_pt->altQcode and qdata_pt->distPenalty.
 *
 * reg64_t is declared elsewhere; `.s32.h` / `.u32.l` are used here as the
 * upper / lower 32-bit words of `.s64` (presumed from usage -- confirm
 * against its definition).
 *
 * Every left shift of a value that may be negative is performed on uint32_t.
 * Left-shifting a negative signed integer is undefined behaviour in C; the
 * unsigned shift produces the bit pattern the fixed-point arithmetic expects.
 *
 * @param diffSignal Difference signal to quantise. Its sign is restored on
 *                   the output codes by bitwise inversion.
 * @param ditherVal  Dither value applied to the decision threshold.
 * @param delta      Quantiser step size; scaled by 2^8 before use.
 * @param qdata_pt   Quantiser data: tables are read, codes and penalty are
 *                   written.
 */
void quantiseDifferenceLH(const int32_t diffSignal, const int32_t ditherVal,
                          const int32_t delta, Quantiser_data* qdata_pt) {
  int32_t absDiffSignal;
  int32_t absDiffSignalShifted;
  int32_t index;
  int32_t dithSquared;
  int32_t minusLambdaD;
  int32_t acc;
  int32_t threshDiff;
  reg64_t tmp_acc;
  reg64_t tmp_reg64;
  int32_t tmp_accL;
  int32_t tmp_qCode;
  int32_t tmp_altQcode;
  uint32_t tmp_round0;
  int32_t negScaledDelta;

  /* Form the absolute value of the difference signal and maintain a version
   * that is right-shifted by deltaScale for delta scaling. */
  absDiffSignal = -diffSignal;
  if (diffSignal >= 0) {
    absDiffSignal = diffSignal;
  }
  absDiffSignal = ssat24(absDiffSignal);
  absDiffSignalShifted = absDiffSignal >> deltaScale;

  /* Binary search for the quantised code. This search terminates with the
   * table index of the LARGEST threshold table value for which
   * absDiffSignalShifted >= (delta * threshold)
   */
  index =
      BsearchLH(absDiffSignalShifted, delta, qdata_pt->thresholdTablePtr_sl1);

  /* We actually wanted the SMALLEST magnitude quantised code for which
   * absDiffSignalShifted < (delta * threshold)
   * i.e. the code with the next highest magnitude than the one we actually
   * found. We could add +1 to the code magnitude to do this, but we need to
   * subtract 1 from the code magnitude to compensate for the "phantom
   * element" at the base of the quantisation table. These two effects cancel
   * out, so the code value is stored unchanged. */
  qdata_pt->qCode = index;

  /* Square the dither and take the rounded, saturated result from the upper
   * word of the 64-bit product. */
  tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)ditherVal);

  acc = tmp_reg64.s32.h;

  tmp_round0 = (uint32_t)acc << 8;

  acc = (acc >> 6) + 1;
  acc >>= 1;
  /* Tie correction: undo the round-up when the examined bits are exactly
   * 0x40000000 (looks like convergent rounding -- TODO confirm against the
   * reference implementation). */
  if (tmp_round0 == 0x40000000L) {
    acc--;
  }

  acc = ssat24(acc);

  dithSquared = acc;

  /* Scale the minusLambdaD table entry for this index by
   * ((1 << 23) - dithSquared). Unlike the halve-then-add form, this variant
   * keeps the extra low bit and halves only after the two threshold values
   * have been added. */
  minusLambdaD = qdata_pt->minusLambdaDTable[index];

  tmp_accL = (1 << 23) - dithSquared;
  tmp_acc.s64 = (int64_t)tmp_accL * minusLambdaD;

  /* Unsigned shifts: the product words may be negative. */
  tmp_round0 = (uint32_t)tmp_acc.s32.l << 8;

  acc = (int32_t)((tmp_acc.u32.l >> 22) | ((uint32_t)tmp_acc.s32.h << 10));
  /* Tie correction is applied at twice the weight (-2) because the halving
   * happens later, after the threshold values are added. */
  if (tmp_round0 == 0x40000000L) {
    acc -= 2;
  }
  acc++;

  // worst case value for acc = 0x000d3e08
  // no saturation required
  /* Add the threshold table values at index + 1 and index to the accumulated
   * value, then halve the sum. */
  acc += qdata_pt->thresholdTablePtr_sl1[index + 1];
  //// worst case value for acc = 0x000d3e08 + 0x43E1DB = 511FE3
  acc += qdata_pt->thresholdTablePtr_sl1[index];
  acc >>= 1;

  /* Form the threshold table difference between index + 1 and index. */
  threshDiff = qdata_pt->thresholdTablePtr_sl1[index + 1] -
               qdata_pt->thresholdTablePtr_sl1[index];

  /* Based on the sign of the difference signal, either add or subtract the
   * dither-scaled threshold difference from the accumulated value. Recover
   * the final accumulated value (saturated/rounded). */
  if (diffSignal < 0) {
    threshDiff = -threshDiff;
  }
  tmp_reg64.s64 = ((int64_t)ditherVal * (int64_t)threshDiff);

  tmp_reg64.s32.h += acc;
  acc = tmp_reg64.s32.h;

  /* Round up when the lower word is at least one half. */
  if (tmp_reg64.u32.l >= 0x80000000) {
    acc++;
  }
  tmp_round0 =
      (tmp_reg64.u32.l >> 1) | ((uint32_t)tmp_reg64.s32.h << 31);

  acc = ssat24(acc);

  if (tmp_round0 == 0x40000000L) {
    acc--;
  }

  /* -(delta * 2^8), shifted as unsigned because -delta is negative for a
   * positive step size. */
  negScaledDelta = (int32_t)((uint32_t)(-delta) << 8);

  acc = (int32_t)((uint32_t)acc << 4);

  /* Form (absDiffSignal * 0.125) - (acc * delta), which is the final distance
   * signal used to determine if dithering alters the quantised code value or
   * not. */
  // worst case value for delta is 0x7d400
  tmp_reg64.s64 = ((int64_t)acc * (int64_t)negScaledDelta);
  tmp_reg64.s32.h += absDiffSignal;
  tmp_round0 =
      (tmp_reg64.u32.l >> 4) | ((uint32_t)tmp_reg64.s32.h << 28);
  acc = tmp_reg64.s32.h + (1 << 2);
  acc >>= 3;
  if (tmp_round0 == 0x40000000L) {
    acc--;
  }

  tmp_qCode = qdata_pt->qCode;
  tmp_altQcode = tmp_qCode - 1;
  /* Check the sign of the distance penalty. Get the sign from the
   * full-precision accumulator, as done in the Kalimba code. */
  if (tmp_reg64.s32.h < 0) {
    /* The distance is -ve. The optimum code is decremented by 1 and the
     * alternative code is 1 greater than this. Negate the rounded distance
     * penalty (form distance magnitude) before writing the value out. */
    tmp_qCode = tmp_altQcode;
    tmp_altQcode++;
    acc = -acc;
  }

  qdata_pt->distPenalty = acc;
  /* If the difference signal is negative, bitwise invert the codes (restores
   * sign to the magnitude). */
  if (diffSignal < 0) {
    tmp_qCode = ~tmp_qCode;
    tmp_altQcode = ~tmp_altQcode;
  }
  qdata_pt->altQcode = tmp_altQcode;
  qdata_pt->qCode = tmp_qCode;
}
772