1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_AUDIO_PRIMITIVES_H
18 #define ANDROID_AUDIO_PRIMITIVES_H
19 
20 #include <math.h>
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <sys/cdefs.h>
24 
25 /** \cond */
26 __BEGIN_DECLS
27 /** \endcond */
28 
29 /**
30  * \file primitives.h
31  * The memcpy_* conversion routines are designed to work in-place (dst the same buffer as src)
32  * only if the types shrink on copy, with the exception of memcpy_to_i16_from_u8().
33  * This allows the loops to go upwards for faster cache access (and may be more flexible
34  * for future optimization later).
35  */
36 
37 /**
38  * Deprecated. Use memcpy_to_i16_from_q4_27() instead (double the pairs for the count).
39  * Neither this function nor memcpy_to_i16_from_q4_27() actually dither.
40  *
41  * Dither and clamp pairs of 32-bit input samples (sums) to 16-bit output samples (out).
42  * Each 32-bit input sample can be viewed as a signed fixed-point Q19.12 of which the
43  * .12 fraction bits are dithered and the 19 integer bits are clamped to signed 16 bits.
44  * Alternatively the input can be viewed as Q4.27, of which the lowest .12 of the fraction
45  * is dithered and the remaining fraction is converted to the output Q.15, with clamping
46  * on the 4 integer guard bits.
47  *
48  * For interleaved stereo, pairs is the number of sample pairs,
49  * and out is an array of interleaved pairs of 16-bit samples per channel.
50  * For mono, pairs is the number of samples / 2, and out is an array of 16-bit samples.
51  * The name "dither" is a misnomer; the current implementation does not actually dither
52  * but uses truncation.  This may change.
53  * The out and sums buffers must either be completely separate (non-overlapping), or
54  * they must both start at the same address.  Partially overlapping buffers are not supported.
55  */
56 void ditherAndClamp(int32_t *out, const int32_t *sums, size_t pairs);
57 
58 /**
59  * Copy samples from signed fixed-point 32-bit Q4.27 to 16-bit Q0.15
60  *
61  *  \param dst     Destination buffer
62  *  \param src     Source buffer
63  *  \param count   Number of samples to copy
64  *
65  * The destination and source buffers must either be completely separate (non-overlapping), or
66  * they must both start at the same address.  Partially overlapping buffers are not supported.
67  */
68 void memcpy_to_i16_from_q4_27(int16_t *dst, const int32_t *src, size_t count);
69 
70 /**
71  * Expand and copy samples from unsigned 8-bit offset by 0x80 to signed 16-bit.
72  *
73  *  \param dst     Destination buffer
74  *  \param src     Source buffer
75  *  \param count   Number of samples to copy
76  *
77  * The destination and source buffers must either be completely separate (non-overlapping), or
78  * they must both start at the same address.  Partially overlapping buffers are not supported.
79  */
80 void memcpy_to_i16_from_u8(int16_t *dst, const uint8_t *src, size_t count);
81 
82 /**
83  * Shrink and copy samples from signed 16-bit to unsigned 8-bit offset by 0x80.
84  *
85  *  \param dst     Destination buffer
86  *  \param src     Source buffer
87  *  \param count   Number of samples to copy
88  *
89  * The destination and source buffers must either be completely separate (non-overlapping), or
90  * they must both start at the same address.  Partially overlapping buffers are not supported.
91  * The conversion is done by truncation, without dithering, so it loses resolution.
92  */
93 void memcpy_to_u8_from_i16(uint8_t *dst, const int16_t *src, size_t count);
94 
95 /**
96  * Copy samples from float to unsigned 8-bit offset by 0x80.
97  *
98  *  \param dst     Destination buffer
99  *  \param src     Source buffer
100  *  \param count   Number of samples to copy
101  *
102  * The destination and source buffers must either be completely separate (non-overlapping), or
103  * they must both start at the same address.  Partially overlapping buffers are not supported.
104  * The conversion is done by truncation, without dithering, so it loses resolution.
105  */
106 void memcpy_to_u8_from_float(uint8_t *dst, const float *src, size_t count);
107 
108 /**
109  * Copy samples from signed fixed-point packed 24 bit Q0.23 to unsigned 8-bit offset by 0x80.
110  *
111  *  \param dst     Destination buffer
112  *  \param src     Source buffer
113  *  \param count   Number of samples to copy
114  *
115  * The destination and source buffers must either be completely separate (non-overlapping), or
116  * they must both start at the same address.  Partially overlapping buffers are not supported.
117  * The conversion is done by truncation, without dithering, so it loses resolution.
118  */
119 void memcpy_to_u8_from_p24(uint8_t *dst, const uint8_t *src, size_t count);
120 
121 /**
122  * Copy samples from signed 32-bit fixed-point Q0.31 to unsigned 8-bit offset by 0x80.
123  *
124  *  \param dst     Destination buffer
125  *  \param src     Source buffer
126  *  \param count   Number of samples to copy
127  *
128  * The destination and source buffers must either be completely separate (non-overlapping), or
129  * they must both start at the same address.  Partially overlapping buffers are not supported.
130  * The conversion is done by truncation, without dithering, so it loses resolution.
131  */
132 void memcpy_to_u8_from_i32(uint8_t *dst, const int32_t *src, size_t count);
133 
134 /**
135  * Copy samples from signed fixed-point 32-bit Q8.23 to unsigned 8-bit offset by 0x80.
136  *
137  *  \param dst     Destination buffer
138  *  \param src     Source buffer
139  *  \param count   Number of samples to copy
140  *
141  * The destination and source buffers must either be completely separate (non-overlapping), or
142  * they must both start at the same address.  Partially overlapping buffers are not supported.
143  * The conversion is done by truncation, without dithering, so it loses resolution.
144  */
145 void memcpy_to_u8_from_q8_23(uint8_t *dst, const int32_t *src, size_t count);
146 
147 /**
148  * Shrink and copy samples from signed 32-bit fixed-point Q0.31 to signed 16-bit Q0.15.
149  *
150  *  \param dst     Destination buffer
151  *  \param src     Source buffer
152  *  \param count   Number of samples to copy
153  *
154  * The destination and source buffers must either be completely separate (non-overlapping), or
155  * they must both start at the same address.  Partially overlapping buffers are not supported.
156  * The conversion is done by truncation, without dithering, so it loses resolution.
157  */
158 void memcpy_to_i16_from_i32(int16_t *dst, const int32_t *src, size_t count);
159 
160 /**
161  * Shrink and copy samples from single-precision floating-point to signed 16-bit.
162  * Each float should be in the range -1.0 to 1.0.  Values outside that range are clamped,
163  * refer to clamp16_from_float().
164  *
165  *  \param dst     Destination buffer
166  *  \param src     Source buffer
167  *  \param count   Number of samples to copy
168  *
169  * The destination and source buffers must either be completely separate (non-overlapping), or
170  * they must both start at the same address.  Partially overlapping buffers are not supported.
171  * The conversion is done by truncation, without dithering, so it loses resolution.
172  */
173 void memcpy_to_i16_from_float(int16_t *dst, const float *src, size_t count);
174 
175 /**
176  * Copy samples from signed fixed-point 32-bit Q4.27 to single-precision floating-point.
177  * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
178  * [0xf8000000, 0x07ffffff].  The full float range is [-16.0, 16.0].  Note the closed range
179  * at 1.0 and 16.0 is due to rounding on conversion to float. See float_from_q4_27() for details.
180  *
181  *  \param dst     Destination buffer
182  *  \param src     Source buffer
183  *  \param count   Number of samples to copy
184  *
185  * The destination and source buffers must either be completely separate (non-overlapping), or
186  * they must both start at the same address.  Partially overlapping buffers are not supported.
187  */
188 void memcpy_to_float_from_q4_27(float *dst, const int32_t *src, size_t count);
189 
190 /**
191  * Copy samples from signed fixed-point 16 bit Q0.15 to single-precision floating-point.
192  * The output float range is [-1.0, 1.0) for the fixed-point range [0x8000, 0x7fff].
193  * No rounding is needed as the representation is exact.
194  *
195  *  \param dst     Destination buffer
196  *  \param src     Source buffer
197  *  \param count   Number of samples to copy
198  *
199  * The destination and source buffers must either be completely separate (non-overlapping), or
200  * they must both start at the same address.  Partially overlapping buffers are not supported.
201  */
202 void memcpy_to_float_from_i16(float *dst, const int16_t *src, size_t count);
203 
204 /**
205  * Copy samples from unsigned fixed-point 8 bit to single-precision floating-point.
206  * The output float range is [-1.0, 1.0) for the fixed-point range [0x00, 0xFF].
207  * No rounding is needed as the representation is exact.
208  *
209  *  \param dst     Destination buffer
210  *  \param src     Source buffer
211  *  \param count   Number of samples to copy
212  *
213  * The destination and source buffers must either be completely separate (non-overlapping), or
214  * they must both start at the same address.  Partially overlapping buffers are not supported.
215  */
216 void memcpy_to_float_from_u8(float *dst, const uint8_t *src, size_t count);
217 
218 /**
219  * Copy samples from signed fixed-point packed 24 bit Q0.23 to single-precision floating-point.
220  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
221  * The output float range is [-1.0, 1.0) for the fixed-point range [0x800000, 0x7fffff].
222  * No rounding is needed as the representation is exact.
223  *
224  *  \param dst     Destination buffer
225  *  \param src     Source buffer
226  *  \param count   Number of samples to copy
227  *
228  * The destination and source buffers must either be completely separate (non-overlapping), or
229  * they must both start at the same address.  Partially overlapping buffers are not supported.
230  */
231 void memcpy_to_float_from_p24(float *dst, const uint8_t *src, size_t count);
232 
233 /**
234  * Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed point 16 bit Q0.15.
235  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
236  * The data is truncated without rounding.
237  *
238  *  \param dst     Destination buffer
239  *  \param src     Source buffer
240  *  \param count   Number of samples to copy
241  *
242  * The destination and source buffers must either be completely separate (non-overlapping), or
243  * they must both start at the same address.  Partially overlapping buffers are not supported.
244  */
245 void memcpy_to_i16_from_p24(int16_t *dst, const uint8_t *src, size_t count);
246 
247 /**
248  * Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed-point 32-bit Q0.31.
249  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
250  * The output data range is [0x80000000, 0x7fffff00] at intervals of 0x100.
251  *
252  *  \param dst     Destination buffer
253  *  \param src     Source buffer
254  *  \param count   Number of samples to copy
255  *
256  * The destination and source buffers must either be completely separate (non-overlapping), or
257  * they must both start at the same address.  Partially overlapping buffers are not supported.
258  */
259 void memcpy_to_i32_from_p24(int32_t *dst, const uint8_t *src, size_t count);
260 
261 /**
262  * Copy samples from signed fixed point 16 bit Q0.15 to signed fixed-point packed 24 bit Q0.23.
263  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
264  * The output data range is [0x800000, 0x7fff00] (not full).
265  * Nevertheless there is no DC offset on the output, if the input has no DC offset.
266  *
267  *  \param dst     Destination buffer
268  *  \param src     Source buffer
269  *  \param count   Number of samples to copy
270  *
271  * The destination and source buffers must either be completely separate (non-overlapping), or
272  * they must both start at the same address.  Partially overlapping buffers are not supported.
273  */
274 void memcpy_to_p24_from_i16(uint8_t *dst, const int16_t *src, size_t count);
275 
276 /**
277  * Copy samples from single-precision floating-point to signed fixed-point packed 24 bit Q0.23.
278  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
279  * The data is clamped and rounded to nearest, ties away from zero. See clamp24_from_float()
280  * for details.
281  *
282  *  \param dst     Destination buffer
283  *  \param src     Source buffer
284  *  \param count   Number of samples to copy
285  *
286  * The destination and source buffers must either be completely separate (non-overlapping), or
287  * they must both start at the same address.  Partially overlapping buffers are not supported.
288  */
289 void memcpy_to_p24_from_float(uint8_t *dst, const float *src, size_t count);
290 
291 /**
292  * Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed-point packed 24 bit Q0.23.
293  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
294  * The data is clamped to the range [0x800000, 0x7fffff].
295  *
296  *  \param dst     Destination buffer
297  *  \param src     Source buffer
298  *  \param count   Number of samples to copy
299  *
300  * The destination and source buffers must either be completely separate (non-overlapping), or
301  * they must both start at the same address.  Partially overlapping buffers are not supported.
302  */
303 void memcpy_to_p24_from_q8_23(uint8_t *dst, const int32_t *src, size_t count);
304 
305 /**
306  * Shrink and copy samples from signed 32-bit fixed-point Q0.31
307  * to signed fixed-point packed 24 bit Q0.23.
308  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
309  *
310  *  \param dst     Destination buffer
311  *  \param src     Source buffer
312  *  \param count   Number of samples to copy
313  *
314  * The destination and source buffers must either be completely separate (non-overlapping), or
315  * they must both start at the same address.  Partially overlapping buffers are not supported.
316  * The conversion is done by truncation, without dithering, so it loses resolution.
317  */
318 void memcpy_to_p24_from_i32(uint8_t *dst, const int32_t *src, size_t count);
319 
320 /**
321  * Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q8.23.
322  * The output data range is [0xff800000, 0x007fff00] at intervals of 0x100.
323  *
324  *  \param dst     Destination buffer
325  *  \param src     Source buffer
326  *  \param count   Number of samples to copy
327  *
328  * The destination and source buffers must either be completely separate (non-overlapping), or
329  * they must both start at the same address.  Partially overlapping buffers are not supported.
330  */
331 void memcpy_to_q8_23_from_i16(int32_t *dst, const int16_t *src, size_t count);
332 
333 /**
334  * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q8.23.
335  * This copy will clamp the Q8.23 representation to [0xff800000, 0x007fffff] even though there
336  * are guard bits available. Fractional lsb is rounded to nearest, ties away from zero.
337  * See clamp24_from_float() for details.
338  *
339  *  \param dst     Destination buffer
340  *  \param src     Source buffer
341  *  \param count   Number of samples to copy
342  *
343  * The destination and source buffers must either be completely separate (non-overlapping), or
344  * they must both start at the same address.  Partially overlapping buffers are not supported.
345  */
346 void memcpy_to_q8_23_from_float_with_clamp(int32_t *dst, const float *src, size_t count);
347 
348 /**
349  * Copy samples from signed fixed point packed 24-bit Q0.23 to signed fixed-point 32-bit Q8.23.
350  * The output data range is [0xff800000, 0x007fffff].
351  *
352  *  \param dst     Destination buffer
353  *  \param src     Source buffer
354  *  \param count   Number of samples to copy
355  *
356  * The destination and source buffers must either be completely separate (non-overlapping), or
357  * they must both start at the same address.  Partially overlapping buffers are not supported.
358  */
359 void memcpy_to_q8_23_from_p24(int32_t *dst, const uint8_t *src, size_t count);
360 
361 /**
362  * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q4.27.
363  * The conversion will use the full available Q4.27 range, including guard bits.
364  * Fractional lsb is rounded to nearest, ties away from zero.
365  * See clampq4_27_from_float() for details.
366  *
367  *  \param dst     Destination buffer
368  *  \param src     Source buffer
369  *  \param count   Number of samples to copy
370  *
371  * The destination and source buffers must either be completely separate (non-overlapping), or
372  * they must both start at the same address.  Partially overlapping buffers are not supported.
373  */
374 void memcpy_to_q4_27_from_float(int32_t *dst, const float *src, size_t count);
375 
376 /**
377  * Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed point 16-bit Q0.15.
378  * The data is clamped, and truncated without rounding.
379  *
380  *  \param dst     Destination buffer
381  *  \param src     Source buffer
382  *  \param count   Number of samples to copy
383  *
384  * The destination and source buffers must either be completely separate (non-overlapping), or
385  * they must both start at the same address.  Partially overlapping buffers are not supported.
386  */
387 void memcpy_to_i16_from_q8_23(int16_t *dst, const int32_t *src, size_t count);
388 
389 /**
390  * Copy samples from signed fixed-point 32-bit Q8.23 to single-precision floating-point.
391  * The nominal output float range is [-1.0, 1.0) for the fixed-point
392  * range [0xff800000, 0x007fffff]. The maximum output float range is [-256.0, 256.0).
393  * No rounding is needed as the representation is exact for nominal values.
394  * Rounding for overflow values is to nearest, ties to even.
395  *
396  *  \param dst     Destination buffer
397  *  \param src     Source buffer
398  *  \param count   Number of samples to copy
399  *
400  * The destination and source buffers must either be completely separate (non-overlapping), or
401  * they must both start at the same address.  Partially overlapping buffers are not supported.
402  */
403 void memcpy_to_float_from_q8_23(float *dst, const int32_t *src, size_t count);
404 
405 /**
406  * Expand and copy samples from unsigned 8-bit offset by 0x80 to signed 32-bit.
407  *
408  *  \param dst     Destination buffer
409  *  \param src     Source buffer
410  *  \param count   Number of samples to copy
411  *
412  * The destination and source buffers must either be completely separate (non-overlapping), or
413  * they must both start at the same address.  Partially overlapping buffers are not supported.
414  */
415 void memcpy_to_i32_from_u8(int32_t *dst, const uint8_t *src, size_t count);
416 
417 /**
418  * Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q0.31.
419  * The output data range is [0x80000000, 0x7fff0000] at intervals of 0x10000.
420  *
421  *  \param dst     Destination buffer
422  *  \param src     Source buffer
423  *  \param count   Number of samples to copy
424  *
425  * The destination and source buffers must either be completely separate (non-overlapping), or
426  * they must both start at the same address.  Partially overlapping buffers are not supported.
427  */
428 void memcpy_to_i32_from_i16(int32_t *dst, const int16_t *src, size_t count);
429 
430 /**
431  * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q0.31.
432  * If rounding is needed on truncation, the fractional lsb is rounded to nearest,
433  * ties away from zero. See clamp32_from_float() for details.
434  *
435  *  \param dst     Destination buffer
436  *  \param src     Source buffer
437  *  \param count   Number of samples to copy
438  *
439  * The destination and source buffers must either be completely separate (non-overlapping), or
440  * they must both start at the same address.  Partially overlapping buffers are not supported.
441  */
442 void memcpy_to_i32_from_float(int32_t *dst, const float *src, size_t count);
443 
444 /**
445  * Copy samples from signed fixed-point 32-bit Q0.31 to single-precision floating-point.
446  * The float range is [-1.0, 1.0] for the fixed-point range [0x80000000, 0x7fffffff].
447  * Rounding is done according to float_from_i32().
448  *
449  *  \param dst     Destination buffer
450  *  \param src     Source buffer
451  *  \param count   Number of samples to copy
452  *
453  * The destination and source buffers must either be completely separate (non-overlapping), or
454  * they must both start at the same address.  Partially overlapping buffers are not supported.
455  */
456 void memcpy_to_float_from_i32(float *dst, const int32_t *src, size_t count);
457 
458 /**
459  * Copy samples from unrestricted float to range restricted float [-absMax, absMax].
460  * Any float sample not in the range [-absMax, absMax] will be clamped in this range.
461  *
462  *  \param dst     Destination buffer
463  *  \param src     Source buffer
464  *  \param count   Number of samples to copy
465  *  \param absMax  Maximum of the absolute value of the copied samples.
466  *
467  * The destination and source buffers must either be completely separate (non-overlapping), or
468  * they must both start at the same address.  Partially overlapping buffers are not supported.
469  * Note: NaN is clamped to absMax, not 0, for performance reasons (~2x faster).
470  */
471 void memcpy_to_float_from_float_with_clamping(float *dst, const float *src, size_t count,
472                                               float absMax);
473 
474 /**
475  * Downmix pairs of interleaved stereo input 16-bit samples to mono output 16-bit samples.
476  *
477  *  \param dst     Destination buffer
478  *  \param src     Source buffer
479  *  \param count   Number of stereo frames to downmix
480  *
481  * The destination and source buffers must be completely separate (non-overlapping).
482  * The current implementation truncates the mean rather than dither, but this may change.
483  */
484 void downmix_to_mono_i16_from_stereo_i16(int16_t *dst, const int16_t *src, size_t count);
485 
486 /**
487  * Upmix mono input 16-bit samples to pairs of interleaved stereo output 16-bit samples by
488  * duplicating.
489  *
490  *  \param dst     Destination buffer
491  *  \param src     Source buffer
492  *  \param count   Number of mono samples to upmix
493  *
494  * The destination and source buffers must either be completely separate (non-overlapping), or
495  * they must both start at the same address.  Partially overlapping buffers are not supported.
496  */
497 void upmix_to_stereo_i16_from_mono_i16(int16_t *dst, const int16_t *src, size_t count);
498 
499 /**
500  * Downmix pairs of interleaved stereo input float samples to mono output float samples
501  * by averaging the stereo pair together.
502  *
503  *  \param dst     Destination buffer
504  *  \param src     Source buffer
505  *  \param count   Number of stereo frames to downmix
506  *
507  * The destination and source buffers must either be completely separate (non-overlapping), or
508  * they must both start at the same address.  Partially overlapping buffers are not supported.
509  */
510 void downmix_to_mono_float_from_stereo_float(float *dst, const float *src, size_t count);
511 
512 /**
513  * Upmix mono input float samples to pairs of interleaved stereo output float samples by
514  * duplicating.
515  *
516  *  \param dst     Destination buffer
517  *  \param src     Source buffer
518  *  \param count   Number of mono samples to upmix
519  *
520  * The destination and source buffers must either be completely separate (non-overlapping), or
521  * they must both start at the same address.  Partially overlapping buffers are not supported.
522  */
523 void upmix_to_stereo_float_from_mono_float(float *dst, const float *src, size_t count);
524 
525 /**
526  * \return the total number of non-zero 32-bit samples.
527  */
528 size_t nonZeroMono32(const int32_t *samples, size_t count);
529 
530 /**
531  * \return the total number of non-zero 16-bit samples.
532  */
533 size_t nonZeroMono16(const int16_t *samples, size_t count);
534 
535 /**
536  * \return the total number of non-zero stereo frames, where a frame is considered non-zero
537  * if either of its constituent 32-bit samples is non-zero.
538  */
539 size_t nonZeroStereo32(const int32_t *frames, size_t count);
540 
541 /**
542  * \return the total number of non-zero stereo frames, where a frame is considered non-zero
543  * if either of its constituent 16-bit samples is non-zero.
544  */
545 size_t nonZeroStereo16(const int16_t *frames, size_t count);
546 
547 /**
548  * Copy frames, selecting source samples based on a source channel mask to fit
549  * the destination channel mask. Unmatched channels in the destination channel mask
550  * are zero filled. Unmatched channels in the source channel mask are dropped.
551  * Channels present in the channel mask are represented by set bits in the
552  * uint32_t value and are matched without further interpretation.
553  *
554  *  \param dst         Destination buffer
555  *  \param dst_mask    Bit mask corresponding to destination channels present
556  *  \param src         Source buffer
557  *  \param src_mask    Bit mask corresponding to source channels present
558  *  \param sample_size Size of each sample in bytes.  Must be 1, 2, 3, or 4.
559  *  \param count       Number of frames to copy
560  *
561  * The destination and source buffers must be completely separate (non-overlapping).
562  * If the sample size is not in range, the function will abort.
563  */
564 void memcpy_by_channel_mask(void *dst, uint32_t dst_mask,
565         const void *src, uint32_t src_mask, size_t sample_size, size_t count);
566 
567 /**
568  * Copy frames, selecting source samples based on an index array (idxary).
569  * The idxary[] consists of dst_channels number of elements.
570  * The ith element of idxary[] corresponds to the ith destination channel.
571  * A non-negative value is the channel index in the source frame.
572  * A negative index (-1) represents filling with 0.
573  *
574  * Example: Swapping L and R channels for stereo streams
575  * <PRE>
576  * idxary[0] = 1;
577  * idxary[1] = 0;
578  * </PRE>
579  *
580  * Example: Copying a mono source to the front center 5.1 channel
581  * <PRE>
582  * idxary[0] = -1;
583  * idxary[1] = -1;
584  * idxary[2] = 0;
585  * idxary[3] = -1;
586  * idxary[4] = -1;
587  * idxary[5] = -1;
588  * </PRE>
589  *
590  * This copy allows swizzling of channels or replication of channels.
591  *
592  *  \param dst           Destination buffer
593  *  \param dst_channels  Number of destination channels per frame
594  *  \param src           Source buffer
595  *  \param src_channels  Number of source channels per frame
596  *  \param idxary        Array of indices representing channels in the source frame
597  *  \param sample_size   Size of each sample in bytes.  Must be 1, 2, 3, or 4.
598  *  \param count         Number of frames to copy
599  *
600  * The destination and source buffers must be completely separate (non-overlapping).
601  * If the sample size is not in range, the function will abort.
602  */
603 void memcpy_by_index_array(void *dst, uint32_t dst_channels,
604         const void *src, uint32_t src_channels,
605         const int8_t *idxary, size_t sample_size, size_t count);
606 
607 /**
608  * Prepares an index array (idxary) from channel masks, which can be later
609  * used by memcpy_by_index_array().
610  *
611  * \return the number of array elements required.
612  * This may be greater than idxcount, so the return value should be checked
613  * if idxary size is less than 32.
614  *
615  * Note that idxary is a caller allocated array
616  * of at least as many channels as present in the dst_mask.
617  * Channels present in the channel mask are represented by set bits in the
618  * uint32_t value and are matched without further interpretation.
619  *
620  * This function is typically used for converting audio data with different
621  * channel position masks.
622  *
623  *  \param idxary      Updated array of indices of channels in the src frame for the dst frame
624  *  \param idxcount    Number of caller allocated elements in idxary
625  *  \param dst_mask    Bit mask corresponding to destination channels present
626  *  \param src_mask    Bit mask corresponding to source channels present
627  */
628 size_t memcpy_by_index_array_initialization(int8_t *idxary, size_t idxcount,
629         uint32_t dst_mask, uint32_t src_mask);
630 
631 /**
632  * Prepares an index array (idxary) from channel masks, which can be later
633  * used by memcpy_by_index_array().
634  *
635  * \return the number of array elements required.
636  *
637  * For a source channel index mask, the source channels will map to the destination
638  * channels as if counting the set bits in dst_mask in order from lsb to msb
639  * (zero bits are ignored). The ith bit of the src_mask corresponds to the
640  * ith SET bit of dst_mask and the ith destination channel.  Hence, a zero ith
641  * bit of the src_mask indicates that the ith destination channel plays silence.
642  *
643  *  \param idxary      Updated array of indices of channels in the src frame for the dst frame
644  *  \param idxcount    Number of caller allocated elements in idxary
645  *  \param dst_mask    Bit mask corresponding to destination channels present
646  *  \param src_mask    Bit mask corresponding to source channels present
647  */
648 size_t memcpy_by_index_array_initialization_src_index(int8_t *idxary, size_t idxcount,
649         uint32_t dst_mask, uint32_t src_mask);
650 
651 /**
652  * Prepares an index array (idxary) from channel mask bits, which can be later
653  * used by memcpy_by_index_array().
654  *
655  * \return the number of array elements required.
656  *
657  * This initialization is for a destination channel index mask from a positional
658  * source mask.
659  *
660  * For a destination channel index mask, the input channels will map
661  * to the destination channels, with the ith SET bit in the source bits corresponding
662  * to the ith bit in the destination bits. If there is a zero bit in the middle
663  * of set destination bits (unlikely), the corresponding source channel will
664  * be dropped.
665  *
666  *  \param idxary      Updated array of indices of channels in the src frame for the dst frame
667  *  \param idxcount    Number of caller allocated elements in idxary
668  *  \param dst_mask    Bit mask corresponding to destination channels present
669  *  \param src_mask    Bit mask corresponding to source channels present
670  */
671 size_t memcpy_by_index_array_initialization_dst_index(int8_t *idxary, size_t idxcount,
672         uint32_t dst_mask, uint32_t src_mask);
673 
/**
 * Add and clamp signed 16-bit samples.
 *
 *  \param dst     Destination buffer
 *  \param src     Source buffer
 *  \param count   Number of samples to add
 *
 * The destination and source buffers must either be completely separate (non-overlapping), or
 * they must both start at the same address.  Partially overlapping buffers are not supported.
 */
void accumulate_i16(int16_t *dst, const int16_t *src, size_t count);
685 
/**
 * Add and clamp unsigned 8-bit samples.
 *
 *  \param dst     Destination buffer
 *  \param src     Source buffer
 *  \param count   Number of samples to add
 *
 * The destination and source buffers must either be completely separate (non-overlapping), or
 * they must both start at the same address.  Partially overlapping buffers are not supported.
 */
void accumulate_u8(uint8_t *dst, const uint8_t *src, size_t count);
697 
/**
 * Add and clamp packed 24-bit Q0.23 samples.
 *
 *  \param dst     Destination buffer
 *  \param src     Source buffer
 *  \param count   Number of samples to add
 *
 * The destination and source buffers must either be completely separate (non-overlapping), or
 * they must both start at the same address.  Partially overlapping buffers are not supported.
 */
void accumulate_p24(uint8_t *dst, const uint8_t *src, size_t count);
709 
/**
 * Add and clamp 32-bit Q8.23 samples.
 *
 *  \param dst     Destination buffer
 *  \param src     Source buffer
 *  \param count   Number of samples to add
 *
 * The destination and source buffers must either be completely separate (non-overlapping), or
 * they must both start at the same address.  Partially overlapping buffers are not supported.
 */
void accumulate_q8_23(int32_t *dst, const int32_t *src, size_t count);
721 
/**
 * Add and clamp signed 32-bit Q0.31 samples.
 *
 *  \param dst     Destination buffer
 *  \param src     Source buffer
 *  \param count   Number of samples to add
 *
 * The destination and source buffers must either be completely separate (non-overlapping), or
 * they must both start at the same address.  Partially overlapping buffers are not supported.
 */
void accumulate_i32(int32_t *dst, const int32_t *src, size_t count);
733 
/**
 * Add float samples. Result is not clamped.
 *
 *  \param dst     Destination buffer
 *  \param src     Source buffer
 *  \param count   Number of samples to add
 *
 * The destination and source buffers must either be completely separate (non-overlapping), or
 * they must both start at the same address.  Partially overlapping buffers are not supported.
 */
void accumulate_float(float *dst, const float *src, size_t count);
745 
/**
 * Clamp (aka hard limit or clip) a signed 32-bit sample to 16-bit range.
 *
 *  \param sample  Signed 32-bit sample
 *  \return sample limited to [-32768, 32767]
 */
static inline int16_t clamp16(int32_t sample)
{
    /* Plain comparisons avoid depending on the result of right-shifting a
     * negative value, which is implementation-defined in C.
     */
    if (sample > INT16_MAX) {
        return INT16_MAX;
    }
    if (sample < INT16_MIN) {
        return INT16_MIN;
    }
    return (int16_t)sample;
}
755 
/**
 * Clamp (aka hard limit or clip) a signed 64-bit sample to 32-bit range.
 *
 *  \param sample  Signed 64-bit sample
 *  \return sample limited to [-2147483648, 2147483647]
 */
static inline int32_t clamp32(int64_t sample)
{
    /* Plain comparisons avoid depending on the result of right-shifting a
     * negative value, which is implementation-defined in C.
     */
    if (sample > INT32_MAX) {
        return INT32_MAX;
    }
    if (sample < INT32_MIN) {
        return INT32_MIN;
    }
    return (int32_t)sample;
}
765 
766 /**
767  * Convert a IEEE 754 single precision float [-1.0, 1.0) to int16_t [-32768, 32767]
768  * with clamping.  Note the open bound at 1.0, values within 1/65536 of 1.0 map
769  * to 32767 instead of 32768 (early clamping due to the smaller positive integer subrange).
770  *
771  * Values outside the range [-1.0, 1.0) are properly clamped to -32768 and 32767,
772  * including -Inf and +Inf. NaN will generally be treated either as -32768 or 32767,
773  * depending on the sign bit inside NaN (whose representation is not unique).
774  * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
775  *
776  * OLD code disabled: Rounding of 0.5 lsb is to even (default for IEEE 754).
777  * NEW code enabled: Rounding of 0.5 lsb is away from 0.
778  */
clamp16_from_float(float f)779 static inline int16_t clamp16_from_float(float f)
780 {
781 #if 0
782     /* Offset is used to expand the valid range of [-1.0, 1.0) into the 16 lsbs of the
783      * floating point significand. The normal shift is 3<<22, but the -15 offset
784      * is used to multiply by 32768.
785      */
786     static const float offset = (float)(3 << (22 - 15));
787     /* zero = (0x10f << 22) =  0x43c00000 (not directly used) */
788     static const int32_t limneg = (0x10f << 22) /*zero*/ - 32768; /* 0x43bf8000 */
789     static const int32_t limpos = (0x10f << 22) /*zero*/ + 32767; /* 0x43c07fff */
790 
791     union {
792         float f;
793         int32_t i;
794     } u;
795 
796     u.f = f + offset; /* recenter valid range */
797     /* Now the valid range is represented as integers between [limneg, limpos].
798      * Clamp using the fact that float representation (as an integer) is an ordered set.
799      */
800     if (u.i < limneg)
801         u.i = -32768;
802     else if (u.i > limpos)
803         u.i = 32767;
804     return u.i; /* Return lower 16 bits, the part of interest in the significand. */
805 #else
806     static const float scale = 1 << 15;
807     return roundf(fmaxf(fminf(f * scale, scale - 1.f), -scale));
808 #endif
809 }
810 
811 /**
812  * Convert a IEEE 754 single precision float [-1.0, 1.0) to uint8_t [0, 0xff]
813  * with clamping.  Note the open bound at 1.0, values within 1/128 of 1.0 map
814  * to 255 instead of 256 (early clamping due to the smaller positive integer subrange).
815  *
816  * Values outside the range [-1.0, 1.0) are properly clamped to 0 and 255,
817  * including -Inf and +Inf. NaN will generally be treated either as 0 or 255,
818  * depending on the sign bit inside NaN (whose representation is not unique).
819  * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
820  *
821  * OLD code disabled: Rounding of 0.5 lsb is to even (default for IEEE 754).
822  * NEW code enabled: Rounding of 0.5 lsb is away from 0.
823  */
clamp8_from_float(float f)824 static inline uint8_t clamp8_from_float(float f)
825 {
826 #if 0
827     /* Offset is used to expand the valid range of [-1.0, 1.0) into the 16 lsbs of the
828      * floating point significand. The normal shift is 3<<22, but the -7 offset
829      * is used to multiply by 128.
830      */
831     static const float offset = (float)((3 << (22 - 7)) + 1 /* to cancel -1.0 */);
832     /* zero = (0x11f << 22) =  0x47c00000 */
833     static const int32_t limneg = (0x11f << 22) /*zero*/;
834     static const int32_t limpos = (0x11f << 22) /*zero*/ + 255; /* 0x47c000ff */
835 
836     union {
837         float f;
838         int32_t i;
839     } u;
840 
841     u.f = f + offset; /* recenter valid range */
842     /* Now the valid range is represented as integers between [limneg, limpos].
843      * Clamp using the fact that float representation (as an integer) is an ordered set.
844      */
845     if (u.i < limneg)
846         return 0;
847     if (u.i > limpos)
848         return 255;
849     return u.i; /* Return lower 8 bits, the part of interest in the significand. */
850 #else
851     return roundf(fmaxf(fminf(f * 128.f + 128.f, 255.f), 0.f));
852 #endif
853 }
854 
/**
 * Convert a signed fixed-point 32-bit Q8.23 value to uint8_t [0, 0xff]
 * with clamping.  The output is offset binary: 0 maps to 0x80.
 *
 * Values outside the range [-0x800000, 0x7fffff] are clamped to that range.
 * The low 16 bits are discarded (no rounding).
 *
 *  \param ival  Q8.23 input value
 *  \return the top 8 bits of the clamped Q0.23 value, offset by 0x80
 */
static inline uint8_t clamp8_from_q8_23(int32_t ival)
{
    static const int32_t limpos = 0x7fffff;
    static const int32_t limneg = -0x800000;

    if (ival < limneg) {
        return 0;
    }
    if (ival > limpos) {
        return 0xff;
    }
    /* Bias before shifting so the shifted value is never negative; this gives the
     * same result as (ival >> 16) + 0x80 with an arithmetic shift, without relying
     * on implementation-defined behavior.
     */
    return (uint8_t)((ival - limneg) >> 16);
}
873 
874 /**
875  * Convert a single-precision floating point value to a Q0.23 integer value, stored in a
876  * 32 bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
877  *
878  * OLD code disabled: Rounds to nearest, ties away from 0.
879  * NEW code enabled: Rounding of 0.5 lsb is away from 0.
880  *
881  * Values outside the range [-1.0, 1.0) are properly clamped to -8388608 and 8388607,
882  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
883  * depending on hardware and future implementation of this function.
884  */
clamp24_from_float(float f)885 static inline int32_t clamp24_from_float(float f)
886 {
887 #if 0
888     static const float scale = (float)(1 << 23);
889     static const float limpos = 0x7fffff / scale;
890     static const float limneg = -0x800000 / scale;
891 
892     if (f <= limneg) {
893         return -0x800000;
894     } else if (f >= limpos) {
895         return 0x7fffff;
896     }
897     f *= scale;
898     /* integer conversion is through truncation (though int to float is not).
899      * ensure that we round to nearest, ties away from 0.
900      */
901     return f > 0 ? f + 0.5 : f - 0.5;
902 #else
903     static const float scale = 1 << 23;
904     return roundf(fmaxf(fminf(f * scale, scale - 1.f), -scale));
905 #endif
906 }
907 
/**
 * Convert a signed fixed-point 32-bit Q8.23 value to a Q0.23 integer value,
 * stored in a 32-bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
 *
 * Values outside the range [-0x800000, 0x7fffff] are clamped to that range.
 *
 *  \param ival  Q8.23 input value
 *  \return ival limited to [-0x800000, 0x7fffff]
 */
static inline int32_t clamp24_from_q8_23(int32_t ival)
{
    static const int32_t limpos = 0x7fffff;
    static const int32_t limneg = -0x800000;

    if (ival < limneg) {
        return limneg;
    }
    if (ival > limpos) {
        return limpos;
    }
    return ival;
}
926 
/**
 * Convert a single-precision floating point value to a Q4.27 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [-16.0, 16.0) are clamped to -2147483648 and 2147483647,
 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 *
 *  \param f  Input value, nominally in [-16.0, 16.0)
 *  \return f scaled by 2^27, rounded and clamped to the int32_t range
 */
static inline int32_t clampq4_27_from_float(float f)
{
    static const float scale = (float)(1UL << 27);
    static const float limpos = 16.;
    static const float limneg = -16.;

    if (f <= limneg) {
        /* INT32_MIN rather than -0x80000000: the latter is an unsigned expression. */
        return INT32_MIN;
    }
    if (f >= limpos) {
        return INT32_MAX;
    }
    f *= scale;
    /* integer conversion is through truncation (though int to float is not).
     * ensure that we round to nearest, ties away from 0.
     */
    return (int32_t)(f > 0 ? f + 0.5 : f - 0.5);
}
952 
/**
 * Convert a single-precision floating point value to a Q0.31 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [-1.0, 1.0) are clamped to -2147483648 and 2147483647,
 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 *
 *  \param f  Input sample, nominally in [-1.0, 1.0)
 *  \return f scaled by 2^31, rounded and clamped to the int32_t range
 */
static inline int32_t clamp32_from_float(float f)
{
    static const float scale = (float)(1UL << 31);
    static const float limpos = 1.;
    static const float limneg = -1.;

    if (f <= limneg) {
        /* INT32_MIN rather than -0x80000000: the latter is an unsigned expression. */
        return INT32_MIN;
    }
    if (f >= limpos) {
        return INT32_MAX;
    }
    f *= scale;
    /* integer conversion is through truncation (though int to float is not).
     * ensure that we round to nearest, ties away from 0.
     */
    return (int32_t)(f > 0 ? f + 0.5 : f - 0.5);
}
978 
/**
 * Convert a signed fixed-point 32-bit Q4.27 value to single-precision floating-point.
 * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
 * [0xf8000000, 0x07ffffff].  The full float range is [-16.0, 16.0].
 *
 * Note the closed range at 1.0 and 16.0 is due to rounding on conversion to float.
 * In more detail: if the fixed-point integer exceeds the 24 bit significand of single
 * precision floating point, the 0.5 lsb in the significand conversion will round
 * towards even, as per IEEE 754 default.
 *
 *  \param ival  Q4.27 input value
 *  \return ival / 2^27 as a float
 */
static inline float float_from_q4_27(int32_t ival)
{
    /* The scale factor is the reciprocal of the fractional bits.
     *
     * Since the scale factor is a power of 2, the scaling is exact, and there
     * is no rounding due to the multiplication - the bit pattern is preserved.
     * However, there may be rounding due to the fixed-point to float conversion,
     * as described above.
     */
    static const float scale = 1. / (float)(1UL << 27);

    return ival * scale;
}
1002 
/**
 * Convert an unsigned fixed-point 32-bit U4.28 value to single-precision floating-point.
 * The nominal output float range is [0.0, 1.0] if the fixed-point range is
 * [0x00000000, 0x10000000].  The full float range is [0.0, 16.0].
 *
 * Note the closed range at 1.0 and 16.0 is due to rounding on conversion to float.
 * In more detail: if the fixed-point integer exceeds the 24 bit significand of single
 * precision floating point, the 0.5 lsb in the significand conversion will round
 * towards even, as per IEEE 754 default.
 *
 *  \param uval  U4.28 input value
 *  \return uval / 2^28 as a float
 */
static inline float float_from_u4_28(uint32_t uval)
{
    /* Power-of-2 scale: the multiplication itself is exact. */
    static const float scale = 1. / (float)(1UL << 28);

    return uval * scale;
}
1019 
/**
 * Convert an unsigned fixed-point 16-bit U4.12 value to single-precision floating-point.
 * The nominal output float range is [0.0, 1.0] if the fixed-point range is
 * [0x0000, 0x1000].  The full float range is [0.0, 16.0).
 *
 *  \param uval  U4.12 input value
 *  \return uval / 2^12 as a float
 */
static inline float float_from_u4_12(uint16_t uval)
{
    /* Power-of-2 scale: the multiplication itself is exact. */
    static const float scale = 1. / (float)(1UL << 12);

    return uval * scale;
}
1031 
/**
 * Convert a single-precision floating point value to a U4.28 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [0, 16.0] are clamped to [0, 4294967295]
 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 *
 *  \param f  Input value, nominally in [0.0, 16.0]
 *  \return f scaled by 2^28, rounded and clamped to the uint32_t range
 */
static inline uint32_t u4_28_from_float(float f)
{
    static const float scale = (float)(1 << 28);
    static const float limpos = ((float) 0xffffffffUL) / scale;

    if (f <= 0.) {
        return 0;
    }
    if (f >= limpos) {
        return 0xffffffff;
    }
    /* integer conversion is through truncation (though int to float is not).
     * ensure that we round to nearest, ties away from 0.
     */
    return (uint32_t)(f * scale + 0.5);
}
1055 
/**
 * Convert a single-precision floating point value to a U4.12 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [0, 16.0) are clamped to [0, 65535]
 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 *
 *  \param f  Input value, nominally in [0.0, 16.0)
 *  \return f scaled by 2^12, rounded and clamped to the uint16_t range
 */
static inline uint16_t u4_12_from_float(float f)
{
    static const float scale = (float)(1 << 12);
    static const float limpos = 0xffff / scale;

    if (f <= 0.) {
        return 0;
    }
    if (f >= limpos) {
        return 0xffff;
    }
    /* integer conversion is through truncation (though int to float is not).
     * ensure that we round to nearest, ties away from 0.
     */
    return (uint16_t)(f * scale + 0.5);
}
1079 
/**
 * Convert a signed fixed-point 16-bit Q0.15 value to single-precision floating-point.
 * The output float range is [-1.0, 1.0) for the fixed-point range
 * [0x8000, 0x7fff].
 *
 * There is no rounding, the conversion and representation is exact.
 *
 *  \param ival  Q0.15 input sample
 *  \return ival / 32768 as a float
 */
static inline float float_from_i16(int16_t ival)
{
    /* The scale factor is the reciprocal of the nominal 16 bit integer
     * half-sided range (32768).
     *
     * Since the scale factor is a power of 2, the scaling is exact, and there
     * is no rounding due to the multiplication - the bit pattern is preserved.
     */
    static const float scale = 1. / (float)(1UL << 15);

    return ival * scale;
}
1099 
/**
 * Convert an unsigned fixed-point 8-bit U0.8 value to single-precision floating-point.
 * The nominal output float range is [-1.0, 1.0) if the fixed-point range is
 * [0x00, 0xff].  The input is treated as offset binary: 0x80 maps to 0.0.
 *
 *  \param uval  Unsigned 8-bit input sample
 *  \return (uval - 128) / 128 as a float
 */
static inline float float_from_u8(uint8_t uval)
{
    /* Power-of-2 scale: the multiplication itself is exact. */
    static const float scale = 1. / (float)(1UL << 7);

    return ((int)uval - 128) * scale;
}
1111 
/**
 * Convert a packed 24bit Q0.23 value stored in a uint8_t ptr
 * to a signed fixed-point 32 bit integer Q0.31 value. The output Q0.31 range
 * is [0x80000000, 0x7fffff00] for the fixed-point range [0x800000, 0x7fffff].
 * Even though the output range is limited on the positive side, there is no
 * DC offset on the output, if the input has no DC offset.
 *
 * Avoid relying on the limited output range, as future implementations may go
 * to full range.
 *
 * This implementation reads packed24[0] as the least significant byte and
 * packed24[2] as the most significant byte, i.e. it matches native byte order
 * only on little-endian targets.
 *
 *  \param packed24  Pointer to 3 bytes holding the packed sample
 *  \return the sample placed in the upper 24 bits of an int32_t (low 8 bits zero)
 */
static inline int32_t i32_from_p24(const uint8_t *packed24)
{
    /* Assemble in unsigned arithmetic: shifting a promoted (signed) int left by 24
     * is undefined behavior whenever bit 7 of the top byte is set.
     */
    const uint32_t bits = ((uint32_t)packed24[0] << 8)
            | ((uint32_t)packed24[1] << 16)
            | ((uint32_t)packed24[2] << 24);

    return (int32_t)bits;
}
1127 
/**
 * Convert a 32-bit Q0.31 value to single-precision floating-point.
 * The output float range is [-1.0, 1.0] for the fixed-point range
 * [0x80000000, 0x7fffffff].
 *
 * Rounding may occur in the least significant 8 bits for large fixed point
 * values due to storage into the 24-bit floating-point significand.
 * Rounding will be to nearest, ties to even.
 *
 *  \param ival  Q0.31 input sample
 *  \return ival / 2^31 as a float
 */
static inline float float_from_i32(int32_t ival)
{
    /* Power-of-2 scale: the multiplication itself is exact. */
    static const float scale = 1. / (float)(1UL << 31);

    return ival * scale;
}
1143 
/**
 * Convert a packed 24bit Q0.23 value stored native endian in a uint8_t ptr
 * to single-precision floating-point. The output float range is [-1.0, 1.0)
 * for the fixed-point range [0x800000, 0x7fffff].
 *
 * There is no rounding, the conversion and representation is exact.
 *
 *  \param packed24  Pointer to 3 bytes holding the packed sample
 *  \return the sample as a float
 */
static inline float float_from_p24(const uint8_t *packed24)
{
    /* Widen to Q0.31 first, then reuse the Q0.31 to float conversion. */
    return float_from_i32(i32_from_p24(packed24));
}
1155 
/**
 * Convert a signed fixed-point 32-bit Q8.23 value to single-precision floating-point.
 * The nominal output float range is [-1.0, 1.0) for the fixed-point
 * range [0xff800000, 0x007fffff].  The maximum float range is [-256.0, 256.0]
 * (the upper bound is reached through rounding of the largest inputs).
 *
 * There is no rounding in the nominal range, the conversion and representation
 * is exact. For values outside the nominal range, rounding is to nearest, ties to even.
 *
 *  \param ival  Q8.23 input value
 *  \return ival / 2^23 as a float
 */
static inline float float_from_q8_23(int32_t ival)
{
    /* Power-of-2 scale: the multiplication itself is exact. */
    static const float scale = 1. / (float)(1UL << 23);

    return ival * scale;
}
1170 
/**
 * Multiply-accumulate 16-bit terms with 32-bit result: return a + in*v.
 *
 *  \param in  Signed 16-bit multiplicand
 *  \param v   Signed 16-bit multiplier
 *  \param a   32-bit accumulator
 *  \return a + in * v; the addition wraps around on 32-bit overflow
 */
static inline
int32_t mulAdd(int16_t in, int16_t v, int32_t a)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    asm( "smlabb %[out], %[in], %[v], %[a] \n"
         : [out]"=r"(out)
         : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
         : );
    return out;
#else
    /* The 16x16 product always fits in 32 bits.  The accumulate is done in
     * unsigned arithmetic so that overflow wraps instead of being undefined
     * behavior for signed int.
     */
    const int32_t product = in * (int32_t)v;

    return (int32_t)((uint32_t)a + (uint32_t)product);
#endif
}
1188 
/**
 * Multiply 16-bit terms with 32-bit result: return in*v.
 *
 *  \param in  Signed 16-bit multiplicand
 *  \param v   Signed 16-bit multiplier
 *  \return in * v (always representable in 32 bits)
 */
static inline
int32_t mul(int16_t in, int16_t v)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    asm( "smulbb %[out], %[in], %[v] \n"
         : [out]"=r"(out)
         : [in]"%r"(in), [v]"r"(v)
         : );
    return out;
#else
    return in * (int32_t)v;
#endif
}
1206 
/**
 * Similar to mulAdd, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
 *
 *  \param left  Non-zero selects the low 16 bits of inRL and vRL, zero selects the high 16 bits
 *  \param inRL  Packed pair of signed 16-bit input samples
 *  \param vRL   Packed pair of signed 16-bit multipliers
 *  \param a     32-bit accumulator
 *  \return a plus the product of the selected halves; the addition wraps around
 *          on 32-bit overflow
 */
static inline
int32_t mulAddRL(int left, uint32_t inRL, uint32_t vRL, int32_t a)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    if (left) {
        asm( "smlabb %[out], %[inRL], %[vRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
             : );
    } else {
        asm( "smlatt %[out], %[inRL], %[vRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
             : );
    }
    return out;
#else
    /* The 16x16 product always fits in 32 bits. */
    const int32_t product = left
            ? (int16_t)(inRL & 0xFFFF) * (int16_t)(vRL & 0xFFFF)
            : (int16_t)(inRL >> 16) * (int16_t)(vRL >> 16);

    /* Accumulate in unsigned arithmetic so that overflow wraps instead of
     * being undefined behavior for signed int.
     */
    return (int32_t)((uint32_t)a + (uint32_t)product);
#endif
}
1235 
/**
 * Similar to mul, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
 *
 *  \param left  Non-zero selects the low 16 bits of inRL and vRL, zero selects the high 16 bits
 *  \param inRL  Packed pair of signed 16-bit input samples
 *  \param vRL   Packed pair of signed 16-bit multipliers
 *  \return the product of the selected halves (always representable in 32 bits)
 */
static inline
int32_t mulRL(int left, uint32_t inRL, uint32_t vRL)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    if (left) {
        asm( "smulbb %[out], %[inRL], %[vRL] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL)
             : );
    } else {
        asm( "smultt %[out], %[inRL], %[vRL] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL)
             : );
    }
    return out;
#else
    if (left) {
        return (int16_t)(inRL & 0xFFFF) * (int16_t)(vRL & 0xFFFF);
    }
    return (int16_t)(inRL >> 16) * (int16_t)(vRL >> 16);
#endif
}
1264 
1265 /** \cond */
1266 __END_DECLS
1267 /** \endcond */
1268 
1269 #endif  // ANDROID_AUDIO_PRIMITIVES_H
1270