1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_AUDIO_PRIMITIVES_H
18 #define ANDROID_AUDIO_PRIMITIVES_H
19 
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <sys/cdefs.h>
23 
24 /** \cond */
25 __BEGIN_DECLS
26 /** \endcond */
27 
28 /**
29  * \file primitives.h
30  * The memcpy_* conversion routines are designed to work in-place on same dst as src
31  * buffers only if the types shrink on copy, with the exception of memcpy_to_i16_from_u8().
32  * This allows the loops to go upwards for faster cache access (and may be more flexible
33  * for future optimization later).
34  */
35 
36 /**
37  * Dither and clamp pairs of 32-bit input samples (sums) to 16-bit output samples (out).
38  * Each 32-bit input sample can be viewed as a signed fixed-point Q19.12 of which the
39  * .12 fraction bits are dithered and the 19 integer bits are clamped to signed 16 bits.
40  * Alternatively the input can be viewed as Q4.27, of which the lowest .12 of the fraction
41  * is dithered and the remaining fraction is converted to the output Q.15, with clamping
42  * on the 4 integer guard bits.
43  *
44  * For interleaved stereo, c is the number of sample pairs,
45  * and out is an array of interleaved pairs of 16-bit samples per channel.
46  * For mono, c is the number of samples / 2, and out is an array of 16-bit samples.
47  * The name "dither" is a misnomer; the current implementation does not actually dither
48  * but uses truncation.  This may change.
49  * The out and sums buffers must either be completely separate (non-overlapping), or
50  * they must both start at the same address.  Partially overlapping buffers are not supported.
51  */
52 void ditherAndClamp(int32_t* out, const int32_t *sums, size_t c);
53 
54 /**
55  * Expand and copy samples from unsigned 8-bit offset by 0x80 to signed 16-bit.
56  *
57  *  \param dst     Destination buffer
58  *  \param src     Source buffer
59  *  \param count   Number of samples to copy
60  *
61  * The destination and source buffers must either be completely separate (non-overlapping), or
62  * they must both start at the same address.  Partially overlapping buffers are not supported.
63  */
64 void memcpy_to_i16_from_u8(int16_t *dst, const uint8_t *src, size_t count);
65 
66 /**
67  * Shrink and copy samples from signed 16-bit to unsigned 8-bit offset by 0x80.
68  *
69  *  \param dst     Destination buffer
70  *  \param src     Source buffer
71  *  \param count   Number of samples to copy
72  *
73  * The destination and source buffers must either be completely separate (non-overlapping), or
74  * they must both start at the same address.  Partially overlapping buffers are not supported.
75  * The conversion is done by truncation, without dithering, so it loses resolution.
76  */
77 void memcpy_to_u8_from_i16(uint8_t *dst, const int16_t *src, size_t count);
78 
79 /**
80  * Copy samples from float to unsigned 8-bit offset by 0x80.
81  *
82  *  \param dst     Destination buffer
83  *  \param src     Source buffer
84  *  \param count   Number of samples to copy
85  *
86  * The destination and source buffers must either be completely separate (non-overlapping), or
87  * they must both start at the same address.  Partially overlapping buffers are not supported.
88  * The conversion is done by truncation, without dithering, so it loses resolution.
89  */
90 void memcpy_to_u8_from_float(uint8_t *dst, const float *src, size_t count);
91 
92 /**
93  * Shrink and copy samples from signed 32-bit fixed-point Q0.31 to signed 16-bit Q0.15.
94  *
95  *  \param dst     Destination buffer
96  *  \param src     Source buffer
97  *  \param count   Number of samples to copy
98  *
99  * The destination and source buffers must either be completely separate (non-overlapping), or
100  * they must both start at the same address.  Partially overlapping buffers are not supported.
101  * The conversion is done by truncation, without dithering, so it loses resolution.
102  */
103 void memcpy_to_i16_from_i32(int16_t *dst, const int32_t *src, size_t count);
104 
105 /**
106  * Shrink and copy samples from single-precision floating-point to signed 16-bit.
107  * Each float should be in the range -1.0 to 1.0.  Values outside that range are clamped,
108  * refer to clamp16_from_float().
109  *
110  *  \param dst     Destination buffer
111  *  \param src     Source buffer
112  *  \param count   Number of samples to copy
113  *
114  * The destination and source buffers must either be completely separate (non-overlapping), or
115  * they must both start at the same address.  Partially overlapping buffers are not supported.
116  * The conversion is done by truncation, without dithering, so it loses resolution.
117  */
118 void memcpy_to_i16_from_float(int16_t *dst, const float *src, size_t count);
119 
120 /**
121  * Copy samples from signed fixed-point 32-bit Q4.27 to single-precision floating-point.
122  * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
123  * [0xf8000000, 0x07ffffff].  The full float range is [-16.0, 16.0].  Note the closed range
124  * at 1.0 and 16.0 is due to rounding on conversion to float. See float_from_q4_27() for details.
125  *
126  *  \param dst     Destination buffer
127  *  \param src     Source buffer
128  *  \param count   Number of samples to copy
129  *
130  * The destination and source buffers must either be completely separate (non-overlapping), or
131  * they must both start at the same address.  Partially overlapping buffers are not supported.
132  */
133 void memcpy_to_float_from_q4_27(float *dst, const int32_t *src, size_t count);
134 
135 /**
136  * Copy samples from signed fixed-point 16 bit Q0.15 to single-precision floating-point.
137  * The output float range is [-1.0, 1.0) for the fixed-point range [0x8000, 0x7fff].
138  * No rounding is needed as the representation is exact.
139  *
140  *  \param dst     Destination buffer
141  *  \param src     Source buffer
142  *  \param count   Number of samples to copy
143  *
144  * The destination and source buffers must be completely separate.
145  */
146 void memcpy_to_float_from_i16(float *dst, const int16_t *src, size_t count);
147 
148 /**
149  * Copy samples from unsigned fixed-point 8 bit to single-precision floating-point.
150  * The output float range is [-1.0, 1.0) for the fixed-point range [0x00, 0xFF].
151  * No rounding is needed as the representation is exact.
152  *
153  *  \param dst     Destination buffer
154  *  \param src     Source buffer
155  *  \param count   Number of samples to copy
156  *
157  * The destination and source buffers must be completely separate.
158  */
159 void memcpy_to_float_from_u8(float *dst, const uint8_t *src, size_t count);
160 
161 /**
162  * Copy samples from signed fixed-point packed 24 bit Q0.23 to single-precision floating-point.
163  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
164  * The output float range is [-1.0, 1.0) for the fixed-point range [0x800000, 0x7fffff].
165  * No rounding is needed as the representation is exact.
166  *
167  *  \param dst     Destination buffer
168  *  \param src     Source buffer
169  *  \param count   Number of samples to copy
170  *
171  * The destination and source buffers must be completely separate.
172  */
173 void memcpy_to_float_from_p24(float *dst, const uint8_t *src, size_t count);
174 
175 /**
176  * Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed point 16 bit Q0.15.
 * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
178  * The data is truncated without rounding.
179  *
180  *  \param dst     Destination buffer
181  *  \param src     Source buffer
182  *  \param count   Number of samples to copy
183  *
184  * The destination and source buffers must either be completely separate (non-overlapping), or
185  * they must both start at the same address.  Partially overlapping buffers are not supported.
186  */
187 void memcpy_to_i16_from_p24(int16_t *dst, const uint8_t *src, size_t count);
188 
189 /**
190  * Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed-point 32-bit Q0.31.
191  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
192  * The output data range is [0x80000000, 0x7fffff00] at intervals of 0x100.
193  *
194  *  \param dst     Destination buffer
195  *  \param src     Source buffer
196  *  \param count   Number of samples to copy
197  *
198  * The destination and source buffers must be completely separate.
199  */
200 void memcpy_to_i32_from_p24(int32_t *dst, const uint8_t *src, size_t count);
201 
202 /**
203  * Copy samples from signed fixed point 16 bit Q0.15 to signed fixed-point packed 24 bit Q0.23.
204  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
205  * The output data range is [0x800000, 0x7fff00] (not full).
206  * Nevertheless there is no DC offset on the output, if the input has no DC offset.
207  *
208  *  \param dst     Destination buffer
209  *  \param src     Source buffer
210  *  \param count   Number of samples to copy
211  *
212  * The destination and source buffers must be completely separate.
213  */
214 void memcpy_to_p24_from_i16(uint8_t *dst, const int16_t *src, size_t count);
215 
216 /**
217  * Copy samples from single-precision floating-point to signed fixed-point packed 24 bit Q0.23.
218  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
219  * The data is clamped and rounded to nearest, ties away from zero. See clamp24_from_float()
220  * for details.
221  *
222  *  \param dst     Destination buffer
223  *  \param src     Source buffer
224  *  \param count   Number of samples to copy
225  *
226  * The destination and source buffers must either be completely separate (non-overlapping), or
227  * they must both start at the same address.  Partially overlapping buffers are not supported.
228  */
229 void memcpy_to_p24_from_float(uint8_t *dst, const float *src, size_t count);
230 
231 /**
232  * Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed-point packed 24 bit Q0.23.
233  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
 * The data is clamped to the range [0x800000, 0x7fffff].
235  *
236  *  \param dst     Destination buffer
237  *  \param src     Source buffer
238  *  \param count   Number of samples to copy
239  *
240  * The destination and source buffers must be completely separate.
241  */
242 void memcpy_to_p24_from_q8_23(uint8_t *dst, const int32_t *src, size_t count);
243 
244 /**
245  * Shrink and copy samples from signed 32-bit fixed-point Q0.31
246  * to signed fixed-point packed 24 bit Q0.23.
247  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
248  *
249  *  \param dst     Destination buffer
250  *  \param src     Source buffer
251  *  \param count   Number of samples to copy
252  *
253  * The destination and source buffers must either be completely separate (non-overlapping), or
254  * they must both start at the same address.  Partially overlapping buffers are not supported.
255  * The conversion is done by truncation, without dithering, so it loses resolution.
256  */
257 void memcpy_to_p24_from_i32(uint8_t *dst, const int32_t *src, size_t count);
258 
259 /**
260  * Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q8.23.
261  * The output data range is [0xff800000, 0x007fff00] at intervals of 0x100.
262  *
263  *  \param dst     Destination buffer
264  *  \param src     Source buffer
265  *  \param count   Number of samples to copy
266  *
267  * The destination and source buffers must be completely separate.
268  */
269 void memcpy_to_q8_23_from_i16(int32_t *dst, const int16_t *src, size_t count);
270 
271 /**
272  * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q8.23.
273  * This copy will clamp the Q8.23 representation to [0xff800000, 0x007fffff] even though there
274  * are guard bits available. Fractional lsb is rounded to nearest, ties away from zero.
275  * See clamp24_from_float() for details.
276  *
277  *  \param dst     Destination buffer
278  *  \param src     Source buffer
279  *  \param count   Number of samples to copy
280  *
281  * The destination and source buffers must either be completely separate (non-overlapping), or
282  * they must both start at the same address.  Partially overlapping buffers are not supported.
283  */
284 void memcpy_to_q8_23_from_float_with_clamp(int32_t *dst, const float *src, size_t count);
285 
286 /**
287  * Copy samples from signed fixed point packed 24-bit Q0.23 to signed fixed-point 32-bit Q8.23.
288  * The output data range is [0xff800000, 0x007fffff].
289  *
290  *  \param dst     Destination buffer
291  *  \param src     Source buffer
292  *  \param count   Number of samples to copy
293  *
294  * The destination and source buffers must be completely separate.
295  */
296 void memcpy_to_q8_23_from_p24(int32_t *dst, const uint8_t *src, size_t count);
297 
298 /**
299  * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q4.27.
300  * The conversion will use the full available Q4.27 range, including guard bits.
301  * Fractional lsb is rounded to nearest, ties away from zero.
302  * See clampq4_27_from_float() for details.
303  *
304  *  \param dst     Destination buffer
305  *  \param src     Source buffer
306  *  \param count   Number of samples to copy
307  *
308  * The destination and source buffers must either be completely separate (non-overlapping), or
309  * they must both start at the same address.  Partially overlapping buffers are not supported.
310  */
311 void memcpy_to_q4_27_from_float(int32_t *dst, const float *src, size_t count);
312 
313 /**
314  * Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed point 16-bit Q0.15.
315  * The data is clamped, and truncated without rounding.
316  *
317  *  \param dst     Destination buffer
318  *  \param src     Source buffer
319  *  \param count   Number of samples to copy
320  *
321  * The destination and source buffers must either be completely separate (non-overlapping), or
322  * they must both start at the same address.  Partially overlapping buffers are not supported.
323  */
324 void memcpy_to_i16_from_q8_23(int16_t *dst, const int32_t *src, size_t count);
325 
326 /**
327  * Copy samples from signed fixed-point 32-bit Q8.23 to single-precision floating-point.
328  * The nominal output float range is [-1.0, 1.0) for the fixed-point
329  * range [0xff800000, 0x007fffff]. The maximum output float range is [-256.0, 256.0).
330  * No rounding is needed as the representation is exact for nominal values.
331  * Rounding for overflow values is to nearest, ties to even.
332  *
333  *  \param dst     Destination buffer
334  *  \param src     Source buffer
335  *  \param count   Number of samples to copy
336  *
337  * The destination and source buffers must either be completely separate (non-overlapping), or
338  * they must both start at the same address.  Partially overlapping buffers are not supported.
339  */
340 void memcpy_to_float_from_q8_23(float *dst, const int32_t *src, size_t count);
341 
342 /**
343  * Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q0.31.
344  * The output data range is [0x80000000, 0x7fff0000] at intervals of 0x10000.
345  *
346  *  \param dst     Destination buffer
347  *  \param src     Source buffer
348  *  \param count   Number of samples to copy
349  *
350  * The destination and source buffers must be completely separate.
351  */
352 void memcpy_to_i32_from_i16(int32_t *dst, const int16_t *src, size_t count);
353 
354 /**
355  * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q0.31.
356  * If rounding is needed on truncation, the fractional lsb is rounded to nearest,
357  * ties away from zero. See clamp32_from_float() for details.
358  *
359  *  \param dst     Destination buffer
360  *  \param src     Source buffer
361  *  \param count   Number of samples to copy
362  *
363  * The destination and source buffers must either be completely separate (non-overlapping), or
364  * they must both start at the same address.  Partially overlapping buffers are not supported.
365  */
366 void memcpy_to_i32_from_float(int32_t *dst, const float *src, size_t count);
367 
368 /**
369  * Copy samples from signed fixed-point 32-bit Q0.31 to single-precision floating-point.
370  * The float range is [-1.0, 1.0] for the fixed-point range [0x80000000, 0x7fffffff].
371  * Rounding is done according to float_from_i32().
372  *
373  *  \param dst     Destination buffer
374  *  \param src     Source buffer
375  *  \param count   Number of samples to copy
376  *
377  * The destination and source buffers must either be completely separate (non-overlapping), or
378  * they must both start at the same address.  Partially overlapping buffers are not supported.
379  */
380 void memcpy_to_float_from_i32(float *dst, const int32_t *src, size_t count);
381 
382 /**
383  * Downmix pairs of interleaved stereo input 16-bit samples to mono output 16-bit samples.
384  *
385  *  \param dst     Destination buffer
386  *  \param src     Source buffer
387  *  \param count   Number of stereo frames to downmix
388  *
389  * The destination and source buffers must be completely separate (non-overlapping).
390  * The current implementation truncates the mean rather than dither, but this may change.
391  */
392 void downmix_to_mono_i16_from_stereo_i16(int16_t *dst, const int16_t *src, size_t count);
393 
394 /**
395  * Upmix mono input 16-bit samples to pairs of interleaved stereo output 16-bit samples by
396  * duplicating.
397  *
398  *  \param dst     Destination buffer
399  *  \param src     Source buffer
400  *  \param count   Number of mono samples to upmix
401  *
402  * The destination and source buffers must be completely separate (non-overlapping).
403  */
404 void upmix_to_stereo_i16_from_mono_i16(int16_t *dst, const int16_t *src, size_t count);
405 
406 /**
407  * Downmix pairs of interleaved stereo input float samples to mono output float samples
408  * by averaging the stereo pair together.
409  *
410  *  \param dst     Destination buffer
411  *  \param src     Source buffer
412  *  \param count   Number of stereo frames to downmix
413  *
414  * The destination and source buffers must be completely separate (non-overlapping),
415  * or they must both start at the same address.
416  */
417 void downmix_to_mono_float_from_stereo_float(float *dst, const float *src, size_t count);
418 
419 /**
420  * Upmix mono input float samples to pairs of interleaved stereo output float samples by
421  * duplicating.
422  *
423  *  \param dst     Destination buffer
424  *  \param src     Source buffer
425  *  \param count   Number of mono samples to upmix
426  *
427  * The destination and source buffers must be completely separate (non-overlapping).
428  */
429 void upmix_to_stereo_float_from_mono_float(float *dst, const float *src, size_t count);
430 
431 /**
432  * \return the total number of non-zero 32-bit samples.
433  */
434 size_t nonZeroMono32(const int32_t *samples, size_t count);
435 
436 /**
437  * \return the total number of non-zero 16-bit samples.
438  */
439 size_t nonZeroMono16(const int16_t *samples, size_t count);
440 
441 /**
442  * \return the total number of non-zero stereo frames, where a frame is considered non-zero
443  * if either of its constituent 32-bit samples is non-zero.
444  */
445 size_t nonZeroStereo32(const int32_t *frames, size_t count);
446 
447 /**
448  * \return the total number of non-zero stereo frames, where a frame is considered non-zero
449  * if either of its constituent 16-bit samples is non-zero.
450  */
451 size_t nonZeroStereo16(const int16_t *frames, size_t count);
452 
453 /**
454  * Copy frames, selecting source samples based on a source channel mask to fit
455  * the destination channel mask. Unmatched channels in the destination channel mask
456  * are zero filled. Unmatched channels in the source channel mask are dropped.
457  * Channels present in the channel mask are represented by set bits in the
458  * uint32_t value and are matched without further interpretation.
459  *
460  *  \param dst         Destination buffer
461  *  \param dst_mask    Bit mask corresponding to destination channels present
462  *  \param src         Source buffer
463  *  \param src_mask    Bit mask corresponding to source channels present
464  *  \param sample_size Size of each sample in bytes.  Must be 1, 2, 3, or 4.
465  *  \param count       Number of frames to copy
466  *
467  * The destination and source buffers must be completely separate (non-overlapping).
468  * If the sample size is not in range, the function will abort.
469  */
470 void memcpy_by_channel_mask(void *dst, uint32_t dst_mask,
471         const void *src, uint32_t src_mask, size_t sample_size, size_t count);
472 
473 /**
474  * Copy frames, selecting source samples based on an index array (idxary).
475  * The idxary[] consists of dst_channels number of elements.
 * The ith element of idxary[] corresponds to the ith destination channel.
477  * A non-negative value is the channel index in the source frame.
478  * A negative index (-1) represents filling with 0.
479  *
480  * Example: Swapping L and R channels for stereo streams
481  * <PRE>
482  * idxary[0] = 1;
483  * idxary[1] = 0;
484  * </PRE>
485  *
486  * Example: Copying a mono source to the front center 5.1 channel
487  * <PRE>
488  * idxary[0] = -1;
489  * idxary[1] = -1;
490  * idxary[2] = 0;
491  * idxary[3] = -1;
492  * idxary[4] = -1;
493  * idxary[5] = -1;
494  * </PRE>
495  *
496  * This copy allows swizzling of channels or replication of channels.
497  *
498  *  \param dst           Destination buffer
499  *  \param dst_channels  Number of destination channels per frame
500  *  \param src           Source buffer
501  *  \param src_channels  Number of source channels per frame
502  *  \param idxary        Array of indices representing channels in the source frame
503  *  \param sample_size   Size of each sample in bytes.  Must be 1, 2, 3, or 4.
504  *  \param count         Number of frames to copy
505  *
506  * The destination and source buffers must be completely separate (non-overlapping).
507  * If the sample size is not in range, the function will abort.
508  */
509 void memcpy_by_index_array(void *dst, uint32_t dst_channels,
510         const void *src, uint32_t src_channels,
511         const int8_t *idxary, size_t sample_size, size_t count);
512 
513 /**
514  * Prepares an index array (idxary) from channel masks, which can be later
515  * used by memcpy_by_index_array().
516  *
517  * \return the number of array elements required.
518  * This may be greater than idxcount, so the return value should be checked
519  * if idxary size is less than 32.
520  *
521  * Note that idxary is a caller allocated array
522  * of at least as many channels as present in the dst_mask.
523  * Channels present in the channel mask are represented by set bits in the
524  * uint32_t value and are matched without further interpretation.
525  *
526  * This function is typically used for converting audio data with different
527  * channel position masks.
528  *
529  *  \param idxary      Updated array of indices of channels in the src frame for the dst frame
530  *  \param idxcount    Number of caller allocated elements in idxary
531  *  \param dst_mask    Bit mask corresponding to destination channels present
532  *  \param src_mask    Bit mask corresponding to source channels present
533  */
534 size_t memcpy_by_index_array_initialization(int8_t *idxary, size_t idxcount,
535         uint32_t dst_mask, uint32_t src_mask);
536 
537 /**
538  * Prepares an index array (idxary) from channel masks, which can be later
539  * used by memcpy_by_index_array().
540  *
541  * \return the number of array elements required.
542  *
543  * For a source channel index mask, the source channels will map to the destination
544  * channels as if counting the set bits in dst_mask in order from lsb to msb
545  * (zero bits are ignored). The ith bit of the src_mask corresponds to the
546  * ith SET bit of dst_mask and the ith destination channel.  Hence, a zero ith
547  * bit of the src_mask indicates that the ith destination channel plays silence.
548  *
549  *  \param idxary      Updated array of indices of channels in the src frame for the dst frame
550  *  \param idxcount    Number of caller allocated elements in idxary
551  *  \param dst_mask    Bit mask corresponding to destination channels present
552  *  \param src_mask    Bit mask corresponding to source channels present
553  */
554 size_t memcpy_by_index_array_initialization_src_index(int8_t *idxary, size_t idxcount,
555         uint32_t dst_mask, uint32_t src_mask);
556 
557 /**
558  * Prepares an index array (idxary) from channel mask bits, which can be later
559  * used by memcpy_by_index_array().
560  *
561  * \return the number of array elements required.
562  *
563  * This initialization is for a destination channel index mask from a positional
564  * source mask.
565  *
 * For a destination channel index mask, the input channels will map
567  * to the destination channels, with the ith SET bit in the source bits corresponding
568  * to the ith bit in the destination bits. If there is a zero bit in the middle
569  * of set destination bits (unlikely), the corresponding source channel will
570  * be dropped.
571  *
572  *  \param idxary      Updated array of indices of channels in the src frame for the dst frame
573  *  \param idxcount    Number of caller allocated elements in idxary
574  *  \param dst_mask    Bit mask corresponding to destination channels present
575  *  \param src_mask    Bit mask corresponding to source channels present
576  */
577 size_t memcpy_by_index_array_initialization_dst_index(int8_t *idxary, size_t idxcount,
578         uint32_t dst_mask, uint32_t src_mask);
579 
/**
 * Clamp (aka hard limit or clip) a signed 32-bit sample to the signed 16-bit range.
 *
 *  \param sample  Input sample
 *
 * \return sample unchanged if it lies within [INT16_MIN, INT16_MAX], otherwise the
 * nearest bound of that range.
 */
static inline int16_t clamp16(int32_t sample)
{
    /* Explicit range comparisons are used rather than sign-bit shift tricks, because
     * right-shifting a negative signed value is implementation-defined in C.
     */
    if (sample > INT16_MAX) {
        return INT16_MAX;
    }
    if (sample < INT16_MIN) {
        return INT16_MIN;
    }
    return (int16_t)sample;
}
589 
/**
 * Convert an IEEE 754 single precision float in [-1.0, 1.0) to int16_t [-32768, 32767]
 * with clamping.  Note the open bound at 1.0: values within 1/65536 of 1.0 map
 * to 32767 instead of 32768 (early clamping due to the smaller positive integer subrange).
 *
 * Values outside the range [-1.0, 1.0) are clamped to -32768 and 32767,
 * including -Inf and +Inf. NaN will generally be treated either as -32768 or 32767,
 * depending on the sign bit inside NaN (whose representation is not unique).
 * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
 *
 * Rounding of 0.5 lsb is to even (default for IEEE 754).
 *
 *  \param f  Input sample
 *
 * \return the input scaled by 32768, rounded and clamped to the int16_t range.
 */
static inline int16_t clamp16_from_float(float f)
{
    /* Adding 384.0 (3 << 7) moves the valid range [-1.0, 1.0) to [383.0, 385.0), where
     * one lsb of the float significand is worth 1/32768.  The 16 lsbs of the
     * significand therefore hold the input multiplied by 32768.
     */
    const float bias = (float)(3 << (22 - 15));
    /* (0x10f << 22) == 0x43c00000 is the bit pattern of the bias itself (input 0.0). */
    const int32_t bits_min = (0x10f << 22) - 32768; /* 0x43bf8000 */
    const int32_t bits_max = (0x10f << 22) + 32767; /* 0x43c07fff */

    union {
        float value;
        int32_t bits;
    } pun;

    pun.value = f + bias; /* recenter valid range */

    /* Float bit patterns, read as integers, are ordered the same way as the values
     * they encode here, so the range check is done on the integer view.
     */
    if (pun.bits < bits_min) {
        return -32768;
    }
    if (pun.bits > bits_max) {
        return 32767;
    }
    /* Keep the lower 16 bits, the part of interest in the significand. */
    return (int16_t)pun.bits;
}
628 
/**
 * Convert an IEEE 754 single precision float in [-1.0, 1.0) to uint8_t [0, 0xff]
 * (unsigned 8-bit offset by 0x80) with clamping.  Note the open bound at 1.0:
 * inputs close enough to 1.0 to round up to 256 yield 255 instead
 * (early clamping due to the smaller positive integer subrange).
 *
 * Values outside the range [-1.0, 1.0) are clamped to 0 and 255,
 * including -Inf and +Inf. NaN will generally be treated either as 0 or 255,
 * depending on the sign bit inside NaN (whose representation is not unique).
 * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
 *
 * Rounding of 0.5 lsb is to even (default for IEEE 754).
 *
 *  \param f  Input sample
 *
 * \return the input scaled by 128, offset by 128, rounded and clamped to [0, 255].
 */
static inline uint8_t clamp8_from_float(float f)
{
    /* Adding 98305.0 ((3 << 15) + 1) places the input where one lsb of the float
     * significand is worth 1/128, so the 8 lsbs of the significand hold the scaled
     * value.  The extra +1 cancels the -1.0 lower bound, so that -1.0 lands exactly
     * on 98304.0 and maps to output 0.
     */
    const float bias = (float)((3 << (22 - 7)) + 1 /* to cancel -1.0 */);
    /* (0x11f << 22) == 0x47c00000 is the bit pattern of 98304.0 (input -1.0). */
    const int32_t bits_min = (0x11f << 22);
    const int32_t bits_max = (0x11f << 22) + 255; /* 0x47c000ff */

    union {
        float value;
        int32_t bits;
    } pun;
    uint8_t result;

    pun.value = f + bias; /* recenter valid range */

    /* Float bit patterns, read as integers, are ordered the same way as the values
     * they encode here, so the range check is done on the integer view.
     */
    if (pun.bits < bits_min) {
        result = 0;
    } else if (pun.bits > bits_max) {
        result = 255;
    } else {
        /* Keep the lower 8 bits, the part of interest in the significand. */
        result = (uint8_t)pun.bits;
    }
    return result;
}
667 
/**
 * Convert a single-precision floating point value to a Q0.23 integer value, stored in a
 * 32 bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
 *
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [-1.0, 1.0) are clamped to -8388608 and 8388607,
 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 *
 *  \param f  Input sample
 *
 * \return the input scaled by 2^23, rounded and clamped to [-0x800000, 0x7fffff].
 */
static inline int32_t clamp24_from_float(float f)
{
    /* Every initializer below is written out in full so that it is a constant
     * expression on its own; ISO C does not allow a static initializer to refer to
     * another const variable.
     */
    static const float scale = (float)(1 << 23);
    static const float limpos = 0x7fffff / (float)(1 << 23);
    static const float limneg = -0x800000 / (float)(1 << 23);

    if (f <= limneg) {
        return -0x800000;
    }
    if (f >= limpos) {
        return 0x7fffff;
    }
    f *= scale;
    /* Float to integer conversion truncates toward zero, so add half an lsb with the
     * sign of the value to round to nearest, ties away from 0.  The 0.5 literal is a
     * double on purpose: doing the addition in double precision avoids a second
     * rounding step before the truncation.
     */
    return (int32_t)(f > 0 ? f + 0.5 : f - 0.5);
}
695 
/**
 * Clamp a signed fixed-point 32-bit Q8.23 value to the Q0.23 range. The result is
 * still held in a 32-bit signed integer (Q8.23 storage, Q0.23 value range).
 *
 *  \param ival  Input sample in Q8.23
 *
 * \return ival unchanged if it lies within [-0x800000, 0x7fffff], otherwise the
 * nearest bound of that range.
 */
static inline int32_t clamp24_from_q8_23(int32_t ival)
{
    const int32_t q23_max = 0x7fffff;
    const int32_t q23_min = -0x800000;

    /* Apply the lower bound first, then the upper bound. */
    const int32_t floored = (ival < q23_min) ? q23_min : ival;
    return (floored > q23_max) ? q23_max : floored;
}
714 
/**
 * Convert a single-precision floating point value to a signed Q4.27 integer value.
 * Rounding is to nearest, with ties going away from 0.
 *
 * Inputs at or below -16.0 yield -2147483648 and inputs at or above 16.0 yield
 * 2147483647; this covers -Inf and +Inf. The result for NaN is undefined and may
 * change with hardware or with future implementations of this function.
 *
 * \param f input value, nominally in [-16.0, 16.0).
 * \return f scaled by 2^27, rounded and saturated to the int32_t range.
 */
static inline int32_t clampq4_27_from_float(float f)
{
    const float full_scale = 16.f;

    if (f <= -full_scale) {
        return -0x7fffffff - 1; /* most negative 32-bit value */
    }
    if (f >= full_scale) {
        return 0x7fffffff;
    }

    const float scaled = f * (float)(1UL << 27);
    /* The integer conversion truncates, so push the value half an lsb away
     * from zero first to obtain round-to-nearest, ties away from 0.
     */
    const double half_lsb = (scaled > 0) ? 0.5 : -0.5;
    return (int32_t)(scaled + half_lsb);
}
740 
/**
 * Convert a single-precision floating point value to a signed Q0.31 integer value.
 * Rounding is to nearest, with ties going away from 0.
 *
 * Inputs at or below -1.0 yield -2147483648 and inputs at or above 1.0 yield
 * 2147483647; this covers -Inf and +Inf. The result for NaN is undefined and may
 * change with hardware or with future implementations of this function.
 *
 * \param f input sample, nominally in [-1.0, 1.0).
 * \return f scaled by 2^31, rounded and saturated to the int32_t range.
 */
static inline int32_t clamp32_from_float(float f)
{
    const float unity = 1.f;
    const float q31_scale = (float)(1UL << 31);
    float scaled;

    if (f >= unity) {
        return 0x7fffffff;
    }
    if (f <= -unity) {
        return -0x7fffffff - 1; /* most negative 32-bit value */
    }

    scaled = f * q31_scale;
    /* Truncating conversion: add half an lsb with the sign of the value so the
     * result is rounded to nearest, ties away from 0.
     */
    if (scaled > 0) {
        return (int32_t)(scaled + 0.5);
    }
    return (int32_t)(scaled - 0.5);
}
766 
/**
 * Convert a signed fixed-point 32-bit Q4.27 value to single-precision floating-point.
 * For fixed-point inputs in [0xf8000000, 0x07ffffff] the nominal float output is
 * [-1.0, 1.0]; over the whole 32-bit input range the output spans [-16.0, 16.0].
 *
 * The upper bounds 1.0 and 16.0 are reachable only because of rounding: an integer
 * that needs more than the 24 significand bits of a float is rounded on conversion
 * (to nearest, ties to even, the IEEE 754 default).
 *
 * \param ival Q4.27 input value.
 * \return ival / 2^27 as a float.
 */
static inline float float_from_q4_27(int32_t ival)
{
    /* One lsb of the 27 fractional bits. It is a power of 2, so the
     * multiplication itself is exact; any rounding comes solely from the
     * integer to float conversion of the input.
     */
    const float lsb = 1.0f / (float)(1UL << 27);

    return (float)ival * lsb;
}
790 
/**
 * Convert an unsigned fixed-point 32-bit U4.28 value to single-precision floating-point.
 * For fixed-point inputs in [0x00000000, 0x10000000] the nominal float output is
 * [0.0, 1.0]; over the whole 32-bit input range the output spans [0.0, 16.0].
 *
 * The upper bound 16.0 is reachable only because of rounding: an integer that needs
 * more than the 24 significand bits of a float is rounded on conversion
 * (to nearest, ties to even, the IEEE 754 default).
 *
 * \param uval U4.28 input value.
 * \return uval / 2^28 as a float.
 */
static inline float float_from_u4_28(uint32_t uval)
{
    /* One lsb of the 28 fractional bits; a power of 2, so scaling is exact. */
    const float lsb = 1.0f / (float)(1UL << 28);

    return (float)uval * lsb;
}
807 
/**
 * Convert an unsigned fixed-point 16-bit U4.12 value to single-precision floating-point.
 * For fixed-point inputs in [0x0000, 0x1000] the nominal float output is [0.0, 1.0];
 * over the whole 16-bit input range the output spans [0.0, 16.0).
 *
 * \param uval U4.12 input value.
 * \return uval / 2^12 as a float.
 */
static inline float float_from_u4_12(uint16_t uval)
{
    /* One lsb of the 12 fractional bits; a power of 2, so scaling is exact. */
    const float lsb = 1.0f / (float)(1UL << 12);

    return (float)uval * lsb;
}
819 
/**
 * Convert a single-precision floating point value to a U4.28 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [0, 16.0] are properly clamped to [0, 4294967295]
 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 *
 * \param f input value, nominally in [0.0, 16.0].
 * \return f scaled by 2^28, rounded and clamped to the uint32_t range.
 */
static inline uint32_t u4_28_from_float(float f)
{
    /* Both initializers are self-contained constant expressions: ISO C does
     * not accept a const-qualified object such as "scale" inside the
     * initializer of another object with static storage duration.
     *
     * 0xffffffff does not fit a 24-bit significand; with the usual
     * round-to-nearest conversion it becomes 2^32, so limpos is 16.0.
     */
    static const float scale = (float)(1 << 28);
    static const float limpos = 0xffffffffUL / (float)(1 << 28);

    if (f <= 0.) {
        return 0;
    } else if (f >= limpos) {
        return 0xffffffff;
    }
    /* float to integer conversion truncates (though int to float does not);
     * the input is positive here, so adding half an lsb rounds to nearest,
     * ties away from 0.
     */
    return (uint32_t)(f * scale + 0.5);
}
843 
/**
 * Convert a single-precision floating point value to a U4.12 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [0, 16.0) are properly clamped to [0, 65535]
 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 *
 * \param f input value, nominally in [0.0, 16.0).
 * \return f scaled by 2^12, rounded and clamped to the uint16_t range.
 */
static inline uint16_t u4_12_from_float(float f)
{
    /* Both initializers are self-contained constant expressions: ISO C does
     * not accept a const-qualified object such as "scale" inside the
     * initializer of another object with static storage duration.
     */
    static const float scale = (float)(1 << 12);
    static const float limpos = 0xffff / (float)(1 << 12);

    if (f <= 0.) {
        return 0;
    } else if (f >= limpos) {
        return 0xffff;
    }
    /* float to integer conversion truncates (though int to float does not);
     * the input is positive here, so adding half an lsb rounds to nearest,
     * ties away from 0.
     */
    return (uint16_t)(f * scale + 0.5);
}
867 
/**
 * Convert a signed fixed-point 16-bit Q0.15 value to single-precision floating-point.
 * The fixed-point range [0x8000, 0x7fff] maps onto the float range [-1.0, 1.0).
 *
 * The conversion is exact; no rounding takes place.
 *
 * \param ival Q0.15 input sample.
 * \return ival / 32768 as a float.
 */
static inline float float_from_i16(int16_t ival)
{
    /* Reciprocal of the nominal half-sided 16-bit range (32768). Being a power
     * of 2, it only adjusts the exponent, so the product is exact.
     */
    const float lsb = 1.0f / (float)(1UL << 15);

    return (float)ival * lsb;
}
887 
/**
 * Convert an unsigned 8-bit sample to single-precision floating-point.
 * The input is first re-centered by subtracting 128 and then scaled by 1/128, so the
 * input range [0x00, 0xff] maps onto the nominal float range [-1.0, 1.0).
 *
 * \param uval unsigned 8-bit input sample; 128 corresponds to 0.0.
 * \return (uval - 128) / 128 as a float.
 */
static inline float float_from_u8(uint8_t uval)
{
    const float lsb = 1.0f / (float)(1UL << 7);
    const int centered = (int)uval - 128; /* remove the unsigned offset */

    return (float)centered * lsb;
}
899 
/**
 * Convert a packed 24bit Q0.23 value stored in a uint8_t ptr to a signed
 * fixed-point 32 bit integer Q0.31 value. The three bytes are read
 * least-significant byte first (packed24[0] is the low byte), which is the
 * native order on little-endian targets.
 *
 * The output Q0.31 range is [0x80000000, 0x7fffff00] for the fixed-point range
 * [0x800000, 0x7fffff]. Even though the output range is limited on the positive
 * side, there is no DC offset on the output, if the input has no DC offset.
 *
 * Avoid relying on the limited output range, as future implementations may go
 * to full range.
 *
 * \param packed24 pointer to the 3 bytes of the packed sample; must not be NULL.
 * \return the sample placed in the upper 24 bits of an int32_t, low 8 bits zero.
 */
static inline int32_t i32_from_p24(const uint8_t *packed24)
{
    /* Assemble in unsigned arithmetic: a uint8_t promotes to (signed) int, and
     * shifting a byte >= 0x80 left by 24 would overflow into the sign bit,
     * which is undefined behavior.
     */
    const uint32_t bits = ((uint32_t)packed24[0] << 8)
            | ((uint32_t)packed24[1] << 16)
            | ((uint32_t)packed24[2] << 24);

    /* Reinterpret the assembled bit pattern as a signed Q0.31 value. */
    return (int32_t)bits;
}
915 
/**
 * Convert a signed 32-bit Q0.31 value to single-precision floating-point.
 * The fixed-point range [0x80000000, 0x7fffffff] maps onto the float range
 * [-1.0, 1.0].
 *
 * Large magnitudes do not fit the 24-bit float significand, so the least
 * significant 8 bits may be rounded away (to nearest, ties to even).
 *
 * \param ival Q0.31 input sample.
 * \return ival / 2^31 as a float.
 */
static inline float float_from_i32(int32_t ival)
{
    /* One lsb of the 31 fractional bits; a power of 2, so scaling is exact. */
    const float lsb = 1.0f / (float)(1UL << 31);

    return (float)ival * lsb;
}
931 
/**
 * Convert a packed 24bit Q0.23 value stored in a uint8_t ptr to single-precision
 * floating-point. The fixed-point range [0x800000, 0x7fffff] maps onto the float
 * range [-1.0, 1.0).
 *
 * A 24-bit sample fits the float significand, so the conversion is exact and
 * no rounding takes place.
 *
 * \param packed24 pointer to the 3 bytes of the packed sample.
 * \return the sample as a float.
 */
static inline float float_from_p24(const uint8_t *packed24)
{
    /* Widen to Q0.31 first, then reuse the 32-bit conversion. */
    const int32_t q31 = i32_from_p24(packed24);

    return float_from_i32(q31);
}
943 
/**
 * Convert a signed fixed-point 32-bit Q8.23 value to single-precision floating-point.
 * The fixed-point range [0xff800000, 0x007fffff] maps onto the nominal float range
 * [-1.0, 1.0); over the whole 32-bit input range the output spans [-256.0, 256.0).
 *
 * Inside the nominal range the conversion is exact. Values outside it may need
 * more than 24 significand bits and are then rounded to nearest, ties to even.
 *
 * \param ival Q8.23 input value.
 * \return ival / 2^23 as a float.
 */
static inline float float_from_q8_23(int32_t ival)
{
    /* One lsb of the 23 fractional bits; a power of 2, so scaling is exact. */
    const float lsb = 1.0f / (float)(1UL << 23);

    return (float)ival * lsb;
}
958 
/**
 * Multiply-accumulate 16-bit terms with 32-bit result: return a + in*v.
 *
 * The 16x16 product always fits in 32 bits. If the accumulation exceeds the
 * 32-bit range, the result wraps modulo 2^32 on every path.
 *
 * \param in first 16-bit factor.
 * \param v  second 16-bit factor.
 * \param a  32-bit accumulator value to add the product to.
 * \return a + in*v, wrapped to 32 bits.
 */
static inline
int32_t mulAdd(int16_t in, int16_t v, int32_t a)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    asm( "smlabb %[out], %[in], %[v], %[a] \n"
         : [out]"=r"(out)
         : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
         : );
    return out;
#else
    /* Accumulate in unsigned arithmetic so that overflow wraps like the
     * smlabb instruction instead of being signed-overflow undefined behavior.
     */
    const int32_t product = in * (int32_t)v;

    return (int32_t)((uint32_t)a + (uint32_t)product);
#endif
}
976 
/**
 * Multiply two signed 16-bit terms and return the full 32-bit product in*v.
 *
 * \param in first 16-bit factor.
 * \param v  second 16-bit factor.
 * \return the 32-bit product of in and v.
 */
static inline
int32_t mul(int16_t in, int16_t v)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t product;
    asm( "smulbb %[out], %[in], %[v] \n"
         : [out]"=r"(product)
         : [in]"%r"(in), [v]"r"(v)
         : );
    return product;
#else
    /* Widen both factors before multiplying; the product always fits in 32 bits. */
    const int32_t lhs = in;
    const int32_t rhs = v;

    return lhs * rhs;
#endif
}
994 
/**
 * Similar to mulAdd, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
 *
 * When left is non-zero the low 16 bits of inRL and vRL are multiplied;
 * otherwise the high 16 bits are used. Each half is treated as a signed 16-bit
 * value. If the accumulation exceeds the 32-bit range, the result wraps
 * modulo 2^32 on every path.
 *
 * \param left non-zero to use the low halves, zero to use the high halves.
 * \param inRL packed pair of 16-bit input terms.
 * \param vRL  packed pair of 16-bit multiplier terms.
 * \param a    32-bit accumulator value to add the product to.
 * \return a plus the product of the selected halves, wrapped to 32 bits.
 */
static inline
int32_t mulAddRL(int left, uint32_t inRL, uint32_t vRL, int32_t a)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    if (left) {
        asm( "smlabb %[out], %[inRL], %[vRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
             : );
    } else {
        asm( "smlatt %[out], %[inRL], %[vRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
             : );
    }
    return out;
#else
    int32_t product;

    if (left) {
        product = (int16_t)(inRL&0xFFFF) * (int16_t)(vRL&0xFFFF);
    } else {
        product = (int16_t)(inRL>>16) * (int16_t)(vRL>>16);
    }
    /* Accumulate in unsigned arithmetic so that overflow wraps like the
     * smlabb/smlatt instructions instead of being signed-overflow undefined
     * behavior.
     */
    return (int32_t)((uint32_t)a + (uint32_t)product);
#endif
}
1023 
/**
 * Similar to mul, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
 *
 * When left is non-zero the low 16 bits of inRL and vRL are multiplied;
 * otherwise the high 16 bits are used. Each half is treated as a signed
 * 16-bit value.
 *
 * \param left non-zero to use the low halves, zero to use the high halves.
 * \param inRL packed pair of 16-bit input terms.
 * \param vRL  packed pair of 16-bit multiplier terms.
 * \return the 32-bit product of the selected halves.
 */
static inline
int32_t mulRL(int left, uint32_t inRL, uint32_t vRL)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t product;
    if (left) {
        asm( "smulbb %[out], %[inRL], %[vRL] \n"
             : [out]"=r"(product)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL)
             : );
    } else {
        asm( "smultt %[out], %[inRL], %[vRL] \n"
             : [out]"=r"(product)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL)
             : );
    }
    return product;
#else
    /* Pick the requested half of each packed word, then multiply. */
    const int16_t sample = left ? (int16_t)(inRL&0xFFFF) : (int16_t)(inRL>>16);
    const int16_t gain = left ? (int16_t)(vRL&0xFFFF) : (int16_t)(vRL>>16);

    return sample * gain;
#endif
}
1052 
1053 /** \cond */
1054 __END_DECLS
1055 /** \endcond */
1056 
1057 #endif  // ANDROID_AUDIO_PRIMITIVES_H
1058