1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_AUDIO_PRIMITIVES_H
18 #define ANDROID_AUDIO_PRIMITIVES_H
19 
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <sys/cdefs.h>
23 
24 __BEGIN_DECLS
25 
26 /* The memcpy_* conversion routines are designed to work in-place on same dst as src
27  * buffers only if the types shrink on copy, with the exception of memcpy_to_i16_from_u8().
28  * This allows the loops to go upwards for faster cache access (and may be more flexible
29  * for future optimization later).
30  */
31 
32 /**
33  * Dither and clamp pairs of 32-bit input samples (sums) to 16-bit output samples (out).
34  * Each 32-bit input sample can be viewed as a signed fixed-point Q19.12 of which the
35  * .12 fraction bits are dithered and the 19 integer bits are clamped to signed 16 bits.
36  * Alternatively the input can be viewed as Q4.27, of which the lowest .12 of the fraction
37  * is dithered and the remaining fraction is converted to the output Q.15, with clamping
38  * on the 4 integer guard bits.
39  *
40  * For interleaved stereo, c is the number of sample pairs,
41  * and out is an array of interleaved pairs of 16-bit samples per channel.
42  * For mono, c is the number of samples / 2, and out is an array of 16-bit samples.
43  * The name "dither" is a misnomer; the current implementation does not actually dither
44  * but uses truncation.  This may change.
45  * The out and sums buffers must either be completely separate (non-overlapping), or
46  * they must both start at the same address.  Partially overlapping buffers are not supported.
47  */
48 void ditherAndClamp(int32_t* out, const int32_t *sums, size_t c);
49 
50 /* Expand and copy samples from unsigned 8-bit offset by 0x80 to signed 16-bit.
51  * Parameters:
52  *  dst     Destination buffer
53  *  src     Source buffer
54  *  count   Number of samples to copy
55  * The destination and source buffers must either be completely separate (non-overlapping), or
56  * they must both start at the same address.  Partially overlapping buffers are not supported.
57  */
58 void memcpy_to_i16_from_u8(int16_t *dst, const uint8_t *src, size_t count);
59 
60 /* Shrink and copy samples from signed 16-bit to unsigned 8-bit offset by 0x80.
61  * Parameters:
62  *  dst     Destination buffer
63  *  src     Source buffer
64  *  count   Number of samples to copy
65  * The destination and source buffers must either be completely separate (non-overlapping), or
66  * they must both start at the same address.  Partially overlapping buffers are not supported.
67  * The conversion is done by truncation, without dithering, so it loses resolution.
68  */
69 void memcpy_to_u8_from_i16(uint8_t *dst, const int16_t *src, size_t count);
70 
71 /* Copy samples from float to unsigned 8-bit offset by 0x80.
72  * Parameters:
73  *  dst     Destination buffer
74  *  src     Source buffer
75  *  count   Number of samples to copy
76  * The destination and source buffers must either be completely separate (non-overlapping), or
77  * they must both start at the same address.  Partially overlapping buffers are not supported.
78  * The conversion is done by truncation, without dithering, so it loses resolution.
79  */
80 void memcpy_to_u8_from_float(uint8_t *dst, const float *src, size_t count);
81 
82 /* Shrink and copy samples from signed 32-bit fixed-point Q0.31 to signed 16-bit Q0.15.
83  * Parameters:
84  *  dst     Destination buffer
85  *  src     Source buffer
86  *  count   Number of samples to copy
87  * The destination and source buffers must either be completely separate (non-overlapping), or
88  * they must both start at the same address.  Partially overlapping buffers are not supported.
89  * The conversion is done by truncation, without dithering, so it loses resolution.
90  */
91 void memcpy_to_i16_from_i32(int16_t *dst, const int32_t *src, size_t count);
92 
93 /* Shrink and copy samples from single-precision floating-point to signed 16-bit.
94  * Each float should be in the range -1.0 to 1.0.  Values outside that range are clamped,
95  * refer to clamp16_from_float().
96  * Parameters:
97  *  dst     Destination buffer
98  *  src     Source buffer
99  *  count   Number of samples to copy
100  * The destination and source buffers must either be completely separate (non-overlapping), or
101  * they must both start at the same address.  Partially overlapping buffers are not supported.
102  * The conversion is done by truncation, without dithering, so it loses resolution.
103  */
104 void memcpy_to_i16_from_float(int16_t *dst, const float *src, size_t count);
105 
106 /* Copy samples from signed fixed-point 32-bit Q4.27 to single-precision floating-point.
107  * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
108  * [0xf8000000, 0x07ffffff].  The full float range is [-16.0, 16.0].  Note the closed range
109  * at 1.0 and 16.0 is due to rounding on conversion to float. See float_from_q4_27() for details.
110  * Parameters:
111  *  dst     Destination buffer
112  *  src     Source buffer
113  *  count   Number of samples to copy
114  * The destination and source buffers must either be completely separate (non-overlapping), or
115  * they must both start at the same address.  Partially overlapping buffers are not supported.
116  */
117 void memcpy_to_float_from_q4_27(float *dst, const int32_t *src, size_t count);
118 
119 /* Copy samples from signed fixed-point 16 bit Q0.15 to single-precision floating-point.
120  * The output float range is [-1.0, 1.0) for the fixed-point range [0x8000, 0x7fff].
121  * No rounding is needed as the representation is exact.
122  * Parameters:
123  *  dst     Destination buffer
124  *  src     Source buffer
125  *  count   Number of samples to copy
126  * The destination and source buffers must be completely separate.
127  */
128 void memcpy_to_float_from_i16(float *dst, const int16_t *src, size_t count);
129 
130 /* Copy samples from unsigned fixed-point 8 bit to single-precision floating-point.
131  * The output float range is [-1.0, 1.0) for the fixed-point range [0x00, 0xFF].
132  * No rounding is needed as the representation is exact.
133  * Parameters:
134  *  dst     Destination buffer
135  *  src     Source buffer
136  *  count   Number of samples to copy
137  * The destination and source buffers must be completely separate.
138  */
139 void memcpy_to_float_from_u8(float *dst, const uint8_t *src, size_t count);
140 
141 /* Copy samples from signed fixed-point packed 24 bit Q0.23 to single-precision floating-point.
142  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
143  * The output float range is [-1.0, 1.0) for the fixed-point range [0x800000, 0x7fffff].
144  * No rounding is needed as the representation is exact.
145  * Parameters:
146  *  dst     Destination buffer
147  *  src     Source buffer
148  *  count   Number of samples to copy
149  * The destination and source buffers must be completely separate.
150  */
151 void memcpy_to_float_from_p24(float *dst, const uint8_t *src, size_t count);
152 
153 /* Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed point 16 bit Q0.15.
 * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
155  * The data is truncated without rounding.
156  * Parameters:
157  *  dst     Destination buffer
158  *  src     Source buffer
159  *  count   Number of samples to copy
160  * The destination and source buffers must either be completely separate (non-overlapping), or
161  * they must both start at the same address.  Partially overlapping buffers are not supported.
162  */
163 void memcpy_to_i16_from_p24(int16_t *dst, const uint8_t *src, size_t count);
164 
165 /* Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed-point 32-bit Q0.31.
166  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
167  * The output data range is [0x80000000, 0x7fffff00] at intervals of 0x100.
168  * Parameters:
169  *  dst     Destination buffer
170  *  src     Source buffer
171  *  count   Number of samples to copy
172  * The destination and source buffers must be completely separate.
173  */
174 void memcpy_to_i32_from_p24(int32_t *dst, const uint8_t *src, size_t count);
175 
176 /* Copy samples from signed fixed point 16 bit Q0.15 to signed fixed-point packed 24 bit Q0.23.
177  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
178  * The output data range is [0x800000, 0x7fff00] (not full).
179  * Nevertheless there is no DC offset on the output, if the input has no DC offset.
180  * Parameters:
181  *  dst     Destination buffer
182  *  src     Source buffer
183  *  count   Number of samples to copy
184  * The destination and source buffers must be completely separate.
185  */
186 void memcpy_to_p24_from_i16(uint8_t *dst, const int16_t *src, size_t count);
187 
188 /* Copy samples from single-precision floating-point to signed fixed-point packed 24 bit Q0.23.
189  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
190  * The data is clamped and rounded to nearest, ties away from zero. See clamp24_from_float()
191  * for details.
192  * Parameters:
193  *  dst     Destination buffer
194  *  src     Source buffer
195  *  count   Number of samples to copy
196  * The destination and source buffers must either be completely separate (non-overlapping), or
197  * they must both start at the same address.  Partially overlapping buffers are not supported.
198  */
199 void memcpy_to_p24_from_float(uint8_t *dst, const float *src, size_t count);
200 
201 /* Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed-point packed 24 bit Q0.23.
202  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
 * The data is clamped to the range [0x800000, 0x7fffff].
204  * Parameters:
205  *  dst     Destination buffer
206  *  src     Source buffer
207  *  count   Number of samples to copy
208  * The destination and source buffers must be completely separate.
209  */
210 void memcpy_to_p24_from_q8_23(uint8_t *dst, const int32_t *src, size_t count);
211 
212 /* Shrink and copy samples from signed 32-bit fixed-point Q0.31
213  * to signed fixed-point packed 24 bit Q0.23.
214  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
215  * Parameters:
216  *  dst     Destination buffer
217  *  src     Source buffer
218  *  count   Number of samples to copy
219  * The destination and source buffers must either be completely separate (non-overlapping), or
220  * they must both start at the same address.  Partially overlapping buffers are not supported.
221  * The conversion is done by truncation, without dithering, so it loses resolution.
222  */
223 void memcpy_to_p24_from_i32(uint8_t *dst, const int32_t *src, size_t count);
224 
225 /* Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q8.23.
226  * The output data range is [0xff800000, 0x007fff00] at intervals of 0x100.
227  * Parameters:
228  *  dst     Destination buffer
229  *  src     Source buffer
230  *  count   Number of samples to copy
231  * The destination and source buffers must be completely separate.
232  */
233 void memcpy_to_q8_23_from_i16(int32_t *dst, const int16_t *src, size_t count);
234 
235 /* Copy samples from single-precision floating-point to signed fixed-point 32-bit Q8.23.
236  * This copy will clamp the Q8.23 representation to [0xff800000, 0x007fffff] even though there
237  * are guard bits available. Fractional lsb is rounded to nearest, ties away from zero.
238  * See clamp24_from_float() for details.
239  * Parameters:
240  *  dst     Destination buffer
241  *  src     Source buffer
242  *  count   Number of samples to copy
243  * The destination and source buffers must either be completely separate (non-overlapping), or
244  * they must both start at the same address.  Partially overlapping buffers are not supported.
245  */
246 void memcpy_to_q8_23_from_float_with_clamp(int32_t *dst, const float *src, size_t count);
247 
248 /* Copy samples from signed fixed point packed 24-bit Q0.23 to signed fixed-point 32-bit Q8.23.
249  * The output data range is [0xff800000, 0x007fffff].
250  * Parameters:
251  *  dst     Destination buffer
252  *  src     Source buffer
253  *  count   Number of samples to copy
254  * The destination and source buffers must be completely separate.
255  */
256 void memcpy_to_q8_23_from_p24(int32_t *dst, const uint8_t *src, size_t count);
257 
258 /* Copy samples from single-precision floating-point to signed fixed-point 32-bit Q4.27.
259  * The conversion will use the full available Q4.27 range, including guard bits.
260  * Fractional lsb is rounded to nearest, ties away from zero.
261  * See clampq4_27_from_float() for details.
262  * Parameters:
263  *  dst     Destination buffer
264  *  src     Source buffer
265  *  count   Number of samples to copy
266  * The destination and source buffers must either be completely separate (non-overlapping), or
267  * they must both start at the same address.  Partially overlapping buffers are not supported.
268  */
269 void memcpy_to_q4_27_from_float(int32_t *dst, const float *src, size_t count);
270 
271 /* Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed point 16-bit Q0.15.
272  * The data is clamped, and truncated without rounding.
273  * Parameters:
274  *  dst     Destination buffer
275  *  src     Source buffer
276  *  count   Number of samples to copy
277  * The destination and source buffers must either be completely separate (non-overlapping), or
278  * they must both start at the same address.  Partially overlapping buffers are not supported.
279  */
280 void memcpy_to_i16_from_q8_23(int16_t *dst, const int32_t *src, size_t count);
281 
282 /* Copy samples from signed fixed-point 32-bit Q8.23 to single-precision floating-point.
283  * The nominal output float range is [-1.0, 1.0) for the fixed-point
284  * range [0xff800000, 0x007fffff]. The maximum output float range is [-256.0, 256.0).
285  * No rounding is needed as the representation is exact for nominal values.
286  * Rounding for overflow values is to nearest, ties to even.
287  * Parameters:
288  *  dst     Destination buffer
289  *  src     Source buffer
290  *  count   Number of samples to copy
291  * The destination and source buffers must either be completely separate (non-overlapping), or
292  * they must both start at the same address.  Partially overlapping buffers are not supported.
293  */
294 void memcpy_to_float_from_q8_23(float *dst, const int32_t *src, size_t count);
295 
296 /* Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q0.31.
297  * The output data range is [0x80000000, 0x7fff0000] at intervals of 0x10000.
298  * Parameters:
299  *  dst     Destination buffer
300  *  src     Source buffer
301  *  count   Number of samples to copy
302  * The destination and source buffers must be completely separate.
303  */
304 void memcpy_to_i32_from_i16(int32_t *dst, const int16_t *src, size_t count);
305 
306 /* Copy samples from single-precision floating-point to signed fixed-point 32-bit Q0.31.
307  * If rounding is needed on truncation, the fractional lsb is rounded to nearest,
308  * ties away from zero. See clamp32_from_float() for details.
309  * Parameters:
310  *  dst     Destination buffer
311  *  src     Source buffer
312  *  count   Number of samples to copy
313  * The destination and source buffers must either be completely separate (non-overlapping), or
314  * they must both start at the same address.  Partially overlapping buffers are not supported.
315  */
316 void memcpy_to_i32_from_float(int32_t *dst, const float *src, size_t count);
317 
318 /* Copy samples from signed fixed-point 32-bit Q0.31 to single-precision floating-point.
319  * The float range is [-1.0, 1.0] for the fixed-point range [0x80000000, 0x7fffffff].
320  * Rounding is done according to float_from_i32().
321  * Parameters:
322  *  dst     Destination buffer
323  *  src     Source buffer
324  *  count   Number of samples to copy
325  * The destination and source buffers must either be completely separate (non-overlapping), or
326  * they must both start at the same address.  Partially overlapping buffers are not supported.
327  */
328 void memcpy_to_float_from_i32(float *dst, const int32_t *src, size_t count);
329 
330 /* Downmix pairs of interleaved stereo input 16-bit samples to mono output 16-bit samples.
331  * Parameters:
332  *  dst     Destination buffer
333  *  src     Source buffer
334  *  count   Number of stereo frames to downmix
335  * The destination and source buffers must be completely separate (non-overlapping).
336  * The current implementation truncates the mean rather than dither, but this may change.
337  */
338 void downmix_to_mono_i16_from_stereo_i16(int16_t *dst, const int16_t *src, size_t count);
339 
340 /* Upmix mono input 16-bit samples to pairs of interleaved stereo output 16-bit samples by
341  * duplicating.
342  * Parameters:
343  *  dst     Destination buffer
344  *  src     Source buffer
345  *  count   Number of mono samples to upmix
346  * The destination and source buffers must be completely separate (non-overlapping).
347  */
348 void upmix_to_stereo_i16_from_mono_i16(int16_t *dst, const int16_t *src, size_t count);
349 
350 /* Downmix pairs of interleaved stereo input float samples to mono output float samples
351  * by averaging the stereo pair together.
352  * Parameters:
353  *  dst     Destination buffer
354  *  src     Source buffer
355  *  count   Number of stereo frames to downmix
356  * The destination and source buffers must be completely separate (non-overlapping),
357  * or they must both start at the same address.
358  */
359 void downmix_to_mono_float_from_stereo_float(float *dst, const float *src, size_t count);
360 
361 /* Upmix mono input float samples to pairs of interleaved stereo output float samples by
362  * duplicating.
363  * Parameters:
364  *  dst     Destination buffer
365  *  src     Source buffer
366  *  count   Number of mono samples to upmix
367  * The destination and source buffers must be completely separate (non-overlapping).
368  */
369 void upmix_to_stereo_float_from_mono_float(float *dst, const float *src, size_t count);
370 
371 /* Return the total number of non-zero 32-bit samples */
372 size_t nonZeroMono32(const int32_t *samples, size_t count);
373 
374 /* Return the total number of non-zero 16-bit samples */
375 size_t nonZeroMono16(const int16_t *samples, size_t count);
376 
377 /* Return the total number of non-zero stereo frames, where a frame is considered non-zero
378  * if either of its constituent 32-bit samples is non-zero
379  */
380 size_t nonZeroStereo32(const int32_t *frames, size_t count);
381 
382 /* Return the total number of non-zero stereo frames, where a frame is considered non-zero
383  * if either of its constituent 16-bit samples is non-zero
384  */
385 size_t nonZeroStereo16(const int16_t *frames, size_t count);
386 
387 /* Copy frames, selecting source samples based on a source channel mask to fit
388  * the destination channel mask. Unmatched channels in the destination channel mask
389  * are zero filled. Unmatched channels in the source channel mask are dropped.
390  * Channels present in the channel mask are represented by set bits in the
391  * uint32_t value and are matched without further interpretation.
392  * Parameters:
393  *  dst         Destination buffer
394  *  dst_mask    Bit mask corresponding to destination channels present
395  *  src         Source buffer
396  *  src_mask    Bit mask corresponding to source channels present
397  *  sample_size Size of each sample in bytes.  Must be 1, 2, 3, or 4.
398  *  count       Number of frames to copy
399  * The destination and source buffers must be completely separate (non-overlapping).
400  * If the sample size is not in range, the function will abort.
401  */
402 void memcpy_by_channel_mask(void *dst, uint32_t dst_mask,
403         const void *src, uint32_t src_mask, size_t sample_size, size_t count);
404 
405 /* Copy frames, selecting source samples based on an index array (idxary).
406  * The idxary[] consists of dst_channels number of elements.
 * The ith element of idxary[] corresponds to the ith destination channel.
408  * A non-negative value is the channel index in the source frame.
409  * A negative index (-1) represents filling with 0.
410  *
411  * Example: Swapping L and R channels for stereo streams
412  * idxary[0] = 1;
413  * idxary[1] = 0;
414  *
415  * Example: Copying a mono source to the front center 5.1 channel
416  * idxary[0] = -1;
417  * idxary[1] = -1;
418  * idxary[2] = 0;
419  * idxary[3] = -1;
420  * idxary[4] = -1;
421  * idxary[5] = -1;
422  *
423  * This copy allows swizzling of channels or replication of channels.
424  *
425  * Parameters:
426  *  dst           Destination buffer
427  *  dst_channels  Number of destination channels per frame
428  *  src           Source buffer
429  *  src_channels  Number of source channels per frame
430  *  idxary        Array of indices representing channels in the source frame
431  *  sample_size   Size of each sample in bytes.  Must be 1, 2, 3, or 4.
432  *  count         Number of frames to copy
433  * The destination and source buffers must be completely separate (non-overlapping).
434  * If the sample size is not in range, the function will abort.
435  */
436 void memcpy_by_index_array(void *dst, uint32_t dst_channels,
437         const void *src, uint32_t src_channels,
438         const int8_t *idxary, size_t sample_size, size_t count);
439 
440 /* Prepares an index array (idxary) from channel masks, which can be later
441  * used by memcpy_by_index_array(). Returns the number of array elements required.
442  * This may be greater than idxcount, so the return value should be checked
443  * if idxary size is less than 32. Note that idxary is a caller allocated array
444  * of at least as many channels as present in the dst_mask.
445  * Channels present in the channel mask are represented by set bits in the
446  * uint32_t value and are matched without further interpretation.
447  *
448  * This function is typically used for converting audio data with different
449  * channel position masks.
450  *
451  * Parameters:
452  *  idxary      Updated array of indices of channels in the src frame for the dst frame
453  *  idxcount    Number of caller allocated elements in idxary
454  *  dst_mask    Bit mask corresponding to destination channels present
455  *  src_mask    Bit mask corresponding to source channels present
456  */
457 size_t memcpy_by_index_array_initialization(int8_t *idxary, size_t idxcount,
458         uint32_t dst_mask, uint32_t src_mask);
459 
460 /* Prepares an index array (idxary) from channel masks, which can be later
461  * used by memcpy_by_index_array(). Returns the number of array elements required.
462  *
463  * For a source channel index mask, the source channels will map to the destination
464  * channels as if counting the set bits in dst_mask in order from lsb to msb
465  * (zero bits are ignored). The ith bit of the src_mask corresponds to the
466  * ith SET bit of dst_mask and the ith destination channel.  Hence, a zero ith
467  * bit of the src_mask indicates that the ith destination channel plays silence.
468  *
469  * Parameters:
470  *  idxary      Updated array of indices of channels in the src frame for the dst frame
471  *  idxcount    Number of caller allocated elements in idxary
472  *  dst_mask    Bit mask corresponding to destination channels present
473  *  src_mask    Bit mask corresponding to source channels present
474  */
475 size_t memcpy_by_index_array_initialization_src_index(int8_t *idxary, size_t idxcount,
476         uint32_t dst_mask, uint32_t src_mask);
477 
478 /* Prepares an index array (idxary) from channel mask bits, which can be later
479  * used by memcpy_by_index_array(). Returns the number of array elements required.
480  *
481  * This initialization is for a destination channel index mask from a positional
482  * source mask.
483  *
 * For a destination channel index mask, the input channels will map
485  * to the destination channels, with the ith SET bit in the source bits corresponding
486  * to the ith bit in the destination bits. If there is a zero bit in the middle
487  * of set destination bits (unlikely), the corresponding source channel will
488  * be dropped.
489  *
490  * Parameters:
491  *  idxary      Updated array of indices of channels in the src frame for the dst frame
492  *  idxcount    Number of caller allocated elements in idxary
493  *  dst_mask    Bit mask corresponding to destination channels present
494  *  src_mask    Bit mask corresponding to source channels present
495  */
496 size_t memcpy_by_index_array_initialization_dst_index(int8_t *idxary, size_t idxcount,
497         uint32_t dst_mask, uint32_t src_mask);
498 
/**
 * Saturate (hard limit / clip) a signed 32-bit sample to the signed 16-bit range.
 *
 * Samples already inside [-32768, 32767] are returned unchanged; larger samples
 * become 32767 and smaller samples become -32768.
 */
static inline int16_t clamp16(int32_t sample)
{
    if (sample > 0x7FFF) {
        return 0x7FFF;
    }
    if (sample < -0x8000) {
        return -0x8000;
    }
    return (int16_t)sample;
}
508 
/*
 * Convert an IEEE 754 single precision float in [-1.0, 1.0) to an int16_t in
 * [-32768, 32767], clamping out-of-range input.  The upper bound is open: inputs
 * that would round up to 32768 are clamped to 32767, because the positive integer
 * subrange is one smaller than the negative one.
 *
 * Inputs outside [-1.0, 1.0), including -Inf and +Inf, clamp to -32768 or 32767.
 * NaN generally ends up as -32768 or 32767 depending on the sign bit of the NaN
 * (whose representation is not unique); strictly speaking NaN behavior should be
 * considered undefined.
 *
 * A fraction of exactly 0.5 lsb rounds to even (the IEEE 754 default).
 */
static inline int16_t clamp16_from_float(float f)
{
    /* Adding this bias moves the valid input range into the 16 least significant
     * bits of the float significand.  The usual bias would be 3 << 22; shifting
     * by 15 fewer bits has the effect of multiplying the input by 32768.
     */
    static const float bias = (float)(3 << (22 - 15));

    /* Bit patterns, viewed as integers, of the biased float at the ends of the
     * valid range.
     */
    enum {
        kZeroBits = 0x10f << 22,        /* 0x43c00000: biased input 0.0 */
        kMinBits = kZeroBits - 32768,   /* 0x43bf8000: biased input -1.0 */
        kMaxBits = kZeroBits + 32767    /* 0x43c07fff: largest in-range value */
    };

    union {
        float f;
        int32_t i;
    } bits;

    bits.f = f + bias; /* recenter the valid range */

    /* Float bit patterns, read as integers, keep their ordering here, so the
     * clamp is done with integer comparisons against [kMinBits, kMaxBits].
     */
    if (bits.i < kMinBits) {
        return -32768;
    }
    if (bits.i > kMaxBits) {
        return 32767;
    }
    /* The low 16 bits of the significand hold the converted sample. */
    return (int16_t)bits.i;
}
547 
/*
 * Convert an IEEE 754 single precision float in [-1.0, 1.0) to a uint8_t in
 * [0, 0xff] (offset binary: -1.0 maps to 0 and 0.0 maps to 0x80), clamping
 * out-of-range input.  The upper bound is open: inputs that would round up to
 * 256 are clamped to 255, because of the smaller positive integer subrange.
 *
 * Inputs outside [-1.0, 1.0), including -Inf and +Inf, clamp to 0 or 255.
 * NaN generally ends up as 0 or 255 depending on the sign bit of the NaN
 * (whose representation is not unique); strictly speaking NaN behavior should be
 * considered undefined.
 *
 * A fraction of exactly 0.5 lsb rounds to even (the IEEE 754 default).
 */
static inline uint8_t clamp8_from_float(float f)
{
    /* Adding this bias moves the valid input range into the 8 least significant
     * bits of the float significand.  The usual bias would be 3 << 22; shifting
     * by 7 fewer bits has the effect of multiplying the input by 128, and the
     * extra +1 cancels the -1.0 lower bound so that -1.0 lands on 0.
     */
    static const float bias = (float)((3 << (22 - 7)) + 1 /* to cancel -1.0 */);

    /* Bit patterns, viewed as integers, of the biased float at the ends of the
     * valid range.
     */
    enum {
        kMinBits = 0x11f << 22,     /* 0x47c00000: biased input -1.0 */
        kMaxBits = kMinBits + 255   /* 0x47c000ff: largest in-range value */
    };

    union {
        float f;
        int32_t i;
    } bits;

    bits.f = f + bias; /* recenter the valid range */

    /* Float bit patterns, read as integers, keep their ordering here, so the
     * clamp is done with integer comparisons against [kMinBits, kMaxBits].
     */
    if (bits.i < kMinBits) {
        return 0;
    }
    if (bits.i > kMaxBits) {
        return 255;
    }
    /* The low 8 bits of the significand hold the converted sample. */
    return (uint8_t)bits.i;
}
586 
/* Convert a single-precision floating point value to a Q0.23 integer value, stored in a
 * 32 bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
 *
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [-1.0, 1.0) are clamped to -8388608 and 8388607,
 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 *
 * Parameters:
 *  f       Input sample, nominally in [-1.0, 1.0)
 * Returns the clamped and rounded sample in [-0x800000, 0x7fffff].
 */
static inline int32_t clamp24_from_float(float f)
{
    /* Each initializer is spelled out as an arithmetic constant expression.
     * ISO C does not allow an object with static storage duration to be
     * initialized from another const variable, so the limits repeat the scale
     * factor rather than referring to `scale`.
     */
    static const float scale = (float)(1 << 23);
    static const float limpos = 0x7fffff / (float)(1 << 23);  /* largest Q0.23 value */
    static const float limneg = -0x800000 / (float)(1 << 23); /* -1.0 */

    if (f <= limneg) {
        return -0x800000;
    }
    if (f >= limpos) {
        return 0x7fffff;
    }
    f *= scale;
    /* Float to integer conversion truncates toward zero, so add or subtract
     * half an lsb first to round to nearest, ties away from 0.  The 0.5
     * constant is a double, so the addition is carried out in double.
     */
    return (int32_t)(f > 0 ? f + 0.5 : f - 0.5);
}
613 
/* Saturate a signed fixed-point 32-bit Q8.23 value to the Q0.23 range, returning it in a
 * 32-bit signed integer (the storage is still Q8.23, but the value fits in Q0.23).
 *
 * Inputs below -0x800000 yield -0x800000, inputs above 0x7fffff yield 0x7fffff,
 * and everything in between is passed through unchanged.
 */
static inline int32_t clamp24_from_q8_23(int32_t ival)
{
    enum {
        kQ23Min = -0x800000,
        kQ23Max = 0x7fffff
    };

    if (ival > kQ23Max) {
        return kQ23Max;
    }
    if (ival < kQ23Min) {
        return kQ23Min;
    }
    return ival;
}
631 
/* Convert a single-precision floating point value to a Q4.27 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [-16.0, 16.0) are clamped to -2147483648 and 2147483647,
 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 *
 * Parameters:
 *  f       Input sample, nominally in [-16.0, 16.0)
 * Returns the clamped and rounded Q4.27 sample.
 */
static inline int32_t clampq4_27_from_float(float f)
{
    static const float scale = (float)(1UL << 27);
    static const float limpos = 16.f;
    static const float limneg = -16.f;

    if (f <= limneg) {
        /* INT32_MIN is used instead of -0x80000000: the latter is an unsigned
         * expression and returning it relies on an implementation-defined
         * conversion to int32_t.
         */
        return INT32_MIN;
    }
    if (f >= limpos) {
        return INT32_MAX;
    }
    f *= scale;
    /* Float to integer conversion truncates toward zero, so add or subtract
     * half an lsb first to round to nearest, ties away from 0.  The 0.5
     * constant is a double, so the addition is carried out in double.
     */
    return (int32_t)(f > 0 ? f + 0.5 : f - 0.5);
}
656 
/* Convert a single-precision float to a signed Q0.31 fixed-point integer.
 * Rounding is to nearest, with ties going away from 0.
 *
 * Inputs at or below -1.0 saturate to -2147483648 and inputs at or above 1.0
 * saturate to 2147483647; this includes -Inf and +Inf. The result for NaN is
 * undefined and may differ between hardware and future implementations.
 */
static inline int32_t clamp32_from_float(float f)
{
    /* 1.0 corresponds to 2^31 integer units in Q0.31. */
    const float one_q31 = (float)(1UL << 31);
    float scaled;

    if (f >= 1.0f) {
        return 0x7fffffff;
    }
    if (f <= -1.0f) {
        return -0x7fffffff - 1; /* most negative int32_t */
    }

    /* Float-to-integer conversion truncates toward zero, so bias by half an lsb
     * in the direction of the sign before converting.
     */
    scaled = f * one_q31;
    if (scaled > 0) {
        return scaled + 0.5;
    }
    return scaled - 0.5;
}
681 
/* Convert a signed 32-bit Q4.27 fixed-point value to single-precision float.
 *
 * Fixed-point inputs in [0xf8000000, 0x07ffffff] map to a nominal float range of
 * [-1.0, 1.0]; the full input range maps to [-16.0, 16.0].
 *
 * The upper ends (1.0 and 16.0) are reachable because of rounding: an integer that
 * needs more than the 24-bit float significand is rounded when converted to float
 * (to nearest, ties to even, under the IEEE 754 default mode).
 */
static inline float float_from_q4_27(int32_t ival)
{
    /* Weight of one lsb, 2^-27. Being a power of two, multiplying by it is exact;
     * the only possible rounding is in the integer-to-float conversion itself.
     */
    const float lsb = 1.0f / (float)(1UL << 27);

    return (float)ival * lsb;
}
704 
/* Convert an unsigned 32-bit U4.28 fixed-point value to single-precision float.
 *
 * Fixed-point inputs in [0x00000000, 0x10000000] map to a nominal float range of
 * [0.0, 1.0]; the full input range maps to [0.0, 16.0].
 *
 * 16.0 itself is reachable because of rounding: an integer that needs more than
 * the 24-bit float significand is rounded when converted to float (to nearest,
 * ties to even, under the IEEE 754 default mode).
 */
static inline float float_from_u4_28(uint32_t uval)
{
    /* Weight of one lsb, 2^-28 (a power of two, so the multiply is exact). */
    const float lsb = 1.0f / (float)(1UL << 28);

    return (float)uval * lsb;
}
720 
/* Convert an unsigned 16-bit U4.12 fixed-point value to single-precision float.
 *
 * Fixed-point inputs in [0x0000, 0x1000] map to a nominal float range of
 * [0.0, 1.0]; the full input range maps to [0.0, 16.0).
 */
static inline float float_from_u4_12(uint16_t uval)
{
    /* Weight of one lsb, 2^-12. */
    const float lsb = 1.0f / (float)(1UL << 12);

    return (float)uval * lsb;
}
731 
/* Convert a single-precision float to an unsigned U4.28 fixed-point integer.
 * Rounding is to nearest, with ties going away from 0.
 *
 * Inputs at or below 0 return 0 and inputs at or above 16.0 return 4294967295;
 * this includes -Inf and +Inf. The result for NaN is undefined and may differ
 * between hardware and future implementations.
 */
static inline uint32_t u4_28_from_float(float f)
{
    /* 1.0 corresponds to 2^28 integer units in U4.28. */
    const float one_u28 = (float)(1 << 28);
    /* Upper saturation threshold, 0xffffffff / 2^28 evaluated in float: 0xffffffff
     * is not representable as a float and rounds to 2^32, which makes this exactly 16.
     */
    const float upper = 16.0f;

    if (f >= upper) {
        return 0xffffffff;
    }
    if (f <= 0.0f) {
        return 0;
    }

    /* Float-to-integer conversion truncates, so add half an lsb to round to nearest. */
    return f * one_u28 + 0.5;
}
754 
/* Convert a single-precision float to an unsigned U4.12 fixed-point integer.
 * Rounding is to nearest, with ties going away from 0.
 *
 * Inputs at or below 0 return 0 and inputs at or above 65535/4096 (just under 16.0)
 * return 65535; this includes -Inf and +Inf. The result for NaN is undefined and
 * may differ between hardware and future implementations.
 */
static inline uint16_t u4_12_from_float(float f)
{
    /* 1.0 corresponds to 2^12 integer units in U4.12. */
    const float one_u12 = (float)(1 << 12);
    /* Largest representable U4.12 value expressed as a float. */
    const float upper = 0xffff / one_u12;

    if (f >= upper) {
        return 0xffff;
    }
    if (f <= 0.0f) {
        return 0;
    }

    /* Float-to-integer conversion truncates, so add half an lsb to round to nearest. */
    return f * one_u12 + 0.5;
}
777 
/* Convert a signed 16-bit Q0.15 fixed-point value to single-precision float.
 *
 * The fixed-point range [0x8000, 0x7fff] maps to the float range [-1.0, 1.0).
 * Every input is represented exactly; no rounding takes place.
 */
static inline float float_from_i16(int16_t ival)
{
    /* Weight of one lsb: the reciprocal of the half-sided 16-bit range (32768).
     * It is a power of two, so the multiplication is exact.
     */
    const float lsb = 1.0f / (float)(1UL << 15);

    return (float)ival * lsb;
}
796 
/* Convert an unsigned 8-bit (offset-binary) sample to single-precision float.
 *
 * The input range [0x00, 0xff] maps to a nominal float range of [-1.0, 1.0),
 * with 0x80 mapping to 0.0.
 */
static inline float float_from_u8(uint8_t uval)
{
    /* Weight of one lsb, 2^-7. */
    const float lsb = 1.0f / (float)(1UL << 7);
    /* Remove the 128 offset to obtain a signed value in [-128, 127]. */
    const int centered = (int)uval - 128;

    return (float)centered * lsb;
}
807 
/* Convert a packed 24-bit Q0.23 value, stored as three consecutive bytes, to a
 * signed 32-bit Q0.31 fixed-point value.
 *
 * packed24 must point to at least three readable bytes. They are read
 * least-significant byte first (packed24[0] is the low byte, packed24[2] carries
 * the sign bit), which is the in-memory layout on a little-endian host.
 * NOTE(review): this was previously described as "native-endian"; on a big-endian
 * host the byte order used here would not match native storage - confirm.
 *
 * The 24 bits are placed in the upper three bytes of the result and the low byte
 * is zero, so the input range [0x800000, 0x7fffff] produces outputs in
 * [0x80000000, 0x7fffff00]. Although the positive side stops short of full scale,
 * no DC offset is introduced if the input has none.
 *
 * Avoid relying on the limited output range, as future implementations may go
 * to full range.
 */
static inline int32_t i32_from_p24(const uint8_t *packed24)
{
    /* Assemble in unsigned arithmetic: a uint8_t operand promotes to (signed) int,
     * and shifting a byte >= 0x80 left by 24 would move a 1 into the sign bit,
     * which is undefined behavior for a signed type.
     */
    const uint32_t bits = ((uint32_t)packed24[0] << 8)
            | ((uint32_t)packed24[1] << 16)
            | ((uint32_t)packed24[2] << 24);

    return (int32_t)bits;
}
822 
/* Convert a signed 32-bit Q0.31 fixed-point value to single-precision float.
 *
 * The fixed-point range [0x80000000, 0x7fffffff] maps to the float range
 * [-1.0, 1.0].
 *
 * Large magnitudes do not fit the 24-bit float significand, so the least
 * significant 8 bits may be rounded away (to nearest, ties to even).
 */
static inline float float_from_i32(int32_t ival)
{
    /* Weight of one lsb, 2^-31. */
    const float lsb = 1.0f / (float)(1UL << 31);

    return (float)ival * lsb;
}
837 
/* Convert a packed 24-bit Q0.23 value, read from three bytes at packed24, to
 * single-precision float.
 *
 * The fixed-point range [0x800000, 0x7fffff] maps to the float range [-1.0, 1.0).
 * A 24-bit value fits the float significand, so the result is exact.
 */
static inline float float_from_p24(const uint8_t *packed24)
{
    /* Widen to Q0.31 first, then reuse the 32-bit conversion. */
    const int32_t q31 = i32_from_p24(packed24);

    return float_from_i32(q31);
}
848 
/* Convert a signed 32-bit Q8.23 fixed-point value to single-precision float.
 *
 * Fixed-point inputs in [0xff800000, 0x007fffff] map to a nominal float range of
 * [-1.0, 1.0); the full input range maps to [-256.0, 256.0).
 *
 * Values in the nominal range convert exactly. Outside it the integer may exceed
 * the float significand, in which case rounding is to nearest, ties to even.
 */
static inline float float_from_q8_23(int32_t ival)
{
    /* Weight of one lsb, 2^-23. */
    const float lsb = 1.0f / (float)(1UL << 23);

    return (float)ival * lsb;
}
862 
/**
 * 16-bit by 16-bit multiply-accumulate with a 32-bit accumulator:
 * returns a + in * v.
 *
 * ARM (non-Thumb) builds issue an SMLABB instruction; all other targets use
 * plain C arithmetic.
 */
static inline
int32_t mulAdd(int16_t in, int16_t v, int32_t a)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    asm("smlabb %[out], %[in], %[v], %[a] \n"
        : [out] "=r" (out)
        : [in] "%r" (in), [v] "r" (v), [a] "r" (a));
    return out;
#else
    const int32_t product = in * (int32_t)v;
    return a + product;
#endif
}
880 
/**
 * 16-bit by 16-bit multiply producing a 32-bit result: returns in * v.
 *
 * ARM (non-Thumb) builds issue an SMULBB instruction; all other targets use
 * plain C arithmetic.
 */
static inline
int32_t mul(int16_t in, int16_t v)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    asm("smulbb %[out], %[in], %[v] \n"
        : [out] "=r" (out)
        : [in] "%r" (in), [v] "r" (v));
    return out;
#else
    const int32_t product = in * (int32_t)v;
    return product;
#endif
}
898 
/**
 * Multiply-accumulate on one half of two packed 32-bit words (interleaved stereo
 * pairs of 16-bit samples): returns a + in * v for the selected signed 16-bit halves.
 *
 * A non-zero left selects the low 16 bits of inRL and vRL; zero selects the
 * high 16 bits of each.
 */
static inline
int32_t mulAddRL(int left, uint32_t inRL, uint32_t vRL, int32_t a)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    if (!left) {
        /* top halfword x top halfword */
        asm("smlatt %[out], %[inRL], %[vRL], %[a] \n"
            : [out] "=r" (out)
            : [inRL] "%r" (inRL), [vRL] "r" (vRL), [a] "r" (a));
    } else {
        /* bottom halfword x bottom halfword */
        asm("smlabb %[out], %[inRL], %[vRL], %[a] \n"
            : [out] "=r" (out)
            : [inRL] "%r" (inRL), [vRL] "r" (vRL), [a] "r" (a));
    }
    return out;
#else
    /* Extract the selected halves as signed 16-bit values, then accumulate. */
    const int16_t in = left ? (int16_t)(inRL & 0xFFFF) : (int16_t)(inRL >> 16);
    const int16_t v = left ? (int16_t)(vRL & 0xFFFF) : (int16_t)(vRL >> 16);

    return a + in * v;
#endif
}
927 
/**
 * Multiply one half of two packed 32-bit words (interleaved stereo pairs of
 * 16-bit samples): returns in * v for the selected signed 16-bit halves.
 *
 * A non-zero left selects the low 16 bits of inRL and vRL; zero selects the
 * high 16 bits of each.
 */
static inline
int32_t mulRL(int left, uint32_t inRL, uint32_t vRL)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    if (!left) {
        /* top halfword x top halfword */
        asm("smultt %[out], %[inRL], %[vRL] \n"
            : [out] "=r" (out)
            : [inRL] "%r" (inRL), [vRL] "r" (vRL));
    } else {
        /* bottom halfword x bottom halfword */
        asm("smulbb %[out], %[inRL], %[vRL] \n"
            : [out] "=r" (out)
            : [inRL] "%r" (inRL), [vRL] "r" (vRL));
    }
    return out;
#else
    /* Extract the selected halves as signed 16-bit values, then multiply. */
    const int16_t in = left ? (int16_t)(inRL & 0xFFFF) : (int16_t)(inRL >> 16);
    const int16_t v = left ? (int16_t)(vRL & 0xFFFF) : (int16_t)(vRL >> 16);

    return in * v;
#endif
}
956 
957 __END_DECLS
958 
959 #endif  // ANDROID_AUDIO_PRIMITIVES_H
960