1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef ANDROID_AUDIO_PRIMITIVES_H
18 #define ANDROID_AUDIO_PRIMITIVES_H
19
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <sys/cdefs.h>
23
24 /** \cond */
25 __BEGIN_DECLS
26 /** \endcond */
27
28 /**
29 * \file primitives.h
30 * The memcpy_* conversion routines are designed to work in-place on same dst as src
31 * buffers only if the types shrink on copy, with the exception of memcpy_to_i16_from_u8().
32 * This allows the loops to go upwards for faster cache access (and may be more flexible
33 * for future optimization later).
34 */
35
36 /**
37 * Dither and clamp pairs of 32-bit input samples (sums) to 16-bit output samples (out).
38 * Each 32-bit input sample can be viewed as a signed fixed-point Q19.12 of which the
39 * .12 fraction bits are dithered and the 19 integer bits are clamped to signed 16 bits.
40 * Alternatively the input can be viewed as Q4.27, of which the lowest .12 of the fraction
41 * is dithered and the remaining fraction is converted to the output Q.15, with clamping
42 * on the 4 integer guard bits.
43 *
44 * For interleaved stereo, c is the number of sample pairs,
45 * and out is an array of interleaved pairs of 16-bit samples per channel.
46 * For mono, c is the number of samples / 2, and out is an array of 16-bit samples.
47 * The name "dither" is a misnomer; the current implementation does not actually dither
48 * but uses truncation. This may change.
49 * The out and sums buffers must either be completely separate (non-overlapping), or
50 * they must both start at the same address. Partially overlapping buffers are not supported.
51 */
52 void ditherAndClamp(int32_t* out, const int32_t *sums, size_t c);
53
54 /**
55 * Expand and copy samples from unsigned 8-bit offset by 0x80 to signed 16-bit.
56 *
57 * \param dst Destination buffer
58 * \param src Source buffer
59 * \param count Number of samples to copy
60 *
61 * The destination and source buffers must either be completely separate (non-overlapping), or
62 * they must both start at the same address. Partially overlapping buffers are not supported.
63 */
64 void memcpy_to_i16_from_u8(int16_t *dst, const uint8_t *src, size_t count);
65
66 /**
67 * Shrink and copy samples from signed 16-bit to unsigned 8-bit offset by 0x80.
68 *
69 * \param dst Destination buffer
70 * \param src Source buffer
71 * \param count Number of samples to copy
72 *
73 * The destination and source buffers must either be completely separate (non-overlapping), or
74 * they must both start at the same address. Partially overlapping buffers are not supported.
75 * The conversion is done by truncation, without dithering, so it loses resolution.
76 */
77 void memcpy_to_u8_from_i16(uint8_t *dst, const int16_t *src, size_t count);
78
79 /**
80 * Copy samples from float to unsigned 8-bit offset by 0x80.
81 *
82 * \param dst Destination buffer
83 * \param src Source buffer
84 * \param count Number of samples to copy
85 *
86 * The destination and source buffers must either be completely separate (non-overlapping), or
87 * they must both start at the same address. Partially overlapping buffers are not supported.
88 * The conversion is done by truncation, without dithering, so it loses resolution.
89 */
90 void memcpy_to_u8_from_float(uint8_t *dst, const float *src, size_t count);
91
92 /**
93 * Shrink and copy samples from signed 32-bit fixed-point Q0.31 to signed 16-bit Q0.15.
94 *
95 * \param dst Destination buffer
96 * \param src Source buffer
97 * \param count Number of samples to copy
98 *
99 * The destination and source buffers must either be completely separate (non-overlapping), or
100 * they must both start at the same address. Partially overlapping buffers are not supported.
101 * The conversion is done by truncation, without dithering, so it loses resolution.
102 */
103 void memcpy_to_i16_from_i32(int16_t *dst, const int32_t *src, size_t count);
104
105 /**
106 * Shrink and copy samples from single-precision floating-point to signed 16-bit.
107 * Each float should be in the range -1.0 to 1.0. Values outside that range are clamped,
108 * refer to clamp16_from_float().
109 *
110 * \param dst Destination buffer
111 * \param src Source buffer
112 * \param count Number of samples to copy
113 *
114 * The destination and source buffers must either be completely separate (non-overlapping), or
115 * they must both start at the same address. Partially overlapping buffers are not supported.
116 * The conversion is done by truncation, without dithering, so it loses resolution.
117 */
118 void memcpy_to_i16_from_float(int16_t *dst, const float *src, size_t count);
119
120 /**
121 * Copy samples from signed fixed-point 32-bit Q4.27 to single-precision floating-point.
122 * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
123 * [0xf8000000, 0x07ffffff]. The full float range is [-16.0, 16.0]. Note the closed range
124 * at 1.0 and 16.0 is due to rounding on conversion to float. See float_from_q4_27() for details.
125 *
126 * \param dst Destination buffer
127 * \param src Source buffer
128 * \param count Number of samples to copy
129 *
130 * The destination and source buffers must either be completely separate (non-overlapping), or
131 * they must both start at the same address. Partially overlapping buffers are not supported.
132 */
133 void memcpy_to_float_from_q4_27(float *dst, const int32_t *src, size_t count);
134
135 /**
136 * Copy samples from signed fixed-point 16 bit Q0.15 to single-precision floating-point.
137 * The output float range is [-1.0, 1.0) for the fixed-point range [0x8000, 0x7fff].
138 * No rounding is needed as the representation is exact.
139 *
140 * \param dst Destination buffer
141 * \param src Source buffer
142 * \param count Number of samples to copy
143 *
144 * The destination and source buffers must be completely separate.
145 */
146 void memcpy_to_float_from_i16(float *dst, const int16_t *src, size_t count);
147
148 /**
149 * Copy samples from unsigned fixed-point 8 bit to single-precision floating-point.
150 * The output float range is [-1.0, 1.0) for the fixed-point range [0x00, 0xFF].
151 * No rounding is needed as the representation is exact.
152 *
153 * \param dst Destination buffer
154 * \param src Source buffer
155 * \param count Number of samples to copy
156 *
157 * The destination and source buffers must be completely separate.
158 */
159 void memcpy_to_float_from_u8(float *dst, const uint8_t *src, size_t count);
160
161 /**
162 * Copy samples from signed fixed-point packed 24 bit Q0.23 to single-precision floating-point.
163 * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
164 * The output float range is [-1.0, 1.0) for the fixed-point range [0x800000, 0x7fffff].
165 * No rounding is needed as the representation is exact.
166 *
167 * \param dst Destination buffer
168 * \param src Source buffer
169 * \param count Number of samples to copy
170 *
171 * The destination and source buffers must be completely separate.
172 */
173 void memcpy_to_float_from_p24(float *dst, const uint8_t *src, size_t count);
174
175 /**
176 * Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed point 16 bit Q0.15.
 * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
178 * The data is truncated without rounding.
179 *
180 * \param dst Destination buffer
181 * \param src Source buffer
182 * \param count Number of samples to copy
183 *
184 * The destination and source buffers must either be completely separate (non-overlapping), or
185 * they must both start at the same address. Partially overlapping buffers are not supported.
186 */
187 void memcpy_to_i16_from_p24(int16_t *dst, const uint8_t *src, size_t count);
188
189 /**
190 * Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed-point 32-bit Q0.31.
191 * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
192 * The output data range is [0x80000000, 0x7fffff00] at intervals of 0x100.
193 *
194 * \param dst Destination buffer
195 * \param src Source buffer
196 * \param count Number of samples to copy
197 *
198 * The destination and source buffers must be completely separate.
199 */
200 void memcpy_to_i32_from_p24(int32_t *dst, const uint8_t *src, size_t count);
201
202 /**
203 * Copy samples from signed fixed point 16 bit Q0.15 to signed fixed-point packed 24 bit Q0.23.
204 * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
205 * The output data range is [0x800000, 0x7fff00] (not full).
206 * Nevertheless there is no DC offset on the output, if the input has no DC offset.
207 *
208 * \param dst Destination buffer
209 * \param src Source buffer
210 * \param count Number of samples to copy
211 *
212 * The destination and source buffers must be completely separate.
213 */
214 void memcpy_to_p24_from_i16(uint8_t *dst, const int16_t *src, size_t count);
215
216 /**
217 * Copy samples from single-precision floating-point to signed fixed-point packed 24 bit Q0.23.
218 * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
219 * The data is clamped and rounded to nearest, ties away from zero. See clamp24_from_float()
220 * for details.
221 *
222 * \param dst Destination buffer
223 * \param src Source buffer
224 * \param count Number of samples to copy
225 *
226 * The destination and source buffers must either be completely separate (non-overlapping), or
227 * they must both start at the same address. Partially overlapping buffers are not supported.
228 */
229 void memcpy_to_p24_from_float(uint8_t *dst, const float *src, size_t count);
230
231 /**
232 * Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed-point packed 24 bit Q0.23.
233 * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
 * The data is clamped to the range [0x800000, 0x7fffff].
235 *
236 * \param dst Destination buffer
237 * \param src Source buffer
238 * \param count Number of samples to copy
239 *
240 * The destination and source buffers must be completely separate.
241 */
242 void memcpy_to_p24_from_q8_23(uint8_t *dst, const int32_t *src, size_t count);
243
244 /**
245 * Shrink and copy samples from signed 32-bit fixed-point Q0.31
246 * to signed fixed-point packed 24 bit Q0.23.
247 * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
248 *
249 * \param dst Destination buffer
250 * \param src Source buffer
251 * \param count Number of samples to copy
252 *
253 * The destination and source buffers must either be completely separate (non-overlapping), or
254 * they must both start at the same address. Partially overlapping buffers are not supported.
255 * The conversion is done by truncation, without dithering, so it loses resolution.
256 */
257 void memcpy_to_p24_from_i32(uint8_t *dst, const int32_t *src, size_t count);
258
259 /**
260 * Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q8.23.
261 * The output data range is [0xff800000, 0x007fff00] at intervals of 0x100.
262 *
263 * \param dst Destination buffer
264 * \param src Source buffer
265 * \param count Number of samples to copy
266 *
267 * The destination and source buffers must be completely separate.
268 */
269 void memcpy_to_q8_23_from_i16(int32_t *dst, const int16_t *src, size_t count);
270
271 /**
272 * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q8.23.
273 * This copy will clamp the Q8.23 representation to [0xff800000, 0x007fffff] even though there
274 * are guard bits available. Fractional lsb is rounded to nearest, ties away from zero.
275 * See clamp24_from_float() for details.
276 *
277 * \param dst Destination buffer
278 * \param src Source buffer
279 * \param count Number of samples to copy
280 *
281 * The destination and source buffers must either be completely separate (non-overlapping), or
282 * they must both start at the same address. Partially overlapping buffers are not supported.
283 */
284 void memcpy_to_q8_23_from_float_with_clamp(int32_t *dst, const float *src, size_t count);
285
286 /**
287 * Copy samples from signed fixed point packed 24-bit Q0.23 to signed fixed-point 32-bit Q8.23.
288 * The output data range is [0xff800000, 0x007fffff].
289 *
290 * \param dst Destination buffer
291 * \param src Source buffer
292 * \param count Number of samples to copy
293 *
294 * The destination and source buffers must be completely separate.
295 */
296 void memcpy_to_q8_23_from_p24(int32_t *dst, const uint8_t *src, size_t count);
297
298 /**
299 * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q4.27.
300 * The conversion will use the full available Q4.27 range, including guard bits.
301 * Fractional lsb is rounded to nearest, ties away from zero.
302 * See clampq4_27_from_float() for details.
303 *
304 * \param dst Destination buffer
305 * \param src Source buffer
306 * \param count Number of samples to copy
307 *
308 * The destination and source buffers must either be completely separate (non-overlapping), or
309 * they must both start at the same address. Partially overlapping buffers are not supported.
310 */
311 void memcpy_to_q4_27_from_float(int32_t *dst, const float *src, size_t count);
312
313 /**
314 * Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed point 16-bit Q0.15.
315 * The data is clamped, and truncated without rounding.
316 *
317 * \param dst Destination buffer
318 * \param src Source buffer
319 * \param count Number of samples to copy
320 *
321 * The destination and source buffers must either be completely separate (non-overlapping), or
322 * they must both start at the same address. Partially overlapping buffers are not supported.
323 */
324 void memcpy_to_i16_from_q8_23(int16_t *dst, const int32_t *src, size_t count);
325
326 /**
327 * Copy samples from signed fixed-point 32-bit Q8.23 to single-precision floating-point.
328 * The nominal output float range is [-1.0, 1.0) for the fixed-point
329 * range [0xff800000, 0x007fffff]. The maximum output float range is [-256.0, 256.0).
330 * No rounding is needed as the representation is exact for nominal values.
331 * Rounding for overflow values is to nearest, ties to even.
332 *
333 * \param dst Destination buffer
334 * \param src Source buffer
335 * \param count Number of samples to copy
336 *
337 * The destination and source buffers must either be completely separate (non-overlapping), or
338 * they must both start at the same address. Partially overlapping buffers are not supported.
339 */
340 void memcpy_to_float_from_q8_23(float *dst, const int32_t *src, size_t count);
341
342 /**
343 * Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q0.31.
344 * The output data range is [0x80000000, 0x7fff0000] at intervals of 0x10000.
345 *
346 * \param dst Destination buffer
347 * \param src Source buffer
348 * \param count Number of samples to copy
349 *
350 * The destination and source buffers must be completely separate.
351 */
352 void memcpy_to_i32_from_i16(int32_t *dst, const int16_t *src, size_t count);
353
354 /**
355 * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q0.31.
356 * If rounding is needed on truncation, the fractional lsb is rounded to nearest,
357 * ties away from zero. See clamp32_from_float() for details.
358 *
359 * \param dst Destination buffer
360 * \param src Source buffer
361 * \param count Number of samples to copy
362 *
363 * The destination and source buffers must either be completely separate (non-overlapping), or
364 * they must both start at the same address. Partially overlapping buffers are not supported.
365 */
366 void memcpy_to_i32_from_float(int32_t *dst, const float *src, size_t count);
367
368 /**
369 * Copy samples from signed fixed-point 32-bit Q0.31 to single-precision floating-point.
370 * The float range is [-1.0, 1.0] for the fixed-point range [0x80000000, 0x7fffffff].
371 * Rounding is done according to float_from_i32().
372 *
373 * \param dst Destination buffer
374 * \param src Source buffer
375 * \param count Number of samples to copy
376 *
377 * The destination and source buffers must either be completely separate (non-overlapping), or
378 * they must both start at the same address. Partially overlapping buffers are not supported.
379 */
380 void memcpy_to_float_from_i32(float *dst, const int32_t *src, size_t count);
381
382 /**
383 * Downmix pairs of interleaved stereo input 16-bit samples to mono output 16-bit samples.
384 *
385 * \param dst Destination buffer
386 * \param src Source buffer
387 * \param count Number of stereo frames to downmix
388 *
389 * The destination and source buffers must be completely separate (non-overlapping).
390 * The current implementation truncates the mean rather than dither, but this may change.
391 */
392 void downmix_to_mono_i16_from_stereo_i16(int16_t *dst, const int16_t *src, size_t count);
393
394 /**
395 * Upmix mono input 16-bit samples to pairs of interleaved stereo output 16-bit samples by
396 * duplicating.
397 *
398 * \param dst Destination buffer
399 * \param src Source buffer
400 * \param count Number of mono samples to upmix
401 *
402 * The destination and source buffers must be completely separate (non-overlapping).
403 */
404 void upmix_to_stereo_i16_from_mono_i16(int16_t *dst, const int16_t *src, size_t count);
405
406 /**
407 * Downmix pairs of interleaved stereo input float samples to mono output float samples
408 * by averaging the stereo pair together.
409 *
410 * \param dst Destination buffer
411 * \param src Source buffer
412 * \param count Number of stereo frames to downmix
413 *
414 * The destination and source buffers must be completely separate (non-overlapping),
415 * or they must both start at the same address.
416 */
417 void downmix_to_mono_float_from_stereo_float(float *dst, const float *src, size_t count);
418
419 /**
420 * Upmix mono input float samples to pairs of interleaved stereo output float samples by
421 * duplicating.
422 *
423 * \param dst Destination buffer
424 * \param src Source buffer
425 * \param count Number of mono samples to upmix
426 *
427 * The destination and source buffers must be completely separate (non-overlapping).
428 */
429 void upmix_to_stereo_float_from_mono_float(float *dst, const float *src, size_t count);
430
431 /**
432 * \return the total number of non-zero 32-bit samples.
433 */
434 size_t nonZeroMono32(const int32_t *samples, size_t count);
435
436 /**
437 * \return the total number of non-zero 16-bit samples.
438 */
439 size_t nonZeroMono16(const int16_t *samples, size_t count);
440
441 /**
442 * \return the total number of non-zero stereo frames, where a frame is considered non-zero
443 * if either of its constituent 32-bit samples is non-zero.
444 */
445 size_t nonZeroStereo32(const int32_t *frames, size_t count);
446
447 /**
448 * \return the total number of non-zero stereo frames, where a frame is considered non-zero
449 * if either of its constituent 16-bit samples is non-zero.
450 */
451 size_t nonZeroStereo16(const int16_t *frames, size_t count);
452
453 /**
454 * Copy frames, selecting source samples based on a source channel mask to fit
455 * the destination channel mask. Unmatched channels in the destination channel mask
456 * are zero filled. Unmatched channels in the source channel mask are dropped.
457 * Channels present in the channel mask are represented by set bits in the
458 * uint32_t value and are matched without further interpretation.
459 *
460 * \param dst Destination buffer
461 * \param dst_mask Bit mask corresponding to destination channels present
462 * \param src Source buffer
463 * \param src_mask Bit mask corresponding to source channels present
464 * \param sample_size Size of each sample in bytes. Must be 1, 2, 3, or 4.
465 * \param count Number of frames to copy
466 *
467 * The destination and source buffers must be completely separate (non-overlapping).
468 * If the sample size is not in range, the function will abort.
469 */
470 void memcpy_by_channel_mask(void *dst, uint32_t dst_mask,
471 const void *src, uint32_t src_mask, size_t sample_size, size_t count);
472
473 /**
474 * Copy frames, selecting source samples based on an index array (idxary).
475 * The idxary[] consists of dst_channels number of elements.
 * The ith element of idxary[] corresponds to the ith destination channel.
477 * A non-negative value is the channel index in the source frame.
478 * A negative index (-1) represents filling with 0.
479 *
480 * Example: Swapping L and R channels for stereo streams
481 * <PRE>
482 * idxary[0] = 1;
483 * idxary[1] = 0;
484 * </PRE>
485 *
486 * Example: Copying a mono source to the front center 5.1 channel
487 * <PRE>
488 * idxary[0] = -1;
489 * idxary[1] = -1;
490 * idxary[2] = 0;
491 * idxary[3] = -1;
492 * idxary[4] = -1;
493 * idxary[5] = -1;
494 * </PRE>
495 *
496 * This copy allows swizzling of channels or replication of channels.
497 *
498 * \param dst Destination buffer
499 * \param dst_channels Number of destination channels per frame
500 * \param src Source buffer
501 * \param src_channels Number of source channels per frame
502 * \param idxary Array of indices representing channels in the source frame
503 * \param sample_size Size of each sample in bytes. Must be 1, 2, 3, or 4.
504 * \param count Number of frames to copy
505 *
506 * The destination and source buffers must be completely separate (non-overlapping).
507 * If the sample size is not in range, the function will abort.
508 */
509 void memcpy_by_index_array(void *dst, uint32_t dst_channels,
510 const void *src, uint32_t src_channels,
511 const int8_t *idxary, size_t sample_size, size_t count);
512
513 /**
514 * Prepares an index array (idxary) from channel masks, which can be later
515 * used by memcpy_by_index_array().
516 *
517 * \return the number of array elements required.
518 * This may be greater than idxcount, so the return value should be checked
519 * if idxary size is less than 32.
520 *
521 * Note that idxary is a caller allocated array
522 * of at least as many channels as present in the dst_mask.
523 * Channels present in the channel mask are represented by set bits in the
524 * uint32_t value and are matched without further interpretation.
525 *
526 * This function is typically used for converting audio data with different
527 * channel position masks.
528 *
529 * \param idxary Updated array of indices of channels in the src frame for the dst frame
530 * \param idxcount Number of caller allocated elements in idxary
531 * \param dst_mask Bit mask corresponding to destination channels present
532 * \param src_mask Bit mask corresponding to source channels present
533 */
534 size_t memcpy_by_index_array_initialization(int8_t *idxary, size_t idxcount,
535 uint32_t dst_mask, uint32_t src_mask);
536
537 /**
538 * Prepares an index array (idxary) from channel masks, which can be later
539 * used by memcpy_by_index_array().
540 *
541 * \return the number of array elements required.
542 *
543 * For a source channel index mask, the source channels will map to the destination
544 * channels as if counting the set bits in dst_mask in order from lsb to msb
545 * (zero bits are ignored). The ith bit of the src_mask corresponds to the
546 * ith SET bit of dst_mask and the ith destination channel. Hence, a zero ith
547 * bit of the src_mask indicates that the ith destination channel plays silence.
548 *
549 * \param idxary Updated array of indices of channels in the src frame for the dst frame
550 * \param idxcount Number of caller allocated elements in idxary
551 * \param dst_mask Bit mask corresponding to destination channels present
552 * \param src_mask Bit mask corresponding to source channels present
553 */
554 size_t memcpy_by_index_array_initialization_src_index(int8_t *idxary, size_t idxcount,
555 uint32_t dst_mask, uint32_t src_mask);
556
557 /**
558 * Prepares an index array (idxary) from channel mask bits, which can be later
559 * used by memcpy_by_index_array().
560 *
561 * \return the number of array elements required.
562 *
563 * This initialization is for a destination channel index mask from a positional
564 * source mask.
565 *
 * For a destination channel index mask, the input channels will map
567 * to the destination channels, with the ith SET bit in the source bits corresponding
568 * to the ith bit in the destination bits. If there is a zero bit in the middle
569 * of set destination bits (unlikely), the corresponding source channel will
570 * be dropped.
571 *
572 * \param idxary Updated array of indices of channels in the src frame for the dst frame
573 * \param idxcount Number of caller allocated elements in idxary
574 * \param dst_mask Bit mask corresponding to destination channels present
575 * \param src_mask Bit mask corresponding to source channels present
576 */
577 size_t memcpy_by_index_array_initialization_dst_index(int8_t *idxary, size_t idxcount,
578 uint32_t dst_mask, uint32_t src_mask);
579
/**
 * Saturate (hard limit / clip) a signed 32-bit sample to the signed 16-bit range.
 *
 * \param sample Input sample
 * \return sample unchanged when it lies in [-32768, 32767]; otherwise the nearest
 *         bound of that range.
 */
static inline int16_t clamp16(int32_t sample)
{
    if (sample > INT16_MAX) {
        return INT16_MAX;
    }
    if (sample < INT16_MIN) {
        return INT16_MIN;
    }
    /* In range: the narrowing conversion is value-preserving. */
    return (int16_t)sample;
}
589
/**
 * Convert an IEEE 754 single-precision float in [-1.0, 1.0) to int16_t [-32768, 32767],
 * with clamping.
 *
 * The bound at 1.0 is open: inputs within 1/65536 of 1.0 yield 32767 rather than 32768
 * (the positive integer subrange is one smaller, so clamping happens slightly early).
 *
 * Inputs outside [-1.0, 1.0), including -Inf and +Inf, are clamped to -32768 or 32767.
 * NaN will generally come out as -32768 or 32767 depending on the sign bit carried in
 * the NaN (whose representation is not unique); strictly speaking, NaN behavior should
 * be considered undefined.
 *
 * A fraction of exactly 0.5 lsb is rounded to even (the IEEE 754 default).
 *
 * \param f Input sample
 * \return the clamped 16-bit sample
 */
static inline int16_t clamp16_from_float(float f)
{
    /* Adding this bias moves the valid range [-1.0, 1.0) into the 16 lsbs of the float
     * significand. The usual bias would be 3 << 22; reducing the shift by 15 has the
     * effect of multiplying the input by 32768.
     */
    static const float bias = (float)(3 << (22 - 15));
    /* Integer view of the biased value for an input of 0.0: 0x43c00000. */
    static const int32_t zero_bits = 0x10f << 22;

    union {
        float as_float;
        int32_t as_bits;
    } pun;

    pun.as_float = f + bias; /* recenter the valid range */

    /* Float bit patterns, read as integers, form an ordered set, so the range check
     * can be done on the integer view: valid values lie in
     * [zero_bits - 32768, zero_bits + 32767], i.e. [0x43bf8000, 0x43c07fff].
     */
    if (pun.as_bits < zero_bits - 32768) {
        return -32768;
    }
    if (pun.as_bits > zero_bits + 32767) {
        return 32767;
    }
    /* Keep only the low 16 bits, the part of the significand that holds the sample. */
    return (int16_t)pun.as_bits;
}
628
/**
 * Convert an IEEE 754 single-precision float in [-1.0, 1.0) to uint8_t [0, 0xff],
 * with clamping.
 *
 * The bound at 1.0 is open: inputs within 1/128 of 1.0 yield 255 rather than 256
 * (the positive integer subrange is one smaller, so clamping happens slightly early).
 *
 * Inputs outside [-1.0, 1.0), including -Inf and +Inf, are clamped to 0 or 255.
 * NaN will generally come out as 0 or 255 depending on the sign bit carried in the
 * NaN (whose representation is not unique); strictly speaking, NaN behavior should
 * be considered undefined.
 *
 * A fraction of exactly 0.5 lsb is rounded to even (the IEEE 754 default).
 *
 * \param f Input sample
 * \return the clamped unsigned 8-bit sample
 */
static inline uint8_t clamp8_from_float(float f)
{
    /* Adding this bias moves the valid range [-1.0, 1.0) into the 8 lsbs of the float
     * significand. The usual bias would be 3 << 22; reducing the shift by 7 has the
     * effect of multiplying the input by 128. The extra 1 cancels the -1.0 lower bound
     * so that the result is unsigned.
     */
    static const float bias = (float)((3 << (22 - 7)) + 1);
    /* Integer view of the biased value for an input of -1.0: 0x47c00000. */
    static const int32_t floor_bits = 0x11f << 22;

    union {
        float as_float;
        int32_t as_bits;
    } pun;

    pun.as_float = f + bias; /* recenter the valid range */

    /* Float bit patterns, read as integers, form an ordered set, so the range check
     * can be done on the integer view: valid values lie in
     * [floor_bits, floor_bits + 255], i.e. [0x47c00000, 0x47c000ff].
     */
    if (pun.as_bits < floor_bits) {
        return 0;
    }
    if (pun.as_bits > floor_bits + 255) {
        return 255;
    }
    /* Keep only the low 8 bits, the part of the significand that holds the sample. */
    return (uint8_t)pun.as_bits;
}
667
/**
 * Convert a single-precision floating point value to a Q0.23 integer value, stored in a
 * 32 bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
 *
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [-1.0, 1.0) are clamped to -8388608 and 8388607,
 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 *
 * \param f Input sample
 * \return the clamped and rounded Q0.23 value in [-0x800000, 0x7fffff]
 */
static inline int32_t clamp24_from_float(float f)
{
    /* Each initializer is a self-contained arithmetic constant expression. An object
     * with static storage duration may not be initialized from another object (even a
     * const one) in ISO C, so the limits repeat the scale factor instead of naming it.
     */
    static const float scale = (float)(1 << 23);
    static const float limpos = 0x7fffff / (float)(1 << 23);
    static const float limneg = -0x800000 / (float)(1 << 23);

    if (f <= limneg) {
        return -0x800000;
    } else if (f >= limpos) {
        return 0x7fffff;
    }
    f *= scale;
    /* Float to integer conversion truncates toward zero, so add or subtract 0.5 (in
     * double precision) first to obtain round to nearest, ties away from 0.
     */
    return (int32_t)(f > 0 ? f + 0.5 : f - 0.5);
}
695
/**
 * Saturate a signed fixed-point 32-bit Q8.23 value to the Q0.23 range. The result is
 * still held in a 32-bit signed integer (technically Q8.23, but clamped to Q0.23).
 *
 * \param ival Input Q8.23 value
 * \return ival unchanged when it lies in [-0x800000, 0x7fffff]; otherwise the nearest
 *         bound of that range.
 */
static inline int32_t clamp24_from_q8_23(int32_t ival)
{
    const int32_t q23_max = 0x7fffff;
    const int32_t q23_min = -0x800000;

    return ival > q23_max ? q23_max
         : ival < q23_min ? q23_min
         : ival;
}
714
/**
 * Convert a single-precision floating point value to a Q4.27 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [-16.0, 16.0) are clamped to -2147483648 and 2147483647,
 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 *
 * \param f Input sample
 * \return the clamped and rounded Q4.27 value
 */
static inline int32_t clampq4_27_from_float(float f)
{
    float scaled;

    if (f <= -16.0f) {
        return INT32_MIN;
    }
    if (f >= 16.0f) {
        return INT32_MAX;
    }

    /* Scale by 2^27 to move the 27 fraction bits into the integer part. */
    scaled = f * (float)(1UL << 27);

    /* Float to integer conversion truncates toward zero, so add or subtract 0.5 (in
     * double precision) first to obtain round to nearest, ties away from 0.
     */
    if (scaled > 0) {
        return (int32_t)(scaled + 0.5);
    }
    return (int32_t)(scaled - 0.5);
}
740
/**
 * Convert a single-precision floating point value to a Q0.31 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [-1.0, 1.0) are clamped to -2147483648 and 2147483647,
 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 *
 * \param f Input sample
 * \return the clamped and rounded Q0.31 value
 */
static inline int32_t clamp32_from_float(float f)
{
    float scaled;

    if (f <= -1.0f) {
        return INT32_MIN;
    }
    if (f >= 1.0f) {
        return INT32_MAX;
    }

    /* Scale by 2^31 to move the 31 fraction bits into the integer part. */
    scaled = f * (float)(1UL << 31);

    /* Float to integer conversion truncates toward zero, so add or subtract 0.5 (in
     * double precision) first to obtain round to nearest, ties away from 0.
     */
    if (scaled > 0) {
        return (int32_t)(scaled + 0.5);
    }
    return (int32_t)(scaled - 0.5);
}
766
/**
 * Convert a signed fixed-point 32-bit Q4.27 value to single-precision floating-point.
 * A fixed-point range of [0xf8000000, 0x07ffffff] gives a nominal float range
 * of [-1.0, 1.0]; the full float range is [-16.0, 16.0].
 *
 * The bounds at 1.0 and 16.0 are closed because of rounding on conversion to
 * float: an integer that needs more than the 24 bit significand of single
 * precision is rounded to nearest, ties to even, as per IEEE 754 default.
 */
static inline float float_from_q4_27(int32_t ival)
{
    /* Reciprocal of 2^27, the weight of the 27 fractional bits. Multiplying
     * by a power of 2 is exact, so the only possible rounding is the
     * integer to float conversion described above.
     */
    const float inv_one = 1.0f / (float)(1UL << 27);
    const float as_float = (float)ival;

    return as_float * inv_one;
}
790
/**
 * Convert an unsigned fixed-point 32-bit U4.28 value to single-precision floating-point.
 * A fixed-point range of [0x00000000, 0x10000000] gives a nominal float range
 * of [0.0, 1.0]; the full float range is [0.0, 16.0].
 *
 * The bound at 16.0 is closed because of rounding on conversion to float:
 * an integer that needs more than the 24 bit significand of single precision
 * is rounded to nearest, ties to even, as per IEEE 754 default.
 */
static inline float float_from_u4_28(uint32_t uval)
{
    /* Reciprocal of 2^28, the weight of the 28 fractional bits. */
    const float inv_one = 1.0f / (float)(1UL << 28);
    const float as_float = (float)uval;

    return as_float * inv_one;
}
807
/**
 * Convert an unsigned fixed-point 16-bit U4.12 value to single-precision floating-point.
 * A fixed-point range of [0x0000, 0x1000] gives a nominal float range of
 * [0.0, 1.0]; the full float range is [0.0, 16.0).
 */
static inline float float_from_u4_12(uint16_t uval)
{
    /* Reciprocal of 2^12, the weight of the 12 fractional bits. */
    const float inv_one = 1.0f / (float)(1UL << 12);
    const float as_float = (float)uval;

    return as_float * inv_one;
}
819
/**
 * Convert a single-precision floating point value to a U4.28 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Inputs at or below 0 return 0 and inputs at or above the upper threshold
 * return 4294967295; this includes -Inf and +Inf. NaN values are considered
 * undefined, and behavior may change depending on hardware and future
 * implementation of this function.
 */
static inline uint32_t u4_28_from_float(float f)
{
    static const float one = (float)(1 << 28);
    /* Upper clamp threshold; with IEEE 754 single precision 0xffffffff rounds
     * up to 2^32 on conversion, so this evaluates to 16.0.
     */
    static const float upper = 0xffffffffUL / one;

    if (f >= upper) {
        return 0xffffffff;
    }
    if (f <= 0.) {
        return 0;
    }

    const float scaled = f * one;
    /* Float to integer conversion truncates, so add half an lsb first to
     * round to nearest, ties away from 0.
     */
    return (uint32_t)(scaled + 0.5);
}
843
/**
 * Convert a single-precision floating point value to a U4.12 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Inputs at or below 0 return 0 and inputs at or above 65535/4096 return
 * 65535; this includes -Inf and +Inf. NaN values are considered undefined,
 * and behavior may change depending on hardware and future implementation
 * of this function.
 */
static inline uint16_t u4_12_from_float(float f)
{
    static const float one = (float)(1 << 12);
    /* Largest representable U4.12 value expressed as a float. */
    static const float upper = 0xffff / one;

    if (f >= upper) {
        return 0xffff;
    }
    if (f <= 0.) {
        return 0;
    }

    const float scaled = f * one;
    /* Float to integer conversion truncates, so add half an lsb first to
     * round to nearest, ties away from 0.
     */
    return (uint16_t)(scaled + 0.5);
}
867
/**
 * Convert a signed fixed-point 16-bit Q0.15 value to single-precision floating-point.
 * The fixed-point range [0x8000, 0x7fff] maps to the float range [-1.0, 1.0).
 *
 * There is no rounding, the conversion and representation is exact.
 */
static inline float float_from_i16(int16_t ival)
{
    /* Reciprocal of 32768, the nominal half-sided range of a 16 bit integer.
     * Multiplying by a power of 2 is exact, so the bit pattern is preserved.
     */
    const float inv_full_scale = 1.0f / (float)(1UL << 15);
    const float as_float = (float)ival;

    return as_float * inv_full_scale;
}
887
/**
 * Convert an unsigned fixed-point 8-bit U0.8 value to single-precision floating-point.
 * The fixed-point range [0x00, 0xff] gives a nominal float range of [-1.0, 1.0);
 * an input of 0x80 maps to 0.0.
 */
static inline float float_from_u8(uint8_t uval)
{
    /* Reciprocal of 128, applied after the unsigned sample has been
     * re-centred around zero.
     */
    const float inv_half_range = 1.0f / (float)(1UL << 7);
    const int centred = (int)uval - 128;

    return (float)centred * inv_half_range;
}
899
/**
 * Convert a packed 24bit Q0.23 value stored in a uint8_t ptr to a signed
 * fixed-point 32 bit integer Q0.31 value. packed24[0] is read as the least
 * significant byte and packed24[2] as the most significant byte.
 *
 * The output Q0.31 range is [0x80000000, 0x7fffff00] for the fixed-point
 * range [0x800000, 0x7fffff]. Even though the output range is limited on
 * the positive side, there is no DC offset on the output, if the input has
 * no DC offset.
 *
 * Avoid relying on the limited output range, as future implementations may go
 * to full range.
 */
static inline int32_t i32_from_p24(const uint8_t *packed24)
{
    /* Assemble the word in unsigned arithmetic: a uint8_t promotes to signed
     * int, and shifting a byte >= 0x80 left by 24 as an int overflows, which
     * is undefined behavior. The three bytes land in bits 8..31, leaving the
     * low byte zero.
     */
    const uint32_t bits = ((uint32_t)packed24[0] << 8)
                        | ((uint32_t)packed24[1] << 16)
                        | ((uint32_t)packed24[2] << 24);

    /* Reinterpret the top bit as the sign. */
    return (int32_t)bits;
}
915
/**
 * Convert a 32-bit Q0.31 value to single-precision floating-point.
 * The fixed-point range [0x80000000, 0x7fffffff] maps to the float range
 * [-1.0, 1.0].
 *
 * Large fixed point values may be rounded in their least significant 8 bits
 * because they are stored into the 24-bit floating-point significand.
 * Rounding will be to nearest, ties to even.
 */
static inline float float_from_i32(int32_t ival)
{
    /* Reciprocal of 2^31; multiplying by a power of 2 is exact. */
    const float inv_full_scale = 1.0f / (float)(1UL << 31);
    const float as_float = (float)ival;

    return as_float * inv_full_scale;
}
931
/**
 * Convert a packed 24bit Q0.23 value stored in a uint8_t ptr to
 * single-precision floating-point. The fixed-point range [0x800000, 0x7fffff]
 * maps to the float range [-1.0, 1.0).
 *
 * There is no rounding, the conversion and representation is exact.
 */
static inline float float_from_p24(const uint8_t *packed24)
{
    /* Widen the three bytes to Q0.31 first, then scale that to float. */
    const int32_t widened = i32_from_p24(packed24);

    return float_from_i32(widened);
}
943
/**
 * Convert a signed fixed-point 32-bit Q8.23 value to single-precision floating-point.
 * The fixed-point range [0xff800000, 0x007fffff] gives a nominal float range
 * of [-1.0, 1.0). The full float range is [-256.0, 256.0]; the upper bound is
 * reached because the largest inputs round up on conversion to float.
 *
 * There is no rounding in the nominal range, the conversion and representation
 * is exact. For values outside the nominal range, rounding is to nearest, ties to even.
 */
static inline float float_from_q8_23(int32_t ival)
{
    /* Reciprocal of 2^23, the weight of the 23 fractional bits. */
    const float inv_one = 1.0f / (float)(1UL << 23);
    const float as_float = (float)ival;

    return as_float * inv_one;
}
958
/**
 * Multiply-accumulate 16-bit terms with 32-bit result: return a + in*v.
 * The accumulation is not saturated.
 */
static inline
int32_t mulAdd(int16_t in, int16_t v, int32_t a)
{
#if defined(__arm__) && !defined(__thumb__)
    /* ARM (non-Thumb) builds issue a single smlabb instruction. */
    int32_t acc;
    asm( "smlabb %[acc], %[in], %[v], %[a] \n"
         : [acc]"=r"(acc)
         : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
         : );
    return acc;
#else
    /* Portable path: form the 32-bit product, then add the accumulator. */
    const int32_t product = in * (int32_t)v;
    return a + product;
#endif
}
976
/**
 * Multiply 16-bit terms with 32-bit result: return in*v.
 */
static inline
int32_t mul(int16_t in, int16_t v)
{
#if defined(__arm__) && !defined(__thumb__)
    /* ARM (non-Thumb) builds issue a single smulbb instruction. */
    int32_t product;
    asm( "smulbb %[product], %[in], %[v] \n"
         : [product]"=r"(product)
         : [in]"%r"(in), [v]"r"(v)
         : );
    return product;
#else
    /* Portable path: widen one operand so the product is formed in 32 bits. */
    const int32_t wide_v = (int32_t)v;
    return in * wide_v;
#endif
}
994
/**
 * Similar to mulAdd, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
 *
 * When left is non-zero the low 16 bits of inRL and vRL are multiplied;
 * otherwise the high 16 bits are used. Each half is treated as a signed
 * 16-bit value, and the product is added to a without saturation.
 */
static inline
int32_t mulAddRL(int left, uint32_t inRL, uint32_t vRL, int32_t a)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t acc;
    if (left) {
        /* low halves of both words */
        asm( "smlabb %[acc], %[inRL], %[vRL], %[a] \n"
             : [acc]"=r"(acc)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
             : );
    } else {
        /* high halves of both words */
        asm( "smlatt %[acc], %[inRL], %[vRL], %[a] \n"
             : [acc]"=r"(acc)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
             : );
    }
    return acc;
#else
    /* Pick the requested half of each word, reinterpreted as signed 16-bit. */
    const int16_t in_half = left ? (int16_t)(inRL & 0xFFFF) : (int16_t)(inRL >> 16);
    const int16_t v_half = left ? (int16_t)(vRL & 0xFFFF) : (int16_t)(vRL >> 16);

    return a + in_half * v_half;
#endif
}
1023
/**
 * Similar to mul, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
 *
 * When left is non-zero the low 16 bits of inRL and vRL are multiplied;
 * otherwise the high 16 bits are used. Each half is treated as a signed
 * 16-bit value.
 */
static inline
int32_t mulRL(int left, uint32_t inRL, uint32_t vRL)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t product;
    if (left) {
        /* low halves of both words */
        asm( "smulbb %[product], %[inRL], %[vRL] \n"
             : [product]"=r"(product)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL)
             : );
    } else {
        /* high halves of both words */
        asm( "smultt %[product], %[inRL], %[vRL] \n"
             : [product]"=r"(product)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL)
             : );
    }
    return product;
#else
    /* Pick the requested half of each word, reinterpreted as signed 16-bit. */
    const int16_t in_half = left ? (int16_t)(inRL & 0xFFFF) : (int16_t)(inRL >> 16);
    const int16_t v_half = left ? (int16_t)(vRL & 0xFFFF) : (int16_t)(vRL >> 16);

    return in_half * v_half;
#endif
}
1052
1053 /** \cond */
1054 __END_DECLS
1055 /** \endcond */
1056
1057 #endif // ANDROID_AUDIO_PRIMITIVES_H
1058