1 /* Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2  * Use of this source code is governed by a BSD-style license that can be
3  * found in the LICENSE file.
4  */
5 
6 #include <stdint.h>
7 
8 #include "cras_system_state.h"
9 #include "cras_mix_ops.h"
10 
/* Volume scalers above this value are treated as unity gain: the samples are
 * passed through or mixed without multiplication. */
#define MAX_VOLUME_TO_SCALE 0.9999999
/* Volume scalers below this value are treated as silence: the samples are
 * zeroed or skipped instead of being multiplied. */
#define MIN_VOLUME_TO_SCALE 0.0000001

/* Function suffixes for SIMD ops.
 * OPS(name) appends the suffix of the selected instruction set so that this
 * file can be compiled once per variant without symbol clashes.  Every
 * selector is tested with defined() so that an unset selector is never
 * evaluated as an expression. */
#if defined(OPS_SSE42)
	#define OPS(a) a ## _sse42
#elif defined(OPS_AVX)
	#define OPS(a) a ## _avx
#elif defined(OPS_AVX2)
	#define OPS(a) a ## _avx2
#elif defined(OPS_FMA)
	#define OPS(a) a ## _fma
#else
	#define OPS(a) a
#endif
26 
/* Tells whether a scaler is far enough from 1.0 to be worth applying.
 * Scalers between 0.99 and 1.01 are regarded as unity and skipped.
 * This decision does not depend on MAX_VOLUME_TO_SCALE or
 * MIN_VOLUME_TO_SCALE.
 * Returns non-zero when scaling is required, 0 otherwise. */
static inline int need_to_scale(float scaler)
{
	if (scaler < 0.99)
		return 1;

	return scaler > 1.01;
}
34 
35 /*
36  * Signed 16 bit little endian functions.
37  */
38 
/* Mixes src into dst sample by sample at unity gain.
 * Each sum is computed in 32 bits and saturated to [INT16_MIN, INT16_MAX]
 * before being stored back into dst.
 *    dst - Samples to mix into; overwritten with the result.
 *    src - Samples to add.
 *    count - Number of samples in each buffer.
 */
static void cras_mix_add_clip_s16_le(int16_t *dst,
				     const int16_t *src,
				     size_t count)
{
	size_t n;

	for (n = 0; n != count; n++) {
		int32_t mixed = dst[n] + src[n];

		if (mixed < INT16_MIN)
			mixed = INT16_MIN;
		else if (mixed > INT16_MAX)
			mixed = INT16_MAX;

		dst[n] = mixed;
	}
}
55 
56 /* Adds src into dst, after scaling by vol.
57  * Just hard limits to the min and max S16 value, can be improved later. */
scale_add_clip_s16_le(int16_t * dst,const int16_t * src,size_t count,float vol)58 static void scale_add_clip_s16_le(int16_t *dst,
59 				  const int16_t *src,
60 				  size_t count,
61 				  float vol)
62 {
63 	int32_t sum;
64 	size_t i;
65 
66 	if (vol > MAX_VOLUME_TO_SCALE)
67 		return cras_mix_add_clip_s16_le(dst, src, count);
68 
69 	for (i = 0; i < count; i++) {
70 		sum = dst[i] + (int16_t)(src[i] * vol);
71 		if (sum > INT16_MAX)
72 			sum = INT16_MAX;
73 		else if (sum < INT16_MIN)
74 			sum = INT16_MIN;
75 		dst[i] = sum;
76 	}
77 }
78 
79 /* Adds the first stream to the mix.  Don't need to mix, just setup to the new
80  * values. If volume is 1.0, just memcpy. */
copy_scaled_s16_le(int16_t * dst,const int16_t * src,size_t count,float volume_scaler)81 static void copy_scaled_s16_le(int16_t *dst,
82 			       const int16_t *src,
83 			       size_t count,
84 			       float volume_scaler)
85 {
86 	int i;
87 
88 	if (volume_scaler > MAX_VOLUME_TO_SCALE) {
89 		memcpy(dst, src, count * sizeof(*src));
90 		return;
91 	}
92 
93 	for (i = 0; i < count; i++)
94 		dst[i] = src[i] * volume_scaler;
95 }
96 
cras_scale_buffer_inc_s16_le(uint8_t * buffer,unsigned int count,float scaler,float increment,int step)97 static void cras_scale_buffer_inc_s16_le(uint8_t *buffer, unsigned int count,
98 					 float scaler, float increment, int step)
99 {
100 	int i = 0, j;
101 	int16_t *out = (int16_t *)buffer;
102 
103 	if (scaler > MAX_VOLUME_TO_SCALE && increment > 0)
104 		return;
105 
106 	if (scaler < MIN_VOLUME_TO_SCALE && increment < 0) {
107 		memset(out, 0, count * sizeof(*out));
108 		return;
109 	}
110 
111 	while (i + step <= count) {
112 		for (j = 0; j < step; j++) {
113 			if (scaler > MAX_VOLUME_TO_SCALE) {
114 			} else if (scaler < MIN_VOLUME_TO_SCALE) {
115 				out[i] = 0;
116 			} else {
117 				out[i] *= scaler;
118 			}
119 			i++;
120 		}
121 		scaler += increment;
122 	}
123 }
124 
cras_scale_buffer_s16_le(uint8_t * buffer,unsigned int count,float scaler)125 static void cras_scale_buffer_s16_le(uint8_t *buffer, unsigned int count,
126 				     float scaler)
127 {
128 	int i;
129 	int16_t *out = (int16_t *)buffer;
130 
131 	if (scaler > MAX_VOLUME_TO_SCALE)
132 		return;
133 
134 	if (scaler < MIN_VOLUME_TO_SCALE) {
135 		memset(out, 0, count * sizeof(*out));
136 		return;
137 	}
138 
139 	for (i = 0; i < count; i++)
140 		out[i] *= scaler;
141 }
142 
cras_mix_add_s16_le(uint8_t * dst,uint8_t * src,unsigned int count,unsigned int index,int mute,float mix_vol)143 static void cras_mix_add_s16_le(uint8_t *dst, uint8_t *src,
144 				unsigned int count, unsigned int index,
145 				int mute, float mix_vol)
146 {
147 	int16_t *out = (int16_t *)dst;
148 	int16_t *in = (int16_t *)src;
149 
150 	if (mute || (mix_vol < MIN_VOLUME_TO_SCALE)) {
151 		if (index == 0)
152 			memset(out, 0, count * sizeof(*out));
153 		return;
154 	}
155 
156 	if (index == 0)
157 		return copy_scaled_s16_le(out, in, count, mix_vol);
158 
159 	scale_add_clip_s16_le(out, in, count, mix_vol);
160 }
161 
/* Mixes count S16 samples from src into dst, stepping each pointer by its
 * own byte stride between samples.
 * src is multiplied by scaler only when need_to_scale() says so; the sum is
 * saturated to the S16 range.
 *    dst - First destination sample; updated in place.
 *    src - First source sample.
 *    dst_stride - Bytes between consecutive destination samples.
 *    src_stride - Bytes between consecutive source samples.
 *    count - Number of samples to mix.
 *    scaler - Scaler for the source samples.
 */
static void cras_mix_add_scale_stride_s16_le(uint8_t *dst, uint8_t *src,
				unsigned int dst_stride,
				unsigned int src_stride,
				unsigned int count,
				float scaler)
{
	/* scaler is constant, so the decision is taken once. */
	const int scaled = need_to_scale(scaler);
	unsigned int left;

	/* Loops with compile-time strides are kept apart so that the
	 * compiler can vectorize them. */
	if (dst_stride == 2 && src_stride == 2) {
		for (left = count; left > 0; left--) {
			int16_t *out = (int16_t *)dst;
			const int16_t *in = (const int16_t *)src;
			int32_t mixed;

			if (scaled)
				mixed = *out + *in * scaler;
			else
				mixed = *out + *in;

			if (mixed < INT16_MIN)
				mixed = INT16_MIN;
			else if (mixed > INT16_MAX)
				mixed = INT16_MAX;

			*out = mixed;
			dst += 2;
			src += 2;
		}
		return;
	}

	if (dst_stride == 4 && src_stride == 4) {
		for (left = count; left > 0; left--) {
			int16_t *out = (int16_t *)dst;
			const int16_t *in = (const int16_t *)src;
			int32_t mixed;

			if (scaled)
				mixed = *out + *in * scaler;
			else
				mixed = *out + *in;

			if (mixed < INT16_MIN)
				mixed = INT16_MIN;
			else if (mixed > INT16_MAX)
				mixed = INT16_MAX;

			*out = mixed;
			dst += 4;
			src += 4;
		}
		return;
	}

	/* Generic path for any other stride combination. */
	for (left = count; left > 0; left--) {
		int16_t *out = (int16_t *)dst;
		const int16_t *in = (const int16_t *)src;
		int32_t mixed;

		if (scaled)
			mixed = *out + *in * scaler;
		else
			mixed = *out + *in;

		if (mixed < INT16_MIN)
			mixed = INT16_MIN;
		else if (mixed > INT16_MAX)
			mixed = INT16_MAX;

		*out = mixed;
		dst += dst_stride;
		src += src_stride;
	}
}
223 
224 /*
225  * Signed 24 bit little endian functions.
226  */
227 
/* Mixes src into dst sample by sample at unity gain.
 * Samples are held in 32-bit containers and each sum is saturated to the
 * 24-bit range [0xff800000, 0x007fffff].
 *    dst - Samples to mix into; overwritten with the result.
 *    src - Samples to add.
 *    count - Number of samples in each buffer.
 */
static void cras_mix_add_clip_s24_le(int32_t *dst,
				     const int32_t *src,
				     size_t count)
{
	const int32_t s24_max = 0x007fffff;
	const int32_t s24_min = (int32_t)0xff800000;
	size_t n;

	for (n = 0; n != count; n++) {
		int32_t mixed = dst[n] + src[n];

		if (mixed < s24_min)
			mixed = s24_min;
		else if (mixed > s24_max)
			mixed = s24_max;

		dst[n] = mixed;
	}
}
244 
245 /* Adds src into dst, after scaling by vol.
246  * Just hard limits to the min and max S24 value, can be improved later. */
scale_add_clip_s24_le(int32_t * dst,const int32_t * src,size_t count,float vol)247 static void scale_add_clip_s24_le(int32_t *dst,
248 				  const int32_t *src,
249 				  size_t count,
250 				  float vol)
251 {
252 	int32_t sum;
253 	size_t i;
254 
255 	if (vol > MAX_VOLUME_TO_SCALE)
256 		return cras_mix_add_clip_s24_le(dst, src, count);
257 
258 	for (i = 0; i < count; i++) {
259 		sum = dst[i] + (int32_t)(src[i] * vol);
260 		if (sum > 0x007fffff)
261 			sum = 0x007fffff;
262 		else if (sum < (int32_t)0xff800000)
263 			sum = (int32_t)0xff800000;
264 		dst[i] = sum;
265 	}
266 }
267 
268 /* Adds the first stream to the mix.  Don't need to mix, just setup to the new
269  * values. If volume is 1.0, just memcpy. */
copy_scaled_s24_le(int32_t * dst,const int32_t * src,size_t count,float volume_scaler)270 static void copy_scaled_s24_le(int32_t *dst,
271 			       const int32_t *src,
272 			       size_t count,
273 			       float volume_scaler)
274 {
275 	int i;
276 
277 	if (volume_scaler > MAX_VOLUME_TO_SCALE) {
278 		memcpy(dst, src, count * sizeof(*src));
279 		return;
280 	}
281 
282 	for (i = 0; i < count; i++)
283 		dst[i] = src[i] * volume_scaler;
284 }
285 
cras_scale_buffer_inc_s24_le(uint8_t * buffer,unsigned int count,float scaler,float increment,int step)286 static void cras_scale_buffer_inc_s24_le(uint8_t *buffer, unsigned int count,
287 					 float scaler, float increment, int step)
288 {
289 	int i = 0, j;
290 	int32_t *out = (int32_t *)buffer;
291 
292 	if (scaler > MAX_VOLUME_TO_SCALE && increment > 0)
293 		return;
294 
295 	if (scaler < MIN_VOLUME_TO_SCALE && increment < 0) {
296 		memset(out, 0, count * sizeof(*out));
297 		return;
298 	}
299 
300 	while (i + step <= count) {
301 		for (j = 0; j < step; j++) {
302 			if (scaler > MAX_VOLUME_TO_SCALE) {
303 			} else if (scaler < MIN_VOLUME_TO_SCALE) {
304 				out[i] = 0;
305 			} else {
306 				out[i] *= scaler;
307 			}
308 			i++;
309 		}
310 		scaler += increment;
311 	}
312 }
313 
cras_scale_buffer_s24_le(uint8_t * buffer,unsigned int count,float scaler)314 static void cras_scale_buffer_s24_le(uint8_t *buffer, unsigned int count,
315 				     float scaler)
316 {
317 	int i;
318 	int32_t *out = (int32_t *)buffer;
319 
320 	if (scaler > MAX_VOLUME_TO_SCALE)
321 		return;
322 
323 	if (scaler < MIN_VOLUME_TO_SCALE) {
324 		memset(out, 0, count * sizeof(*out));
325 		return;
326 	}
327 
328 	for (i = 0; i < count; i++)
329 		out[i] *= scaler;
330 }
331 
cras_mix_add_s24_le(uint8_t * dst,uint8_t * src,unsigned int count,unsigned int index,int mute,float mix_vol)332 static void cras_mix_add_s24_le(uint8_t *dst, uint8_t *src,
333 				unsigned int count, unsigned int index,
334 				int mute, float mix_vol)
335 {
336 	int32_t *out = (int32_t *)dst;
337 	int32_t *in = (int32_t *)src;
338 
339 	if (mute || (mix_vol < MIN_VOLUME_TO_SCALE)) {
340 		if (index == 0)
341 			memset(out, 0, count * sizeof(*out));
342 		return;
343 	}
344 
345 	if (index == 0)
346 		return copy_scaled_s24_le(out, in, count, mix_vol);
347 
348 	scale_add_clip_s24_le(out, in, count, mix_vol);
349 }
350 
/* Mixes count S24 samples (32-bit containers) from src into dst, stepping
 * each pointer by its own byte stride between samples.
 * src is multiplied by scaler only when need_to_scale() says so; the sum is
 * saturated to the 24-bit range [0xff800000, 0x007fffff].
 *    dst - First destination sample; updated in place.
 *    src - First source sample.
 *    dst_stride - Bytes between consecutive destination samples.
 *    src_stride - Bytes between consecutive source samples.
 *    count - Number of samples to mix.
 *    scaler - Scaler for the source samples.
 */
static void cras_mix_add_scale_stride_s24_le(uint8_t *dst, uint8_t *src,
				unsigned int dst_stride,
				unsigned int src_stride,
				unsigned int count,
				float scaler)
{
	const int32_t s24_max = 0x007fffff;
	const int32_t s24_min = (int32_t)0xff800000;
	/* scaler is constant, so the decision is taken once. */
	const int scaled = need_to_scale(scaler);
	unsigned int left;

	/* The packed case has its own loop with a compile-time stride so
	 * that the compiler can vectorize it. */
	if (dst_stride == 4 && src_stride == 4) {
		for (left = count; left > 0; left--) {
			int32_t *out = (int32_t *)dst;
			const int32_t *in = (const int32_t *)src;
			int32_t mixed;

			if (scaled)
				mixed = *out + *in * scaler;
			else
				mixed = *out + *in;

			if (mixed < s24_min)
				mixed = s24_min;
			else if (mixed > s24_max)
				mixed = s24_max;

			*out = mixed;
			dst += 4;
			src += 4;
		}
		return;
	}

	/* Generic path for any other stride combination. */
	for (left = count; left > 0; left--) {
		int32_t *out = (int32_t *)dst;
		const int32_t *in = (const int32_t *)src;
		int32_t mixed;

		if (scaled)
			mixed = *out + *in * scaler;
		else
			mixed = *out + *in;

		if (mixed < s24_min)
			mixed = s24_min;
		else if (mixed > s24_max)
			mixed = s24_max;

		*out = mixed;
		dst += dst_stride;
		src += src_stride;
	}
}
396 
397 /*
398  * Signed 32 bit little endian functions.
399  */
400 
/* Mixes src into dst sample by sample at unity gain.
 * Each sum is computed in 64 bits and saturated to [INT32_MIN, INT32_MAX]
 * before being stored back into dst.
 *    dst - Samples to mix into; overwritten with the result.
 *    src - Samples to add.
 *    count - Number of samples in each buffer.
 */
static void cras_mix_add_clip_s32_le(int32_t *dst,
				     const int32_t *src,
				     size_t count)
{
	size_t n;

	for (n = 0; n != count; n++) {
		int64_t mixed = (int64_t)dst[n];

		mixed += (int64_t)src[n];
		if (mixed < INT32_MIN)
			mixed = INT32_MIN;
		else if (mixed > INT32_MAX)
			mixed = INT32_MAX;

		dst[n] = mixed;
	}
}
417 
418 /* Adds src into dst, after scaling by vol.
419  * Just hard limits to the min and max S32 value, can be improved later. */
scale_add_clip_s32_le(int32_t * dst,const int32_t * src,size_t count,float vol)420 static void scale_add_clip_s32_le(int32_t *dst,
421 				  const int32_t *src,
422 				  size_t count,
423 				  float vol)
424 {
425 	int64_t sum;
426 	size_t i;
427 
428 	if (vol > MAX_VOLUME_TO_SCALE)
429 		return cras_mix_add_clip_s32_le(dst, src, count);
430 
431 	for (i = 0; i < count; i++) {
432 		sum = (int64_t)dst[i] + (int64_t)(src[i] * vol);
433 		if (sum > INT32_MAX)
434 			sum = INT32_MAX;
435 		else if (sum < INT32_MIN)
436 			sum = INT32_MIN;
437 		dst[i] = sum;
438 	}
439 }
440 
441 /* Adds the first stream to the mix.  Don't need to mix, just setup to the new
442  * values. If volume is 1.0, just memcpy. */
copy_scaled_s32_le(int32_t * dst,const int32_t * src,size_t count,float volume_scaler)443 static void copy_scaled_s32_le(int32_t *dst,
444 			       const int32_t *src,
445 			       size_t count,
446 			       float volume_scaler)
447 {
448 	int i;
449 
450 	if (volume_scaler > MAX_VOLUME_TO_SCALE) {
451 		memcpy(dst, src, count * sizeof(*src));
452 		return;
453 	}
454 
455 	for (i = 0; i < count; i++)
456 		dst[i] = src[i] * volume_scaler;
457 }
458 
cras_scale_buffer_inc_s32_le(uint8_t * buffer,unsigned int count,float scaler,float increment,int step)459 static void cras_scale_buffer_inc_s32_le(uint8_t *buffer, unsigned int count,
460 					 float scaler, float increment, int step)
461 {
462 	int i = 0, j;
463 	int32_t *out = (int32_t *)buffer;
464 
465 	if (scaler > MAX_VOLUME_TO_SCALE && increment > 0)
466 		return;
467 
468 	if (scaler < MIN_VOLUME_TO_SCALE && increment < 0) {
469 		memset(out, 0, count * sizeof(*out));
470 		return;
471 	}
472 
473 	while (i + step <= count) {
474 		for (j = 0; j < step; j++) {
475 			if (scaler > MAX_VOLUME_TO_SCALE) {
476 			} else if (scaler < MIN_VOLUME_TO_SCALE) {
477 				out[i] = 0;
478 			} else {
479 				out[i] *= scaler;
480 			}
481 			i++;
482 		}
483 		scaler += increment;
484 	}
485 }
486 
cras_scale_buffer_s32_le(uint8_t * buffer,unsigned int count,float scaler)487 static void cras_scale_buffer_s32_le(uint8_t *buffer, unsigned int count,
488 				     float scaler)
489 {
490 	int i;
491 	int32_t *out = (int32_t *)buffer;
492 
493 	if (scaler > MAX_VOLUME_TO_SCALE)
494 		return;
495 
496 	if (scaler < MIN_VOLUME_TO_SCALE) {
497 		memset(out, 0, count * sizeof(*out));
498 		return;
499 	}
500 
501 	for (i = 0; i < count; i++)
502 		out[i] *= scaler;
503 }
504 
cras_mix_add_s32_le(uint8_t * dst,uint8_t * src,unsigned int count,unsigned int index,int mute,float mix_vol)505 static void cras_mix_add_s32_le(uint8_t *dst, uint8_t *src,
506 				unsigned int count, unsigned int index,
507 				int mute, float mix_vol)
508 {
509 	int32_t *out = (int32_t *)dst;
510 	int32_t *in = (int32_t *)src;
511 
512 	if (mute || (mix_vol < MIN_VOLUME_TO_SCALE)) {
513 		if (index == 0)
514 			memset(out, 0, count * sizeof(*out));
515 		return;
516 	}
517 
518 	if (index == 0)
519 		return copy_scaled_s32_le(out, in, count, mix_vol);
520 
521 	scale_add_clip_s32_le(out, in, count, mix_vol);
522 }
523 
/* Mixes count S32 samples from src into dst, stepping each pointer by its
 * own byte stride between samples.
 * src is multiplied by scaler only when need_to_scale() says so; the sum is
 * saturated to [INT32_MIN, INT32_MAX].
 *    dst - First destination sample; updated in place.
 *    src - First source sample.
 *    dst_stride - Bytes between consecutive destination samples.
 *    src_stride - Bytes between consecutive source samples.
 *    count - Number of samples to mix.
 *    scaler - Scaler for the source samples.
 */
static void cras_mix_add_scale_stride_s32_le(uint8_t *dst, uint8_t *src,
				unsigned int dst_stride,
				unsigned int src_stride,
				unsigned int count,
				float scaler)
{
	unsigned int i;

	/* optimise the loops for vectorization */
	if (dst_stride == src_stride && dst_stride == 4) {

		for (i = 0; i < count; i++) {
			int64_t sum;
			if (need_to_scale(scaler))
				/* Evaluated in single-precision float. */
				sum = *(int32_t *)dst +
						*(int32_t *)src * scaler;
			else
				/* Widen before adding: a 32-bit sum would
				 * overflow before it could be clipped. */
				sum = (int64_t)*(int32_t *)dst +
						(int64_t)*(int32_t *)src;
			if (sum > INT32_MAX)
				sum = INT32_MAX;
			else if (sum < INT32_MIN)
				sum = INT32_MIN;
			*(int32_t *)dst = sum;
			dst += 4;
			src += 4;
		}
	} else {

		for (i = 0; i < count; i++) {
			int64_t sum;
			if (need_to_scale(scaler))
				/* Evaluated in single-precision float. */
				sum = *(int32_t *)dst +
						*(int32_t *)src * scaler;
			else
				/* Widen before adding: a 32-bit sum would
				 * overflow before it could be clipped. */
				sum = (int64_t)*(int32_t *)dst +
						(int64_t)*(int32_t *)src;
			if (sum > INT32_MAX)
				sum = INT32_MAX;
			else if (sum < INT32_MIN)
				sum = INT32_MIN;
			*(int32_t *)dst = sum;
			dst += dst_stride;
			src += src_stride;
		}
	}
}
569 
570 /*
571  * Signed 24 bit little endian in three bytes functions.
572  */
573 
/* Helper that expands one packed 3-byte signed 24-bit sample into a 32-bit
 * integer.  Byte 0 of *dst is cleared and the three source bytes are stored
 * in bytes 1..3; on a little-endian host this places the sample in the upper
 * 24 bits of *dst.
 *    dst - Receives the expanded sample.
 *    src - Points at the three bytes of the packed sample.
 */
static inline void convert_single_s243le_to_s32le(int32_t *dst,
						  const uint8_t *src)
{
	uint8_t *raw = (uint8_t *)dst;

	raw[0] = 0;
	memcpy(raw + 1, src, 3);
}
582 
/* Helper that packs a 32-bit integer back into a 3-byte signed 24-bit
 * sample.  Bytes 1..3 of *src are written to dst and byte 0 is dropped; on a
 * little-endian host these are the upper 24 bits of *src.
 *    dst - Receives the three bytes of the packed sample.
 *    src - Sample to pack.
 */
static inline void convert_single_s32le_to_s243le(uint8_t *dst,
						  const int32_t *src)
{
	const uint8_t *raw = (const uint8_t *)src;

	memcpy(dst, &raw[1], 3);
}
588 
/* Mixes packed 3-byte samples from src into dst at unity gain.
 * Every sample is expanded to 32 bits, summed in 64 bits, saturated to
 * [INT32_MIN, INT32_MAX] and packed back into dst.
 *    dst - Packed samples to mix into; overwritten with the result.
 *    src - Packed samples to add.
 *    count - Number of 3-byte samples in each buffer.
 */
static void cras_mix_add_clip_s24_3le(uint8_t *dst,
				      const uint8_t *src,
				      size_t count)
{
	size_t left;

	for (left = count; left > 0; left--) {
		int32_t out_frame;
		int32_t in_frame;
		int64_t mixed;

		convert_single_s243le_to_s32le(&out_frame, dst);
		convert_single_s243le_to_s32le(&in_frame, src);

		mixed = (int64_t)out_frame + (int64_t)in_frame;
		if (mixed < INT32_MIN)
			mixed = INT32_MIN;
		else if (mixed > INT32_MAX)
			mixed = INT32_MAX;

		out_frame = (int32_t)mixed;
		convert_single_s32le_to_s243le(dst, &out_frame);

		dst += 3;
		src += 3;
	}
}
610 
611 /* Adds src into dst, after scaling by vol.
612  * Just hard limits to the min and max S24 value, can be improved later. */
scale_add_clip_s24_3le(uint8_t * dst,const uint8_t * src,size_t count,float vol)613 static void scale_add_clip_s24_3le(uint8_t *dst,
614 				   const uint8_t *src,
615 				   size_t count,
616 				   float vol)
617 {
618 	int64_t sum;
619 	int32_t dst_frame;
620 	int32_t src_frame;
621 	size_t i;
622 
623 	if (vol > MAX_VOLUME_TO_SCALE)
624 		return cras_mix_add_clip_s24_3le(dst, src, count);
625 
626 	for (i = 0; i < count; i++, dst += 3, src += 3) {
627 		convert_single_s243le_to_s32le(&dst_frame, dst);
628 		convert_single_s243le_to_s32le(&src_frame, src);
629 		sum = (int64_t)dst_frame + (int64_t)(src_frame * vol);
630 		if (sum > INT32_MAX)
631 			sum = INT32_MAX;
632 		else if (sum < INT32_MIN)
633 			sum = INT32_MIN;
634 		dst_frame = (int32_t)sum;
635 		convert_single_s32le_to_s243le(dst, &dst_frame);
636 	}
637 }
638 
639 /* Adds the first stream to the mix.  Don't need to mix, just setup to the new
640  * values. If volume is 1.0, just memcpy. */
copy_scaled_s24_3le(uint8_t * dst,const uint8_t * src,size_t count,float volume_scaler)641 static void copy_scaled_s24_3le(uint8_t *dst,
642 			        const uint8_t *src,
643 			        size_t count,
644 			        float volume_scaler)
645 {
646 	int32_t frame;
647 	size_t i;
648 
649 	if (volume_scaler > MAX_VOLUME_TO_SCALE) {
650 		memcpy(dst, src, 3 * count * sizeof(*src));
651 		return;
652 	}
653 
654 	for (i = 0; i < count; i++, dst += 3, src += 3) {
655 		convert_single_s243le_to_s32le(&frame, src);
656 		frame *= volume_scaler;
657 		convert_single_s32le_to_s243le(dst, &frame);
658 	}
659 }
660 
cras_scale_buffer_inc_s24_3le(uint8_t * buffer,unsigned int count,float scaler,float increment,int step)661 static void cras_scale_buffer_inc_s24_3le(uint8_t *buffer, unsigned int count,
662 					  float scaler, float increment, int step)
663 {
664 	int32_t frame;
665 	int i = 0, j;
666 
667 	if (scaler > MAX_VOLUME_TO_SCALE && increment > 0)
668 		return;
669 
670 	if (scaler < MIN_VOLUME_TO_SCALE && increment < 0) {
671 		memset(buffer, 0, 3 * count * sizeof(*buffer));
672 		return;
673 	}
674 
675 	while (i + step <= count) {
676 		for (j = 0; j < step; j++) {
677 			convert_single_s243le_to_s32le(&frame, buffer);
678 
679 			if (scaler > MAX_VOLUME_TO_SCALE) {
680 			} else if (scaler < MIN_VOLUME_TO_SCALE) {
681 				frame = 0;
682 			} else {
683 				frame *= scaler;
684 			}
685 
686 			convert_single_s32le_to_s243le(buffer, &frame);
687 
688 			i++;
689 			buffer += 3;
690 		}
691 		scaler += increment;
692 	}
693 }
694 
cras_scale_buffer_s24_3le(uint8_t * buffer,unsigned int count,float scaler)695 static void cras_scale_buffer_s24_3le(uint8_t *buffer, unsigned int count,
696 				      float scaler)
697 {
698 	int32_t frame;
699 	int i;
700 
701 	if (scaler > MAX_VOLUME_TO_SCALE)
702 		return;
703 
704 	if (scaler < MIN_VOLUME_TO_SCALE) {
705 		memset(buffer, 0, 3 * count * sizeof(*buffer));
706 		return;
707 	}
708 
709 	for (i = 0; i < count; i++, buffer += 3) {
710 		convert_single_s243le_to_s32le(&frame, buffer);
711 		frame *= scaler;
712 		convert_single_s32le_to_s243le(buffer, &frame);
713 	}
714 }
715 
cras_mix_add_s24_3le(uint8_t * dst,uint8_t * src,unsigned int count,unsigned int index,int mute,float mix_vol)716 static void cras_mix_add_s24_3le(uint8_t *dst, uint8_t *src,
717 				 unsigned int count, unsigned int index,
718 				 int mute, float mix_vol)
719 {
720 	uint8_t *out = dst;
721 	uint8_t *in = src;
722 
723 	if (mute || (mix_vol < MIN_VOLUME_TO_SCALE)) {
724 		if (index == 0)
725 			memset(out, 0, 3 * count * sizeof(*out));
726 		return;
727 	}
728 
729 	if (index == 0)
730 		return copy_scaled_s24_3le(out, in, count, mix_vol);
731 
732 	scale_add_clip_s24_3le(out, in, count, mix_vol);
733 }
734 
/* Mixes count packed 3-byte samples from src into dst, stepping each pointer
 * by its own byte stride between samples.
 * Samples are expanded to 32 bits for the arithmetic.  src is multiplied by
 * scaler only when need_to_scale() says so; the sum is saturated to
 * [INT32_MIN, INT32_MAX] and packed back into dst.
 *    dst - First destination sample; updated in place.
 *    src - First source sample.
 *    dst_stride - Bytes between consecutive destination samples.
 *    src_stride - Bytes between consecutive source samples.
 *    count - Number of samples to mix.
 *    scaler - Scaler for the source samples.
 */
static void cras_mix_add_scale_stride_s24_3le(uint8_t *dst, uint8_t *src,
				 unsigned int dst_stride,
				 unsigned int src_stride,
				 unsigned int count,
				 float scaler)
{
	/* scaler is constant, so the decision is taken once. */
	const int scaled = need_to_scale(scaler);
	unsigned int left;

	for (left = count; left > 0; left--) {
		int32_t out_frame;
		int32_t in_frame;
		int64_t mixed;

		convert_single_s243le_to_s32le(&out_frame, dst);
		convert_single_s243le_to_s32le(&in_frame, src);

		if (scaled)
			mixed = (int64_t)out_frame + (int64_t)in_frame * scaler;
		else
			mixed = (int64_t)out_frame + (int64_t)in_frame;

		if (mixed < INT32_MIN)
			mixed = INT32_MIN;
		else if (mixed > INT32_MAX)
			mixed = INT32_MAX;

		out_frame = (int32_t)mixed;
		convert_single_s32le_to_s243le(dst, &out_frame);

		dst += dst_stride;
		src += src_stride;
	}
}
763 
/* Scales a buffer in place with a scaler that changes by increment every
 * step samples, dispatching on the sample format.
 * Formats other than S16_LE, S24_LE, S32_LE and S24_3LE are ignored and the
 * buffer is left untouched.
 *    fmt - Sample format of buff.
 *    buff - Samples to scale.
 *    count - Number of samples in buff.
 *    scaler - Initial scaler.
 *    increment - Amount added to the scaler after each group of samples.
 *    step - Number of samples that share one scaler value.
 */
static void scale_buffer_increment(snd_pcm_format_t fmt, uint8_t *buff,
				   unsigned int count, float scaler,
				   float increment, int step)
{
	/* Call and break: a void function must not return an expression. */
	switch (fmt) {
	case SND_PCM_FORMAT_S16_LE:
		cras_scale_buffer_inc_s16_le(buff, count, scaler,
					     increment, step);
		break;
	case SND_PCM_FORMAT_S24_LE:
		cras_scale_buffer_inc_s24_le(buff, count, scaler,
					     increment, step);
		break;
	case SND_PCM_FORMAT_S32_LE:
		cras_scale_buffer_inc_s32_le(buff, count, scaler,
					     increment, step);
		break;
	case SND_PCM_FORMAT_S24_3LE:
		cras_scale_buffer_inc_s24_3le(buff, count, scaler,
					      increment, step);
		break;
	default:
		break;
	}
}
785 
/* Scales a buffer in place by a constant scaler, dispatching on the sample
 * format.
 * Formats other than S16_LE, S24_LE, S32_LE and S24_3LE are ignored and the
 * buffer is left untouched.
 *    fmt - Sample format of buff.
 *    buff - Samples to scale.
 *    count - Number of samples in buff.
 *    scaler - Scaler applied to every sample.
 */
static void scale_buffer(snd_pcm_format_t fmt, uint8_t *buff, unsigned int count,
		       float scaler)
{
	/* Call and break: a void function must not return an expression. */
	switch (fmt) {
	case SND_PCM_FORMAT_S16_LE:
		cras_scale_buffer_s16_le(buff, count, scaler);
		break;
	case SND_PCM_FORMAT_S24_LE:
		cras_scale_buffer_s24_le(buff, count, scaler);
		break;
	case SND_PCM_FORMAT_S32_LE:
		cras_scale_buffer_s32_le(buff, count, scaler);
		break;
	case SND_PCM_FORMAT_S24_3LE:
		cras_scale_buffer_s24_3le(buff, count, scaler);
		break;
	default:
		break;
	}
}
802 
/* Adds one stream to the mix buffer, dispatching on the sample format.
 * Formats other than S16_LE, S24_LE, S32_LE and S24_3LE are ignored and dst
 * is left untouched.
 *    fmt - Sample format of dst and src.
 *    dst - Mix buffer.
 *    src - Stream samples to add.
 *    count - Number of samples to mix.
 *    index - Position of this stream in the mix; 0 means first.
 *    mute - Non-zero if the stream is muted.
 *    mix_vol - Scaler applied to the stream.
 */
static void mix_add(snd_pcm_format_t fmt, uint8_t *dst, uint8_t *src,
		  unsigned int count, unsigned int index,
		  int mute, float mix_vol)
{
	/* Call and break: a void function must not return an expression. */
	switch (fmt) {
	case SND_PCM_FORMAT_S16_LE:
		cras_mix_add_s16_le(dst, src, count, index, mute, mix_vol);
		break;
	case SND_PCM_FORMAT_S24_LE:
		cras_mix_add_s24_le(dst, src, count, index, mute, mix_vol);
		break;
	case SND_PCM_FORMAT_S32_LE:
		cras_mix_add_s32_le(dst, src, count, index, mute, mix_vol);
		break;
	case SND_PCM_FORMAT_S24_3LE:
		cras_mix_add_s24_3le(dst, src, count, index, mute, mix_vol);
		break;
	default:
		break;
	}
}
824 
/* Mixes strided samples from src into dst with scaling, dispatching on the
 * sample format.
 * Formats other than S16_LE, S24_LE, S32_LE and S24_3LE are ignored and dst
 * is left untouched.  Note that the per-format helpers take the strides
 * before the count.
 *    fmt - Sample format of dst and src.
 *    dst - First destination sample.
 *    src - First source sample.
 *    count - Number of samples to mix.
 *    dst_stride - Bytes between consecutive destination samples.
 *    src_stride - Bytes between consecutive source samples.
 *    scaler - Scaler for the source samples.
 */
static void mix_add_scale_stride(snd_pcm_format_t fmt, uint8_t *dst,
			uint8_t *src, unsigned int count,
			unsigned int dst_stride, unsigned int src_stride,
			float scaler)
{
	/* Call and break: a void function must not return an expression. */
	switch (fmt) {
	case SND_PCM_FORMAT_S16_LE:
		cras_mix_add_scale_stride_s16_le(dst, src, dst_stride,
						 src_stride, count, scaler);
		break;
	case SND_PCM_FORMAT_S24_LE:
		cras_mix_add_scale_stride_s24_le(dst, src, dst_stride,
						 src_stride, count, scaler);
		break;
	case SND_PCM_FORMAT_S32_LE:
		cras_mix_add_scale_stride_s32_le(dst, src, dst_stride,
						 src_stride, count, scaler);
		break;
	case SND_PCM_FORMAT_S24_3LE:
		cras_mix_add_scale_stride_s24_3le(dst, src, dst_stride,
						  src_stride, count, scaler);
		break;
	default:
		break;
	}
}
847 
/* Silences a buffer by zeroing count frames of frame_bytes bytes each.
 *    dst - Buffer to clear.
 *    frame_bytes - Size of one frame in bytes.
 *    count - Number of frames to clear.
 * Returns count.
 */
static size_t mix_mute_buffer(uint8_t *dst,
			    size_t frame_bytes,
			    size_t count)
{
	const size_t total_bytes = count * frame_bytes;

	memset(dst, 0, total_bytes);

	return count;
}
855 
856 const struct cras_mix_ops OPS(mixer_ops) = {
857 	.scale_buffer = scale_buffer,
858 	.scale_buffer_increment = scale_buffer_increment,
859 	.add = mix_add,
860 	.add_scale_stride = mix_add_scale_stride,
861 	.mute_buffer = mix_mute_buffer,
862 };
863