1 /* Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2  * Use of this source code is governed by a BSD-style license that can be
3  * found in the LICENSE file.
4  */
5 
6 /* Copyright (C) 2011 Google Inc. All rights reserved.
7  * Use of this source code is governed by a BSD-style license that can be
8  * found in the LICENSE.WEBKIT file.
9  */
10 
11 #include <assert.h>
12 #include <stdlib.h>
13 
14 #include "drc.h"
15 #include "drc_math.h"
16 
17 static void set_default_parameters(struct drc *drc);
18 static void init_data_buffer(struct drc *drc);
19 static void init_emphasis_eq(struct drc *drc);
20 static void init_crossover(struct drc *drc);
21 static void init_kernel(struct drc *drc);
22 static void free_data_buffer(struct drc *drc);
23 static void free_emphasis_eq(struct drc *drc);
24 static void free_kernel(struct drc *drc);
25 
drc_new(float sample_rate)26 struct drc *drc_new(float sample_rate)
27 {
28 	struct drc *drc = (struct drc *)calloc(1, sizeof(struct drc));
29 	drc->sample_rate = sample_rate;
30 	set_default_parameters(drc);
31 	return drc;
32 }
33 
drc_init(struct drc * drc)34 void drc_init(struct drc *drc)
35 {
36 	init_data_buffer(drc);
37 	init_emphasis_eq(drc);
38 	init_crossover(drc);
39 	init_kernel(drc);
40 }
41 
drc_free(struct drc * drc)42 void drc_free(struct drc *drc)
43 {
44 	free_kernel(drc);
45 	free_emphasis_eq(drc);
46 	free_data_buffer(drc);
47 	free(drc);
48 }
49 
50 /* Allocates temporary buffers used during drc_process(). */
init_data_buffer(struct drc * drc)51 static void init_data_buffer(struct drc *drc)
52 {
53 	int i;
54 	size_t size = sizeof(float) * DRC_PROCESS_MAX_FRAMES;
55 
56 	for (i = 0; i < DRC_NUM_CHANNELS; i++) {
57 		drc->data1[i] = (float *)calloc(1, size);
58 		drc->data2[i] = (float *)calloc(1, size);
59 	}
60 }
61 
62 /* Frees temporary buffers */
free_data_buffer(struct drc * drc)63 static void free_data_buffer(struct drc *drc)
64 {
65 	int i;
66 
67 	for (i = 0; i < DRC_NUM_CHANNELS; i++) {
68 		free(drc->data1[i]);
69 		free(drc->data2[i]);
70 	}
71 }
72 
drc_set_param(struct drc * drc,int index,unsigned paramID,float value)73 void drc_set_param(struct drc *drc, int index, unsigned paramID, float value)
74 {
75 	assert(paramID < PARAM_LAST);
76 	if (paramID < PARAM_LAST)
77 		drc->parameters[index][paramID] = value;
78 }
79 
drc_get_param(struct drc * drc,int index,unsigned paramID)80 static float drc_get_param(struct drc *drc, int index, unsigned paramID)
81 {
82 	assert(paramID < PARAM_LAST);
83 	return drc->parameters[index][paramID];
84 }
85 
86 /* Initializes parameters to default values. */
set_default_parameters(struct drc * drc)87 static void set_default_parameters(struct drc *drc)
88 {
89 	float nyquist = drc->sample_rate / 2;
90 	int i;
91 
92 	for (i = 0; i < DRC_NUM_KERNELS; i++) {
93 		float *param = drc->parameters[i];
94 		param[PARAM_THRESHOLD] = -24; /* dB */
95 		param[PARAM_KNEE] = 30; /* dB */
96 		param[PARAM_RATIO] = 12; /* unit-less */
97 		param[PARAM_ATTACK] = 0.003f; /* seconds */
98 		param[PARAM_RELEASE] = 0.250f; /* seconds */
99 		param[PARAM_PRE_DELAY] = DRC_DEFAULT_PRE_DELAY; /* seconds */
100 
101 		/* Release zone values 0 -> 1. */
102 		param[PARAM_RELEASE_ZONE1] = 0.09f;
103 		param[PARAM_RELEASE_ZONE2] = 0.16f;
104 		param[PARAM_RELEASE_ZONE3] = 0.42f;
105 		param[PARAM_RELEASE_ZONE4] = 0.98f;
106 
107 		/* This is effectively a master volume on the compressed
108 		 * signal */
109 		param[PARAM_POST_GAIN] = 0; /* dB */
110 		param[PARAM_ENABLED] = 0;
111 	}
112 
113 	drc->parameters[0][PARAM_CROSSOVER_LOWER_FREQ] = 0;
114 	drc->parameters[1][PARAM_CROSSOVER_LOWER_FREQ] = 200 / nyquist;
115 	drc->parameters[2][PARAM_CROSSOVER_LOWER_FREQ] = 2000 / nyquist;
116 
117 	/* These parameters has only one copy */
118 	drc->parameters[0][PARAM_FILTER_STAGE_GAIN] = 4.4f; /* dB */
119 	drc->parameters[0][PARAM_FILTER_STAGE_RATIO] = 2;
120 	drc->parameters[0][PARAM_FILTER_ANCHOR] = 15000 / nyquist;
121 }
122 
123 /* Finds the zero and pole for one stage of the emphasis filter */
emphasis_stage_roots(float gain,float normalized_frequency,float * zero,float * pole)124 static void emphasis_stage_roots(float gain, float normalized_frequency,
125 				 float *zero, float *pole)
126 {
127 	float gk = 1 - gain / 20;
128 	float f1 = normalized_frequency * gk;
129 	float f2 = normalized_frequency / gk;
130 	*zero = expf(-f1 * PI_FLOAT);
131 	*pole = expf(-f2 * PI_FLOAT);
132 }
133 
134 /* Calculates the biquad coefficients for two emphasis stages. */
emphasis_stage_pair_biquads(float gain,float f1,float f2,struct biquad * emphasis,struct biquad * deemphasis)135 static void emphasis_stage_pair_biquads(float gain, float f1, float f2,
136 					struct biquad *emphasis,
137 					struct biquad *deemphasis)
138 {
139 	float z1, p1;
140 	float z2, p2;
141 
142 	emphasis_stage_roots(gain, f1, &z1, &p1);
143 	emphasis_stage_roots(gain, f2, &z2, &p2);
144 
145 	float b0 = 1;
146 	float b1 = -(z1 + z2);
147 	float b2 = z1 * z2;
148 	float a0 = 1;
149 	float a1 = -(p1 + p2);
150 	float a2 = p1 * p2;
151 
152 	/* Gain compensation to make 0dB @ 0Hz */
153 	float alpha = (a0 + a1 + a2) / (b0 + b1 + b2);
154 
155 	emphasis->b0 = b0 * alpha;
156 	emphasis->b1 = b1 * alpha;
157 	emphasis->b2 = b2 * alpha;
158 	emphasis->a1 = a1;
159 	emphasis->a2 = a2;
160 
161 	float beta = (b0 + b1 + b2) / (a0 + a1 + a2);
162 
163 	deemphasis->b0 = a0 * beta;
164 	deemphasis->b1 = a1 * beta;
165 	deemphasis->b2 = a2 * beta;
166 	deemphasis->a1 = b1;
167 	deemphasis->a2 = b2;
168 }
169 
170 /* Initializes the emphasis and deemphasis filter */
init_emphasis_eq(struct drc * drc)171 static void init_emphasis_eq(struct drc *drc)
172 {
173 	struct biquad e = { 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f };
174 	struct biquad d = { 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f };
175 	int i, j;
176 
177 	float stage_gain = drc_get_param(drc, 0, PARAM_FILTER_STAGE_GAIN);
178 	float stage_ratio = drc_get_param(drc, 0, PARAM_FILTER_STAGE_RATIO);
179 	float anchor_freq = drc_get_param(drc, 0,  PARAM_FILTER_ANCHOR);
180 
181 	drc->emphasis_eq = eq2_new();
182 	drc->deemphasis_eq = eq2_new();
183 
184 	for (i = 0; i < 2; i++) {
185 		emphasis_stage_pair_biquads(stage_gain, anchor_freq,
186 					    anchor_freq / stage_ratio,
187 					    &e, &d);
188 		for (j = 0; j < 2; j++) {
189 			eq2_append_biquad_direct(drc->emphasis_eq, j, &e);
190 			eq2_append_biquad_direct(drc->deemphasis_eq, j, &d);
191 		}
192 		anchor_freq /= (stage_ratio * stage_ratio);
193 	}
194 }
195 
196 /* Frees the emphasis and deemphasis filter */
free_emphasis_eq(struct drc * drc)197 static void free_emphasis_eq(struct drc *drc)
198 {
199 	eq2_free(drc->emphasis_eq);
200 	eq2_free(drc->deemphasis_eq);
201 }
202 
203 /* Initializes the crossover filter */
init_crossover(struct drc * drc)204 static void init_crossover(struct drc *drc)
205 {
206 	float freq1 = drc->parameters[1][PARAM_CROSSOVER_LOWER_FREQ];
207 	float freq2 = drc->parameters[2][PARAM_CROSSOVER_LOWER_FREQ];
208 
209 	crossover2_init(&drc->xo2, freq1, freq2);
210 }
211 
212 /* Initializes the compressor kernels */
init_kernel(struct drc * drc)213 static void init_kernel(struct drc *drc)
214 {
215 	int i;
216 
217 	for (i = 0; i < DRC_NUM_KERNELS; i++) {
218 		dk_init(&drc->kernel[i], drc->sample_rate);
219 
220 		float db_threshold = drc_get_param(drc, i, PARAM_THRESHOLD);
221 		float db_knee = drc_get_param(drc, i, PARAM_KNEE);
222 		float ratio = drc_get_param(drc, i, PARAM_RATIO);
223 		float attack_time = drc_get_param(drc, i, PARAM_ATTACK);
224 		float release_time = drc_get_param(drc, i, PARAM_RELEASE);
225 		float pre_delay_time = drc_get_param(drc, i, PARAM_PRE_DELAY);
226 		float releaseZone1 = drc_get_param(drc, i, PARAM_RELEASE_ZONE1);
227 		float releaseZone2 = drc_get_param(drc, i, PARAM_RELEASE_ZONE2);
228 		float releaseZone3 = drc_get_param(drc, i, PARAM_RELEASE_ZONE3);
229 		float releaseZone4 = drc_get_param(drc, i, PARAM_RELEASE_ZONE4);
230 		float db_post_gain = drc_get_param(drc, i, PARAM_POST_GAIN);
231 		int enabled = drc_get_param(drc, i, PARAM_ENABLED);
232 
233 		dk_set_parameters(&drc->kernel[i],
234 				  db_threshold,
235 				  db_knee,
236 				  ratio,
237 				  attack_time,
238 				  release_time,
239 				  pre_delay_time,
240 				  db_post_gain,
241 				  releaseZone1,
242 				  releaseZone2,
243 				  releaseZone3,
244 				  releaseZone4
245 			);
246 
247 		dk_set_enabled(&drc->kernel[i], enabled);
248 	}
249 }
250 
251 /* Frees the compressor kernels */
free_kernel(struct drc * drc)252 static void free_kernel(struct drc *drc)
253 {
254 	int i;
255 	for (i = 0; i < DRC_NUM_KERNELS; i++)
256 		dk_free(&drc->kernel[i]);
257 }
258 
259 // Note gcc 4.9+ with -O2 on aarch64 produces vectorized version of C
260 // that is comparable performance, but twice as large.  -O1 and -Os produce
261 // small but slower code (4x slower than Neon).
262 #if defined(__aarch64__)
sum3(float * data,const float * data1,const float * data2,int n)263 static void sum3(float *data, const float *data1, const float *data2, int n)
264 {
265 	int count = n / 4;
266 	int i;
267 
268 	if (count) {
269 		__asm__ __volatile(
270 			"1:                                         \n"
271 			"ld1 {v0.4s}, [%[data1]], #16               \n"
272 			"ld1 {v1.4s}, [%[data2]], #16               \n"
273 			"ld1 {v2.4s}, [%[data]]                     \n"
274 			"fadd v0.4s, v0.4s, v1.4s                   \n"
275 			"fadd v0.4s, v0.4s, v2.4s                   \n"
276 			"st1 {v0.4s}, [%[data]], #16                \n"
277 			"subs %w[count], %w[count], #1              \n"
278 			"b.ne 1b                                    \n"
279 			: /* output */
280 			  [data]"+r"(data),
281 			  [data1]"+r"(data1),
282 			  [data2]"+r"(data2),
283 			  [count]"+r"(count)
284 			: /* input */
285 			: /* clobber */
286 			  "v0", "v1", "v2", "memory", "cc"
287 			);
288 	}
289 
290 	n &= 3;
291 	for (i = 0; i < n; i++)
292 		data[i] += data1[i] + data2[i];
293 }
294 #elif defined(__ARM_NEON__)
sum3(float * data,const float * data1,const float * data2,int n)295 static void sum3(float *data, const float *data1, const float *data2, int n)
296 {
297 	int count = n / 4;
298 	int i;
299 
300 	if (count) {
301 		__asm__ __volatile(
302 			"1:                                         \n"
303 			"vld1.32 {q0}, [%[data1]]!                  \n"
304 			"vld1.32 {q1}, [%[data2]]!                  \n"
305 			"vld1.32 {q2}, [%[data]]                    \n"
306 			"vadd.f32 q0, q0, q1                        \n"
307 			"vadd.f32 q0, q0, q2                        \n"
308 			"vst1.32 {q0}, [%[data]]!                   \n"
309 			"subs %[count], #1                          \n"
310 			"bne 1b                                     \n"
311 			: /* output */
312 			  [data]"+r"(data),
313 			  [data1]"+r"(data1),
314 			  [data2]"+r"(data2),
315 			  [count]"+r"(count)
316 			: /* input */
317 			: /* clobber */
318 			  "q0", "q1", "q2", "memory", "cc"
319 			);
320 	}
321 
322 	n &= 3;
323 	for (i = 0; i < n; i++)
324 		data[i] += data1[i] + data2[i];
325 }
326 #elif defined(__SSE3__)
327 #include <emmintrin.h>
sum3(float * data,const float * data1,const float * data2,int n)328 static void sum3(float *data, const float *data1, const float *data2, int n)
329 {
330 	__m128 x, y, z;
331 	int count = n / 4;
332 	int i;
333 
334 	if (count) {
335 		__asm__ __volatile(
336 			"1:                                         \n"
337 			"lddqu (%[data1]), %[x]                     \n"
338 			"lddqu (%[data2]), %[y]                     \n"
339 			"lddqu (%[data]), %[z]                      \n"
340 			"addps %[x], %[y]                           \n"
341 			"addps %[y], %[z]                           \n"
342 			"movdqu %[z], (%[data])                     \n"
343 			"add $16, %[data1]                          \n"
344 			"add $16, %[data2]                          \n"
345 			"add $16, %[data]                           \n"
346 			"sub $1, %[count]                           \n"
347 			"jne 1b                                     \n"
348 			: /* output */
349 			  [data]"+r"(data),
350 			  [data1]"+r"(data1),
351 			  [data2]"+r"(data2),
352 			  [count]"+r"(count),
353 			  [x]"=x"(x),
354 			  [y]"=x"(y),
355 			  [z]"=x"(z)
356 			: /* input */
357 			: /* clobber */
358 			  "memory", "cc"
359 			);
360 	}
361 
362 	n &= 3;
363 	for (i = 0; i < n; i++)
364 		data[i] += data1[i] + data2[i];
365 }
366 #else
sum3(float * data,const float * data1,const float * data2,int n)367 static void sum3(float *data, const float *data1, const float *data2, int n)
368 {
369 	int i;
370 	for (i = 0; i < n; i++)
371 		data[i] += data1[i] + data2[i];
372 }
373 #endif
374 
drc_process(struct drc * drc,float ** data,int frames)375 void drc_process(struct drc *drc, float **data, int frames)
376 {
377 	int i;
378 	float **data1 = drc->data1;
379 	float **data2 = drc->data2;
380 
381 	/* Apply pre-emphasis filter if it is not disabled. */
382 	if (!drc->emphasis_disabled)
383 		eq2_process(drc->emphasis_eq, data[0], data[1], frames);
384 
385 	/* Crossover */
386 	crossover2_process(&drc->xo2, frames, data[0], data[1],
387 			   data1[0], data1[1], data2[0], data2[1]);
388 
389 	/* Apply compression to each band of the signal. The processing is
390 	 * performed in place.
391 	 */
392 	dk_process(&drc->kernel[0], data, frames);
393 	dk_process(&drc->kernel[1], data1, frames);
394 	dk_process(&drc->kernel[2], data2, frames);
395 
396 	/* Sum the three bands of signal */
397 	for (i = 0; i < DRC_NUM_CHANNELS; i++)
398 		sum3(data[i], data1[i], data2[i], frames);
399 
400 	/* Apply de-emphasis filter if emphasis is not disabled. */
401 	if (!drc->emphasis_disabled)
402 		eq2_process(drc->deemphasis_eq, data[0], data[1], frames);
403 }
404