1 /* replaygain_synthesis - Routines for applying ReplayGain to a signal
2  * Copyright (C) 2002-2009  Josh Coalson
3  * Copyright (C) 2011-2016  Xiph.Org Foundation
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18  */
19 /*
20  * This is an aggregation of pieces of code from John Edwards' WaveGain
21  * program.  Mostly cosmetic changes were made; otherwise, the dithering
22  * code is almost untouched and the gain processing was converted from
23  * processing a whole file to processing chunks of samples.
24  *
25  * The original copyright notices for WaveGain's dither.c and wavegain.c
26  * appear below:
27  */
28 /*
29  * (c) 2002 John Edwards
30  * mostly lifted from work by Frank Klemm
31  * random functions for dithering.
32  */
33 /*
34  * Copyright (C) 2002 John Edwards
35  * Additional code by Magnus Holmgren and Gian-Carlo Pascutto
36  */
37 
38 #ifdef HAVE_CONFIG_H
39 #  include <config.h>
40 #endif
41 
42 #include <string.h> /* for memset() */
43 #include <math.h>
44 #include "share/compat.h"
45 #include "share/replaygain_synthesis.h"
46 #include "FLAC/assert.h"
47 
48 #define FLAC__I64L(x) x##LL
49 
50 
51 /*
52  * the following is based on parts of dither.c
53  */
54 
55 
56 /*
57  *  This is a simple random number generator with good quality for audio purposes.
58  *  It consists of two polycounters with opposite rotation direction and different
59  *  periods. The periods are coprime, so the total period is the product of both.
60  *
61  *     -------------------------------------------------------------------------------------------------
62  * +-> |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0|
63  * |   -------------------------------------------------------------------------------------------------
64  * |                                                                          |  |  |  |     |        |
65  * |                                                                          +--+--+--+-XOR-+--------+
66  * |                                                                                      |
67  * +--------------------------------------------------------------------------------------+
68  *
69  *     -------------------------------------------------------------------------------------------------
70  *     |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0| <-+
71  *     -------------------------------------------------------------------------------------------------   |
72  *       |  |           |  |                                                                               |
73  *       +--+----XOR----+--+                                                                               |
74  *                |                                                                                        |
75  *                +----------------------------------------------------------------------------------------+
76  *
77  *
78  *  The first has an period of 3*5*17*257*65537, the second of 7*47*73*178481,
79  *  which gives a period of 18.410.713.077.675.721.215. The result is the
80  *  XORed values of both generators.
81  */
82 
random_int_(void)83 static uint32_t random_int_(void)
84 {
85 	static const uint8_t parity_[256] = {
86 		0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
87 		1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
88 		1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
89 		0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
90 		1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
91 		0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
92 		0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
93 		1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0
94 	};
95 	static uint32_t r1_ = 1;
96 	static uint32_t r2_ = 1;
97 
98 	uint32_t t1, t2, t3, t4;
99 
100 	/* Parity calculation is done via table lookup, this is also available
101 	 * on CPUs without parity, can be implemented in C and avoid unpredictable
102 	 * jumps and slow rotate through the carry flag operations.
103 	 */
104 	t3   = t1 = r1_;    t4   = t2 = r2_;
105 	t1  &= 0xF5;        t2 >>= 25;
106 	t1   = parity_[t1]; t2  &= 0x63;
107 	t1 <<= 31;          t2   = parity_[t2];
108 
109 	return (r1_ = (t3 >> 1) | t1 ) ^ (r2_ = (t4 + t4) | t2 );
110 }
111 
112 /* gives a equal distributed random number */
113 /* between -2^31*mult and +2^31*mult */
random_equi_(double mult)114 static double random_equi_(double mult)
115 {
116 	return mult * (int) random_int_();
117 }
118 
119 /* gives a triangular distributed random number */
120 /* between -2^32*mult and +2^32*mult */
random_triangular_(double mult)121 static double random_triangular_(double mult)
122 {
123 	return mult * ( (double) (int) random_int_() + (double) (int) random_int_() );
124 }
125 
126 
127 static const float  F44_0 [16 + 32] = {
128 	(float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
129 	(float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
130 
131 	(float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
132 	(float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
133 
134 	(float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
135 	(float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0
136 };
137 
138 
139 static const float  F44_1 [16 + 32] = {  /* SNR(w) = 4.843163 dB, SNR = -3.192134 dB */
140 	(float) 0.85018292704024355931, (float) 0.29089597350995344721, (float)-0.05021866022121039450, (float)-0.23545456294599161833,
141 	(float)-0.58362726442227032096, (float)-0.67038978965193036429, (float)-0.38566861572833459221, (float)-0.15218663390367969967,
142 	(float)-0.02577543084864530676, (float) 0.14119295297688728127, (float) 0.22398848581628781612, (float) 0.15401727203382084116,
143 	(float) 0.05216161232906000929, (float)-0.00282237820999675451, (float)-0.03042794608323867363, (float)-0.03109780942998826024,
144 
145 	(float) 0.85018292704024355931, (float) 0.29089597350995344721, (float)-0.05021866022121039450, (float)-0.23545456294599161833,
146 	(float)-0.58362726442227032096, (float)-0.67038978965193036429, (float)-0.38566861572833459221, (float)-0.15218663390367969967,
147 	(float)-0.02577543084864530676, (float) 0.14119295297688728127, (float) 0.22398848581628781612, (float) 0.15401727203382084116,
148 	(float) 0.05216161232906000929, (float)-0.00282237820999675451, (float)-0.03042794608323867363, (float)-0.03109780942998826024,
149 
150 	(float) 0.85018292704024355931, (float) 0.29089597350995344721, (float)-0.05021866022121039450, (float)-0.23545456294599161833,
151 	(float)-0.58362726442227032096, (float)-0.67038978965193036429, (float)-0.38566861572833459221, (float)-0.15218663390367969967,
152 	(float)-0.02577543084864530676, (float) 0.14119295297688728127, (float) 0.22398848581628781612, (float) 0.15401727203382084116,
153 	(float) 0.05216161232906000929, (float)-0.00282237820999675451, (float)-0.03042794608323867363, (float)-0.03109780942998826024,
154 };
155 
156 
157 static const float  F44_2 [16 + 32] = {  /* SNR(w) = 10.060213 dB, SNR = -12.766730 dB */
158 	(float) 1.78827593892108555290, (float) 0.95508210637394326553, (float)-0.18447626783899924429, (float)-0.44198126506275016437,
159 	(float)-0.88404052492547413497, (float)-1.42218907262407452967, (float)-1.02037566838362314995, (float)-0.34861755756425577264,
160 	(float)-0.11490230170431934434, (float) 0.12498899339968611803, (float) 0.38065885268563131927, (float) 0.31883491321310506562,
161 	(float) 0.10486838686563442765, (float)-0.03105361685110374845, (float)-0.06450524884075370758, (float)-0.02939198261121969816,
162 
163 	(float) 1.78827593892108555290, (float) 0.95508210637394326553, (float)-0.18447626783899924429, (float)-0.44198126506275016437,
164 	(float)-0.88404052492547413497, (float)-1.42218907262407452967, (float)-1.02037566838362314995, (float)-0.34861755756425577264,
165 	(float)-0.11490230170431934434, (float) 0.12498899339968611803, (float) 0.38065885268563131927, (float) 0.31883491321310506562,
166 	(float) 0.10486838686563442765, (float)-0.03105361685110374845, (float)-0.06450524884075370758, (float)-0.02939198261121969816,
167 
168 	(float) 1.78827593892108555290, (float) 0.95508210637394326553, (float)-0.18447626783899924429, (float)-0.44198126506275016437,
169 	(float)-0.88404052492547413497, (float)-1.42218907262407452967, (float)-1.02037566838362314995, (float)-0.34861755756425577264,
170 	(float)-0.11490230170431934434, (float) 0.12498899339968611803, (float) 0.38065885268563131927, (float) 0.31883491321310506562,
171 	(float) 0.10486838686563442765, (float)-0.03105361685110374845, (float)-0.06450524884075370758, (float)-0.02939198261121969816,
172 };
173 
174 
175 static const float  F44_3 [16 + 32] = {  /* SNR(w) = 15.382598 dB, SNR = -29.402334 dB */
176 	(float) 2.89072132015058161445, (float) 2.68932810943698754106, (float) 0.21083359339410251227, (float)-0.98385073324997617515,
177 	(float)-1.11047823227097316719, (float)-2.18954076314139673147, (float)-2.36498032881953056225, (float)-0.95484132880101140785,
178 	(float)-0.23924057925542965158, (float)-0.13865235703915925642, (float) 0.43587843191057992846, (float) 0.65903257226026665927,
179 	(float) 0.24361815372443152787, (float)-0.00235974960154720097, (float) 0.01844166574603346289, (float) 0.01722945988740875099,
180 
181 	(float) 2.89072132015058161445, (float) 2.68932810943698754106, (float) 0.21083359339410251227, (float)-0.98385073324997617515,
182 	(float)-1.11047823227097316719, (float)-2.18954076314139673147, (float)-2.36498032881953056225, (float)-0.95484132880101140785,
183 	(float)-0.23924057925542965158, (float)-0.13865235703915925642, (float) 0.43587843191057992846, (float) 0.65903257226026665927,
184 	(float) 0.24361815372443152787, (float)-0.00235974960154720097, (float) 0.01844166574603346289, (float) 0.01722945988740875099,
185 
186 	(float) 2.89072132015058161445, (float) 2.68932810943698754106, (float) 0.21083359339410251227, (float)-0.98385073324997617515,
187 	(float)-1.11047823227097316719, (float)-2.18954076314139673147, (float)-2.36498032881953056225, (float)-0.95484132880101140785,
188 	(float)-0.23924057925542965158, (float)-0.13865235703915925642, (float) 0.43587843191057992846, (float) 0.65903257226026665927,
189 	(float) 0.24361815372443152787, (float)-0.00235974960154720097, (float) 0.01844166574603346289, (float) 0.01722945988740875099
190 };
191 
192 
scalar16_(const float * x,const float * y)193 static double scalar16_(const float* x, const float* y)
194 {
195 	return
196 		x[ 0]*y[ 0] + x[ 1]*y[ 1] + x[ 2]*y[ 2] + x[ 3]*y[ 3] +
197 		x[ 4]*y[ 4] + x[ 5]*y[ 5] + x[ 6]*y[ 6] + x[ 7]*y[ 7] +
198 		x[ 8]*y[ 8] + x[ 9]*y[ 9] + x[10]*y[10] + x[11]*y[11] +
199 		x[12]*y[12] + x[13]*y[13] + x[14]*y[14] + x[15]*y[15];
200 }
201 
202 
FLAC__replaygain_synthesis__init_dither_context(DitherContext * d,int bits,int shapingtype)203 void FLAC__replaygain_synthesis__init_dither_context(DitherContext *d, int bits, int shapingtype)
204 {
205 	static uint8_t default_dither [] = { 92, 92, 88, 84, 81, 78, 74, 67,  0,  0 };
206 	static const float*               F [] = { F44_0, F44_1, F44_2, F44_3 };
207 
208 	int indx;
209 
210 	if (shapingtype < 0) shapingtype = 0;
211 	if (shapingtype > 3) shapingtype = 3;
212 	d->ShapingType = (NoiseShaping)shapingtype;
213 	indx = bits - 11 - shapingtype;
214 	if (indx < 0) indx = 0;
215 	if (indx > 9) indx = 9;
216 
217 	memset ( d->ErrorHistory , 0, sizeof (d->ErrorHistory ) );
218 	memset ( d->DitherHistory, 0, sizeof (d->DitherHistory) );
219 
220 	d->FilterCoeff = F [shapingtype];
221 	d->Mask   = ((FLAC__uint64)-1) << (32 - bits);
222 	d->Add    = 0.5     * ((1L << (32 - bits)) - 1);
223 	d->Dither = 0.01f*default_dither[indx] / (((FLAC__int64)1) << bits);
224 	d->LastHistoryIndex = 0;
225 }
226 
227 static inline int64_t
ROUND64(DitherContext * d,double x)228 ROUND64 (DitherContext *d, double x)
229 {
230 	union {
231 		double d;
232 		int64_t i;
233 	} doubletmp;
234 
235     doubletmp.d = x + d->Add + (int64_t)FLAC__I64L(0x001FFFFD80000000);
236 
237     return doubletmp.i - (int64_t)FLAC__I64L(0x433FFFFD80000000);
238 }
239 
240 /*
241  * the following is based on parts of wavegain.c
242  */
243 
dither_output_(DitherContext * d,FLAC__bool do_dithering,int shapingtype,int i,double Sum,int k)244 static int64_t dither_output_(DitherContext *d, FLAC__bool do_dithering, int shapingtype, int i, double Sum, int k)
245 {
246 	double Sum2;
247 	int64_t val;
248 
249 	if(do_dithering) {
250 		if(shapingtype == 0) {
251 			double  tmp = random_equi_(d->Dither);
252 			Sum2 = tmp - d->LastRandomNumber [k];
253 			d->LastRandomNumber [k] = (int)tmp;
254 			Sum2 = Sum += Sum2;
255 			val = ROUND64(d, Sum2) & d->Mask;
256 		}
257 		else {
258 			Sum2 = random_triangular_(d->Dither) - scalar16_(d->DitherHistory[k], d->FilterCoeff + i);
259 			Sum += d->DitherHistory [k] [(-1-i)&15] = (float)Sum2;
260 			Sum2 = Sum + scalar16_(d->ErrorHistory [k], d->FilterCoeff + i);
261 			val = ROUND64(d, Sum2) & d->Mask;
262 			d->ErrorHistory [k] [(-1-i)&15] = (float)(Sum - val);
263 		}
264 		return val;
265 	}
266 
267 	return ROUND64(d, Sum);
268 }
269 
270 #if 0
271 	float        peak = 0.f,
272 	             new_peak,
273 	             factor_clip
274 	double       scale,
275 	             dB;
276 
277 	...
278 
279 	peak is in the range -32768.0 .. 32767.0
280 
281 	/* calculate factors for ReplayGain and ClippingPrevention */
282 	*track_gain = GetTitleGain() + settings->man_gain;
283 	scale = (float) pow(10., *track_gain * 0.05);
284 	if(settings->clip_prev) {
285 		factor_clip  = (float) (32767./( peak + 1));
286 		if(scale < factor_clip)
287 			factor_clip = 1.f;
288 		else
289 			factor_clip /= scale;
290 		scale *= factor_clip;
291 	}
292 	new_peak = (float) peak * scale;
293 
294 	dB = 20. * log10(scale);
295 	*track_gain = (float) dB;
296 
297  	const double scale = pow(10., (double)gain * 0.05);
298 #endif
299 
300 
FLAC__replaygain_synthesis__apply_gain(FLAC__byte * data_out,FLAC__bool little_endian_data_out,FLAC__bool uint32_t_data_out,const FLAC__int32 * const input[],uint32_t wide_samples,uint32_t channels,const uint32_t source_bps,const uint32_t target_bps,const double scale,const FLAC__bool hard_limit,FLAC__bool do_dithering,DitherContext * dither_context)301 size_t FLAC__replaygain_synthesis__apply_gain(FLAC__byte *data_out, FLAC__bool little_endian_data_out, FLAC__bool uint32_t_data_out, const FLAC__int32 * const input[], uint32_t wide_samples, uint32_t channels, const uint32_t source_bps, const uint32_t target_bps, const double scale, const FLAC__bool hard_limit, FLAC__bool do_dithering, DitherContext *dither_context)
302 {
303 	static const FLAC__int64 hard_clip_factors_[33] = {
304 		0, /* 0 bits-per-sample (not supported) */
305 		0, /* 1 bits-per-sample (not supported) */
306 		0, /* 2 bits-per-sample (not supported) */
307 		0, /* 3 bits-per-sample (not supported) */
308 		-8, /* 4 bits-per-sample */
309 		-16, /* 5 bits-per-sample */
310 		-32, /* 6 bits-per-sample */
311 		-64, /* 7 bits-per-sample */
312 		-128, /* 8 bits-per-sample */
313 		-256, /* 9 bits-per-sample */
314 		-512, /* 10 bits-per-sample */
315 		-1024, /* 11 bits-per-sample */
316 		-2048, /* 12 bits-per-sample */
317 		-4096, /* 13 bits-per-sample */
318 		-8192, /* 14 bits-per-sample */
319 		-16384, /* 15 bits-per-sample */
320 		-32768, /* 16 bits-per-sample */
321 		-65536, /* 17 bits-per-sample */
322 		-131072, /* 18 bits-per-sample */
323 		-262144, /* 19 bits-per-sample */
324 		-524288, /* 20 bits-per-sample */
325 		-1048576, /* 21 bits-per-sample */
326 		-2097152, /* 22 bits-per-sample */
327 		-4194304, /* 23 bits-per-sample */
328 		-8388608, /* 24 bits-per-sample */
329 		-16777216, /* 25 bits-per-sample */
330 		-33554432, /* 26 bits-per-sample */
331 		-67108864, /* 27 bits-per-sample */
332 		-134217728, /* 28 bits-per-sample */
333 		-268435456, /* 29 bits-per-sample */
334 		-536870912, /* 30 bits-per-sample */
335 		-1073741824, /* 31 bits-per-sample */
336 		(FLAC__int64)(-1073741824) * 2 /* 32 bits-per-sample */
337 	};
338 	const FLAC__int32 conv_shift = 32 - target_bps;
339 	const FLAC__int64 hard_clip_factor = hard_clip_factors_[target_bps];
340 	/*
341 	 * The integer input coming in has a varying range based on the
342 	 * source_bps.  We want to normalize it to [-1.0, 1.0) so instead
343 	 * of doing two multiplies on each sample, we just multiple
344 	 * 'scale' by 1/(2^(source_bps-1))
345 	 */
346 	const double multi_scale = scale / (double)(1u << (source_bps-1));
347 
348 	FLAC__byte * const start = data_out;
349 	uint32_t i, channel;
350 	const FLAC__int32 *input_;
351 	double sample;
352 	const uint32_t bytes_per_sample = target_bps / 8;
353 	const uint32_t last_history_index = dither_context->LastHistoryIndex;
354 	NoiseShaping noise_shaping = dither_context->ShapingType;
355 	FLAC__int64 val64;
356 	FLAC__int32 val32;
357 	FLAC__int32 uval32;
358 	const FLAC__uint32 twiggle = 1u << (target_bps - 1);
359 
360 	FLAC__ASSERT(channels > 0 && channels <= FLAC_SHARE__MAX_SUPPORTED_CHANNELS);
361 	FLAC__ASSERT(source_bps >= 4);
362 	FLAC__ASSERT(target_bps >= 4);
363 	FLAC__ASSERT(source_bps <= 32);
364 	FLAC__ASSERT(target_bps < 32);
365 	FLAC__ASSERT((target_bps & 7) == 0);
366 
367 	for(channel = 0; channel < channels; channel++) {
368 		const uint32_t incr = bytes_per_sample * channels;
369 		data_out = start + bytes_per_sample * channel;
370 		input_ = input[channel];
371 		for(i = 0; i < wide_samples; i++, data_out += incr) {
372 			sample = (double)input_[i] * multi_scale;
373 
374 			if(hard_limit) {
375 				/* hard 6dB limiting */
376 				if(sample < -0.5)
377 					sample = tanh((sample + 0.5) / (1-0.5)) * (1-0.5) - 0.5;
378 				else if(sample > 0.5)
379 					sample = tanh((sample - 0.5) / (1-0.5)) * (1-0.5) + 0.5;
380 			}
381 			sample *= 2147483647.;
382 
383 			val64 = dither_output_(dither_context, do_dithering, noise_shaping, (i + last_history_index) % 32, sample, channel) >> conv_shift;
384 
385 			val32 = (FLAC__int32)val64;
386 			if(val64 >= -hard_clip_factor)
387 				val32 = (FLAC__int32)(-(hard_clip_factor+1));
388 			else if(val64 < hard_clip_factor)
389 				val32 = (FLAC__int32)hard_clip_factor;
390 
391 			uval32 = (FLAC__uint32)val32;
392 			if (uint32_t_data_out)
393 				uval32 ^= twiggle;
394 
395 			if (little_endian_data_out) {
396 				switch(target_bps) {
397 					case 24:
398 						data_out[2] = (FLAC__byte)(uval32 >> 16);
399 						/* fall through */
400 					case 16:
401 						data_out[1] = (FLAC__byte)(uval32 >> 8);
402 						/* fall through */
403 					case 8:
404 						data_out[0] = (FLAC__byte)uval32;
405 						break;
406 				}
407 			}
408 			else {
409 				switch(target_bps) {
410 					case 24:
411 						data_out[0] = (FLAC__byte)(uval32 >> 16);
412 						data_out[1] = (FLAC__byte)(uval32 >> 8);
413 						data_out[2] = (FLAC__byte)uval32;
414 						break;
415 					case 16:
416 						data_out[0] = (FLAC__byte)(uval32 >> 8);
417 						data_out[1] = (FLAC__byte)uval32;
418 						break;
419 					case 8:
420 						data_out[0] = (FLAC__byte)uval32;
421 						break;
422 				}
423 			}
424 		}
425 	}
426 	dither_context->LastHistoryIndex = (last_history_index + wide_samples) % 32;
427 
428 	return wide_samples * channels * (target_bps/8);
429 }
430