1 /* Copyright 2016 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
4 */
5
6 #include <math.h> /* for abs() */
7 #include <stdio.h> /* for printf() */
8 #include <string.h> /* for memset() */
9 #include <stdint.h> /* for uint64 definition */
10 #include <stdlib.h> /* for exit() definition */
11 #include <time.h> /* for clock_gettime */
12
13 #include "../drc_math.h"
14 #include "../dsp_util.h"
15
16
/*
 * Nanoseconds per second. Scales the tv_sec delta between two timespec
 * samples to nanoseconds before the total is converted to milliseconds.
 */
#define BILLION 1000000000LL
/* Number of iterations for performance testing. */
#define ITERATIONS 400000
21
/*
 * Reference float -> int16_t conversion used by the plain C interleaver.
 *
 * The value is rounded to the nearest integer with ties going away from
 * zero and is saturated to the int16_t range [-32768, 32767].
 */
#if defined(__aarch64__)
int16_t float_to_short(float a) {
	int32_t ret;
	/*
	 * fcvtas converts to a signed integer (nearest, ties away from zero);
	 * sqxtn then narrows the 32-bit result to 16 bits with saturation.
	 */
	asm volatile ("fcvtas %s[ret], %s[a]\n"
		      "sqxtn %h[ret], %s[ret]\n"
		      : [ret] "=w" (ret)
		      : [a] "w" (a)
		      :);
	return (int16_t)(ret);
}
#else
int16_t float_to_short(float a) {
	/*
	 * Converting NaN to an integer type is undefined behaviour in C, so
	 * give it a defined result instead of letting it reach the cast.
	 */
	if (isnan(a))
		return 0;

	/* Bias by half a unit so the truncating cast rounds ties away from 0. */
	a += (a >= 0) ? 0.5f : -0.5f;

	/* Saturate before the cast; this also covers +/- infinity. */
	if (a >= 32767.0f)
		return 32767;
	if (a <= -32768.0f)
		return -32768;

	return (int16_t)a;
}
#endif
38
/*
 * Plain C reference de-interleaver.
 *
 * Reads frames * channels interleaved 16-bit samples from input and writes
 * frames floats to each of the channels buffers in output, dividing every
 * sample by 32768.0f.
 */
void dsp_util_deinterleave_reference(int16_t *input, float *const *output,
				     int channels, int frames)
{
	/* Private write cursors; the caller's pointer table is not modified. */
	float *cursor[channels];
	int ch, frame;

	for (ch = 0; ch < channels; ch++)
		cursor[ch] = output[ch];

	for (frame = 0; frame < frames; frame++) {
		for (ch = 0; ch < channels; ch++) {
			const float scaled = *input / 32768.0f;

			input++;
			*cursor[ch] = scaled;
			cursor[ch]++;
		}
	}
}
52
/*
 * Plain C reference interleaver.
 *
 * Reads frames floats from each of the channels buffers in input, multiplies
 * every sample by 32768.0f, converts it with float_to_short() and writes the
 * results to output in interleaved order.
 */
void dsp_util_interleave_reference(float *const *input, int16_t *output,
				   int channels, int frames)
{
	/* Private read cursors; the caller's pointer table is not modified. */
	float *cursor[channels];
	int ch, frame;

	for (ch = 0; ch < channels; ch++)
		cursor[ch] = input[ch];

	for (frame = 0; frame < frames; frame++) {
		for (ch = 0; ch < channels; ch++) {
			const float scaled = *cursor[ch] * 32768.0f;

			cursor[ch]++;
			*output = float_to_short(scaled);
			output++;
		}
	}
}
68
/*
 * Use fixed size allocation to avoid performance fluctuation of allocation.
 * Every test buffer is sized for MAXSAMPLES frames per channel; the
 * benchmarks start at MAXSAMPLES frames and halve down to MINSAMPLES.
 */
#define MAXSAMPLES 4096
#define MINSAMPLES 256
/*
 * PAD buffer to check for overflows: extra bytes added to every allocation.
 * The whole-buffer comparisons include this region.
 */
#define PAD 4096
74
TestRounding(float in,int16_t expected,int samples)75 void TestRounding(float in, int16_t expected, int samples)
76 {
77 int i;
78 int max_diff;
79 int d;
80
81 short* in_shorts = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
82 float* out_floats_left_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
83 float* out_floats_right_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
84 float* out_floats_left_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
85 float* out_floats_right_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
86 short* out_shorts_c = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
87 short* out_shorts_opt = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
88
89 memset(in_shorts, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
90 memset(out_floats_left_c, 0xfb, MAXSAMPLES * 4 + PAD);
91 memset(out_floats_right_c, 0xfb, MAXSAMPLES * 4 + PAD);
92 memset(out_floats_left_opt, 0xfb, MAXSAMPLES * 4 + PAD);
93 memset(out_floats_right_opt, 0xfb, MAXSAMPLES * 4 + PAD);
94 memset(out_shorts_c, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
95 memset(out_shorts_opt, 0xfb, MAXSAMPLES * 2 * 2 + PAD);
96
97 float *out_floats_ptr_c[2];
98 float *out_floats_ptr_opt[2];
99
100 out_floats_ptr_c[0] = out_floats_left_c;
101 out_floats_ptr_c[1] = out_floats_right_c;
102 out_floats_ptr_opt[0] = out_floats_left_opt;
103 out_floats_ptr_opt[1] = out_floats_right_opt;
104
105 for (i = 0; i < MAXSAMPLES; ++i) {
106 out_floats_left_c[i] = in;
107 out_floats_right_c[i] = in;
108 }
109
110 /* reference C interleave */
111 dsp_util_interleave_reference(out_floats_ptr_c, out_shorts_c, 2,
112 samples);
113
114 /* measure optimized interleave */
115 for (i = 0; i < ITERATIONS; ++i) {
116 dsp_util_interleave(out_floats_ptr_c, (uint8_t *)out_shorts_opt,
117 2, SND_PCM_FORMAT_S16_LE, samples);
118 }
119
120 max_diff = 0;
121 for (i = 0; i < (MAXSAMPLES * 2 + PAD / 2); ++i) {
122 d = abs(out_shorts_c[i] - out_shorts_opt[i]);
123 if (d > max_diff) {
124 max_diff = d;
125 }
126 }
127 printf("test interleave compare %6d, %10f %13f %6d %6d %6d %s\n",
128 max_diff, in, in * 32768.0f, out_shorts_c[0], out_shorts_opt[0],
129 expected,
130 max_diff == 0 ? "PASS" : (out_shorts_opt[0] == expected ?
131 "EXPECTED DIFFERENCE" : "UNEXPECTED DIFFERENCE"));
132
133 /* measure reference C deinterleave */
134 dsp_util_deinterleave_reference(in_shorts, out_floats_ptr_c, 2,
135 samples);
136
137 /* measure optimized deinterleave */
138 dsp_util_deinterleave((uint8_t *)in_shorts, out_floats_ptr_opt, 2,
139 SND_PCM_FORMAT_S16_LE, samples);
140
141 d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0], samples * 4);
142 if (d) printf("left compare %d, %f %f\n", d, out_floats_ptr_c[0][0],
143 out_floats_ptr_opt[0][0]);
144 d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1], samples * 4);
145 if (d) printf("right compare %d, %f %f\n", d, out_floats_ptr_c[1][0],
146 out_floats_ptr_opt[1][0]);
147
148 free(in_shorts);
149 free(out_floats_left_c);
150 free(out_floats_right_c);
151 free(out_floats_left_opt);
152 free(out_floats_right_opt);
153 free(out_shorts_c);
154 free(out_shorts_opt);
155 }
156
main(int argc,char ** argv)157 int main(int argc, char **argv)
158 {
159 float e = 0.000000001f;
160 int samples = 16;
161
162 dsp_enable_flush_denormal_to_zero();
163
164 // Print headings for TestRounding output.
165 printf("test interleave compare maxdif, float, float * 32k "
166 "C SIMD expect pass\n");
167
168 // test clamping
169 TestRounding(1.0f, 32767, samples);
170 TestRounding(-1.0f, -32768, samples);
171 TestRounding(1.1f, 32767, samples);
172 TestRounding(-1.1f, -32768, samples);
173 TestRounding(2000000000.f / 32768.f, 32767, samples);
174 TestRounding(-2000000000.f / 32768.f, -32768, samples);
175
176 /* Infinity produces zero on arm64. */
177 #if defined(__aarch64__)
178 #define EXPECTED_INF_RESULT 0
179 #define EXPECTED_NEGINF_RESULT 0
180 #elif defined(__i386__) || defined(__x86_64__)
181 #define EXPECTED_INF_RESULT -32768
182 #define EXPECTED_NEGINF_RESULT 0
183 #else
184 #define EXPECTED_INF_RESULT 32767
185 #define EXPECTED_NEGINF_RESULT -32768
186 #endif
187
188 TestRounding(5000000000.f / 32768.f, EXPECTED_INF_RESULT, samples);
189 TestRounding(-5000000000.f / 32768.f, EXPECTED_NEGINF_RESULT, samples);
190
191 // test infinity
192 union ieee754_float inf;
193 inf.ieee.negative = 0;
194 inf.ieee.exponent = 0xfe;
195 inf.ieee.mantissa = 0x7fffff;
196 TestRounding(inf.f, EXPECTED_INF_RESULT, samples); // expect fail
197 inf.ieee.negative = 1;
198 inf.ieee.exponent = 0xfe;
199 inf.ieee.mantissa = 0x7fffff;
200 TestRounding(inf.f, EXPECTED_NEGINF_RESULT, samples); // expect fail
201
202 // test rounding
203 TestRounding(0.25f, 8192, samples);
204 TestRounding(-0.25f, -8192, samples);
205 TestRounding(0.50f, 16384, samples);
206 TestRounding(-0.50f, -16384, samples);
207 TestRounding(1.0f / 32768.0f, 1, samples);
208 TestRounding(-1.0f / 32768.0f, -1, samples);
209 TestRounding(1.0f / 32768.0f + e, 1, samples);
210 TestRounding(-1.0f / 32768.0f - e, -1, samples);
211 TestRounding(1.0f / 32768.0f - e, 1, samples);
212 TestRounding(-1.0f / 32768.0f + e, -1, samples);
213
214 /* Rounding on 'tie' is different for Intel. */
215 #if defined(__i386__) || defined(__x86_64__)
216 TestRounding(0.5f / 32768.0f, 0, samples); /* Expect round to even */
217 TestRounding(-0.5f / 32768.0f, 0, samples);
218 #else
219 TestRounding(0.5f / 32768.0f, 1, samples); /* Expect round away */
220 TestRounding(-0.5f / 32768.0f, -1, samples);
221 #endif
222
223 TestRounding(0.5f / 32768.0f + e, 1, samples);
224 TestRounding(-0.5f / 32768.0f - e, 1, samples);
225 TestRounding(0.5f / 32768.0f - e, 0, samples);
226 TestRounding(-0.5f / 32768.0f + e, 0, samples);
227
228 TestRounding(1.5f / 32768.0f, 2, samples);
229 TestRounding(-1.5f / 32768.0f, -2, samples);
230 TestRounding(1.5f / 32768.0f + e, 2, samples);
231 TestRounding(-1.5f / 32768.0f - e, -2, samples);
232 TestRounding(1.5f / 32768.0f - e, 1, samples);
233 TestRounding(-1.5f / 32768.0f + e, -1, samples);
234
235 /* Test denormals */
236 union ieee754_float denorm;
237 denorm.ieee.negative = 0;
238 denorm.ieee.exponent = 0;
239 denorm.ieee.mantissa = 1;
240 TestRounding(denorm.f, 0, samples);
241 denorm.ieee.negative = 1;
242 denorm.ieee.exponent = 0;
243 denorm.ieee.mantissa = 1;
244 TestRounding(denorm.f, 0, samples);
245
246 /* Test NaNs. Caveat Results vary by implementation. */
247 #if defined(__i386__) || defined(__x86_64__)
248 #define EXPECTED_NAN_RESULT -32768
249 #else
250 #define EXPECTED_NAN_RESULT 0
251 #endif
252 union ieee754_float nan; /* Quiet NaN */
253 nan.ieee.negative = 0;
254 nan.ieee.exponent = 0xff;
255 nan.ieee.mantissa = 0x400001;
256 TestRounding(nan.f, EXPECTED_NAN_RESULT, samples);
257 nan.ieee.negative = 0;
258 nan.ieee.exponent = 0xff;
259 nan.ieee.mantissa = 0x000001; /* Signalling NaN */
260 TestRounding(nan.f, EXPECTED_NAN_RESULT, samples);
261
262 /* Test Performance */
263 uint64_t diff;
264 struct timespec start, end;
265 int i;
266 int d;
267
268 short* in_shorts = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
269 float* out_floats_left_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
270 float* out_floats_right_c = (float*) malloc(MAXSAMPLES * 4 + PAD);
271 float* out_floats_left_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
272 float* out_floats_right_opt = (float*) malloc(MAXSAMPLES * 4 + PAD);
273 short* out_shorts_c = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
274 short* out_shorts_opt = (short*) malloc(MAXSAMPLES * 2 * 2 + PAD);
275
276 memset(in_shorts, 0x11, MAXSAMPLES * 2 * 2 + PAD);
277 memset(out_floats_left_c, 0x22, MAXSAMPLES * 4 + PAD);
278 memset(out_floats_right_c, 0x33, MAXSAMPLES * 4 + PAD);
279 memset(out_floats_left_opt, 0x44, MAXSAMPLES * 4 + PAD);
280 memset(out_floats_right_opt, 0x55, MAXSAMPLES * 4 + PAD);
281 memset(out_shorts_c, 0x66, MAXSAMPLES * 2 * 2 + PAD);
282 memset(out_shorts_opt, 0x66, MAXSAMPLES * 2 * 2 + PAD);
283
284 float *out_floats_ptr_c[2];
285 float *out_floats_ptr_opt[2];
286
287 out_floats_ptr_c[0] = out_floats_left_c;
288 out_floats_ptr_c[1] = out_floats_right_c;
289 out_floats_ptr_opt[0] = out_floats_left_opt;
290 out_floats_ptr_opt[1] = out_floats_right_opt;
291
292 /* Benchmark dsp_util_interleave */
293 for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) {
294
295 /* measure original C interleave */
296 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
297 for (i = 0; i < ITERATIONS; ++i) {
298 dsp_util_interleave_reference(out_floats_ptr_c,
299 out_shorts_c,
300 2, samples);
301 }
302 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
303 diff = (BILLION * (end.tv_sec - start.tv_sec) +
304 end.tv_nsec - start.tv_nsec) / 1000000;
305 printf("interleave ORIG size = %6d, elapsed time = %llu ms\n",
306 samples, (long long unsigned int) diff);
307
308 /* measure optimized interleave */
309 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
310 for (i = 0; i < ITERATIONS; ++i) {
311 dsp_util_interleave(out_floats_ptr_c,
312 (uint8_t *)out_shorts_opt, 2,
313 SND_PCM_FORMAT_S16_LE, samples);
314 }
315 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
316 diff = (BILLION * (end.tv_sec - start.tv_sec) +
317 end.tv_nsec - start.tv_nsec) / 1000000;
318 printf("interleave SIMD size = %6d, elapsed time = %llu ms\n",
319 samples, (long long unsigned int) diff);
320
321 /* Test C and SIMD output match */
322 d = memcmp(out_shorts_c, out_shorts_opt,
323 MAXSAMPLES * 2 * 2 + PAD);
324 if (d) printf("interleave compare %d, %d %d, %d %d\n", d,
325 out_shorts_c[0], out_shorts_c[1],
326 out_shorts_opt[0], out_shorts_opt[1]);
327 }
328
329 /* Benchmark dsp_util_deinterleave */
330 for (samples = MAXSAMPLES; samples >= MINSAMPLES; samples /= 2) {
331
332 /* Measure original C deinterleave */
333 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
334 for (i = 0; i < ITERATIONS; ++i) {
335 dsp_util_deinterleave_reference(in_shorts,
336 out_floats_ptr_c,
337 2, samples);
338 }
339 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
340 diff = (BILLION * (end.tv_sec - start.tv_sec) +
341 end.tv_nsec - start.tv_nsec) / 1000000;
342 printf("deinterleave ORIG size = %6d, "
343 "elapsed time = %llu ms\n",
344 samples, (long long unsigned int) diff);
345
346 /* Measure optimized deinterleave */
347 clock_gettime(CLOCK_MONOTONIC, &start); /* mark start time */
348 for (i = 0; i < ITERATIONS; ++i) {
349 dsp_util_deinterleave((uint8_t *)in_shorts,
350 out_floats_ptr_opt, 2,
351 SND_PCM_FORMAT_S16_LE, samples);
352 }
353 clock_gettime(CLOCK_MONOTONIC, &end); /* mark the end time */
354 diff = (BILLION * (end.tv_sec - start.tv_sec) +
355 end.tv_nsec - start.tv_nsec) / 1000000;
356 printf("deinterleave SIMD size = %6d, elapsed time = %llu ms\n",
357 samples, (long long unsigned int) diff);
358
359 /* Test C and SIMD output match */
360 d = memcmp(out_floats_ptr_c[0], out_floats_ptr_opt[0],
361 samples * 4);
362 if (d) printf("left compare %d, %f %f\n", d,
363 out_floats_ptr_c[0][0], out_floats_ptr_opt[0][0]);
364 d = memcmp(out_floats_ptr_c[1], out_floats_ptr_opt[1],
365 samples * 4);
366 if (d) printf("right compare %d, %f %f\n", d,
367 out_floats_ptr_c[1][0], out_floats_ptr_opt[1][0]);
368 }
369
370 free(in_shorts);
371 free(out_floats_left_c);
372 free(out_floats_right_c);
373 free(out_floats_left_opt);
374 free(out_floats_right_opt);
375 free(out_shorts_c);
376 free(out_shorts_opt);
377
378 return 0;
379 }
380