1 /*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "AudioResamplerSinc"
18 //#define LOG_NDEBUG 0
19
20 #include <malloc.h>
21 #include <pthread.h>
22 #include <string.h>
23 #include <stdlib.h>
24 #include <dlfcn.h>
25
26 #include <cutils/compiler.h>
27 #include <cutils/properties.h>
28
29 #include <utils/Log.h>
30 #include <audio_utils/primitives.h>
31
32 #include "AudioResamplerSinc.h"
33
// Fallback for clang toolchains that lack __builtin_assume_aligned: yields p
// unchanged and marks the misaligned case as unreachable for the optimizer.
//
// The __has_builtin test is nested inside the __clang__ test on purpose. The
// preprocessor parses a complete #if expression even when its left operand is
// false, so a compiler that does not provide __has_builtin would reject a
// combined "defined(__clang__) && !__has_builtin(...)" line outright.
#if defined(__clang__)
#if !__has_builtin(__builtin_assume_aligned)
#define __builtin_assume_aligned(p, a) \
    (((uintptr_t(p) % (a)) == 0) ? (p) : (__builtin_unreachable(), (p)))
#endif
#endif
38
// Hand-written ARM assembly is enabled only when compiling for 32-bit ARM in
// ARM (non-Thumb) mode; otherwise the helpers below use their C++ fallbacks.
#if defined(__arm__) && !defined(__thumb__)
#define USE_INLINE_ASSEMBLY (true)
#else
#define USE_INLINE_ASSEMBLY (false)
#endif

// NEON intrinsics default to on for AArch64, or when the compiler advertises
// __ARM_NEON__. On those targets a USE_NEON that is already defined before
// this point is left untouched; on every other target it is defined as false.
#if defined(__aarch64__) || defined(__ARM_NEON__)
#ifndef USE_NEON
#define USE_NEON (true)
#endif
#else
#define USE_NEON (false)
#endif
#if USE_NEON
#include <arm_neon.h>
#endif

// Marks a parameter or variable as intentionally unused.
#define UNUSED(x) ((void)(x))
57
58 namespace android {
59 // ----------------------------------------------------------------------------
60
61
/*
 * These coefficients are computed with the "fir" utility found in
 * tools/resampler_tools
 * cmd-line: fir -l 7 -s 48000 -c 20478
 *
 * resample() selects this table when mInSampleRate <= mSampleRate.
 */
const uint32_t AudioResamplerSinc::mFirCoefsUp[] __attribute__ ((aligned (32))) = {
#include "AudioResamplerSincUp.h"
};
70
/*
 * These coefficients are optimized for 48 kHz -> 44.1 kHz
 * cmd-line: fir -l 7 -s 48000 -c 17189
 *
 * resample() selects this table when mInSampleRate > mSampleRate.
 */
const uint32_t AudioResamplerSinc::mFirCoefsDown[] __attribute__ ((aligned (32))) = {
#include "AudioResamplerSincDown.h"
};
78
// We use 15 bits to interpolate between adjacent coefficient samples.
// This cannot change because the multiplies below rely on it.
static const int pLerpBits = 15;

// Passed to pthread_once() by the constructor so init_routine() runs once.
static pthread_once_t once_control = PTHREAD_ONCE_INIT;
// Coefficient reader resolved from libaudio-resampler.so by init_routine();
// remains NULL when the library or any of its symbols cannot be loaded.
static readCoefficientsFn readResampleCoefficients = NULL;

// Filter parameters for each quality level, filled in by init_routine().
/*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::highQualityConstants;
/*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::veryHighQualityConstants;
88
init_routine()89 void AudioResamplerSinc::init_routine()
90 {
91 // for high quality resampler, the parameters for coefficients are compile-time constants
92 Constants *c = &highQualityConstants;
93 c->coefsBits = RESAMPLE_FIR_LERP_INT_BITS;
94 c->cShift = kNumPhaseBits - c->coefsBits;
95 c->cMask = ((1<< c->coefsBits)-1) << c->cShift;
96 c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits;
97 c->pMask = ((1<< pLerpBits)-1) << c->pShift;
98 c->halfNumCoefs = RESAMPLE_FIR_NUM_COEF;
99
100 // for very high quality resampler, the parameters are load-time constants
101 veryHighQualityConstants = highQualityConstants;
102
103 // Open the dll to get the coefficients for VERY_HIGH_QUALITY
104 void *resampleCoeffLib = dlopen("libaudio-resampler.so", RTLD_NOW);
105 ALOGV("Open libaudio-resampler library = %p", resampleCoeffLib);
106 if (resampleCoeffLib == NULL) {
107 ALOGE("Could not open audio-resampler library: %s", dlerror());
108 return;
109 }
110
111 readResampleFirNumCoeffFn readResampleFirNumCoeff;
112 readResampleFirLerpIntBitsFn readResampleFirLerpIntBits;
113
114 readResampleCoefficients = (readCoefficientsFn)
115 dlsym(resampleCoeffLib, "readResamplerCoefficients");
116 readResampleFirNumCoeff = (readResampleFirNumCoeffFn)
117 dlsym(resampleCoeffLib, "readResampleFirNumCoeff");
118 readResampleFirLerpIntBits = (readResampleFirLerpIntBitsFn)
119 dlsym(resampleCoeffLib, "readResampleFirLerpIntBits");
120
121 if (!readResampleCoefficients || !readResampleFirNumCoeff || !readResampleFirLerpIntBits) {
122 readResampleCoefficients = NULL;
123 dlclose(resampleCoeffLib);
124 resampleCoeffLib = NULL;
125 ALOGE("Could not find symbol: %s", dlerror());
126 return;
127 }
128
129 c = &veryHighQualityConstants;
130 c->coefsBits = readResampleFirLerpIntBits();
131 c->cShift = kNumPhaseBits - c->coefsBits;
132 c->cMask = ((1<<c->coefsBits)-1) << c->cShift;
133 c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits;
134 c->pMask = ((1<<pLerpBits)-1) << c->pShift;
135 // number of zero-crossing on each side
136 c->halfNumCoefs = readResampleFirNumCoeff();
137 ALOGV("coefsBits = %d", c->coefsBits);
138 ALOGV("halfNumCoefs = %d", c->halfNumCoefs);
139 // note that we "leak" resampleCoeffLib until the process exits
140 }
141
142 // ----------------------------------------------------------------------------
143
144 #if !USE_NEON
145
/*
 * Scales the 32-bit value `in` by one signed 16-bit half of the packed pair
 * `vRL`: a non-zero `left` selects the low half, zero selects the high half.
 *
 * The portable path returns (in * half) >> 16 using a 64-bit product. The ARM
 * path uses smultb/smultt, which multiply the top 16 bits of `in` by the
 * selected half.
 *
 * The asm input constraints are plain "r". smultb/smultt treat their two
 * source registers differently, so the operands must not be declared
 * commutative ("%r"), which would permit the compiler to swap them.
 */
static inline
int32_t mulRL(int left, int32_t in, uint32_t vRL)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    if (left) {
        asm( "smultb %[out], %[in], %[vRL] \n"
             : [out]"=r"(out)
             : [in]"r"(in), [vRL]"r"(vRL)
             : );
    } else {
        asm( "smultt %[out], %[in], %[vRL] \n"
             : [out]"=r"(out)
             : [in]"r"(in), [vRL]"r"(vRL)
             : );
    }
    return out;
#else
    int16_t v = left ? int16_t(vRL) : int16_t(vRL>>16);
    return int32_t((int64_t(in) * v) >> 16);
#endif
}
168
/*
 * Multiply-accumulate: returns a + ((v * in) >> 16), where `in` is a signed
 * 16-bit value and `v` a signed 32-bit value.
 *
 * The asm input constraints are plain "r". smlawb multiplies a full 32-bit
 * register by the bottom half of another, so `in` and `v` are not
 * interchangeable and must not be declared commutative ("%r").
 */
static inline
int32_t mulAdd(int16_t in, int32_t v, int32_t a)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    asm( "smlawb %[out], %[v], %[in], %[a] \n"
         : [out]"=r"(out)
         : [in]"r"(in), [v]"r"(v), [a]"r"(a)
         : );
    return out;
#else
    return a + int32_t((int64_t(v) * in) >> 16);
#endif
}
183
/*
 * Multiply-accumulate with a packed sample pair: returns a + ((v * s) >> 16),
 * where s is one signed 16-bit half of `inRL`. A non-zero `left` selects the
 * low half, zero selects the high half.
 *
 * The asm input constraints are plain "r". smlawb/smlawt multiply a full
 * 32-bit register by one half of another, so `inRL` and `v` are not
 * interchangeable and must not be declared commutative ("%r").
 */
static inline
int32_t mulAddRL(int left, uint32_t inRL, int32_t v, int32_t a)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    if (left) {
        asm( "smlawb %[out], %[v], %[inRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"r"(inRL), [v]"r"(v), [a]"r"(a)
             : );
    } else {
        asm( "smlawt %[out], %[v], %[inRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"r"(inRL), [v]"r"(v), [a]"r"(a)
             : );
    }
    return out;
#else
    int16_t s = left ? int16_t(inRL) : int16_t(inRL>>16);
    return a + int32_t((int64_t(v) * s) >> 16);
#endif
}
206
207 #endif // !USE_NEON
208
209 // ----------------------------------------------------------------------------
210
AudioResamplerSinc(int inChannelCount,int32_t sampleRate,src_quality quality)211 AudioResamplerSinc::AudioResamplerSinc(
212 int inChannelCount, int32_t sampleRate, src_quality quality)
213 : AudioResampler(inChannelCount, sampleRate, quality),
214 mState(0), mImpulse(0), mRingFull(0), mFirCoefs(0)
215 {
216 /*
217 * Layout of the state buffer for 32 tap:
218 *
219 * "present" sample beginning of 2nd buffer
220 * v v
221 * 0 01 2 23 3
222 * 0 F0 0 F0 F
223 * [pppppppppppppppInnnnnnnnnnnnnnnnpppppppppppppppInnnnnnnnnnnnnnnn]
224 * ^ ^ head
225 *
226 * p = past samples, convoluted with the (p)ositive side of sinc()
227 * n = future samples, convoluted with the (n)egative side of sinc()
228 * r = extra space for implementing the ring buffer
229 *
230 */
231
232 mVolumeSIMD[0] = 0;
233 mVolumeSIMD[1] = 0;
234
235 // Load the constants for coefficients
236 int ok = pthread_once(&once_control, init_routine);
237 if (ok != 0) {
238 ALOGE("%s pthread_once failed: %d", __func__, ok);
239 }
240 mConstants = (quality == VERY_HIGH_QUALITY) ?
241 &veryHighQualityConstants : &highQualityConstants;
242 }
243
244
~AudioResamplerSinc()245 AudioResamplerSinc::~AudioResamplerSinc() {
246 free(mState);
247 }
248
init()249 void AudioResamplerSinc::init() {
250 const Constants& c(*mConstants);
251 const size_t numCoefs = 2 * c.halfNumCoefs;
252 const size_t stateSize = numCoefs * mChannelCount * 2;
253 mState = (int16_t*)memalign(32, stateSize*sizeof(int16_t));
254 memset(mState, 0, sizeof(int16_t)*stateSize);
255 mImpulse = mState + (c.halfNumCoefs-1)*mChannelCount;
256 mRingFull = mImpulse + (numCoefs+1)*mChannelCount;
257 }
258
setVolume(float left,float right)259 void AudioResamplerSinc::setVolume(float left, float right) {
260 AudioResampler::setVolume(left, right);
261 // convert to U4_28 (rounding down).
262 // integer volume values are clamped to 0 to UNITY_GAIN.
263 mVolumeSIMD[0] = u4_28_from_float(clampFloatVol(left));
264 mVolumeSIMD[1] = u4_28_from_float(clampFloatVol(right));
265 }
266
resample(int32_t * out,size_t outFrameCount,AudioBufferProvider * provider)267 size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount,
268 AudioBufferProvider* provider)
269 {
270 // FIXME store current state (up or down sample) and only load the coefs when the state
271 // changes. Or load two pointers one for up and one for down in the init function.
272 // Not critical now since the read functions are fast, but would be important if read was slow.
273 if (mConstants == &veryHighQualityConstants && readResampleCoefficients) {
274 mFirCoefs = readResampleCoefficients( mInSampleRate <= mSampleRate );
275 } else {
276 mFirCoefs = (const int32_t *)
277 ((mInSampleRate <= mSampleRate) ? mFirCoefsUp : mFirCoefsDown);
278 }
279
280 // select the appropriate resampler
281 switch (mChannelCount) {
282 case 1:
283 return resample<1>(out, outFrameCount, provider);
284 case 2:
285 return resample<2>(out, outFrameCount, provider);
286 default:
287 LOG_ALWAYS_FATAL("invalid channel count: %d", mChannelCount);
288 return 0;
289 }
290 }
291
292
293 template<int CHANNELS>
resample(int32_t * out,size_t outFrameCount,AudioBufferProvider * provider)294 size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount,
295 AudioBufferProvider* provider)
296 {
297 const Constants& c(*mConstants);
298 const size_t headOffset = c.halfNumCoefs*CHANNELS;
299 int16_t* impulse = mImpulse;
300 uint32_t vRL = mVolumeRL;
301 size_t inputIndex = mInputIndex;
302 uint32_t phaseFraction = mPhaseFraction;
303 uint32_t phaseIncrement = mPhaseIncrement;
304 size_t outputIndex = 0;
305 size_t outputSampleCount = outFrameCount * 2;
306 size_t inFrameCount = getInFrameCountRequired(outFrameCount);
307
308 while (outputIndex < outputSampleCount) {
309 // buffer is empty, fetch a new one
310 while (mBuffer.frameCount == 0) {
311 mBuffer.frameCount = inFrameCount;
312 provider->getNextBuffer(&mBuffer);
313 if (mBuffer.raw == NULL) {
314 goto resample_exit;
315 }
316 const uint32_t phaseIndex = phaseFraction >> kNumPhaseBits;
317 if (phaseIndex == 1) {
318 // read one frame
319 read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex);
320 } else if (phaseIndex == 2) {
321 // read 2 frames
322 read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex);
323 inputIndex++;
324 if (inputIndex >= mBuffer.frameCount) {
325 inputIndex -= mBuffer.frameCount;
326 provider->releaseBuffer(&mBuffer);
327 } else {
328 read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex);
329 }
330 }
331 }
332 int16_t const * const in = mBuffer.i16;
333 const size_t frameCount = mBuffer.frameCount;
334
335 // Always read-in the first samples from the input buffer
336 int16_t* head = impulse + headOffset;
337 for (size_t i=0 ; i<CHANNELS ; i++) {
338 head[i] = in[inputIndex*CHANNELS + i];
339 }
340
341 // handle boundary case
342 while (CC_LIKELY(outputIndex < outputSampleCount)) {
343 filterCoefficient<CHANNELS>(&out[outputIndex], phaseFraction, impulse, vRL);
344 outputIndex += 2;
345
346 phaseFraction += phaseIncrement;
347 const size_t phaseIndex = phaseFraction >> kNumPhaseBits;
348 for (size_t i=0 ; i<phaseIndex ; i++) {
349 inputIndex++;
350 if (inputIndex >= frameCount) {
351 goto done; // need a new buffer
352 }
353 read<CHANNELS>(impulse, phaseFraction, in, inputIndex);
354 }
355 }
356 done:
357 // if done with buffer, save samples
358 if (inputIndex >= frameCount) {
359 inputIndex -= frameCount;
360 provider->releaseBuffer(&mBuffer);
361 }
362 }
363
364 resample_exit:
365 mImpulse = impulse;
366 mInputIndex = inputIndex;
367 mPhaseFraction = phaseFraction;
368 return outputIndex / CHANNELS;
369 }
370
371 template<int CHANNELS>
372 /***
373 * read()
374 *
375 * This function reads only one frame from input buffer and writes it in
376 * state buffer
377 *
378 **/
read(int16_t * & impulse,uint32_t & phaseFraction,const int16_t * in,size_t inputIndex)379 void AudioResamplerSinc::read(
380 int16_t*& impulse, uint32_t& phaseFraction,
381 const int16_t* in, size_t inputIndex)
382 {
383 impulse += CHANNELS;
384 phaseFraction -= 1LU<<kNumPhaseBits;
385
386 const Constants& c(*mConstants);
387 if (CC_UNLIKELY(impulse >= mRingFull)) {
388 const size_t stateSize = (c.halfNumCoefs*2)*CHANNELS;
389 memcpy(mState, mState+stateSize, sizeof(int16_t)*stateSize);
390 impulse -= stateSize;
391 }
392
393 int16_t* head = impulse + c.halfNumCoefs*CHANNELS;
394 for (size_t i=0 ; i<CHANNELS ; i++) {
395 head[i] = in[inputIndex*CHANNELS + i];
396 }
397 }
398
/*
 * Computes one output frame and accumulates it into out[0] and out[1].
 *
 * `phase` selects, for each side of the filter, a row of the coefficient
 * table (mFirCoefs) plus a 15-bit fraction used to interpolate between that
 * row and the next one. `samples` points at the current impulse position in
 * the state buffer: the positive side walks backwards from it, the negative
 * side walks forwards starting one frame after it.
 *
 * Without NEON the result is scaled by the packed volume `vRL`; the NEON
 * paths ignore `vRL` and use mVolumeSIMD instead.
 */
template<int CHANNELS>
void AudioResamplerSinc::filterCoefficient(int32_t* out, uint32_t phase,
        const int16_t *samples, uint32_t vRL)
{
    // NOTE: be very careful when modifying the code here. register
    // pressure is very high and a small change might cause the compiler
    // to generate far less efficient code.
    // Always validate the result with objdump or test-resample.

    // compute the index of the coefficient on the positive side and
    // negative side
    const Constants& c(*mConstants);
    const int32_t ONE = c.cMask | c.pMask;
    uint32_t indexP = ( phase & c.cMask) >> c.cShift;
    uint32_t lerpP = ( phase & c.pMask) >> c.pShift;
    uint32_t indexN = ((ONE-phase) & c.cMask) >> c.cShift;
    uint32_t lerpN = ((ONE-phase) & c.pMask) >> c.pShift;

    // each coefficient row holds halfNumCoefs entries; turn row indices into
    // element offsets
    const size_t offset = c.halfNumCoefs;
    indexP *= offset;
    indexN *= offset;

    int32_t const* coefsP = mFirCoefs + indexP;
    int32_t const* coefsN = mFirCoefs + indexN;
    int16_t const* sP = samples;
    int16_t const* sN = samples + CHANNELS;

    size_t count = offset;

#if !USE_NEON
    // scalar path: one tap per side per iteration
    int32_t l = 0;
    int32_t r = 0;
    for (size_t i=0 ; i<count ; i++) {
        interpolate<CHANNELS>(l, r, coefsP++, offset, lerpP, sP);
        sP -= CHANNELS;
        interpolate<CHANNELS>(l, r, coefsN++, offset, lerpN, sN);
        sN += CHANNELS;
    }
    out[0] += 2 * mulRL(1, l, vRL);
    out[1] += 2 * mulRL(0, r, vRL);
#else
    UNUSED(vRL);
    if (CHANNELS == 1) {
        // second coefficient row on each side, used as the interpolation target
        int32_t const* coefsP1 = coefsP + offset;
        int32_t const* coefsN1 = coefsN + offset;
        // each vector load fetches 4 consecutive samples; step back 3 frames so
        // the load ends at the current sample (the vector is reversed below)
        sP -= CHANNELS*3;

        int32x4_t sum;
        int32x2_t lerpPN;
        // lane 0 = positive-side fraction, lane 1 = negative-side fraction,
        // both shifted up by 16 bits for the doubling-multiply-high below
        lerpPN = vdup_n_s32(0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1);
        lerpPN = vshl_n_s32(lerpPN, 16);
        sum = vdupq_n_s32(0);

        int16x4_t sampleP, sampleN;
        int32x4_t samplePExt, sampleNExt;
        int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1;

        // NOTE(review): these hints assume every coefficient row starts on a
        // 16-byte boundary — confirm against the table layout.
        coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16);
        coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16);
        coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16);
        coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16);
        // NOTE(review): count is unsigned and drops by 4 per iteration, so this
        // loop presumably relies on halfNumCoefs being a multiple of 4 — confirm.
        for (; count > 0; count -= 4) {
            sampleP = vld1_s16(sP);
            sampleN = vld1_s16(sN);
            coefsPV0 = vld1q_s32(coefsP);
            coefsNV0 = vld1q_s32(coefsN);
            coefsPV1 = vld1q_s32(coefsP1);
            coefsNV1 = vld1q_s32(coefsN1);
            sP -= 4;
            sN += 4;
            coefsP += 4;
            coefsN += 4;
            coefsP1 += 4;
            coefsN1 += 4;

            // positive side runs backwards in time: reverse the 4 samples
            sampleP = vrev64_s16(sampleP);

            // interpolate (step1): difference between the two rows
            coefsPV1 = vsubq_s32(coefsPV1, coefsPV0);
            coefsNV1 = vsubq_s32(coefsNV1, coefsNV0);
            samplePExt = vshll_n_s16(sampleP, 15);
            // interpolate (step2): scale the difference by the fraction
            coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0);
            coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1);
            sampleNExt = vshll_n_s16(sampleN, 15);
            // interpolate (step3): add it to the base row
            coefsPV0 = vaddq_s32(coefsPV0, coefsPV1);
            coefsNV0 = vaddq_s32(coefsNV0, coefsNV1);

            // multiply samples by the interpolated coefficients and accumulate
            samplePExt = vqrdmulhq_s32(samplePExt, coefsPV0);
            sampleNExt = vqrdmulhq_s32(sampleNExt, coefsNV0);
            sum = vaddq_s32(sum, samplePExt);
            sum = vaddq_s32(sum, sampleNExt);
        }
        int32x2_t volumesV, outV;
        volumesV = vld1_s32(mVolumeSIMD);
        outV = vld1_s32(out);

        //add all 4 partial sums
        int32x2_t sumLow, sumHigh;
        sumLow = vget_low_s32(sum);
        sumHigh = vget_high_s32(sum);
        sumLow = vpadd_s32(sumLow, sumHigh);
        sumLow = vpadd_s32(sumLow, sumLow);

        // both lanes now hold the same mono sum; apply the left/right volumes
        // and accumulate into the output frame
        sumLow = vqrdmulh_s32(sumLow, volumesV);
        outV = vadd_s32(outV, sumLow);
        vst1_s32(out, outV);
    } else if (CHANNELS == 2) {
        // second coefficient row on each side, used as the interpolation target
        int32_t const* coefsP1 = coefsP + offset;
        int32_t const* coefsN1 = coefsN + offset;
        // each vector load fetches 4 consecutive frames; step back 3 frames so
        // the load ends at the current frame (the vectors are reversed below)
        sP -= CHANNELS*3;

        int32x4_t sum0, sum1;
        int32x2_t lerpPN;

        // lane 0 = positive-side fraction, lane 1 = negative-side fraction,
        // both shifted up by 16 bits for the doubling-multiply-high below
        lerpPN = vdup_n_s32(0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1);
        lerpPN = vshl_n_s32(lerpPN, 16);
        sum0 = vdupq_n_s32(0);
        sum1 = vdupq_n_s32(0);

        int16x4x2_t sampleP, sampleN;
        int32x4x2_t samplePExt, sampleNExt;
        int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1;

        // NOTE(review): these hints assume every coefficient row starts on a
        // 16-byte boundary — confirm against the table layout.
        coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16);
        coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16);
        coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16);
        coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16);
        // NOTE(review): count is unsigned and drops by 4 per iteration, so this
        // loop presumably relies on halfNumCoefs being a multiple of 4 — confirm.
        for (; count > 0; count -= 4) {
            // vld2 de-interleaves: val[0] and val[1] each receive one channel
            sampleP = vld2_s16(sP);
            sampleN = vld2_s16(sN);
            coefsPV0 = vld1q_s32(coefsP);
            coefsNV0 = vld1q_s32(coefsN);
            coefsPV1 = vld1q_s32(coefsP1);
            coefsNV1 = vld1q_s32(coefsN1);
            sP -= 8;
            sN += 8;
            coefsP += 4;
            coefsN += 4;
            coefsP1 += 4;
            coefsN1 += 4;

            // positive side runs backwards in time: reverse each channel
            sampleP.val[0] = vrev64_s16(sampleP.val[0]);
            sampleP.val[1] = vrev64_s16(sampleP.val[1]);

            // interpolate (step1): difference between the two rows
            coefsPV1 = vsubq_s32(coefsPV1, coefsPV0);
            coefsNV1 = vsubq_s32(coefsNV1, coefsNV0);
            samplePExt.val[0] = vshll_n_s16(sampleP.val[0], 15);
            samplePExt.val[1] = vshll_n_s16(sampleP.val[1], 15);
            // interpolate (step2): scale the difference by the fraction
            coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0);
            coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1);
            sampleNExt.val[0] = vshll_n_s16(sampleN.val[0], 15);
            sampleNExt.val[1] = vshll_n_s16(sampleN.val[1], 15);
            // interpolate (step3): add it to the base row
            coefsPV0 = vaddq_s32(coefsPV0, coefsPV1);
            coefsNV0 = vaddq_s32(coefsNV0, coefsNV1);

            // multiply samples by the interpolated coefficients and accumulate,
            // one running sum per channel
            samplePExt.val[0] = vqrdmulhq_s32(samplePExt.val[0], coefsPV0);
            samplePExt.val[1] = vqrdmulhq_s32(samplePExt.val[1], coefsPV0);
            sampleNExt.val[0] = vqrdmulhq_s32(sampleNExt.val[0], coefsNV0);
            sampleNExt.val[1] = vqrdmulhq_s32(sampleNExt.val[1], coefsNV0);
            sum0 = vaddq_s32(sum0, samplePExt.val[0]);
            sum1 = vaddq_s32(sum1, samplePExt.val[1]);
            sum0 = vaddq_s32(sum0, sampleNExt.val[0]);
            sum1 = vaddq_s32(sum1, sampleNExt.val[1]);
        }
        int32x2_t volumesV, outV;
        volumesV = vld1_s32(mVolumeSIMD);
        outV = vld1_s32(out);

        //add all 4 partial sums
        int32x2_t sumLow0, sumHigh0, sumLow1, sumHigh1;
        sumLow0 = vget_low_s32(sum0);
        sumHigh0 = vget_high_s32(sum0);
        sumLow1 = vget_low_s32(sum1);
        sumHigh1 = vget_high_s32(sum1);
        sumLow0 = vpadd_s32(sumLow0, sumHigh0);
        sumLow0 = vpadd_s32(sumLow0, sumLow0);
        sumLow1 = vpadd_s32(sumLow1, sumHigh1);
        sumLow1 = vpadd_s32(sumLow1, sumLow1);

        // pack the two channel sums into one vector (lane 0 from sum0, lane 1
        // from sum1), apply the volumes and accumulate into the output frame
        sumLow0 = vtrn_s32(sumLow0, sumLow1).val[0];
        sumLow0 = vqrdmulh_s32(sumLow0, volumesV);
        outV = vadd_s32(outV, sumLow0);
        vst1_s32(out, outV);
    }
#endif
}
594
595 template<int CHANNELS>
interpolate(int32_t & l,int32_t & r,const int32_t * coefs,size_t offset,int32_t lerp,const int16_t * samples)596 void AudioResamplerSinc::interpolate(
597 int32_t& l, int32_t& r,
598 const int32_t* coefs, size_t offset,
599 int32_t lerp, const int16_t* samples)
600 {
601 int32_t c0 = coefs[0];
602 int32_t c1 = coefs[offset];
603 int32_t sinc = mulAdd(lerp, (c1-c0)<<1, c0);
604 if (CHANNELS == 2) {
605 uint32_t rl = *reinterpret_cast<const uint32_t*>(samples);
606 l = mulAddRL(1, rl, sinc, l);
607 r = mulAddRL(0, rl, sinc, r);
608 } else {
609 r = l = mulAdd(samples[0], sinc, l);
610 }
611 }
612 // ----------------------------------------------------------------------------
613 } // namespace android
614