1 /*
2  * Copyright (C) 2007 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "AudioResamplerSinc"
18 //#define LOG_NDEBUG 0
19 
20 #define __STDC_CONSTANT_MACROS
21 #include <malloc.h>
22 #include <string.h>
23 #include <stdlib.h>
24 #include <dlfcn.h>
25 
26 #include <cutils/compiler.h>
27 #include <cutils/properties.h>
28 
29 #include <utils/Log.h>
30 #include <audio_utils/primitives.h>
31 
32 #include "AudioResamplerSinc.h"
33 
// Fallback for clang versions that do not provide __builtin_assume_aligned:
// returns |p| unchanged and marks the misaligned case as unreachable.
// The __has_builtin test is nested rather than joined with &&, because a
// preprocessor that does not know __has_builtin must still be able to parse
// the whole #if expression even when the left-hand side is false.
#if defined(__clang__)
#if !__has_builtin(__builtin_assume_aligned)
#define __builtin_assume_aligned(p, a) \
	(((uintptr_t(p) % (a)) == 0) ? (p) : (__builtin_unreachable(), (p)))
#endif
#endif
38 
// USE_INLINE_ASSEMBLY selects the hand-written ARM assembly in the scalar
// multiply helpers below; it is enabled only for 32-bit ARM builds that are
// not compiled in Thumb mode.
#if defined(__arm__) && !defined(__thumb__)
#define USE_INLINE_ASSEMBLY (true)
#else
#define USE_INLINE_ASSEMBLY (false)
#endif

// USE_NEON selects the NEON intrinsics implementation of the filter.
// On AArch64 / NEON-capable ARM builds it defaults to true unless the build
// already defined it; on every other target it is forced to false.
#if defined(__aarch64__) || defined(__ARM_NEON__)
#ifndef USE_NEON
#define USE_NEON (true)
#endif
#else
#define USE_NEON (false)
#endif
#if USE_NEON
#include <arm_neon.h>
#endif

// Marks a variable or parameter as intentionally unused.
#define UNUSED(x) ((void)(x))
57 
58 namespace android {
59 // ----------------------------------------------------------------------------
60 
61 
/*
 * These coefficients are computed with the "fir" utility found in
 * tools/resampler_tools
 * cmd-line: fir -l 7 -s 48000 -c 20478
 *
 * resample() selects this table when mInSampleRate <= mSampleRate and the
 * dynamically loaded coefficients are not in use.
 */
const uint32_t AudioResamplerSinc::mFirCoefsUp[] __attribute__ ((aligned (32))) = {
#include "AudioResamplerSincUp.h"
};
70 
/*
 * These coefficients are optimized for 48 kHz -> 44.1 kHz
 * cmd-line: fir -l 7 -s 48000 -c 17189
 *
 * resample() selects this table when mInSampleRate > mSampleRate and the
 * dynamically loaded coefficients are not in use.
 */
const uint32_t AudioResamplerSinc::mFirCoefsDown[] __attribute__ ((aligned (32))) = {
#include "AudioResamplerSincDown.h"
};
78 
// We use 15 bits to interpolate between these samples.
// This cannot change because the multiplies below rely on it.
static const int pLerpBits = 15;

// Guards the one-time execution of AudioResamplerSinc::init_routine().
static pthread_once_t once_control = PTHREAD_ONCE_INIT;
// Coefficient reader resolved from libaudio-resampler.so by init_routine();
// stays NULL when the library or any of its symbols is unavailable.
static readCoefficientsFn readResampleCoefficients = NULL;

// Filter parameters for the two quality levels; both are filled in by init_routine().
/*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::highQualityConstants;
/*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::veryHighQualityConstants;
88 
init_routine()89 void AudioResamplerSinc::init_routine()
90 {
91     // for high quality resampler, the parameters for coefficients are compile-time constants
92     Constants *c = &highQualityConstants;
93     c->coefsBits = RESAMPLE_FIR_LERP_INT_BITS;
94     c->cShift = kNumPhaseBits - c->coefsBits;
95     c->cMask = ((1<< c->coefsBits)-1) << c->cShift;
96     c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits;
97     c->pMask = ((1<< pLerpBits)-1) << c->pShift;
98     c->halfNumCoefs = RESAMPLE_FIR_NUM_COEF;
99 
100     // for very high quality resampler, the parameters are load-time constants
101     veryHighQualityConstants = highQualityConstants;
102 
103     // Open the dll to get the coefficients for VERY_HIGH_QUALITY
104     void *resampleCoeffLib = dlopen("libaudio-resampler.so", RTLD_NOW);
105     ALOGV("Open libaudio-resampler library = %p", resampleCoeffLib);
106     if (resampleCoeffLib == NULL) {
107         ALOGE("Could not open audio-resampler library: %s", dlerror());
108         return;
109     }
110 
111     readResampleFirNumCoeffFn readResampleFirNumCoeff;
112     readResampleFirLerpIntBitsFn readResampleFirLerpIntBits;
113 
114     readResampleCoefficients = (readCoefficientsFn)
115             dlsym(resampleCoeffLib, "readResamplerCoefficients");
116     readResampleFirNumCoeff = (readResampleFirNumCoeffFn)
117             dlsym(resampleCoeffLib, "readResampleFirNumCoeff");
118     readResampleFirLerpIntBits = (readResampleFirLerpIntBitsFn)
119             dlsym(resampleCoeffLib, "readResampleFirLerpIntBits");
120 
121     if (!readResampleCoefficients || !readResampleFirNumCoeff || !readResampleFirLerpIntBits) {
122         readResampleCoefficients = NULL;
123         dlclose(resampleCoeffLib);
124         resampleCoeffLib = NULL;
125         ALOGE("Could not find symbol: %s", dlerror());
126         return;
127     }
128 
129     c = &veryHighQualityConstants;
130     c->coefsBits = readResampleFirLerpIntBits();
131     c->cShift = kNumPhaseBits - c->coefsBits;
132     c->cMask = ((1<<c->coefsBits)-1) << c->cShift;
133     c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits;
134     c->pMask = ((1<<pLerpBits)-1) << c->pShift;
135     // number of zero-crossing on each side
136     c->halfNumCoefs = readResampleFirNumCoeff();
137     ALOGV("coefsBits = %d", c->coefsBits);
138     ALOGV("halfNumCoefs = %d", c->halfNumCoefs);
139     // note that we "leak" resampleCoeffLib until the process exits
140 }
141 
142 // ----------------------------------------------------------------------------
143 
144 #if !USE_NEON
145 
/*
 * Multiplies |in| by one signed 16-bit half of the packed volume word |vRL|:
 * the low half when |left| is non-zero, the high half otherwise.
 * The portable path returns (in * half) >> 16.
 */
static inline
int32_t mulRL(int left, int32_t in, uint32_t vRL)
{
#if USE_INLINE_ASSEMBLY
    // The input operands are deliberately NOT marked commutative ('%'):
    // smultb/smulltt pick a specific half of each source register, so the
    // compiler must never be allowed to swap them.
    int32_t out;
    if (left) {
        asm( "smultb %[out], %[in], %[vRL] \n"
             : [out]"=r"(out)
             : [in]"r"(in), [vRL]"r"(vRL)
             : );
    } else {
        asm( "smultt %[out], %[in], %[vRL] \n"
             : [out]"=r"(out)
             : [in]"r"(in), [vRL]"r"(vRL)
             : );
    }
    return out;
#else
    int16_t v = left ? int16_t(vRL) : int16_t(vRL>>16);
    return int32_t((int64_t(in) * v) >> 16);
#endif
}
168 
/*
 * Multiply-accumulate: returns a + ((v * in) >> 16), where |in| is a signed
 * 16-bit value and |v| a signed 32-bit value.
 */
static inline
int32_t mulAdd(int16_t in, int32_t v, int32_t a)
{
#if USE_INLINE_ASSEMBLY
    // The input operands are deliberately NOT marked commutative ('%'):
    // smlawb uses all 32 bits of its first source but only the low half of
    // the second, so |v| and |in| are not interchangeable.
    int32_t out;
    asm( "smlawb %[out], %[v], %[in], %[a] \n"
         : [out]"=r"(out)
         : [in]"r"(in), [v]"r"(v), [a]"r"(a)
         : );
    return out;
#else
    return a + int32_t((int64_t(v) * in) >> 16);
#endif
}
183 
/*
 * Multiply-accumulate with a packed sample pair: picks one signed 16-bit half
 * of |inRL| (the low half when |left| is non-zero, the high half otherwise)
 * and returns a + ((v * half) >> 16).
 */
static inline
int32_t mulAddRL(int left, uint32_t inRL, int32_t v, int32_t a)
{
#if USE_INLINE_ASSEMBLY
    // The input operands are deliberately NOT marked commutative ('%'):
    // smlawb/smlawt use all 32 bits of the first source but only one half of
    // the second, so |v| and |inRL| are not interchangeable.
    int32_t out;
    if (left) {
        asm( "smlawb %[out], %[v], %[inRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"r"(inRL), [v]"r"(v), [a]"r"(a)
             : );
    } else {
        asm( "smlawt %[out], %[v], %[inRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"r"(inRL), [v]"r"(v), [a]"r"(a)
             : );
    }
    return out;
#else
    int16_t s = left ? int16_t(inRL) : int16_t(inRL>>16);
    return a + int32_t((int64_t(v) * s) >> 16);
#endif
}
206 
207 #endif // !USE_NEON
208 
209 // ----------------------------------------------------------------------------
210 
AudioResamplerSinc(int inChannelCount,int32_t sampleRate,src_quality quality)211 AudioResamplerSinc::AudioResamplerSinc(
212         int inChannelCount, int32_t sampleRate, src_quality quality)
213     : AudioResampler(inChannelCount, sampleRate, quality),
214     mState(0), mImpulse(0), mRingFull(0), mFirCoefs(0)
215 {
216     /*
217      * Layout of the state buffer for 32 tap:
218      *
219      * "present" sample            beginning of 2nd buffer
220      *                 v                v
221      *  0              01               2              23              3
222      *  0              F0               0              F0              F
223      * [pppppppppppppppInnnnnnnnnnnnnnnnpppppppppppppppInnnnnnnnnnnnnnnn]
224      *                 ^               ^ head
225      *
226      * p = past samples, convoluted with the (p)ositive side of sinc()
227      * n = future samples, convoluted with the (n)egative side of sinc()
228      * r = extra space for implementing the ring buffer
229      *
230      */
231 
232     mVolumeSIMD[0] = 0;
233     mVolumeSIMD[1] = 0;
234 
235     // Load the constants for coefficients
236     int ok = pthread_once(&once_control, init_routine);
237     if (ok != 0) {
238         ALOGE("%s pthread_once failed: %d", __func__, ok);
239     }
240     mConstants = (quality == VERY_HIGH_QUALITY) ?
241             &veryHighQualityConstants : &highQualityConstants;
242 }
243 
244 
// Releases the state buffer allocated by init(); mState is still NULL (and
// free() is a no-op) when init() was never called.
AudioResamplerSinc::~AudioResamplerSinc() {
    free(mState);
}
248 
init()249 void AudioResamplerSinc::init() {
250     const Constants& c(*mConstants);
251     const size_t numCoefs = 2 * c.halfNumCoefs;
252     const size_t stateSize = numCoefs * mChannelCount * 2;
253     mState = (int16_t*)memalign(32, stateSize*sizeof(int16_t));
254     memset(mState, 0, sizeof(int16_t)*stateSize);
255     mImpulse  = mState   + (c.halfNumCoefs-1)*mChannelCount;
256     mRingFull = mImpulse + (numCoefs+1)*mChannelCount;
257 }
258 
setVolume(float left,float right)259 void AudioResamplerSinc::setVolume(float left, float right) {
260     AudioResampler::setVolume(left, right);
261     // convert to U4_28 (rounding down).
262     // integer volume values are clamped to 0 to UNITY_GAIN.
263     mVolumeSIMD[0] = u4_28_from_float(clampFloatVol(left));
264     mVolumeSIMD[1] = u4_28_from_float(clampFloatVol(right));
265 }
266 
resample(int32_t * out,size_t outFrameCount,AudioBufferProvider * provider)267 size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount,
268             AudioBufferProvider* provider)
269 {
270     // FIXME store current state (up or down sample) and only load the coefs when the state
271     // changes. Or load two pointers one for up and one for down in the init function.
272     // Not critical now since the read functions are fast, but would be important if read was slow.
273     if (mConstants == &veryHighQualityConstants && readResampleCoefficients) {
274         mFirCoefs = readResampleCoefficients( mInSampleRate <= mSampleRate );
275     } else {
276         mFirCoefs = (const int32_t *)
277                 ((mInSampleRate <= mSampleRate) ? mFirCoefsUp : mFirCoefsDown);
278     }
279 
280     // select the appropriate resampler
281     switch (mChannelCount) {
282     case 1:
283         return resample<1>(out, outFrameCount, provider);
284     case 2:
285         return resample<2>(out, outFrameCount, provider);
286     default:
287         LOG_ALWAYS_FATAL("invalid channel count: %d", mChannelCount);
288         return 0;
289     }
290 }
291 
292 
293 template<int CHANNELS>
resample(int32_t * out,size_t outFrameCount,AudioBufferProvider * provider)294 size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount,
295         AudioBufferProvider* provider)
296 {
297     const Constants& c(*mConstants);
298     const size_t headOffset = c.halfNumCoefs*CHANNELS;
299     int16_t* impulse = mImpulse;
300     uint32_t vRL = mVolumeRL;
301     size_t inputIndex = mInputIndex;
302     uint32_t phaseFraction = mPhaseFraction;
303     uint32_t phaseIncrement = mPhaseIncrement;
304     size_t outputIndex = 0;
305     size_t outputSampleCount = outFrameCount * 2;
306     size_t inFrameCount = getInFrameCountRequired(outFrameCount);
307 
308     while (outputIndex < outputSampleCount) {
309         // buffer is empty, fetch a new one
310         while (mBuffer.frameCount == 0) {
311             mBuffer.frameCount = inFrameCount;
312             provider->getNextBuffer(&mBuffer);
313             if (mBuffer.raw == NULL) {
314                 goto resample_exit;
315             }
316             const uint32_t phaseIndex = phaseFraction >> kNumPhaseBits;
317             if (phaseIndex == 1) {
318                 // read one frame
319                 read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex);
320             } else if (phaseIndex == 2) {
321                 // read 2 frames
322                 read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex);
323                 inputIndex++;
324                 if (inputIndex >= mBuffer.frameCount) {
325                     inputIndex -= mBuffer.frameCount;
326                     provider->releaseBuffer(&mBuffer);
327                 } else {
328                     read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex);
329                 }
330             }
331         }
332         int16_t const * const in = mBuffer.i16;
333         const size_t frameCount = mBuffer.frameCount;
334 
335         // Always read-in the first samples from the input buffer
336         int16_t* head = impulse + headOffset;
337         for (size_t i=0 ; i<CHANNELS ; i++) {
338             head[i] = in[inputIndex*CHANNELS + i];
339         }
340 
341         // handle boundary case
342         while (CC_LIKELY(outputIndex < outputSampleCount)) {
343             filterCoefficient<CHANNELS>(&out[outputIndex], phaseFraction, impulse, vRL);
344             outputIndex += 2;
345 
346             phaseFraction += phaseIncrement;
347             const size_t phaseIndex = phaseFraction >> kNumPhaseBits;
348             for (size_t i=0 ; i<phaseIndex ; i++) {
349                 inputIndex++;
350                 if (inputIndex >= frameCount) {
351                     goto done;  // need a new buffer
352                 }
353                 read<CHANNELS>(impulse, phaseFraction, in, inputIndex);
354             }
355         }
356 done:
357         // if done with buffer, save samples
358         if (inputIndex >= frameCount) {
359             inputIndex -= frameCount;
360             provider->releaseBuffer(&mBuffer);
361         }
362     }
363 
364 resample_exit:
365     mImpulse = impulse;
366     mInputIndex = inputIndex;
367     mPhaseFraction = phaseFraction;
368     return outputIndex / CHANNELS;
369 }
370 
371 template<int CHANNELS>
372 /***
373 * read()
374 *
375 * This function reads only one frame from input buffer and writes it in
376 * state buffer
377 *
378 **/
read(int16_t * & impulse,uint32_t & phaseFraction,const int16_t * in,size_t inputIndex)379 void AudioResamplerSinc::read(
380         int16_t*& impulse, uint32_t& phaseFraction,
381         const int16_t* in, size_t inputIndex)
382 {
383     impulse += CHANNELS;
384     phaseFraction -= 1LU<<kNumPhaseBits;
385 
386     const Constants& c(*mConstants);
387     if (CC_UNLIKELY(impulse >= mRingFull)) {
388         const size_t stateSize = (c.halfNumCoefs*2)*CHANNELS;
389         memcpy(mState, mState+stateSize, sizeof(int16_t)*stateSize);
390         impulse -= stateSize;
391     }
392 
393     int16_t* head = impulse + c.halfNumCoefs*CHANNELS;
394     for (size_t i=0 ; i<CHANNELS ; i++) {
395         head[i] = in[inputIndex*CHANNELS + i];
396     }
397 }
398 
/*
 * Computes one output frame and accumulates it into out[0] and out[1].
 *
 * |phase| selects the filter coefficients: part of it indexes the coefficient
 * table (cMask/cShift) and part of it is the interpolation factor between two
 * neighbouring coefficient sets (pMask/pShift). |samples| points at the
 * "present" frame in the state buffer; past frames are read going backwards
 * from it and future frames going forwards from the following frame.
 *
 * The scalar path applies the packed volume |vRL|; the NEON path ignores
 * |vRL| and applies mVolumeSIMD instead.
 */
template<int CHANNELS>
void AudioResamplerSinc::filterCoefficient(int32_t* out, uint32_t phase,
         const int16_t *samples, uint32_t vRL)
{
    // NOTE: be very careful when modifying the code here. register
    // pressure is very high and a small change might cause the compiler
    // to generate far less efficient code.
    // Always sanity check the result with objdump or test-resample.

    // compute the index of the coefficient on the positive side and
    // negative side
    const Constants& c(*mConstants);
    const int32_t ONE = c.cMask | c.pMask;
    uint32_t indexP = ( phase & c.cMask) >> c.cShift;
    uint32_t lerpP  = ( phase & c.pMask) >> c.pShift;
    uint32_t indexN = ((ONE-phase) & c.cMask) >> c.cShift;
    uint32_t lerpN  = ((ONE-phase) & c.pMask) >> c.pShift;

    // each table entry holds halfNumCoefs coefficients, so scale the indices
    const size_t offset = c.halfNumCoefs;
    indexP *= offset;
    indexN *= offset;

    int32_t const* coefsP = mFirCoefs + indexP;
    int32_t const* coefsN = mFirCoefs + indexN;
    // sP walks backwards from the present frame, sN forwards from the next one
    int16_t const* sP = samples;
    int16_t const* sN = samples + CHANNELS;

    size_t count = offset;

#if !USE_NEON
    // scalar path: one past and one future frame per iteration
    int32_t l = 0;
    int32_t r = 0;
    for (size_t i=0 ; i<count ; i++) {
        interpolate<CHANNELS>(l, r, coefsP++, offset, lerpP, sP);
        sP -= CHANNELS;
        interpolate<CHANNELS>(l, r, coefsN++, offset, lerpN, sN);
        sN += CHANNELS;
    }
    // apply the volume (low half of vRL to l, high half to r) and accumulate
    out[0] += 2 * mulRL(1, l, vRL);
    out[1] += 2 * mulRL(0, r, vRL);
#else
    UNUSED(vRL);
    if (CHANNELS == 1) {
        // second coefficient set used for the interpolation
        int32_t const* coefsP1 = coefsP + offset;
        int32_t const* coefsN1 = coefsN + offset;
        // the vector load below reads 4 frames starting at sP, so step back
        // 3 frames to make the present frame the last one loaded
        sP -= CHANNELS*3;

        int32x4_t sum;
        int32x2_t lerpPN;
        // lane 0 = lerpP, lane 1 = lerpN, both shifted left by 16 bits
        lerpPN = vdup_n_s32(0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1);
        lerpPN = vshl_n_s32(lerpPN, 16);
        sum = vdupq_n_s32(0);

        int16x4_t sampleP, sampleN;
        int32x4_t samplePExt, sampleNExt;
        int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1;

        // NOTE(review): the 16-byte alignment promise appears to rely on
        // halfNumCoefs being a multiple of 4 — confirm against the tables.
        coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16);
        coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16);
        coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16);
        coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16);
        // 4 coefficients per iteration; count is unsigned, so this loop
        // terminates only if count is a multiple of 4
        for (; count > 0; count -= 4) {
            sampleP = vld1_s16(sP);
            sampleN = vld1_s16(sN);
            coefsPV0 = vld1q_s32(coefsP);
            coefsNV0 = vld1q_s32(coefsN);
            coefsPV1 = vld1q_s32(coefsP1);
            coefsNV1 = vld1q_s32(coefsN1);
            sP -= 4;
            sN += 4;
            coefsP += 4;
            coefsN += 4;
            coefsP1 += 4;
            coefsN1 += 4;

            // past samples were loaded in forward memory order; reverse them
            sampleP = vrev64_s16(sampleP);

            // interpolate (step1)
            coefsPV1 = vsubq_s32(coefsPV1, coefsPV0);
            coefsNV1 = vsubq_s32(coefsNV1, coefsNV0);
            samplePExt = vshll_n_s16(sampleP, 15);
            // interpolate (step2)
            coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0);
            coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1);
            sampleNExt = vshll_n_s16(sampleN, 15);
            // interpolate (step3)
            coefsPV0 = vaddq_s32(coefsPV0, coefsPV1);
            coefsNV0 = vaddq_s32(coefsNV0, coefsNV1);

            // multiply the samples by the interpolated coefficients and accumulate
            samplePExt = vqrdmulhq_s32(samplePExt, coefsPV0);
            sampleNExt = vqrdmulhq_s32(sampleNExt, coefsNV0);
            sum = vaddq_s32(sum, samplePExt);
            sum = vaddq_s32(sum, sampleNExt);
        }
        int32x2_t volumesV, outV;
        volumesV = vld1_s32(mVolumeSIMD);
        outV = vld1_s32(out);

        //add all 4 partial sums
        int32x2_t sumLow, sumHigh;
        sumLow = vget_low_s32(sum);
        sumHigh = vget_high_s32(sum);
        sumLow = vpadd_s32(sumLow, sumHigh);
        sumLow = vpadd_s32(sumLow, sumLow);

        // both lanes now hold the total; scale each by its own volume and
        // accumulate into out[0] / out[1]
        sumLow = vqrdmulh_s32(sumLow, volumesV);
        outV = vadd_s32(outV, sumLow);
        vst1_s32(out, outV);
    } else if (CHANNELS == 2) {
        // second coefficient set used for the interpolation
        int32_t const* coefsP1 = coefsP + offset;
        int32_t const* coefsN1 = coefsN + offset;
        // the vector load below reads 4 frames starting at sP, so step back
        // 3 frames to make the present frame the last one loaded
        sP -= CHANNELS*3;

        int32x4_t sum0, sum1;
        int32x2_t lerpPN;

        // lane 0 = lerpP, lane 1 = lerpN, both shifted left by 16 bits
        lerpPN = vdup_n_s32(0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1);
        lerpPN = vshl_n_s32(lerpPN, 16);
        sum0 = vdupq_n_s32(0);
        sum1 = vdupq_n_s32(0);

        int16x4x2_t sampleP, sampleN;
        int32x4x2_t samplePExt, sampleNExt;
        int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1;

        // NOTE(review): the 16-byte alignment promise appears to rely on
        // halfNumCoefs being a multiple of 4 — confirm against the tables.
        coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16);
        coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16);
        coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16);
        coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16);
        // 4 coefficients (4 stereo frames) per iteration; count is unsigned,
        // so this loop terminates only if count is a multiple of 4
        for (; count > 0; count -= 4) {
            // de-interleaving loads: val[0] gets the first sample of each
            // frame, val[1] the second
            sampleP = vld2_s16(sP);
            sampleN = vld2_s16(sN);
            coefsPV0 = vld1q_s32(coefsP);
            coefsNV0 = vld1q_s32(coefsN);
            coefsPV1 = vld1q_s32(coefsP1);
            coefsNV1 = vld1q_s32(coefsN1);
            sP -= 8;
            sN += 8;
            coefsP += 4;
            coefsN += 4;
            coefsP1 += 4;
            coefsN1 += 4;

            // past samples were loaded in forward memory order; reverse them
            sampleP.val[0] = vrev64_s16(sampleP.val[0]);
            sampleP.val[1] = vrev64_s16(sampleP.val[1]);

            // interpolate (step1)
            coefsPV1 = vsubq_s32(coefsPV1, coefsPV0);
            coefsNV1 = vsubq_s32(coefsNV1, coefsNV0);
            samplePExt.val[0] = vshll_n_s16(sampleP.val[0], 15);
            samplePExt.val[1] = vshll_n_s16(sampleP.val[1], 15);
            // interpolate (step2)
            coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0);
            coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1);
            sampleNExt.val[0] = vshll_n_s16(sampleN.val[0], 15);
            sampleNExt.val[1] = vshll_n_s16(sampleN.val[1], 15);
            // interpolate (step3)
            coefsPV0 = vaddq_s32(coefsPV0, coefsPV1);
            coefsNV0 = vaddq_s32(coefsNV0, coefsNV1);

            // the same interpolated coefficients are applied to both channels
            samplePExt.val[0] = vqrdmulhq_s32(samplePExt.val[0], coefsPV0);
            samplePExt.val[1] = vqrdmulhq_s32(samplePExt.val[1], coefsPV0);
            sampleNExt.val[0] = vqrdmulhq_s32(sampleNExt.val[0], coefsNV0);
            sampleNExt.val[1] = vqrdmulhq_s32(sampleNExt.val[1], coefsNV0);
            sum0 = vaddq_s32(sum0, samplePExt.val[0]);
            sum1 = vaddq_s32(sum1, samplePExt.val[1]);
            sum0 = vaddq_s32(sum0, sampleNExt.val[0]);
            sum1 = vaddq_s32(sum1, sampleNExt.val[1]);
        }
        int32x2_t volumesV, outV;
        volumesV = vld1_s32(mVolumeSIMD);
        outV = vld1_s32(out);

        //add all 4 partial sums
        int32x2_t sumLow0, sumHigh0, sumLow1, sumHigh1;
        sumLow0 = vget_low_s32(sum0);
        sumHigh0 = vget_high_s32(sum0);
        sumLow1 = vget_low_s32(sum1);
        sumHigh1 = vget_high_s32(sum1);
        sumLow0 = vpadd_s32(sumLow0, sumHigh0);
        sumLow0 = vpadd_s32(sumLow0, sumLow0);
        sumLow1 = vpadd_s32(sumLow1, sumHigh1);
        sumLow1 = vpadd_s32(sumLow1, sumLow1);

        // pack { total of sum0, total of sum1 }, scale each lane by its own
        // volume and accumulate into out[0] / out[1]
        sumLow0 = vtrn_s32(sumLow0, sumLow1).val[0];
        sumLow0 = vqrdmulh_s32(sumLow0, volumesV);
        outV = vadd_s32(outV, sumLow0);
        vst1_s32(out, outV);
    }
#endif
}
594 
595 template<int CHANNELS>
interpolate(int32_t & l,int32_t & r,const int32_t * coefs,size_t offset,int32_t lerp,const int16_t * samples)596 void AudioResamplerSinc::interpolate(
597         int32_t& l, int32_t& r,
598         const int32_t* coefs, size_t offset,
599         int32_t lerp, const int16_t* samples)
600 {
601     int32_t c0 = coefs[0];
602     int32_t c1 = coefs[offset];
603     int32_t sinc = mulAdd(lerp, (c1-c0)<<1, c0);
604     if (CHANNELS == 2) {
605         uint32_t rl = *reinterpret_cast<const uint32_t*>(samples);
606         l = mulAddRL(1, rl, sinc, l);
607         r = mulAddRL(0, rl, sinc, r);
608     } else {
609         r = l = mulAdd(samples[0], sinc, l);
610     }
611 }
612 // ----------------------------------------------------------------------------
613 } // namespace android
614