1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_AUDIO_RESAMPLER_FIR_GEN_H
18 #define ANDROID_AUDIO_RESAMPLER_FIR_GEN_H
19 
20 namespace android {
21 
22 /*
23  * generates a sine wave at equal steps.
24  *
25  * As most of our functions use sine or cosine at equal steps,
26  * it is very efficient to compute them that way (single multiply and subtract),
27  * rather than invoking the math library sin() or cos() each time.
28  *
29  * SineGen uses Goertzel's Algorithm (as a generator not a filter)
30  * to calculate sine(wstart + n * wstep) or cosine(wstart + n * wstep)
31  * by stepping through 0, 1, ... n.
32  *
33  * e^i(wstart+wstep) = 2cos(wstep) * e^i(wstart) - e^i(wstart-wstep)
34  *
35  * or looking at just the imaginary sine term, as the cosine follows identically:
36  *
37  * sin(wstart+wstep) = 2cos(wstep) * sin(wstart) - sin(wstart-wstep)
38  *
39  * Goertzel's algorithm is more efficient than the angle addition formula,
40  * e^i(wstart+wstep) = e^i(wstart) * e^i(wstep), which takes up to
41  * 4 multiplies and 2 adds (or 3* and 3+) and requires both sine and
42  * cosine generation due to the complex * complex multiply (full rotation).
43  *
44  * See: http://en.wikipedia.org/wiki/Goertzel_algorithm
45  *
46  */
47 
48 class SineGen {
49 public:
50     SineGen(double wstart, double wstep, bool cosine = false) {
51         if (cosine) {
52             mCurrent = cos(wstart);
53             mPrevious = cos(wstart - wstep);
54         } else {
55             mCurrent = sin(wstart);
56             mPrevious = sin(wstart - wstep);
57         }
58         mTwoCos = 2.*cos(wstep);
59     }
SineGen(double expNow,double expPrev,double twoCosStep)60     SineGen(double expNow, double expPrev, double twoCosStep) {
61         mCurrent = expNow;
62         mPrevious = expPrev;
63         mTwoCos = twoCosStep;
64     }
value()65     inline double value() const {
66         return mCurrent;
67     }
advance()68     inline void advance() {
69         double tmp = mCurrent;
70         mCurrent = mCurrent*mTwoCos - mPrevious;
71         mPrevious = tmp;
72     }
valueAdvance()73     inline double valueAdvance() {
74         double tmp = mCurrent;
75         mCurrent = mCurrent*mTwoCos - mPrevious;
76         mPrevious = tmp;
77         return tmp;
78     }
79 
80 private:
81     double mCurrent; // current value of sine/cosine
82     double mPrevious; // previous value of sine/cosine
83     double mTwoCos; // stepping factor
84 };
85 
86 /*
87  * generates a series of sine generators, phase offset by fixed steps.
88  *
89  * This is used to generate polyphase sine generators, one per polyphase
90  * in the filter code below.
91  *
92  * The SineGen returned by value() starts at innerStart = outerStart + n*outerStep;
93  * increments by innerStep.
94  *
95  */
96 
97 class SineGenGen {
98 public:
99     SineGenGen(double outerStart, double outerStep, double innerStep, bool cosine = false)
mSineInnerCur(outerStart,outerStep,cosine)100             : mSineInnerCur(outerStart, outerStep, cosine),
101               mSineInnerPrev(outerStart-innerStep, outerStep, cosine)
102     {
103         mTwoCos = 2.*cos(innerStep);
104     }
value()105     inline SineGen value() {
106         return SineGen(mSineInnerCur.value(), mSineInnerPrev.value(), mTwoCos);
107     }
advance()108     inline void advance() {
109         mSineInnerCur.advance();
110         mSineInnerPrev.advance();
111     }
valueAdvance()112     inline SineGen valueAdvance() {
113         return SineGen(mSineInnerCur.valueAdvance(), mSineInnerPrev.valueAdvance(), mTwoCos);
114     }
115 
116 private:
117     SineGen mSineInnerCur; // generate the inner sine values (stepped by outerStep).
118     SineGen mSineInnerPrev; // generate the inner sine previous values
119                             // (behind by innerStep, stepped by outerStep).
120     double mTwoCos; // the inner stepping factor for the returned SineGen.
121 };
122 
sqr(double x)123 static inline double sqr(double x) {
124     return x * x;
125 }
126 
127 /*
128  * rounds a double to the nearest integer for FIR coefficients.
129  *
130  * One variant uses noise shaping, which must keep error history
131  * to work (the err parameter, initialized to 0).
132  * The other variant is a non-noise shaped version for
133  * S32 coefficients (noise shaping doesn't gain much).
134  *
135  * Caution: No bounds saturation is applied, but isn't needed in this case.
136  *
137  * @param x is the value to round.
138  *
139  * @param maxval is the maximum integer scale factor expressed as an int64 (for headroom).
140  * Typically this may be the maximum positive integer+1 (using the fact that double precision
141  * FIR coefficients generated here are never that close to 1.0 to pose an overflow condition).
142  *
143  * @param err is the previous error (actual - rounded) for the previous rounding op.
144  * For 16b coefficients this can improve stopband dB performance by up to 2dB.
145  *
146  * Many variants exist for the noise shaping: http://en.wikipedia.org/wiki/Noise_shaping
147  *
148  */
149 
toint(double x,int64_t maxval,double & err)150 static inline int64_t toint(double x, int64_t maxval, double& err) {
151     double val = x * maxval;
152     double ival = floor(val + 0.5 + err*0.2);
153     err = val - ival;
154     return static_cast<int64_t>(ival);
155 }
156 
toint(double x,int64_t maxval)157 static inline int64_t toint(double x, int64_t maxval) {
158     return static_cast<int64_t>(floor(x * maxval + 0.5));
159 }
160 
161 /*
162  * Modified Bessel function of the first kind
163  * http://en.wikipedia.org/wiki/Bessel_function
164  *
165  * The formulas are taken from Abramowitz and Stegun,
166  * _Handbook of Mathematical Functions_ (links below):
167  *
168  * http://people.math.sfu.ca/~cbm/aands/page_375.htm
169  * http://people.math.sfu.ca/~cbm/aands/page_378.htm
170  *
171  * http://dlmf.nist.gov/10.25
172  * http://dlmf.nist.gov/10.40
173  *
174  * Note we assume x is nonnegative (the function is symmetric,
175  * pass in the absolute value as needed).
176  *
177  * Constants are compile time derived with templates I0Term<> and
178  * I0ATerm<> to the precision of the compiler.  The series can be expanded
179  * to any precision needed, but currently set around 24b precision.
180  *
181  * We use a bit of template math here, constexpr would probably be
182  * more appropriate for a C++11 compiler.
183  *
184  * For the intermediate range 3.75 < x < 15, we use minimax polynomial fit.
185  *
186  */
187 
188 template <int N>
189 struct I0Term {
190     static const double value = I0Term<N-1>::value / (4. * N * N);
191 };
192 
193 template <>
194 struct I0Term<0> {
195     static const double value = 1.;
196 };
197 
198 template <int N>
199 struct I0ATerm {
200     static const double value = I0ATerm<N-1>::value * (2.*N-1.) * (2.*N-1.) / (8. * N);
201 };
202 
203 template <>
204 struct I0ATerm<0> { // 1/sqrt(2*PI);
205     static const double value = 0.398942280401432677939946059934381868475858631164934657665925;
206 };
207 
208 #if USE_HORNERS_METHOD
209 /* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ...
210  * using Horner's Method: http://en.wikipedia.org/wiki/Horner's_method
211  *
212  * This has fewer multiplications than Estrin's method below, but has back to back
213  * floating point dependencies.
214  *
215  * On ARM this appears to work slower, so USE_HORNERS_METHOD is not default enabled.
216  */
217 
218 inline double Poly2(double A, double B, double x) {
219     return A + x * B;
220 }
221 
222 inline double Poly4(double A, double B, double C, double D, double x) {
223     return A + x * (B + x * (C + x * (D)));
224 }
225 
226 inline double Poly7(double A, double B, double C, double D, double E, double F, double G,
227         double x) {
228     return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G))))));
229 }
230 
231 inline double Poly9(double A, double B, double C, double D, double E, double F, double G,
232         double H, double I, double x) {
233     return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G + x * (H + x * (I))))))));
234 }
235 
236 #else
237 /* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ...
238  * using Estrin's Method: http://en.wikipedia.org/wiki/Estrin's_scheme
239  *
240  * This is typically faster, perhaps gains about 5-10% overall on ARM processors
241  * over Horner's method above.
242  */
243 
244 inline double Poly2(double A, double B, double x) {
245     return A + B * x;
246 }
247 
248 inline double Poly3(double A, double B, double C, double x, double x2) {
249     return Poly2(A, B, x) + C * x2;
250 }
251 
252 inline double Poly3(double A, double B, double C, double x) {
253     return Poly2(A, B, x) + C * x * x;
254 }
255 
256 inline double Poly4(double A, double B, double C, double D, double x, double x2) {
257     return Poly2(A, B, x) + Poly2(C, D, x) * x2; // same as poly2(poly2, poly2, x2);
258 }
259 
260 inline double Poly4(double A, double B, double C, double D, double x) {
261     return Poly4(A, B, C, D, x, x * x);
262 }
263 
264 inline double Poly7(double A, double B, double C, double D, double E, double F, double G,
265         double x) {
266     double x2 = x * x;
267     return Poly4(A, B, C, D, x, x2) + Poly3(E, F, G, x, x2) * (x2 * x2);
268 }
269 
270 inline double Poly8(double A, double B, double C, double D, double E, double F, double G,
271         double H, double x, double x2, double x4) {
272     return Poly4(A, B, C, D, x, x2) + Poly4(E, F, G, H, x, x2) * x4;
273 }
274 
275 inline double Poly9(double A, double B, double C, double D, double E, double F, double G,
276         double H, double I, double x) {
277     double x2 = x * x;
278 #if 1
279     // It does not seem faster to explicitly decompose Poly8 into Poly4, but
280     // could depend on compiler floating point scheduling.
281     double x4 = x2 * x2;
282     return Poly8(A, B, C, D, E, F, G, H, x, x2, x4) + I * (x4 * x4);
283 #else
284     double val = Poly4(A, B, C, D, x, x2);
285     double x4 = x2 * x2;
286     return val + Poly4(E, F, G, H, x, x2) * x4 + I * (x4 * x4);
287 #endif
288 }
289 #endif
290 
291 static inline double I0(double x) {
292     if (x < 3.75) {
293         x *= x;
294         return Poly7(I0Term<0>::value, I0Term<1>::value,
295                 I0Term<2>::value, I0Term<3>::value,
296                 I0Term<4>::value, I0Term<5>::value,
297                 I0Term<6>::value, x); // e < 1.6e-7
298     }
299     if (1) {
300         /*
301          * Series expansion coefs are easy to calculate, but are expanded around 0,
302          * so error is unequal over the interval 0 < x < 3.75, the error being
303          * significantly better near 0.
304          *
305          * A better solution is to use precise minimax polynomial fits.
306          *
307          * We use a slightly more complicated solution for 3.75 < x < 15, based on
308          * the tables in Blair and Edwards, "Stable Rational Minimax Approximations
309          * to the Modified Bessel Functions I0(x) and I1(x)", Chalk Hill Nuclear Laboratory,
310          * AECL-4928.
311          *
312          * http://www.iaea.org/inis/collection/NCLCollectionStore/_Public/06/178/6178667.pdf
313          *
314          * See Table 11 for 0 < x < 15; e < 10^(-7.13).
315          *
316          * Note: Beta cannot exceed 15 (hence Stopband cannot exceed 144dB = 24b).
317          *
318          * This speeds up overall computation by about 40% over using the else clause below,
319          * which requires sqrt and exp.
320          *
321          */
322 
323         x *= x;
324         double num = Poly9(-0.13544938430e9, -0.33153754512e8,
325                 -0.19406631946e7, -0.48058318783e5,
326                 -0.63269783360e3, -0.49520779070e1,
327                 -0.24970910370e-1, -0.74741159550e-4,
328                 -0.18257612460e-6, x);
329         double y = x - 225.; // reflection around 15 (squared)
330         double den = Poly4(-0.34598737196e8, 0.23852643181e6,
331                 -0.70699387620e3, 0.10000000000e1, y);
332         return num / den;
333 
334 #if IO_EXTENDED_BETA
335         /* Table 42 for x > 15; e < 10^(-8.11).
336          * This is used for Beta>15, but is disabled here as
337          * we never use Beta that high.
338          *
339          * NOTE: This should be enabled only for x > 15.
340          */
341 
342         double y = 1./x;
343         double z = y - (1./15);
344         double num = Poly2(0.415079861746e1, -0.5149092496e1, z);
345         double den = Poly3(0.103150763823e2, -0.14181687413e2,
346                 0.1000000000e1, z);
347         return exp(x) * sqrt(y) * num / den;
348 #endif
349     } else {
350         /*
351          * NOT USED, but reference for large Beta.
352          *
353          * Abramowitz and Stegun asymptotic formula.
354          * works for x > 3.75.
355          */
356         double y = 1./x;
357         return exp(x) * sqrt(y) *
358                 // note: reciprocal squareroot may be easier!
359                 // http://en.wikipedia.org/wiki/Fast_inverse_square_root
360                 Poly9(I0ATerm<0>::value, I0ATerm<1>::value,
361                         I0ATerm<2>::value, I0ATerm<3>::value,
362                         I0ATerm<4>::value, I0ATerm<5>::value,
363                         I0ATerm<6>::value, I0ATerm<7>::value,
364                         I0ATerm<8>::value, y); // (... e) < 1.9e-7
365     }
366 }
367 
368 /* A speed optimized version of the Modified Bessel I0() which incorporates
369  * the sqrt and numerator multiply and denominator divide into the computation.
370  * This speeds up filter computation by about 10-15%.
371  */
372 static inline double I0SqrRat(double x2, double num, double den) {
373     if (x2 < (3.75 * 3.75)) {
374         return Poly7(I0Term<0>::value, I0Term<1>::value,
375                 I0Term<2>::value, I0Term<3>::value,
376                 I0Term<4>::value, I0Term<5>::value,
377                 I0Term<6>::value, x2) * num / den; // e < 1.6e-7
378     }
379     num *= Poly9(-0.13544938430e9, -0.33153754512e8,
380             -0.19406631946e7, -0.48058318783e5,
381             -0.63269783360e3, -0.49520779070e1,
382             -0.24970910370e-1, -0.74741159550e-4,
383             -0.18257612460e-6, x2); // e < 10^(-7.13).
384     double y = x2 - 225.; // reflection around 15 (squared)
385     den *= Poly4(-0.34598737196e8, 0.23852643181e6,
386             -0.70699387620e3, 0.10000000000e1, y);
387     return num / den;
388 }
389 
390 /*
391  * calculates the transition bandwidth for a Kaiser filter
392  *
393  * Formula 3.2.8, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48
394  * Formula 7.76, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542
395  *
396  * @param halfNumCoef is half the number of coefficients per filter phase.
397  *
398  * @param stopBandAtten is the stop band attenuation desired.
399  *
400  * @return the transition bandwidth in normalized frequency (0 <= f <= 0.5)
401  */
402 static inline double firKaiserTbw(int halfNumCoef, double stopBandAtten) {
403     return (stopBandAtten - 7.95)/((2.*14.36)*halfNumCoef);
404 }
405 
406 /*
407  * calculates the fir transfer response of the overall polyphase filter at w.
408  *
409  * Calculates the DTFT transfer coefficient H(w) for 0 <= w <= PI, utilizing the
410  * fact that h[n] is symmetric (cosines only, no complex arithmetic).
411  *
412  * We use Goertzel's algorithm to accelerate the computation to essentially
413  * a single multiply and 2 adds per filter coefficient h[].
414  *
415  * Be careful be careful to consider that h[n] is the overall polyphase filter,
416  * with L phases, so rescaling H(w)/L is probably what you expect for "unity gain",
417  * as you only use one of the polyphases at a time.
418  */
419 template <typename T>
420 static inline double firTransfer(const T* coef, int L, int halfNumCoef, double w) {
421     double accum = static_cast<double>(coef[0])*0.5;  // "center coefficient" from first bank
422     coef += halfNumCoef;    // skip first filterbank (picked up by the last filterbank).
423 #if SLOW_FIRTRANSFER
424     /* Original code for reference.  This is equivalent to the code below, but slower. */
425     for (int i=1 ; i<=L ; ++i) {
426         for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) {
427             accum += cos(ix*w)*static_cast<double>(*coef++);
428         }
429     }
430 #else
431     /*
432      * Our overall filter is stored striped by polyphases, not a contiguous h[n].
433      * We could fetch coefficients in a non-contiguous fashion
434      * but that will not scale to vector processing.
435      *
436      * We apply Goertzel's algorithm directly to each polyphase filter bank instead of
437      * using cosine generation/multiplication, thereby saving one multiply per inner loop.
438      *
439      * See: http://en.wikipedia.org/wiki/Goertzel_algorithm
440      * Also: Oppenheim and Schafer, _Discrete Time Signal Processing, 3e_, p. 720.
441      *
442      * We use the basic recursion to incorporate the cosine steps into real sequence x[n]:
443      * s[n] = x[n] + (2cosw)*s[n-1] + s[n-2]
444      *
445      * y[n] = s[n] - e^(iw)s[n-1]
446      *      = sum_{k=-\infty}^{n} x[k]e^(-iw(n-k))
447      *      = e^(-iwn) sum_{k=0}^{n} x[k]e^(iwk)
448      *
449      * The summation contains the frequency steps we want multiplied by the source
450      * (similar to a DTFT).
451      *
452      * Using symmetry, and just the real part (be careful, this must happen
453      * after any internal complex multiplications), the polyphase filterbank
454      * transfer function is:
455      *
456      * Hpp[n, w, w_0] = sum_{k=0}^{n} x[k] * cos(wk + w_0)
457      *                = Re{ e^(iwn + iw_0) y[n]}
458      *                = cos(wn+w_0) * s[n] - cos(w(n+1)+w_0) * s[n-1]
459      *
460      * using the fact that s[n] of real x[n] is real.
461      *
462      */
463     double dcos = 2. * cos(L*w);
464     int start = ((halfNumCoef)*L + 1);
465     SineGen cc((start - L) * w, w, true); // cosine
466     SineGen cp(start * w, w, true); // cosine
467     for (int i=1 ; i<=L ; ++i) {
468         double sc = 0;
469         double sp = 0;
470         for (int j=0 ; j<halfNumCoef ; ++j) {
471             double tmp = sc;
472             sc  = static_cast<double>(*coef++) + dcos*sc - sp;
473             sp = tmp;
474         }
475         // If we are awfully clever, we can apply Goertzel's algorithm
476         // again on the sc and sp sequences returned here.
477         accum += cc.valueAdvance() * sc - cp.valueAdvance() * sp;
478     }
479 #endif
480     return accum*2.;
481 }
482 
483 /*
484  * evaluates the minimum and maximum |H(f)| bound in a band region.
485  *
486  * This is usually done with equally spaced increments in the target band in question.
487  * The passband is often very small, and sampled that way. The stopband is often much
488  * larger.
489  *
490  * We use the fact that the overall polyphase filter has an additional bank at the end
491  * for interpolation; hence it is overspecified for the H(f) computation.  Thus the
492  * first polyphase is never actually checked, excepting its first term.
493  *
494  * In this code we use the firTransfer() evaluator above, which uses Goertzel's
495  * algorithm to calculate the transfer function at each point.
496  *
497  * TODO: An alternative with equal spacing is the FFT/DFT.  An alternative with unequal
498  * spacing is a chirp transform.
499  *
500  * @param coef is the designed polyphase filter banks
501  *
502  * @param L is the number of phases (for interpolation)
503  *
504  * @param halfNumCoef should be half the number of coefficients for a single
505  * polyphase.
506  *
507  * @param fstart is the normalized frequency start.
508  *
509  * @param fend is the normalized frequency end.
510  *
511  * @param steps is the number of steps to take (sampling) between frequency start and end
512  *
513  * @param firMin returns the minimum transfer |H(f)| found
514  *
515  * @param firMax returns the maximum transfer |H(f)| found
516  *
517  * 0 <= f <= 0.5.
518  * This is used to test passband and stopband performance.
519  */
520 template <typename T>
521 static void testFir(const T* coef, int L, int halfNumCoef,
522         double fstart, double fend, int steps, double &firMin, double &firMax) {
523     double wstart = fstart*(2.*M_PI);
524     double wend = fend*(2.*M_PI);
525     double wstep = (wend - wstart)/steps;
526     double fmax, fmin;
527     double trf = firTransfer(coef, L, halfNumCoef, wstart);
528     if (trf<0) {
529         trf = -trf;
530     }
531     fmin = fmax = trf;
532     wstart += wstep;
533     for (int i=1; i<steps; ++i) {
534         trf = firTransfer(coef, L, halfNumCoef, wstart);
535         if (trf<0) {
536             trf = -trf;
537         }
538         if (trf>fmax) {
539             fmax = trf;
540         }
541         else if (trf<fmin) {
542             fmin = trf;
543         }
544         wstart += wstep;
545     }
546     // renormalize - this is only needed for integer filter types
547     double norm = 1./((1ULL<<(sizeof(T)*8-1))*L);
548 
549     firMin = fmin * norm;
550     firMax = fmax * norm;
551 }
552 
553 /*
554  * evaluates the |H(f)| lowpass band characteristics.
555  *
556  * This function tests the lowpass characteristics for the overall polyphase filter,
557  * and is used to verify the design.  For this case, fp should be set to the
558  * passband normalized frequency from 0 to 0.5 for the overall filter (thus it
559  * is the designed polyphase bank value / L).  Likewise for fs.
560  *
561  * @param coef is the designed polyphase filter banks
562  *
563  * @param L is the number of phases (for interpolation)
564  *
565  * @param halfNumCoef should be half the number of coefficients for a single
566  * polyphase.
567  *
568  * @param fp is the passband normalized frequency, 0 < fp < fs < 0.5.
569  *
570  * @param fs is the stopband normalized frequency, 0 < fp < fs < 0.5.
571  *
572  * @param passSteps is the number of passband sampling steps.
573  *
574  * @param stopSteps is the number of stopband sampling steps.
575  *
576  * @param passMin is the minimum value in the passband
577  *
578  * @param passMax is the maximum value in the passband (useful for scaling).  This should
579  * be less than 1., to avoid sine wave test overflow.
580  *
581  * @param passRipple is the passband ripple.  Typically this should be less than 0.1 for
582  * an audio filter.  Generally speaker/headphone device characteristics will dominate
583  * the passband term.
584  *
585  * @param stopMax is the maximum value in the stopband.
586  *
587  * @param stopRipple is the stopband ripple, also known as stopband attenuation.
588  * Typically this should be greater than ~80dB for low quality, and greater than
589  * ~100dB for full 16b quality, otherwise aliasing may become noticeable.
590  *
591  */
592 template <typename T>
593 static void testFir(const T* coef, int L, int halfNumCoef,
594         double fp, double fs, int passSteps, int stopSteps,
595         double &passMin, double &passMax, double &passRipple,
596         double &stopMax, double &stopRipple) {
597     double fmin, fmax;
598     testFir(coef, L, halfNumCoef, 0., fp, passSteps, fmin, fmax);
599     double d1 = (fmax - fmin)/2.;
600     passMin = fmin;
601     passMax = fmax;
602     passRipple = -20.*log10(1. - d1); // passband ripple
603     testFir(coef, L, halfNumCoef, fs, 0.5, stopSteps, fmin, fmax);
604     // fmin is really not important for the stopband.
605     stopMax = fmax;
606     stopRipple = -20.*log10(fmax); // stopband ripple/attenuation
607 }
608 
609 /*
610  * Calculates the overall polyphase filter based on a windowed sinc function.
611  *
612  * The windowed sinc is an odd length symmetric filter of exactly L*halfNumCoef*2+1
613  * taps for the entire kernel.  This is then decomposed into L+1 polyphase filterbanks.
614  * The last filterbank is used for interpolation purposes (and is mostly composed
615  * of the first bank shifted by one sample), and is unnecessary if one does
616  * not do interpolation.
617  *
618  * We use the last filterbank for some transfer function calculation purposes,
619  * so it needs to be generated anyways.
620  *
621  * @param coef is the caller allocated space for coefficients.  This should be
622  * exactly (L+1)*halfNumCoef in size.
623  *
624  * @param L is the number of phases (for interpolation)
625  *
626  * @param halfNumCoef should be half the number of coefficients for a single
627  * polyphase.
628  *
629  * @param stopBandAtten is the stopband value, should be >50dB.
630  *
631  * @param fcr is cutoff frequency/sampling rate (<0.5).  At this point, the energy
632  * should be 6dB less. (fcr is where the amplitude drops by half).  Use the
633  * firKaiserTbw() to calculate the transition bandwidth.  fcr is the midpoint
634  * between the stop band and the pass band (fstop+fpass)/2.
635  *
636  * @param atten is the attenuation (generally slightly less than 1).
637  */
638 
639 template <typename T>
640 static inline void firKaiserGen(T* coef, int L, int halfNumCoef,
641         double stopBandAtten, double fcr, double atten) {
642     //
643     // Formula 3.2.5, 3.2.7, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48
644     // Formula 7.75, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542
645     //
646     // See also: http://melodi.ee.washington.edu/courses/ee518/notes/lec17.pdf
647     //
648     // Kaiser window and beta parameter
649     //
650     //         | 0.1102*(A - 8.7)                         A > 50
651     //  beta = | 0.5842*(A - 21)^0.4 + 0.07886*(A - 21)   21 <= A <= 50
652     //         | 0.                                       A < 21
653     //
654     // with A is the desired stop-band attenuation in dBFS
655     //
656     //    30 dB    2.210
657     //    40 dB    3.384
658     //    50 dB    4.538
659     //    60 dB    5.658
660     //    70 dB    6.764
661     //    80 dB    7.865
662     //    90 dB    8.960
663     //   100 dB   10.056
664 
665     const int N = L * halfNumCoef; // non-negative half
666     const double beta = 0.1102 * (stopBandAtten - 8.7); // >= 50dB always
667     const double xstep = (2. * M_PI) * fcr / L;
668     const double xfrac = 1. / N;
669     const double yscale = atten * L / (I0(beta) * M_PI);
670     const double sqrbeta = sqr(beta);
671 
672     // We use sine generators, which computes sines on regular step intervals.
673     // This speeds up overall computation about 40% from computing the sine directly.
674 
675     SineGenGen sgg(0., xstep, L*xstep); // generates sine generators (one per polyphase)
676 
677     for (int i=0 ; i<=L ; ++i) { // generate an extra set of coefs for interpolation
678 
679         // computation for a single polyphase of the overall filter.
680         SineGen sg = sgg.valueAdvance(); // current sine generator for "j" inner loop.
681         double err = 0; // for noise shaping on int16_t coefficients (over each polyphase)
682 
683         for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) {
684             double y;
685             if (CC_LIKELY(ix)) {
686                 double x = static_cast<double>(ix);
687 
688                 // sine generator: sg.valueAdvance() returns sin(ix*xstep);
689                 // y = I0(beta * sqrt(1.0 - sqr(x * xfrac))) * yscale * sg.valueAdvance() / x;
690                 y = I0SqrRat(sqrbeta * (1.0 - sqr(x * xfrac)), yscale * sg.valueAdvance(), x);
691             } else {
692                 y = 2. * atten * fcr; // center of filter, sinc(0) = 1.
693                 sg.advance();
694             }
695 
696             if (is_same<T, int16_t>::value) { // int16_t needs noise shaping
697                 *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1), err));
698             } else if (is_same<T, int32_t>::value) {
699                 *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1)));
700             } else { // assumed float or double
701                 *coef++ = static_cast<T>(y);
702             }
703         }
704     }
705 }
706 
707 }; // namespace android
708 
709 #endif /*ANDROID_AUDIO_RESAMPLER_FIR_GEN_H*/
710