1 /*
2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 /*
12  * This file includes the implementation of the internal filterbank associated functions.
13  * For function description, see vad_filterbank.h.
14  */
15 
16 #include "vad_filterbank.h"
17 
18 #include "signal_processing_library.h"
19 #include "typedefs.h"
20 #include "vad_defines.h"
21 
// 160 * log10(2) expressed in Q9 (48.16 * 512, rounded down).
static const int16_t kLogConst = 24660;

// Q14 coefficients of the high-pass filter in WebRtcVad_HpOutput():
// the all-zero (feed-forward) section and the all-pole (feedback) section.
static const int16_t kHpZeroCoefs[3] = {6631, -13262, 6631};
static const int16_t kHpPoleCoefs[3] = {16384, -7756, 5620};

// Q15 all-pass coefficients used by WebRtcVad_SplitFilter():
// [0] is the upper branch (0.64), [1] is the lower branch (0.17).
static const int16_t kAllPassCoefsQ15[2] = {20972, 5571};

// Per-band offsets added to the log energy to adjust for the division by two
// in WebRtcVad_SplitFilter(). Index 0 belongs to the lowest band
// (80 Hz - 250 Hz), index 5 to the highest band (3000 Hz - 4000 Hz).
static const int16_t kOffsetVector[6] = {368, 368, 272, 176, 176, 176};
35 
WebRtcVad_HpOutput(int16_t * in_vector,int in_vector_length,int16_t * filter_state,int16_t * out_vector)36 void WebRtcVad_HpOutput(int16_t* in_vector,
37                         int in_vector_length,
38                         int16_t* filter_state,
39                         int16_t* out_vector) {
40   int i;
41   int16_t* in_ptr = in_vector;
42   int16_t* out_ptr = out_vector;
43   int32_t tmp32 = 0;
44 
45 
46   // The sum of the absolute values of the impulse response:
47   // The zero/pole-filter has a max amplification of a single sample of: 1.4546
48   // Impulse response: 0.4047 -0.6179 -0.0266  0.1993  0.1035  -0.0194
49   // The all-zero section has a max amplification of a single sample of: 1.6189
50   // Impulse response: 0.4047 -0.8094  0.4047  0       0        0
51   // The all-pole section has a max amplification of a single sample of: 1.9931
52   // Impulse response: 1.0000  0.4734 -0.1189 -0.2187 -0.0627   0.04532
53 
54   for (i = 0; i < in_vector_length; i++) {
55     // all-zero section (filter coefficients in Q14)
56     tmp32 = (int32_t) WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[0], (*in_ptr));
57     tmp32 += (int32_t) WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[1], filter_state[0]);
58     tmp32 += (int32_t) WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[2],
59                                             filter_state[1]);  // Q14
60     filter_state[1] = filter_state[0];
61     filter_state[0] = *in_ptr++;
62 
63     // all-pole section
64     tmp32 -= (int32_t) WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[1],
65                                             filter_state[2]);  // Q14
66     tmp32 -= (int32_t) WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[2], filter_state[3]);
67     filter_state[3] = filter_state[2];
68     filter_state[2] = (int16_t) WEBRTC_SPL_RSHIFT_W32 (tmp32, 14);
69     *out_ptr++ = filter_state[2];
70   }
71 }
72 
// First-order all-pass filter that reads every second sample of |in_vector|,
// i.e. it filters and downsamples by two in a single pass. The output is
// scaled down by a factor of two (Q(-1)) relative to the input.
//
// - in_vector           : Input samples. Elements 0, 2, 4, ... are read, up
//                         to index 2 * (|vector_length| - 1).
// - filter_coefficients : The all-pass coefficient, in Q15.
// - vector_length       : Number of output samples to produce.
// - filter_state        : One-element filter memory, updated in place.
// - out_vector          : Receives |vector_length| filtered samples.
void WebRtcVad_Allpass(int16_t* in_vector,
                       int16_t filter_coefficients,
                       int vector_length,
                       int16_t* filter_state,
                       int16_t* out_vector) {
  // The filter can only cause overflow (in the w16 output variable)
  // if more than 4 consecutive input numbers are of maximum value and
  // have the same sign as the first taps of the impulse response.
  // First 6 taps of the impulse response: 0.6399 0.5905 -0.3779
  // 0.2418 -0.1547 0.0990

  int i;
  int16_t tmp16 = 0;
  int32_t tmp32 = 0;
  // All up-scaling below uses multiplication instead of "<<": the operands
  // are frequently negative, and left-shifting a negative signed value is
  // undefined behavior in C.
  int32_t state32 = (int32_t) (*filter_state) * (1 << 16);  // Q15

  for (i = 0; i < vector_length; i++) {
    // The input is indexed rather than walked with a pointer so that no
    // pointer beyond the end of the caller's buffer is ever formed.
    tmp32 = state32 + (int32_t) filter_coefficients * in_vector[2 * i];
    tmp16 = (int16_t) (tmp32 >> 16);  // Q(-1)
    out_vector[i] = tmp16;
    state32 = (int32_t) in_vector[2 * i] * (1 << 14) -
        (int32_t) filter_coefficients * tmp16;  // Q14
    state32 *= 2;  // Q15
  }

  *filter_state = (int16_t) (state32 >> 16);  // Q(-1)
}
101 
WebRtcVad_SplitFilter(int16_t * in_vector,int in_vector_length,int16_t * upper_state,int16_t * lower_state,int16_t * out_vector_hp,int16_t * out_vector_lp)102 void WebRtcVad_SplitFilter(int16_t* in_vector,
103                            int in_vector_length,
104                            int16_t* upper_state,
105                            int16_t* lower_state,
106                            int16_t* out_vector_hp,
107                            int16_t* out_vector_lp) {
108   int16_t tmp_out;
109   int i;
110   int half_length = WEBRTC_SPL_RSHIFT_W16(in_vector_length, 1);
111 
112   // All-pass filtering upper branch
113   WebRtcVad_Allpass(&in_vector[0], kAllPassCoefsQ15[0], half_length,
114                     upper_state, out_vector_hp);
115 
116   // All-pass filtering lower branch
117   WebRtcVad_Allpass(&in_vector[1], kAllPassCoefsQ15[1], half_length,
118                     lower_state, out_vector_lp);
119 
120   // Make LP and HP signals
121   for (i = 0; i < half_length; i++) {
122     tmp_out = *out_vector_hp;
123     *out_vector_hp++ -= *out_vector_lp;
124     *out_vector_lp++ += tmp_out;
125   }
126 }
127 
// Splits one frame into six frequency bands and computes a log-energy feature
// for each of them.
//
// - inst        : VAD instance holding the split-filter and high-pass filter
//                 states, which are updated.
// - in_vector   : Input frame.
// - frame_size  : Number of samples in |in_vector|. Expected to be 80, 160 or
//                 240 (10, 20 or 30 ms at 8 kHz); the scratch buffers below
//                 are sized for at most 240 samples.
// - out_vector  : Receives six features; [0] is the 80 Hz - 250 Hz band and
//                 [5] the 3000 Hz - 4000 Hz band.
//
// Returns the power value accumulated by the WebRtcVad_LogOfEnergy() calls.
int16_t WebRtcVad_get_features(VadInstT* inst,
                               int16_t* in_vector,
                               int frame_size,
                               int16_t* out_vector) {
  // Scratch buffers: at most 120 samples remain after the first split and at
  // most 60 samples after the second one.
  int16_t hp_120[120], lp_120[120];
  int16_t hp_60[60], lp_60[60];
  int16_t power = 0;
  // Number of samples left after one, two, three and four splits.
  const int len_half = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
  const int len_quarter = WEBRTC_SPL_RSHIFT_W16(len_half, 1);
  const int len_eighth = WEBRTC_SPL_RSHIFT_W16(len_quarter, 1);
  const int len_sixteenth = WEBRTC_SPL_RSHIFT_W16(len_eighth, 1);

  // Split the full band at 2000 Hz and downsample.
  WebRtcVad_SplitFilter(in_vector, frame_size, &inst->upper_state[0],
                        &inst->lower_state[0], hp_120, lp_120);

  // Split the upper half at 3000 Hz and downsample.
  WebRtcVad_SplitFilter(hp_120, len_half, &inst->upper_state[1],
                        &inst->lower_state[1], hp_60, lp_60);

  // Energy in 3000 Hz - 4000 Hz.
  WebRtcVad_LogOfEnergy(hp_60, len_quarter, kOffsetVector[5], &power,
                        &out_vector[5]);

  // Energy in 2000 Hz - 3000 Hz.
  WebRtcVad_LogOfEnergy(lp_60, len_quarter, kOffsetVector[4], &power,
                        &out_vector[4]);

  // Split the lower half at 1000 Hz and downsample. This reuses the 60-sample
  // buffers, whose previous contents have already been consumed.
  WebRtcVad_SplitFilter(lp_120, len_half, &inst->upper_state[2],
                        &inst->lower_state[2], hp_60, lp_60);

  // Energy in 1000 Hz - 2000 Hz.
  WebRtcVad_LogOfEnergy(hp_60, len_quarter, kOffsetVector[3], &power,
                        &out_vector[3]);

  // Split at 500 Hz. This reuses the 120-sample buffers as output.
  WebRtcVad_SplitFilter(lp_60, len_quarter, &inst->upper_state[3],
                        &inst->lower_state[3], hp_120, lp_120);

  // Energy in 500 Hz - 1000 Hz.
  WebRtcVad_LogOfEnergy(hp_120, len_eighth, kOffsetVector[2], &power,
                        &out_vector[2]);

  // Split at 250 Hz.
  WebRtcVad_SplitFilter(lp_120, len_eighth, &inst->upper_state[4],
                        &inst->lower_state[4], hp_60, lp_60);

  // Energy in 250 Hz - 500 Hz.
  WebRtcVad_LogOfEnergy(hp_60, len_sixteenth, kOffsetVector[1], &power,
                        &out_vector[1]);

  // Remove DC and low frequencies from the lowest band.
  WebRtcVad_HpOutput(lp_60, len_sixteenth, inst->hp_filter_state, hp_120);

  // Energy in 80 Hz - 250 Hz.
  WebRtcVad_LogOfEnergy(hp_120, len_sixteenth, kOffsetVector[0], &power,
                        &out_vector[0]);

  return power;
}
225 
WebRtcVad_LogOfEnergy(int16_t * vector,int vector_length,int16_t offset,int16_t * power,int16_t * log_energy)226 void WebRtcVad_LogOfEnergy(int16_t* vector,
227                            int vector_length,
228                            int16_t offset,
229                            int16_t* power,
230                            int16_t* log_energy) {
231   int shfts = 0, shfts2 = 0;
232   int16_t energy_s16 = 0;
233   int16_t zeros = 0, frac = 0, log2 = 0;
234   int32_t energy = WebRtcSpl_Energy(vector, vector_length, &shfts);
235 
236   if (energy > 0) {
237 
238     shfts2 = 16 - WebRtcSpl_NormW32(energy);
239     shfts += shfts2;
240     // "shfts" is the total number of right shifts that has been done to
241     // energy_s16.
242     energy_s16 = (int16_t) WEBRTC_SPL_SHIFT_W32(energy, -shfts2);
243 
244     // Find:
245     // 160*log10(energy_s16*2^shfts) = 160*log10(2)*log2(energy_s16*2^shfts) =
246     // 160*log10(2)*(log2(energy_s16) + log2(2^shfts)) =
247     // 160*log10(2)*(log2(energy_s16) + shfts)
248 
249     zeros = WebRtcSpl_NormU32(energy_s16);
250     frac = (int16_t) (((uint32_t) ((int32_t) (energy_s16) << zeros)
251         & 0x7FFFFFFF) >> 21);
252     log2 = (int16_t) (((31 - zeros) << 10) + frac);
253 
254     *log_energy = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(kLogConst, log2, 19)
255         + (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(shfts, kLogConst, 9);
256 
257     if (*log_energy < 0) {
258       *log_energy = 0;
259     }
260   } else {
261     *log_energy = 0;
262     shfts = -15;
263     energy_s16 = 0;
264   }
265 
266   *log_energy += offset;
267 
268   // Total power in frame
269   if (*power <= MIN_ENERGY) {
270     if (shfts > 0) {
271       *power += MIN_ENERGY + 1;
272     } else if (WEBRTC_SPL_SHIFT_W16(energy_s16, shfts) > MIN_ENERGY) {
273       *power += MIN_ENERGY + 1;
274     } else {
275       *power += WEBRTC_SPL_SHIFT_W16(energy_s16, shfts);
276     }
277   }
278 }
279