1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // MSVC++ requires this to be set before any other includes to get M_PI.
6 #define _USE_MATH_DEFINES
7
8 #include "media/filters/wsola_internals.h"
9
10 #include <algorithm>
11 #include <cmath>
12 #include <limits>
13
14 #include "base/logging.h"
15 #include "base/memory/scoped_ptr.h"
16 #include "media/base/audio_bus.h"
17
18 namespace media {
19
20 namespace internal {
21
InInterval(int n,Interval q)22 bool InInterval(int n, Interval q) {
23 return n >= q.first && n <= q.second;
24 }
25
MultiChannelSimilarityMeasure(const float * dot_prod_a_b,const float * energy_a,const float * energy_b,int channels)26 float MultiChannelSimilarityMeasure(const float* dot_prod_a_b,
27 const float* energy_a,
28 const float* energy_b,
29 int channels) {
30 const float kEpsilon = 1e-12f;
31 float similarity_measure = 0.0f;
32 for (int n = 0; n < channels; ++n) {
33 similarity_measure += dot_prod_a_b[n] / sqrt(energy_a[n] * energy_b[n] +
34 kEpsilon);
35 }
36 return similarity_measure;
37 }
38
MultiChannelDotProduct(const AudioBus * a,int frame_offset_a,const AudioBus * b,int frame_offset_b,int num_frames,float * dot_product)39 void MultiChannelDotProduct(const AudioBus* a,
40 int frame_offset_a,
41 const AudioBus* b,
42 int frame_offset_b,
43 int num_frames,
44 float* dot_product) {
45 DCHECK_EQ(a->channels(), b->channels());
46 DCHECK_GE(frame_offset_a, 0);
47 DCHECK_GE(frame_offset_b, 0);
48 DCHECK_LE(frame_offset_a + num_frames, a->frames());
49 DCHECK_LE(frame_offset_b + num_frames, b->frames());
50
51 memset(dot_product, 0, sizeof(*dot_product) * a->channels());
52 for (int k = 0; k < a->channels(); ++k) {
53 const float* ch_a = a->channel(k) + frame_offset_a;
54 const float* ch_b = b->channel(k) + frame_offset_b;
55 for (int n = 0; n < num_frames; ++n) {
56 dot_product[k] += *ch_a++ * *ch_b++;
57 }
58 }
59 }
60
MultiChannelMovingBlockEnergies(const AudioBus * input,int frames_per_block,float * energy)61 void MultiChannelMovingBlockEnergies(const AudioBus* input,
62 int frames_per_block,
63 float* energy) {
64 int num_blocks = input->frames() - (frames_per_block - 1);
65 int channels = input->channels();
66
67 for (int k = 0; k < input->channels(); ++k) {
68 const float* input_channel = input->channel(k);
69
70 energy[k] = 0;
71
72 // First block of channel |k|.
73 for (int m = 0; m < frames_per_block; ++m) {
74 energy[k] += input_channel[m] * input_channel[m];
75 }
76
77 const float* slide_out = input_channel;
78 const float* slide_in = input_channel + frames_per_block;
79 for (int n = 1; n < num_blocks; ++n, ++slide_in, ++slide_out) {
80 energy[k + n * channels] = energy[k + (n - 1) * channels] - *slide_out *
81 *slide_out + *slide_in * *slide_in;
82 }
83 }
84 }
85
86 // Fit the curve f(x) = a * x^2 + b * x + c such that
87 // f(-1) = y[0]
88 // f(0) = y[1]
89 // f(1) = y[2]
90 // and return the maximum, assuming that y[0] <= y[1] >= y[2].
QuadraticInterpolation(const float * y_values,float * extremum,float * extremum_value)91 void QuadraticInterpolation(const float* y_values,
92 float* extremum,
93 float* extremum_value) {
94 float a = 0.5f * (y_values[2] + y_values[0]) - y_values[1];
95 float b = 0.5f * (y_values[2] - y_values[0]);
96 float c = y_values[1];
97
98 if (a == 0.f) {
99 // The coordinates are colinear (within floating-point error).
100 *extremum = 0;
101 *extremum_value = y_values[1];
102 } else {
103 *extremum = -b / (2.f * a);
104 *extremum_value = a * (*extremum) * (*extremum) + b * (*extremum) + c;
105 }
106 }
107
DecimatedSearch(int decimation,Interval exclude_interval,const AudioBus * target_block,const AudioBus * search_segment,const float * energy_target_block,const float * energy_candidate_blocks)108 int DecimatedSearch(int decimation,
109 Interval exclude_interval,
110 const AudioBus* target_block,
111 const AudioBus* search_segment,
112 const float* energy_target_block,
113 const float* energy_candidate_blocks) {
114 int channels = search_segment->channels();
115 int block_size = target_block->frames();
116 int num_candidate_blocks = search_segment->frames() - (block_size - 1);
117 scoped_ptr<float[]> dot_prod(new float[channels]);
118 float similarity[3]; // Three elements for cubic interpolation.
119
120 int n = 0;
121 MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,
122 dot_prod.get());
123 similarity[0] = MultiChannelSimilarityMeasure(
124 dot_prod.get(), energy_target_block,
125 &energy_candidate_blocks[n * channels], channels);
126
127 // Set the starting point as optimal point.
128 float best_similarity = similarity[0];
129 int optimal_index = 0;
130
131 n += decimation;
132 if (n >= num_candidate_blocks) {
133 return 0;
134 }
135
136 MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,
137 dot_prod.get());
138 similarity[1] = MultiChannelSimilarityMeasure(
139 dot_prod.get(), energy_target_block,
140 &energy_candidate_blocks[n * channels], channels);
141
142 n += decimation;
143 if (n >= num_candidate_blocks) {
144 // We cannot do any more sampling. Compare these two values and return the
145 // optimal index.
146 return similarity[1] > similarity[0] ? decimation : 0;
147 }
148
149 for (; n < num_candidate_blocks; n += decimation) {
150 MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,
151 dot_prod.get());
152
153 similarity[2] = MultiChannelSimilarityMeasure(
154 dot_prod.get(), energy_target_block,
155 &energy_candidate_blocks[n * channels], channels);
156
157 if ((similarity[1] > similarity[0] && similarity[1] >= similarity[2]) ||
158 (similarity[1] >= similarity[0] && similarity[1] > similarity[2])) {
159 // A local maximum is found. Do a cubic interpolation for a better
160 // estimate of candidate maximum.
161 float normalized_candidate_index;
162 float candidate_similarity;
163 QuadraticInterpolation(similarity, &normalized_candidate_index,
164 &candidate_similarity);
165
166 int candidate_index = n - decimation + static_cast<int>(
167 normalized_candidate_index * decimation + 0.5f);
168 if (candidate_similarity > best_similarity &&
169 !InInterval(candidate_index, exclude_interval)) {
170 optimal_index = candidate_index;
171 best_similarity = candidate_similarity;
172 }
173 } else if (n + decimation >= num_candidate_blocks &&
174 similarity[2] > best_similarity &&
175 !InInterval(n, exclude_interval)) {
176 // If this is the end-point and has a better similarity-measure than
177 // optimal, then we accept it as optimal point.
178 optimal_index = n;
179 best_similarity = similarity[2];
180 }
181 memmove(similarity, &similarity[1], 2 * sizeof(*similarity));
182 }
183 return optimal_index;
184 }
185
FullSearch(int low_limit,int high_limit,Interval exclude_interval,const AudioBus * target_block,const AudioBus * search_block,const float * energy_target_block,const float * energy_candidate_blocks)186 int FullSearch(int low_limit,
187 int high_limit,
188 Interval exclude_interval,
189 const AudioBus* target_block,
190 const AudioBus* search_block,
191 const float* energy_target_block,
192 const float* energy_candidate_blocks) {
193 int channels = search_block->channels();
194 int block_size = target_block->frames();
195 scoped_ptr<float[]> dot_prod(new float[channels]);
196
197 float best_similarity = std::numeric_limits<float>::min();
198 int optimal_index = 0;
199
200 for (int n = low_limit; n <= high_limit; ++n) {
201 if (InInterval(n, exclude_interval)) {
202 continue;
203 }
204 MultiChannelDotProduct(target_block, 0, search_block, n, block_size,
205 dot_prod.get());
206
207 float similarity = MultiChannelSimilarityMeasure(
208 dot_prod.get(), energy_target_block,
209 &energy_candidate_blocks[n * channels], channels);
210
211 if (similarity > best_similarity) {
212 best_similarity = similarity;
213 optimal_index = n;
214 }
215 }
216
217 return optimal_index;
218 }
219
OptimalIndex(const AudioBus * search_block,const AudioBus * target_block,Interval exclude_interval)220 int OptimalIndex(const AudioBus* search_block,
221 const AudioBus* target_block,
222 Interval exclude_interval) {
223 int channels = search_block->channels();
224 DCHECK_EQ(channels, target_block->channels());
225 int target_size = target_block->frames();
226 int num_candidate_blocks = search_block->frames() - (target_size - 1);
227
228 // This is a compromise between complexity reduction and search accuracy. I
229 // don't have a proof that down sample of order 5 is optimal. One can compute
230 // a decimation factor that minimizes complexity given the size of
231 // |search_block| and |target_block|. However, my experiments show the rate of
232 // missing the optimal index is significant. This value is chosen
233 // heuristically based on experiments.
234 const int kSearchDecimation = 5;
235
236 scoped_ptr<float[]> energy_target_block(new float[channels]);
237 scoped_ptr<float[]> energy_candidate_blocks(
238 new float[channels * num_candidate_blocks]);
239
240 // Energy of all candid frames.
241 MultiChannelMovingBlockEnergies(search_block, target_size,
242 energy_candidate_blocks.get());
243
244 // Energy of target frame.
245 MultiChannelDotProduct(target_block, 0, target_block, 0,
246 target_size, energy_target_block.get());
247
248 int optimal_index = DecimatedSearch(kSearchDecimation,
249 exclude_interval, target_block,
250 search_block, energy_target_block.get(),
251 energy_candidate_blocks.get());
252
253 int lim_low = std::max(0, optimal_index - kSearchDecimation);
254 int lim_high = std::min(num_candidate_blocks - 1,
255 optimal_index + kSearchDecimation);
256 return FullSearch(lim_low, lim_high, exclude_interval, target_block,
257 search_block, energy_target_block.get(),
258 energy_candidate_blocks.get());
259 }
260
GetSymmetricHanningWindow(int window_length,float * window)261 void GetSymmetricHanningWindow(int window_length, float* window) {
262 const float scale = 2.0f * M_PI / window_length;
263 for (int n = 0; n < window_length; ++n)
264 window[n] = 0.5f * (1.0f - cosf(n * scale));
265 }
266
267 } // namespace internal
268
269 } // namespace media
270
271