1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "sola_time_scaler.h"
18 
19 #include <math.h>
20 #include <hlogging.h>
21 #include <algorithm>
22 
23 #include "ring_buffer.h"
24 
25 #define FLAGS_sola_ring_buffer 2.0
26 #define FLAGS_sola_enable_correlation true
27 
28 
29 namespace video_editing {
30 
31 // Returns a cross-correlation score for the specified buffers.
Correlate(const float * buffer1,const float * buffer2,int num_frames)32 int SolaAnalyzer::Correlate(const float* buffer1, const float* buffer2,
33                             int num_frames) {
34   CHECK(initialized_);
35 
36   int score = 0;
37   num_frames *= num_channels_;
38   while (num_frames-- > 0) {
39     // Increment the score if the sign bits match.
40     score += ((bit_cast<int32>(*buffer1++) ^ bit_cast<int32>(*buffer2++)) >= 0)
41               ? 1 : 0;
42   }
43   return score;
44 }
45 
46 // Trivial SolaAnalyzer class to bypass correlation.
47 class SolaBypassAnalyzer : public SolaAnalyzer {
48  public:
SolaBypassAnalyzer()49   SolaBypassAnalyzer() { }
Correlate(const float *,const float *,int num_frames)50   virtual int Correlate(const float*, const float*, int num_frames) {
51     return num_frames * num_channels_;
52   }
53 };
54 
55 
56 // Default constructor.
SolaTimeScaler()57 SolaTimeScaler::SolaTimeScaler()
58     : input_buffer_(NULL), output_buffer_(NULL), analyzer_(NULL) {
59   sample_rate_ = 0;
60   num_channels_ = 0;
61 
62   draining_ = false;
63   initialized_ = false;
64 }
65 
~SolaTimeScaler()66 SolaTimeScaler::~SolaTimeScaler() {
67   delete input_buffer_;
68   delete output_buffer_;
69   delete analyzer_;
70 }
71 
72 // Injects a SolaAnalyzer instance for analyzing signal frames.
set_analyzer(SolaAnalyzer * analyzer)73 void SolaTimeScaler::set_analyzer(SolaAnalyzer* analyzer) {
74   MutexLock lock(&mutex_);  // lock out processing while updating
75   delete analyzer_;
76   analyzer_ = analyzer;
77 }
78 
79 // Initializes a SOLA timescaler.
Init(double sample_rate,int num_channels,double initial_speed,double window_duration,double overlap_duration)80 void SolaTimeScaler::Init(double sample_rate,
81                           int num_channels,
82                           double initial_speed,
83                           double window_duration,
84                           double overlap_duration) {
85   MutexLock lock(&mutex_);  // lock out processing while updating
86 
87   sample_rate_ = sample_rate;
88   num_channels_ = num_channels;
89   speed_ = initial_speed;
90   window_duration_ = window_duration;
91   overlap_duration_ = overlap_duration;
92 
93   initialized_ = true;
94   GenerateParameters();
95   Reset();
96 }
97 
98 // Adjusts the rate scaling factor.
set_speed(double speed)99 void SolaTimeScaler::set_speed(double speed) {
100   MutexLock lock(&mutex_);  // lock out processing while updating
101 
102   speed_ = speed;
103   GenerateParameters();
104 }
105 
106 // Generates processing parameters from the current settings.
GenerateParameters()107 void SolaTimeScaler::GenerateParameters() {
108   if (speed_ < 0.1) {
109     LOGE("Requested speed %fx limited to 0.1x", speed_);
110     speed_ = 0.1;
111   } else if (speed_ > 8.0) {
112     LOGE("Requested speed %fx limited to 8.0x", speed_);
113     speed_ = 8.0;
114   }
115 
116   ratio_ = 1.0 / speed_;
117 
118   num_window_frames_ = nearbyint(sample_rate_ * window_duration_);
119 
120   // Limit the overlap to half the window size, and round up to an odd number.
121   // Half of overlap window (rounded down) is also a useful number.
122   overlap_duration_ = min(overlap_duration_, window_duration_ / 2.0);
123   num_overlap_frames_ = nearbyint(sample_rate_ * overlap_duration_);
124   num_overlap_frames_ |= 1;
125   half_overlap_frames_ = num_overlap_frames_ >> 1;
126 
127   if (speed_ >= 1.) {
128     // For compression (speed up), adjacent input windows overlap in the output.
129     input_window_offset_ = num_window_frames_;
130     target_merge_offset_ = nearbyint(num_window_frames_ * ratio_);
131   } else {
132     // For expansion (slow down), each input window start point overlaps the
133     // previous, and they are placed adjacently in the output
134     // (+/- half the overlap size).
135     input_window_offset_ = nearbyint(num_window_frames_ * speed_);
136     target_merge_offset_ = num_window_frames_;
137   }
138 
139   // Make sure we copy enough extra data to be able to perform a
140   // frame correlation over the range of target merge point +/- half overlap,
141   // even when the previous merge point was adjusted backwards a half overlap.
142   max_frames_to_merge_ = max(num_window_frames_,
143       target_merge_offset_ + (2 * num_overlap_frames_));
144   min_output_to_hold_=
145       max_frames_to_merge_ + num_overlap_frames_ - target_merge_offset_;
146 }
147 
148 // The input buffer has one writer and reader.
149 // The output buffer has one reader/updater, and one reader/consumer.
150 static const int kInputReader = 0;
151 static const int kOutputAnalysis = 0;
152 static const int kOutputConsumer = 1;
153 
Reset()154 void SolaTimeScaler::Reset() {
155   CHECK(initialized_);
156   double duration = max(FLAGS_sola_ring_buffer, 20. * window_duration_);
157   draining_ = false;
158 
159   delete input_buffer_;
160   input_buffer_ = new RingBuffer();
161   input_buffer_->Init(static_cast<int>
162       (sample_rate_ * duration), num_channels_, 1);
163 
164   delete output_buffer_;
165   output_buffer_ = new RingBuffer();
166   output_buffer_->Init(static_cast<int>
167       (sample_rate_ * ratio_ * duration), num_channels_, 2);
168 
169   if (analyzer_ == NULL) {
170     if (FLAGS_sola_enable_correlation) {
171       analyzer_ = new SolaAnalyzer();
172     } else {
173       analyzer_ = new SolaBypassAnalyzer();
174     }
175   }
176   analyzer_->Init(sample_rate_, num_channels_);
177 }
178 
179 // Returns the number of frames that the input buffer can accept.
input_limit() const180 int SolaTimeScaler::input_limit() const {
181   CHECK(initialized_);
182   return input_buffer_->overhead();
183 }
184 
185 // Returns the number of available output frames.
available()186 int SolaTimeScaler::available() {
187   CHECK(initialized_);
188 
189   int available = output_buffer_->available(kOutputConsumer);
190   if (available > min_output_to_hold_) {
191     available -= min_output_to_hold_;
192   } else if (draining_) {
193     Process();
194     available = output_buffer_->available(kOutputConsumer);
195     if (available > min_output_to_hold_) {
196       available -= min_output_to_hold_;
197     }
198   } else {
199     available = 0;
200   }
201   return available;
202 }
203 
Drain()204 void SolaTimeScaler::Drain() {
205   CHECK(initialized_);
206 
207   draining_ = true;
208 }
209 
210 
211 // Feeds audio to the timescaler, and processes as much data as possible.
InjectSamples(float * buffer,int num_frames)212 int SolaTimeScaler::InjectSamples(float* buffer, int num_frames) {
213   CHECK(initialized_);
214 
215   // Do not write more frames than the buffer can accept.
216   num_frames = min(input_limit(), num_frames);
217   if (!num_frames) {
218     return 0;
219   }
220 
221   // Copy samples to the input buffer and then process whatever can be consumed.
222   input_buffer_->Write(buffer, num_frames);
223   Process();
224   return num_frames;
225 }
226 
227 // Retrieves audio data from the timescaler.
RetrieveSamples(float * buffer,int num_frames)228 int SolaTimeScaler::RetrieveSamples(float* buffer, int num_frames) {
229   CHECK(initialized_);
230 
231   // Do not read more frames than available.
232   num_frames = min(available(), num_frames);
233   if (!num_frames) {
234     return 0;
235   }
236 
237   output_buffer_->Copy(kOutputConsumer, buffer, num_frames);
238   output_buffer_->Seek(kOutputConsumer,
239                        output_buffer_->Tell(kOutputConsumer) + num_frames);
240 
241   return num_frames;
242 }
243 
244 // Munges input samples to produce output.
Process()245 bool SolaTimeScaler::Process() {
246   CHECK(initialized_);
247   bool generated_data = false;
248 
249   // We can only process data if there is sufficient input available
250   // (or we are draining the latency), and there is sufficient room
251   // for output to be merged.
252   while (((input_buffer_->available(kInputReader) > max_frames_to_merge_) ||
253          draining_) && (output_buffer_->overhead() >= max_frames_to_merge_)) {
254     MutexLock lock(&mutex_);  // lock out updates while processing each window
255 
256     // Determine the number of samples to merge into the output.
257     int input_count =
258         min(input_buffer_->available(kInputReader), max_frames_to_merge_);
259     if (input_count == 0) {
260       break;
261     }
262     // The input reader always points to the next window to process.
263     float* input_pointer = input_buffer_->GetPointer(kInputReader, input_count);
264 
265     // The analysis reader always points to the ideal target merge point,
266     // minus half an overlap window (ie, the starting point for correlation).
267     // That means the available data from that point equals the number
268     // of samples that must be cross-faded.
269     int output_merge_cnt = output_buffer_->available(kOutputAnalysis);
270     float* output_pointer =
271         output_buffer_->GetPointer(kOutputAnalysis, output_merge_cnt);
272 
273     // If there is not enough data to do a proper correlation,
274     // just merge at the ideal target point. Otherwise,
275     // find the best correlation score, working from the center out.
276     int merge_offset = min(output_merge_cnt, half_overlap_frames_);
277 
278     if ((output_merge_cnt >= (2 * num_overlap_frames_)) &&
279         (input_count >= num_overlap_frames_)) {
280       int best_offset = merge_offset;
281       int best_score = 0;
282       int score;
283       for (int i = 0; i <= half_overlap_frames_; ++i) {
284         score = analyzer_->Correlate(input_pointer,
285             output_pointer + ((merge_offset + i) * num_channels_),
286             num_overlap_frames_);
287         if (score > best_score) {
288           best_score = score;
289           best_offset = merge_offset + i;
290           if (score == (num_overlap_frames_ * num_channels_)) {
291             break;  // It doesn't get better than perfect.
292           }
293         }
294         if (i > 0) {
295           score = analyzer_->Correlate(input_pointer,
296               output_pointer + ((merge_offset - i) * num_channels_),
297               num_overlap_frames_);
298           if (score > best_score) {
299             best_score = score;
300             best_offset = merge_offset - i;
301             if (score == (num_overlap_frames_ * num_channels_)) {
302               break;  // It doesn't get better than perfect.
303             }
304           }
305         }
306       }
307       merge_offset = best_offset;
308     } else if ((output_merge_cnt > 0) && !draining_) {
309       LOGE("no correlation performed");
310     }
311 
312     // Crossfade the overlap between input and output, and then
313     // copy in the remaining input.
314     int crossfade_count = max(0, (output_merge_cnt - merge_offset));
315     crossfade_count = min(crossfade_count, input_count);
316     int remaining_count = input_count - crossfade_count;
317 
318     float* merge_pointer = output_pointer + (merge_offset * num_channels_);
319     float flt_count = static_cast<float>(crossfade_count);
320     for (int i = 0; i < crossfade_count; ++i) {
321       // Linear cross-fade, for now.
322       float input_scale = static_cast<float>(i) / flt_count;
323       float output_scale = 1. - input_scale;
324       for (int j = 0; j < num_channels_; ++j) {
325         *merge_pointer = (*merge_pointer * output_scale) +
326                          (*input_pointer++ * input_scale);
327         ++merge_pointer;
328       }
329     }
330     // Copy the merged buffer back into the output, if necessary, and
331     // append the rest of the window.
332     output_buffer_->MergeBack(kOutputAnalysis,
333                               output_pointer, output_merge_cnt);
334     output_buffer_->Write(input_pointer, remaining_count);
335 
336     // Advance the output analysis pointer to the next target merge point,
337     // minus half an overlap window.  The target merge point is always
338     // calculated as a delta from the previous ideal target, not the actual
339     // target, to avoid drift.
340     int output_advance = target_merge_offset_;
341     if (output_merge_cnt < half_overlap_frames_) {
342       // On the first window, back up the pointer for the next correlation.
343       // Thereafter, that compensation is preserved.
344       output_advance -= half_overlap_frames_;
345     }
346 
347     // Don't advance beyond the available data, when finishing up.
348     if (draining_) {
349       output_advance =
350           min(output_advance, output_buffer_->available(kOutputAnalysis));
351     }
352     output_buffer_->Seek(kOutputAnalysis,
353         output_buffer_->Tell(kOutputAnalysis) + output_advance);
354 
355     // Advance the input pointer beyond the frames that are no longer needed.
356     input_buffer_->Seek(kInputReader, input_buffer_->Tell(kInputReader) +
357                         min(input_count, input_window_offset_));
358 
359     if ((crossfade_count + remaining_count) > 0) {
360       generated_data = true;
361     }
362   }  // while (more to process)
363   return generated_data;
364 }
365 
366 }  // namespace video_editing
367