1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "webrtc/modules/audio_processing/audio_buffer.h"
12 
13 #include "webrtc/common_audio/include/audio_util.h"
14 #include "webrtc/common_audio/resampler/push_sinc_resampler.h"
15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
16 #include "webrtc/common_audio/channel_buffer.h"
17 #include "webrtc/modules/audio_processing/common.h"
18 
19 namespace webrtc {
20 namespace {
21 
// Per-channel frame counts that NumBandsFromSamplesPerChannel() compares
// against. The 32 kHz and 48 kHz counts are exact multiples of the 16 kHz one,
// which is what makes the band count an exact division.
const size_t kSamplesPer16kHzChannel = 160;
const size_t kSamplesPer32kHzChannel = 2 * kSamplesPer16kHzChannel;
const size_t kSamplesPer48kHzChannel = 3 * kSamplesPer16kHzChannel;
25 
KeyboardChannelIndex(const StreamConfig & stream_config)26 int KeyboardChannelIndex(const StreamConfig& stream_config) {
27   if (!stream_config.has_keyboard()) {
28     assert(false);
29     return 0;
30   }
31 
32   return stream_config.num_channels();
33 }
34 
NumBandsFromSamplesPerChannel(size_t num_frames)35 size_t NumBandsFromSamplesPerChannel(size_t num_frames) {
36   size_t num_bands = 1;
37   if (num_frames == kSamplesPer32kHzChannel ||
38       num_frames == kSamplesPer48kHzChannel) {
39     num_bands = rtc::CheckedDivExact(num_frames, kSamplesPer16kHzChannel);
40   }
41   return num_bands;
42 }
43 
44 }  // namespace
45 
AudioBuffer(size_t input_num_frames,size_t num_input_channels,size_t process_num_frames,size_t num_process_channels,size_t output_num_frames)46 AudioBuffer::AudioBuffer(size_t input_num_frames,
47                          size_t num_input_channels,
48                          size_t process_num_frames,
49                          size_t num_process_channels,
50                          size_t output_num_frames)
51   : input_num_frames_(input_num_frames),
52     num_input_channels_(num_input_channels),
53     proc_num_frames_(process_num_frames),
54     num_proc_channels_(num_process_channels),
55     output_num_frames_(output_num_frames),
56     num_channels_(num_process_channels),
57     num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)),
58     num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)),
59     mixed_low_pass_valid_(false),
60     reference_copied_(false),
61     activity_(AudioFrame::kVadUnknown),
62     keyboard_data_(NULL),
63     data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)) {
64   assert(input_num_frames_ > 0);
65   assert(proc_num_frames_ > 0);
66   assert(output_num_frames_ > 0);
67   assert(num_input_channels_ > 0);
68   assert(num_proc_channels_ > 0 && num_proc_channels_ <= num_input_channels_);
69 
70   if (input_num_frames_ != proc_num_frames_ ||
71       output_num_frames_ != proc_num_frames_) {
72     // Create an intermediate buffer for resampling.
73     process_buffer_.reset(new ChannelBuffer<float>(proc_num_frames_,
74                                                    num_proc_channels_));
75 
76     if (input_num_frames_ != proc_num_frames_) {
77       for (size_t i = 0; i < num_proc_channels_; ++i) {
78         input_resamplers_.push_back(
79             new PushSincResampler(input_num_frames_,
80                                   proc_num_frames_));
81       }
82     }
83 
84     if (output_num_frames_ != proc_num_frames_) {
85       for (size_t i = 0; i < num_proc_channels_; ++i) {
86         output_resamplers_.push_back(
87             new PushSincResampler(proc_num_frames_,
88                                   output_num_frames_));
89       }
90     }
91   }
92 
93   if (num_bands_ > 1) {
94     split_data_.reset(new IFChannelBuffer(proc_num_frames_,
95                                           num_proc_channels_,
96                                           num_bands_));
97     splitting_filter_.reset(new SplittingFilter(num_proc_channels_,
98                                                 num_bands_,
99                                                 proc_num_frames_));
100   }
101 }
102 
~AudioBuffer()103 AudioBuffer::~AudioBuffer() {}
104 
CopyFrom(const float * const * data,const StreamConfig & stream_config)105 void AudioBuffer::CopyFrom(const float* const* data,
106                            const StreamConfig& stream_config) {
107   assert(stream_config.num_frames() == input_num_frames_);
108   assert(stream_config.num_channels() == num_input_channels_);
109   InitForNewData();
110   // Initialized lazily because there's a different condition in
111   // DeinterleaveFrom.
112   const bool need_to_downmix =
113       num_input_channels_ > 1 && num_proc_channels_ == 1;
114   if (need_to_downmix && !input_buffer_) {
115     input_buffer_.reset(
116         new IFChannelBuffer(input_num_frames_, num_proc_channels_));
117   }
118 
119   if (stream_config.has_keyboard()) {
120     keyboard_data_ = data[KeyboardChannelIndex(stream_config)];
121   }
122 
123   // Downmix.
124   const float* const* data_ptr = data;
125   if (need_to_downmix) {
126     DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_,
127                                 input_buffer_->fbuf()->channels()[0]);
128     data_ptr = input_buffer_->fbuf_const()->channels();
129   }
130 
131   // Resample.
132   if (input_num_frames_ != proc_num_frames_) {
133     for (size_t i = 0; i < num_proc_channels_; ++i) {
134       input_resamplers_[i]->Resample(data_ptr[i],
135                                      input_num_frames_,
136                                      process_buffer_->channels()[i],
137                                      proc_num_frames_);
138     }
139     data_ptr = process_buffer_->channels();
140   }
141 
142   // Convert to the S16 range.
143   for (size_t i = 0; i < num_proc_channels_; ++i) {
144     FloatToFloatS16(data_ptr[i],
145                     proc_num_frames_,
146                     data_->fbuf()->channels()[i]);
147   }
148 }
149 
CopyTo(const StreamConfig & stream_config,float * const * data)150 void AudioBuffer::CopyTo(const StreamConfig& stream_config,
151                          float* const* data) {
152   assert(stream_config.num_frames() == output_num_frames_);
153   assert(stream_config.num_channels() == num_channels_ || num_channels_ == 1);
154 
155   // Convert to the float range.
156   float* const* data_ptr = data;
157   if (output_num_frames_ != proc_num_frames_) {
158     // Convert to an intermediate buffer for subsequent resampling.
159     data_ptr = process_buffer_->channels();
160   }
161   for (size_t i = 0; i < num_channels_; ++i) {
162     FloatS16ToFloat(data_->fbuf()->channels()[i],
163                     proc_num_frames_,
164                     data_ptr[i]);
165   }
166 
167   // Resample.
168   if (output_num_frames_ != proc_num_frames_) {
169     for (size_t i = 0; i < num_channels_; ++i) {
170       output_resamplers_[i]->Resample(data_ptr[i],
171                                       proc_num_frames_,
172                                       data[i],
173                                       output_num_frames_);
174     }
175   }
176 
177   // Upmix.
178   for (size_t i = num_channels_; i < stream_config.num_channels(); ++i) {
179     memcpy(data[i], data[0], output_num_frames_ * sizeof(**data));
180   }
181 }
182 
InitForNewData()183 void AudioBuffer::InitForNewData() {
184   keyboard_data_ = NULL;
185   mixed_low_pass_valid_ = false;
186   reference_copied_ = false;
187   activity_ = AudioFrame::kVadUnknown;
188   num_channels_ = num_proc_channels_;
189 }
190 
channels_const() const191 const int16_t* const* AudioBuffer::channels_const() const {
192   return data_->ibuf_const()->channels();
193 }
194 
channels()195 int16_t* const* AudioBuffer::channels() {
196   mixed_low_pass_valid_ = false;
197   return data_->ibuf()->channels();
198 }
199 
split_bands_const(size_t channel) const200 const int16_t* const* AudioBuffer::split_bands_const(size_t channel) const {
201   return split_data_.get() ?
202          split_data_->ibuf_const()->bands(channel) :
203          data_->ibuf_const()->bands(channel);
204 }
205 
split_bands(size_t channel)206 int16_t* const* AudioBuffer::split_bands(size_t channel) {
207   mixed_low_pass_valid_ = false;
208   return split_data_.get() ?
209          split_data_->ibuf()->bands(channel) :
210          data_->ibuf()->bands(channel);
211 }
212 
split_channels_const(Band band) const213 const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
214   if (split_data_.get()) {
215     return split_data_->ibuf_const()->channels(band);
216   } else {
217     return band == kBand0To8kHz ? data_->ibuf_const()->channels() : nullptr;
218   }
219 }
220 
split_channels(Band band)221 int16_t* const* AudioBuffer::split_channels(Band band) {
222   mixed_low_pass_valid_ = false;
223   if (split_data_.get()) {
224     return split_data_->ibuf()->channels(band);
225   } else {
226     return band == kBand0To8kHz ? data_->ibuf()->channels() : nullptr;
227   }
228 }
229 
data()230 ChannelBuffer<int16_t>* AudioBuffer::data() {
231   mixed_low_pass_valid_ = false;
232   return data_->ibuf();
233 }
234 
data() const235 const ChannelBuffer<int16_t>* AudioBuffer::data() const {
236   return data_->ibuf_const();
237 }
238 
split_data()239 ChannelBuffer<int16_t>* AudioBuffer::split_data() {
240   mixed_low_pass_valid_ = false;
241   return split_data_.get() ? split_data_->ibuf() : data_->ibuf();
242 }
243 
split_data() const244 const ChannelBuffer<int16_t>* AudioBuffer::split_data() const {
245   return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const();
246 }
247 
channels_const_f() const248 const float* const* AudioBuffer::channels_const_f() const {
249   return data_->fbuf_const()->channels();
250 }
251 
channels_f()252 float* const* AudioBuffer::channels_f() {
253   mixed_low_pass_valid_ = false;
254   return data_->fbuf()->channels();
255 }
256 
split_bands_const_f(size_t channel) const257 const float* const* AudioBuffer::split_bands_const_f(size_t channel) const {
258   return split_data_.get() ?
259          split_data_->fbuf_const()->bands(channel) :
260          data_->fbuf_const()->bands(channel);
261 }
262 
split_bands_f(size_t channel)263 float* const* AudioBuffer::split_bands_f(size_t channel) {
264   mixed_low_pass_valid_ = false;
265   return split_data_.get() ?
266          split_data_->fbuf()->bands(channel) :
267          data_->fbuf()->bands(channel);
268 }
269 
split_channels_const_f(Band band) const270 const float* const* AudioBuffer::split_channels_const_f(Band band) const {
271   if (split_data_.get()) {
272     return split_data_->fbuf_const()->channels(band);
273   } else {
274     return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr;
275   }
276 }
277 
split_channels_f(Band band)278 float* const* AudioBuffer::split_channels_f(Band band) {
279   mixed_low_pass_valid_ = false;
280   if (split_data_.get()) {
281     return split_data_->fbuf()->channels(band);
282   } else {
283     return band == kBand0To8kHz ? data_->fbuf()->channels() : nullptr;
284   }
285 }
286 
data_f()287 ChannelBuffer<float>* AudioBuffer::data_f() {
288   mixed_low_pass_valid_ = false;
289   return data_->fbuf();
290 }
291 
data_f() const292 const ChannelBuffer<float>* AudioBuffer::data_f() const {
293   return data_->fbuf_const();
294 }
295 
split_data_f()296 ChannelBuffer<float>* AudioBuffer::split_data_f() {
297   mixed_low_pass_valid_ = false;
298   return split_data_.get() ? split_data_->fbuf() : data_->fbuf();
299 }
300 
split_data_f() const301 const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
302   return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();
303 }
304 
mixed_low_pass_data()305 const int16_t* AudioBuffer::mixed_low_pass_data() {
306   if (num_proc_channels_ == 1) {
307     return split_bands_const(0)[kBand0To8kHz];
308   }
309 
310   if (!mixed_low_pass_valid_) {
311     if (!mixed_low_pass_channels_.get()) {
312       mixed_low_pass_channels_.reset(
313           new ChannelBuffer<int16_t>(num_split_frames_, 1));
314     }
315 
316     DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz),
317                                     num_split_frames_, num_channels_,
318                                     mixed_low_pass_channels_->channels()[0]);
319     mixed_low_pass_valid_ = true;
320   }
321   return mixed_low_pass_channels_->channels()[0];
322 }
323 
low_pass_reference(int channel) const324 const int16_t* AudioBuffer::low_pass_reference(int channel) const {
325   if (!reference_copied_) {
326     return NULL;
327   }
328 
329   return low_pass_reference_channels_->channels()[channel];
330 }
331 
keyboard_data() const332 const float* AudioBuffer::keyboard_data() const {
333   return keyboard_data_;
334 }
335 
set_activity(AudioFrame::VADActivity activity)336 void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
337   activity_ = activity;
338 }
339 
activity() const340 AudioFrame::VADActivity AudioBuffer::activity() const {
341   return activity_;
342 }
343 
num_channels() const344 size_t AudioBuffer::num_channels() const {
345   return num_channels_;
346 }
347 
set_num_channels(size_t num_channels)348 void AudioBuffer::set_num_channels(size_t num_channels) {
349   num_channels_ = num_channels;
350 }
351 
num_frames() const352 size_t AudioBuffer::num_frames() const {
353   return proc_num_frames_;
354 }
355 
num_frames_per_band() const356 size_t AudioBuffer::num_frames_per_band() const {
357   return num_split_frames_;
358 }
359 
num_keyboard_frames() const360 size_t AudioBuffer::num_keyboard_frames() const {
361   // We don't resample the keyboard channel.
362   return input_num_frames_;
363 }
364 
num_bands() const365 size_t AudioBuffer::num_bands() const {
366   return num_bands_;
367 }
368 
369 // The resampler is only for supporting 48kHz to 16kHz in the reverse stream.
DeinterleaveFrom(AudioFrame * frame)370 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
371   assert(frame->num_channels_ == num_input_channels_);
372   assert(frame->samples_per_channel_ == input_num_frames_);
373   InitForNewData();
374   // Initialized lazily because there's a different condition in CopyFrom.
375   if ((input_num_frames_ != proc_num_frames_) && !input_buffer_) {
376     input_buffer_.reset(
377         new IFChannelBuffer(input_num_frames_, num_proc_channels_));
378   }
379   activity_ = frame->vad_activity_;
380 
381   int16_t* const* deinterleaved;
382   if (input_num_frames_ == proc_num_frames_) {
383     deinterleaved = data_->ibuf()->channels();
384   } else {
385     deinterleaved = input_buffer_->ibuf()->channels();
386   }
387   if (num_proc_channels_ == 1) {
388     // Downmix and deinterleave simultaneously.
389     DownmixInterleavedToMono(frame->data_, input_num_frames_,
390                              num_input_channels_, deinterleaved[0]);
391   } else {
392     assert(num_proc_channels_ == num_input_channels_);
393     Deinterleave(frame->data_,
394                  input_num_frames_,
395                  num_proc_channels_,
396                  deinterleaved);
397   }
398 
399   // Resample.
400   if (input_num_frames_ != proc_num_frames_) {
401     for (size_t i = 0; i < num_proc_channels_; ++i) {
402       input_resamplers_[i]->Resample(input_buffer_->fbuf_const()->channels()[i],
403                                      input_num_frames_,
404                                      data_->fbuf()->channels()[i],
405                                      proc_num_frames_);
406     }
407   }
408 }
409 
InterleaveTo(AudioFrame * frame,bool data_changed)410 void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) {
411   frame->vad_activity_ = activity_;
412   if (!data_changed) {
413     return;
414   }
415 
416   assert(frame->num_channels_ == num_channels_ || num_channels_ == 1);
417   assert(frame->samples_per_channel_ == output_num_frames_);
418 
419   // Resample if necessary.
420   IFChannelBuffer* data_ptr = data_.get();
421   if (proc_num_frames_ != output_num_frames_) {
422     if (!output_buffer_) {
423       output_buffer_.reset(
424           new IFChannelBuffer(output_num_frames_, num_channels_));
425     }
426     for (size_t i = 0; i < num_channels_; ++i) {
427       output_resamplers_[i]->Resample(
428           data_->fbuf()->channels()[i], proc_num_frames_,
429           output_buffer_->fbuf()->channels()[i], output_num_frames_);
430     }
431     data_ptr = output_buffer_.get();
432   }
433 
434   if (frame->num_channels_ == num_channels_) {
435     Interleave(data_ptr->ibuf()->channels(), proc_num_frames_, num_channels_,
436                frame->data_);
437   } else {
438     UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], proc_num_frames_,
439                            frame->num_channels_, frame->data_);
440   }
441 }
442 
CopyLowPassToReference()443 void AudioBuffer::CopyLowPassToReference() {
444   reference_copied_ = true;
445   if (!low_pass_reference_channels_.get() ||
446       low_pass_reference_channels_->num_channels() != num_channels_) {
447     low_pass_reference_channels_.reset(
448         new ChannelBuffer<int16_t>(num_split_frames_,
449                                    num_proc_channels_));
450   }
451   for (size_t i = 0; i < num_proc_channels_; i++) {
452     memcpy(low_pass_reference_channels_->channels()[i],
453            split_bands_const(i)[kBand0To8kHz],
454            low_pass_reference_channels_->num_frames_per_band() *
455                sizeof(split_bands_const(i)[kBand0To8kHz][0]));
456   }
457 }
458 
SplitIntoFrequencyBands()459 void AudioBuffer::SplitIntoFrequencyBands() {
460   splitting_filter_->Analysis(data_.get(), split_data_.get());
461 }
462 
MergeFrequencyBands()463 void AudioBuffer::MergeFrequencyBands() {
464   splitting_filter_->Synthesis(split_data_.get(), data_.get());
465 }
466 
467 }  // namespace webrtc
468