/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "audio_processing_impl.h"

#include <assert.h>

#include "audio_buffer.h"
#include "critical_section_wrapper.h"
#include "echo_cancellation_impl.h"
#include "echo_control_mobile_impl.h"
#include "file_wrapper.h"
#include "high_pass_filter_impl.h"
#include "gain_control_impl.h"
#include "level_estimator_impl.h"
#include "module_common_types.h"
#include "noise_suppression_impl.h"
#include "processing_component.h"
#include "splitting_filter.h"
#include "voice_detection_impl.h"

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
// Files generated at build-time by the protobuf compiler.
#ifdef WEBRTC_ANDROID
#include "external/webrtc/src/modules/audio_processing/debug.pb.h"
#else
#include "webrtc/audio_processing/debug.pb.h"
#endif
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP

namespace webrtc {
AudioProcessing* AudioProcessing::Create(int id) {
  /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
             webrtc::kTraceAudioProcessing,
             id,
             "AudioProcessing::Create()");*/

  AudioProcessingImpl* apm = new AudioProcessingImpl(id);
  if (apm->Initialize() != kNoError) {
    delete apm;
    apm = NULL;
  }

  return apm;
}
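
// A minimal usage sketch (illustrative only; how a caller configures the
// components and drives frames through the module is assumed, not shown
// here):
//
//   AudioProcessing* apm = AudioProcessing::Create(0);
//   if (apm == NULL) {
//     // Initialization failed; Create() has already freed the instance.
//   }
//   // ... configure components and process frames ...
//   AudioProcessing::Destroy(apm);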

void AudioProcessing::Destroy(AudioProcessing* apm) {
  delete static_cast<AudioProcessingImpl*>(apm);
}

AudioProcessingImpl::AudioProcessingImpl(int id)
    : id_(id),
      echo_cancellation_(NULL),
      echo_control_mobile_(NULL),
      gain_control_(NULL),
      high_pass_filter_(NULL),
      level_estimator_(NULL),
      noise_suppression_(NULL),
      voice_detection_(NULL),
      crit_(CriticalSectionWrapper::CreateCriticalSection()),
      render_audio_(NULL),
      capture_audio_(NULL),
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
      debug_file_(FileWrapper::Create()),
      event_msg_(new audioproc::Event()),
#endif
      sample_rate_hz_(kSampleRate16kHz),
      split_sample_rate_hz_(kSampleRate16kHz),
      samples_per_channel_(sample_rate_hz_ / 100),
      stream_delay_ms_(0),
      was_stream_delay_set_(false),
      num_reverse_channels_(1),
      num_input_channels_(1),
      num_output_channels_(1) {

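  // Each component is appended to component_list_ so that InitializeLocked()
  // and the destructor can iterate over all of them uniformly.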
  echo_cancellation_ = new EchoCancellationImpl(this);
  component_list_.push_back(echo_cancellation_);

  echo_control_mobile_ = new EchoControlMobileImpl(this);
  component_list_.push_back(echo_control_mobile_);

  gain_control_ = new GainControlImpl(this);
  component_list_.push_back(gain_control_);

  high_pass_filter_ = new HighPassFilterImpl(this);
  component_list_.push_back(high_pass_filter_);

  level_estimator_ = new LevelEstimatorImpl(this);
  component_list_.push_back(level_estimator_);

  noise_suppression_ = new NoiseSuppressionImpl(this);
  component_list_.push_back(noise_suppression_);

  voice_detection_ = new VoiceDetectionImpl(this);
  component_list_.push_back(voice_detection_);
}

AudioProcessingImpl::~AudioProcessingImpl() {
  while (!component_list_.empty()) {
    ProcessingComponent* component = component_list_.front();
    component->Destroy();
    delete component;
    component_list_.pop_front();
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    debug_file_->CloseFile();
  }
#endif

  delete crit_;
  crit_ = NULL;

  if (render_audio_) {
    delete render_audio_;
    render_audio_ = NULL;
  }

  if (capture_audio_) {
    delete capture_audio_;
    capture_audio_ = NULL;
  }
}

CriticalSectionWrapper* AudioProcessingImpl::crit() const {
  return crit_;
}

int AudioProcessingImpl::split_sample_rate_hz() const {
  return split_sample_rate_hz_;
}

int AudioProcessingImpl::Initialize() {
  CriticalSectionScoped crit_scoped(*crit_);
  return InitializeLocked();
}

int AudioProcessingImpl::InitializeLocked() {
  if (render_audio_ != NULL) {
    delete render_audio_;
    render_audio_ = NULL;
  }

  if (capture_audio_ != NULL) {
    delete capture_audio_;
    capture_audio_ = NULL;
  }

  render_audio_ = new AudioBuffer(num_reverse_channels_,
                                  samples_per_channel_);
  capture_audio_ = new AudioBuffer(num_input_channels_,
                                   samples_per_channel_);

  was_stream_delay_set_ = false;

  // Initialize all components.
  std::list<ProcessingComponent*>::iterator it;
  for (it = component_list_.begin(); it != component_list_.end(); it++) {
    int err = (*it)->Initialize();
    if (err != kNoError) {
      return err;
    }
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    int err = WriteInitMessage();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  return kNoError;
}

int AudioProcessingImpl::set_sample_rate_hz(int rate) {
  CriticalSectionScoped crit_scoped(*crit_);
  if (rate != kSampleRate8kHz &&
      rate != kSampleRate16kHz &&
      rate != kSampleRate32kHz) {
    return kBadParameterError;
  }

  sample_rate_hz_ = rate;
  samples_per_channel_ = rate / 100;

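  // At 32 kHz the stream is processed as two 16 kHz bands (see the splitting
  // filter in ProcessStream()), so the per-band rate is capped at 16 kHz.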
  if (sample_rate_hz_ == kSampleRate32kHz) {
    split_sample_rate_hz_ = kSampleRate16kHz;
  } else {
    split_sample_rate_hz_ = sample_rate_hz_;
  }

  return InitializeLocked();
}

int AudioProcessingImpl::sample_rate_hz() const {
  return sample_rate_hz_;
}

int AudioProcessingImpl::set_num_reverse_channels(int channels) {
  CriticalSectionScoped crit_scoped(*crit_);
  // Only stereo supported currently.
  if (channels > 2 || channels < 1) {
    return kBadParameterError;
  }

  num_reverse_channels_ = channels;

  return InitializeLocked();
}

int AudioProcessingImpl::num_reverse_channels() const {
  return num_reverse_channels_;
}

int AudioProcessingImpl::set_num_channels(
    int input_channels,
    int output_channels) {
  CriticalSectionScoped crit_scoped(*crit_);
  if (output_channels > input_channels) {
    return kBadParameterError;
  }

  // Only stereo supported currently.
  if (input_channels > 2 || input_channels < 1) {
    return kBadParameterError;
  }

  if (output_channels > 2 || output_channels < 1) {
    return kBadParameterError;
  }

  num_input_channels_ = input_channels;
  num_output_channels_ = output_channels;

  return InitializeLocked();
}

int AudioProcessingImpl::num_input_channels() const {
  return num_input_channels_;
}

int AudioProcessingImpl::num_output_channels() const {
  return num_output_channels_;
}

int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(*crit_);
  int err = kNoError;

  if (frame == NULL) {
    return kNullPointerError;
  }

  if (frame->_frequencyInHz != sample_rate_hz_) {
    return kBadSampleRateError;
  }

  if (frame->_audioChannel != num_input_channels_) {
    return kBadNumberChannelsError;
  }

  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    event_msg_->set_type(audioproc::Event::STREAM);
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_input_data(frame->_payloadData, data_size);
    msg->set_delay(stream_delay_ms_);
    msg->set_drift(echo_cancellation_->stream_drift_samples());
    msg->set_level(gain_control_->stream_analog_level());
  }
#endif

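  // Capture-side processing, in order: deinterleave, optionally mix down to
  // the output channel count, split into low/high bands (32 kHz only), run
  // the high-pass filter, AGC analysis, AEC, NS, AECM, VAD and AGC,
  // recombine the bands, update the level estimator, and interleave back
  // into |frame|.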
  capture_audio_->DeinterleaveFrom(frame);

  // TODO(ajm): experiment with mixing and AEC placement.
  if (num_output_channels_ < num_input_channels_) {
    capture_audio_->Mix(num_output_channels_);
    frame->_audioChannel = num_output_channels_;
  }

  bool data_changed = stream_data_changed();
  if (analysis_needed(data_changed)) {
    for (int i = 0; i < num_output_channels_; i++) {
      // Split into a low and high band.
      SplittingFilterAnalysis(capture_audio_->data(i),
                              capture_audio_->low_pass_split_data(i),
                              capture_audio_->high_pass_split_data(i),
                              capture_audio_->analysis_filter_state1(i),
                              capture_audio_->analysis_filter_state2(i));
    }
  }

  err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  if (echo_control_mobile_->is_enabled() &&
      noise_suppression_->is_enabled()) {
    capture_audio_->CopyLowPassToReference();
  }

  err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = voice_detection_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  if (synthesis_needed(data_changed)) {
    for (int i = 0; i < num_output_channels_; i++) {
      // Recombine low and high bands.
      SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i),
                               capture_audio_->high_pass_split_data(i),
                               capture_audio_->data(i),
                               capture_audio_->synthesis_filter_state1(i),
                               capture_audio_->synthesis_filter_state2(i));
    }
  }

  // The level estimator operates on the recombined data.
  err = level_estimator_->ProcessStream(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  capture_audio_->InterleaveTo(frame, data_changed);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_output_data(frame->_payloadData, data_size);
    err = WriteMessageToDebugFile();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  was_stream_delay_set_ = false;
  return kNoError;
}

int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(*crit_);
  int err = kNoError;

  if (frame == NULL) {
    return kNullPointerError;
  }

  if (frame->_frequencyInHz != sample_rate_hz_) {
    return kBadSampleRateError;
  }

  if (frame->_audioChannel != num_reverse_channels_) {
    return kBadNumberChannelsError;
  }

  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
    audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_data(frame->_payloadData, data_size);
    err = WriteMessageToDebugFile();
    if (err != kNoError) {
      return err;
    }
  }
#endif

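  // The reverse (render) stream is only analyzed, never modified; it supplies
  // the far-end reference used by AEC, AECM and gain control.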
  render_audio_->DeinterleaveFrom(frame);

  // TODO(ajm): turn the splitting filter into a component?
  if (sample_rate_hz_ == kSampleRate32kHz) {
    for (int i = 0; i < num_reverse_channels_; i++) {
      // Split into low and high band.
      SplittingFilterAnalysis(render_audio_->data(i),
                              render_audio_->low_pass_split_data(i),
                              render_audio_->high_pass_split_data(i),
                              render_audio_->analysis_filter_state1(i),
                              render_audio_->analysis_filter_state2(i));
    }
  }

  // TODO(ajm): warnings possible from components?
  err = echo_cancellation_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  return err;  // TODO(ajm): this is for returning warnings; necessary?
}

int AudioProcessingImpl::set_stream_delay_ms(int delay) {
  was_stream_delay_set_ = true;
  if (delay < 0) {
    return kBadParameterError;
  }

  // TODO(ajm): the max is rather arbitrarily chosen; investigate.
  if (delay > 500) {
    stream_delay_ms_ = 500;
    return kBadStreamParameterWarning;
  }

  stream_delay_ms_ = delay;
  return kNoError;
}

int AudioProcessingImpl::stream_delay_ms() const {
  return stream_delay_ms_;
}

bool AudioProcessingImpl::was_stream_delay_set() const {
  return was_stream_delay_set_;
}

int AudioProcessingImpl::StartDebugRecording(
    const char filename[AudioProcessing::kMaxFilenameSize]) {
  CriticalSectionScoped crit_scoped(*crit_);
  assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize);

  if (filename == NULL) {
    return kNullPointerError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Stop any ongoing recording.
  if (debug_file_->Open()) {
    if (debug_file_->CloseFile() == -1) {
      return kFileError;
    }
  }

  if (debug_file_->OpenFile(filename, false) == -1) {
    debug_file_->CloseFile();
    return kFileError;
  }

  int err = WriteInitMessage();
  if (err != kNoError) {
    return err;
  }
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}

int AudioProcessingImpl::StopDebugRecording() {
  CriticalSectionScoped crit_scoped(*crit_);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // We just return if recording hasn't started.
  if (debug_file_->Open()) {
    if (debug_file_->CloseFile() == -1) {
      return kFileError;
    }
  }
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}

EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
  return echo_cancellation_;
}

EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
  return echo_control_mobile_;
}

GainControl* AudioProcessingImpl::gain_control() const {
  return gain_control_;
}

HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
  return high_pass_filter_;
}

LevelEstimator* AudioProcessingImpl::level_estimator() const {
  return level_estimator_;
}

NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
  return noise_suppression_;
}

VoiceDetection* AudioProcessingImpl::voice_detection() const {
  return voice_detection_;
}

WebRtc_Word32 AudioProcessingImpl::ChangeUniqueId(const WebRtc_Word32 id) {
  CriticalSectionScoped crit_scoped(*crit_);
  /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
             webrtc::kTraceAudioProcessing,
             id_,
             "ChangeUniqueId(new id = %d)",
             id);*/
  id_ = id;

  return kNoError;
}

bool AudioProcessingImpl::stream_data_changed() const {
  int enabled_count = 0;
  std::list<ProcessingComponent*>::const_iterator it;
  for (it = component_list_.begin(); it != component_list_.end(); it++) {
    if ((*it)->is_component_enabled()) {
      enabled_count++;
    }
  }

  // Data is unchanged if no components are enabled, or if only
  // level_estimator_ or voice_detection_ is enabled.
  if (enabled_count == 0) {
    return false;
  } else if (enabled_count == 1) {
    if (level_estimator_->is_enabled() || voice_detection_->is_enabled()) {
      return false;
    }
  } else if (enabled_count == 2) {
    if (level_estimator_->is_enabled() && voice_detection_->is_enabled()) {
      return false;
    }
  }
  return true;
}

bool AudioProcessingImpl::synthesis_needed(bool stream_data_changed) const {
  return (stream_data_changed && sample_rate_hz_ == kSampleRate32kHz);
}

bool AudioProcessingImpl::analysis_needed(bool stream_data_changed) const {
  if (!stream_data_changed && !voice_detection_->is_enabled()) {
    // Only level_estimator_ is enabled.
    return false;
  } else if (sample_rate_hz_ == kSampleRate32kHz) {
    // Something besides level_estimator_ is enabled, and we have super-wb.
    return true;
  }
  return false;
}

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
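// The debug dump written below is a sequence of audioproc::Event protobuf
// messages, each preceded by its serialized size as an int32_t (written in
// the machine's native byte order; see the endianness TODO below).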
int AudioProcessingImpl::WriteMessageToDebugFile() {
  int32_t size = event_msg_->ByteSize();
  if (size <= 0) {
    return kUnspecifiedError;
  }
#if defined(WEBRTC_BIG_ENDIAN)
  // TODO(ajm): Use little-endian "on the wire". For the moment, we can be
  //            pretty safe in assuming little-endian.
#endif

  if (!event_msg_->SerializeToString(&event_str_)) {
    return kUnspecifiedError;
  }

  // Write message preceded by its size.
  if (!debug_file_->Write(&size, sizeof(int32_t))) {
    return kFileError;
  }
  if (!debug_file_->Write(event_str_.data(), event_str_.length())) {
    return kFileError;
  }

  event_msg_->Clear();

  return 0;
}

int AudioProcessingImpl::WriteInitMessage() {
  event_msg_->set_type(audioproc::Event::INIT);
  audioproc::Init* msg = event_msg_->mutable_init();
  msg->set_sample_rate(sample_rate_hz_);
  msg->set_device_sample_rate(echo_cancellation_->device_sample_rate_hz());
  msg->set_num_input_channels(num_input_channels_);
  msg->set_num_output_channels(num_output_channels_);
  msg->set_num_reverse_channels(num_reverse_channels_);

  int err = WriteMessageToDebugFile();
  if (err != kNoError) {
    return err;
  }

  return kNoError;
}
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}  // namespace webrtc