/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_processing/audio_processing_impl.h"

#include <assert.h>
#include <algorithm>

#include "webrtc/base/checks.h"
#include "webrtc/base/platform_file.h"
#include "webrtc/base/trace_event.h"
#include "webrtc/common_audio/audio_converter.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
extern "C" {
#include "webrtc/modules/audio_processing/aec/aec_core.h"
}
#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"
#include "webrtc/modules/audio_processing/common.h"
#include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
#include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"
#include "webrtc/modules/audio_processing/gain_control_impl.h"
#include "webrtc/modules/audio_processing/high_pass_filter_impl.h"
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
#include "webrtc/modules/audio_processing/level_estimator_impl.h"
#include "webrtc/modules/audio_processing/noise_suppression_impl.h"
#include "webrtc/modules/audio_processing/processing_component.h"
#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
#include "webrtc/modules/audio_processing/voice_detection_impl.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/system_wrappers/include/file_wrapper.h"
#include "webrtc/system_wrappers/include/logging.h"
#include "webrtc/system_wrappers/include/metrics.h"

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
// Files generated at build-time by the protobuf compiler.
#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h"
#else
#include "webrtc/audio_processing/debug.pb.h"
#endif
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP

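// Evaluates |expr| once and returns its error code from the enclosing
// function if it is anything other than kNoError.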
#define RETURN_ON_ERR(expr) \
  do {                      \
    int err = (expr);       \
    if (err != kNoError) {  \
      return err;           \
    }                       \
  } while (0)

namespace webrtc {
namespace {

static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
  switch (layout) {
    case AudioProcessing::kMono:
    case AudioProcessing::kStereo:
      return false;
    case AudioProcessing::kMonoAndKeyboard:
    case AudioProcessing::kStereoAndKeyboard:
      return true;
  }

  assert(false);
  return false;
}
}  // namespace

// Throughout webrtc, it's assumed that success is represented by zero.
static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero");

// This class has two main functionalities:
//
// 1) It is returned instead of the real GainControl after the new AGC has been
//    enabled in order to prevent an outside user from overriding compression
//    settings. It doesn't do anything in its implementation, except for
//    delegating the const methods and Enable calls to the real GainControl, so
//    AGC can still be disabled.
//
// 2) It is injected into AgcManagerDirect and implements volume callbacks for
//    getting and setting the volume level. It just caches this value to be used
//    in VoiceEngine later.
class GainControlForNewAgc : public GainControl, public VolumeCallbacks {
 public:
  explicit GainControlForNewAgc(GainControlImpl* gain_control)
      : real_gain_control_(gain_control), volume_(0) {}

  // GainControl implementation.
  int Enable(bool enable) override {
    return real_gain_control_->Enable(enable);
  }
  bool is_enabled() const override { return real_gain_control_->is_enabled(); }
  int set_stream_analog_level(int level) override {
    volume_ = level;
    return AudioProcessing::kNoError;
  }
  int stream_analog_level() override { return volume_; }
  int set_mode(Mode mode) override { return AudioProcessing::kNoError; }
  Mode mode() const override { return GainControl::kAdaptiveAnalog; }
  int set_target_level_dbfs(int level) override {
    return AudioProcessing::kNoError;
  }
  int target_level_dbfs() const override {
    return real_gain_control_->target_level_dbfs();
  }
  int set_compression_gain_db(int gain) override {
    return AudioProcessing::kNoError;
  }
  int compression_gain_db() const override {
    return real_gain_control_->compression_gain_db();
  }
  int enable_limiter(bool enable) override { return AudioProcessing::kNoError; }
  bool is_limiter_enabled() const override {
    return real_gain_control_->is_limiter_enabled();
  }
  int set_analog_level_limits(int minimum, int maximum) override {
    return AudioProcessing::kNoError;
  }
  int analog_level_minimum() const override {
    return real_gain_control_->analog_level_minimum();
  }
  int analog_level_maximum() const override {
    return real_gain_control_->analog_level_maximum();
  }
  bool stream_is_saturated() const override {
    return real_gain_control_->stream_is_saturated();
  }

  // VolumeCallbacks implementation.
  void SetMicVolume(int volume) override { volume_ = volume; }
  int GetMicVolume() override { return volume_; }

 private:
  GainControl* real_gain_control_;
  int volume_;
};

struct AudioProcessingImpl::ApmPublicSubmodules {
  ApmPublicSubmodules()
      : echo_cancellation(nullptr),
        echo_control_mobile(nullptr),
        gain_control(nullptr) {}
  // Accessed from outside APM without any lock acquired.
  EchoCancellationImpl* echo_cancellation;
  EchoControlMobileImpl* echo_control_mobile;
  GainControlImpl* gain_control;
  rtc::scoped_ptr<HighPassFilterImpl> high_pass_filter;
  rtc::scoped_ptr<LevelEstimatorImpl> level_estimator;
  rtc::scoped_ptr<NoiseSuppressionImpl> noise_suppression;
  rtc::scoped_ptr<VoiceDetectionImpl> voice_detection;
  rtc::scoped_ptr<GainControlForNewAgc> gain_control_for_new_agc;

  // Accessed internally from both render and capture.
  rtc::scoped_ptr<TransientSuppressor> transient_suppressor;
  rtc::scoped_ptr<IntelligibilityEnhancer> intelligibility_enhancer;
};

struct AudioProcessingImpl::ApmPrivateSubmodules {
  explicit ApmPrivateSubmodules(Beamformer<float>* beamformer)
      : beamformer(beamformer) {}
  // Accessed internally from capture or during initialization.
  std::list<ProcessingComponent*> component_list;
  rtc::scoped_ptr<Beamformer<float>> beamformer;
  rtc::scoped_ptr<AgcManagerDirect> agc_manager;
};

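// The native sample rates below are listed in ascending order;
// kMaxNativeSampleRateHz relies on that ordering to pick the last entry.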
const int AudioProcessing::kNativeSampleRatesHz[] = {
    AudioProcessing::kSampleRate8kHz,
    AudioProcessing::kSampleRate16kHz,
    AudioProcessing::kSampleRate32kHz,
    AudioProcessing::kSampleRate48kHz};
const size_t AudioProcessing::kNumNativeSampleRates =
    arraysize(AudioProcessing::kNativeSampleRatesHz);
const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
    kNativeSampleRatesHz[AudioProcessing::kNumNativeSampleRates - 1];
const int AudioProcessing::kMaxAECMSampleRateHz = kSampleRate16kHz;

AudioProcessing* AudioProcessing::Create() {
  Config config;
  return Create(config, nullptr);
}

AudioProcessing* AudioProcessing::Create(const Config& config) {
  return Create(config, nullptr);
}

AudioProcessing* AudioProcessing::Create(const Config& config,
                                         Beamformer<float>* beamformer) {
  AudioProcessingImpl* apm = new AudioProcessingImpl(config, beamformer);
  if (apm->Initialize() != kNoError) {
    delete apm;
    apm = nullptr;
  }

  return apm;
}
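
// Illustrative client usage of the factory above (a sketch only; the frame
// and delay variables are caller-provided and not part of this file):
//   rtc::scoped_ptr<AudioProcessing> apm(AudioProcessing::Create());
//   if (apm) {
//     apm->ProcessReverseStream(render_frame);  // Far-end / render path.
//     apm->set_stream_delay_ms(delay_ms);
//     apm->ProcessStream(capture_frame);        // Near-end / capture path.
//   }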

AudioProcessingImpl::AudioProcessingImpl(const Config& config)
    : AudioProcessingImpl(config, nullptr) {}

AudioProcessingImpl::AudioProcessingImpl(const Config& config,
                                         Beamformer<float>* beamformer)
    : public_submodules_(new ApmPublicSubmodules()),
      private_submodules_(new ApmPrivateSubmodules(beamformer)),
      constants_(config.Get<ExperimentalAgc>().startup_min_volume,
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
                 false,
#else
                 config.Get<ExperimentalAgc>().enabled,
#endif
                 config.Get<Intelligibility>().enabled),

#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
      capture_(false,
#else
      capture_(config.Get<ExperimentalNs>().enabled,
#endif
               config.Get<Beamforming>().array_geometry,
               config.Get<Beamforming>().target_direction),
      capture_nonlocked_(config.Get<Beamforming>().enabled)
{
  {
    rtc::CritScope cs_render(&crit_render_);
    rtc::CritScope cs_capture(&crit_capture_);

    public_submodules_->echo_cancellation =
        new EchoCancellationImpl(this, &crit_render_, &crit_capture_);
    public_submodules_->echo_control_mobile =
        new EchoControlMobileImpl(this, &crit_render_, &crit_capture_);
    public_submodules_->gain_control =
        new GainControlImpl(this, &crit_capture_, &crit_capture_);
    public_submodules_->high_pass_filter.reset(
        new HighPassFilterImpl(&crit_capture_));
    public_submodules_->level_estimator.reset(
        new LevelEstimatorImpl(&crit_capture_));
    public_submodules_->noise_suppression.reset(
        new NoiseSuppressionImpl(&crit_capture_));
    public_submodules_->voice_detection.reset(
        new VoiceDetectionImpl(&crit_capture_));
    public_submodules_->gain_control_for_new_agc.reset(
        new GainControlForNewAgc(public_submodules_->gain_control));

    private_submodules_->component_list.push_back(
        public_submodules_->echo_cancellation);
    private_submodules_->component_list.push_back(
        public_submodules_->echo_control_mobile);
    private_submodules_->component_list.push_back(
        public_submodules_->gain_control);
  }

  SetExtraOptions(config);
}

AudioProcessingImpl::~AudioProcessingImpl() {
  // Depends on gain_control_ and
  // public_submodules_->gain_control_for_new_agc.
  private_submodules_->agc_manager.reset();
  // Depends on gain_control_.
  public_submodules_->gain_control_for_new_agc.reset();
  while (!private_submodules_->component_list.empty()) {
    ProcessingComponent* component =
        private_submodules_->component_list.front();
    component->Destroy();
    delete component;
    private_submodules_->component_list.pop_front();
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    debug_dump_.debug_file->CloseFile();
  }
#endif
}

int AudioProcessingImpl::Initialize() {
  // Run in a single-threaded manner during initialization.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);
  return InitializeLocked();
}

int AudioProcessingImpl::Initialize(int input_sample_rate_hz,
                                    int output_sample_rate_hz,
                                    int reverse_sample_rate_hz,
                                    ChannelLayout input_layout,
                                    ChannelLayout output_layout,
                                    ChannelLayout reverse_layout) {
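  // This overload has no separate reverse output parameters, so the reverse
  // output stream is configured identically to the reverse input stream.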
  const ProcessingConfig processing_config = {
      {{input_sample_rate_hz,
        ChannelsFromLayout(input_layout),
        LayoutHasKeyboard(input_layout)},
       {output_sample_rate_hz,
        ChannelsFromLayout(output_layout),
        LayoutHasKeyboard(output_layout)},
       {reverse_sample_rate_hz,
        ChannelsFromLayout(reverse_layout),
        LayoutHasKeyboard(reverse_layout)},
       {reverse_sample_rate_hz,
        ChannelsFromLayout(reverse_layout),
        LayoutHasKeyboard(reverse_layout)}}};

  return Initialize(processing_config);
}

int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) {
  // Run in a single-threaded manner during initialization.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);
  return InitializeLocked(processing_config);
}

int AudioProcessingImpl::MaybeInitializeRender(
    const ProcessingConfig& processing_config) {
  return MaybeInitialize(processing_config);
}

int AudioProcessingImpl::MaybeInitializeCapture(
    const ProcessingConfig& processing_config) {
  return MaybeInitialize(processing_config);
}

// Calls InitializeLocked() if any of the audio parameters have changed from
// their current values (needs to be called while holding the crit_render_lock).
int AudioProcessingImpl::MaybeInitialize(
    const ProcessingConfig& processing_config) {
  // Called from both threads. Thread check is therefore not possible.
  if (processing_config == formats_.api_format) {
    return kNoError;
  }

  rtc::CritScope cs_capture(&crit_capture_);
  return InitializeLocked(processing_config);
}

int AudioProcessingImpl::InitializeLocked() {
  const int fwd_audio_buffer_channels =
      capture_nonlocked_.beamformer_enabled
          ? formats_.api_format.input_stream().num_channels()
          : formats_.api_format.output_stream().num_channels();
  const int rev_audio_buffer_out_num_frames =
      formats_.api_format.reverse_output_stream().num_frames() == 0
          ? formats_.rev_proc_format.num_frames()
          : formats_.api_format.reverse_output_stream().num_frames();
  if (formats_.api_format.reverse_input_stream().num_channels() > 0) {
    render_.render_audio.reset(new AudioBuffer(
        formats_.api_format.reverse_input_stream().num_frames(),
        formats_.api_format.reverse_input_stream().num_channels(),
        formats_.rev_proc_format.num_frames(),
        formats_.rev_proc_format.num_channels(),
        rev_audio_buffer_out_num_frames));
    if (rev_conversion_needed()) {
      render_.render_converter = AudioConverter::Create(
          formats_.api_format.reverse_input_stream().num_channels(),
          formats_.api_format.reverse_input_stream().num_frames(),
          formats_.api_format.reverse_output_stream().num_channels(),
          formats_.api_format.reverse_output_stream().num_frames());
    } else {
      render_.render_converter.reset(nullptr);
    }
  } else {
    render_.render_audio.reset(nullptr);
    render_.render_converter.reset(nullptr);
  }
  capture_.capture_audio.reset(
      new AudioBuffer(formats_.api_format.input_stream().num_frames(),
                      formats_.api_format.input_stream().num_channels(),
                      capture_nonlocked_.fwd_proc_format.num_frames(),
                      fwd_audio_buffer_channels,
                      formats_.api_format.output_stream().num_frames()));

  // Initialize all components.
  for (auto item : private_submodules_->component_list) {
    int err = item->Initialize();
    if (err != kNoError) {
      return err;
    }
  }

  InitializeExperimentalAgc();
  InitializeTransient();
  InitializeBeamformer();
  InitializeIntelligibility();
  InitializeHighPassFilter();
  InitializeNoiseSuppression();
  InitializeLevelEstimator();
  InitializeVoiceDetection();

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    int err = WriteInitMessage();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  return kNoError;
}

int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
  for (const auto& stream : config.streams) {
    if (stream.num_channels() > 0 && stream.sample_rate_hz() <= 0) {
      return kBadSampleRateError;
    }
  }

  const size_t num_in_channels = config.input_stream().num_channels();
  const size_t num_out_channels = config.output_stream().num_channels();

  // Need at least one input channel.
  // Need either one output channel or as many outputs as there are inputs.
  if (num_in_channels == 0 ||
      !(num_out_channels == 1 || num_out_channels == num_in_channels)) {
    return kBadNumberChannelsError;
  }

  if (capture_nonlocked_.beamformer_enabled &&
      num_in_channels != capture_.array_geometry.size()) {
    return kBadNumberChannelsError;
  }

  formats_.api_format = config;

  // We process at the closest native rate >= min(input rate, output rate)...
  const int min_proc_rate =
      std::min(formats_.api_format.input_stream().sample_rate_hz(),
               formats_.api_format.output_stream().sample_rate_hz());
  int fwd_proc_rate;
  for (size_t i = 0; i < kNumNativeSampleRates; ++i) {
    fwd_proc_rate = kNativeSampleRatesHz[i];
    if (fwd_proc_rate >= min_proc_rate) {
      break;
    }
  }
  // ...with one exception.
  if (public_submodules_->echo_control_mobile->is_enabled() &&
      min_proc_rate > kMaxAECMSampleRateHz) {
    fwd_proc_rate = kMaxAECMSampleRateHz;
  }

  capture_nonlocked_.fwd_proc_format = StreamConfig(fwd_proc_rate);

  // We normally process the reverse stream at 16 kHz. Unless...
  int rev_proc_rate = kSampleRate16kHz;
  if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate8kHz) {
    // ...the forward stream is at 8 kHz.
    rev_proc_rate = kSampleRate8kHz;
  } else {
    if (formats_.api_format.reverse_input_stream().sample_rate_hz() ==
        kSampleRate32kHz) {
      // ...or the input is at 32 kHz, in which case we use the splitting
      // filter rather than the resampler.
      rev_proc_rate = kSampleRate32kHz;
    }
  }

  // Always downmix the reverse stream to mono for analysis. This has been
  // demonstrated to work well for AEC in most practical scenarios.
  formats_.rev_proc_format = StreamConfig(rev_proc_rate, 1);

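  // At 32 kHz and 48 kHz the forward stream is processed on split bands, with
  // the low band running at 16 kHz; at lower rates the split rate equals the
  // forward processing rate.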
  if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate32kHz ||
      capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate48kHz) {
    capture_nonlocked_.split_rate = kSampleRate16kHz;
  } else {
    capture_nonlocked_.split_rate =
        capture_nonlocked_.fwd_proc_format.sample_rate_hz();
  }

  return InitializeLocked();
}

void AudioProcessingImpl::SetExtraOptions(const Config& config) {
  // Run in a single-threaded manner when setting the extra options.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);
  for (auto item : private_submodules_->component_list) {
    item->SetExtraOptions(config);
  }

  if (capture_.transient_suppressor_enabled !=
      config.Get<ExperimentalNs>().enabled) {
    capture_.transient_suppressor_enabled =
        config.Get<ExperimentalNs>().enabled;
    InitializeTransient();
  }

#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
  if (capture_nonlocked_.beamformer_enabled !=
          config.Get<Beamforming>().enabled) {
    capture_nonlocked_.beamformer_enabled = config.Get<Beamforming>().enabled;
    if (config.Get<Beamforming>().array_geometry.size() > 1) {
      capture_.array_geometry = config.Get<Beamforming>().array_geometry;
    }
    capture_.target_direction = config.Get<Beamforming>().target_direction;
    InitializeBeamformer();
  }
#endif  // WEBRTC_ANDROID_PLATFORM_BUILD
}

int AudioProcessingImpl::input_sample_rate_hz() const {
  // Accessed from outside APM, hence a lock is needed.
  rtc::CritScope cs(&crit_capture_);
  return formats_.api_format.input_stream().sample_rate_hz();
}

int AudioProcessingImpl::proc_sample_rate_hz() const {
  // Used as callback from submodules, hence locking is not allowed.
  return capture_nonlocked_.fwd_proc_format.sample_rate_hz();
}

int AudioProcessingImpl::proc_split_sample_rate_hz() const {
  // Used as callback from submodules, hence locking is not allowed.
  return capture_nonlocked_.split_rate;
}

size_t AudioProcessingImpl::num_reverse_channels() const {
  // Used as callback from submodules, hence locking is not allowed.
  return formats_.rev_proc_format.num_channels();
}

size_t AudioProcessingImpl::num_input_channels() const {
  // Used as callback from submodules, hence locking is not allowed.
  return formats_.api_format.input_stream().num_channels();
}

size_t AudioProcessingImpl::num_proc_channels() const {
  // Used as callback from submodules, hence locking is not allowed.
  return capture_nonlocked_.beamformer_enabled ? 1 : num_output_channels();
}

size_t AudioProcessingImpl::num_output_channels() const {
  // Used as callback from submodules, hence locking is not allowed.
  return formats_.api_format.output_stream().num_channels();
}

void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
  rtc::CritScope cs(&crit_capture_);
  capture_.output_will_be_muted = muted;
  if (private_submodules_->agc_manager.get()) {
    private_submodules_->agc_manager->SetCaptureMuted(
        capture_.output_will_be_muted);
  }
}


int AudioProcessingImpl::ProcessStream(const float* const* src,
                                       size_t samples_per_channel,
                                       int input_sample_rate_hz,
                                       ChannelLayout input_layout,
                                       int output_sample_rate_hz,
                                       ChannelLayout output_layout,
                                       float* const* dest) {
  TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_ChannelLayout");
  StreamConfig input_stream;
  StreamConfig output_stream;
  {
    // Access formats_.api_format.input_stream beneath the capture lock.
    // The lock must then be released, as it is reacquired in the call to
    // ProcessStream() below.
    rtc::CritScope cs(&crit_capture_);
    input_stream = formats_.api_format.input_stream();
    output_stream = formats_.api_format.output_stream();
  }

  input_stream.set_sample_rate_hz(input_sample_rate_hz);
  input_stream.set_num_channels(ChannelsFromLayout(input_layout));
  input_stream.set_has_keyboard(LayoutHasKeyboard(input_layout));
  output_stream.set_sample_rate_hz(output_sample_rate_hz);
  output_stream.set_num_channels(ChannelsFromLayout(output_layout));
  output_stream.set_has_keyboard(LayoutHasKeyboard(output_layout));

  if (samples_per_channel != input_stream.num_frames()) {
    return kBadDataLengthError;
  }
  return ProcessStream(src, input_stream, output_stream, dest);
}

int AudioProcessingImpl::ProcessStream(const float* const* src,
                                       const StreamConfig& input_config,
                                       const StreamConfig& output_config,
                                       float* const* dest) {
  TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_StreamConfig");
  ProcessingConfig processing_config;
  {
    // Acquire the capture lock in order to safely call the function
    // that retrieves the render side data. This function accesses apm
    // getters that need the capture lock held when being called.
    rtc::CritScope cs_capture(&crit_capture_);
    public_submodules_->echo_cancellation->ReadQueuedRenderData();
    public_submodules_->echo_control_mobile->ReadQueuedRenderData();
    public_submodules_->gain_control->ReadQueuedRenderData();

    if (!src || !dest) {
      return kNullPointerError;
    }

    processing_config = formats_.api_format;
  }

  processing_config.input_stream() = input_config;
  processing_config.output_stream() = output_config;

  {
    // Do conditional reinitialization.
    rtc::CritScope cs_render(&crit_render_);
    RETURN_ON_ERR(MaybeInitializeCapture(processing_config));
  }
  rtc::CritScope cs_capture(&crit_capture_);
  assert(processing_config.input_stream().num_frames() ==
         formats_.api_format.input_stream().num_frames());

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    RETURN_ON_ERR(WriteConfigMessage(false));

    debug_dump_.capture.event_msg->set_type(audioproc::Event::STREAM);
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    const size_t channel_size =
        sizeof(float) * formats_.api_format.input_stream().num_frames();
    for (size_t i = 0; i < formats_.api_format.input_stream().num_channels();
         ++i)
      msg->add_input_channel(src[i], channel_size);
  }
#endif

  capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream());
  RETURN_ON_ERR(ProcessStreamLocked());
  capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    const size_t channel_size =
        sizeof(float) * formats_.api_format.output_stream().num_frames();
    for (size_t i = 0; i < formats_.api_format.output_stream().num_channels();
         ++i)
      msg->add_output_channel(dest[i], channel_size);
    RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                          &crit_debug_, &debug_dump_.capture));
  }
#endif

  return kNoError;
}

int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
  TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame");
  {
    // Acquire the capture lock in order to safely call the function
    // that retrieves the render side data. This function accesses apm
    // getters that need the capture lock held when being called.
    // The lock needs to be released as
    // public_submodules_->echo_control_mobile->is_enabled() acquires this lock
    // as well.
    rtc::CritScope cs_capture(&crit_capture_);
    public_submodules_->echo_cancellation->ReadQueuedRenderData();
    public_submodules_->echo_control_mobile->ReadQueuedRenderData();
    public_submodules_->gain_control->ReadQueuedRenderData();
  }

  if (!frame) {
    return kNullPointerError;
  }
  // Must be a native rate.
  if (frame->sample_rate_hz_ != kSampleRate8kHz &&
      frame->sample_rate_hz_ != kSampleRate16kHz &&
      frame->sample_rate_hz_ != kSampleRate32kHz &&
      frame->sample_rate_hz_ != kSampleRate48kHz) {
    return kBadSampleRateError;
  }

  if (public_submodules_->echo_control_mobile->is_enabled() &&
      frame->sample_rate_hz_ > kMaxAECMSampleRateHz) {
    LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates";
    return kUnsupportedComponentError;
  }

  ProcessingConfig processing_config;
  {
    // Acquire the lock for accessing api_format.
    // The lock is released immediately due to the conditional
    // reinitialization.
    rtc::CritScope cs_capture(&crit_capture_);
    // TODO(ajm): The input and output rates and channels are currently
    // constrained to be identical in the int16 interface.
    processing_config = formats_.api_format;
  }
  processing_config.input_stream().set_sample_rate_hz(frame->sample_rate_hz_);
  processing_config.input_stream().set_num_channels(frame->num_channels_);
  processing_config.output_stream().set_sample_rate_hz(frame->sample_rate_hz_);
  processing_config.output_stream().set_num_channels(frame->num_channels_);

  {
    // Do conditional reinitialization.
    rtc::CritScope cs_render(&crit_render_);
    RETURN_ON_ERR(MaybeInitializeCapture(processing_config));
  }
  rtc::CritScope cs_capture(&crit_capture_);
  if (frame->samples_per_channel_ !=
      formats_.api_format.input_stream().num_frames()) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    debug_dump_.capture.event_msg->set_type(audioproc::Event::STREAM);
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    const size_t data_size =
        sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
    msg->set_input_data(frame->data_, data_size);
  }
#endif

  capture_.capture_audio->DeinterleaveFrom(frame);
  RETURN_ON_ERR(ProcessStreamLocked());
  capture_.capture_audio->InterleaveTo(frame,
                                       output_copy_needed(is_data_processed()));

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    const size_t data_size =
        sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
    msg->set_output_data(frame->data_, data_size);
    RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                          &crit_debug_, &debug_dump_.capture));
  }
#endif

  return kNoError;
}

int AudioProcessingImpl::ProcessStreamLocked() {
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    msg->set_delay(capture_nonlocked_.stream_delay_ms);
    msg->set_drift(
        public_submodules_->echo_cancellation->stream_drift_samples());
    msg->set_level(gain_control()->stream_analog_level());
    msg->set_keypress(capture_.key_pressed);
  }
#endif

  MaybeUpdateHistograms();

  AudioBuffer* ca = capture_.capture_audio.get();  // For brevity.

  if (constants_.use_new_agc &&
      public_submodules_->gain_control->is_enabled()) {
    private_submodules_->agc_manager->AnalyzePreProcess(
        ca->channels()[0], ca->num_channels(),
        capture_nonlocked_.fwd_proc_format.num_frames());
  }

  bool data_processed = is_data_processed();
  if (analysis_needed(data_processed)) {
    ca->SplitIntoFrequencyBands();
  }

  if (constants_.intelligibility_enabled) {
    public_submodules_->intelligibility_enhancer->AnalyzeCaptureAudio(
        ca->split_channels_f(kBand0To8kHz), capture_nonlocked_.split_rate,
        ca->num_channels());
  }

  if (capture_nonlocked_.beamformer_enabled) {
    private_submodules_->beamformer->ProcessChunk(*ca->split_data_f(),
                                                  ca->split_data_f());
    ca->set_num_channels(1);
  }

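  // Capture-side processing chain: high-pass filter, AGC analysis, noise
  // suppression analysis, AEC, noise suppression, AECM, voice detection and
  // gain control, followed further below by optional band merging, transient
  // suppression and level estimation.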
  public_submodules_->high_pass_filter->ProcessCaptureAudio(ca);
  RETURN_ON_ERR(public_submodules_->gain_control->AnalyzeCaptureAudio(ca));
  public_submodules_->noise_suppression->AnalyzeCaptureAudio(ca);
  RETURN_ON_ERR(public_submodules_->echo_cancellation->ProcessCaptureAudio(ca));

  if (public_submodules_->echo_control_mobile->is_enabled() &&
      public_submodules_->noise_suppression->is_enabled()) {
    ca->CopyLowPassToReference();
  }
  public_submodules_->noise_suppression->ProcessCaptureAudio(ca);
  RETURN_ON_ERR(
      public_submodules_->echo_control_mobile->ProcessCaptureAudio(ca));
  public_submodules_->voice_detection->ProcessCaptureAudio(ca);

  if (constants_.use_new_agc &&
      public_submodules_->gain_control->is_enabled() &&
      (!capture_nonlocked_.beamformer_enabled ||
       private_submodules_->beamformer->is_target_present())) {
    private_submodules_->agc_manager->Process(
        ca->split_bands_const(0)[kBand0To8kHz], ca->num_frames_per_band(),
        capture_nonlocked_.split_rate);
  }
  RETURN_ON_ERR(public_submodules_->gain_control->ProcessCaptureAudio(ca));

  if (synthesis_needed(data_processed)) {
    ca->MergeFrequencyBands();
  }

  // TODO(aluebs): Investigate if the transient suppression placement should be
  // before or after the AGC.
  if (capture_.transient_suppressor_enabled) {
    float voice_probability =
        private_submodules_->agc_manager.get()
            ? private_submodules_->agc_manager->voice_probability()
            : 1.f;

    public_submodules_->transient_suppressor->Suppress(
        ca->channels_f()[0], ca->num_frames(), ca->num_channels(),
        ca->split_bands_const_f(0)[kBand0To8kHz], ca->num_frames_per_band(),
        ca->keyboard_data(), ca->num_keyboard_frames(), voice_probability,
        capture_.key_pressed);
  }

  // The level estimator operates on the recombined data.
  public_submodules_->level_estimator->ProcessStream(ca);

  capture_.was_stream_delay_set = false;
  return kNoError;
}

int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
                                              size_t samples_per_channel,
                                              int rev_sample_rate_hz,
                                              ChannelLayout layout) {
  TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_ChannelLayout");
  rtc::CritScope cs(&crit_render_);
  const StreamConfig reverse_config = {
      rev_sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout),
  };
  if (samples_per_channel != reverse_config.num_frames()) {
    return kBadDataLengthError;
  }
  return AnalyzeReverseStreamLocked(data, reverse_config, reverse_config);
}

int AudioProcessingImpl::ProcessReverseStream(
    const float* const* src,
    const StreamConfig& reverse_input_config,
    const StreamConfig& reverse_output_config,
    float* const* dest) {
  TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_StreamConfig");
  rtc::CritScope cs(&crit_render_);
  RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, reverse_input_config,
                                           reverse_output_config));
  if (is_rev_processed()) {
    render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(),
                                 dest);
  } else if (render_check_rev_conversion_needed()) {
    render_.render_converter->Convert(src, reverse_input_config.num_samples(),
                                      dest,
                                      reverse_output_config.num_samples());
  } else {
    CopyAudioIfNeeded(src, reverse_input_config.num_frames(),
                      reverse_input_config.num_channels(), dest);
  }

  return kNoError;
}

int AudioProcessingImpl::AnalyzeReverseStreamLocked(
    const float* const* src,
    const StreamConfig& reverse_input_config,
    const StreamConfig& reverse_output_config) {
  if (src == nullptr) {
    return kNullPointerError;
  }

  if (reverse_input_config.num_channels() == 0) {
    return kBadNumberChannelsError;
  }

  ProcessingConfig processing_config = formats_.api_format;
  processing_config.reverse_input_stream() = reverse_input_config;
  processing_config.reverse_output_stream() = reverse_output_config;

  RETURN_ON_ERR(MaybeInitializeRender(processing_config));
  assert(reverse_input_config.num_frames() ==
         formats_.api_format.reverse_input_stream().num_frames());

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    debug_dump_.render.event_msg->set_type(audioproc::Event::REVERSE_STREAM);
    audioproc::ReverseStream* msg =
        debug_dump_.render.event_msg->mutable_reverse_stream();
    const size_t channel_size =
        sizeof(float) * formats_.api_format.reverse_input_stream().num_frames();
    for (size_t i = 0;
         i < formats_.api_format.reverse_input_stream().num_channels(); ++i)
      msg->add_channel(src[i], channel_size);
    RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                          &crit_debug_, &debug_dump_.render));
  }
#endif

  render_.render_audio->CopyFrom(src,
                                 formats_.api_format.reverse_input_stream());
  return ProcessReverseStreamLocked();
}

int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
  TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame");
  RETURN_ON_ERR(AnalyzeReverseStream(frame));
  rtc::CritScope cs(&crit_render_);
  if (is_rev_processed()) {
    render_.render_audio->InterleaveTo(frame, true);
  }

  return kNoError;
}

int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
  TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_AudioFrame");
  rtc::CritScope cs(&crit_render_);
  if (frame == nullptr) {
    return kNullPointerError;
  }
  // Must be a native rate.
  if (frame->sample_rate_hz_ != kSampleRate8kHz &&
      frame->sample_rate_hz_ != kSampleRate16kHz &&
      frame->sample_rate_hz_ != kSampleRate32kHz &&
      frame->sample_rate_hz_ != kSampleRate48kHz) {
    return kBadSampleRateError;
  }
  // This interface does not tolerate different forward and reverse rates.
  if (frame->sample_rate_hz_ !=
      formats_.api_format.input_stream().sample_rate_hz()) {
    return kBadSampleRateError;
  }

  if (frame->num_channels_ <= 0) {
    return kBadNumberChannelsError;
  }

  ProcessingConfig processing_config = formats_.api_format;
  processing_config.reverse_input_stream().set_sample_rate_hz(
      frame->sample_rate_hz_);
  processing_config.reverse_input_stream().set_num_channels(
      frame->num_channels_);
  processing_config.reverse_output_stream().set_sample_rate_hz(
      frame->sample_rate_hz_);
  processing_config.reverse_output_stream().set_num_channels(
      frame->num_channels_);

  RETURN_ON_ERR(MaybeInitializeRender(processing_config));
  if (frame->samples_per_channel_ !=
      formats_.api_format.reverse_input_stream().num_frames()) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_dump_.debug_file->Open()) {
    debug_dump_.render.event_msg->set_type(audioproc::Event::REVERSE_STREAM);
    audioproc::ReverseStream* msg =
        debug_dump_.render.event_msg->mutable_reverse_stream();
    const size_t data_size =
        sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
    msg->set_data(frame->data_, data_size);
    RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                          &crit_debug_, &debug_dump_.render));
  }
#endif
  render_.render_audio->DeinterleaveFrom(frame);
  return ProcessReverseStreamLocked();
}

int AudioProcessingImpl::ProcessReverseStreamLocked() {
  AudioBuffer* ra = render_.render_audio.get();  // For brevity.
  if (formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz) {
    ra->SplitIntoFrequencyBands();
  }
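
  // Render-side chain: the intelligibility enhancer (when enabled), AEC and
  // AECM consume the render signal below; the legacy AGC also analyzes it
  // unless the new AGC is in use.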

  if (constants_.intelligibility_enabled) {
    // Currently run in single-threaded mode when the intelligibility
    // enhancer is activated.
    // TODO(peah): Fix to be properly multi-threaded.
    rtc::CritScope cs(&crit_capture_);
    public_submodules_->intelligibility_enhancer->ProcessRenderAudio(
        ra->split_channels_f(kBand0To8kHz), capture_nonlocked_.split_rate,
        ra->num_channels());
  }

  RETURN_ON_ERR(public_submodules_->echo_cancellation->ProcessRenderAudio(ra));
  RETURN_ON_ERR(
      public_submodules_->echo_control_mobile->ProcessRenderAudio(ra));
  if (!constants_.use_new_agc) {
    RETURN_ON_ERR(public_submodules_->gain_control->ProcessRenderAudio(ra));
  }

  if (formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz &&
      is_rev_processed()) {
    ra->MergeFrequencyBands();
  }

  return kNoError;
}

int AudioProcessingImpl::set_stream_delay_ms(int delay) {
  rtc::CritScope cs(&crit_capture_);
  Error retval = kNoError;
  capture_.was_stream_delay_set = true;
  delay += capture_.delay_offset_ms;

  if (delay < 0) {
    delay = 0;
    retval = kBadStreamParameterWarning;
  }

  // TODO(ajm): the max is rather arbitrarily chosen; investigate.
  if (delay > 500) {
    delay = 500;
    retval = kBadStreamParameterWarning;
  }

  capture_nonlocked_.stream_delay_ms = delay;
  return retval;
}

int AudioProcessingImpl::stream_delay_ms() const {
  // Used as callback from submodules, hence locking is not allowed.
  return capture_nonlocked_.stream_delay_ms;
}

bool AudioProcessingImpl::was_stream_delay_set() const {
  // Used as callback from submodules, hence locking is not allowed.
  return capture_.was_stream_delay_set;
}

void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
  rtc::CritScope cs(&crit_capture_);
  capture_.key_pressed = key_pressed;
}

void AudioProcessingImpl::set_delay_offset_ms(int offset) {
  rtc::CritScope cs(&crit_capture_);
  capture_.delay_offset_ms = offset;
}

int AudioProcessingImpl::delay_offset_ms() const {
  rtc::CritScope cs(&crit_capture_);
  return capture_.delay_offset_ms;
}

int AudioProcessingImpl::StartDebugRecording(
    const char filename[AudioProcessing::kMaxFilenameSize]) {
  // Run in a single-threaded manner.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);
  static_assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize, "");

  if (filename == nullptr) {
    return kNullPointerError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Stop any ongoing recording.
  if (debug_dump_.debug_file->Open()) {
    if (debug_dump_.debug_file->CloseFile() == -1) {
      return kFileError;
    }
  }

  if (debug_dump_.debug_file->OpenFile(filename, false) == -1) {
    debug_dump_.debug_file->CloseFile();
    return kFileError;
  }

  RETURN_ON_ERR(WriteConfigMessage(true));
  RETURN_ON_ERR(WriteInitMessage());
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}

int AudioProcessingImpl::StartDebugRecording(FILE* handle) {
  // Run in a single-threaded manner.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);

  if (handle == nullptr) {
    return kNullPointerError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Stop any ongoing recording.
  if (debug_dump_.debug_file->Open()) {
    if (debug_dump_.debug_file->CloseFile() == -1) {
      return kFileError;
    }
  }

  if (debug_dump_.debug_file->OpenFromFileHandle(handle, true, false) == -1) {
    return kFileError;
  }

  RETURN_ON_ERR(WriteConfigMessage(true));
  RETURN_ON_ERR(WriteInitMessage());
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}

int AudioProcessingImpl::StartDebugRecordingForPlatformFile(
    rtc::PlatformFile handle) {
  // Run in a single-threaded manner.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);
  FILE* stream = rtc::FdopenPlatformFileForWriting(handle);
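  // If the platform file could not be wrapped, |stream| is null and the
  // overload below reports kNullPointerError.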
  return StartDebugRecording(stream);
}

int AudioProcessingImpl::StopDebugRecording() {
  // Run in a single-threaded manner.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // We just return if recording hasn't started.
  if (debug_dump_.debug_file->Open()) {
    if (debug_dump_.debug_file->CloseFile() == -1) {
      return kFileError;
    }
  }
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}

EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
  // Adding a lock here has no effect as it allows any access to the submodule
  // from the returned pointer.
  return public_submodules_->echo_cancellation;
}

EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
  // Adding a lock here has no effect as it allows any access to the submodule
  // from the returned pointer.
  return public_submodules_->echo_control_mobile;
}

GainControl* AudioProcessingImpl::gain_control() const {
  // Adding a lock here has no effect as it allows any access to the submodule
  // from the returned pointer.
  if (constants_.use_new_agc) {
    return public_submodules_->gain_control_for_new_agc.get();
  }
  return public_submodules_->gain_control;
}

HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
  // Adding a lock here has no effect as it allows any access to the submodule
  // from the returned pointer.
  return public_submodules_->high_pass_filter.get();
}

LevelEstimator* AudioProcessingImpl::level_estimator() const {
  // Adding a lock here has no effect as it allows any access to the submodule
  // from the returned pointer.
  return public_submodules_->level_estimator.get();
}

NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
  // Adding a lock here has no effect as it allows any access to the submodule
  // from the returned pointer.
  return public_submodules_->noise_suppression.get();
}

VoiceDetection* AudioProcessingImpl::voice_detection() const {
  // Adding a lock here has no effect as it allows any access to the submodule
  // from the returned pointer.
  return public_submodules_->voice_detection.get();
}

bool AudioProcessingImpl::is_data_processed() const {
  if (capture_nonlocked_.beamformer_enabled) {
    return true;
  }

  int enabled_count = 0;
  for (auto item : private_submodules_->component_list) {
    if (item->is_component_enabled()) {
      enabled_count++;
    }
  }
  if (public_submodules_->high_pass_filter->is_enabled()) {
    enabled_count++;
  }
  if (public_submodules_->noise_suppression->is_enabled()) {
    enabled_count++;
  }
  if (public_submodules_->level_estimator->is_enabled()) {
    enabled_count++;
  }
  if (public_submodules_->voice_detection->is_enabled()) {
    enabled_count++;
  }

  // Data is unchanged if no components are enabled, or if only
  // public_submodules_->level_estimator or public_submodules_->voice_detection
  // is enabled.
  if (enabled_count == 0) {
    return false;
  } else if (enabled_count == 1) {
    if (public_submodules_->level_estimator->is_enabled() ||
        public_submodules_->voice_detection->is_enabled()) {
      return false;
    }
  } else if (enabled_count == 2) {
    if (public_submodules_->level_estimator->is_enabled() &&
        public_submodules_->voice_detection->is_enabled()) {
      return false;
    }
  }
  return true;
}

bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
  // Check if we've upmixed or downmixed the audio.
  return ((formats_.api_format.output_stream().num_channels() !=
           formats_.api_format.input_stream().num_channels()) ||
          is_data_processed || capture_.transient_suppressor_enabled);
}

bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const {
  return (is_data_processed &&
          (capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
               kSampleRate32kHz ||
           capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
               kSampleRate48kHz));
}

bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
  if (!is_data_processed &&
      !public_submodules_->voice_detection->is_enabled() &&
      !capture_.transient_suppressor_enabled) {
    // Only public_submodules_->level_estimator is enabled.
    return false;
  } else if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
                 kSampleRate32kHz ||
             capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
                 kSampleRate48kHz) {
    // Something besides public_submodules_->level_estimator is enabled, and we
    // have super-wb.
    return true;
  }
  return false;
}

bool AudioProcessingImpl::is_rev_processed() const {
  return constants_.intelligibility_enabled &&
         public_submodules_->intelligibility_enhancer->active();
}

bool AudioProcessingImpl::render_check_rev_conversion_needed() const {
  return rev_conversion_needed();
}

bool AudioProcessingImpl::rev_conversion_needed() const {
  return (formats_.api_format.reverse_input_stream() !=
          formats_.api_format.reverse_output_stream());
}
1277 
InitializeExperimentalAgc()1278 void AudioProcessingImpl::InitializeExperimentalAgc() {
1279   if (constants_.use_new_agc) {
1280     if (!private_submodules_->agc_manager.get()) {
1281       private_submodules_->agc_manager.reset(new AgcManagerDirect(
1282           public_submodules_->gain_control,
1283           public_submodules_->gain_control_for_new_agc.get(),
1284           constants_.agc_startup_min_volume));
1285     }
1286     private_submodules_->agc_manager->Initialize();
1287     private_submodules_->agc_manager->SetCaptureMuted(
1288         capture_.output_will_be_muted);
1289   }
1290 }
1291 
InitializeTransient()1292 void AudioProcessingImpl::InitializeTransient() {
1293   if (capture_.transient_suppressor_enabled) {
1294     if (!public_submodules_->transient_suppressor.get()) {
1295       public_submodules_->transient_suppressor.reset(new TransientSuppressor());
1296     }
1297     public_submodules_->transient_suppressor->Initialize(
1298         capture_nonlocked_.fwd_proc_format.sample_rate_hz(),
1299         capture_nonlocked_.split_rate,
1300         num_proc_channels());
1301   }
1302 }
1303 
void AudioProcessingImpl::InitializeBeamformer() {
  if (capture_nonlocked_.beamformer_enabled) {
    if (!private_submodules_->beamformer) {
      private_submodules_->beamformer.reset(new NonlinearBeamformer(
          capture_.array_geometry, capture_.target_direction));
    }
    private_submodules_->beamformer->Initialize(kChunkSizeMs,
                                                capture_nonlocked_.split_rate);
  }
}

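// Recreates the intelligibility enhancer with the current split rate and
// capture/render channel counts; unlike the other submodules it is rebuilt
// rather than reinitialized in place.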
void AudioProcessingImpl::InitializeIntelligibility() {
  if (constants_.intelligibility_enabled) {
    IntelligibilityEnhancer::Config config;
    config.sample_rate_hz = capture_nonlocked_.split_rate;
    config.num_capture_channels = capture_.capture_audio->num_channels();
    config.num_render_channels = render_.render_audio->num_channels();
    public_submodules_->intelligibility_enhancer.reset(
        new IntelligibilityEnhancer(config));
  }
}

void AudioProcessingImpl::InitializeHighPassFilter() {
  public_submodules_->high_pass_filter->Initialize(num_proc_channels(),
                                                   proc_sample_rate_hz());
}

void AudioProcessingImpl::InitializeNoiseSuppression() {
  public_submodules_->noise_suppression->Initialize(num_proc_channels(),
                                                    proc_sample_rate_hz());
}

void AudioProcessingImpl::InitializeLevelEstimator() {
  public_submodules_->level_estimator->Initialize();
}

void AudioProcessingImpl::InitializeVoiceDetection() {
  public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz());
}

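// Reports UMA histograms when the platform-reported stream delay or the AEC
// internal system delay jumps by more than kMinDiffDelayMs between
// consecutive capture calls, and counts such jumps for the duration of the
// call. Only active while echo cancellation is enabled.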
void AudioProcessingImpl::MaybeUpdateHistograms() {
  static const int kMinDiffDelayMs = 60;

  if (echo_cancellation()->is_enabled()) {
    // Activate the delay jump counters once we know echo cancellation is
    // running: a stream that reports echo implies the canceller is actively
    // processing.
    if (capture_.stream_delay_jumps == -1 &&
        echo_cancellation()->stream_has_echo()) {
      capture_.stream_delay_jumps = 0;
    }
    if (capture_.aec_system_delay_jumps == -1 &&
        echo_cancellation()->stream_has_echo()) {
      capture_.aec_system_delay_jumps = 0;
    }

    // Detect a jump in platform reported system delay and log the difference.
    const int diff_stream_delay_ms =
        capture_nonlocked_.stream_delay_ms - capture_.last_stream_delay_ms;
    if (diff_stream_delay_ms > kMinDiffDelayMs &&
        capture_.last_stream_delay_ms != 0) {
      RTC_HISTOGRAM_COUNTS_SPARSE(
          "WebRTC.Audio.PlatformReportedStreamDelayJump", diff_stream_delay_ms,
          kMinDiffDelayMs, 1000, 100);
      if (capture_.stream_delay_jumps == -1) {
        capture_.stream_delay_jumps = 0;  // Activate counter if needed.
      }
      capture_.stream_delay_jumps++;
    }
    capture_.last_stream_delay_ms = capture_nonlocked_.stream_delay_ms;

    // Detect a jump in AEC system delay and log the difference.
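    // The AEC reports its internal delay in samples at the split rate;
    // dividing by frames_per_ms (split_rate / 1000) converts it to
    // milliseconds, e.g. at a 16 kHz split rate a delay of 960 samples
    // corresponds to 60 ms.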
    const int frames_per_ms =
        rtc::CheckedDivExact(capture_nonlocked_.split_rate, 1000);
    const int aec_system_delay_ms =
        WebRtcAec_system_delay(echo_cancellation()->aec_core()) / frames_per_ms;
    const int diff_aec_system_delay_ms =
        aec_system_delay_ms - capture_.last_aec_system_delay_ms;
    if (diff_aec_system_delay_ms > kMinDiffDelayMs &&
        capture_.last_aec_system_delay_ms != 0) {
      RTC_HISTOGRAM_COUNTS_SPARSE("WebRTC.Audio.AecSystemDelayJump",
                                  diff_aec_system_delay_ms, kMinDiffDelayMs,
                                  1000, 100);
      if (capture_.aec_system_delay_jumps == -1) {
        capture_.aec_system_delay_jumps = 0;  // Activate counter if needed.
      }
      capture_.aec_system_delay_jumps++;
    }
    capture_.last_aec_system_delay_ms = aec_system_delay_ms;
  }
}

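// Reports the number of delay jumps observed during the call (if any were
// counted) to UMA and resets the per-call tracking state. Both locks are
// taken so the update cannot race with capture or render processing.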
void AudioProcessingImpl::UpdateHistogramsOnCallEnd() {
  // Run in a single-threaded manner.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);

  if (capture_.stream_delay_jumps > -1) {
    RTC_HISTOGRAM_ENUMERATION_SPARSE(
        "WebRTC.Audio.NumOfPlatformReportedStreamDelayJumps",
        capture_.stream_delay_jumps, 51);
  }
  capture_.stream_delay_jumps = -1;
  capture_.last_stream_delay_ms = 0;

  if (capture_.aec_system_delay_jumps > -1) {
    RTC_HISTOGRAM_ENUMERATION_SPARSE("WebRTC.Audio.NumOfAecSystemDelayJumps",
                                     capture_.aec_system_delay_jumps, 51);
  }
  capture_.aec_system_delay_jumps = -1;
  capture_.last_aec_system_delay_ms = 0;
}

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
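// Appends the pending event message to the debug dump file as a raw int32
// byte count (native byte order; see the endianness TODO below) followed by
// the serialized protobuf payload. |crit_debug| serializes writers so each
// record is written atomically.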
int AudioProcessingImpl::WriteMessageToDebugFile(
    FileWrapper* debug_file,
    rtc::CriticalSection* crit_debug,
    ApmDebugDumpThreadState* debug_state) {
  int32_t size = debug_state->event_msg->ByteSize();
  if (size <= 0) {
    return kUnspecifiedError;
  }
#if defined(WEBRTC_ARCH_BIG_ENDIAN)
// TODO(ajm): Use little-endian "on the wire". For the moment, we can be
//            pretty safe in assuming little-endian.
#endif

  if (!debug_state->event_msg->SerializeToString(&debug_state->event_str)) {
    return kUnspecifiedError;
  }

  {
    // Ensure atomic writes of the message.
    rtc::CritScope cs_capture(crit_debug);
    // Write message preceded by its size.
    if (!debug_file->Write(&size, sizeof(int32_t))) {
      return kFileError;
    }
    if (!debug_file->Write(debug_state->event_str.data(),
                           debug_state->event_str.length())) {
      return kFileError;
    }
  }

  debug_state->event_msg->Clear();

  return kNoError;
}

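// Dumps an INIT event recording the current API stream formats: the input,
// output and reverse sample rates and channel counts.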
int AudioProcessingImpl::WriteInitMessage() {
  debug_dump_.capture.event_msg->set_type(audioproc::Event::INIT);
  audioproc::Init* msg = debug_dump_.capture.event_msg->mutable_init();
  msg->set_sample_rate(formats_.api_format.input_stream().sample_rate_hz());

  msg->set_num_input_channels(static_cast<google::protobuf::int32>(
      formats_.api_format.input_stream().num_channels()));
  msg->set_num_output_channels(static_cast<google::protobuf::int32>(
      formats_.api_format.output_stream().num_channels()));
  msg->set_num_reverse_channels(static_cast<google::protobuf::int32>(
      formats_.api_format.reverse_input_stream().num_channels()));
  msg->set_reverse_sample_rate(
      formats_.api_format.reverse_input_stream().sample_rate_hz());
  msg->set_output_sample_rate(
      formats_.api_format.output_stream().sample_rate_hz());
  // TODO(ekmeyerson): Add reverse output fields to
  // debug_dump_.capture.event_msg.

  RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                        &crit_debug_, &debug_dump_.capture));
  return kNoError;
}

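// Dumps a CONFIG event describing the current submodule settings. Unless
// |forced| is set, the write is skipped when the serialized config is
// unchanged since the last dump, so only actual changes are recorded.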
int AudioProcessingImpl::WriteConfigMessage(bool forced) {
  audioproc::Config config;

  config.set_aec_enabled(public_submodules_->echo_cancellation->is_enabled());
  config.set_aec_delay_agnostic_enabled(
      public_submodules_->echo_cancellation->is_delay_agnostic_enabled());
  config.set_aec_drift_compensation_enabled(
      public_submodules_->echo_cancellation->is_drift_compensation_enabled());
  config.set_aec_extended_filter_enabled(
      public_submodules_->echo_cancellation->is_extended_filter_enabled());
  config.set_aec_suppression_level(static_cast<int>(
      public_submodules_->echo_cancellation->suppression_level()));

  config.set_aecm_enabled(
      public_submodules_->echo_control_mobile->is_enabled());
  config.set_aecm_comfort_noise_enabled(
      public_submodules_->echo_control_mobile->is_comfort_noise_enabled());
  config.set_aecm_routing_mode(static_cast<int>(
      public_submodules_->echo_control_mobile->routing_mode()));

  config.set_agc_enabled(public_submodules_->gain_control->is_enabled());
  config.set_agc_mode(
      static_cast<int>(public_submodules_->gain_control->mode()));
  config.set_agc_limiter_enabled(
      public_submodules_->gain_control->is_limiter_enabled());
  config.set_noise_robust_agc_enabled(constants_.use_new_agc);

  config.set_hpf_enabled(public_submodules_->high_pass_filter->is_enabled());

  config.set_ns_enabled(public_submodules_->noise_suppression->is_enabled());
  config.set_ns_level(
      static_cast<int>(public_submodules_->noise_suppression->level()));

  config.set_transient_suppression_enabled(
      capture_.transient_suppressor_enabled);

  std::string serialized_config = config.SerializeAsString();
  if (!forced &&
      debug_dump_.capture.last_serialized_config == serialized_config) {
    return kNoError;
  }

  debug_dump_.capture.last_serialized_config = serialized_config;

  debug_dump_.capture.event_msg->set_type(audioproc::Event::CONFIG);
  debug_dump_.capture.event_msg->mutable_config()->CopyFrom(config);

  RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                        &crit_debug_, &debug_dump_.capture));
  return kNoError;
}
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP

}  // namespace webrtc