1 /*
2  *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 #include "modules/audio_processing/aec3/echo_canceller3.h"
11 
12 #include <algorithm>
13 #include <utility>
14 
15 #include "modules/audio_processing/aec3/aec3_common.h"
16 #include "modules/audio_processing/high_pass_filter.h"
17 #include "modules/audio_processing/logging/apm_data_dumper.h"
18 #include "rtc_base/atomic_ops.h"
19 #include "rtc_base/experiments/field_trial_parser.h"
20 #include "rtc_base/logging.h"
21 #include "system_wrappers/include/field_trial.h"
22 
23 namespace webrtc {
24 
25 namespace {
26 
// Names the two API call paths: capture and render.
// NOTE(review): the users of this enum are not visible in this part of the
// file.
enum class EchoCanceller3ApiCall {
  kCapture,
  kRender,
};
28 
DetectSaturation(rtc::ArrayView<const float> y)29 bool DetectSaturation(rtc::ArrayView<const float> y) {
30   for (auto y_k : y) {
31     if (y_k >= 32700.0f || y_k <= -32700.0f) {
32       return true;
33     }
34   }
35   return false;
36 }
37 
38 // Retrieves a value from a field trial if it is available. If no value is
39 // present, the default value is returned. If the retrieved value is beyond the
40 // specified limits, the default value is returned instead.
RetrieveFieldTrialValue(const char * trial_name,float min,float max,float * value_to_update)41 void RetrieveFieldTrialValue(const char* trial_name,
42                              float min,
43                              float max,
44                              float* value_to_update) {
45   const std::string field_trial_str = field_trial::FindFullName(trial_name);
46 
47   FieldTrialParameter<double> field_trial_param(/*key=*/"", *value_to_update);
48 
49   ParseFieldTrial({&field_trial_param}, field_trial_str);
50   float field_trial_value = static_cast<float>(field_trial_param.Get());
51 
52   if (field_trial_value >= min && field_trial_value <= max) {
53     *value_to_update = field_trial_value;
54   }
55 }
56 
RetrieveFieldTrialValue(const char * trial_name,int min,int max,int * value_to_update)57 void RetrieveFieldTrialValue(const char* trial_name,
58                              int min,
59                              int max,
60                              int* value_to_update) {
61   const std::string field_trial_str = field_trial::FindFullName(trial_name);
62 
63   FieldTrialParameter<int> field_trial_param(/*key=*/"", *value_to_update);
64 
65   ParseFieldTrial({&field_trial_param}, field_trial_str);
66   float field_trial_value = field_trial_param.Get();
67 
68   if (field_trial_value >= min && field_trial_value <= max) {
69     *value_to_update = field_trial_value;
70   }
71 }
72 
FillSubFrameView(AudioBuffer * frame,size_t sub_frame_index,std::vector<std::vector<rtc::ArrayView<float>>> * sub_frame_view)73 void FillSubFrameView(
74     AudioBuffer* frame,
75     size_t sub_frame_index,
76     std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
77   RTC_DCHECK_GE(1, sub_frame_index);
78   RTC_DCHECK_LE(0, sub_frame_index);
79   RTC_DCHECK_EQ(frame->num_bands(), sub_frame_view->size());
80   RTC_DCHECK_EQ(frame->num_channels(), (*sub_frame_view)[0].size());
81   for (size_t band = 0; band < sub_frame_view->size(); ++band) {
82     for (size_t channel = 0; channel < (*sub_frame_view)[0].size(); ++channel) {
83       (*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
84           &frame->split_bands(channel)[band][sub_frame_index * kSubFrameLength],
85           kSubFrameLength);
86     }
87   }
88 }
89 
FillSubFrameView(std::vector<std::vector<std::vector<float>>> * frame,size_t sub_frame_index,std::vector<std::vector<rtc::ArrayView<float>>> * sub_frame_view)90 void FillSubFrameView(
91     std::vector<std::vector<std::vector<float>>>* frame,
92     size_t sub_frame_index,
93     std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
94   RTC_DCHECK_GE(1, sub_frame_index);
95   RTC_DCHECK_EQ(frame->size(), sub_frame_view->size());
96   RTC_DCHECK_EQ((*frame)[0].size(), (*sub_frame_view)[0].size());
97   for (size_t band = 0; band < frame->size(); ++band) {
98     for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) {
99       (*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
100           &(*frame)[band][channel][sub_frame_index * kSubFrameLength],
101           kSubFrameLength);
102     }
103   }
104 }
105 
ProcessCaptureFrameContent(AudioBuffer * linear_output,AudioBuffer * capture,bool level_change,bool saturated_microphone_signal,size_t sub_frame_index,FrameBlocker * capture_blocker,BlockFramer * linear_output_framer,BlockFramer * output_framer,BlockProcessor * block_processor,std::vector<std::vector<std::vector<float>>> * linear_output_block,std::vector<std::vector<rtc::ArrayView<float>>> * linear_output_sub_frame_view,std::vector<std::vector<std::vector<float>>> * capture_block,std::vector<std::vector<rtc::ArrayView<float>>> * capture_sub_frame_view)106 void ProcessCaptureFrameContent(
107     AudioBuffer* linear_output,
108     AudioBuffer* capture,
109     bool level_change,
110     bool saturated_microphone_signal,
111     size_t sub_frame_index,
112     FrameBlocker* capture_blocker,
113     BlockFramer* linear_output_framer,
114     BlockFramer* output_framer,
115     BlockProcessor* block_processor,
116     std::vector<std::vector<std::vector<float>>>* linear_output_block,
117     std::vector<std::vector<rtc::ArrayView<float>>>*
118         linear_output_sub_frame_view,
119     std::vector<std::vector<std::vector<float>>>* capture_block,
120     std::vector<std::vector<rtc::ArrayView<float>>>* capture_sub_frame_view) {
121   FillSubFrameView(capture, sub_frame_index, capture_sub_frame_view);
122 
123   if (linear_output) {
124     RTC_DCHECK(linear_output_framer);
125     RTC_DCHECK(linear_output_block);
126     RTC_DCHECK(linear_output_sub_frame_view);
127     FillSubFrameView(linear_output, sub_frame_index,
128                      linear_output_sub_frame_view);
129   }
130 
131   capture_blocker->InsertSubFrameAndExtractBlock(*capture_sub_frame_view,
132                                                  capture_block);
133   block_processor->ProcessCapture(level_change, saturated_microphone_signal,
134                                   linear_output_block, capture_block);
135   output_framer->InsertBlockAndExtractSubFrame(*capture_block,
136                                                capture_sub_frame_view);
137 
138   if (linear_output) {
139     RTC_DCHECK(linear_output_framer);
140     linear_output_framer->InsertBlockAndExtractSubFrame(
141         *linear_output_block, linear_output_sub_frame_view);
142   }
143 }
144 
ProcessRemainingCaptureFrameContent(bool level_change,bool saturated_microphone_signal,FrameBlocker * capture_blocker,BlockFramer * linear_output_framer,BlockFramer * output_framer,BlockProcessor * block_processor,std::vector<std::vector<std::vector<float>>> * linear_output_block,std::vector<std::vector<std::vector<float>>> * block)145 void ProcessRemainingCaptureFrameContent(
146     bool level_change,
147     bool saturated_microphone_signal,
148     FrameBlocker* capture_blocker,
149     BlockFramer* linear_output_framer,
150     BlockFramer* output_framer,
151     BlockProcessor* block_processor,
152     std::vector<std::vector<std::vector<float>>>* linear_output_block,
153     std::vector<std::vector<std::vector<float>>>* block) {
154   if (!capture_blocker->IsBlockAvailable()) {
155     return;
156   }
157 
158   capture_blocker->ExtractBlock(block);
159   block_processor->ProcessCapture(level_change, saturated_microphone_signal,
160                                   linear_output_block, block);
161   output_framer->InsertBlock(*block);
162 
163   if (linear_output_framer) {
164     RTC_DCHECK(linear_output_block);
165     linear_output_framer->InsertBlock(*linear_output_block);
166   }
167 }
168 
BufferRenderFrameContent(std::vector<std::vector<std::vector<float>>> * render_frame,size_t sub_frame_index,FrameBlocker * render_blocker,BlockProcessor * block_processor,std::vector<std::vector<std::vector<float>>> * block,std::vector<std::vector<rtc::ArrayView<float>>> * sub_frame_view)169 void BufferRenderFrameContent(
170     std::vector<std::vector<std::vector<float>>>* render_frame,
171     size_t sub_frame_index,
172     FrameBlocker* render_blocker,
173     BlockProcessor* block_processor,
174     std::vector<std::vector<std::vector<float>>>* block,
175     std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
176   FillSubFrameView(render_frame, sub_frame_index, sub_frame_view);
177   render_blocker->InsertSubFrameAndExtractBlock(*sub_frame_view, block);
178   block_processor->BufferRender(*block);
179 }
180 
BufferRemainingRenderFrameContent(FrameBlocker * render_blocker,BlockProcessor * block_processor,std::vector<std::vector<std::vector<float>>> * block)181 void BufferRemainingRenderFrameContent(
182     FrameBlocker* render_blocker,
183     BlockProcessor* block_processor,
184     std::vector<std::vector<std::vector<float>>>* block) {
185   if (!render_blocker->IsBlockAvailable()) {
186     return;
187   }
188   render_blocker->ExtractBlock(block);
189   block_processor->BufferRender(*block);
190 }
191 
CopyBufferIntoFrame(const AudioBuffer & buffer,size_t num_bands,size_t num_channels,std::vector<std::vector<std::vector<float>>> * frame)192 void CopyBufferIntoFrame(const AudioBuffer& buffer,
193                          size_t num_bands,
194                          size_t num_channels,
195                          std::vector<std::vector<std::vector<float>>>* frame) {
196   RTC_DCHECK_EQ(num_bands, frame->size());
197   RTC_DCHECK_EQ(num_channels, (*frame)[0].size());
198   RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, (*frame)[0][0].size());
199   for (size_t band = 0; band < num_bands; ++band) {
200     for (size_t channel = 0; channel < num_channels; ++channel) {
201       rtc::ArrayView<const float> buffer_view(
202           &buffer.split_bands_const(channel)[band][0],
203           AudioBuffer::kSplitBandSize);
204       std::copy(buffer_view.begin(), buffer_view.end(),
205                 (*frame)[band][channel].begin());
206     }
207   }
208 }
209 
210 }  // namespace
211 
212 // TODO(webrtc:5298): Move this to a separate file.
AdjustConfig(const EchoCanceller3Config & config)213 EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
214   EchoCanceller3Config adjusted_cfg = config;
215 
216   if (field_trial::IsEnabled("WebRTC-Aec3UseShortConfigChangeDuration")) {
217     adjusted_cfg.filter.config_change_duration_blocks = 10;
218   }
219 
220   if (field_trial::IsEnabled("WebRTC-Aec3UseZeroInitialStateDuration")) {
221     adjusted_cfg.filter.initial_state_seconds = 0.f;
222   } else if (field_trial::IsEnabled(
223                  "WebRTC-Aec3UseDot1SecondsInitialStateDuration")) {
224     adjusted_cfg.filter.initial_state_seconds = .1f;
225   } else if (field_trial::IsEnabled(
226                  "WebRTC-Aec3UseDot2SecondsInitialStateDuration")) {
227     adjusted_cfg.filter.initial_state_seconds = .2f;
228   } else if (field_trial::IsEnabled(
229                  "WebRTC-Aec3UseDot3SecondsInitialStateDuration")) {
230     adjusted_cfg.filter.initial_state_seconds = .3f;
231   } else if (field_trial::IsEnabled(
232                  "WebRTC-Aec3UseDot6SecondsInitialStateDuration")) {
233     adjusted_cfg.filter.initial_state_seconds = .6f;
234   } else if (field_trial::IsEnabled(
235                  "WebRTC-Aec3UseDot9SecondsInitialStateDuration")) {
236     adjusted_cfg.filter.initial_state_seconds = .9f;
237   } else if (field_trial::IsEnabled(
238                  "WebRTC-Aec3Use1Dot2SecondsInitialStateDuration")) {
239     adjusted_cfg.filter.initial_state_seconds = 1.2f;
240   } else if (field_trial::IsEnabled(
241                  "WebRTC-Aec3Use1Dot6SecondsInitialStateDuration")) {
242     adjusted_cfg.filter.initial_state_seconds = 1.6f;
243   } else if (field_trial::IsEnabled(
244                  "WebRTC-Aec3Use2Dot0SecondsInitialStateDuration")) {
245     adjusted_cfg.filter.initial_state_seconds = 2.0f;
246   }
247 
248   if (field_trial::IsEnabled("WebRTC-Aec3EchoSaturationDetectionKillSwitch")) {
249     adjusted_cfg.ep_strength.echo_can_saturate = false;
250   }
251 
252   if (field_trial::IsEnabled("WebRTC-Aec3UseDot2ReverbDefaultLen")) {
253     adjusted_cfg.ep_strength.default_len = 0.2f;
254   } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot3ReverbDefaultLen")) {
255     adjusted_cfg.ep_strength.default_len = 0.3f;
256   } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot4ReverbDefaultLen")) {
257     adjusted_cfg.ep_strength.default_len = 0.4f;
258   } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot5ReverbDefaultLen")) {
259     adjusted_cfg.ep_strength.default_len = 0.5f;
260   } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot6ReverbDefaultLen")) {
261     adjusted_cfg.ep_strength.default_len = 0.6f;
262   } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot7ReverbDefaultLen")) {
263     adjusted_cfg.ep_strength.default_len = 0.7f;
264   } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot8ReverbDefaultLen")) {
265     adjusted_cfg.ep_strength.default_len = 0.8f;
266   }
267 
268   if (field_trial::IsEnabled("WebRTC-Aec3ShortHeadroomKillSwitch")) {
269     // Two blocks headroom.
270     adjusted_cfg.delay.delay_headroom_samples = kBlockSize * 2;
271   }
272 
273   if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToZeroKillSwitch")) {
274     adjusted_cfg.erle.clamp_quality_estimate_to_zero = false;
275   }
276 
277   if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToOneKillSwitch")) {
278     adjusted_cfg.erle.clamp_quality_estimate_to_one = false;
279   }
280 
281   if (field_trial::IsEnabled("WebRTC-Aec3OnsetDetectionKillSwitch")) {
282     adjusted_cfg.erle.onset_detection = false;
283   }
284 
285   if (field_trial::IsEnabled(
286           "WebRTC-Aec3EnforceRenderDelayEstimationDownmixing")) {
287     adjusted_cfg.delay.render_alignment_mixing.downmix = true;
288     adjusted_cfg.delay.render_alignment_mixing.adaptive_selection = false;
289   }
290 
291   if (field_trial::IsEnabled(
292           "WebRTC-Aec3EnforceCaptureDelayEstimationDownmixing")) {
293     adjusted_cfg.delay.capture_alignment_mixing.downmix = true;
294     adjusted_cfg.delay.capture_alignment_mixing.adaptive_selection = false;
295   }
296 
297   if (field_trial::IsEnabled(
298           "WebRTC-Aec3EnforceCaptureDelayEstimationLeftRightPrioritization")) {
299     adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels =
300         true;
301   }
302 
303   if (field_trial::IsEnabled(
304           "WebRTC-"
305           "Aec3RenderDelayEstimationLeftRightPrioritizationKillSwitch")) {
306     adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels =
307         false;
308   }
309 
310   if (field_trial::IsEnabled("WebRTC-Aec3SensitiveDominantNearendActivation")) {
311     adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.5f;
312   } else if (field_trial::IsEnabled(
313                  "WebRTC-Aec3VerySensitiveDominantNearendActivation")) {
314     adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.75f;
315   }
316 
317   if (field_trial::IsEnabled("WebRTC-Aec3TransparentAntiHowlingGain")) {
318     adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 1.f;
319   }
320 
321   if (field_trial::IsEnabled(
322           "WebRTC-Aec3EnforceMoreTransparentNormalSuppressorTuning")) {
323     adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent = 0.4f;
324     adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress = 0.5f;
325   }
326 
327   if (field_trial::IsEnabled(
328           "WebRTC-Aec3EnforceMoreTransparentNearendSuppressorTuning")) {
329     adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent = 1.29f;
330     adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress = 1.3f;
331   }
332 
333   if (field_trial::IsEnabled(
334           "WebRTC-Aec3EnforceMoreTransparentNormalSuppressorHfTuning")) {
335     adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent = 0.3f;
336     adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress = 0.4f;
337   }
338 
339   if (field_trial::IsEnabled(
340           "WebRTC-Aec3EnforceMoreTransparentNearendSuppressorHfTuning")) {
341     adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent = 1.09f;
342     adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress = 1.1f;
343   }
344 
345   if (field_trial::IsEnabled(
346           "WebRTC-Aec3EnforceRapidlyAdjustingNormalSuppressorTunings")) {
347     adjusted_cfg.suppressor.normal_tuning.max_inc_factor = 2.5f;
348   }
349 
350   if (field_trial::IsEnabled(
351           "WebRTC-Aec3EnforceRapidlyAdjustingNearendSuppressorTunings")) {
352     adjusted_cfg.suppressor.nearend_tuning.max_inc_factor = 2.5f;
353   }
354 
355   if (field_trial::IsEnabled(
356           "WebRTC-Aec3EnforceSlowlyAdjustingNormalSuppressorTunings")) {
357     adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf = .2f;
358   }
359 
360   if (field_trial::IsEnabled(
361           "WebRTC-Aec3EnforceSlowlyAdjustingNearendSuppressorTunings")) {
362     adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf = .2f;
363   }
364 
365   if (field_trial::IsEnabled("WebRTC-Aec3EnforceStationarityProperties")) {
366     adjusted_cfg.echo_audibility.use_stationarity_properties = true;
367   }
368 
369   if (field_trial::IsEnabled(
370           "WebRTC-Aec3EnforceStationarityPropertiesAtInit")) {
371     adjusted_cfg.echo_audibility.use_stationarity_properties_at_init = true;
372   }
373 
374   if (field_trial::IsEnabled("WebRTC-Aec3EnforceLowActiveRenderLimit")) {
375     adjusted_cfg.render_levels.active_render_limit = 50.f;
376   } else if (field_trial::IsEnabled(
377                  "WebRTC-Aec3EnforceVeryLowActiveRenderLimit")) {
378     adjusted_cfg.render_levels.active_render_limit = 30.f;
379   }
380 
381   // Field-trial based override for the whole suppressor tuning.
382   const std::string suppressor_tuning_override_trial_name =
383       field_trial::FindFullName("WebRTC-Aec3SuppressorTuningOverride");
384 
385   FieldTrialParameter<double> nearend_tuning_mask_lf_enr_transparent(
386       "nearend_tuning_mask_lf_enr_transparent",
387       adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
388   FieldTrialParameter<double> nearend_tuning_mask_lf_enr_suppress(
389       "nearend_tuning_mask_lf_enr_suppress",
390       adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
391   FieldTrialParameter<double> nearend_tuning_mask_hf_enr_transparent(
392       "nearend_tuning_mask_hf_enr_transparent",
393       adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
394   FieldTrialParameter<double> nearend_tuning_mask_hf_enr_suppress(
395       "nearend_tuning_mask_hf_enr_suppress",
396       adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
397   FieldTrialParameter<double> nearend_tuning_max_inc_factor(
398       "nearend_tuning_max_inc_factor",
399       adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
400   FieldTrialParameter<double> nearend_tuning_max_dec_factor_lf(
401       "nearend_tuning_max_dec_factor_lf",
402       adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
403   FieldTrialParameter<double> normal_tuning_mask_lf_enr_transparent(
404       "normal_tuning_mask_lf_enr_transparent",
405       adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
406   FieldTrialParameter<double> normal_tuning_mask_lf_enr_suppress(
407       "normal_tuning_mask_lf_enr_suppress",
408       adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
409   FieldTrialParameter<double> normal_tuning_mask_hf_enr_transparent(
410       "normal_tuning_mask_hf_enr_transparent",
411       adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
412   FieldTrialParameter<double> normal_tuning_mask_hf_enr_suppress(
413       "normal_tuning_mask_hf_enr_suppress",
414       adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
415   FieldTrialParameter<double> normal_tuning_max_inc_factor(
416       "normal_tuning_max_inc_factor",
417       adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
418   FieldTrialParameter<double> normal_tuning_max_dec_factor_lf(
419       "normal_tuning_max_dec_factor_lf",
420       adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
421   FieldTrialParameter<double> dominant_nearend_detection_enr_threshold(
422       "dominant_nearend_detection_enr_threshold",
423       adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
424   FieldTrialParameter<double> dominant_nearend_detection_enr_exit_threshold(
425       "dominant_nearend_detection_enr_exit_threshold",
426       adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
427   FieldTrialParameter<double> dominant_nearend_detection_snr_threshold(
428       "dominant_nearend_detection_snr_threshold",
429       adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
430   FieldTrialParameter<int> dominant_nearend_detection_hold_duration(
431       "dominant_nearend_detection_hold_duration",
432       adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
433   FieldTrialParameter<int> dominant_nearend_detection_trigger_threshold(
434       "dominant_nearend_detection_trigger_threshold",
435       adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
436   FieldTrialParameter<double> ep_strength_default_len(
437       "ep_strength_default_len", adjusted_cfg.ep_strength.default_len);
438 
439   ParseFieldTrial(
440       {&nearend_tuning_mask_lf_enr_transparent,
441        &nearend_tuning_mask_lf_enr_suppress,
442        &nearend_tuning_mask_hf_enr_transparent,
443        &nearend_tuning_mask_hf_enr_suppress, &nearend_tuning_max_inc_factor,
444        &nearend_tuning_max_dec_factor_lf,
445        &normal_tuning_mask_lf_enr_transparent,
446        &normal_tuning_mask_lf_enr_suppress,
447        &normal_tuning_mask_hf_enr_transparent,
448        &normal_tuning_mask_hf_enr_suppress, &normal_tuning_max_inc_factor,
449        &normal_tuning_max_dec_factor_lf,
450        &dominant_nearend_detection_enr_threshold,
451        &dominant_nearend_detection_enr_exit_threshold,
452        &dominant_nearend_detection_snr_threshold,
453        &dominant_nearend_detection_hold_duration,
454        &dominant_nearend_detection_trigger_threshold, &ep_strength_default_len},
455       suppressor_tuning_override_trial_name);
456 
457   adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent =
458       static_cast<float>(nearend_tuning_mask_lf_enr_transparent.Get());
459   adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress =
460       static_cast<float>(nearend_tuning_mask_lf_enr_suppress.Get());
461   adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent =
462       static_cast<float>(nearend_tuning_mask_hf_enr_transparent.Get());
463   adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress =
464       static_cast<float>(nearend_tuning_mask_hf_enr_suppress.Get());
465   adjusted_cfg.suppressor.nearend_tuning.max_inc_factor =
466       static_cast<float>(nearend_tuning_max_inc_factor.Get());
467   adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf =
468       static_cast<float>(nearend_tuning_max_dec_factor_lf.Get());
469   adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent =
470       static_cast<float>(normal_tuning_mask_lf_enr_transparent.Get());
471   adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress =
472       static_cast<float>(normal_tuning_mask_lf_enr_suppress.Get());
473   adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent =
474       static_cast<float>(normal_tuning_mask_hf_enr_transparent.Get());
475   adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress =
476       static_cast<float>(normal_tuning_mask_hf_enr_suppress.Get());
477   adjusted_cfg.suppressor.normal_tuning.max_inc_factor =
478       static_cast<float>(normal_tuning_max_inc_factor.Get());
479   adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf =
480       static_cast<float>(normal_tuning_max_dec_factor_lf.Get());
481   adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold =
482       static_cast<float>(dominant_nearend_detection_enr_threshold.Get());
483   adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold =
484       static_cast<float>(dominant_nearend_detection_enr_exit_threshold.Get());
485   adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold =
486       static_cast<float>(dominant_nearend_detection_snr_threshold.Get());
487   adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration =
488       dominant_nearend_detection_hold_duration.Get();
489   adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold =
490       dominant_nearend_detection_trigger_threshold.Get();
491   adjusted_cfg.ep_strength.default_len =
492       static_cast<float>(ep_strength_default_len.Get());
493 
494   // Field trial-based overrides of individual suppressor parameters.
495   RetrieveFieldTrialValue(
496       "WebRTC-Aec3SuppressorNearendLfMaskTransparentOverride", 0.f, 10.f,
497       &adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
498   RetrieveFieldTrialValue(
499       "WebRTC-Aec3SuppressorNearendLfMaskSuppressOverride", 0.f, 10.f,
500       &adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
501   RetrieveFieldTrialValue(
502       "WebRTC-Aec3SuppressorNearendHfMaskTransparentOverride", 0.f, 10.f,
503       &adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
504   RetrieveFieldTrialValue(
505       "WebRTC-Aec3SuppressorNearendHfMaskSuppressOverride", 0.f, 10.f,
506       &adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
507   RetrieveFieldTrialValue(
508       "WebRTC-Aec3SuppressorNearendMaxIncFactorOverride", 0.f, 10.f,
509       &adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
510   RetrieveFieldTrialValue(
511       "WebRTC-Aec3SuppressorNearendMaxDecFactorLfOverride", 0.f, 10.f,
512       &adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
513 
514   RetrieveFieldTrialValue(
515       "WebRTC-Aec3SuppressorNormalLfMaskTransparentOverride", 0.f, 10.f,
516       &adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
517   RetrieveFieldTrialValue(
518       "WebRTC-Aec3SuppressorNormalLfMaskSuppressOverride", 0.f, 10.f,
519       &adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
520   RetrieveFieldTrialValue(
521       "WebRTC-Aec3SuppressorNormalHfMaskTransparentOverride", 0.f, 10.f,
522       &adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
523   RetrieveFieldTrialValue(
524       "WebRTC-Aec3SuppressorNormalHfMaskSuppressOverride", 0.f, 10.f,
525       &adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
526   RetrieveFieldTrialValue(
527       "WebRTC-Aec3SuppressorNormalMaxIncFactorOverride", 0.f, 10.f,
528       &adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
529   RetrieveFieldTrialValue(
530       "WebRTC-Aec3SuppressorNormalMaxDecFactorLfOverride", 0.f, 10.f,
531       &adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
532 
533   RetrieveFieldTrialValue(
534       "WebRTC-Aec3SuppressorDominantNearendEnrThresholdOverride", 0.f, 100.f,
535       &adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
536   RetrieveFieldTrialValue(
537       "WebRTC-Aec3SuppressorDominantNearendEnrExitThresholdOverride", 0.f,
538       100.f,
539       &adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
540   RetrieveFieldTrialValue(
541       "WebRTC-Aec3SuppressorDominantNearendSnrThresholdOverride", 0.f, 100.f,
542       &adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
543   RetrieveFieldTrialValue(
544       "WebRTC-Aec3SuppressorDominantNearendHoldDurationOverride", 0, 1000,
545       &adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
546   RetrieveFieldTrialValue(
547       "WebRTC-Aec3SuppressorDominantNearendTriggerThresholdOverride", 0, 1000,
548       &adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
549 
550   RetrieveFieldTrialValue(
551       "WebRTC-Aec3SuppressorAntiHowlingGainOverride", 0.f, 10.f,
552       &adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain);
553 
554   RetrieveFieldTrialValue("WebRTC-Aec3SuppressorEpStrengthDefaultLenOverride",
555                           -1.f, 1.f, &adjusted_cfg.ep_strength.default_len);
556 
557   return adjusted_cfg;
558 }
559 
560 class EchoCanceller3::RenderWriter {
561  public:
562   RenderWriter(ApmDataDumper* data_dumper,
563                SwapQueue<std::vector<std::vector<std::vector<float>>>,
564                          Aec3RenderQueueItemVerifier>* render_transfer_queue,
565                size_t num_bands,
566                size_t num_channels);
567   ~RenderWriter();
568   void Insert(const AudioBuffer& input);
569 
570  private:
571   ApmDataDumper* data_dumper_;
572   const size_t num_bands_;
573   const size_t num_channels_;
574   HighPassFilter high_pass_filter_;
575   std::vector<std::vector<std::vector<float>>> render_queue_input_frame_;
576   SwapQueue<std::vector<std::vector<std::vector<float>>>,
577             Aec3RenderQueueItemVerifier>* render_transfer_queue_;
578   RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RenderWriter);
579 };
580 
RenderWriter(ApmDataDumper * data_dumper,SwapQueue<std::vector<std::vector<std::vector<float>>>,Aec3RenderQueueItemVerifier> * render_transfer_queue,size_t num_bands,size_t num_channels)581 EchoCanceller3::RenderWriter::RenderWriter(
582     ApmDataDumper* data_dumper,
583     SwapQueue<std::vector<std::vector<std::vector<float>>>,
584               Aec3RenderQueueItemVerifier>* render_transfer_queue,
585     size_t num_bands,
586     size_t num_channels)
587     : data_dumper_(data_dumper),
588       num_bands_(num_bands),
589       num_channels_(num_channels),
590       high_pass_filter_(16000, num_channels),
591       render_queue_input_frame_(
592           num_bands_,
593           std::vector<std::vector<float>>(
594               num_channels_,
595               std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
596       render_transfer_queue_(render_transfer_queue) {
597   RTC_DCHECK(data_dumper);
598 }
599 
// Defaulted destructor, defined out of line.
EchoCanceller3::RenderWriter::~RenderWriter() = default;
601 
Insert(const AudioBuffer & input)602 void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) {
603   RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, input.num_frames_per_band());
604   RTC_DCHECK_EQ(num_bands_, input.num_bands());
605   RTC_DCHECK_EQ(num_channels_, input.num_channels());
606 
607   // TODO(bugs.webrtc.org/8759) Temporary work-around.
608   if (num_bands_ != input.num_bands())
609     return;
610 
611   data_dumper_->DumpWav("aec3_render_input", AudioBuffer::kSplitBandSize,
612                         &input.split_bands_const(0)[0][0], 16000, 1);
613 
614   CopyBufferIntoFrame(input, num_bands_, num_channels_,
615                       &render_queue_input_frame_);
616   high_pass_filter_.Process(&render_queue_input_frame_[0]);
617 
618   static_cast<void>(render_transfer_queue_->Insert(&render_queue_input_frame_));
619 }
620 
// Instance counter. It is atomically incremented in the constructor and the
// incremented value is passed to the ApmDataDumper created there.
int EchoCanceller3::instance_count_ = 0;
622 
// Constructs an EchoCanceller3 with a BlockProcessor obtained from
// BlockProcessor::Create(). The field-trial adjusted version of `config` is
// passed both to BlockProcessor::Create() and to the constructor taking an
// explicit BlockProcessor, to which this constructor delegates.
// NOTE(review): AdjustConfig(config) is evaluated twice here, once for each
// consumer. Both calls are expected to produce the same result as long as the
// field trials do not change in between - confirm.
EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
                               int sample_rate_hz,
                               size_t num_render_channels,
                               size_t num_capture_channels)
    : EchoCanceller3(AdjustConfig(config),
                     sample_rate_hz,
                     num_render_channels,
                     num_capture_channels,
                     std::unique_ptr<BlockProcessor>(
                         BlockProcessor::Create(AdjustConfig(config),
                                                sample_rate_hz,
                                                num_render_channels,
                                                num_capture_channels))) {}
EchoCanceller3(const EchoCanceller3Config & config,int sample_rate_hz,size_t num_render_channels,size_t num_capture_channels,std::unique_ptr<BlockProcessor> block_processor)636 EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
637                                int sample_rate_hz,
638                                size_t num_render_channels,
639                                size_t num_capture_channels,
640                                std::unique_ptr<BlockProcessor> block_processor)
641     : data_dumper_(
642           new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
643       config_(config),
644       sample_rate_hz_(sample_rate_hz),
645       num_bands_(NumBandsForRate(sample_rate_hz_)),
646       num_render_channels_(num_render_channels),
647       num_capture_channels_(num_capture_channels),
648       output_framer_(num_bands_, num_capture_channels_),
649       capture_blocker_(num_bands_, num_capture_channels_),
650       render_blocker_(num_bands_, num_render_channels_),
651       render_transfer_queue_(
652           kRenderTransferQueueSizeFrames,
653           std::vector<std::vector<std::vector<float>>>(
654               num_bands_,
655               std::vector<std::vector<float>>(
656                   num_render_channels_,
657                   std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
658           Aec3RenderQueueItemVerifier(num_bands_,
659                                       num_render_channels_,
660                                       AudioBuffer::kSplitBandSize)),
661       block_processor_(std::move(block_processor)),
662       render_queue_output_frame_(
663           num_bands_,
664           std::vector<std::vector<float>>(
665               num_render_channels_,
666               std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
667       render_block_(
668           num_bands_,
669           std::vector<std::vector<float>>(num_render_channels_,
670                                           std::vector<float>(kBlockSize, 0.f))),
671       capture_block_(
672           num_bands_,
673           std::vector<std::vector<float>>(num_capture_channels_,
674                                           std::vector<float>(kBlockSize, 0.f))),
675       render_sub_frame_view_(
676           num_bands_,
677           std::vector<rtc::ArrayView<float>>(num_render_channels_)),
678       capture_sub_frame_view_(
679           num_bands_,
680           std::vector<rtc::ArrayView<float>>(num_capture_channels_)) {
681   RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));
682 
683   if (config_.delay.fixed_capture_delay_samples > 0) {
684     block_delay_buffer_.reset(new BlockDelayBuffer(
685         num_capture_channels_, num_bands_, AudioBuffer::kSplitBandSize,
686         config_.delay.fixed_capture_delay_samples));
687   }
688 
689   render_writer_.reset(new RenderWriter(data_dumper_.get(),
690                                         &render_transfer_queue_, num_bands_,
691                                         num_render_channels_));
692 
693   RTC_DCHECK_EQ(num_bands_, std::max(sample_rate_hz_, 16000) / 16000);
694   RTC_DCHECK_GE(kMaxNumBands, num_bands_);
695 
696   if (config_.filter.export_linear_aec_output) {
697     linear_output_framer_.reset(new BlockFramer(1, num_capture_channels_));
698     linear_output_block_ =
699         std::make_unique<std::vector<std::vector<std::vector<float>>>>(
700             1, std::vector<std::vector<float>>(
701                    num_capture_channels_, std::vector<float>(kBlockSize, 0.f)));
702     linear_output_sub_frame_view_ =
703         std::vector<std::vector<rtc::ArrayView<float>>>(
704             1, std::vector<rtc::ArrayView<float>>(num_capture_channels_));
705   }
706 }
707 
// Nothing to release explicitly; the members clean up after themselves.
EchoCanceller3::~EchoCanceller3() = default;
709 
AnalyzeRender(const AudioBuffer & render)710 void EchoCanceller3::AnalyzeRender(const AudioBuffer& render) {
711   RTC_DCHECK_RUNS_SERIALIZED(&render_race_checker_);
712 
713   RTC_DCHECK_EQ(render.num_channels(), num_render_channels_);
714   data_dumper_->DumpRaw("aec3_call_order",
715                         static_cast<int>(EchoCanceller3ApiCall::kRender));
716 
717   return render_writer_->Insert(render);
718 }
719 
AnalyzeCapture(const AudioBuffer & capture)720 void EchoCanceller3::AnalyzeCapture(const AudioBuffer& capture) {
721   RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
722   data_dumper_->DumpWav("aec3_capture_analyze_input", capture.num_frames(),
723                         capture.channels_const()[0], sample_rate_hz_, 1);
724   saturated_microphone_signal_ = false;
725   for (size_t channel = 0; channel < capture.num_channels(); ++channel) {
726     saturated_microphone_signal_ |=
727         DetectSaturation(rtc::ArrayView<const float>(
728             capture.channels_const()[channel], capture.num_frames()));
729     if (saturated_microphone_signal_) {
730       break;
731     }
732   }
733 }
734 
ProcessCapture(AudioBuffer * capture,bool level_change)735 void EchoCanceller3::ProcessCapture(AudioBuffer* capture, bool level_change) {
736   ProcessCapture(capture, nullptr, level_change);
737 }
738 
ProcessCapture(AudioBuffer * capture,AudioBuffer * linear_output,bool level_change)739 void EchoCanceller3::ProcessCapture(AudioBuffer* capture,
740                                     AudioBuffer* linear_output,
741                                     bool level_change) {
742   RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
743   RTC_DCHECK(capture);
744   RTC_DCHECK_EQ(num_bands_, capture->num_bands());
745   RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, capture->num_frames_per_band());
746   RTC_DCHECK_EQ(capture->num_channels(), num_capture_channels_);
747   data_dumper_->DumpRaw("aec3_call_order",
748                         static_cast<int>(EchoCanceller3ApiCall::kCapture));
749 
750   if (linear_output && !linear_output_framer_) {
751     RTC_LOG(LS_ERROR) << "Trying to retrieve the linear AEC output without "
752                          "properly configuring AEC3.";
753     RTC_NOTREACHED();
754   }
755 
756   // Report capture call in the metrics and periodically update API call
757   // metrics.
758   api_call_metrics_.ReportCaptureCall();
759 
760   // Optionally delay the capture signal.
761   if (config_.delay.fixed_capture_delay_samples > 0) {
762     RTC_DCHECK(block_delay_buffer_);
763     block_delay_buffer_->DelaySignal(capture);
764   }
765 
766   rtc::ArrayView<float> capture_lower_band = rtc::ArrayView<float>(
767       &capture->split_bands(0)[0][0], AudioBuffer::kSplitBandSize);
768 
769   data_dumper_->DumpWav("aec3_capture_input", capture_lower_band, 16000, 1);
770 
771   EmptyRenderQueue();
772 
773   ProcessCaptureFrameContent(linear_output, capture, level_change,
774                              saturated_microphone_signal_, 0, &capture_blocker_,
775                              linear_output_framer_.get(), &output_framer_,
776                              block_processor_.get(), linear_output_block_.get(),
777                              &linear_output_sub_frame_view_, &capture_block_,
778                              &capture_sub_frame_view_);
779 
780   ProcessCaptureFrameContent(linear_output, capture, level_change,
781                              saturated_microphone_signal_, 1, &capture_blocker_,
782                              linear_output_framer_.get(), &output_framer_,
783                              block_processor_.get(), linear_output_block_.get(),
784                              &linear_output_sub_frame_view_, &capture_block_,
785                              &capture_sub_frame_view_);
786 
787   ProcessRemainingCaptureFrameContent(
788       level_change, saturated_microphone_signal_, &capture_blocker_,
789       linear_output_framer_.get(), &output_framer_, block_processor_.get(),
790       linear_output_block_.get(), &capture_block_);
791 
792   data_dumper_->DumpWav("aec3_capture_output", AudioBuffer::kSplitBandSize,
793                         &capture->split_bands(0)[0][0], 16000, 1);
794 }
795 
GetMetrics() const796 EchoControl::Metrics EchoCanceller3::GetMetrics() const {
797   RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
798   Metrics metrics;
799   block_processor_->GetMetrics(&metrics);
800   return metrics;
801 }
802 
// Forwards the audio buffer delay, given in milliseconds, to the block
// processor. Checked against the capture-side race checker.
void EchoCanceller3::SetAudioBufferDelay(int delay_ms) {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
  block_processor_->SetAudioBufferDelay(delay_ms);
}
807 
// Always reports true, irrespective of configuration or state.
bool EchoCanceller3::ActiveProcessing() const {
  return true;
}
811 
CreateDefaultConfig(size_t num_render_channels,size_t num_capture_channels)812 EchoCanceller3Config EchoCanceller3::CreateDefaultConfig(
813     size_t num_render_channels,
814     size_t num_capture_channels) {
815   EchoCanceller3Config cfg;
816   if (num_render_channels > 1) {
817     // Use shorter and more rapidly adapting coarse filter to compensate for
818     // thge increased number of total filter parameters to adapt.
819     cfg.filter.coarse.length_blocks = 11;
820     cfg.filter.coarse.rate = 0.95f;
821     cfg.filter.coarse_initial.length_blocks = 11;
822     cfg.filter.coarse_initial.rate = 0.95f;
823 
824     // Use more concervative suppressor behavior for non-nearend speech.
825     cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f;
826     cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
827   }
828   return cfg;
829 }
830 
EmptyRenderQueue()831 void EchoCanceller3::EmptyRenderQueue() {
832   RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
833   bool frame_to_buffer =
834       render_transfer_queue_.Remove(&render_queue_output_frame_);
835   while (frame_to_buffer) {
836     // Report render call in the metrics.
837     api_call_metrics_.ReportRenderCall();
838 
839     BufferRenderFrameContent(&render_queue_output_frame_, 0, &render_blocker_,
840                              block_processor_.get(), &render_block_,
841                              &render_sub_frame_view_);
842 
843     BufferRenderFrameContent(&render_queue_output_frame_, 1, &render_blocker_,
844                              block_processor_.get(), &render_block_,
845                              &render_sub_frame_view_);
846 
847     BufferRemainingRenderFrameContent(&render_blocker_, block_processor_.get(),
848                                       &render_block_);
849 
850     frame_to_buffer =
851         render_transfer_queue_.Remove(&render_queue_output_frame_);
852   }
853 }
854 }  // namespace webrtc
855