1 /*
2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10 #include "modules/audio_processing/aec3/echo_canceller3.h"
11
12 #include <algorithm>
13 #include <utility>
14
15 #include "modules/audio_processing/aec3/aec3_common.h"
16 #include "modules/audio_processing/high_pass_filter.h"
17 #include "modules/audio_processing/logging/apm_data_dumper.h"
18 #include "rtc_base/atomic_ops.h"
19 #include "rtc_base/experiments/field_trial_parser.h"
20 #include "rtc_base/logging.h"
21 #include "system_wrappers/include/field_trial.h"
22
23 namespace webrtc {
24
25 namespace {
26
// Identifies an EchoCanceller3 API entry point. The integer value of an
// enumerator is what gets written to the "aec3_call_order" data dump (see
// AnalyzeRender), so the values are spelled out explicitly.
enum class EchoCanceller3ApiCall {
  kCapture = 0,
  kRender = 1,
};
28
DetectSaturation(rtc::ArrayView<const float> y)29 bool DetectSaturation(rtc::ArrayView<const float> y) {
30 for (auto y_k : y) {
31 if (y_k >= 32700.0f || y_k <= -32700.0f) {
32 return true;
33 }
34 }
35 return false;
36 }
37
38 // Retrieves a value from a field trial if it is available. If no value is
39 // present, the default value is returned. If the retrieved value is beyond the
40 // specified limits, the default value is returned instead.
RetrieveFieldTrialValue(const char * trial_name,float min,float max,float * value_to_update)41 void RetrieveFieldTrialValue(const char* trial_name,
42 float min,
43 float max,
44 float* value_to_update) {
45 const std::string field_trial_str = field_trial::FindFullName(trial_name);
46
47 FieldTrialParameter<double> field_trial_param(/*key=*/"", *value_to_update);
48
49 ParseFieldTrial({&field_trial_param}, field_trial_str);
50 float field_trial_value = static_cast<float>(field_trial_param.Get());
51
52 if (field_trial_value >= min && field_trial_value <= max) {
53 *value_to_update = field_trial_value;
54 }
55 }
56
RetrieveFieldTrialValue(const char * trial_name,int min,int max,int * value_to_update)57 void RetrieveFieldTrialValue(const char* trial_name,
58 int min,
59 int max,
60 int* value_to_update) {
61 const std::string field_trial_str = field_trial::FindFullName(trial_name);
62
63 FieldTrialParameter<int> field_trial_param(/*key=*/"", *value_to_update);
64
65 ParseFieldTrial({&field_trial_param}, field_trial_str);
66 float field_trial_value = field_trial_param.Get();
67
68 if (field_trial_value >= min && field_trial_value <= max) {
69 *value_to_update = field_trial_value;
70 }
71 }
72
FillSubFrameView(AudioBuffer * frame,size_t sub_frame_index,std::vector<std::vector<rtc::ArrayView<float>>> * sub_frame_view)73 void FillSubFrameView(
74 AudioBuffer* frame,
75 size_t sub_frame_index,
76 std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
77 RTC_DCHECK_GE(1, sub_frame_index);
78 RTC_DCHECK_LE(0, sub_frame_index);
79 RTC_DCHECK_EQ(frame->num_bands(), sub_frame_view->size());
80 RTC_DCHECK_EQ(frame->num_channels(), (*sub_frame_view)[0].size());
81 for (size_t band = 0; band < sub_frame_view->size(); ++band) {
82 for (size_t channel = 0; channel < (*sub_frame_view)[0].size(); ++channel) {
83 (*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
84 &frame->split_bands(channel)[band][sub_frame_index * kSubFrameLength],
85 kSubFrameLength);
86 }
87 }
88 }
89
FillSubFrameView(std::vector<std::vector<std::vector<float>>> * frame,size_t sub_frame_index,std::vector<std::vector<rtc::ArrayView<float>>> * sub_frame_view)90 void FillSubFrameView(
91 std::vector<std::vector<std::vector<float>>>* frame,
92 size_t sub_frame_index,
93 std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
94 RTC_DCHECK_GE(1, sub_frame_index);
95 RTC_DCHECK_EQ(frame->size(), sub_frame_view->size());
96 RTC_DCHECK_EQ((*frame)[0].size(), (*sub_frame_view)[0].size());
97 for (size_t band = 0; band < frame->size(); ++band) {
98 for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) {
99 (*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
100 &(*frame)[band][channel][sub_frame_index * kSubFrameLength],
101 kSubFrameLength);
102 }
103 }
104 }
105
ProcessCaptureFrameContent(AudioBuffer * linear_output,AudioBuffer * capture,bool level_change,bool saturated_microphone_signal,size_t sub_frame_index,FrameBlocker * capture_blocker,BlockFramer * linear_output_framer,BlockFramer * output_framer,BlockProcessor * block_processor,std::vector<std::vector<std::vector<float>>> * linear_output_block,std::vector<std::vector<rtc::ArrayView<float>>> * linear_output_sub_frame_view,std::vector<std::vector<std::vector<float>>> * capture_block,std::vector<std::vector<rtc::ArrayView<float>>> * capture_sub_frame_view)106 void ProcessCaptureFrameContent(
107 AudioBuffer* linear_output,
108 AudioBuffer* capture,
109 bool level_change,
110 bool saturated_microphone_signal,
111 size_t sub_frame_index,
112 FrameBlocker* capture_blocker,
113 BlockFramer* linear_output_framer,
114 BlockFramer* output_framer,
115 BlockProcessor* block_processor,
116 std::vector<std::vector<std::vector<float>>>* linear_output_block,
117 std::vector<std::vector<rtc::ArrayView<float>>>*
118 linear_output_sub_frame_view,
119 std::vector<std::vector<std::vector<float>>>* capture_block,
120 std::vector<std::vector<rtc::ArrayView<float>>>* capture_sub_frame_view) {
121 FillSubFrameView(capture, sub_frame_index, capture_sub_frame_view);
122
123 if (linear_output) {
124 RTC_DCHECK(linear_output_framer);
125 RTC_DCHECK(linear_output_block);
126 RTC_DCHECK(linear_output_sub_frame_view);
127 FillSubFrameView(linear_output, sub_frame_index,
128 linear_output_sub_frame_view);
129 }
130
131 capture_blocker->InsertSubFrameAndExtractBlock(*capture_sub_frame_view,
132 capture_block);
133 block_processor->ProcessCapture(level_change, saturated_microphone_signal,
134 linear_output_block, capture_block);
135 output_framer->InsertBlockAndExtractSubFrame(*capture_block,
136 capture_sub_frame_view);
137
138 if (linear_output) {
139 RTC_DCHECK(linear_output_framer);
140 linear_output_framer->InsertBlockAndExtractSubFrame(
141 *linear_output_block, linear_output_sub_frame_view);
142 }
143 }
144
ProcessRemainingCaptureFrameContent(bool level_change,bool saturated_microphone_signal,FrameBlocker * capture_blocker,BlockFramer * linear_output_framer,BlockFramer * output_framer,BlockProcessor * block_processor,std::vector<std::vector<std::vector<float>>> * linear_output_block,std::vector<std::vector<std::vector<float>>> * block)145 void ProcessRemainingCaptureFrameContent(
146 bool level_change,
147 bool saturated_microphone_signal,
148 FrameBlocker* capture_blocker,
149 BlockFramer* linear_output_framer,
150 BlockFramer* output_framer,
151 BlockProcessor* block_processor,
152 std::vector<std::vector<std::vector<float>>>* linear_output_block,
153 std::vector<std::vector<std::vector<float>>>* block) {
154 if (!capture_blocker->IsBlockAvailable()) {
155 return;
156 }
157
158 capture_blocker->ExtractBlock(block);
159 block_processor->ProcessCapture(level_change, saturated_microphone_signal,
160 linear_output_block, block);
161 output_framer->InsertBlock(*block);
162
163 if (linear_output_framer) {
164 RTC_DCHECK(linear_output_block);
165 linear_output_framer->InsertBlock(*linear_output_block);
166 }
167 }
168
BufferRenderFrameContent(std::vector<std::vector<std::vector<float>>> * render_frame,size_t sub_frame_index,FrameBlocker * render_blocker,BlockProcessor * block_processor,std::vector<std::vector<std::vector<float>>> * block,std::vector<std::vector<rtc::ArrayView<float>>> * sub_frame_view)169 void BufferRenderFrameContent(
170 std::vector<std::vector<std::vector<float>>>* render_frame,
171 size_t sub_frame_index,
172 FrameBlocker* render_blocker,
173 BlockProcessor* block_processor,
174 std::vector<std::vector<std::vector<float>>>* block,
175 std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
176 FillSubFrameView(render_frame, sub_frame_index, sub_frame_view);
177 render_blocker->InsertSubFrameAndExtractBlock(*sub_frame_view, block);
178 block_processor->BufferRender(*block);
179 }
180
BufferRemainingRenderFrameContent(FrameBlocker * render_blocker,BlockProcessor * block_processor,std::vector<std::vector<std::vector<float>>> * block)181 void BufferRemainingRenderFrameContent(
182 FrameBlocker* render_blocker,
183 BlockProcessor* block_processor,
184 std::vector<std::vector<std::vector<float>>>* block) {
185 if (!render_blocker->IsBlockAvailable()) {
186 return;
187 }
188 render_blocker->ExtractBlock(block);
189 block_processor->BufferRender(*block);
190 }
191
CopyBufferIntoFrame(const AudioBuffer & buffer,size_t num_bands,size_t num_channels,std::vector<std::vector<std::vector<float>>> * frame)192 void CopyBufferIntoFrame(const AudioBuffer& buffer,
193 size_t num_bands,
194 size_t num_channels,
195 std::vector<std::vector<std::vector<float>>>* frame) {
196 RTC_DCHECK_EQ(num_bands, frame->size());
197 RTC_DCHECK_EQ(num_channels, (*frame)[0].size());
198 RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, (*frame)[0][0].size());
199 for (size_t band = 0; band < num_bands; ++band) {
200 for (size_t channel = 0; channel < num_channels; ++channel) {
201 rtc::ArrayView<const float> buffer_view(
202 &buffer.split_bands_const(channel)[band][0],
203 AudioBuffer::kSplitBandSize);
204 std::copy(buffer_view.begin(), buffer_view.end(),
205 (*frame)[band][channel].begin());
206 }
207 }
208 }
209
210 } // namespace
211
212 // TODO(webrtc:5298): Move this to a separate file.
AdjustConfig(const EchoCanceller3Config & config)213 EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
214 EchoCanceller3Config adjusted_cfg = config;
215
216 if (field_trial::IsEnabled("WebRTC-Aec3UseShortConfigChangeDuration")) {
217 adjusted_cfg.filter.config_change_duration_blocks = 10;
218 }
219
220 if (field_trial::IsEnabled("WebRTC-Aec3UseZeroInitialStateDuration")) {
221 adjusted_cfg.filter.initial_state_seconds = 0.f;
222 } else if (field_trial::IsEnabled(
223 "WebRTC-Aec3UseDot1SecondsInitialStateDuration")) {
224 adjusted_cfg.filter.initial_state_seconds = .1f;
225 } else if (field_trial::IsEnabled(
226 "WebRTC-Aec3UseDot2SecondsInitialStateDuration")) {
227 adjusted_cfg.filter.initial_state_seconds = .2f;
228 } else if (field_trial::IsEnabled(
229 "WebRTC-Aec3UseDot3SecondsInitialStateDuration")) {
230 adjusted_cfg.filter.initial_state_seconds = .3f;
231 } else if (field_trial::IsEnabled(
232 "WebRTC-Aec3UseDot6SecondsInitialStateDuration")) {
233 adjusted_cfg.filter.initial_state_seconds = .6f;
234 } else if (field_trial::IsEnabled(
235 "WebRTC-Aec3UseDot9SecondsInitialStateDuration")) {
236 adjusted_cfg.filter.initial_state_seconds = .9f;
237 } else if (field_trial::IsEnabled(
238 "WebRTC-Aec3Use1Dot2SecondsInitialStateDuration")) {
239 adjusted_cfg.filter.initial_state_seconds = 1.2f;
240 } else if (field_trial::IsEnabled(
241 "WebRTC-Aec3Use1Dot6SecondsInitialStateDuration")) {
242 adjusted_cfg.filter.initial_state_seconds = 1.6f;
243 } else if (field_trial::IsEnabled(
244 "WebRTC-Aec3Use2Dot0SecondsInitialStateDuration")) {
245 adjusted_cfg.filter.initial_state_seconds = 2.0f;
246 }
247
248 if (field_trial::IsEnabled("WebRTC-Aec3EchoSaturationDetectionKillSwitch")) {
249 adjusted_cfg.ep_strength.echo_can_saturate = false;
250 }
251
252 if (field_trial::IsEnabled("WebRTC-Aec3UseDot2ReverbDefaultLen")) {
253 adjusted_cfg.ep_strength.default_len = 0.2f;
254 } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot3ReverbDefaultLen")) {
255 adjusted_cfg.ep_strength.default_len = 0.3f;
256 } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot4ReverbDefaultLen")) {
257 adjusted_cfg.ep_strength.default_len = 0.4f;
258 } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot5ReverbDefaultLen")) {
259 adjusted_cfg.ep_strength.default_len = 0.5f;
260 } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot6ReverbDefaultLen")) {
261 adjusted_cfg.ep_strength.default_len = 0.6f;
262 } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot7ReverbDefaultLen")) {
263 adjusted_cfg.ep_strength.default_len = 0.7f;
264 } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot8ReverbDefaultLen")) {
265 adjusted_cfg.ep_strength.default_len = 0.8f;
266 }
267
268 if (field_trial::IsEnabled("WebRTC-Aec3ShortHeadroomKillSwitch")) {
269 // Two blocks headroom.
270 adjusted_cfg.delay.delay_headroom_samples = kBlockSize * 2;
271 }
272
273 if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToZeroKillSwitch")) {
274 adjusted_cfg.erle.clamp_quality_estimate_to_zero = false;
275 }
276
277 if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToOneKillSwitch")) {
278 adjusted_cfg.erle.clamp_quality_estimate_to_one = false;
279 }
280
281 if (field_trial::IsEnabled("WebRTC-Aec3OnsetDetectionKillSwitch")) {
282 adjusted_cfg.erle.onset_detection = false;
283 }
284
285 if (field_trial::IsEnabled(
286 "WebRTC-Aec3EnforceRenderDelayEstimationDownmixing")) {
287 adjusted_cfg.delay.render_alignment_mixing.downmix = true;
288 adjusted_cfg.delay.render_alignment_mixing.adaptive_selection = false;
289 }
290
291 if (field_trial::IsEnabled(
292 "WebRTC-Aec3EnforceCaptureDelayEstimationDownmixing")) {
293 adjusted_cfg.delay.capture_alignment_mixing.downmix = true;
294 adjusted_cfg.delay.capture_alignment_mixing.adaptive_selection = false;
295 }
296
297 if (field_trial::IsEnabled(
298 "WebRTC-Aec3EnforceCaptureDelayEstimationLeftRightPrioritization")) {
299 adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels =
300 true;
301 }
302
303 if (field_trial::IsEnabled(
304 "WebRTC-"
305 "Aec3RenderDelayEstimationLeftRightPrioritizationKillSwitch")) {
306 adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels =
307 false;
308 }
309
310 if (field_trial::IsEnabled("WebRTC-Aec3SensitiveDominantNearendActivation")) {
311 adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.5f;
312 } else if (field_trial::IsEnabled(
313 "WebRTC-Aec3VerySensitiveDominantNearendActivation")) {
314 adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.75f;
315 }
316
317 if (field_trial::IsEnabled("WebRTC-Aec3TransparentAntiHowlingGain")) {
318 adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 1.f;
319 }
320
321 if (field_trial::IsEnabled(
322 "WebRTC-Aec3EnforceMoreTransparentNormalSuppressorTuning")) {
323 adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent = 0.4f;
324 adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress = 0.5f;
325 }
326
327 if (field_trial::IsEnabled(
328 "WebRTC-Aec3EnforceMoreTransparentNearendSuppressorTuning")) {
329 adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent = 1.29f;
330 adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress = 1.3f;
331 }
332
333 if (field_trial::IsEnabled(
334 "WebRTC-Aec3EnforceMoreTransparentNormalSuppressorHfTuning")) {
335 adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent = 0.3f;
336 adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress = 0.4f;
337 }
338
339 if (field_trial::IsEnabled(
340 "WebRTC-Aec3EnforceMoreTransparentNearendSuppressorHfTuning")) {
341 adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent = 1.09f;
342 adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress = 1.1f;
343 }
344
345 if (field_trial::IsEnabled(
346 "WebRTC-Aec3EnforceRapidlyAdjustingNormalSuppressorTunings")) {
347 adjusted_cfg.suppressor.normal_tuning.max_inc_factor = 2.5f;
348 }
349
350 if (field_trial::IsEnabled(
351 "WebRTC-Aec3EnforceRapidlyAdjustingNearendSuppressorTunings")) {
352 adjusted_cfg.suppressor.nearend_tuning.max_inc_factor = 2.5f;
353 }
354
355 if (field_trial::IsEnabled(
356 "WebRTC-Aec3EnforceSlowlyAdjustingNormalSuppressorTunings")) {
357 adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf = .2f;
358 }
359
360 if (field_trial::IsEnabled(
361 "WebRTC-Aec3EnforceSlowlyAdjustingNearendSuppressorTunings")) {
362 adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf = .2f;
363 }
364
365 if (field_trial::IsEnabled("WebRTC-Aec3EnforceStationarityProperties")) {
366 adjusted_cfg.echo_audibility.use_stationarity_properties = true;
367 }
368
369 if (field_trial::IsEnabled(
370 "WebRTC-Aec3EnforceStationarityPropertiesAtInit")) {
371 adjusted_cfg.echo_audibility.use_stationarity_properties_at_init = true;
372 }
373
374 if (field_trial::IsEnabled("WebRTC-Aec3EnforceLowActiveRenderLimit")) {
375 adjusted_cfg.render_levels.active_render_limit = 50.f;
376 } else if (field_trial::IsEnabled(
377 "WebRTC-Aec3EnforceVeryLowActiveRenderLimit")) {
378 adjusted_cfg.render_levels.active_render_limit = 30.f;
379 }
380
381 // Field-trial based override for the whole suppressor tuning.
382 const std::string suppressor_tuning_override_trial_name =
383 field_trial::FindFullName("WebRTC-Aec3SuppressorTuningOverride");
384
385 FieldTrialParameter<double> nearend_tuning_mask_lf_enr_transparent(
386 "nearend_tuning_mask_lf_enr_transparent",
387 adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
388 FieldTrialParameter<double> nearend_tuning_mask_lf_enr_suppress(
389 "nearend_tuning_mask_lf_enr_suppress",
390 adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
391 FieldTrialParameter<double> nearend_tuning_mask_hf_enr_transparent(
392 "nearend_tuning_mask_hf_enr_transparent",
393 adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
394 FieldTrialParameter<double> nearend_tuning_mask_hf_enr_suppress(
395 "nearend_tuning_mask_hf_enr_suppress",
396 adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
397 FieldTrialParameter<double> nearend_tuning_max_inc_factor(
398 "nearend_tuning_max_inc_factor",
399 adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
400 FieldTrialParameter<double> nearend_tuning_max_dec_factor_lf(
401 "nearend_tuning_max_dec_factor_lf",
402 adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
403 FieldTrialParameter<double> normal_tuning_mask_lf_enr_transparent(
404 "normal_tuning_mask_lf_enr_transparent",
405 adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
406 FieldTrialParameter<double> normal_tuning_mask_lf_enr_suppress(
407 "normal_tuning_mask_lf_enr_suppress",
408 adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
409 FieldTrialParameter<double> normal_tuning_mask_hf_enr_transparent(
410 "normal_tuning_mask_hf_enr_transparent",
411 adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
412 FieldTrialParameter<double> normal_tuning_mask_hf_enr_suppress(
413 "normal_tuning_mask_hf_enr_suppress",
414 adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
415 FieldTrialParameter<double> normal_tuning_max_inc_factor(
416 "normal_tuning_max_inc_factor",
417 adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
418 FieldTrialParameter<double> normal_tuning_max_dec_factor_lf(
419 "normal_tuning_max_dec_factor_lf",
420 adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
421 FieldTrialParameter<double> dominant_nearend_detection_enr_threshold(
422 "dominant_nearend_detection_enr_threshold",
423 adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
424 FieldTrialParameter<double> dominant_nearend_detection_enr_exit_threshold(
425 "dominant_nearend_detection_enr_exit_threshold",
426 adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
427 FieldTrialParameter<double> dominant_nearend_detection_snr_threshold(
428 "dominant_nearend_detection_snr_threshold",
429 adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
430 FieldTrialParameter<int> dominant_nearend_detection_hold_duration(
431 "dominant_nearend_detection_hold_duration",
432 adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
433 FieldTrialParameter<int> dominant_nearend_detection_trigger_threshold(
434 "dominant_nearend_detection_trigger_threshold",
435 adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
436 FieldTrialParameter<double> ep_strength_default_len(
437 "ep_strength_default_len", adjusted_cfg.ep_strength.default_len);
438
439 ParseFieldTrial(
440 {&nearend_tuning_mask_lf_enr_transparent,
441 &nearend_tuning_mask_lf_enr_suppress,
442 &nearend_tuning_mask_hf_enr_transparent,
443 &nearend_tuning_mask_hf_enr_suppress, &nearend_tuning_max_inc_factor,
444 &nearend_tuning_max_dec_factor_lf,
445 &normal_tuning_mask_lf_enr_transparent,
446 &normal_tuning_mask_lf_enr_suppress,
447 &normal_tuning_mask_hf_enr_transparent,
448 &normal_tuning_mask_hf_enr_suppress, &normal_tuning_max_inc_factor,
449 &normal_tuning_max_dec_factor_lf,
450 &dominant_nearend_detection_enr_threshold,
451 &dominant_nearend_detection_enr_exit_threshold,
452 &dominant_nearend_detection_snr_threshold,
453 &dominant_nearend_detection_hold_duration,
454 &dominant_nearend_detection_trigger_threshold, &ep_strength_default_len},
455 suppressor_tuning_override_trial_name);
456
457 adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent =
458 static_cast<float>(nearend_tuning_mask_lf_enr_transparent.Get());
459 adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress =
460 static_cast<float>(nearend_tuning_mask_lf_enr_suppress.Get());
461 adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent =
462 static_cast<float>(nearend_tuning_mask_hf_enr_transparent.Get());
463 adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress =
464 static_cast<float>(nearend_tuning_mask_hf_enr_suppress.Get());
465 adjusted_cfg.suppressor.nearend_tuning.max_inc_factor =
466 static_cast<float>(nearend_tuning_max_inc_factor.Get());
467 adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf =
468 static_cast<float>(nearend_tuning_max_dec_factor_lf.Get());
469 adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent =
470 static_cast<float>(normal_tuning_mask_lf_enr_transparent.Get());
471 adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress =
472 static_cast<float>(normal_tuning_mask_lf_enr_suppress.Get());
473 adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent =
474 static_cast<float>(normal_tuning_mask_hf_enr_transparent.Get());
475 adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress =
476 static_cast<float>(normal_tuning_mask_hf_enr_suppress.Get());
477 adjusted_cfg.suppressor.normal_tuning.max_inc_factor =
478 static_cast<float>(normal_tuning_max_inc_factor.Get());
479 adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf =
480 static_cast<float>(normal_tuning_max_dec_factor_lf.Get());
481 adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold =
482 static_cast<float>(dominant_nearend_detection_enr_threshold.Get());
483 adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold =
484 static_cast<float>(dominant_nearend_detection_enr_exit_threshold.Get());
485 adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold =
486 static_cast<float>(dominant_nearend_detection_snr_threshold.Get());
487 adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration =
488 dominant_nearend_detection_hold_duration.Get();
489 adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold =
490 dominant_nearend_detection_trigger_threshold.Get();
491 adjusted_cfg.ep_strength.default_len =
492 static_cast<float>(ep_strength_default_len.Get());
493
494 // Field trial-based overrides of individual suppressor parameters.
495 RetrieveFieldTrialValue(
496 "WebRTC-Aec3SuppressorNearendLfMaskTransparentOverride", 0.f, 10.f,
497 &adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
498 RetrieveFieldTrialValue(
499 "WebRTC-Aec3SuppressorNearendLfMaskSuppressOverride", 0.f, 10.f,
500 &adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
501 RetrieveFieldTrialValue(
502 "WebRTC-Aec3SuppressorNearendHfMaskTransparentOverride", 0.f, 10.f,
503 &adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
504 RetrieveFieldTrialValue(
505 "WebRTC-Aec3SuppressorNearendHfMaskSuppressOverride", 0.f, 10.f,
506 &adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
507 RetrieveFieldTrialValue(
508 "WebRTC-Aec3SuppressorNearendMaxIncFactorOverride", 0.f, 10.f,
509 &adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
510 RetrieveFieldTrialValue(
511 "WebRTC-Aec3SuppressorNearendMaxDecFactorLfOverride", 0.f, 10.f,
512 &adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
513
514 RetrieveFieldTrialValue(
515 "WebRTC-Aec3SuppressorNormalLfMaskTransparentOverride", 0.f, 10.f,
516 &adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
517 RetrieveFieldTrialValue(
518 "WebRTC-Aec3SuppressorNormalLfMaskSuppressOverride", 0.f, 10.f,
519 &adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
520 RetrieveFieldTrialValue(
521 "WebRTC-Aec3SuppressorNormalHfMaskTransparentOverride", 0.f, 10.f,
522 &adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
523 RetrieveFieldTrialValue(
524 "WebRTC-Aec3SuppressorNormalHfMaskSuppressOverride", 0.f, 10.f,
525 &adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
526 RetrieveFieldTrialValue(
527 "WebRTC-Aec3SuppressorNormalMaxIncFactorOverride", 0.f, 10.f,
528 &adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
529 RetrieveFieldTrialValue(
530 "WebRTC-Aec3SuppressorNormalMaxDecFactorLfOverride", 0.f, 10.f,
531 &adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
532
533 RetrieveFieldTrialValue(
534 "WebRTC-Aec3SuppressorDominantNearendEnrThresholdOverride", 0.f, 100.f,
535 &adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
536 RetrieveFieldTrialValue(
537 "WebRTC-Aec3SuppressorDominantNearendEnrExitThresholdOverride", 0.f,
538 100.f,
539 &adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
540 RetrieveFieldTrialValue(
541 "WebRTC-Aec3SuppressorDominantNearendSnrThresholdOverride", 0.f, 100.f,
542 &adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
543 RetrieveFieldTrialValue(
544 "WebRTC-Aec3SuppressorDominantNearendHoldDurationOverride", 0, 1000,
545 &adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
546 RetrieveFieldTrialValue(
547 "WebRTC-Aec3SuppressorDominantNearendTriggerThresholdOverride", 0, 1000,
548 &adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
549
550 RetrieveFieldTrialValue(
551 "WebRTC-Aec3SuppressorAntiHowlingGainOverride", 0.f, 10.f,
552 &adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain);
553
554 RetrieveFieldTrialValue("WebRTC-Aec3SuppressorEpStrengthDefaultLenOverride",
555 -1.f, 1.f, &adjusted_cfg.ep_strength.default_len);
556
557 return adjusted_cfg;
558 }
559
560 class EchoCanceller3::RenderWriter {
561 public:
562 RenderWriter(ApmDataDumper* data_dumper,
563 SwapQueue<std::vector<std::vector<std::vector<float>>>,
564 Aec3RenderQueueItemVerifier>* render_transfer_queue,
565 size_t num_bands,
566 size_t num_channels);
567 ~RenderWriter();
568 void Insert(const AudioBuffer& input);
569
570 private:
571 ApmDataDumper* data_dumper_;
572 const size_t num_bands_;
573 const size_t num_channels_;
574 HighPassFilter high_pass_filter_;
575 std::vector<std::vector<std::vector<float>>> render_queue_input_frame_;
576 SwapQueue<std::vector<std::vector<std::vector<float>>>,
577 Aec3RenderQueueItemVerifier>* render_transfer_queue_;
578 RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RenderWriter);
579 };
580
// Stores the (non-owned) dumper and queue pointers and allocates the scratch
// frame as num_bands x num_channels x AudioBuffer::kSplitBandSize zeros.
// |data_dumper| must be non-null (checked in debug builds).
EchoCanceller3::RenderWriter::RenderWriter(
    ApmDataDumper* data_dumper,
    SwapQueue<std::vector<std::vector<std::vector<float>>>,
              Aec3RenderQueueItemVerifier>* render_transfer_queue,
    size_t num_bands,
    size_t num_channels)
    : data_dumper_(data_dumper),
      num_bands_(num_bands),
      num_channels_(num_channels),
      // NOTE(review): 16000 is presumably the sample rate of the band that is
      // filtered in Insert() - confirm against HighPassFilter.
      high_pass_filter_(16000, num_channels),
      render_queue_input_frame_(
          num_bands_,
          std::vector<std::vector<float>>(
              num_channels_,
              std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
      render_transfer_queue_(render_transfer_queue) {
  RTC_DCHECK(data_dumper);
}

// Nothing to release explicitly: the pointer members are not owned.
EchoCanceller3::RenderWriter::~RenderWriter() = default;
601
Insert(const AudioBuffer & input)602 void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) {
603 RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, input.num_frames_per_band());
604 RTC_DCHECK_EQ(num_bands_, input.num_bands());
605 RTC_DCHECK_EQ(num_channels_, input.num_channels());
606
607 // TODO(bugs.webrtc.org/8759) Temporary work-around.
608 if (num_bands_ != input.num_bands())
609 return;
610
611 data_dumper_->DumpWav("aec3_render_input", AudioBuffer::kSplitBandSize,
612 &input.split_bands_const(0)[0][0], 16000, 1);
613
614 CopyBufferIntoFrame(input, num_bands_, num_channels_,
615 &render_queue_input_frame_);
616 high_pass_filter_.Process(&render_queue_input_frame_[0]);
617
618 static_cast<void>(render_transfer_queue_->Insert(&render_queue_input_frame_));
619 }
620
// Counter that is atomically incremented for every constructed EchoCanceller3;
// the incremented value is passed to that instance's ApmDataDumper.
int EchoCanceller3::instance_count_ = 0;
622
// Public constructor: applies the field-trial adjustments to |config|, creates
// a BlockProcessor for the adjusted configuration and delegates to the
// constructor that takes the block processor explicitly.
// NOTE(review): AdjustConfig(config) is evaluated twice here (once for this
// object and once for the block processor); a delegating constructor has no
// place to cache the result.
EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
                               int sample_rate_hz,
                               size_t num_render_channels,
                               size_t num_capture_channels)
    : EchoCanceller3(AdjustConfig(config),
                     sample_rate_hz,
                     num_render_channels,
                     num_capture_channels,
                     std::unique_ptr<BlockProcessor>(
                         BlockProcessor::Create(AdjustConfig(config),
                                                sample_rate_hz,
                                                num_render_channels,
                                                num_capture_channels))) {}
// Constructor taking an externally created |block_processor|. |config| is
// stored as given; no field-trial adjustment is applied here.
EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
                               int sample_rate_hz,
                               size_t num_render_channels,
                               size_t num_capture_channels,
                               std::unique_ptr<BlockProcessor> block_processor)
    // Each instance gets a data dumper constructed with the atomically
    // incremented instance count.
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      config_(config),
      sample_rate_hz_(sample_rate_hz),
      num_bands_(NumBandsForRate(sample_rate_hz_)),
      num_render_channels_(num_render_channels),
      num_capture_channels_(num_capture_channels),
      output_framer_(num_bands_, num_capture_channels_),
      capture_blocker_(num_bands_, num_capture_channels_),
      render_blocker_(num_bands_, num_render_channels_),
      // Queue of kRenderTransferQueueSizeFrames zero-initialized frames, each
      // num_bands x num_render_channels x AudioBuffer::kSplitBandSize, with a
      // verifier constructed from the same dimensions.
      render_transfer_queue_(
          kRenderTransferQueueSizeFrames,
          std::vector<std::vector<std::vector<float>>>(
              num_bands_,
              std::vector<std::vector<float>>(
                  num_render_channels_,
                  std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
          Aec3RenderQueueItemVerifier(num_bands_,
                                      num_render_channels_,
                                      AudioBuffer::kSplitBandSize)),
      block_processor_(std::move(block_processor)),
      render_queue_output_frame_(
          num_bands_,
          std::vector<std::vector<float>>(
              num_render_channels_,
              std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
      // Scratch blocks of kBlockSize samples per band and channel.
      render_block_(
          num_bands_,
          std::vector<std::vector<float>>(num_render_channels_,
                                          std::vector<float>(kBlockSize, 0.f))),
      capture_block_(
          num_bands_,
          std::vector<std::vector<float>>(num_capture_channels_,
                                          std::vector<float>(kBlockSize, 0.f))),
      // The views start out empty; they are pointed at sub-frames by
      // FillSubFrameView during processing.
      render_sub_frame_view_(
          num_bands_,
          std::vector<rtc::ArrayView<float>>(num_render_channels_)),
      capture_sub_frame_view_(
          num_bands_,
          std::vector<rtc::ArrayView<float>>(num_capture_channels_)) {
  RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));

  // The delay buffer is only created when a fixed capture delay is configured.
  if (config_.delay.fixed_capture_delay_samples > 0) {
    block_delay_buffer_.reset(new BlockDelayBuffer(
        num_capture_channels_, num_bands_, AudioBuffer::kSplitBandSize,
        config_.delay.fixed_capture_delay_samples));
  }

  render_writer_.reset(new RenderWriter(data_dumper_.get(),
                                        &render_transfer_queue_, num_bands_,
                                        num_render_channels_));

  RTC_DCHECK_EQ(num_bands_, std::max(sample_rate_hz_, 16000) / 16000);
  RTC_DCHECK_GE(kMaxNumBands, num_bands_);

  // The linear AEC output path is optional and uses a single band.
  if (config_.filter.export_linear_aec_output) {
    linear_output_framer_.reset(new BlockFramer(1, num_capture_channels_));
    linear_output_block_ =
        std::make_unique<std::vector<std::vector<std::vector<float>>>>(
            1, std::vector<std::vector<float>>(
                   num_capture_channels_, std::vector<float>(kBlockSize, 0.f)));
    linear_output_sub_frame_view_ =
        std::vector<std::vector<rtc::ArrayView<float>>>(
            1, std::vector<rtc::ArrayView<float>>(num_capture_channels_));
  }
}

// Defaulted; no explicit cleanup is performed here.
EchoCanceller3::~EchoCanceller3() = default;
709
AnalyzeRender(const AudioBuffer & render)710 void EchoCanceller3::AnalyzeRender(const AudioBuffer& render) {
711 RTC_DCHECK_RUNS_SERIALIZED(&render_race_checker_);
712
713 RTC_DCHECK_EQ(render.num_channels(), num_render_channels_);
714 data_dumper_->DumpRaw("aec3_call_order",
715 static_cast<int>(EchoCanceller3ApiCall::kRender));
716
717 return render_writer_->Insert(render);
718 }
719
AnalyzeCapture(const AudioBuffer & capture)720 void EchoCanceller3::AnalyzeCapture(const AudioBuffer& capture) {
721 RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
722 data_dumper_->DumpWav("aec3_capture_analyze_input", capture.num_frames(),
723 capture.channels_const()[0], sample_rate_hz_, 1);
724 saturated_microphone_signal_ = false;
725 for (size_t channel = 0; channel < capture.num_channels(); ++channel) {
726 saturated_microphone_signal_ |=
727 DetectSaturation(rtc::ArrayView<const float>(
728 capture.channels_const()[channel], capture.num_frames()));
729 if (saturated_microphone_signal_) {
730 break;
731 }
732 }
733 }
734
ProcessCapture(AudioBuffer * capture,bool level_change)735 void EchoCanceller3::ProcessCapture(AudioBuffer* capture, bool level_change) {
736 ProcessCapture(capture, nullptr, level_change);
737 }
738
ProcessCapture(AudioBuffer * capture,AudioBuffer * linear_output,bool level_change)739 void EchoCanceller3::ProcessCapture(AudioBuffer* capture,
740 AudioBuffer* linear_output,
741 bool level_change) {
742 RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
743 RTC_DCHECK(capture);
744 RTC_DCHECK_EQ(num_bands_, capture->num_bands());
745 RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, capture->num_frames_per_band());
746 RTC_DCHECK_EQ(capture->num_channels(), num_capture_channels_);
747 data_dumper_->DumpRaw("aec3_call_order",
748 static_cast<int>(EchoCanceller3ApiCall::kCapture));
749
750 if (linear_output && !linear_output_framer_) {
751 RTC_LOG(LS_ERROR) << "Trying to retrieve the linear AEC output without "
752 "properly configuring AEC3.";
753 RTC_NOTREACHED();
754 }
755
756 // Report capture call in the metrics and periodically update API call
757 // metrics.
758 api_call_metrics_.ReportCaptureCall();
759
760 // Optionally delay the capture signal.
761 if (config_.delay.fixed_capture_delay_samples > 0) {
762 RTC_DCHECK(block_delay_buffer_);
763 block_delay_buffer_->DelaySignal(capture);
764 }
765
766 rtc::ArrayView<float> capture_lower_band = rtc::ArrayView<float>(
767 &capture->split_bands(0)[0][0], AudioBuffer::kSplitBandSize);
768
769 data_dumper_->DumpWav("aec3_capture_input", capture_lower_band, 16000, 1);
770
771 EmptyRenderQueue();
772
773 ProcessCaptureFrameContent(linear_output, capture, level_change,
774 saturated_microphone_signal_, 0, &capture_blocker_,
775 linear_output_framer_.get(), &output_framer_,
776 block_processor_.get(), linear_output_block_.get(),
777 &linear_output_sub_frame_view_, &capture_block_,
778 &capture_sub_frame_view_);
779
780 ProcessCaptureFrameContent(linear_output, capture, level_change,
781 saturated_microphone_signal_, 1, &capture_blocker_,
782 linear_output_framer_.get(), &output_framer_,
783 block_processor_.get(), linear_output_block_.get(),
784 &linear_output_sub_frame_view_, &capture_block_,
785 &capture_sub_frame_view_);
786
787 ProcessRemainingCaptureFrameContent(
788 level_change, saturated_microphone_signal_, &capture_blocker_,
789 linear_output_framer_.get(), &output_framer_, block_processor_.get(),
790 linear_output_block_.get(), &capture_block_);
791
792 data_dumper_->DumpWav("aec3_capture_output", AudioBuffer::kSplitBandSize,
793 &capture->split_bands(0)[0][0], 16000, 1);
794 }
795
GetMetrics() const796 EchoControl::Metrics EchoCanceller3::GetMetrics() const {
797 RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
798 Metrics metrics;
799 block_processor_->GetMetrics(&metrics);
800 return metrics;
801 }
802
SetAudioBufferDelay(int delay_ms)803 void EchoCanceller3::SetAudioBufferDelay(int delay_ms) {
804 RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
805 block_processor_->SetAudioBufferDelay(delay_ms);
806 }
807
ActiveProcessing() const808 bool EchoCanceller3::ActiveProcessing() const {
809 return true;
810 }
811
CreateDefaultConfig(size_t num_render_channels,size_t num_capture_channels)812 EchoCanceller3Config EchoCanceller3::CreateDefaultConfig(
813 size_t num_render_channels,
814 size_t num_capture_channels) {
815 EchoCanceller3Config cfg;
816 if (num_render_channels > 1) {
817 // Use shorter and more rapidly adapting coarse filter to compensate for
818 // thge increased number of total filter parameters to adapt.
819 cfg.filter.coarse.length_blocks = 11;
820 cfg.filter.coarse.rate = 0.95f;
821 cfg.filter.coarse_initial.length_blocks = 11;
822 cfg.filter.coarse_initial.rate = 0.95f;
823
824 // Use more concervative suppressor behavior for non-nearend speech.
825 cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f;
826 cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
827 }
828 return cfg;
829 }
830
EmptyRenderQueue()831 void EchoCanceller3::EmptyRenderQueue() {
832 RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
833 bool frame_to_buffer =
834 render_transfer_queue_.Remove(&render_queue_output_frame_);
835 while (frame_to_buffer) {
836 // Report render call in the metrics.
837 api_call_metrics_.ReportRenderCall();
838
839 BufferRenderFrameContent(&render_queue_output_frame_, 0, &render_blocker_,
840 block_processor_.get(), &render_block_,
841 &render_sub_frame_view_);
842
843 BufferRenderFrameContent(&render_queue_output_frame_, 1, &render_blocker_,
844 block_processor_.get(), &render_block_,
845 &render_sub_frame_view_);
846
847 BufferRemainingRenderFrameContent(&render_blocker_, block_processor_.get(),
848 &render_block_);
849
850 frame_to_buffer =
851 render_transfer_queue_.Remove(&render_queue_output_frame_);
852 }
853 }
854 } // namespace webrtc
855