1 /*
2  *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
12 #define MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
13 
14 #include <stddef.h>
15 
16 #include <array>
17 #include <memory>
18 #include <vector>
19 
20 #include "absl/types/optional.h"
21 #include "api/array_view.h"
22 #include "api/audio/echo_canceller3_config.h"
23 #include "modules/audio_processing/aec3/aec3_common.h"
24 #include "modules/audio_processing/aec3/delay_estimate.h"
25 #include "modules/audio_processing/aec3/echo_audibility.h"
26 #include "modules/audio_processing/aec3/echo_path_variability.h"
27 #include "modules/audio_processing/aec3/erl_estimator.h"
28 #include "modules/audio_processing/aec3/erle_estimator.h"
29 #include "modules/audio_processing/aec3/filter_analyzer.h"
30 #include "modules/audio_processing/aec3/render_buffer.h"
31 #include "modules/audio_processing/aec3/reverb_model_estimator.h"
32 #include "modules/audio_processing/aec3/subtractor_output.h"
33 #include "modules/audio_processing/aec3/subtractor_output_analyzer.h"
34 
35 namespace webrtc {
36 
// Forward declaration. AecState only holds the dumper through a
// std::unique_ptr and declares its destructor out of line, so the full
// definition of ApmDataDumper is not needed in this header.
class ApmDataDumper;
38 
39 // Handles the state and the conditions for the echo removal functionality.
40 class AecState {
41  public:
42   AecState(const EchoCanceller3Config& config, size_t num_capture_channels);
43   ~AecState();
44 
45   // Returns whether the echo subtractor can be used to determine the residual
46   // echo.
UsableLinearEstimate()47   bool UsableLinearEstimate() const {
48     return filter_quality_state_.LinearFilterUsable() &&
49            config_.filter.use_linear_filter;
50   }
51 
52   // Returns whether the echo subtractor output should be used as output.
UseLinearFilterOutput()53   bool UseLinearFilterOutput() const {
54     return filter_quality_state_.LinearFilterUsable() &&
55            config_.filter.use_linear_filter;
56   }
57 
58   // Returns whether the render signal is currently active.
ActiveRender()59   bool ActiveRender() const { return blocks_with_active_render_ > 200; }
60 
61   // Returns the appropriate scaling of the residual echo to match the
62   // audibility.
63   void GetResidualEchoScaling(rtc::ArrayView<float> residual_scaling) const;
64 
65   // Returns whether the stationary properties of the signals are used in the
66   // aec.
UseStationarityProperties()67   bool UseStationarityProperties() const {
68     return config_.echo_audibility.use_stationarity_properties;
69   }
70 
71   // Returns the ERLE.
Erle()72   rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
73     return erle_estimator_.Erle();
74   }
75 
76   // Returns an offset to apply to the estimation of the residual echo
77   // computation. Returning nullopt means that no offset should be used, while
78   // any other value will be applied as a multiplier to the estimated residual
79   // echo.
80   absl::optional<float> ErleUncertainty() const;
81 
82   // Returns the fullband ERLE estimate in log2 units.
FullBandErleLog2()83   float FullBandErleLog2() const { return erle_estimator_.FullbandErleLog2(); }
84 
85   // Returns the ERL.
Erl()86   const std::array<float, kFftLengthBy2Plus1>& Erl() const {
87     return erl_estimator_.Erl();
88   }
89 
90   // Returns the time-domain ERL.
ErlTimeDomain()91   float ErlTimeDomain() const { return erl_estimator_.ErlTimeDomain(); }
92 
93   // Returns the delay estimate based on the linear filter.
MinDirectPathFilterDelay()94   int MinDirectPathFilterDelay() const {
95     return delay_state_.MinDirectPathFilterDelay();
96   }
97 
98   // Returns whether the capture signal is saturated.
SaturatedCapture()99   bool SaturatedCapture() const { return capture_signal_saturation_; }
100 
101   // Returns whether the echo signal is saturated.
SaturatedEcho()102   bool SaturatedEcho() const { return saturation_detector_.SaturatedEcho(); }
103 
104   // Updates the capture signal saturation.
UpdateCaptureSaturation(bool capture_signal_saturation)105   void UpdateCaptureSaturation(bool capture_signal_saturation) {
106     capture_signal_saturation_ = capture_signal_saturation;
107   }
108 
109   // Returns whether the transparent mode is active
TransparentMode()110   bool TransparentMode() const {
111     return transparent_mode_activated_ && transparent_state_.Active();
112   }
113 
114   // Takes appropriate action at an echo path change.
115   void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
116 
117   // Returns the decay factor for the echo reverberation.
ReverbDecay()118   float ReverbDecay() const { return reverb_model_estimator_.ReverbDecay(); }
119 
120   // Return the frequency response of the reverberant echo.
GetReverbFrequencyResponse()121   rtc::ArrayView<const float> GetReverbFrequencyResponse() const {
122     return reverb_model_estimator_.GetReverbFrequencyResponse();
123   }
124 
125   // Returns whether the transition for going out of the initial stated has
126   // been triggered.
TransitionTriggered()127   bool TransitionTriggered() const {
128     return initial_state_.TransitionTriggered();
129   }
130 
131   // Updates the aec state.
132   // TODO(bugs.webrtc.org/10913): Compute multi-channel ERL.
133   void Update(
134       const absl::optional<DelayEstimate>& external_delay,
135       rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
136           adaptive_filter_frequency_responses,
137       rtc::ArrayView<const std::vector<float>>
138           adaptive_filter_impulse_responses,
139       const RenderBuffer& render_buffer,
140       rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
141       rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
142       rtc::ArrayView<const SubtractorOutput> subtractor_output);
143 
144   // Returns filter length in blocks.
FilterLengthBlocks()145   int FilterLengthBlocks() const {
146     // All filters have the same length, so arbitrarily return channel 0 length.
147     return filter_analyzer_.FilterLengthBlocks();
148   }
149 
150  private:
151   static int instance_count_;
152   std::unique_ptr<ApmDataDumper> data_dumper_;
153   const EchoCanceller3Config config_;
154   const size_t num_capture_channels_;
155   const bool transparent_mode_activated_;
156   const bool deactivate_initial_state_reset_at_echo_path_change_;
157   const bool full_reset_at_echo_path_change_;
158   const bool subtractor_analyzer_reset_at_echo_path_change_;
159 
160   // Class for controlling the transition from the intial state, which in turn
161   // controls when the filter parameters for the initial state should be used.
162   class InitialState {
163    public:
164     explicit InitialState(const EchoCanceller3Config& config);
165     // Resets the state to again begin in the initial state.
166     void Reset();
167 
168     // Updates the state based on new data.
169     void Update(bool active_render, bool saturated_capture);
170 
171     // Returns whether the initial state is active or not.
InitialStateActive()172     bool InitialStateActive() const { return initial_state_; }
173 
174     // Returns that the transition from the initial state has was started.
TransitionTriggered()175     bool TransitionTriggered() const { return transition_triggered_; }
176 
177    private:
178     const bool conservative_initial_phase_;
179     const float initial_state_seconds_;
180     bool transition_triggered_ = false;
181     bool initial_state_ = true;
182     size_t strong_not_saturated_render_blocks_ = 0;
183   } initial_state_;
184 
185   // Class for choosing the direct-path delay relative to the beginning of the
186   // filter, as well as any other data related to the delay used within
187   // AecState.
188   class FilterDelay {
189    public:
190     FilterDelay(const EchoCanceller3Config& config,
191                 size_t num_capture_channels);
192 
193     // Returns whether an external delay has been reported to the AecState (from
194     // the delay estimator).
ExternalDelayReported()195     bool ExternalDelayReported() const { return external_delay_reported_; }
196 
197     // Returns the delay in blocks relative to the beginning of the filter that
198     // corresponds to the direct path of the echo.
DirectPathFilterDelays()199     rtc::ArrayView<const int> DirectPathFilterDelays() const {
200       return filter_delays_blocks_;
201     }
202 
203     // Returns the minimum delay among the direct path delays relative to the
204     // beginning of the filter
MinDirectPathFilterDelay()205     int MinDirectPathFilterDelay() const { return min_filter_delay_; }
206 
207     // Updates the delay estimates based on new data.
208     void Update(
209         rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
210         const absl::optional<DelayEstimate>& external_delay,
211         size_t blocks_with_proper_filter_adaptation);
212 
213    private:
214     const int delay_headroom_samples_;
215     bool external_delay_reported_ = false;
216     std::vector<int> filter_delays_blocks_;
217     int min_filter_delay_ = 0;
218     absl::optional<DelayEstimate> external_delay_;
219   } delay_state_;
220 
221   // Class for detecting and toggling the transparent mode which causes the
222   // suppressor to apply no suppression.
223   class TransparentMode {
224    public:
225     explicit TransparentMode(const EchoCanceller3Config& config);
226 
227     // Returns whether the transparent mode should be active.
Active()228     bool Active() const { return transparency_activated_; }
229 
230     // Resets the state of the detector.
231     void Reset();
232 
233     // Updates the detection deciscion based on new data.
234     void Update(int filter_delay_blocks,
235                 bool any_filter_consistent,
236                 bool any_filter_converged,
237                 bool all_filters_diverged,
238                 bool active_render,
239                 bool saturated_capture);
240 
241    private:
242     const bool bounded_erl_;
243     const bool linear_and_stable_echo_path_;
244     size_t capture_block_counter_ = 0;
245     bool transparency_activated_ = false;
246     size_t active_blocks_since_sane_filter_;
247     bool sane_filter_observed_ = false;
248     bool finite_erl_recently_detected_ = false;
249     size_t non_converged_sequence_size_;
250     size_t diverged_sequence_size_ = 0;
251     size_t active_non_converged_sequence_size_ = 0;
252     size_t num_converged_blocks_ = 0;
253     bool recent_convergence_during_activity_ = false;
254     size_t strong_not_saturated_render_blocks_ = 0;
255   } transparent_state_;
256 
257   // Class for analyzing how well the linear filter is, and can be expected to,
258   // perform on the current signals. The purpose of this is for using to
259   // select the echo suppression functionality as well as the input to the echo
260   // suppressor.
261   class FilteringQualityAnalyzer {
262    public:
263     FilteringQualityAnalyzer(const EchoCanceller3Config& config,
264                              size_t num_capture_channels);
265 
266     // Returns whether the linear filter can be used for the echo
267     // canceller output.
LinearFilterUsable()268     bool LinearFilterUsable() const { return overall_usable_linear_estimates_; }
269 
270     // Returns whether an individual filter output can be used for the echo
271     // canceller output.
UsableLinearFilterOutputs()272     const std::vector<bool>& UsableLinearFilterOutputs() const {
273       return usable_linear_filter_estimates_;
274     }
275 
276     // Resets the state of the analyzer.
277     void Reset();
278 
279     // Updates the analysis based on new data.
280     void Update(bool active_render,
281                 bool transparent_mode,
282                 bool saturated_capture,
283                 const absl::optional<DelayEstimate>& external_delay,
284                 bool any_filter_converged);
285 
286    private:
287     const bool use_linear_filter_;
288     bool overall_usable_linear_estimates_ = false;
289     size_t filter_update_blocks_since_reset_ = 0;
290     size_t filter_update_blocks_since_start_ = 0;
291     bool convergence_seen_ = false;
292     std::vector<bool> usable_linear_filter_estimates_;
293   } filter_quality_state_;
294 
295   // Class for detecting whether the echo is to be considered to be
296   // saturated.
297   class SaturationDetector {
298    public:
299     // Returns whether the echo is to be considered saturated.
SaturatedEcho()300     bool SaturatedEcho() const { return saturated_echo_; }
301 
302     // Updates the detection decision based on new data.
303     void Update(rtc::ArrayView<const std::vector<float>> x,
304                 bool saturated_capture,
305                 bool usable_linear_estimate,
306                 rtc::ArrayView<const SubtractorOutput> subtractor_output,
307                 float echo_path_gain);
308 
309    private:
310     bool saturated_echo_ = false;
311   } saturation_detector_;
312 
313   ErlEstimator erl_estimator_;
314   ErleEstimator erle_estimator_;
315   size_t strong_not_saturated_render_blocks_ = 0;
316   size_t blocks_with_active_render_ = 0;
317   bool capture_signal_saturation_ = false;
318   FilterAnalyzer filter_analyzer_;
319   absl::optional<DelayEstimate> external_delay_;
320   EchoAudibility echo_audibility_;
321   ReverbModelEstimator reverb_model_estimator_;
322   ReverbModel avg_render_reverb_;
323   SubtractorOutputAnalyzer subtractor_output_analyzer_;
324 };
325 
326 }  // namespace webrtc
327 
328 #endif  // MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
329