1 /*
2  *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/residual_echo_detector.h"
12 
13 #include <algorithm>
14 #include <numeric>
15 
16 #include "absl/types/optional.h"
17 #include "modules/audio_processing/audio_buffer.h"
18 #include "modules/audio_processing/logging/apm_data_dumper.h"
19 #include "rtc_base/atomic_ops.h"
20 #include "rtc_base/checks.h"
21 #include "rtc_base/logging.h"
22 #include "system_wrappers/include/metrics.h"
23 
24 namespace {
25 
Power(rtc::ArrayView<const float> input)26 float Power(rtc::ArrayView<const float> input) {
27   if (input.empty()) {
28     return 0.f;
29   }
30   return std::inner_product(input.begin(), input.end(), input.begin(), 0.f) /
31          input.size();
32 }
33 
// Length of the circular render-power histories and number of covariance
// estimators, i.e. how many past render frames each capture frame is compared
// against.
constexpr size_t kLookbackFrames = 650;
// Size passed to the render power queue; also the frame-count threshold after
// which AnalyzeRenderAudio() drops one buffered value.
// TODO(ivoc): Verify the size of this buffer.
constexpr size_t kRenderBufferSize = 30;
// Weight of the newest term in the recursive update of the reliability
// estimate.
constexpr float kAlpha = 0.001f;
// 10 seconds of data, updated every 10 ms (100 updates per second).
constexpr size_t kAggregationBufferSize = 10 * 100;
40 
41 }  // namespace
42 
43 namespace webrtc {
44 
45 int ResidualEchoDetector::instance_count_ = 0;
46 
ResidualEchoDetector()47 ResidualEchoDetector::ResidualEchoDetector()
48     : data_dumper_(
49           new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
50       render_buffer_(kRenderBufferSize),
51       render_power_(kLookbackFrames),
52       render_power_mean_(kLookbackFrames),
53       render_power_std_dev_(kLookbackFrames),
54       covariances_(kLookbackFrames),
55       recent_likelihood_max_(kAggregationBufferSize) {}
56 
57 ResidualEchoDetector::~ResidualEchoDetector() = default;
58 
AnalyzeRenderAudio(rtc::ArrayView<const float> render_audio)59 void ResidualEchoDetector::AnalyzeRenderAudio(
60     rtc::ArrayView<const float> render_audio) {
61   // Dump debug data assuming 48 kHz sample rate (if this assumption is not
62   // valid the dumped audio will need to be converted offline accordingly).
63   data_dumper_->DumpWav("ed_render", render_audio.size(), render_audio.data(),
64                         48000, 1);
65 
66   if (render_buffer_.Size() == 0) {
67     frames_since_zero_buffer_size_ = 0;
68   } else if (frames_since_zero_buffer_size_ >= kRenderBufferSize) {
69     // This can happen in a few cases: at the start of a call, due to a glitch
70     // or due to clock drift. The excess capture value will be ignored.
71     // TODO(ivoc): Include how often this happens in APM stats.
72     render_buffer_.Pop();
73     frames_since_zero_buffer_size_ = 0;
74   }
75   ++frames_since_zero_buffer_size_;
76   float power = Power(render_audio);
77   render_buffer_.Push(power);
78 }
79 
AnalyzeCaptureAudio(rtc::ArrayView<const float> capture_audio)80 void ResidualEchoDetector::AnalyzeCaptureAudio(
81     rtc::ArrayView<const float> capture_audio) {
82   // Dump debug data assuming 48 kHz sample rate (if this assumption is not
83   // valid the dumped audio will need to be converted offline accordingly).
84   data_dumper_->DumpWav("ed_capture", capture_audio.size(),
85                         capture_audio.data(), 48000, 1);
86 
87   if (first_process_call_) {
88     // On the first process call (so the start of a call), we must flush the
89     // render buffer, otherwise the render data will be delayed.
90     render_buffer_.Clear();
91     first_process_call_ = false;
92   }
93 
94   // Get the next render value.
95   const absl::optional<float> buffered_render_power = render_buffer_.Pop();
96   if (!buffered_render_power) {
97     // This can happen in a few cases: at the start of a call, due to a glitch
98     // or due to clock drift. The excess capture value will be ignored.
99     // TODO(ivoc): Include how often this happens in APM stats.
100     return;
101   }
102   // Update the render statistics, and store the statistics in circular buffers.
103   render_statistics_.Update(*buffered_render_power);
104   RTC_DCHECK_LT(next_insertion_index_, kLookbackFrames);
105   render_power_[next_insertion_index_] = *buffered_render_power;
106   render_power_mean_[next_insertion_index_] = render_statistics_.mean();
107   render_power_std_dev_[next_insertion_index_] =
108       render_statistics_.std_deviation();
109 
110   // Get the next capture value, update capture statistics and add the relevant
111   // values to the buffers.
112   const float capture_power = Power(capture_audio);
113   capture_statistics_.Update(capture_power);
114   const float capture_mean = capture_statistics_.mean();
115   const float capture_std_deviation = capture_statistics_.std_deviation();
116 
117   // Update the covariance values and determine the new echo likelihood.
118   echo_likelihood_ = 0.f;
119   size_t read_index = next_insertion_index_;
120 
121   int best_delay = -1;
122   for (size_t delay = 0; delay < covariances_.size(); ++delay) {
123     RTC_DCHECK_LT(read_index, render_power_.size());
124     covariances_[delay].Update(capture_power, capture_mean,
125                                capture_std_deviation, render_power_[read_index],
126                                render_power_mean_[read_index],
127                                render_power_std_dev_[read_index]);
128     read_index = read_index > 0 ? read_index - 1 : kLookbackFrames - 1;
129 
130     if (covariances_[delay].normalized_cross_correlation() > echo_likelihood_) {
131       echo_likelihood_ = covariances_[delay].normalized_cross_correlation();
132       best_delay = static_cast<int>(delay);
133     }
134   }
135   // This is a temporary log message to help find the underlying cause for echo
136   // likelihoods > 1.0.
137   // TODO(ivoc): Remove once the issue is resolved.
138   if (echo_likelihood_ > 1.1f) {
139     // Make sure we don't spam the log.
140     if (log_counter_ < 5 && best_delay != -1) {
141       size_t read_index = kLookbackFrames + next_insertion_index_ - best_delay;
142       if (read_index >= kLookbackFrames) {
143         read_index -= kLookbackFrames;
144       }
145       RTC_DCHECK_LT(read_index, render_power_.size());
146       RTC_LOG_F(LS_ERROR) << "Echo detector internal state: {"
147                              "Echo likelihood: "
148                           << echo_likelihood_ << ", Best Delay: " << best_delay
149                           << ", Covariance: "
150                           << covariances_[best_delay].covariance()
151                           << ", Last capture power: " << capture_power
152                           << ", Capture mean: " << capture_mean
153                           << ", Capture_standard deviation: "
154                           << capture_std_deviation << ", Last render power: "
155                           << render_power_[read_index]
156                           << ", Render mean: " << render_power_mean_[read_index]
157                           << ", Render standard deviation: "
158                           << render_power_std_dev_[read_index]
159                           << ", Reliability: " << reliability_ << "}";
160       log_counter_++;
161     }
162   }
163   RTC_DCHECK_LT(echo_likelihood_, 1.1f);
164 
165   reliability_ = (1.0f - kAlpha) * reliability_ + kAlpha * 1.0f;
166   echo_likelihood_ *= reliability_;
167   // This is a temporary fix to prevent echo likelihood values > 1.0.
168   // TODO(ivoc): Find the root cause of this issue and fix it.
169   echo_likelihood_ = std::min(echo_likelihood_, 1.0f);
170   int echo_percentage = static_cast<int>(echo_likelihood_ * 100);
171   RTC_HISTOGRAM_COUNTS("WebRTC.Audio.ResidualEchoDetector.EchoLikelihood",
172                        echo_percentage, 0, 100, 100 /* number of bins */);
173 
174   // Update the buffer of recent likelihood values.
175   recent_likelihood_max_.Update(echo_likelihood_);
176 
177   // Update the next insertion index.
178   next_insertion_index_ = next_insertion_index_ < (kLookbackFrames - 1)
179                               ? next_insertion_index_ + 1
180                               : 0;
181 }
182 
Initialize(int,int,int,int)183 void ResidualEchoDetector::Initialize(int /*capture_sample_rate_hz*/,
184                                       int /*num_capture_channels*/,
185                                       int /*render_sample_rate_hz*/,
186                                       int /*num_render_channels*/) {
187   render_buffer_.Clear();
188   std::fill(render_power_.begin(), render_power_.end(), 0.f);
189   std::fill(render_power_mean_.begin(), render_power_mean_.end(), 0.f);
190   std::fill(render_power_std_dev_.begin(), render_power_std_dev_.end(), 0.f);
191   render_statistics_.Clear();
192   capture_statistics_.Clear();
193   recent_likelihood_max_.Clear();
194   for (auto& cov : covariances_) {
195     cov.Clear();
196   }
197   echo_likelihood_ = 0.f;
198   next_insertion_index_ = 0;
199   reliability_ = 0.f;
200 }
201 
PackRenderAudioBuffer(AudioBuffer * audio,std::vector<float> * packed_buffer)202 void EchoDetector::PackRenderAudioBuffer(AudioBuffer* audio,
203                                          std::vector<float>* packed_buffer) {
204   packed_buffer->clear();
205   packed_buffer->insert(packed_buffer->end(), audio->channels()[0],
206                         audio->channels()[0] + audio->num_frames());
207 }
208 
GetMetrics() const209 EchoDetector::Metrics ResidualEchoDetector::GetMetrics() const {
210   EchoDetector::Metrics metrics;
211   metrics.echo_likelihood = echo_likelihood_;
212   metrics.echo_likelihood_recent_max = recent_likelihood_max_.max();
213   return metrics;
214 }
215 }  // namespace webrtc
216