1 /*
2  *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/ns/noise_estimator.h"
12 
13 #include <algorithm>
14 
15 #include "modules/audio_processing/ns/fast_math.h"
16 #include "rtc_base/checks.h"
17 
18 namespace webrtc {
19 
20 namespace {
21 
22 // Log(i).
23 constexpr std::array<float, 129> log_table = {
24     0.f,       0.f,       0.f,       0.f,       0.f,       1.609438f, 1.791759f,
25     1.945910f, 2.079442f, 2.197225f, 2.302585f, 2.397895f, 2.484907f, 2.564949f,
26     2.639057f, 2.708050f, 2.772589f, 2.833213f, 2.890372f, 2.944439f, 2.995732f,
27     3.044522f, 3.091043f, 3.135494f, 3.178054f, 3.218876f, 3.258097f, 3.295837f,
28     3.332205f, 3.367296f, 3.401197f, 3.433987f, 3.465736f, 3.496507f, 3.526361f,
29     3.555348f, 3.583519f, 3.610918f, 3.637586f, 3.663562f, 3.688879f, 3.713572f,
30     3.737669f, 3.761200f, 3.784190f, 3.806663f, 3.828641f, 3.850147f, 3.871201f,
31     3.891820f, 3.912023f, 3.931826f, 3.951244f, 3.970292f, 3.988984f, 4.007333f,
32     4.025352f, 4.043051f, 4.060443f, 4.077538f, 4.094345f, 4.110874f, 4.127134f,
33     4.143135f, 4.158883f, 4.174387f, 4.189655f, 4.204693f, 4.219508f, 4.234107f,
34     4.248495f, 4.262680f, 4.276666f, 4.290460f, 4.304065f, 4.317488f, 4.330733f,
35     4.343805f, 4.356709f, 4.369448f, 4.382027f, 4.394449f, 4.406719f, 4.418841f,
36     4.430817f, 4.442651f, 4.454347f, 4.465908f, 4.477337f, 4.488636f, 4.499810f,
37     4.510859f, 4.521789f, 4.532599f, 4.543295f, 4.553877f, 4.564348f, 4.574711f,
38     4.584968f, 4.595119f, 4.605170f, 4.615121f, 4.624973f, 4.634729f, 4.644391f,
39     4.653960f, 4.663439f, 4.672829f, 4.682131f, 4.691348f, 4.700480f, 4.709530f,
40     4.718499f, 4.727388f, 4.736198f, 4.744932f, 4.753591f, 4.762174f, 4.770685f,
41     4.779124f, 4.787492f, 4.795791f, 4.804021f, 4.812184f, 4.820282f, 4.828314f,
42     4.836282f, 4.844187f, 4.852030f};
43 
44 }  // namespace
45 
NoiseEstimator(const SuppressionParams & suppression_params)46 NoiseEstimator::NoiseEstimator(const SuppressionParams& suppression_params)
47     : suppression_params_(suppression_params) {
48   noise_spectrum_.fill(0.f);
49   prev_noise_spectrum_.fill(0.f);
50   conservative_noise_spectrum_.fill(0.f);
51   parametric_noise_spectrum_.fill(0.f);
52 }
53 
PrepareAnalysis()54 void NoiseEstimator::PrepareAnalysis() {
55   std::copy(noise_spectrum_.begin(), noise_spectrum_.end(),
56             prev_noise_spectrum_.begin());
57 }
58 
PreUpdate(int32_t num_analyzed_frames,rtc::ArrayView<const float,kFftSizeBy2Plus1> signal_spectrum,float signal_spectral_sum)59 void NoiseEstimator::PreUpdate(
60     int32_t num_analyzed_frames,
61     rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
62     float signal_spectral_sum) {
63   quantile_noise_estimator_.Estimate(signal_spectrum, noise_spectrum_);
64 
65   if (num_analyzed_frames < kShortStartupPhaseBlocks) {
66     // Compute simplified noise model during startup.
67     const size_t kStartBand = 5;
68     float sum_log_i_log_magn = 0.f;
69     float sum_log_i = 0.f;
70     float sum_log_i_square = 0.f;
71     float sum_log_magn = 0.f;
72     for (size_t i = kStartBand; i < kFftSizeBy2Plus1; ++i) {
73       float log_i = log_table[i];
74       sum_log_i += log_i;
75       sum_log_i_square += log_i * log_i;
76       float log_signal = LogApproximation(signal_spectrum[i]);
77       sum_log_magn += log_signal;
78       sum_log_i_log_magn += log_i * log_signal;
79     }
80 
81     // Estimate the parameter for the level of the white noise.
82     constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1;
83     white_noise_level_ += signal_spectral_sum * kOneByFftSizeBy2Plus1 *
84                           suppression_params_.over_subtraction_factor;
85 
86     // Estimate pink noise parameters.
87     float denom = sum_log_i_square * (kFftSizeBy2Plus1 - kStartBand) -
88                   sum_log_i * sum_log_i;
89     float num =
90         sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn;
91     RTC_DCHECK_NE(denom, 0.f);
92     float pink_noise_adjustment = num / denom;
93 
94     // Constrain the estimated spectrum to be positive.
95     pink_noise_adjustment = std::max(pink_noise_adjustment, 0.f);
96     pink_noise_numerator_ += pink_noise_adjustment;
97     num = sum_log_i * sum_log_magn -
98           (kFftSizeBy2Plus1 - kStartBand) * sum_log_i_log_magn;
99     RTC_DCHECK_NE(denom, 0.f);
100     pink_noise_adjustment = num / denom;
101 
102     // Constrain the pink noise power to be in the interval [0, 1].
103     pink_noise_adjustment = std::max(std::min(pink_noise_adjustment, 1.f), 0.f);
104 
105     pink_noise_exp_ += pink_noise_adjustment;
106 
107     const float one_by_num_analyzed_frames_plus_1 =
108         1.f / (num_analyzed_frames + 1.f);
109 
110     // Calculate the frequency-independent parts of parametric noise estimate.
111     float parametric_exp = 0.f;
112     float parametric_num = 0.f;
113     if (pink_noise_exp_ > 0.f) {
114       // Use pink noise estimate.
115       parametric_num = ExpApproximation(pink_noise_numerator_ *
116                                         one_by_num_analyzed_frames_plus_1);
117       parametric_num *= num_analyzed_frames + 1.f;
118       parametric_exp = pink_noise_exp_ * one_by_num_analyzed_frames_plus_1;
119     }
120 
121     constexpr float kOneByShortStartupPhaseBlocks =
122         1.f / kShortStartupPhaseBlocks;
123     for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
124       // Estimate the background noise using the white and pink noise
125       // parameters.
126       if (pink_noise_exp_ == 0.f) {
127         // Use white noise estimate.
128         parametric_noise_spectrum_[i] = white_noise_level_;
129       } else {
130         // Use pink noise estimate.
131         float use_band = i < kStartBand ? kStartBand : i;
132         float denom = PowApproximation(use_band, parametric_exp);
133         RTC_DCHECK_NE(denom, 0.f);
134         parametric_noise_spectrum_[i] = parametric_num / denom;
135       }
136     }
137 
138     // Weight quantile noise with modeled noise.
139     for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
140       noise_spectrum_[i] *= num_analyzed_frames;
141       float tmp = parametric_noise_spectrum_[i] *
142                   (kShortStartupPhaseBlocks - num_analyzed_frames);
143       noise_spectrum_[i] += tmp * one_by_num_analyzed_frames_plus_1;
144       noise_spectrum_[i] *= kOneByShortStartupPhaseBlocks;
145     }
146   }
147 }
148 
PostUpdate(rtc::ArrayView<const float> speech_probability,rtc::ArrayView<const float,kFftSizeBy2Plus1> signal_spectrum)149 void NoiseEstimator::PostUpdate(
150     rtc::ArrayView<const float> speech_probability,
151     rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) {
152   // Time-avg parameter for noise_spectrum update.
153   constexpr float kNoiseUpdate = 0.9f;
154 
155   float gamma = kNoiseUpdate;
156   for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
157     const float prob_speech = speech_probability[i];
158     const float prob_non_speech = 1.f - prob_speech;
159 
160     // Temporary noise update used for speech frames if update value is less
161     // than previous.
162     float noise_update_tmp =
163         gamma * prev_noise_spectrum_[i] +
164         (1.f - gamma) * (prob_non_speech * signal_spectrum[i] +
165                          prob_speech * prev_noise_spectrum_[i]);
166 
167     // Time-constant based on speech/noise_spectrum state.
168     float gamma_old = gamma;
169 
170     // Increase gamma for frame likely to be seech.
171     constexpr float kProbRange = .2f;
172     gamma = prob_speech > kProbRange ? .99f : kNoiseUpdate;
173 
174     // Conservative noise_spectrum update.
175     if (prob_speech < kProbRange) {
176       conservative_noise_spectrum_[i] +=
177           0.05f * (signal_spectrum[i] - conservative_noise_spectrum_[i]);
178     }
179 
180     // Noise_spectrum update.
181     if (gamma == gamma_old) {
182       noise_spectrum_[i] = noise_update_tmp;
183     } else {
184       noise_spectrum_[i] =
185           gamma * prev_noise_spectrum_[i] +
186           (1.f - gamma) * (prob_non_speech * signal_spectrum[i] +
187                            prob_speech * prev_noise_spectrum_[i]);
188       // Allow for noise_spectrum update downwards: If noise_spectrum update
189       // decreases the noise_spectrum, it is safe, so allow it to happen.
190       noise_spectrum_[i] = std::min(noise_spectrum_[i], noise_update_tmp);
191     }
192   }
193 }
194 
195 }  // namespace webrtc
196