1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/transient/transient_detector.h"
12 
13 #include <float.h>
14 #include <string.h>
15 
16 #include <algorithm>
17 #include <cmath>
18 
19 #include "modules/audio_processing/transient/common.h"
20 #include "modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
21 #include "modules/audio_processing/transient/moving_moments.h"
22 #include "modules/audio_processing/transient/wpd_node.h"
23 #include "modules/audio_processing/transient/wpd_tree.h"
24 #include "rtc_base/checks.h"
25 
26 namespace webrtc {
27 
28 static const int kTransientLengthMs = 30;
29 static const int kChunksAtStartupLeftToDelete =
30     kTransientLengthMs / ts::kChunkSizeMs;
31 static const float kDetectThreshold = 16.f;
32 
TransientDetector(int sample_rate_hz)33 TransientDetector::TransientDetector(int sample_rate_hz)
34     : samples_per_chunk_(sample_rate_hz * ts::kChunkSizeMs / 1000),
35       last_first_moment_(),
36       last_second_moment_(),
37       chunks_at_startup_left_to_delete_(kChunksAtStartupLeftToDelete),
38       reference_energy_(1.f),
39       using_reference_(false) {
40   RTC_DCHECK(sample_rate_hz == ts::kSampleRate8kHz ||
41              sample_rate_hz == ts::kSampleRate16kHz ||
42              sample_rate_hz == ts::kSampleRate32kHz ||
43              sample_rate_hz == ts::kSampleRate48kHz);
44   int samples_per_transient = sample_rate_hz * kTransientLengthMs / 1000;
45   // Adjustment to avoid data loss while downsampling, making
46   // |samples_per_chunk_| and |samples_per_transient| always divisible by
47   // |kLeaves|.
48   samples_per_chunk_ -= samples_per_chunk_ % kLeaves;
49   samples_per_transient -= samples_per_transient % kLeaves;
50 
51   tree_leaves_data_length_ = samples_per_chunk_ / kLeaves;
52   wpd_tree_.reset(new WPDTree(samples_per_chunk_,
53                               kDaubechies8HighPassCoefficients,
54                               kDaubechies8LowPassCoefficients,
55                               kDaubechies8CoefficientsLength, kLevels));
56   for (size_t i = 0; i < kLeaves; ++i) {
57     moving_moments_[i].reset(
58         new MovingMoments(samples_per_transient / kLeaves));
59   }
60 
61   first_moments_.reset(new float[tree_leaves_data_length_]);
62   second_moments_.reset(new float[tree_leaves_data_length_]);
63 
64   for (int i = 0; i < kChunksAtStartupLeftToDelete; ++i) {
65     previous_results_.push_back(0.f);
66   }
67 }
68 
~TransientDetector()69 TransientDetector::~TransientDetector() {}
70 
Detect(const float * data,size_t data_length,const float * reference_data,size_t reference_length)71 float TransientDetector::Detect(const float* data,
72                                 size_t data_length,
73                                 const float* reference_data,
74                                 size_t reference_length) {
75   RTC_DCHECK(data);
76   RTC_DCHECK_EQ(samples_per_chunk_, data_length);
77 
78   // TODO(aluebs): Check if these errors can logically happen and if not assert
79   // on them.
80   if (wpd_tree_->Update(data, samples_per_chunk_) != 0) {
81     return -1.f;
82   }
83 
84   float result = 0.f;
85 
86   for (size_t i = 0; i < kLeaves; ++i) {
87     WPDNode* leaf = wpd_tree_->NodeAt(kLevels, i);
88 
89     moving_moments_[i]->CalculateMoments(leaf->data(), tree_leaves_data_length_,
90                                          first_moments_.get(),
91                                          second_moments_.get());
92 
93     // Add value delayed (Use the last moments from the last call to Detect).
94     float unbiased_data = leaf->data()[0] - last_first_moment_[i];
95     result +=
96         unbiased_data * unbiased_data / (last_second_moment_[i] + FLT_MIN);
97 
98     // Add new values.
99     for (size_t j = 1; j < tree_leaves_data_length_; ++j) {
100       unbiased_data = leaf->data()[j] - first_moments_[j - 1];
101       result +=
102           unbiased_data * unbiased_data / (second_moments_[j - 1] + FLT_MIN);
103     }
104 
105     last_first_moment_[i] = first_moments_[tree_leaves_data_length_ - 1];
106     last_second_moment_[i] = second_moments_[tree_leaves_data_length_ - 1];
107   }
108 
109   result /= tree_leaves_data_length_;
110 
111   result *= ReferenceDetectionValue(reference_data, reference_length);
112 
113   if (chunks_at_startup_left_to_delete_ > 0) {
114     chunks_at_startup_left_to_delete_--;
115     result = 0.f;
116   }
117 
118   if (result >= kDetectThreshold) {
119     result = 1.f;
120   } else {
121     // Get proportional value.
122     // Proportion achieved with a squared raised cosine function with domain
123     // [0, kDetectThreshold) and image [0, 1), it's always increasing.
124     const float horizontal_scaling = ts::kPi / kDetectThreshold;
125     const float kHorizontalShift = ts::kPi;
126     const float kVerticalScaling = 0.5f;
127     const float kVerticalShift = 1.f;
128 
129     result = (std::cos(result * horizontal_scaling + kHorizontalShift) +
130               kVerticalShift) *
131              kVerticalScaling;
132     result *= result;
133   }
134 
135   previous_results_.pop_front();
136   previous_results_.push_back(result);
137 
138   // In the current implementation we return the max of the current result and
139   // the previous results, so the high results have a width equals to
140   // |transient_length|.
141   return *std::max_element(previous_results_.begin(), previous_results_.end());
142 }
143 
144 // Looks for the highest slope and compares it with the previous ones.
145 // An exponential transformation takes this to the [0, 1] range. This value is
146 // multiplied by the detection result to avoid false positives.
ReferenceDetectionValue(const float * data,size_t length)147 float TransientDetector::ReferenceDetectionValue(const float* data,
148                                                  size_t length) {
149   if (data == NULL) {
150     using_reference_ = false;
151     return 1.f;
152   }
153   static const float kEnergyRatioThreshold = 0.2f;
154   static const float kReferenceNonLinearity = 20.f;
155   static const float kMemory = 0.99f;
156   float reference_energy = 0.f;
157   for (size_t i = 1; i < length; ++i) {
158     reference_energy += data[i] * data[i];
159   }
160   if (reference_energy == 0.f) {
161     using_reference_ = false;
162     return 1.f;
163   }
164   RTC_DCHECK_NE(0, reference_energy_);
165   float result = 1.f / (1.f + std::exp(kReferenceNonLinearity *
166                                        (kEnergyRatioThreshold -
167                                         reference_energy / reference_energy_)));
168   reference_energy_ =
169       kMemory * reference_energy_ + (1.f - kMemory) * reference_energy;
170 
171   using_reference_ = true;
172 
173   return result;
174 }
175 
176 }  // namespace webrtc
177