1 /*
2  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_H_
12 #define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_H_
13 
14 #include <array>
15 #include <cstddef>
16 #include <memory>
17 #include <vector>
18 
19 #include "api/array_view.h"
20 #include "modules/audio_processing/agc2/rnn_vad/common.h"
21 #include "modules/audio_processing/agc2/rnn_vad/ring_buffer.h"
22 #include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h"
23 #include "modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h"
24 #include "modules/audio_processing/utility/pffft_wrapper.h"
25 
26 namespace webrtc {
27 namespace rnn_vad {
28 
29 // Class to compute spectral features.
   // The extractor keeps internal state between calls (see `Reset()` and the
   // private members below): FFT buffers, per-band arrays, a DCT table and
   // buffers holding cepstral history. Copy construction and copy assignment are
   // deleted, so instances cannot be copied.
30 class SpectralFeaturesExtractor {
31  public:
32   SpectralFeaturesExtractor();
33   SpectralFeaturesExtractor(const SpectralFeaturesExtractor&) = delete;
34   SpectralFeaturesExtractor& operator=(const SpectralFeaturesExtractor&) =
35       delete;
36   ~SpectralFeaturesExtractor();
37   // Resets the internal state of the feature extractor.
38   void Reset();
39   // Analyzes a pair of reference and lagged frames from the pitch buffer,
40   // detects silence and computes features. If silence is detected, the output
41   // is neither computed nor written.
     // Inputs: `reference_frame` and `lagged_frame`, each a fixed-size view of
     // kFrameSize20ms24kHz samples.
     // Outputs: `higher_bands_cepstrum` (kNumBands - kNumLowerBands values);
     // `average`, `first_derivative`, `second_derivative` and `bands_cross_corr`
     // (kNumLowerBands values each); and the scalar pointed to by `variability`.
     // NOTE(review): the bool return value presumably reports whether silence
     // was detected - confirm against the implementation file.
     // NOTE(review): `variability` is a raw pointer; presumably it must be
     // non-null - confirm against the implementation file.
42   bool CheckSilenceComputeFeatures(
43       rtc::ArrayView<const float, kFrameSize20ms24kHz> reference_frame,
44       rtc::ArrayView<const float, kFrameSize20ms24kHz> lagged_frame,
45       rtc::ArrayView<float, kNumBands - kNumLowerBands> higher_bands_cepstrum,
46       rtc::ArrayView<float, kNumLowerBands> average,
47       rtc::ArrayView<float, kNumLowerBands> first_derivative,
48       rtc::ArrayView<float, kNumLowerBands> second_derivative,
49       rtc::ArrayView<float, kNumLowerBands> bands_cross_corr,
50       float* variability);
51
52  private:
     // Takes writable views for `average`, `first_derivative` and
     // `second_derivative` (kNumLowerBands values each). Declared const, so it
     // does not modify the extractor.
     // NOTE(review): presumably derived from the cepstral coefficients history -
     // confirm against the implementation file.
53   void ComputeAvgAndDerivatives(
54       rtc::ArrayView<float, kNumLowerBands> average,
55       rtc::ArrayView<float, kNumLowerBands> first_derivative,
56       rtc::ArrayView<float, kNumLowerBands> second_derivative) const;
     // Takes a writable view for `bands_cross_corr` (kNumLowerBands values).
     // Unlike the other two helpers this one is not const.
     // NOTE(review): presumably it updates `bands_cross_corr_` - confirm.
57   void ComputeNormalizedCepstralCorrelation(
58       rtc::ArrayView<float, kNumLowerBands> bands_cross_corr);
     // Returns a single float; declared const, so it does not modify the
     // extractor.
     // NOTE(review): presumably the value written to `*variability` - confirm.
59   float ComputeVariability() const;
60
     // Constant table with kFrameSize20ms24kHz / 2 entries (half a frame).
     // NOTE(review): presumably one half of a symmetric analysis window applied
     // before the FFT - confirm.
61   const std::array<float, kFrameSize20ms24kHz / 2> half_window_;
     // FFT object and the buffers owned by this instance through unique_ptr.
     // NOTE(review): presumably one scratch buffer plus one spectrum per input
     // frame - confirm.
62   Pffft fft_;
63   std::unique_ptr<Pffft::FloatBuffer> fft_buffer_;
64   std::unique_ptr<Pffft::FloatBuffer> reference_frame_fft_;
65   std::unique_ptr<Pffft::FloatBuffer> lagged_frame_fft_;
66   SpectralCorrelator spectral_correlator_;
     // Per-band arrays with kOpusBands24kHz entries each.
67   std::array<float, kOpusBands24kHz> reference_frame_bands_energy_;
68   std::array<float, kOpusBands24kHz> lagged_frame_bands_energy_;
69   std::array<float, kOpusBands24kHz> bands_cross_corr_;
     // Constant table with kNumBands * kNumBands entries.
     // NOTE(review): presumably a flattened square DCT matrix - confirm layout.
70   const std::array<float, kNumBands * kNumBands> dct_table_;
     // NOTE(review): presumably stores the last kCepstralCoeffsHistorySize
     // vectors of kNumBands cepstral coefficients - confirm against
     // ring_buffer.h.
71   RingBuffer<float, kNumBands, kCepstralCoeffsHistorySize>
72       cepstral_coeffs_ring_buf_;
     // NOTE(review): presumably holds pairwise differences between the cepstral
     // vectors in the history - confirm against symmetric_matrix_buffer.h.
73   SymmetricMatrixBuffer<float, kCepstralCoeffsHistorySize> cepstral_diffs_buf_;
74 };
75 
76 }  // namespace rnn_vad
77 }  // namespace webrtc
78 
79 #endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_H_
80