1 /* 2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_H_ 12 #define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_H_ 13 14 #include <array> 15 #include <cstddef> 16 #include <memory> 17 #include <vector> 18 19 #include "api/array_view.h" 20 #include "modules/audio_processing/agc2/rnn_vad/common.h" 21 #include "modules/audio_processing/agc2/rnn_vad/ring_buffer.h" 22 #include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h" 23 #include "modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h" 24 #include "modules/audio_processing/utility/pffft_wrapper.h" 25 26 namespace webrtc { 27 namespace rnn_vad { 28 29 // Class to compute spectral features. 30 class SpectralFeaturesExtractor { 31 public: 32 SpectralFeaturesExtractor(); 33 SpectralFeaturesExtractor(const SpectralFeaturesExtractor&) = delete; 34 SpectralFeaturesExtractor& operator=(const SpectralFeaturesExtractor&) = 35 delete; 36 ~SpectralFeaturesExtractor(); 37 // Resets the internal state of the feature extractor. 38 void Reset(); 39 // Analyzes a pair of reference and lagged frames from the pitch buffer, 40 // detects silence and computes features. If silence is detected, the output 41 // is neither computed nor written. 42 bool CheckSilenceComputeFeatures( 43 rtc::ArrayView<const float, kFrameSize20ms24kHz> reference_frame, 44 rtc::ArrayView<const float, kFrameSize20ms24kHz> lagged_frame, 45 rtc::ArrayView<float, kNumBands - kNumLowerBands> higher_bands_cepstrum, 46 rtc::ArrayView<float, kNumLowerBands> average, 47 rtc::ArrayView<float, kNumLowerBands> first_derivative, 48 rtc::ArrayView<float, kNumLowerBands> second_derivative, 49 rtc::ArrayView<float, kNumLowerBands> bands_cross_corr, 50 float* variability); 51 52 private: 53 void ComputeAvgAndDerivatives( 54 rtc::ArrayView<float, kNumLowerBands> average, 55 rtc::ArrayView<float, kNumLowerBands> first_derivative, 56 rtc::ArrayView<float, kNumLowerBands> second_derivative) const; 57 void ComputeNormalizedCepstralCorrelation( 58 rtc::ArrayView<float, kNumLowerBands> bands_cross_corr); 59 float ComputeVariability() const; 60 61 const std::array<float, kFrameSize20ms24kHz / 2> half_window_; 62 Pffft fft_; 63 std::unique_ptr<Pffft::FloatBuffer> fft_buffer_; 64 std::unique_ptr<Pffft::FloatBuffer> reference_frame_fft_; 65 std::unique_ptr<Pffft::FloatBuffer> lagged_frame_fft_; 66 SpectralCorrelator spectral_correlator_; 67 std::array<float, kOpusBands24kHz> reference_frame_bands_energy_; 68 std::array<float, kOpusBands24kHz> lagged_frame_bands_energy_; 69 std::array<float, kOpusBands24kHz> bands_cross_corr_; 70 const std::array<float, kNumBands * kNumBands> dct_table_; 71 RingBuffer<float, kNumBands, kCepstralCoeffsHistorySize> 72 cepstral_coeffs_ring_buf_; 73 SymmetricMatrixBuffer<float, kCepstralCoeffsHistorySize> cepstral_diffs_buf_; 74 }; 75 76 } // namespace rnn_vad 77 } // namespace webrtc 78 79 #endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_H_ 80