1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "webrtc/modules/audio_processing/voice_detection_impl.h"
12 
13 #include "webrtc/common_audio/vad/include/webrtc_vad.h"
14 #include "webrtc/modules/audio_processing/audio_buffer.h"
15 
16 namespace webrtc {
17 class VoiceDetectionImpl::Vad {
18  public:
Vad()19   Vad() {
20     state_ = WebRtcVad_Create();
21     RTC_CHECK(state_);
22     int error = WebRtcVad_Init(state_);
23     RTC_DCHECK_EQ(0, error);
24   }
~Vad()25   ~Vad() {
26     WebRtcVad_Free(state_);
27   }
state()28   VadInst* state() { return state_; }
29  private:
30   VadInst* state_ = nullptr;
31   RTC_DISALLOW_COPY_AND_ASSIGN(Vad);
32 };
33 
VoiceDetectionImpl(rtc::CriticalSection * crit)34 VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
35     : crit_(crit) {
36   RTC_DCHECK(crit);
37 }
38 
~VoiceDetectionImpl()39 VoiceDetectionImpl::~VoiceDetectionImpl() {}
40 
Initialize(int sample_rate_hz)41 void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
42   rtc::CritScope cs(crit_);
43   sample_rate_hz_ = sample_rate_hz;
44   rtc::scoped_ptr<Vad> new_vad;
45   if (enabled_) {
46     new_vad.reset(new Vad());
47   }
48   vad_.swap(new_vad);
49   using_external_vad_ = false;
50   frame_size_samples_ =
51       static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
52   set_likelihood(likelihood_);
53 }
54 
ProcessCaptureAudio(AudioBuffer * audio)55 void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
56   rtc::CritScope cs(crit_);
57   if (!enabled_) {
58     return;
59   }
60   if (using_external_vad_) {
61     using_external_vad_ = false;
62     return;
63   }
64 
65   RTC_DCHECK_GE(160u, audio->num_frames_per_band());
66   // TODO(ajm): concatenate data in frame buffer here.
67   int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
68                                   audio->mixed_low_pass_data(),
69                                   frame_size_samples_);
70   if (vad_ret == 0) {
71     stream_has_voice_ = false;
72     audio->set_activity(AudioFrame::kVadPassive);
73   } else if (vad_ret == 1) {
74     stream_has_voice_ = true;
75     audio->set_activity(AudioFrame::kVadActive);
76   } else {
77     RTC_NOTREACHED();
78   }
79 }
80 
Enable(bool enable)81 int VoiceDetectionImpl::Enable(bool enable) {
82   rtc::CritScope cs(crit_);
83   if (enabled_ != enable) {
84     enabled_ = enable;
85     Initialize(sample_rate_hz_);
86   }
87   return AudioProcessing::kNoError;
88 }
89 
is_enabled() const90 bool VoiceDetectionImpl::is_enabled() const {
91   rtc::CritScope cs(crit_);
92   return enabled_;
93 }
94 
set_stream_has_voice(bool has_voice)95 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
96   rtc::CritScope cs(crit_);
97   using_external_vad_ = true;
98   stream_has_voice_ = has_voice;
99   return AudioProcessing::kNoError;
100 }
101 
stream_has_voice() const102 bool VoiceDetectionImpl::stream_has_voice() const {
103   rtc::CritScope cs(crit_);
104   // TODO(ajm): enable this assertion?
105   //assert(using_external_vad_ || is_component_enabled());
106   return stream_has_voice_;
107 }
108 
set_likelihood(VoiceDetection::Likelihood likelihood)109 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
110   rtc::CritScope cs(crit_);
111   likelihood_ = likelihood;
112   if (enabled_) {
113     int mode = 2;
114     switch (likelihood) {
115       case VoiceDetection::kVeryLowLikelihood:
116         mode = 3;
117         break;
118       case VoiceDetection::kLowLikelihood:
119         mode = 2;
120         break;
121       case VoiceDetection::kModerateLikelihood:
122         mode = 1;
123         break;
124       case VoiceDetection::kHighLikelihood:
125         mode = 0;
126         break;
127       default:
128         RTC_NOTREACHED();
129         break;
130     }
131     int error = WebRtcVad_set_mode(vad_->state(), mode);
132     RTC_DCHECK_EQ(0, error);
133   }
134   return AudioProcessing::kNoError;
135 }
136 
likelihood() const137 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
138   rtc::CritScope cs(crit_);
139   return likelihood_;
140 }
141 
set_frame_size_ms(int size)142 int VoiceDetectionImpl::set_frame_size_ms(int size) {
143   rtc::CritScope cs(crit_);
144   RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported.
145   frame_size_ms_ = size;
146   Initialize(sample_rate_hz_);
147   return AudioProcessing::kNoError;
148 }
149 
frame_size_ms() const150 int VoiceDetectionImpl::frame_size_ms() const {
151   rtc::CritScope cs(crit_);
152   return frame_size_ms_;
153 }
154 }  // namespace webrtc
155