1 /*
2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "voice_detection_impl.h"
12 
13 #include <cassert>
14 
15 #include "critical_section_wrapper.h"
16 #include "webrtc_vad.h"
17 
18 #include "audio_processing_impl.h"
19 #include "audio_buffer.h"
20 
21 namespace webrtc {
22 
23 typedef VadInst Handle;
24 
25 namespace {
MapSetting(VoiceDetection::Likelihood likelihood)26 WebRtc_Word16 MapSetting(VoiceDetection::Likelihood likelihood) {
27   switch (likelihood) {
28     case VoiceDetection::kVeryLowLikelihood:
29       return 3;
30       break;
31     case VoiceDetection::kLowLikelihood:
32       return 2;
33       break;
34     case VoiceDetection::kModerateLikelihood:
35       return 1;
36       break;
37     case VoiceDetection::kHighLikelihood:
38       return 0;
39       break;
40     default:
41       return -1;
42   }
43 }
44 }  // namespace
45 
46 
VoiceDetectionImpl(const AudioProcessingImpl * apm)47 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm)
48   : ProcessingComponent(apm),
49     apm_(apm),
50     stream_has_voice_(false),
51     using_external_vad_(false),
52     likelihood_(kLowLikelihood),
53     frame_size_ms_(10),
54     frame_size_samples_(0) {}
55 
~VoiceDetectionImpl()56 VoiceDetectionImpl::~VoiceDetectionImpl() {}
57 
ProcessCaptureAudio(AudioBuffer * audio)58 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
59   if (!is_component_enabled()) {
60     return apm_->kNoError;
61   }
62 
63   if (using_external_vad_) {
64     using_external_vad_ = false;
65     return apm_->kNoError;
66   }
67   assert(audio->samples_per_split_channel() <= 160);
68 
69   WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
70   if (audio->num_channels() > 1) {
71     audio->CopyAndMixLowPass(1);
72     mixed_data = audio->mixed_low_pass_data(0);
73   }
74 
75   // TODO(ajm): concatenate data in frame buffer here.
76 
77   int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
78                                   apm_->split_sample_rate_hz(),
79                                   mixed_data,
80                                   frame_size_samples_);
81   if (vad_ret == 0) {
82     stream_has_voice_ = false;
83     audio->set_activity(AudioFrame::kVadPassive);
84   } else if (vad_ret == 1) {
85     stream_has_voice_ = true;
86     audio->set_activity(AudioFrame::kVadActive);
87   } else {
88     return apm_->kUnspecifiedError;
89   }
90 
91   return apm_->kNoError;
92 }
93 
Enable(bool enable)94 int VoiceDetectionImpl::Enable(bool enable) {
95   CriticalSectionScoped crit_scoped(*apm_->crit());
96   return EnableComponent(enable);
97 }
98 
is_enabled() const99 bool VoiceDetectionImpl::is_enabled() const {
100   return is_component_enabled();
101 }
102 
set_stream_has_voice(bool has_voice)103 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
104   using_external_vad_ = true;
105   stream_has_voice_ = has_voice;
106   return apm_->kNoError;
107 }
108 
stream_has_voice() const109 bool VoiceDetectionImpl::stream_has_voice() const {
110   // TODO(ajm): enable this assertion?
111   //assert(using_external_vad_ || is_component_enabled());
112   return stream_has_voice_;
113 }
114 
set_likelihood(VoiceDetection::Likelihood likelihood)115 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
116   CriticalSectionScoped crit_scoped(*apm_->crit());
117   if (MapSetting(likelihood) == -1) {
118     return apm_->kBadParameterError;
119   }
120 
121   likelihood_ = likelihood;
122   return Configure();
123 }
124 
likelihood() const125 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
126   return likelihood_;
127 }
128 
set_frame_size_ms(int size)129 int VoiceDetectionImpl::set_frame_size_ms(int size) {
130   CriticalSectionScoped crit_scoped(*apm_->crit());
131   assert(size == 10); // TODO(ajm): remove when supported.
132   if (size != 10 &&
133       size != 20 &&
134       size != 30) {
135     return apm_->kBadParameterError;
136   }
137 
138   frame_size_ms_ = size;
139 
140   return Initialize();
141 }
142 
frame_size_ms() const143 int VoiceDetectionImpl::frame_size_ms() const {
144   return frame_size_ms_;
145 }
146 
Initialize()147 int VoiceDetectionImpl::Initialize() {
148   int err = ProcessingComponent::Initialize();
149   if (err != apm_->kNoError || !is_component_enabled()) {
150     return err;
151   }
152 
153   using_external_vad_ = false;
154   frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000);
155   // TODO(ajm): intialize frame buffer here.
156 
157   return apm_->kNoError;
158 }
159 
get_version(char * version,int version_len_bytes) const160 int VoiceDetectionImpl::get_version(char* version,
161                                     int version_len_bytes) const {
162   if (WebRtcVad_get_version(version, version_len_bytes) != 0) {
163     return apm_->kBadParameterError;
164   }
165 
166   return apm_->kNoError;
167 }
168 
CreateHandle() const169 void* VoiceDetectionImpl::CreateHandle() const {
170   Handle* handle = NULL;
171   if (WebRtcVad_Create(&handle) != apm_->kNoError) {
172     handle = NULL;
173   } else {
174     assert(handle != NULL);
175   }
176 
177   return handle;
178 }
179 
DestroyHandle(void * handle) const180 int VoiceDetectionImpl::DestroyHandle(void* handle) const {
181   return WebRtcVad_Free(static_cast<Handle*>(handle));
182 }
183 
InitializeHandle(void * handle) const184 int VoiceDetectionImpl::InitializeHandle(void* handle) const {
185   return WebRtcVad_Init(static_cast<Handle*>(handle));
186 }
187 
ConfigureHandle(void * handle) const188 int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
189   return WebRtcVad_set_mode(static_cast<Handle*>(handle),
190                             MapSetting(likelihood_));
191 }
192 
num_handles_required() const193 int VoiceDetectionImpl::num_handles_required() const {
194   return 1;
195 }
196 
GetHandleError(void * handle) const197 int VoiceDetectionImpl::GetHandleError(void* handle) const {
198   // The VAD has no get_error() function.
199   assert(handle != NULL);
200   return apm_->kUnspecifiedError;
201 }
202 }  // namespace webrtc
203