1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "webrtc/modules/audio_processing/voice_detection_impl.h"
12
13 #include <assert.h>
14
15 #include "webrtc/common_audio/vad/include/webrtc_vad.h"
16 #include "webrtc/modules/audio_processing/audio_buffer.h"
17 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
18
19 namespace webrtc {
20
21 typedef VadInst Handle;
22
23 namespace {
MapSetting(VoiceDetection::Likelihood likelihood)24 int MapSetting(VoiceDetection::Likelihood likelihood) {
25 switch (likelihood) {
26 case VoiceDetection::kVeryLowLikelihood:
27 return 3;
28 case VoiceDetection::kLowLikelihood:
29 return 2;
30 case VoiceDetection::kModerateLikelihood:
31 return 1;
32 case VoiceDetection::kHighLikelihood:
33 return 0;
34 }
35 assert(false);
36 return -1;
37 }
38 } // namespace
39
VoiceDetectionImpl(const AudioProcessing * apm,CriticalSectionWrapper * crit)40 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm,
41 CriticalSectionWrapper* crit)
42 : ProcessingComponent(),
43 apm_(apm),
44 crit_(crit),
45 stream_has_voice_(false),
46 using_external_vad_(false),
47 likelihood_(kLowLikelihood),
48 frame_size_ms_(10),
49 frame_size_samples_(0) {}
50
~VoiceDetectionImpl()51 VoiceDetectionImpl::~VoiceDetectionImpl() {}
52
ProcessCaptureAudio(AudioBuffer * audio)53 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
54 if (!is_component_enabled()) {
55 return apm_->kNoError;
56 }
57
58 if (using_external_vad_) {
59 using_external_vad_ = false;
60 return apm_->kNoError;
61 }
62 assert(audio->samples_per_split_channel() <= 160);
63
64 // TODO(ajm): concatenate data in frame buffer here.
65
66 int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
67 apm_->proc_split_sample_rate_hz(),
68 audio->mixed_low_pass_data(),
69 frame_size_samples_);
70 if (vad_ret == 0) {
71 stream_has_voice_ = false;
72 audio->set_activity(AudioFrame::kVadPassive);
73 } else if (vad_ret == 1) {
74 stream_has_voice_ = true;
75 audio->set_activity(AudioFrame::kVadActive);
76 } else {
77 return apm_->kUnspecifiedError;
78 }
79
80 return apm_->kNoError;
81 }
82
Enable(bool enable)83 int VoiceDetectionImpl::Enable(bool enable) {
84 CriticalSectionScoped crit_scoped(crit_);
85 return EnableComponent(enable);
86 }
87
is_enabled() const88 bool VoiceDetectionImpl::is_enabled() const {
89 return is_component_enabled();
90 }
91
set_stream_has_voice(bool has_voice)92 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
93 using_external_vad_ = true;
94 stream_has_voice_ = has_voice;
95 return apm_->kNoError;
96 }
97
stream_has_voice() const98 bool VoiceDetectionImpl::stream_has_voice() const {
99 // TODO(ajm): enable this assertion?
100 //assert(using_external_vad_ || is_component_enabled());
101 return stream_has_voice_;
102 }
103
set_likelihood(VoiceDetection::Likelihood likelihood)104 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
105 CriticalSectionScoped crit_scoped(crit_);
106 if (MapSetting(likelihood) == -1) {
107 return apm_->kBadParameterError;
108 }
109
110 likelihood_ = likelihood;
111 return Configure();
112 }
113
likelihood() const114 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
115 return likelihood_;
116 }
117
set_frame_size_ms(int size)118 int VoiceDetectionImpl::set_frame_size_ms(int size) {
119 CriticalSectionScoped crit_scoped(crit_);
120 assert(size == 10); // TODO(ajm): remove when supported.
121 if (size != 10 &&
122 size != 20 &&
123 size != 30) {
124 return apm_->kBadParameterError;
125 }
126
127 frame_size_ms_ = size;
128
129 return Initialize();
130 }
131
frame_size_ms() const132 int VoiceDetectionImpl::frame_size_ms() const {
133 return frame_size_ms_;
134 }
135
Initialize()136 int VoiceDetectionImpl::Initialize() {
137 int err = ProcessingComponent::Initialize();
138 if (err != apm_->kNoError || !is_component_enabled()) {
139 return err;
140 }
141
142 using_external_vad_ = false;
143 frame_size_samples_ = frame_size_ms_ *
144 apm_->proc_split_sample_rate_hz() / 1000;
145 // TODO(ajm): intialize frame buffer here.
146
147 return apm_->kNoError;
148 }
149
CreateHandle() const150 void* VoiceDetectionImpl::CreateHandle() const {
151 Handle* handle = NULL;
152 if (WebRtcVad_Create(&handle) != apm_->kNoError) {
153 handle = NULL;
154 } else {
155 assert(handle != NULL);
156 }
157
158 return handle;
159 }
160
DestroyHandle(void * handle) const161 void VoiceDetectionImpl::DestroyHandle(void* handle) const {
162 WebRtcVad_Free(static_cast<Handle*>(handle));
163 }
164
InitializeHandle(void * handle) const165 int VoiceDetectionImpl::InitializeHandle(void* handle) const {
166 return WebRtcVad_Init(static_cast<Handle*>(handle));
167 }
168
ConfigureHandle(void * handle) const169 int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
170 return WebRtcVad_set_mode(static_cast<Handle*>(handle),
171 MapSetting(likelihood_));
172 }
173
num_handles_required() const174 int VoiceDetectionImpl::num_handles_required() const {
175 return 1;
176 }
177
GetHandleError(void * handle) const178 int VoiceDetectionImpl::GetHandleError(void* handle) const {
179 // The VAD has no get_error() function.
180 assert(handle != NULL);
181 return apm_->kUnspecifiedError;
182 }
183 } // namespace webrtc
184