/*
 *  Copyright 2016 The WebRTC Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#import "voice_processing_audio_unit.h"

#include "absl/base/macros.h"
#include "rtc_base/checks.h"
#include "system_wrappers/include/metrics.h"

#import "base/RTCLogging.h"
#import "sdk/objc/components/audio/RTCAudioSessionConfiguration.h"

#if !defined(NDEBUG)
// Debug helper: pretty-prints an ASBD so stream-format problems are visible
// in the log. The 4-char mFormatID is byte-swapped to big-endian so it reads
// as its FourCC string (e.g. 'lpcm').
static void LogStreamDescription(AudioStreamBasicDescription description) {
  char formatIdString[5];
  UInt32 formatId = CFSwapInt32HostToBig(description.mFormatID);
  bcopy(&formatId, formatIdString, 4);
  formatIdString[4] = '\0';
  RTCLog(@"AudioStreamBasicDescription: {\n"
          "  mSampleRate: %.2f\n"
          "  formatIDString: %s\n"
          "  mFormatFlags: 0x%X\n"
          "  mBytesPerPacket: %u\n"
          "  mFramesPerPacket: %u\n"
          "  mBytesPerFrame: %u\n"
          "  mChannelsPerFrame: %u\n"
          "  mBitsPerChannel: %u\n"
          "  mReserved: %u\n}",
         description.mSampleRate, formatIdString,
         static_cast<unsigned int>(description.mFormatFlags),
         static_cast<unsigned int>(description.mBytesPerPacket),
         static_cast<unsigned int>(description.mFramesPerPacket),
         static_cast<unsigned int>(description.mBytesPerFrame),
         static_cast<unsigned int>(description.mChannelsPerFrame),
         static_cast<unsigned int>(description.mBitsPerChannel),
         static_cast<unsigned int>(description.mReserved));
}
#endif

namespace webrtc {
namespace ios_adm {

// Calls to AudioUnitInitialize() can fail if called back-to-back on different
// ADM instances. A fall-back solution is to allow multiple sequential calls
// with as small delay between each. This factor sets the max number of allowed
// initialization attempts.
static const int kMaxNumberOfAudioUnitInitializeAttempts = 5;
// A VP I/O unit's bus 1 connects to input hardware (microphone).
static const AudioUnitElement kInputBus = 1;
// A VP I/O unit's bus 0 connects to output hardware (speaker).
static const AudioUnitElement kOutputBus = 0;

// Returns the automatic gain control (AGC) state on the processed microphone
// signal. Should be on by default for Voice Processing audio units.
static OSStatus GetAGCState(AudioUnit audio_unit, UInt32* enabled) {
  RTC_DCHECK(audio_unit);
  UInt32 size = sizeof(*enabled);
  OSStatus result = AudioUnitGetProperty(audio_unit,
                                         kAUVoiceIOProperty_VoiceProcessingEnableAGC,
                                         kAudioUnitScope_Global,
                                         kInputBus,
                                         enabled,
                                         &size);
  // Only log the value on success; on failure `*enabled` may be
  // uninitialized.
  if (result == noErr) {
    RTCLog(@"VPIO unit AGC: %u", static_cast<unsigned int>(*enabled));
  }
  return result;
}

VoiceProcessingAudioUnit::VoiceProcessingAudioUnit(
    VoiceProcessingAudioUnitObserver* observer)
    : observer_(observer), vpio_unit_(nullptr), state_(kInitRequired) {
  RTC_DCHECK(observer);
}

VoiceProcessingAudioUnit::~VoiceProcessingAudioUnit() {
  DisposeAudioUnit();
}

const UInt32 VoiceProcessingAudioUnit::kBytesPerSample = 2;

bool VoiceProcessingAudioUnit::Init() {
  RTC_DCHECK_EQ(state_, kInitRequired);

  // Create an audio component description to identify the Voice Processing
  // I/O audio unit.
  AudioComponentDescription vpio_unit_description;
  vpio_unit_description.componentType = kAudioUnitType_Output;
  vpio_unit_description.componentSubType = kAudioUnitSubType_VoiceProcessingIO;
  vpio_unit_description.componentManufacturer = kAudioUnitManufacturer_Apple;
  vpio_unit_description.componentFlags = 0;
  vpio_unit_description.componentFlagsMask = 0;

  // Obtain an audio unit instance given the description.
  AudioComponent found_vpio_unit_ref =
      AudioComponentFindNext(nullptr, &vpio_unit_description);
  // AudioComponentFindNext returns nullptr when no matching component exists;
  // fail gracefully instead of passing a null component to
  // AudioComponentInstanceNew.
  if (found_vpio_unit_ref == nullptr) {
    RTCLogError(@"Could not find a Voice Processing I/O audio unit.");
    return false;
  }

  // Create a Voice Processing IO audio unit.
  OSStatus result = noErr;
  result = AudioComponentInstanceNew(found_vpio_unit_ref, &vpio_unit_);
  if (result != noErr) {
    vpio_unit_ = nullptr;
    RTCLogError(@"AudioComponentInstanceNew failed. Error=%ld.", (long)result);
    return false;
  }

  // Enable input on the input scope of the input element.
  UInt32 enable_input = 1;
  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
                                kAudioUnitScope_Input, kInputBus, &enable_input,
                                sizeof(enable_input));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to enable input on input scope of input element. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Enable output on the output scope of the output element.
  UInt32 enable_output = 1;
  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
                                kAudioUnitScope_Output, kOutputBus,
                                &enable_output, sizeof(enable_output));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to enable output on output scope of output element. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Specify the callback function that provides audio samples to the audio
  // unit.
  AURenderCallbackStruct render_callback;
  render_callback.inputProc = OnGetPlayoutData;
  render_callback.inputProcRefCon = this;
  result = AudioUnitSetProperty(
      vpio_unit_, kAudioUnitProperty_SetRenderCallback, kAudioUnitScope_Input,
      kOutputBus, &render_callback, sizeof(render_callback));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to specify the render callback on the output bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Disable AU buffer allocation for the recorder, we allocate our own.
  // TODO(henrika): not sure that it actually saves resources to make this
  // call.
  UInt32 flag = 0;
  result = AudioUnitSetProperty(
      vpio_unit_, kAudioUnitProperty_ShouldAllocateBuffer,
      kAudioUnitScope_Output, kInputBus, &flag, sizeof(flag));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to disable buffer allocation on the input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Specify the callback to be called by the I/O thread to us when input audio
  // is available. The recorded samples can then be obtained by calling the
  // AudioUnitRender() method.
  AURenderCallbackStruct input_callback;
  input_callback.inputProc = OnDeliverRecordedData;
  input_callback.inputProcRefCon = this;
  result = AudioUnitSetProperty(vpio_unit_,
                                kAudioOutputUnitProperty_SetInputCallback,
                                kAudioUnitScope_Global, kInputBus,
                                &input_callback, sizeof(input_callback));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to specify the input callback on the input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  state_ = kUninitialized;
  return true;
}

VoiceProcessingAudioUnit::State VoiceProcessingAudioUnit::GetState() const {
  return state_;
}

bool VoiceProcessingAudioUnit::Initialize(Float64 sample_rate) {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Initializing audio unit with sample rate: %f", sample_rate);

  OSStatus result = noErr;
  AudioStreamBasicDescription format = GetFormat(sample_rate);
  UInt32 size = sizeof(format);
#if !defined(NDEBUG)
  LogStreamDescription(format);
#endif

  // Set the format on the output scope of the input element/bus.
  result =
      AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
                           kAudioUnitScope_Output, kInputBus, &format, size);
  if (result != noErr) {
    RTCLogError(@"Failed to set format on output scope of input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Set the format on the input scope of the output element/bus.
  result =
      AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
                           kAudioUnitScope_Input, kOutputBus, &format, size);
  if (result != noErr) {
    RTCLogError(@"Failed to set format on input scope of output bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Initialize the Voice Processing I/O unit instance.
  // Calls to AudioUnitInitialize() can fail if called back-to-back on
  // different ADM instances. The error message in this case is -66635 which is
  // undocumented. Tests have shown that calling AudioUnitInitialize a second
  // time, after a short sleep, avoids this issue.
  // See webrtc:5166 for details.
  int failed_initialize_attempts = 0;
  result = AudioUnitInitialize(vpio_unit_);
  while (result != noErr) {
    RTCLogError(@"Failed to initialize the Voice Processing I/O unit. "
                 "Error=%ld.",
                (long)result);
    ++failed_initialize_attempts;
    if (failed_initialize_attempts == kMaxNumberOfAudioUnitInitializeAttempts) {
      // Max number of initialization attempts exceeded, hence abort.
      RTCLogError(@"Too many initialization attempts.");
      return false;
    }
    RTCLog(@"Pause 100ms and try audio unit initialization again...");
    [NSThread sleepForTimeInterval:0.1f];
    result = AudioUnitInitialize(vpio_unit_);
  }
  // The loop above only exits with result == noErr (or returns false).
  RTCLog(@"Voice Processing I/O unit is now initialized.");

  // AGC should be enabled by default for Voice Processing I/O units but it is
  // checked below and enabled explicitly if needed. This scheme is used
  // to be absolutely sure that the AGC is enabled since we have seen cases
  // where only zeros are recorded and a disabled AGC could be one of the
  // reasons why it happens.
  int agc_was_enabled_by_default = 0;
  UInt32 agc_is_enabled = 0;
  result = GetAGCState(vpio_unit_, &agc_is_enabled);
  if (result != noErr) {
    RTCLogError(@"Failed to get AGC state (1st attempt). "
                 "Error=%ld.",
                (long)result);
    // Example of error code: kAudioUnitErr_NoConnection (-10876).
    // All error codes related to audio units are negative and are therefore
    // converted into a positive value to match the UMA APIs.
    RTC_HISTOGRAM_COUNTS_SPARSE_100000(
        "WebRTC.Audio.GetAGCStateErrorCode1", (-1) * result);
  } else if (agc_is_enabled) {
    // Remember that the AGC was enabled by default. Will be used in UMA.
    agc_was_enabled_by_default = 1;
  } else {
    // AGC was initially disabled => try to enable it explicitly.
    UInt32 enable_agc = 1;
    result =
        AudioUnitSetProperty(vpio_unit_,
                             kAUVoiceIOProperty_VoiceProcessingEnableAGC,
                             kAudioUnitScope_Global, kInputBus, &enable_agc,
                             sizeof(enable_agc));
    if (result != noErr) {
      RTCLogError(@"Failed to enable the built-in AGC. "
                   "Error=%ld.",
                  (long)result);
      RTC_HISTOGRAM_COUNTS_SPARSE_100000(
          "WebRTC.Audio.SetAGCStateErrorCode", (-1) * result);
    }
    result = GetAGCState(vpio_unit_, &agc_is_enabled);
    if (result != noErr) {
      RTCLogError(@"Failed to get AGC state (2nd attempt). "
                   "Error=%ld.",
                  (long)result);
      RTC_HISTOGRAM_COUNTS_SPARSE_100000(
          "WebRTC.Audio.GetAGCStateErrorCode2", (-1) * result);
    }
  }

  // Track if the built-in AGC was enabled by default (as it should) or not.
  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.BuiltInAGCWasEnabledByDefault",
                        agc_was_enabled_by_default);
  RTCLog(@"WebRTC.Audio.BuiltInAGCWasEnabledByDefault: %d",
         agc_was_enabled_by_default);
  // As a final step, add an UMA histogram for tracking the AGC state.
  // At this stage, the AGC should be enabled, and if it is not, more work is
  // needed to find out the root cause.
  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.BuiltInAGCIsEnabled", agc_is_enabled);
  RTCLog(@"WebRTC.Audio.BuiltInAGCIsEnabled: %u",
         static_cast<unsigned int>(agc_is_enabled));

  state_ = kInitialized;
  return true;
}

bool VoiceProcessingAudioUnit::Start() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Starting audio unit.");

  OSStatus result = AudioOutputUnitStart(vpio_unit_);
  if (result != noErr) {
    RTCLogError(@"Failed to start audio unit. Error=%ld", (long)result);
    return false;
  } else {
    RTCLog(@"Started audio unit");
  }
  state_ = kStarted;
  return true;
}

bool VoiceProcessingAudioUnit::Stop() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Stopping audio unit.");

  OSStatus result = AudioOutputUnitStop(vpio_unit_);
  if (result != noErr) {
    RTCLogError(@"Failed to stop audio unit. Error=%ld", (long)result);
    return false;
  } else {
    RTCLog(@"Stopped audio unit");
  }

  state_ = kInitialized;
  return true;
}

bool VoiceProcessingAudioUnit::Uninitialize() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Uninitializing audio unit.");

  OSStatus result = AudioUnitUninitialize(vpio_unit_);
  if (result != noErr) {
    RTCLogError(@"Failed to uninitialize audio unit. Error=%ld", (long)result);
    return false;
  } else {
    RTCLog(@"Uninitialized audio unit.");
  }

  state_ = kUninitialized;
  return true;
}

OSStatus VoiceProcessingAudioUnit::Render(AudioUnitRenderActionFlags* flags,
                                          const AudioTimeStamp* time_stamp,
                                          UInt32 output_bus_number,
                                          UInt32 num_frames,
                                          AudioBufferList* io_data) {
  RTC_DCHECK(vpio_unit_) << "Init() not called.";

  OSStatus result = AudioUnitRender(vpio_unit_, flags, time_stamp,
                                    output_bus_number, num_frames, io_data);
  if (result != noErr) {
    RTCLogError(@"Failed to render audio unit. Error=%ld", (long)result);
  }
  return result;
}

OSStatus VoiceProcessingAudioUnit::OnGetPlayoutData(
    void* in_ref_con,
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  VoiceProcessingAudioUnit* audio_unit =
      static_cast<VoiceProcessingAudioUnit*>(in_ref_con);
  return audio_unit->NotifyGetPlayoutData(flags, time_stamp, bus_number,
                                          num_frames, io_data);
}

OSStatus VoiceProcessingAudioUnit::OnDeliverRecordedData(
    void* in_ref_con,
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  VoiceProcessingAudioUnit* audio_unit =
      static_cast<VoiceProcessingAudioUnit*>(in_ref_con);
  return audio_unit->NotifyDeliverRecordedData(flags, time_stamp, bus_number,
                                               num_frames, io_data);
}

OSStatus VoiceProcessingAudioUnit::NotifyGetPlayoutData(
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  return observer_->OnGetPlayoutData(flags, time_stamp, bus_number, num_frames,
                                     io_data);
}

OSStatus VoiceProcessingAudioUnit::NotifyDeliverRecordedData(
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  return observer_->OnDeliverRecordedData(flags, time_stamp, bus_number,
                                          num_frames, io_data);
}

AudioStreamBasicDescription VoiceProcessingAudioUnit::GetFormat(
    Float64 sample_rate) const {
  // Set the application formats for input and output:
  // - use same format in both directions
  // - avoid resampling in the I/O unit by using the hardware sample rate
  // - linear PCM => noncompressed audio data format with one frame per packet
  // - no need to specify interleaving since only mono is supported
  AudioStreamBasicDescription format;
  RTC_DCHECK_EQ(1, kRTCAudioSessionPreferredNumberOfChannels);
  format.mSampleRate = sample_rate;
  format.mFormatID = kAudioFormatLinearPCM;
  format.mFormatFlags =
      kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked;
  format.mBytesPerPacket = kBytesPerSample;
  format.mFramesPerPacket = 1;  // uncompressed.
  format.mBytesPerFrame = kBytesPerSample;
  format.mChannelsPerFrame = kRTCAudioSessionPreferredNumberOfChannels;
  format.mBitsPerChannel = 8 * kBytesPerSample;
  return format;
}

void VoiceProcessingAudioUnit::DisposeAudioUnit() {
  if (vpio_unit_) {
    switch (state_) {
      case kStarted:
        Stop();
        ABSL_FALLTHROUGH_INTENDED;
      case kInitialized:
        Uninitialize();
        break;
      case kUninitialized:
        ABSL_FALLTHROUGH_INTENDED;
      case kInitRequired:
        break;
    }

    RTCLog(@"Disposing audio unit.");
    OSStatus result = AudioComponentInstanceDispose(vpio_unit_);
    if (result != noErr) {
      RTCLogError(@"AudioComponentInstanceDispose failed. Error=%ld.",
                  (long)result);
    }
    vpio_unit_ = nullptr;
  }
}

}  // namespace ios_adm
}  // namespace webrtc