/*
 *  Copyright 2016 The WebRTC Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#import "voice_processing_audio_unit.h"

#include "absl/base/macros.h"
#include "rtc_base/checks.h"
#include "system_wrappers/include/metrics.h"

#import "base/RTCLogging.h"
#import "sdk/objc/components/audio/RTCAudioSessionConfiguration.h"

#if !defined(NDEBUG)
static void LogStreamDescription(AudioStreamBasicDescription description) {
  char formatIdString[5];
  UInt32 formatId = CFSwapInt32HostToBig(description.mFormatID);
  bcopy(&formatId, formatIdString, 4);
  formatIdString[4] = '\0';
  RTCLog(@"AudioStreamBasicDescription: {\n"
          "  mSampleRate: %.2f\n"
          "  formatIDString: %s\n"
          "  mFormatFlags: 0x%X\n"
          "  mBytesPerPacket: %u\n"
          "  mFramesPerPacket: %u\n"
          "  mBytesPerFrame: %u\n"
          "  mChannelsPerFrame: %u\n"
          "  mBitsPerChannel: %u\n"
          "  mReserved: %u\n}",
         description.mSampleRate, formatIdString,
         static_cast<unsigned int>(description.mFormatFlags),
         static_cast<unsigned int>(description.mBytesPerPacket),
         static_cast<unsigned int>(description.mFramesPerPacket),
         static_cast<unsigned int>(description.mBytesPerFrame),
         static_cast<unsigned int>(description.mChannelsPerFrame),
         static_cast<unsigned int>(description.mBitsPerChannel),
         static_cast<unsigned int>(description.mReserved));
}
#endif

namespace webrtc {
namespace ios_adm {

// Calls to AudioUnitInitialize() can fail if called back-to-back on different
// ADM instances. A fallback solution is to allow multiple sequential calls
// with a small delay between each call. This constant sets the maximum number
// of allowed initialization attempts.
static const int kMaxNumberOfAudioUnitInitializeAttempts = 5;
// A VP I/O unit's bus 1 connects to input hardware (microphone).
static const AudioUnitElement kInputBus = 1;
// A VP I/O unit's bus 0 connects to output hardware (speaker).
static const AudioUnitElement kOutputBus = 0;
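// A common mnemonic for the bus numbers: the letter "I" (Input) resembles the
// digit 1, and the letter "O" (Output) resembles the digit 0.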

// Queries the automatic gain control (AGC) state on the processed microphone
// signal and returns it via |enabled|. AGC should be on by default for Voice
// Processing audio units.
static OSStatus GetAGCState(AudioUnit audio_unit, UInt32* enabled) {
  RTC_DCHECK(audio_unit);
  UInt32 size = sizeof(*enabled);
  OSStatus result = AudioUnitGetProperty(audio_unit,
                                         kAUVoiceIOProperty_VoiceProcessingEnableAGC,
                                         kAudioUnitScope_Global,
                                         kInputBus,
                                         enabled,
                                         &size);
  RTCLog(@"VPIO unit AGC: %u", static_cast<unsigned int>(*enabled));
  return result;
}

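// Typical call sequence (as reflected by the state checks below): Init() is
// called first, followed by Initialize(sample_rate) and Start(). Stop() and
// Uninitialize() reverse the last two steps, and the destructor disposes of
// the underlying audio unit.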
VoiceProcessingAudioUnit::VoiceProcessingAudioUnit(
    VoiceProcessingAudioUnitObserver* observer)
    : observer_(observer), vpio_unit_(nullptr), state_(kInitRequired) {
  RTC_DCHECK(observer);
}

VoiceProcessingAudioUnit::~VoiceProcessingAudioUnit() {
  DisposeAudioUnit();
}

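// Each sample is 16-bit (2 bytes) signed linear PCM; see GetFormat(), where
// mBitsPerChannel is derived from this constant.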
const UInt32 VoiceProcessingAudioUnit::kBytesPerSample = 2;

bool VoiceProcessingAudioUnit::Init() {
  RTC_DCHECK_EQ(state_, kInitRequired);

  // Create an audio component description to identify the Voice Processing
  // I/O audio unit.
  AudioComponentDescription vpio_unit_description;
  vpio_unit_description.componentType = kAudioUnitType_Output;
  vpio_unit_description.componentSubType = kAudioUnitSubType_VoiceProcessingIO;
  vpio_unit_description.componentManufacturer = kAudioUnitManufacturer_Apple;
  vpio_unit_description.componentFlags = 0;
  vpio_unit_description.componentFlagsMask = 0;

  // Obtain an audio unit instance given the description.
  AudioComponent found_vpio_unit_ref =
      AudioComponentFindNext(nullptr, &vpio_unit_description);

  // Create a Voice Processing IO audio unit.
  OSStatus result = noErr;
  result = AudioComponentInstanceNew(found_vpio_unit_ref, &vpio_unit_);
  if (result != noErr) {
    vpio_unit_ = nullptr;
    RTCLogError(@"AudioComponentInstanceNew failed. Error=%ld.", (long)result);
    return false;
  }

  // Enable input on the input scope of the input element.
  UInt32 enable_input = 1;
  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
                                kAudioUnitScope_Input, kInputBus, &enable_input,
                                sizeof(enable_input));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to enable input on input scope of input element. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Enable output on the output scope of the output element.
  UInt32 enable_output = 1;
  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
                                kAudioUnitScope_Output, kOutputBus,
                                &enable_output, sizeof(enable_output));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to enable output on output scope of output element. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Specify the callback function that provides audio samples to the audio
  // unit.
  AURenderCallbackStruct render_callback;
  render_callback.inputProc = OnGetPlayoutData;
  render_callback.inputProcRefCon = this;
  result = AudioUnitSetProperty(
      vpio_unit_, kAudioUnitProperty_SetRenderCallback, kAudioUnitScope_Input,
      kOutputBus, &render_callback, sizeof(render_callback));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to specify the render callback on the output bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Disable AU buffer allocation for the recorder; we allocate our own.
  // TODO(henrika): not sure that it actually saves resources to make this call.
  UInt32 flag = 0;
  result = AudioUnitSetProperty(
      vpio_unit_, kAudioUnitProperty_ShouldAllocateBuffer,
      kAudioUnitScope_Output, kInputBus, &flag, sizeof(flag));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to disable buffer allocation on the input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Specify the callback that the I/O thread invokes when input audio is
  // available. The recorded samples can then be obtained by calling
  // AudioUnitRender() from within the callback.
  AURenderCallbackStruct input_callback;
  input_callback.inputProc = OnDeliverRecordedData;
  input_callback.inputProcRefCon = this;
  result = AudioUnitSetProperty(vpio_unit_,
                                kAudioOutputUnitProperty_SetInputCallback,
                                kAudioUnitScope_Global, kInputBus,
                                &input_callback, sizeof(input_callback));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to specify the input callback on the input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  state_ = kUninitialized;
  return true;
}

VoiceProcessingAudioUnit::State VoiceProcessingAudioUnit::GetState() const {
  return state_;
}

bool VoiceProcessingAudioUnit::Initialize(Float64 sample_rate) {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Initializing audio unit with sample rate: %f", sample_rate);

  OSStatus result = noErr;
  AudioStreamBasicDescription format = GetFormat(sample_rate);
  UInt32 size = sizeof(format);
#if !defined(NDEBUG)
  LogStreamDescription(format);
#endif

  // Set the format on the output scope of the input element/bus.
  result =
      AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
                           kAudioUnitScope_Output, kInputBus, &format, size);
  if (result != noErr) {
    RTCLogError(@"Failed to set format on output scope of input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Set the format on the input scope of the output element/bus.
  result =
      AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
                           kAudioUnitScope_Input, kOutputBus, &format, size);
  if (result != noErr) {
    RTCLogError(@"Failed to set format on input scope of output bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Initialize the Voice Processing I/O unit instance.
  // Calls to AudioUnitInitialize() can fail if called back-to-back on
  // different ADM instances. The error code in this case is -66635, which is
  // undocumented. Tests have shown that calling AudioUnitInitialize a second
  // time, after a short sleep, avoids this issue.
  // See webrtc:5166 for details.
  int failed_initialize_attempts = 0;
  result = AudioUnitInitialize(vpio_unit_);
  while (result != noErr) {
    RTCLogError(@"Failed to initialize the Voice Processing I/O unit. "
                 "Error=%ld.",
                (long)result);
    ++failed_initialize_attempts;
    if (failed_initialize_attempts == kMaxNumberOfAudioUnitInitializeAttempts) {
      // Max number of initialization attempts exceeded, hence abort.
      RTCLogError(@"Too many initialization attempts.");
      return false;
    }
    RTCLog(@"Pause 100ms and try audio unit initialization again...");
    [NSThread sleepForTimeInterval:0.1f];
    result = AudioUnitInitialize(vpio_unit_);
  }
  if (result == noErr) {
    RTCLog(@"Voice Processing I/O unit is now initialized.");
  }

  // AGC should be enabled by default for Voice Processing I/O units, but it
  // is checked below and enabled explicitly if needed. This scheme is used to
  // be absolutely sure that AGC is enabled, since cases have been seen where
  // only zeros are recorded and a disabled AGC could be one of the reasons
  // why that happens.
  int agc_was_enabled_by_default = 0;
  UInt32 agc_is_enabled = 0;
  result = GetAGCState(vpio_unit_, &agc_is_enabled);
  if (result != noErr) {
    RTCLogError(@"Failed to get AGC state (1st attempt). "
                 "Error=%ld.",
                (long)result);
    // Example of error code: kAudioUnitErr_NoConnection (-10876).
    // All error codes related to audio units are negative and are therefore
    // converted into a positive value to match the UMA APIs.
    RTC_HISTOGRAM_COUNTS_SPARSE_100000(
        "WebRTC.Audio.GetAGCStateErrorCode1", (-1) * result);
  } else if (agc_is_enabled) {
    // Remember that the AGC was enabled by default. Will be used in UMA.
    agc_was_enabled_by_default = 1;
  } else {
    // AGC was initially disabled => try to enable it explicitly.
    UInt32 enable_agc = 1;
    result =
        AudioUnitSetProperty(vpio_unit_,
                             kAUVoiceIOProperty_VoiceProcessingEnableAGC,
                             kAudioUnitScope_Global, kInputBus, &enable_agc,
                             sizeof(enable_agc));
    if (result != noErr) {
      RTCLogError(@"Failed to enable the built-in AGC. "
                   "Error=%ld.",
                  (long)result);
      RTC_HISTOGRAM_COUNTS_SPARSE_100000(
          "WebRTC.Audio.SetAGCStateErrorCode", (-1) * result);
    }
    result = GetAGCState(vpio_unit_, &agc_is_enabled);
    if (result != noErr) {
      RTCLogError(@"Failed to get AGC state (2nd attempt). "
                   "Error=%ld.",
                  (long)result);
      RTC_HISTOGRAM_COUNTS_SPARSE_100000(
          "WebRTC.Audio.GetAGCStateErrorCode2", (-1) * result);
    }
  }

  // Track whether the built-in AGC was enabled by default (as it should be).
  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.BuiltInAGCWasEnabledByDefault",
                        agc_was_enabled_by_default);
  RTCLog(@"WebRTC.Audio.BuiltInAGCWasEnabledByDefault: %d",
         agc_was_enabled_by_default);
  // As a final step, add a UMA histogram for tracking the AGC state.
  // At this stage, the AGC should be enabled, and if it is not, more work is
  // needed to find out the root cause.
  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.BuiltInAGCIsEnabled", agc_is_enabled);
  RTCLog(@"WebRTC.Audio.BuiltInAGCIsEnabled: %u",
         static_cast<unsigned int>(agc_is_enabled));

  state_ = kInitialized;
  return true;
}

bool VoiceProcessingAudioUnit::Start() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Starting audio unit.");

  OSStatus result = AudioOutputUnitStart(vpio_unit_);
  if (result != noErr) {
    RTCLogError(@"Failed to start audio unit. Error=%ld", (long)result);
    return false;
  } else {
    RTCLog(@"Started audio unit");
  }
  state_ = kStarted;
  return true;
}

bool VoiceProcessingAudioUnit::Stop() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Stopping audio unit.");

  OSStatus result = AudioOutputUnitStop(vpio_unit_);
  if (result != noErr) {
    RTCLogError(@"Failed to stop audio unit. Error=%ld", (long)result);
    return false;
  } else {
    RTCLog(@"Stopped audio unit");
  }

  state_ = kInitialized;
  return true;
}

bool VoiceProcessingAudioUnit::Uninitialize() {
  RTC_DCHECK_GE(state_, kUninitialized);
347  RTCLog(@"Unintializing audio unit.");

  OSStatus result = AudioUnitUninitialize(vpio_unit_);
  if (result != noErr) {
    RTCLogError(@"Failed to uninitialize audio unit. Error=%ld", (long)result);
    return false;
  } else {
    RTCLog(@"Uninitialized audio unit.");
  }

  state_ = kUninitialized;
  return true;
}

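// Pulls recorded audio from the input bus into |io_data| via
// AudioUnitRender(). Typically invoked from the recorded-data callback path,
// since the input callback itself does not deliver the audio buffers directly.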
OSStatus VoiceProcessingAudioUnit::Render(AudioUnitRenderActionFlags* flags,
                                          const AudioTimeStamp* time_stamp,
                                          UInt32 output_bus_number,
                                          UInt32 num_frames,
                                          AudioBufferList* io_data) {
  RTC_DCHECK(vpio_unit_) << "Init() not called.";

  OSStatus result = AudioUnitRender(vpio_unit_, flags, time_stamp,
                                    output_bus_number, num_frames, io_data);
  if (result != noErr) {
    RTCLogError(@"Failed to render audio unit. Error=%ld", (long)result);
  }
  return result;
}

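// Static C callback registered on the output bus in Init(); recovers the
// VoiceProcessingAudioUnit instance from |in_ref_con| and forwards the request
// for playout samples to it.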
OSStatus VoiceProcessingAudioUnit::OnGetPlayoutData(
    void* in_ref_con,
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  VoiceProcessingAudioUnit* audio_unit =
      static_cast<VoiceProcessingAudioUnit*>(in_ref_con);
  return audio_unit->NotifyGetPlayoutData(flags, time_stamp, bus_number,
                                          num_frames, io_data);
}

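// Static C callback registered on the input bus in Init(); notifies the
// VoiceProcessingAudioUnit instance that recorded audio is available.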
OSStatus VoiceProcessingAudioUnit::OnDeliverRecordedData(
    void* in_ref_con,
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  VoiceProcessingAudioUnit* audio_unit =
      static_cast<VoiceProcessingAudioUnit*>(in_ref_con);
  return audio_unit->NotifyDeliverRecordedData(flags, time_stamp, bus_number,
                                               num_frames, io_data);
}

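// Member-function counterparts of the static callbacks above; they simply
// forward the calls to the attached observer.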
OSStatus VoiceProcessingAudioUnit::NotifyGetPlayoutData(
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  return observer_->OnGetPlayoutData(flags, time_stamp, bus_number, num_frames,
                                     io_data);
}

OSStatus VoiceProcessingAudioUnit::NotifyDeliverRecordedData(
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  return observer_->OnDeliverRecordedData(flags, time_stamp, bus_number,
                                          num_frames, io_data);
}

AudioStreamBasicDescription VoiceProcessingAudioUnit::GetFormat(
    Float64 sample_rate) const {
  // Set the application formats for input and output:
  // - use same format in both directions
  // - avoid resampling in the I/O unit by using the hardware sample rate
  // - linear PCM => noncompressed audio data format with one frame per packet
  // - no need to specify interleaving since only mono is supported
  AudioStreamBasicDescription format;
  RTC_DCHECK_EQ(1, kRTCAudioSessionPreferredNumberOfChannels);
  format.mSampleRate = sample_rate;
  format.mFormatID = kAudioFormatLinearPCM;
  format.mFormatFlags =
      kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked;
  format.mBytesPerPacket = kBytesPerSample;
  format.mFramesPerPacket = 1;  // uncompressed.
  format.mBytesPerFrame = kBytesPerSample;
  format.mChannelsPerFrame = kRTCAudioSessionPreferredNumberOfChannels;
  format.mBitsPerChannel = 8 * kBytesPerSample;
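  // Example: with sample_rate = 48000.0 this describes mono, packed,
  // signed-integer 16-bit linear PCM with one 2-byte frame per packet.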
  return format;
}

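// Moves the unit to a safe state (stopping and uninitializing it as needed)
// before disposing of the underlying audio component instance.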
void VoiceProcessingAudioUnit::DisposeAudioUnit() {
  if (vpio_unit_) {
    switch (state_) {
      case kStarted:
        Stop();
        ABSL_FALLTHROUGH_INTENDED;
      case kInitialized:
        Uninitialize();
        break;
      case kUninitialized:
        ABSL_FALLTHROUGH_INTENDED;
      case kInitRequired:
        break;
    }

    RTCLog(@"Disposing audio unit.");
    OSStatus result = AudioComponentInstanceDispose(vpio_unit_);
    if (result != noErr) {
      RTCLogError(@"AudioComponentInstanceDispose failed. Error=%ld.",
                  (long)result);
    }
    vpio_unit_ = nullptr;
  }
}

}  // namespace ios_adm
}  // namespace webrtc