// Copyright 2011 Google Inc. All Rights Reserved.

package android.speech.tts;

import android.media.AudioFormat;
import android.media.AudioTrack;
import android.speech.tts.TextToSpeechService.AudioOutputParams;
import android.util.Log;

/**
 * Exposes parts of the {@link AudioTrack} API by delegating calls to an
 * underlying {@link AudioTrack}. Additionally, provides methods like
 * {@link #waitAndRelease()} that will block until all AudioTrack
 * data has been flushed to the mixer and playback is estimated to have
 * completed.
 */
class BlockingAudioTrack {
    private static final String TAG = "TTS.BlockingAudioTrack";
    private static final boolean DBG = false;

    /**
     * The minimum increment of time to wait for an AudioTrack to finish
     * playing.
     */
    private static final long MIN_SLEEP_TIME_MS = 20;

    /**
     * The maximum increment of time to sleep while waiting for an AudioTrack
     * to finish playing.
     */
    private static final long MAX_SLEEP_TIME_MS = 2500;

    /**
     * The maximum amount of time to wait for an audio track to make progress while
     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
     * could happen in exceptional circumstances like a media_server crash.
     */
    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;

    /**
     * Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
     * we create.
     */
    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;

    private final AudioOutputParams mAudioParams;
    private final int mSampleRateInHz;
    private final int mAudioFormat;
    private final int mChannelCount;

    private final int mBytesPerFrame;

    /**
     * A "short utterance" is one that uses fewer bytes than the audio
     * track buffer size (mAudioBufferSize). In this case, we need to call
     * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly
     * different logic is required to wait for the track to finish.
     *
     * Not volatile, accessed only from the audio playback thread.
     */
    private boolean mIsShortUtterance;

    /**
     * Will be valid after a call to {@link #init()}.
     */
    private int mAudioBufferSize;
    private int mBytesWritten = 0;

    // Needs to be seen by stop(), which can be called from another thread. mAudioTrack will be
    // set to null only after waitAndRelease().
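    // mStopped is volatile because write() and blockUntilCompletion() read it on the
    // playback thread without holding mAudioTrackLock.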
    private final Object mAudioTrackLock = new Object();
    private AudioTrack mAudioTrack;
    private volatile boolean mStopped;

    private int mSessionId;

    BlockingAudioTrack(AudioOutputParams audioParams, int sampleRate,
            int audioFormat, int channelCount) {
        mAudioParams = audioParams;
        mSampleRateInHz = sampleRate;
        mAudioFormat = audioFormat;
        mChannelCount = channelCount;

        mBytesPerFrame = AudioFormat.getBytesPerSample(mAudioFormat) * mChannelCount;
        mIsShortUtterance = false;
        mAudioBufferSize = 0;
        mBytesWritten = 0;

        mAudioTrack = null;
        mStopped = false;
    }

    public boolean init() {
        AudioTrack track = createStreamingAudioTrack();
        synchronized (mAudioTrackLock) {
            mAudioTrack = track;
        }

        return track != null;
    }

    public void stop() {
        synchronized (mAudioTrackLock) {
            if (mAudioTrack != null) {
                mAudioTrack.stop();
            }
            mStopped = true;
        }
    }

    public int write(byte[] data) {
        AudioTrack track = null;
        synchronized (mAudioTrackLock) {
            track = mAudioTrack;
        }

        if (track == null || mStopped) {
            return -1;
        }
        final int bytesWritten = writeToAudioTrack(track, data);

        mBytesWritten += bytesWritten;
        return bytesWritten;
    }

    public void waitAndRelease() {
        AudioTrack track = null;
        synchronized (mAudioTrackLock) {
            track = mAudioTrack;
        }
        if (track == null) {
            if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]");
            return;
        }

        // For "small" audio tracks, we have to stop() them to make them mixable,
        // else the audio subsystem will wait indefinitely for us to fill the buffer
        // before it considers the track mixable.
        //
        // If mStopped is true, the track has already been stopped, so there is no
        // point in doing that again.
        if (mBytesWritten < mAudioBufferSize && !mStopped) {
            if (DBG) {
                Log.d(TAG, "Stopping audio track to flush audio, state was : " +
                        track.getPlayState() + ", stopped = " + mStopped);
            }

            mIsShortUtterance = true;
            track.stop();
        }

        // Block until the audio track is done only if we haven't stopped yet.
        if (!mStopped) {
            if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + track.hashCode());
            blockUntilDone(track);
        }

        // The last call to AudioTrack.write() will return only after
        // all data from the audio track has been sent to the mixer, so
        // it's safe to release at this point.
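        // Clear mAudioTrack under the lock first so that a concurrent stop() sees null
        // and does not call stop() on a track that is about to be released; release()
        // itself can run outside the lock because no other thread touches the track
        // after this point.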
        if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]");
        synchronized (mAudioTrackLock) {
            mAudioTrack = null;
        }
        track.release();
    }

    static int getChannelConfig(int channelCount) {
        if (channelCount == 1) {
            return AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2) {
            return AudioFormat.CHANNEL_OUT_STEREO;
        }

        return 0;
    }

    long getAudioLengthMs(int numBytes) {
        final int unconsumedFrames = numBytes / mBytesPerFrame;
        final long estimatedTimeMs = (long) unconsumedFrames * 1000 / mSampleRateInHz;

        return estimatedTimeMs;
    }

    private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
        if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
            if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
            audioTrack.play();
        }

        int count = 0;
        while (count < bytes.length) {
            // The data always starts at offset 0 of the caller's buffer, so count
            // doubles as the write offset; write only the bytes that remain.
            int written = audioTrack.write(bytes, count, bytes.length - count);
            if (written <= 0) {
                break;
            }
            count += written;
        }
        return count;
    }

    private AudioTrack createStreamingAudioTrack() {
        final int channelConfig = getChannelConfig(mChannelCount);

        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

        AudioFormat audioFormat = (new AudioFormat.Builder())
                .setChannelMask(channelConfig)
                .setEncoding(mAudioFormat)
                .setSampleRate(mSampleRateInHz)
                .build();
        AudioTrack audioTrack = new AudioTrack(mAudioParams.mAudioAttributes,
                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM,
                mAudioParams.mSessionId);

        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
            Log.w(TAG, "Unable to create audio track.");
            audioTrack.release();
            return null;
        }

        mAudioBufferSize = bufferSizeInBytes;

        setupVolume(audioTrack, mAudioParams.mVolume, mAudioParams.mPan);
        return audioTrack;
    }

    private void blockUntilDone(AudioTrack audioTrack) {
        if (mBytesWritten <= 0) {
            return;
        }

        if (mIsShortUtterance) {
            // In this case we would have called AudioTrack#stop() to flush
            // buffers to the mixer. This makes the playback head position
            // unobservable and notification markers do not work reliably. We
            // have no option but to wait until we think the track would finish
            // playing and release it after.
            //
            // This isn't as bad as it looks because (a) we won't end up waiting
            // much longer than we should, since even at 4kHz mono a short
            // utterance weighs in at about 2 seconds, and (b) such short utterances
            // are expected to be relatively infrequent, so in a stream of utterances
            // this shows up as a slightly longer pause.
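            //
            // The estimate used below is the same formula as getAudioLengthMs():
            // (mBytesWritten / mBytesPerFrame) frames converted to milliseconds at
            // mSampleRateInHz.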
            blockUntilEstimatedCompletion();
        } else {
            blockUntilCompletion(audioTrack);
        }
    }

    private void blockUntilEstimatedCompletion() {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;
        final long estimatedTimeMs = (long) lengthInFrames * 1000 / mSampleRateInHz;

        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");

        try {
            Thread.sleep(estimatedTimeMs);
        } catch (InterruptedException ie) {
            // Do nothing.
        }
    }

    private void blockUntilCompletion(AudioTrack audioTrack) {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;

        int previousPosition = -1;
        int currentPosition = 0;
        long blockedTimeMs = 0;

        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {

            final long estimatedTimeMs = ((long) (lengthInFrames - currentPosition) * 1000) /
                    audioTrack.getSampleRate();
            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);

            // Check whether the audio track has made progress since the last loop
            // iteration. If it hasn't, accumulate the time spent sleeping; if it
            // has, reset the counter.
            if (currentPosition == previousPosition) {
                // This works only because the sleep time that would have been calculated
                // would be the same in the previous iteration too.
                blockedTimeMs += sleepTimeMs;
                // If we've taken too long to make progress, bail.
                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
                            "for AudioTrack to make progress, aborting.");
                    break;
                }
            } else {
                blockedTimeMs = 0;
            }
            previousPosition = currentPosition;

            if (DBG) {
                Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
                        " Playback position : " + currentPosition + ", Length in frames : "
                        + lengthInFrames);
            }
            try {
                Thread.sleep(sleepTimeMs);
            } catch (InterruptedException ie) {
                break;
            }
        }
    }

    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
        final float vol = clip(volume, 0.0f, 1.0f);
        final float panning = clip(pan, -1.0f, 1.0f);

        // Pan > 0 shifts the output towards the right channel, pan < 0 towards the left.
        float volLeft = vol;
        float volRight = vol;
        if (panning > 0.0f) {
            volLeft *= (1.0f - panning);
        } else if (panning < 0.0f) {
            volRight *= (1.0f + panning);
        }
        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ", volRight=" + volRight);
        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
            Log.e(TAG, "Failed to set volume");
        }
    }

    // Clamps value to the [min, max] range.
    private static long clip(long value, long min, long max) {
        return value < min ? min : (value < max ? value : max);
    }

    private static float clip(float value, float min, float max) {
        return value < min ? min : (value < max ? value : max);
    }
}