1 // Copyright 2011 Google Inc. All Rights Reserved.
2 
3 package android.speech.tts;
4 
5 import android.media.AudioFormat;
6 import android.media.AudioTrack;
7 import android.speech.tts.TextToSpeechService.AudioOutputParams;
8 import android.util.Log;
9 
10 /**
11  * Exposes parts of the {@link AudioTrack} API by delegating calls to an
12  * underlying {@link AudioTrack}. Additionally, provides methods like
13  * {@link #waitAndRelease()} that will block until all audiotrack
14  * data has been flushed to the mixer, and is estimated to have completed
15  * playback.
16  */
17 class BlockingAudioTrack {
18     private static final String TAG = "TTS.BlockingAudioTrack";
19     private static final boolean DBG = false;
20 
21 
22     /**
23      * The minimum increment of time to wait for an AudioTrack to finish
24      * playing.
25      */
26     private static final long MIN_SLEEP_TIME_MS = 20;
27 
28     /**
29      * The maximum increment of time to sleep while waiting for an AudioTrack
30      * to finish playing.
31      */
32     private static final long MAX_SLEEP_TIME_MS = 2500;
33 
34     /**
35      * The maximum amount of time to wait for an audio track to make progress while
36      * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
37      * could happen in exceptional circumstances like a media_server crash.
38      */
39     private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;
40 
41     /**
42      * Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
43      * we create.
44      */
45     private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
46 
47 
48     private final AudioOutputParams mAudioParams;
49     private final int mSampleRateInHz;
50     private final int mAudioFormat;
51     private final int mChannelCount;
52 
53 
54     private final int mBytesPerFrame;
55     /**
56      * A "short utterance" is one that uses less bytes than the audio
57      * track buffer size (mAudioBufferSize). In this case, we need to call
58      * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly
59      * different logic is required to wait for the track to finish.
60      *
61      * Not volatile, accessed only from the audio playback thread.
62      */
63     private boolean mIsShortUtterance;
64     /**
65      * Will be valid after a call to {@link #init()}.
66      */
67     private int mAudioBufferSize;
68     private int mBytesWritten = 0;
69 
70     // Need to be seen by stop() which can be called from another thread. mAudioTrack will be
71     // set to null only after waitAndRelease().
72     private Object mAudioTrackLock = new Object();
73     private AudioTrack mAudioTrack;
74     private volatile boolean mStopped;
75 
76     private int mSessionId;
77 
BlockingAudioTrack(AudioOutputParams audioParams, int sampleRate, int audioFormat, int channelCount)78     BlockingAudioTrack(AudioOutputParams audioParams, int sampleRate,
79             int audioFormat, int channelCount) {
80         mAudioParams = audioParams;
81         mSampleRateInHz = sampleRate;
82         mAudioFormat = audioFormat;
83         mChannelCount = channelCount;
84 
85         mBytesPerFrame = AudioFormat.getBytesPerSample(mAudioFormat) * mChannelCount;
86         mIsShortUtterance = false;
87         mAudioBufferSize = 0;
88         mBytesWritten = 0;
89 
90         mAudioTrack = null;
91         mStopped = false;
92     }
93 
init()94     public boolean init() {
95         AudioTrack track = createStreamingAudioTrack();
96         synchronized (mAudioTrackLock) {
97             mAudioTrack = track;
98         }
99 
100         if (track == null) {
101             return false;
102         } else {
103             return true;
104         }
105     }
106 
stop()107     public void stop() {
108         synchronized (mAudioTrackLock) {
109             if (mAudioTrack != null) {
110                 mAudioTrack.stop();
111             }
112             mStopped = true;
113         }
114     }
115 
write(byte[] data)116     public int write(byte[] data) {
117         AudioTrack track = null;
118         synchronized (mAudioTrackLock) {
119             track = mAudioTrack;
120         }
121 
122         if (track == null || mStopped) {
123             return -1;
124         }
125         final int bytesWritten = writeToAudioTrack(track, data);
126 
127         mBytesWritten += bytesWritten;
128         return bytesWritten;
129     }
130 
waitAndRelease()131     public void waitAndRelease() {
132         AudioTrack track = null;
133         synchronized (mAudioTrackLock) {
134             track = mAudioTrack;
135         }
136         if (track == null) {
137             if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]");
138             return;
139         }
140 
141         // For "small" audio tracks, we have to stop() them to make them mixable,
142         // else the audio subsystem will wait indefinitely for us to fill the buffer
143         // before rendering the track mixable.
144         //
145         // If mStopped is true, the track would already have been stopped, so not
146         // much point not doing that again.
147         if (mBytesWritten < mAudioBufferSize && !mStopped) {
148             if (DBG) {
149                 Log.d(TAG, "Stopping audio track to flush audio, state was : " +
150                         track.getPlayState() + ",stopped= " + mStopped);
151             }
152 
153             mIsShortUtterance = true;
154             track.stop();
155         }
156 
157         // Block until the audio track is done only if we haven't stopped yet.
158         if (!mStopped) {
159             if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + mAudioTrack.hashCode());
160             blockUntilDone(mAudioTrack);
161         }
162 
163         // The last call to AudioTrack.write( ) will return only after
164         // all data from the audioTrack has been sent to the mixer, so
165         // it's safe to release at this point.
166         if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]");
167         synchronized(mAudioTrackLock) {
168             mAudioTrack = null;
169         }
170         track.release();
171     }
172 
173 
getChannelConfig(int channelCount)174     static int getChannelConfig(int channelCount) {
175         if (channelCount == 1) {
176             return AudioFormat.CHANNEL_OUT_MONO;
177         } else if (channelCount == 2){
178             return AudioFormat.CHANNEL_OUT_STEREO;
179         }
180 
181         return 0;
182     }
183 
getAudioLengthMs(int numBytes)184     long getAudioLengthMs(int numBytes) {
185         final int unconsumedFrames = numBytes / mBytesPerFrame;
186         final long estimatedTimeMs = unconsumedFrames * 1000 / mSampleRateInHz;
187 
188         return estimatedTimeMs;
189     }
190 
writeToAudioTrack(AudioTrack audioTrack, byte[] bytes)191     private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
192         if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
193             if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
194             audioTrack.play();
195         }
196 
197         int count = 0;
198         while (count < bytes.length) {
199             // Note that we don't take bufferCopy.mOffset into account because
200             // it is guaranteed to be 0.
201             int written = audioTrack.write(bytes, count, bytes.length);
202             if (written <= 0) {
203                 break;
204             }
205             count += written;
206         }
207         return count;
208     }
209 
createStreamingAudioTrack()210     private AudioTrack createStreamingAudioTrack() {
211         final int channelConfig = getChannelConfig(mChannelCount);
212 
213         int minBufferSizeInBytes
214                 = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
215         int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
216 
217         AudioFormat audioFormat = (new AudioFormat.Builder())
218                 .setChannelMask(channelConfig)
219                 .setEncoding(mAudioFormat)
220                 .setSampleRate(mSampleRateInHz).build();
221         AudioTrack audioTrack = new AudioTrack(mAudioParams.mAudioAttributes,
222                 audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM,
223                 mAudioParams.mSessionId);
224 
225         if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
226             Log.w(TAG, "Unable to create audio track.");
227             audioTrack.release();
228             return null;
229         }
230 
231         mAudioBufferSize = bufferSizeInBytes;
232 
233         setupVolume(audioTrack, mAudioParams.mVolume, mAudioParams.mPan);
234         return audioTrack;
235     }
236 
blockUntilDone(AudioTrack audioTrack)237     private void blockUntilDone(AudioTrack audioTrack) {
238         if (mBytesWritten <= 0) {
239             return;
240         }
241 
242         if (mIsShortUtterance) {
243             // In this case we would have called AudioTrack#stop() to flush
244             // buffers to the mixer. This makes the playback head position
245             // unobservable and notification markers do not work reliably. We
246             // have no option but to wait until we think the track would finish
247             // playing and release it after.
248             //
249             // This isn't as bad as it looks because (a) We won't end up waiting
250             // for much longer than we should because even at 4khz mono, a short
251             // utterance weighs in at about 2 seconds, and (b) such short utterances
252             // are expected to be relatively infrequent and in a stream of utterances
253             // this shows up as a slightly longer pause.
254             blockUntilEstimatedCompletion();
255         } else {
256             blockUntilCompletion(audioTrack);
257         }
258     }
259 
blockUntilEstimatedCompletion()260     private void blockUntilEstimatedCompletion() {
261         final int lengthInFrames = mBytesWritten / mBytesPerFrame;
262         final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz);
263 
264         if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");
265 
266         try {
267             Thread.sleep(estimatedTimeMs);
268         } catch (InterruptedException ie) {
269             // Do nothing.
270         }
271     }
272 
blockUntilCompletion(AudioTrack audioTrack)273     private void blockUntilCompletion(AudioTrack audioTrack) {
274         final int lengthInFrames = mBytesWritten / mBytesPerFrame;
275 
276         int previousPosition = -1;
277         int currentPosition = 0;
278         long blockedTimeMs = 0;
279 
280         while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
281                 audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {
282 
283             final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
284                     audioTrack.getSampleRate();
285             final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);
286 
287             // Check if the audio track has made progress since the last loop
288             // iteration. We should then add in the amount of time that was
289             // spent sleeping in the last iteration.
290             if (currentPosition == previousPosition) {
291                 // This works only because the sleep time that would have been calculated
292                 // would be the same in the previous iteration too.
293                 blockedTimeMs += sleepTimeMs;
294                 // If we've taken too long to make progress, bail.
295                 if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
296                     Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
297                             "for AudioTrack to make progress, Aborting");
298                     break;
299                 }
300             } else {
301                 blockedTimeMs = 0;
302             }
303             previousPosition = currentPosition;
304 
305             if (DBG) {
306                 Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
307                         " Playback position : " + currentPosition + ", Length in frames : "
308                         + lengthInFrames);
309             }
310             try {
311                 Thread.sleep(sleepTimeMs);
312             } catch (InterruptedException ie) {
313                 break;
314             }
315         }
316     }
317 
setupVolume(AudioTrack audioTrack, float volume, float pan)318     private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
319         final float vol = clip(volume, 0.0f, 1.0f);
320         final float panning = clip(pan, -1.0f, 1.0f);
321 
322         float volLeft = vol;
323         float volRight = vol;
324         if (panning > 0.0f) {
325             volLeft *= (1.0f - panning);
326         } else if (panning < 0.0f) {
327             volRight *= (1.0f + panning);
328         }
329         if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
330         if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
331             Log.e(TAG, "Failed to set volume");
332         }
333     }
334 
clip(long value, long min, long max)335     private static final long clip(long value, long min, long max) {
336         return value < min ? min : (value < max ? value : max);
337     }
338 
clip(float value, float min, float max)339     private static final float clip(float value, float min, float max) {
340         return value < min ? min : (value < max ? value : max);
341     }
342 
343 }
344