1 /* 2 * Copyright (C) 2019 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.car.assist.client.tts; 18 19 import android.content.Context; 20 import android.media.AudioAttributes; 21 import android.media.AudioFocusRequest; 22 import android.media.AudioManager; 23 import android.os.Handler; 24 import android.speech.tts.TextToSpeech; 25 import android.speech.tts.UtteranceProgressListener; 26 import android.util.Log; 27 import android.util.Pair; 28 29 import androidx.annotation.VisibleForTesting; 30 31 import java.util.HashMap; 32 import java.util.List; 33 import java.util.Map; 34 import java.util.concurrent.TimeUnit; 35 import java.util.function.BiConsumer; 36 37 /** 38 * Component that wraps platform TTS engine and supports play-out of batches of text. 39 * <p> 40 * It takes care of setting up TTS Engine when text is played out and shutting it down after an idle 41 * period with no play-out. This is desirable since the owning app is long-lived and the TTS Engine 42 * brings up another service-process. 43 * <p> 44 * As batches of text are played-out, they issue callbacks on the {@link Listener} provided with the 45 * batch. 46 */ 47 public class TextToSpeechHelper { 48 /** 49 * Listener interface used by clients to be notified as batch of text is played out. 50 */ 51 public interface Listener { 52 /** 53 * Called when play-out starts for batch. May never get called if batch has errors or 54 * interruptions. 55 */ onTextToSpeechStarted(long requestId)56 void onTextToSpeechStarted(long requestId); 57 58 /** 59 * Called when play-out ends for batch. 60 * 61 * @param error Whether play-out ended due to an error or not. Note: if it was aborted, it's 62 * not considered an error. 63 */ onTextToSpeechStopped(long requestId, boolean error)64 void onTextToSpeechStopped(long requestId, boolean error); 65 } 66 67 private static final String TAG = "CM#TextToSpeechHelper"; 68 69 private static final String UTTERANCE_ID_SEPARATOR = ";"; 70 private static final long DEFAULT_SHUTDOWN_DELAY_MILLIS = TimeUnit.MINUTES.toMillis(1); 71 72 private final Map<String, BatchListener> mListeners = new HashMap<>(); 73 private final Handler mHandler = new Handler(); 74 private final Context mContext; 75 private final TextToSpeechHelper.Listener mListener; 76 private final AudioManager.OnAudioFocusChangeListener mNoOpListener = (f) -> { /* NO-OP */ }; 77 private final AudioManager mAudioManager; 78 private final AudioAttributes mAudioAttributes; 79 private final AudioFocusRequest mAudioFocusRequest; 80 private final long mShutdownDelayMillis; 81 private TextToSpeechEngine mTextToSpeechEngine; 82 private int mInitStatus; 83 private SpeechRequest mPendingRequest; 84 private String mCurrentBatchId; 85 86 private final Runnable mMaybeShutdownRunnable = new Runnable() { 87 @Override 88 public void run() { 89 if (mListeners.isEmpty() || mPendingRequest == null) { 90 shutdownEngine(); 91 } else { 92 mHandler.postDelayed(this, mShutdownDelayMillis); 93 } 94 } 95 }; 96 TextToSpeechHelper(Context context, TextToSpeechHelper.Listener listener)97 public TextToSpeechHelper(Context context, TextToSpeechHelper.Listener listener) { 98 this(context, new AndroidTextToSpeechEngine(), DEFAULT_SHUTDOWN_DELAY_MILLIS, listener); 99 } 100 101 @VisibleForTesting TextToSpeechHelper(Context context, TextToSpeechEngine ttsEngine, long shutdownDelayMillis, TextToSpeechHelper.Listener listener)102 TextToSpeechHelper(Context context, TextToSpeechEngine ttsEngine, long shutdownDelayMillis, 103 TextToSpeechHelper.Listener listener) { 104 mContext = context; 105 mAudioManager = (AudioManager) mContext.getSystemService(Context.AUDIO_SERVICE); 106 mTextToSpeechEngine = ttsEngine; 107 mShutdownDelayMillis = shutdownDelayMillis; 108 // OnInitListener will only set to SUCCESS/ERROR. So we initialize to STOPPED. 109 mInitStatus = TextToSpeech.STOPPED; 110 mListener = listener; 111 mAudioAttributes = new AudioAttributes.Builder() 112 .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) 113 .setUsage(AudioAttributes.USAGE_ASSISTANT) 114 .build(); 115 mAudioFocusRequest = new AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT) 116 .setAudioAttributes(mAudioAttributes) 117 .setOnAudioFocusChangeListener(mNoOpListener) 118 .build(); 119 } 120 maybeInitAndKeepAlive()121 private void maybeInitAndKeepAlive() { 122 if (!mTextToSpeechEngine.isInitialized()) { 123 if (Log.isLoggable(TAG, Log.DEBUG)) { 124 Log.d(TAG, "Initializing TTS Engine"); 125 } 126 mTextToSpeechEngine.initialize(mContext, this::handleInitCompleted); 127 mTextToSpeechEngine.setOnUtteranceProgressListener(mProgressListener); 128 mTextToSpeechEngine.setAudioAttributes(mAudioAttributes); 129 } 130 // Since we're handling a request, delay engine shutdown. 131 mHandler.removeCallbacks(mMaybeShutdownRunnable); 132 mHandler.postDelayed(mMaybeShutdownRunnable, mShutdownDelayMillis); 133 } 134 handleInitCompleted(int initStatus)135 private void handleInitCompleted(int initStatus) { 136 if (Log.isLoggable(TAG, Log.DEBUG)) { 137 Log.d(TAG, String.format("Init completed. Status: %d", initStatus)); 138 } 139 mInitStatus = initStatus; 140 if (mPendingRequest != null) { 141 playInternal(mPendingRequest.mTextToSpeak, mPendingRequest.mRequestId); 142 mPendingRequest = null; 143 } 144 } 145 146 /** 147 * Plays out given batch of text. If engine is not active, it is setup and the request is stored 148 * until then. Only one batch is supported at a time; If a previous batch is waiting engine 149 * setup, that batch is dropped. If a previous batch is playing, the play-out is stopped and 150 * next one is passed to the TTS Engine. Callbacks are issued on the provided {@code listener}. 151 * Will request audio focus first, failure will trigger onAudioFocusFailed in listener. 152 * <p/> 153 * NOTE: Underlying engine may have limit on length of text in each element of the batch; it 154 * will reject anything longer. See {@link TextToSpeech#getMaxSpeechInputLength()}. 155 * 156 * @param textToSpeak Batch of text to play-out. 157 * @param requestId The tracking request id 158 * @return true if the request to play was successful 159 */ requestPlay(List<CharSequence> textToSpeak, long requestId)160 public boolean requestPlay(List<CharSequence> textToSpeak, long requestId) { 161 if (textToSpeak.isEmpty()) { 162 /* no-op */ 163 return true; 164 } 165 int result = mAudioManager.requestAudioFocus(mAudioFocusRequest); 166 if (result != AudioManager.AUDIOFOCUS_REQUEST_GRANTED) { 167 return false; 168 } 169 maybeInitAndKeepAlive(); 170 171 // Check if its still initializing. 172 if (mInitStatus == TextToSpeech.STOPPED) { 173 // Squash any already queued request. 174 if (mPendingRequest != null) { 175 onTtsStopped(requestId, /* error= */ false); 176 } 177 mPendingRequest = new SpeechRequest(textToSpeak, requestId); 178 } else { 179 playInternal(textToSpeak, requestId); 180 } 181 return true; 182 } 183 184 /** Requests that all play-out be stopped. */ requestStop()185 public void requestStop() { 186 mTextToSpeechEngine.stop(); 187 mCurrentBatchId = null; 188 } 189 isSpeaking()190 public boolean isSpeaking() { 191 return mTextToSpeechEngine.isSpeaking(); 192 } 193 194 // wrap call back to listener.onTextToSpeechStopped with adandonAudioFocus. onTtsStopped(long requestId, boolean error)195 private void onTtsStopped(long requestId, boolean error) { 196 mAudioManager.abandonAudioFocusRequest(mAudioFocusRequest); 197 mHandler.post(() -> mListener.onTextToSpeechStopped(requestId, error)); 198 } 199 playInternal(List<CharSequence> textToSpeak, long requestId)200 private void playInternal(List<CharSequence> textToSpeak, long requestId) { 201 if (mInitStatus == TextToSpeech.ERROR) { 202 Log.e(TAG, "TTS setup failed!"); 203 onTtsStopped(requestId, /* error= */ true); 204 return; 205 } 206 207 // Abort anything currently playing and flushes queue. 208 mTextToSpeechEngine.stop(); 209 210 // Queue up new batch. We assign id's = "batchId;index" where index increments from 0 211 // to batchSize - 1. If queueing fails, we abort the whole batch. 212 mCurrentBatchId = Long.toString(requestId); 213 for (int i = 0; i < textToSpeak.size(); i++) { 214 CharSequence text = textToSpeak.get(i); 215 String utteranceId = 216 String.format("%s%s%d", mCurrentBatchId, UTTERANCE_ID_SEPARATOR, i); 217 if (Log.isLoggable(TAG, Log.DEBUG)) { 218 Log.d(TAG, String.format("Queueing tts: '%s' [%s]", text, utteranceId)); 219 } 220 if (mTextToSpeechEngine.speak(text, TextToSpeech.QUEUE_ADD, /* params= */ null, 221 utteranceId) != TextToSpeech.SUCCESS) { 222 mTextToSpeechEngine.stop(); 223 mCurrentBatchId = null; 224 Log.e(TAG, "Queuing text failed!"); 225 onTtsStopped(requestId, /* error= */ true); 226 return; 227 } 228 } 229 // Register BatchListener for entire batch. Will invoke callbacks on Listener as batch 230 // progresses. 231 mListeners.put(mCurrentBatchId, new BatchListener(requestId, textToSpeak.size())); 232 } 233 234 /** 235 * Releases resources and shuts down TTS Engine. 236 */ cleanup()237 public void cleanup() { 238 mHandler.removeCallbacksAndMessages(/* token= */ null); 239 shutdownEngine(); 240 } 241 242 /** Returns the stream used by the TTS engine. */ getStream()243 public int getStream() { 244 return mTextToSpeechEngine.getStream(); 245 } 246 shutdownEngine()247 private void shutdownEngine() { 248 if (mTextToSpeechEngine.isInitialized()) { 249 if (Log.isLoggable(TAG, Log.DEBUG)) { 250 Log.d(TAG, "Shutting down TTS Engine"); 251 } 252 mTextToSpeechEngine.stop(); 253 mTextToSpeechEngine.shutdown(); 254 mInitStatus = TextToSpeech.STOPPED; 255 } 256 } 257 parse(String utteranceId)258 private static Pair<String, Integer> parse(String utteranceId) { 259 try { 260 String[] pair = utteranceId.split(UTTERANCE_ID_SEPARATOR); 261 String batchId = pair[0]; 262 int index = Integer.valueOf(pair[1]); 263 return Pair.create(batchId, index); 264 } catch (IndexOutOfBoundsException | NumberFormatException e) { 265 throw new IllegalArgumentException( 266 String.format("Utterance ID is invalid: %s.", utteranceId) 267 ); 268 } 269 } 270 271 // Handles all callbacks from TextToSpeechEngine. Possible order of callbacks: 272 // - onStart, onDone: successful play-out. 273 // - onStart, onStop: play-out starts, but interrupted. 274 // - onStart, onError: play-out starts and fails. 275 // - onStop: play-out never starts, but aborted. 276 // - onError: play-out never starts, but fails. 277 // Since the callbacks arrive on other threads, they are dispatched onto mHandler where the 278 // appropriate BatchListener is invoked. 279 private final UtteranceProgressListener mProgressListener = new UtteranceProgressListener() { 280 private void safeInvokeAsync(String utteranceId, 281 BiConsumer<BatchListener, Pair<String, Integer>> callback) { 282 mHandler.post(() -> { 283 Pair<String, Integer> parsedId = parse(utteranceId); 284 BatchListener listener = mListeners.get(parsedId.first); 285 if (listener != null) { 286 callback.accept(listener, parsedId); 287 } else { 288 if (Log.isLoggable(TAG, Log.DEBUG)) { 289 Log.d(TAG, "Missing batch listener: " + utteranceId); 290 } 291 } 292 }); 293 } 294 295 @Override 296 public void onStart(String utteranceId) { 297 if (Log.isLoggable(TAG, Log.DEBUG)) { 298 Log.d(TAG, "TTS onStart: " + utteranceId); 299 } 300 mHandler.post(() -> { 301 Pair<String, Integer> parsedId = parse(utteranceId); 302 BatchListener listener = mListeners.get(parsedId.first); 303 if (listener != null) { 304 listener.onStart(); 305 } else { 306 if (Log.isLoggable(TAG, Log.DEBUG)) { 307 Log.d(TAG, "Missing batch listener: " + utteranceId); 308 } 309 } 310 }); 311 } 312 313 @Override 314 public void onDone(String utteranceId) { 315 if (Log.isLoggable(TAG, Log.DEBUG)) { 316 Log.d(TAG, "TTS onDone: " + utteranceId); 317 } 318 safeInvokeAsync(utteranceId, BatchListener::onDone); 319 } 320 321 @Override 322 public void onStop(String utteranceId, boolean interrupted) { 323 if (Log.isLoggable(TAG, Log.DEBUG)) { 324 Log.d(TAG, "TTS onStop: " + utteranceId); 325 } 326 safeInvokeAsync(utteranceId, BatchListener::onStop); 327 } 328 329 @Override 330 public void onError(String utteranceId) { 331 if (Log.isLoggable(TAG, Log.DEBUG)) { 332 Log.d(TAG, "TTS onError: " + utteranceId); 333 } 334 safeInvokeAsync(utteranceId, BatchListener::onError); 335 } 336 }; 337 338 /** 339 * Handles callbacks for a single batch of TTS text and issues callbacks on wrapped 340 * {@link Listener} that client is listening on. 341 */ 342 private class BatchListener { 343 private boolean mBatchStarted; 344 private final long mRequestId; 345 private final int mUtteranceCount; 346 BatchListener(long requestId, int utteranceCount)347 BatchListener(long requestId, int utteranceCount) { 348 mRequestId = requestId; 349 mUtteranceCount = utteranceCount; 350 } 351 352 // Issues Listener.onTextToSpeechStarted when first item of batch starts. onStart()353 void onStart() { 354 if (!mBatchStarted) { 355 mBatchStarted = true; 356 mListener.onTextToSpeechStarted(mRequestId); 357 } 358 } 359 360 // Issues Listener.onTextToSpeechStopped when last item of batch finishes. onDone(Pair<String, Integer> parsedId)361 void onDone(Pair<String, Integer> parsedId) { 362 // parseId is zero-indexed, mUtteranceCount is not. 363 if (parsedId.second == (mUtteranceCount - 1)) { 364 handleBatchFinished(parsedId, /* error= */ false); 365 } 366 } 367 368 // If any item of batch fails, abort the batch and issue Listener.onTextToSpeechStopped. onError(Pair<String, Integer> parsedId)369 void onError(Pair<String, Integer> parsedId) { 370 if (parsedId.first.equals(mCurrentBatchId)) { 371 mTextToSpeechEngine.stop(); 372 } 373 handleBatchFinished(parsedId, /* error= */ true); 374 } 375 376 // If any item of batch is preempted (rest should also be), 377 // issue Listener.onTextToSpeechStopped. onStop(Pair<String, Integer> parsedId)378 void onStop(Pair<String, Integer> parsedId) { 379 handleBatchFinished(parsedId, /* error= */ false); 380 } 381 382 // Handles terminal callbacks for the batch. We invoke stopped and remove ourselves. 383 // No further callbacks will be handled for the batch. handleBatchFinished(Pair<String, Integer> parsedId, boolean error)384 private void handleBatchFinished(Pair<String, Integer> parsedId, boolean error) { 385 onTtsStopped(mRequestId, error); 386 mListeners.remove(parsedId.first); 387 } 388 } 389 390 private static class SpeechRequest { 391 final List<CharSequence> mTextToSpeak; 392 final long mRequestId; 393 SpeechRequest(List<CharSequence> textToSpeak, long requestId)394 SpeechRequest(List<CharSequence> textToSpeak, long requestId) { 395 mTextToSpeak = textToSpeak; 396 mRequestId = requestId; 397 } 398 } 399 } 400