1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.car.assist.client.tts;
18 
19 import android.content.Context;
20 import android.media.AudioAttributes;
21 import android.media.AudioFocusRequest;
22 import android.media.AudioManager;
23 import android.os.Handler;
24 import android.speech.tts.TextToSpeech;
25 import android.speech.tts.UtteranceProgressListener;
26 import android.util.Log;
27 import android.util.Pair;
28 
29 import androidx.annotation.VisibleForTesting;
30 
31 import java.util.HashMap;
32 import java.util.List;
33 import java.util.Map;
34 import java.util.concurrent.TimeUnit;
35 import java.util.function.BiConsumer;
36 
37 /**
38  * Component that wraps platform TTS engine and supports play-out of batches of text.
39  * <p>
40  * It takes care of setting up TTS Engine when text is played out and shutting it down after an idle
41  * period with no play-out. This is desirable since the owning app is long-lived and the TTS Engine
42  * brings up another service-process.
43  * <p>
44  * As batches of text are played-out, they issue callbacks on the {@link Listener} provided with the
45  * batch.
46  */
47 public class TextToSpeechHelper {
48     /**
49      * Listener interface used by clients to be notified as batch of text is played out.
50      */
51     public interface Listener {
52         /**
53          * Called when play-out starts for batch. May never get called if batch has errors or
54          * interruptions.
55          */
onTextToSpeechStarted(long requestId)56         void onTextToSpeechStarted(long requestId);
57 
58         /**
59          * Called when play-out ends for batch.
60          *
61          * @param error Whether play-out ended due to an error or not. Note: if it was aborted, it's
62          *              not considered an error.
63          */
onTextToSpeechStopped(long requestId, boolean error)64         void onTextToSpeechStopped(long requestId, boolean error);
65     }
66 
67     private static final String TAG = "CM#TextToSpeechHelper";
68 
69     private static final String UTTERANCE_ID_SEPARATOR = ";";
70     private static final long DEFAULT_SHUTDOWN_DELAY_MILLIS = TimeUnit.MINUTES.toMillis(1);
71 
72     private final Map<String, BatchListener> mListeners = new HashMap<>();
73     private final Handler mHandler = new Handler();
74     private final Context mContext;
75     private final TextToSpeechHelper.Listener mListener;
76     private final AudioManager.OnAudioFocusChangeListener mNoOpListener = (f) -> { /* NO-OP */ };
77     private final AudioManager mAudioManager;
78     private final AudioAttributes mAudioAttributes;
79     private final AudioFocusRequest mAudioFocusRequest;
80     private final long mShutdownDelayMillis;
81     private TextToSpeechEngine mTextToSpeechEngine;
82     private int mInitStatus;
83     private SpeechRequest mPendingRequest;
84     private String mCurrentBatchId;
85 
86     private final Runnable mMaybeShutdownRunnable = new Runnable() {
87         @Override
88         public void run() {
89             if (mListeners.isEmpty() || mPendingRequest == null) {
90                 shutdownEngine();
91             } else {
92                 mHandler.postDelayed(this, mShutdownDelayMillis);
93             }
94         }
95     };
96 
TextToSpeechHelper(Context context, TextToSpeechHelper.Listener listener)97     public TextToSpeechHelper(Context context, TextToSpeechHelper.Listener listener) {
98         this(context, new AndroidTextToSpeechEngine(), DEFAULT_SHUTDOWN_DELAY_MILLIS, listener);
99     }
100 
101     @VisibleForTesting
TextToSpeechHelper(Context context, TextToSpeechEngine ttsEngine, long shutdownDelayMillis, TextToSpeechHelper.Listener listener)102     TextToSpeechHelper(Context context, TextToSpeechEngine ttsEngine, long shutdownDelayMillis,
103             TextToSpeechHelper.Listener listener) {
104         mContext = context;
105         mAudioManager = (AudioManager) mContext.getSystemService(Context.AUDIO_SERVICE);
106         mTextToSpeechEngine = ttsEngine;
107         mShutdownDelayMillis = shutdownDelayMillis;
108         // OnInitListener will only set to SUCCESS/ERROR. So we initialize to STOPPED.
109         mInitStatus = TextToSpeech.STOPPED;
110         mListener = listener;
111         mAudioAttributes =  new AudioAttributes.Builder()
112                 .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
113                 .setUsage(AudioAttributes.USAGE_ASSISTANT)
114                 .build();
115         mAudioFocusRequest = new AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT)
116                 .setAudioAttributes(mAudioAttributes)
117                 .setOnAudioFocusChangeListener(mNoOpListener)
118                 .build();
119     }
120 
maybeInitAndKeepAlive()121     private void maybeInitAndKeepAlive() {
122         if (!mTextToSpeechEngine.isInitialized()) {
123             if (Log.isLoggable(TAG, Log.DEBUG)) {
124                 Log.d(TAG, "Initializing TTS Engine");
125             }
126             mTextToSpeechEngine.initialize(mContext, this::handleInitCompleted);
127             mTextToSpeechEngine.setOnUtteranceProgressListener(mProgressListener);
128             mTextToSpeechEngine.setAudioAttributes(mAudioAttributes);
129         }
130         // Since we're handling a request, delay engine shutdown.
131         mHandler.removeCallbacks(mMaybeShutdownRunnable);
132         mHandler.postDelayed(mMaybeShutdownRunnable, mShutdownDelayMillis);
133     }
134 
handleInitCompleted(int initStatus)135     private void handleInitCompleted(int initStatus) {
136         if (Log.isLoggable(TAG, Log.DEBUG)) {
137             Log.d(TAG, String.format("Init completed. Status: %d", initStatus));
138         }
139         mInitStatus = initStatus;
140         if (mPendingRequest != null) {
141             playInternal(mPendingRequest.mTextToSpeak, mPendingRequest.mRequestId);
142             mPendingRequest = null;
143         }
144     }
145 
146     /**
147      * Plays out given batch of text. If engine is not active, it is setup and the request is stored
148      * until then. Only one batch is supported at a time; If a previous batch is waiting engine
149      * setup, that batch is dropped. If a previous batch is playing, the play-out is stopped and
150      * next one is passed to the TTS Engine. Callbacks are issued on the provided {@code listener}.
151      * Will request audio focus first, failure will trigger onAudioFocusFailed in listener.
152      * <p/>
153      * NOTE: Underlying engine may have limit on length of text in each element of the batch; it
154      * will reject anything longer. See {@link TextToSpeech#getMaxSpeechInputLength()}.
155      *
156      * @param textToSpeak Batch of text to play-out.
157      * @param requestId The tracking request id
158      * @return true if the request to play was successful
159      */
requestPlay(List<CharSequence> textToSpeak, long requestId)160     public boolean requestPlay(List<CharSequence> textToSpeak, long requestId) {
161         if (textToSpeak.isEmpty()) {
162             /* no-op */
163             return true;
164         }
165         int result = mAudioManager.requestAudioFocus(mAudioFocusRequest);
166         if (result != AudioManager.AUDIOFOCUS_REQUEST_GRANTED) {
167             return false;
168         }
169         maybeInitAndKeepAlive();
170 
171         // Check if its still initializing.
172         if (mInitStatus == TextToSpeech.STOPPED) {
173             // Squash any already queued request.
174             if (mPendingRequest != null) {
175                 onTtsStopped(requestId, /* error= */ false);
176             }
177             mPendingRequest = new SpeechRequest(textToSpeak, requestId);
178         } else {
179             playInternal(textToSpeak, requestId);
180         }
181         return true;
182     }
183 
184     /** Requests that all play-out be stopped. */
requestStop()185     public void requestStop() {
186         mTextToSpeechEngine.stop();
187         mCurrentBatchId = null;
188     }
189 
isSpeaking()190     public boolean isSpeaking() {
191         return mTextToSpeechEngine.isSpeaking();
192     }
193 
194     // wrap call back to listener.onTextToSpeechStopped with adandonAudioFocus.
onTtsStopped(long requestId, boolean error)195     private void onTtsStopped(long requestId, boolean error) {
196         mAudioManager.abandonAudioFocusRequest(mAudioFocusRequest);
197         mHandler.post(() -> mListener.onTextToSpeechStopped(requestId, error));
198     }
199 
playInternal(List<CharSequence> textToSpeak, long requestId)200     private void playInternal(List<CharSequence> textToSpeak, long requestId) {
201         if (mInitStatus == TextToSpeech.ERROR) {
202             Log.e(TAG, "TTS setup failed!");
203             onTtsStopped(requestId, /* error= */ true);
204             return;
205         }
206 
207         // Abort anything currently playing and flushes queue.
208         mTextToSpeechEngine.stop();
209 
210         // Queue up new batch. We assign id's = "batchId;index" where index increments from 0
211         // to batchSize - 1. If queueing fails, we abort the whole batch.
212         mCurrentBatchId = Long.toString(requestId);
213         for (int i = 0; i < textToSpeak.size(); i++) {
214             CharSequence text = textToSpeak.get(i);
215             String utteranceId =
216                     String.format("%s%s%d", mCurrentBatchId, UTTERANCE_ID_SEPARATOR, i);
217             if (Log.isLoggable(TAG, Log.DEBUG)) {
218                 Log.d(TAG, String.format("Queueing tts: '%s' [%s]", text, utteranceId));
219             }
220             if (mTextToSpeechEngine.speak(text, TextToSpeech.QUEUE_ADD, /* params= */ null,
221                     utteranceId) != TextToSpeech.SUCCESS) {
222                 mTextToSpeechEngine.stop();
223                 mCurrentBatchId = null;
224                 Log.e(TAG, "Queuing text failed!");
225                 onTtsStopped(requestId, /* error= */ true);
226                 return;
227             }
228         }
229         // Register BatchListener for entire batch. Will invoke callbacks on Listener as batch
230         // progresses.
231         mListeners.put(mCurrentBatchId, new BatchListener(requestId, textToSpeak.size()));
232     }
233 
234     /**
235      * Releases resources and shuts down TTS Engine.
236      */
cleanup()237     public void cleanup() {
238         mHandler.removeCallbacksAndMessages(/* token= */ null);
239         shutdownEngine();
240     }
241 
242     /** Returns the stream used by the TTS engine. */
getStream()243     public int getStream() {
244         return mTextToSpeechEngine.getStream();
245     }
246 
shutdownEngine()247     private void shutdownEngine() {
248         if (mTextToSpeechEngine.isInitialized()) {
249             if (Log.isLoggable(TAG, Log.DEBUG)) {
250                 Log.d(TAG, "Shutting down TTS Engine");
251             }
252             mTextToSpeechEngine.stop();
253             mTextToSpeechEngine.shutdown();
254             mInitStatus = TextToSpeech.STOPPED;
255         }
256     }
257 
parse(String utteranceId)258     private static Pair<String, Integer> parse(String utteranceId) {
259         try {
260             String[] pair = utteranceId.split(UTTERANCE_ID_SEPARATOR);
261             String batchId = pair[0];
262             int index = Integer.valueOf(pair[1]);
263             return Pair.create(batchId, index);
264         } catch (IndexOutOfBoundsException | NumberFormatException e) {
265             throw new IllegalArgumentException(
266                     String.format("Utterance ID is invalid: %s.", utteranceId)
267             );
268         }
269     }
270 
271     // Handles all callbacks from TextToSpeechEngine. Possible order of callbacks:
272     // - onStart, onDone: successful play-out.
273     // - onStart, onStop: play-out starts, but interrupted.
274     // - onStart, onError: play-out starts and fails.
275     // - onStop: play-out never starts, but aborted.
276     // - onError: play-out never starts, but fails.
277     // Since the callbacks arrive on other threads, they are dispatched onto mHandler where the
278     // appropriate BatchListener is invoked.
279     private final UtteranceProgressListener mProgressListener = new UtteranceProgressListener() {
280         private void safeInvokeAsync(String utteranceId,
281                 BiConsumer<BatchListener, Pair<String, Integer>> callback) {
282             mHandler.post(() -> {
283                 Pair<String, Integer> parsedId = parse(utteranceId);
284                 BatchListener listener = mListeners.get(parsedId.first);
285                 if (listener != null) {
286                     callback.accept(listener, parsedId);
287                 } else {
288                     if (Log.isLoggable(TAG, Log.DEBUG)) {
289                         Log.d(TAG, "Missing batch listener: " + utteranceId);
290                     }
291                 }
292             });
293         }
294 
295         @Override
296         public void onStart(String utteranceId) {
297             if (Log.isLoggable(TAG, Log.DEBUG)) {
298                 Log.d(TAG, "TTS onStart: " + utteranceId);
299             }
300             mHandler.post(() -> {
301                 Pair<String, Integer> parsedId = parse(utteranceId);
302                 BatchListener listener = mListeners.get(parsedId.first);
303                 if (listener != null) {
304                     listener.onStart();
305                 } else {
306                     if (Log.isLoggable(TAG, Log.DEBUG)) {
307                         Log.d(TAG, "Missing batch listener: " + utteranceId);
308                     }
309                 }
310             });
311         }
312 
313         @Override
314         public void onDone(String utteranceId) {
315             if (Log.isLoggable(TAG, Log.DEBUG)) {
316                 Log.d(TAG, "TTS onDone: " + utteranceId);
317             }
318             safeInvokeAsync(utteranceId, BatchListener::onDone);
319         }
320 
321         @Override
322         public void onStop(String utteranceId, boolean interrupted) {
323             if (Log.isLoggable(TAG, Log.DEBUG)) {
324                 Log.d(TAG, "TTS onStop: " + utteranceId);
325             }
326             safeInvokeAsync(utteranceId, BatchListener::onStop);
327         }
328 
329         @Override
330         public void onError(String utteranceId) {
331             if (Log.isLoggable(TAG, Log.DEBUG)) {
332                 Log.d(TAG, "TTS onError: " + utteranceId);
333             }
334             safeInvokeAsync(utteranceId, BatchListener::onError);
335         }
336     };
337 
338     /**
339      * Handles callbacks for a single batch of TTS text and issues callbacks on wrapped
340      * {@link Listener} that client is listening on.
341      */
342     private class BatchListener {
343         private boolean mBatchStarted;
344         private final long mRequestId;
345         private final int mUtteranceCount;
346 
BatchListener(long requestId, int utteranceCount)347         BatchListener(long requestId, int utteranceCount) {
348             mRequestId = requestId;
349             mUtteranceCount = utteranceCount;
350         }
351 
352         // Issues Listener.onTextToSpeechStarted when first item of batch starts.
onStart()353         void onStart() {
354             if (!mBatchStarted) {
355                 mBatchStarted = true;
356                 mListener.onTextToSpeechStarted(mRequestId);
357             }
358         }
359 
360         // Issues Listener.onTextToSpeechStopped when last item of batch finishes.
onDone(Pair<String, Integer> parsedId)361         void onDone(Pair<String, Integer> parsedId) {
362             // parseId is zero-indexed, mUtteranceCount is not.
363             if (parsedId.second == (mUtteranceCount - 1)) {
364                 handleBatchFinished(parsedId, /* error= */ false);
365             }
366         }
367 
368         // If any item of batch fails, abort the batch and issue Listener.onTextToSpeechStopped.
onError(Pair<String, Integer> parsedId)369         void onError(Pair<String, Integer> parsedId) {
370             if (parsedId.first.equals(mCurrentBatchId)) {
371                 mTextToSpeechEngine.stop();
372             }
373             handleBatchFinished(parsedId, /* error= */ true);
374         }
375 
376         // If any item of batch is preempted (rest should also be),
377         // issue Listener.onTextToSpeechStopped.
onStop(Pair<String, Integer> parsedId)378         void onStop(Pair<String, Integer> parsedId) {
379             handleBatchFinished(parsedId, /* error= */ false);
380         }
381 
382         // Handles terminal callbacks for the batch. We invoke stopped and remove ourselves.
383         // No further callbacks will be handled for the batch.
handleBatchFinished(Pair<String, Integer> parsedId, boolean error)384         private void handleBatchFinished(Pair<String, Integer> parsedId, boolean error) {
385             onTtsStopped(mRequestId, error);
386             mListeners.remove(parsedId.first);
387         }
388     }
389 
390     private static class SpeechRequest {
391         final List<CharSequence> mTextToSpeak;
392         final long mRequestId;
393 
SpeechRequest(List<CharSequence> textToSpeak, long requestId)394         SpeechRequest(List<CharSequence> textToSpeak, long requestId) {
395             mTextToSpeak = textToSpeak;
396             mRequestId = requestId;
397         }
398     }
399 }
400