1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5  * use this file except in compliance with the License. You may obtain a copy of
6  * the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13  * License for the specific language governing permissions and limitations under
14  * the License.
15  */
16 package com.example.android.ttsengine;
17 
import android.content.Context;
import android.content.SharedPreferences;
import android.media.AudioFormat;
import android.speech.tts.SynthesisCallback;
import android.speech.tts.SynthesisRequest;
import android.speech.tts.TextToSpeech;
import android.speech.tts.TextToSpeechService;
import android.util.Log;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
35 
36 /**
37  * A text to speech engine that generates "speech" that a robot might understand.
38  * The engine supports two different "languages", each with their own frequency
39  * mappings.
40  *
41  * It exercises all aspects of the Text to speech engine API
42  * {@link android.speech.tts.TextToSpeechService}.
43  */
44 public class RobotSpeakTtsService extends TextToSpeechService {
45     private static final String TAG = "ExampleTtsService";
46 
47     /*
48      * This is the sampling rate of our output audio. This engine outputs
49      * audio at 16khz 16bits per sample PCM audio.
50      */
51     private static final int SAMPLING_RATE_HZ = 16000;
52 
53     /*
54      * We multiply by a factor of two since each sample contains 16 bits (2 bytes).
55      */
56     private final byte[] mAudioBuffer = new byte[SAMPLING_RATE_HZ * 2];
57 
58     private Map<Character, Integer> mFrequenciesMap;
59     private volatile String[] mCurrentLanguage = null;
60     private volatile boolean mStopRequested = false;
61     private SharedPreferences mSharedPrefs = null;
62 
63     @Override
onCreate()64     public void onCreate() {
65         super.onCreate();
66         mSharedPrefs = getSharedPreferences(GeneralSettingsFragment.SHARED_PREFS_NAME,
67                 Context.MODE_PRIVATE);
68         // We load the default language when we start up. This isn't strictly
69         // required though, it can always be loaded lazily on the first call to
70         // onLoadLanguage or onSynthesizeText. This a tradeoff between memory usage
71         // and the latency of the first call.
72         onLoadLanguage("eng", "usa", "");
73     }
74 
75     @Override
onDestroy()76     public void onDestroy() {
77         super.onDestroy();
78     }
79 
80     @Override
onGetLanguage()81     protected String[] onGetLanguage() {
82         // Note that mCurrentLanguage is volatile because this can be called from
83         // multiple threads.
84         return mCurrentLanguage;
85     }
86 
87     @Override
onIsLanguageAvailable(String lang, String country, String variant)88     protected int onIsLanguageAvailable(String lang, String country, String variant) {
89         // The robot speak synthesizer supports only english.
90         if ("eng".equals(lang)) {
91             // We support two specific robot languages, the british robot language
92             // and the american robot language.
93             if ("USA".equals(country) || "GBR".equals(country)) {
94                 // If the engine supported a specific variant, we would have
95                 // something like.
96                 //
97                 // if ("android".equals(variant)) {
98                 //     return TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE;
99                 // }
100                 return TextToSpeech.LANG_COUNTRY_AVAILABLE;
101             }
102 
103             // We support the language, but not the country.
104             return TextToSpeech.LANG_AVAILABLE;
105         }
106 
107         return TextToSpeech.LANG_NOT_SUPPORTED;
108     }
109 
110     /*
111      * Note that this method is synchronized, as is onSynthesizeText because
112      * onLoadLanguage can be called from multiple threads (while onSynthesizeText
113      * is always called from a single thread only).
114      */
115     @Override
onLoadLanguage(String lang, String country, String variant)116     protected synchronized int onLoadLanguage(String lang, String country, String variant) {
117         final int isLanguageAvailable = onIsLanguageAvailable(lang, country, variant);
118 
119         if (isLanguageAvailable == TextToSpeech.LANG_NOT_SUPPORTED) {
120             return isLanguageAvailable;
121         }
122 
123         String loadCountry = country;
124         if (isLanguageAvailable == TextToSpeech.LANG_AVAILABLE) {
125             loadCountry = "USA";
126         }
127 
128         // If we've already loaded the requested language, we can return early.
129         if (mCurrentLanguage != null) {
130             if (mCurrentLanguage[0].equals(lang) && mCurrentLanguage[1].equals(country)) {
131                 return isLanguageAvailable;
132             }
133         }
134 
135         Map<Character, Integer> newFrequenciesMap = null;
136         try {
137             InputStream file = getAssets().open(lang + "-" + loadCountry + ".freq");
138             newFrequenciesMap = buildFrequencyMap(file);
139             file.close();
140         } catch (IOException e) {
141             Log.e(TAG, "Error loading data for : " + lang + "-" + country);
142         }
143 
144         mFrequenciesMap = newFrequenciesMap;
145         mCurrentLanguage = new String[] { lang, loadCountry, ""};
146 
147         return isLanguageAvailable;
148     }
149 
150     @Override
onStop()151     protected void onStop() {
152         mStopRequested = true;
153     }
154 
155     @Override
onSynthesizeText(SynthesisRequest request, SynthesisCallback callback)156     protected synchronized void onSynthesizeText(SynthesisRequest request,
157             SynthesisCallback callback) {
158         // Note that we call onLoadLanguage here since there is no guarantee
159         // that there would have been a prior call to this function.
160         int load = onLoadLanguage(request.getLanguage(), request.getCountry(),
161                 request.getVariant());
162 
163         // We might get requests for a language we don't support - in which case
164         // we error out early before wasting too much time.
165         if (load == TextToSpeech.LANG_NOT_SUPPORTED) {
166             callback.error();
167             return;
168         }
169 
170         // At this point, we have loaded the language we need for synthesis and
171         // it is guaranteed that we support it so we proceed with synthesis.
172 
173         // We denote that we are ready to start sending audio across to the
174         // framework. We use a fixed sampling rate (16khz), and send data across
175         // in 16bit PCM mono.
176         callback.start(SAMPLING_RATE_HZ,
177                 AudioFormat.ENCODING_PCM_16BIT, 1 /* Number of channels. */);
178 
179         // We then scan through each character of the request string and
180         // generate audio for it.
181         final String text = request.getText().toLowerCase();
182         for (int i = 0; i < text.length(); ++i) {
183             char value = normalize(text.charAt(i));
184             // It is crucial to call either of callback.error() or callback.done() to ensure
185             // that audio / other resources are released as soon as possible.
186             if (!generateOneSecondOfAudio(value, callback)) {
187                 callback.error();
188                 return;
189             }
190         }
191 
192         // Alright, we're done with our synthesis - yay!
193         callback.done();
194     }
195 
196     /*
197      * Normalizes a given character to the range 'a' - 'z' (inclusive). Our
198      * frequency mappings contain frequencies for each of these characters.
199      */
normalize(char input)200     private static char normalize(char input) {
201         if (input == ' ') {
202             return input;
203         }
204 
205         if (input < 'a') {
206             return 'a';
207         }
208         if (input > 'z') {
209             return 'z';
210         }
211 
212         return input;
213     }
214 
buildFrequencyMap(InputStream is)215     private Map<Character, Integer> buildFrequencyMap(InputStream is) throws IOException {
216         BufferedReader br = new BufferedReader(new InputStreamReader(is));
217         String line = null;
218         Map<Character, Integer> map = new HashMap<Character, Integer>();
219         try {
220             while ((line = br.readLine()) != null) {
221                 String[] parts = line.split(":");
222                 if (parts.length != 2) {
223                     throw new IOException("Invalid line encountered: " + line);
224                 }
225                 map.put(parts[0].charAt(0), Integer.parseInt(parts[1]));
226             }
227             map.put(' ', 0);
228             return map;
229         } finally {
230             is.close();
231         }
232     }
233 
generateOneSecondOfAudio(char alphabet, SynthesisCallback cb)234     private boolean generateOneSecondOfAudio(char alphabet, SynthesisCallback cb) {
235         ByteBuffer buffer = ByteBuffer.wrap(mAudioBuffer).order(ByteOrder.LITTLE_ENDIAN);
236 
237         // Someone called onStop, end the current synthesis and return.
238         // The mStopRequested variable will be reset at the beginning of the
239         // next synthesis.
240         //
241         // In general, a call to onStop( ) should make a best effort attempt
242         // to stop all processing for the *current* onSynthesizeText request (if
243         // one is active).
244         if (mStopRequested) {
245             return false;
246         }
247 
248 
249         if (mFrequenciesMap == null || !mFrequenciesMap.containsKey(alphabet)) {
250             return false;
251         }
252 
253         final int frequency = mFrequenciesMap.get(alphabet);
254 
255         if (frequency > 0) {
256             // This is the wavelength in samples. The frequency is chosen so that the
257             // waveLength is always a multiple of two and frequency divides the
258             // SAMPLING_RATE exactly.
259             final int waveLength = SAMPLING_RATE_HZ / frequency;
260             final int times = SAMPLING_RATE_HZ / waveLength;
261 
262             for (int j = 0; j < times; ++j) {
263                 // For a square curve, half of the values will be at Short.MIN_VALUE
264                 // and the other half will be Short.MAX_VALUE.
265                 for (int i = 0; i < waveLength / 2; ++i) {
266                     buffer.putShort((short)(getAmplitude() * -1));
267                 }
268                 for (int i = 0; i < waveLength / 2; ++i) {
269                     buffer.putShort(getAmplitude());
270                 }
271             }
272         } else {
273             // Play a second of silence.
274             for (int i = 0; i < mAudioBuffer.length / 2; ++i) {
275                 buffer.putShort((short) 0);
276             }
277         }
278 
279         // Get the maximum allowed size of data we can send across in audioAvailable.
280         final int maxBufferSize = cb.getMaxBufferSize();
281         int offset = 0;
282         while (offset < mAudioBuffer.length) {
283             int bytesToWrite = Math.min(maxBufferSize, mAudioBuffer.length - offset);
284             cb.audioAvailable(mAudioBuffer, offset, bytesToWrite);
285             offset += bytesToWrite;
286         }
287         return true;
288     }
289 
getAmplitude()290     private short getAmplitude() {
291         boolean whisper = mSharedPrefs.getBoolean(GeneralSettingsFragment.WHISPER_KEY, false);
292         return (short) (whisper ? 2048 : 8192);
293     }
294 }
295