1 /* com_svox_picottsengine.cpp
2
3 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 * This is the Manager layer. It sits on top of the native Pico engine
18 * and provides the interface to the defined Google TTS engine API.
19 * The Google engine API is the boundary to allow a TTS engine to be swapped.
20 * The Manager layer also provides the SSML tag interpretation.
21 * The supported SSML tags are mapped to corresponding tags natively supported by Pico.
22 * Native Pico functions always begin with picoXXX.
23 *
24 * In the Pico engine, the language cannot be changed independently of the voice.
25 * If either the voice or locale/language are changed, a new resource is loaded.
26 *
27 * Only a subset of SSML 1.0 tags are supported.
28 * Some SSML tags involve significant complexity.
29 * If the language is changed through an SSML tag, there is a latency for the load.
30 *
31 */
32 //#define LOG_NDEBUG 0
33
34 #include <stdio.h>
35 #include <unistd.h>
36 #include <stdlib.h>
37
38 #define LOG_TAG "SVOX Pico Engine"
39
40 #include <utils/Log.h>
41 #include <utils/String16.h> /* for strlen16 */
42 #include <android_runtime/AndroidRuntime.h>
43 #include <TtsEngine.h>
44
45 #include <cutils/jstring.h>
46 #include <picoapi.h>
47 #include <picodefs.h>
48
49 #include "svox_ssml_parser.h"
50
51 using namespace android;
52
/* adaptation layer defines */
/* Size handed to pico_initialize() together with picoMemArea. */
#define PICO_MEM_SIZE 2500000
/* speaking rate */
/* NOTE(review): the MIN/MAX limits below are not referenced in this part of the file;
   presumably they clamp values set by the client - confirm against the property setters. */
#define PICO_MIN_RATE 20
#define PICO_MAX_RATE 500
#define PICO_DEF_RATE 100
/* speaking pitch */
#define PICO_MIN_PITCH 50
#define PICO_MAX_PITCH 200
#define PICO_DEF_PITCH 100
/* speaking volume */
#define PICO_MIN_VOLUME 0
#define PICO_MAX_VOLUME 500
#define PICO_DEF_VOLUME 100

/* string constants */
/* NOTE(review): MAX_OUTBUF_SIZE is not referenced in this part of the file. */
#define MAX_OUTBUF_SIZE 128
/* Directory prefixes; the lingware file names below are appended to them directly,
   so each path ends with a '/'. */
const char * PICO_SYSTEM_LINGWARE_PATH = "/system/tts/lang_pico/";
const char * PICO_LINGWARE_PATH = "/sdcard/svox/";
/* Name under which the single voice definition is created and released. */
const char * PICO_VOICE_NAME = "PicoVoice";
/* Pico markup emitted by doAddProperties(); each open tag is a printf template
   taking one integer level. */
const char * PICO_SPEED_OPEN_TAG = "<speed level='%d'>";
const char * PICO_SPEED_CLOSE_TAG = "</speed>";
const char * PICO_PITCH_OPEN_TAG = "<pitch level='%d'>";
const char * PICO_PITCH_CLOSE_TAG = "</pitch>";
const char * PICO_VOLUME_OPEN_TAG = "<volume level='%d'>";
const char * PICO_VOLUME_CLOSE_TAG = "</volume>";
/* Pico markup wrapped around every word by createPhonemeString(). */
const char * PICO_PHONEME_OPEN_TAG = "<phoneme ph='";
const char * PICO_PHONEME_CLOSE_TAG = "'/>";

/* supported voices
   Pico does not separately specify the voice and locale.
   The arrays below are parallel: the same index selects the same voice in each of them,
   and every array holds picoNumSupportedVocs entries. */
const char * picoSupportedLangIso3[] = { "eng", "eng", "deu", "spa", "fra", "ita" };
const char * picoSupportedCountryIso3[] = { "USA", "GBR", "DEU", "ESP", "FRA", "ITA" };
const char * picoSupportedLang[] = { "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT" };
const char * picoInternalLang[] = { "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT" };
/* Text analysis ("ta"), signal generation ("sg") and optional "utpp" lingware file names. */
const char * picoInternalTaLingware[] = { "en-US_ta.bin", "en-GB_ta.bin", "de-DE_ta.bin", "es-ES_ta.bin", "fr-FR_ta.bin", "it-IT_ta.bin" };
const char * picoInternalSgLingware[] = { "en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin", "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin" };
const char * picoInternalUtppLingware[] = { "en-US_utpp.bin", "en-GB_utpp.bin", "de-DE_utpp.bin", "es-ES_utpp.bin", "fr-FR_utpp.bin", "it-IT_utpp.bin" };
const int picoNumSupportedVocs = 6;

/* supported properties */
const char * picoSupportedProperties[] = { "language", "rate", "pitch", "volume" };
const int picoNumSupportedProperties = 4;


/* adaptation layer global variables */
/* NOTE(review): picoSynthDoneCBPtr and picoSynthAbort are not used in this part of the file. */
synthDoneCB_t * picoSynthDoneCBPtr;
/* Memory area passed to pico_initialize(); it is not allocated in this part of the file. */
void * picoMemArea = NULL;
/* Pico handles; all of them are released and reset to NULL by cleanResources(). */
pico_System picoSystem = NULL;
pico_Resource picoTaResource = NULL;
pico_Resource picoSgResource = NULL;
pico_Resource picoUtppResource = NULL;
pico_Engine picoEngine = NULL;
/* Heap strings allocated by doLanguageSwitchFromLangIndex() and freed by cleanFiles(). */
pico_Char * picoTaFileName = NULL;
pico_Char * picoSgFileName = NULL;
pico_Char * picoUtppFileName = NULL;
pico_Char * picoTaResourceName = NULL;
pico_Char * picoSgResourceName = NULL;
pico_Char * picoUtppResourceName = NULL;
int picoSynthAbort = 0;
char * picoProp_currLang = NULL; /* current language */
int picoProp_currRate = PICO_DEF_RATE; /* current rate */
int picoProp_currPitch = PICO_DEF_PITCH; /* current pitch */
int picoProp_currVolume = PICO_DEF_VOLUME; /* current volume */

/* Index of the loaded voice in the tables above, or -1 when no voice is loaded. */
int picoCurrentLangIndex = -1;

/* Alternative lingware directory, probed in addition to PICO_SYSTEM_LINGWARE_PATH.
   NOTE(review): it is not assigned in this part of the file - confirm where it is set. */
char * pico_alt_lingware_path = NULL;
122
123 /* internal helper functions */
124
125 /** checkForLocale
126 * Check whether the requested locale is among the supported locales.
127 * @locale - the locale to check, either in xx or xx-YY format
128 * return index of the locale, or -1 if not supported.
129 */
checkForLocale(const char * locale)130 static int checkForLocale( const char * locale )
131 {
132 int found = -1; /* language not found */
133 int i;
134 if (locale == NULL) {
135 ALOGE("checkForLocale called with NULL language");
136 return found;
137 }
138
139 /* Verify that the requested locale is a locale that we support. */
140 for (i = 0; i < picoNumSupportedVocs; i ++) {
141 if (strcmp(locale, picoSupportedLang[i]) == 0) { /* in array */
142 found = i;
143 break;
144 }
145 };
146
147 /* The exact locale was not found. */
148 if (found < 0) {
149 /* We didn't find an exact match; it may have been specified with only the first 2 characters.
150 This could overmatch ISO 639-3 language codes.%% */
151
152 /* check whether the current language matches the locale's language */
153 if ((picoCurrentLangIndex > -1) &&
154 (strncmp(locale, picoSupportedLang[picoCurrentLangIndex], 2) == 0)) {
155 /* the current language matches the requested language, let's use it */
156 found = picoCurrentLangIndex;
157 } else {
158 /* check whether we can find a match at least on the language */
159 for (i = 0; i < picoNumSupportedVocs; i ++) {
160 if (strncmp(locale, picoSupportedLang[i], 2) == 0) {
161 found = i;
162 break;
163 }
164 }
165 }
166
167 if (found < 0) {
168 ALOGE("TtsEngine::set language called with unsupported locale %s", locale);
169 }
170 };
171 return found;
172 }
173
174
175 /** cleanResources
176 * Unloads any loaded Pico resources.
177 */
cleanResources(void)178 static void cleanResources( void )
179 {
180 if (picoEngine) {
181 pico_disposeEngine( picoSystem, &picoEngine );
182 pico_releaseVoiceDefinition( picoSystem, (pico_Char *) PICO_VOICE_NAME );
183 picoEngine = NULL;
184 }
185 if (picoUtppResource) {
186 pico_unloadResource( picoSystem, &picoUtppResource );
187 picoUtppResource = NULL;
188 }
189 if (picoTaResource) {
190 pico_unloadResource( picoSystem, &picoTaResource );
191 picoTaResource = NULL;
192 }
193 if (picoSgResource) {
194 pico_unloadResource( picoSystem, &picoSgResource );
195 picoSgResource = NULL;
196 }
197
198 if (picoSystem) {
199 pico_terminate(&picoSystem);
200 picoSystem = NULL;
201 }
202 picoCurrentLangIndex = -1;
203 }
204
205
206 /** cleanFiles
207 * Frees any memory allocated for file and resource strings.
208 */
cleanFiles(void)209 static void cleanFiles( void )
210 {
211 if (picoProp_currLang) {
212 free( picoProp_currLang );
213 picoProp_currLang = NULL;
214 }
215
216 if (picoTaFileName) {
217 free( picoTaFileName );
218 picoTaFileName = NULL;
219 }
220
221 if (picoSgFileName) {
222 free( picoSgFileName );
223 picoSgFileName = NULL;
224 }
225
226 if (picoUtppFileName) {
227 free( picoUtppFileName );
228 picoUtppFileName = NULL;
229 }
230
231 if (picoTaResourceName) {
232 free( picoTaResourceName );
233 picoTaResourceName = NULL;
234 }
235
236 if (picoSgResourceName) {
237 free( picoSgResourceName );
238 picoSgResourceName = NULL;
239 }
240
241 if (picoUtppResourceName) {
242 free( picoUtppResourceName );
243 picoUtppResourceName = NULL;
244 }
245 }
246
247 /** hasResourcesForLanguage
248 * Check to see if the resources required to load the language at the specified index
249 * are properly installed
250 * @langIndex - the index of the language to check the resources for. The index is valid.
251 * return true if the required resources are installed, false otherwise
252 */
hasResourcesForLanguage(int langIndex)253 static bool hasResourcesForLanguage(int langIndex) {
254 FILE * pFile;
255 char* fileName = (char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE);
256
257 /* check resources on system (under PICO_SYSTEM_LINGWARE_PATH). */
258 strcpy((char*)fileName, PICO_SYSTEM_LINGWARE_PATH);
259 strcat((char*)fileName, (const char*)picoInternalTaLingware[langIndex]);
260 pFile = fopen(fileName, "r");
261 if (pFile != NULL) {
262 /* "ta" file found. */
263 fclose (pFile);
264 /* now look for "sg" file. */
265 strcpy((char*)fileName, PICO_SYSTEM_LINGWARE_PATH);
266 strcat((char*)fileName, (const char*)picoInternalSgLingware[langIndex]);
267 pFile = fopen(fileName, "r");
268 if (pFile != NULL) {
269 /* "sg" file found, no need to continue checking, return success. */
270 fclose(pFile);
271 free(fileName);
272 return true;
273 }
274 }
275
276 /* resources not found on system, check resources on alternative location */
277 /* (under pico_alt_lingware_path). */
278 strcpy((char*)fileName, pico_alt_lingware_path);
279 strcat((char*)fileName, (const char*)picoInternalTaLingware[langIndex]);
280 pFile = fopen(fileName, "r");
281 if (pFile == NULL) {
282 free(fileName);
283 return false;
284 } else {
285 fclose (pFile);
286 }
287
288 strcpy((char*)fileName, pico_alt_lingware_path);
289 strcat((char*)fileName, (const char*)picoInternalSgLingware[langIndex]);
290 pFile = fopen(fileName, "r");
291 if (pFile == NULL) {
292 free(fileName);
293 return false;
294 } else {
295 fclose(pFile);
296 free(fileName);
297 return true;
298 }
299 }
300
301 /** doLanguageSwitchFromLangIndex
302 * Switch to the requested locale.
303 * If the locale is already loaded, it returns immediately.
304 * If another locale is already is loaded, it will first be unloaded and the new one then loaded.
305 * If no locale is loaded, the requested locale will be loaded.
306 * @langIndex - the index of the locale/voice to load, which is guaranteed to be supported.
307 * return TTS_SUCCESS or TTS_FAILURE
308 */
doLanguageSwitchFromLangIndex(int langIndex)309 static tts_result doLanguageSwitchFromLangIndex( int langIndex )
310 {
311 int ret; /* function result code */
312
313 if (langIndex>=0) {
314 /* If we already have a loaded locale, check whether it is the same one as requested. */
315 if (picoProp_currLang && (strcmp(picoProp_currLang, picoSupportedLang[langIndex]) == 0)) {
316 //ALOGI("Language already loaded (%s == %s)", picoProp_currLang,
317 // picoSupportedLang[langIndex]);
318 return TTS_SUCCESS;
319 }
320 }
321
322 /* It is not the same locale; unload the current one first. Also invalidates the system object*/
323 cleanResources();
324
325 /* Allocate memory for file and resource names. */
326 cleanFiles();
327
328 if (picoSystem==NULL) {
329 /*re-init system object*/
330 ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem );
331 if (PICO_OK != ret) {
332 ALOGE("Failed to initialize the pico system object\n");
333 return TTS_FAILURE;
334 }
335 }
336
337 picoProp_currLang = (char *) malloc( 10 );
338 picoTaFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
339 picoSgFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
340 picoUtppFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
341 picoTaResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE );
342 picoSgResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE );
343 picoUtppResourceName =(pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE );
344
345 if (
346 (picoProp_currLang==NULL) || (picoTaFileName==NULL) || (picoSgFileName==NULL) ||
347 (picoUtppFileName==NULL) || (picoTaResourceName==NULL) || (picoSgResourceName==NULL) ||
348 (picoUtppResourceName==NULL)
349 ) {
350 ALOGE("Failed to allocate memory for internal strings\n");
351 cleanResources();
352 return TTS_FAILURE;
353 }
354
355 /* Find where to load the resource files from: system or alternative location */
356 /* based on availability of the Ta file. Try the alternative location first, this is where */
357 /* more recent language file updates would be installed (under pico_alt_lingware_path). */
358 bool bUseSystemPath = true;
359 FILE * pFile;
360 char* tmpFileName = (char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE);
361 strcpy((char*)tmpFileName, pico_alt_lingware_path);
362 strcat((char*)tmpFileName, (const char*)picoInternalTaLingware[langIndex]);
363 pFile = fopen(tmpFileName, "r");
364 if (pFile != NULL) {
365 /* "ta" file found under pico_alt_lingware_path, don't use the system path. */
366 fclose (pFile);
367 bUseSystemPath = false;
368 }
369 free(tmpFileName);
370
371 /* Set the path and file names for resource files. */
372 if (bUseSystemPath) {
373 strcpy((char *) picoTaFileName, PICO_SYSTEM_LINGWARE_PATH);
374 strcpy((char *) picoSgFileName, PICO_SYSTEM_LINGWARE_PATH);
375 strcpy((char *) picoUtppFileName, PICO_SYSTEM_LINGWARE_PATH);
376 } else {
377 strcpy((char *) picoTaFileName, pico_alt_lingware_path);
378 strcpy((char *) picoSgFileName, pico_alt_lingware_path);
379 strcpy((char *) picoUtppFileName, pico_alt_lingware_path);
380 }
381 strcat((char *) picoTaFileName, (const char *) picoInternalTaLingware[langIndex]);
382 strcat((char *) picoSgFileName, (const char *) picoInternalSgLingware[langIndex]);
383 strcat((char *) picoUtppFileName, (const char *) picoInternalUtppLingware[langIndex]);
384
385 /* Load the text analysis Lingware resource file. */
386 ret = pico_loadResource( picoSystem, picoTaFileName, &picoTaResource );
387 if (PICO_OK != ret) {
388 ALOGE("Failed to load textana resource for %s [%d]", picoSupportedLang[langIndex], ret);
389 cleanResources();
390 cleanFiles();
391 return TTS_FAILURE;
392 }
393
394 /* Load the signal generation Lingware resource file. */
395 ret = pico_loadResource( picoSystem, picoSgFileName, &picoSgResource );
396 if (PICO_OK != ret) {
397 ALOGE("Failed to load siggen resource for %s [%d]", picoSupportedLang[langIndex], ret);
398 cleanResources();
399 cleanFiles();
400 return TTS_FAILURE;
401 }
402
403 /* Load the utpp Lingware resource file if exists - NOTE: this file is optional
404 and is currently not used. Loading is only attempted for future compatibility.
405 If this file is not present the loading will still succeed. */
406 ret = pico_loadResource( picoSystem, picoUtppFileName, &picoUtppResource );
407 if ((PICO_OK != ret) && (ret != PICO_EXC_CANT_OPEN_FILE)) {
408 ALOGE("Failed to load utpp resource for %s [%d]", picoSupportedLang[langIndex], ret);
409 cleanResources();
410 cleanFiles();
411 return TTS_FAILURE;
412 }
413
414 /* Get the text analysis resource name. */
415 ret = pico_getResourceName( picoSystem, picoTaResource, (char *) picoTaResourceName );
416 if (PICO_OK != ret) {
417 ALOGE("Failed to get textana resource name for %s [%d]", picoSupportedLang[langIndex], ret);
418 cleanResources();
419 cleanFiles();
420 return TTS_FAILURE;
421 }
422
423 /* Get the signal generation resource name. */
424 ret = pico_getResourceName( picoSystem, picoSgResource, (char *) picoSgResourceName );
425 if ((PICO_OK == ret) && (picoUtppResource != NULL)) {
426 /* Get utpp resource name - optional: see note above. */
427 ret = pico_getResourceName( picoSystem, picoUtppResource, (char *) picoUtppResourceName );
428 if (PICO_OK != ret) {
429 ALOGE("Failed to get utpp resource name for %s [%d]", picoSupportedLang[langIndex], ret);
430 cleanResources();
431 cleanFiles();
432 return TTS_FAILURE;
433 }
434 }
435 if (PICO_OK != ret) {
436 ALOGE("Failed to get siggen resource name for %s [%d]", picoSupportedLang[langIndex], ret);
437 cleanResources();
438 cleanFiles();
439 return TTS_FAILURE;
440 }
441
442 /* Create a voice definition. */
443 ret = pico_createVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME );
444 if (PICO_OK != ret) {
445 ALOGE("Failed to create voice for %s [%d]", picoSupportedLang[langIndex], ret);
446 cleanResources();
447 cleanFiles();
448 return TTS_FAILURE;
449 }
450
451 /* Add the text analysis resource to the voice. */
452 ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoTaResourceName );
453 if (PICO_OK != ret) {
454 ALOGE("Failed to add textana resource to voice for %s [%d]", picoSupportedLang[langIndex], ret);
455 cleanResources();
456 cleanFiles();
457 return TTS_FAILURE;
458 }
459
460 /* Add the signal generation resource to the voice. */
461 ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoSgResourceName );
462 if ((PICO_OK == ret) && (picoUtppResource != NULL)) {
463 /* Add utpp resource to voice - optional: see note above. */
464 ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoUtppResourceName );
465 if (PICO_OK != ret) {
466 ALOGE("Failed to add utpp resource to voice for %s [%d]", picoSupportedLang[langIndex], ret);
467 cleanResources();
468 cleanFiles();
469 return TTS_FAILURE;
470 }
471 }
472
473 if (PICO_OK != ret) {
474 ALOGE("Failed to add siggen resource to voice for %s [%d]", picoSupportedLang[langIndex], ret);
475 cleanResources();
476 cleanFiles();
477 return TTS_FAILURE;
478 }
479
480 ret = pico_newEngine( picoSystem, (const pico_Char *) PICO_VOICE_NAME, &picoEngine );
481 if (PICO_OK != ret) {
482 ALOGE("Failed to create engine for %s [%d]", picoSupportedLang[langIndex], ret);
483 cleanResources();
484 cleanFiles();
485 return TTS_FAILURE;
486 }
487
488 /* Set the current locale/voice. */
489 strcpy( picoProp_currLang, picoSupportedLang[langIndex] );
490 picoCurrentLangIndex = langIndex;
491 ALOGI("loaded %s successfully", picoProp_currLang);
492 return TTS_SUCCESS;
493 }
494
495
496 /** doLanguageSwitch
497 * Switch to the requested locale.
498 * If this locale is already loaded, it returns immediately.
499 * If another locale is already loaded, this will first be unloaded
500 * and the new one then loaded.
501 * If no locale is loaded, the requested will be loaded.
502 * @locale - the locale to check, either in xx or xx-YY format (i.e "en" or "en-US")
503 * return TTS_SUCCESS or TTS_FAILURE
504 */
doLanguageSwitch(const char * locale)505 static tts_result doLanguageSwitch( const char * locale )
506 {
507 int loclIndex; /* locale index */
508
509 /* Load the new locale. */
510 loclIndex = checkForLocale( locale );
511 if (loclIndex < 0) {
512 ALOGE("Tried to swith to non-supported locale %s", locale);
513 return TTS_FAILURE;
514 }
515 //ALOGI("Found supported locale %s", picoSupportedLang[loclIndex]);
516 return doLanguageSwitchFromLangIndex( loclIndex );
517 }
518
519
520 /** doAddProperties
521 * Add <speed>, <pitch> and <volume> tags to the text,
522 * if the properties have been set to non-default values, and return the new string.
523 * The calling function is responsible for freeing the returned string.
524 * @str - text to apply tags to
525 * return new string with tags applied
526 */
doAddProperties(const char * str)527 static char * doAddProperties( const char * str )
528 {
529 char * data = NULL;
530 int haspitch, hasspeed, hasvol; /* parameters */
531 int textlen; /* property string length */
532 haspitch = 0; hasspeed = 0; hasvol = 0;
533 textlen = strlen(str) + 1;
534 if (picoProp_currPitch != PICO_DEF_PITCH) { /* non-default pitch */
535 textlen += strlen(PICO_PITCH_OPEN_TAG) + 5;
536 textlen += strlen(PICO_PITCH_CLOSE_TAG);
537 haspitch = 1;
538 }
539 if (picoProp_currRate != PICO_DEF_RATE) { /* non-default rate */
540 textlen += strlen(PICO_SPEED_OPEN_TAG) + 5;
541 textlen += strlen(PICO_SPEED_CLOSE_TAG);
542 hasspeed = 1;
543 }
544
545 if (picoProp_currVolume != PICO_DEF_VOLUME) { /* non-default volume */
546 textlen += strlen(PICO_VOLUME_OPEN_TAG) + 5;
547 textlen += strlen(PICO_VOLUME_CLOSE_TAG);
548 hasvol = 1;
549 }
550
551 /* Compose the property strings. */
552 data = (char *) malloc( textlen ); /* allocate string */
553 if (!data) {
554 return NULL;
555 }
556 memset(data, 0, textlen); /* clear it */
557 if (haspitch) {
558 char* tmp = (char*)malloc(strlen(PICO_PITCH_OPEN_TAG) + strlen(PICO_PITCH_CLOSE_TAG) + 5);
559 sprintf(tmp, PICO_PITCH_OPEN_TAG, picoProp_currPitch);
560 strcat(data, tmp);
561 free(tmp);
562 }
563
564 if (hasspeed) {
565 char* tmp = (char*)malloc(strlen(PICO_SPEED_OPEN_TAG) + strlen(PICO_SPEED_CLOSE_TAG) + 5);
566 sprintf(tmp, PICO_SPEED_OPEN_TAG, picoProp_currRate);
567 strcat(data, tmp);
568 free(tmp);
569 }
570
571 if (hasvol) {
572 char* tmp = (char*)malloc(strlen(PICO_VOLUME_OPEN_TAG) + strlen(PICO_VOLUME_CLOSE_TAG) + 5);
573 sprintf(tmp, PICO_VOLUME_OPEN_TAG, picoProp_currVolume);
574 strcat(data, tmp);
575 free(tmp);
576 }
577
578 strcat(data, str);
579 if (hasvol) {
580 strcat(data, PICO_VOLUME_CLOSE_TAG);
581 }
582
583 if (hasspeed) {
584 strcat(data, PICO_SPEED_CLOSE_TAG);
585 }
586
587 if (haspitch) {
588 strcat(data, PICO_PITCH_CLOSE_TAG);
589 }
590 return data;
591 }
592
593
/** get_tok
 *  Find the next space-delimited token in a string.
 *  @str      - text to be processed
 *  @pos      - position of first character to be searched in str
 *  @textlen  - position one past the last character to be searched
 *  @tokstart - address of a variable to receive the start of the token found
 *  @toklen   - address of a variable to receive the length of the token found
 *  return    : 1=token found, 0=token not found (only separators up to textlen)
 *  notes     : the token separator set could be enlarged adding characters in "delims".
 *              Because strchr() also matches the terminator of "delims", a NUL character
 *              in str acts as a separator as well.
*/
static int get_tok(const char * str , int pos, int textlen, int *tokstart, int *toklen)
{
    const char * const delims = " ";
    int cur = pos;

    /* skip the separators in front of the token */
    for (; cur < textlen; ++cur) {
        if (strchr(delims, str[cur]) == NULL) {
            break;
        }
    }
    if (cur == textlen) {
        /* nothing but separators left */
        return 0;
    }

    /* the token runs up to the next separator or to textlen */
    const int first = cur;
    *tokstart = first;
    for (; cur < textlen; ++cur) {
        if (strchr(delims, str[cur]) != NULL) {
            break;
        }
    }
    *toklen = cur - first;
    return 1;
}/*get_tok*/
624
625
/** get_sub_tok
 *  Find the next sub token of a camel case token like "xxxYyyy".
 *  A sub token is a (possibly empty) run of capital letters followed by a (possibly empty)
 *  run of other characters, so "XMLHttpRequest" starts with the sub token "XMLHttp".
 *  @str      - text to be processed
 *  @pos      - position of first character to be searched in str
 *  @textlen  - position one past the last character to be searched in str
 *  @tokstart - address of a variable to receive the start of the sub token found
 *  @toklen   - address of a variable to receive the length of the sub token found
 *  return    : 1=sub token found, 0=sub token not found (pos is already at textlen)
 *  notes     : the sub token separator set could be enlarged adding characters in "capitals"
*/
static int get_sub_tok(const char * str , int pos, int textlen, int *tokstart, int *toklen) {

    static const char capitals[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    if (pos == textlen) {
        return 0;
    }

    const int first = pos;
    int cur = pos;
    *tokstart = first;

    /* leading run of capitals */
    for (; (cur < textlen) && (strchr(capitals, str[cur]) != NULL); ++cur) {
    }
    /* followed by everything up to the next capital; this loop does not run when the
       capitals already reached textlen */
    for (; (cur < textlen) && (strchr(capitals, str[cur]) == NULL); ++cur) {
    }

    *toklen = cur - first;
    return 1;
}/*get_sub_tok*/
668
669
670 /** doCamelCase
671 * Searches for tokens having a compound structure with camel case and transforms them as follows :
672 * "XxxxYyyy" -->> "Xxxx Yyyy",
673 * "xxxYyyy" -->> "xxx Yyyy",
674 * "XXXYyyy" -->> "XXXYyyy"
675 * etc....
676 * The calling function is responsible for freeing the returned string.
677 * @str - text to be processed
678 * return new string with text processed
679 */
doCamelCase(const char * str)680 static char * doCamelCase( const char * str )
681 {
682 int textlen; /* input string length */
683 int totlen; /* output string length */
684 int tlen_2, nsubtok; /* nuber of subtokens */
685 int toklen, tokstart; /*legnth and start of generic token*/
686 int stoklen, stokstart; /*legnth and start of generic sub-token*/
687 int pos, tokpos, outpos; /*postion of current char in input string and token and output*/
688 char *data; /*pointer of the returned string*/
689
690 pos = 0;
691 tokpos = 0;
692 toklen = 0;
693 stoklen = 0;
694 tlen_2 = 0;
695 totlen = 0;
696
697 textlen = strlen(str) + 1;
698
699 /*counting characters after sub token splitting including spaces*/
700 //while ((pos<textlen) && (str[pos]!=0)) {
701 while (get_tok(str, pos, textlen, &tokstart, &toklen)) {
702 tokpos = tokstart;
703 tlen_2 = 0;
704 nsubtok = 0;
705 while (get_sub_tok(str, tokpos, tokstart+toklen, &stokstart, &stoklen)) {
706 totlen += stoklen;
707 tlen_2 += stoklen;
708 tokpos = stokstart + stoklen;
709 nsubtok += 1;
710 }
711 totlen += nsubtok; /*add spaces between subtokens*/
712 pos = tokstart + tlen_2;
713 }
714 //}
715 /* Allocate the return string */
716
717 data = (char *) malloc( totlen ); /* allocate string */
718 if (!data) {
719 return NULL;
720 }
721 memset(data, 0, totlen); /* clear it */
722 outpos = 0;
723 pos = 0;
724 /*copying characters*/
725 //while ((pos<textlen) && (str[pos]!=0)) {
726 while (get_tok (str, pos, textlen, &tokstart, &toklen)) {
727 tokpos = tokstart;
728 tlen_2 = 0;
729 nsubtok = 0;
730 while (get_sub_tok(str, tokpos, tokstart+toklen, &stokstart, &stoklen)) {
731 strncpy(&(data[outpos]), &(str[stokstart]), stoklen);
732 outpos += stoklen;
733 strncpy(&(data[outpos]), " ", 1);
734 tlen_2 += stoklen;
735 outpos += 1;
736 tokpos = stokstart + stoklen;
737 }
738 pos=tokstart+tlen_2;
739 }
740 //}
741 if (outpos == 0) {
742 outpos = 1;
743 }
744 data[outpos-1] = 0;
745 return data;
746 }/*doCamelCase*/
747
748
749 /** createPhonemeString
750 * Wrap all individual words in <phoneme> tags.
751 * The Pico <phoneme> tag only supports one word in each tag,
752 * therefore they must be individually wrapped!
753 * @xsampa - text to convert to Pico phomene string
754 * @length - length of the input string
755 * return new string with tags applied
756 */
createPhonemeString(const char * xsampa,int length)757 extern char * createPhonemeString( const char * xsampa, int length )
758 {
759 char * convstring = NULL;
760 int origStrLen = strlen(xsampa);
761 int numWords = 1;
762 int start, totalLength, i, j;
763
764 for (i = 0; i < origStrLen; i ++) {
765 if ((xsampa[i] == ' ') || (xsampa[i] == '#')) {
766 numWords ++;
767 }
768 }
769
770 if (numWords == 1) {
771 convstring = new char[origStrLen + 17];
772 convstring[0] = '\0';
773 strcat(convstring, PICO_PHONEME_OPEN_TAG);
774 strcat(convstring, xsampa);
775 strcat(convstring, PICO_PHONEME_CLOSE_TAG);
776 } else {
777 char * words[numWords];
778 start = 0; totalLength = 0; i = 0; j = 0;
779 for (i=0, j=0; i < origStrLen; i++) {
780 if ((xsampa[i] == ' ') || (xsampa[i] == '#')) {
781 words[j] = new char[i+1-start+17];
782 words[j][0] = '\0';
783 strcat( words[j], PICO_PHONEME_OPEN_TAG);
784 strncat(words[j], xsampa+start, i-start);
785 strcat( words[j], PICO_PHONEME_CLOSE_TAG);
786 start = i + 1;
787 j++;
788 totalLength += strlen(words[j-1]);
789 }
790 }
791 words[j] = new char[i+1-start+17];
792 words[j][0] = '\0';
793 strcat(words[j], PICO_PHONEME_OPEN_TAG);
794 strcat(words[j], xsampa+start);
795 strcat(words[j], PICO_PHONEME_CLOSE_TAG);
796 totalLength += strlen(words[j]);
797 convstring = new char[totalLength + 1];
798 convstring[0] = '\0';
799 for (i=0 ; i < numWords ; i++) {
800 strcat(convstring, words[i]);
801 delete [] words[i];
802 }
803 }
804
805 return convstring;
806 }
807
/* The XSAMPA uses as many as 5 characters to represent a single IPA code.
   One table entry therefore holds a 16-bit IPA code and room for 5 characters
   plus the terminating NUL. */
typedef struct tagPhnArr
{
    char16_t strIPA; /* IPA Unicode symbol */
    char strXSAMPA[6]; /* SAMPA sequence */
} PArr;

/* Number of entries in PhnAry; it must match the number of initializers below
   (23 + 60 + 37 + 15 + 6). CnvIPAPnt() scans exactly this many entries. */
#define phn_cnt (134+7)

PArr PhnAry[phn_cnt] = {

    /* XSAMPA conversion table
       This maps a single IPA symbol to a sequence representing XSAMPA.
       This relies upon a direct one-to-one correspondence
       including diphthongs and affricates.
       Note that "\\\\" in the source is two backslash characters in the stored string,
       while a few entries ("=\\", "|\\|\\", "J\\_<", "G\\_<") store single backslashes.
       NOTE(review): presumably the doubled form is an escape for the Pico phoneme
       markup - confirm whether the single-backslash entries are intentional. */

    /* Vowels (23) complete */
    {0x025B, "E"},
    {0x0251, "A"},
    {0x0254, "O"},
    {0x00F8, "2"},
    {0x0153, "9"},
    {0x0276, "&"},
    {0x0252, "Q"},
    {0x028C, "V"},
    {0x0264, "7"},
    {0x026F, "M"},
    {0x0268, "1"},
    {0x0289, "}"},
    {0x026A, "I"},
    {0x028F, "Y"},
    {0x028A, "U"},
    {0x0259, "@"},
    {0x0275, "8"},
    {0x0250, "6"},
    {0x00E6, "{"},
    {0x025C, "3"},
    {0x025A, "@`"},
    {0x025E, "3\\\\"},
    {0x0258, "@\\\\"},

    /* Consonants (60) complete */
    {0x0288, "t`"},
    {0x0256, "d`"},
    {0x025F, "J\\\\"},
    {0x0261, "g"},
    {0x0262, "G\\\\"},
    {0x0294, "?"},
    {0x0271, "F"},
    {0x0273, "n`"},
    {0x0272, "J"},
    {0x014B, "N"},
    {0x0274, "N\\\\"},
    {0x0299, "B\\\\"},
    {0x0280, "R\\\\"},
    {0x027E, "4"},
    {0x027D, "r`"},
    {0x0278, "p\\\\"},
    {0x03B2, "B"},
    {0x03B8, "T"},
    {0x00F0, "D"},
    {0x0283, "S"},
    {0x0292, "Z"},
    {0x0282, "s`"},
    {0x0290, "z`"},
    {0x00E7, "C"},
    {0x029D, "j\\\\"},
    {0x0263, "G"},
    {0x03C7, "X"},
    {0x0281, "R"},
    {0x0127, "X\\\\"},
    {0x0295, "?\\\\"},
    {0x0266, "h\\\\"},
    {0x026C, "K"},
    {0x026E, "K\\\\"},
    {0x028B, "P"},
    {0x0279, "r\\\\"},
    {0x027B, "r\\\\'"},
    {0x0270, "M\\\\"},
    {0x026D, "l`"},
    {0x028E, "L"},
    {0x029F, "L\\\\"},
    {0x0253, "b_<"},
    {0x0257, "d_<"},
    {0x0284, "J\\_<"},
    {0x0260, "g_<"},
    {0x029B, "G\\_<"},
    {0x028D, "W"},
    {0x0265, "H"},
    {0x029C, "H\\\\"},
    {0x02A1, ">\\\\"},
    {0x02A2, "<\\\\"},
    {0x0267, "x\\\\"}, /* hooktop heng */
    {0x0298, "O\\\\"},
    {0x01C0, "|\\\\"},
    {0x01C3, "!\\\\"},
    {0x01C2, "=\\"},
    {0x01C1, "|\\|\\"},
    {0x027A, "l\\\\"},
    {0x0255, "s\\\\"},
    {0x0291, "z\\\\"},
    {0x026B, "l_G"},


    /* Diacritics (37) complete */
    {0x02BC, "_>"},
    {0x0325, "_0"},
    {0x030A, "_0"},
    {0x032C, "_v"},
    {0x02B0, "_h"},
    {0x0324, "_t"},
    {0x0330, "_k"},
    {0x033C, "_N"},
    {0x032A, "_d"},
    {0x033A, "_a"},
    {0x033B, "_m"},
    {0x0339, "_O"},
    {0x031C, "_c"},
    {0x031F, "_+"},
    {0x0320, "_-"},
    {0x0308, "_\""}, /* centralized */
    {0x033D, "_x"},
    {0x0318, "_A"},
    {0x0319, "_q"},
    {0x02DE, "`"},
    {0x02B7, "_w"},
    {0x02B2, "_j"},
    {0x02E0, "_G"},
    {0x02E4, "_?\\\\"}, /* pharyngealized */
    {0x0303, "~"}, /* nasalized */
    {0x207F, "_n"},
    {0x02E1, "_l"},
    {0x031A, "_}"},
    {0x0334, "_e"},
    {0x031D, "_r"}, /* raised equivalent to 02D4 */
    {0x02D4, "_r"}, /* raised equivalent to 031D */
    {0x031E, "_o"}, /* lowered equivalent to 02D5 */
    {0x02D5, "_o"}, /* lowered equivalent to 031E */
    {0x0329, "="}, /* syllabic */
    {0x032F, "_^"}, /* non-syllabic */
    {0x0361, "_"}, /* top tie bar */
    {0x035C, "_"},

    /* Suprasegmental (15) incomplete */
    {0x02C8, "\""}, /* primary stress */
    {0x02CC, "%"}, /* secondary stress */
    {0x02D0, ":"}, /* long */
    {0x02D1, ":\\\\"}, /* half-long */
    {0x0306, "_X"}, /* extra short */

    {0x2016, "||"}, /* major group */
    {0x203F, "-\\\\"}, /* bottom tie bar */
    {0x2197, "<R>"}, /* global rise */
    {0x2198, "<F>"}, /* global fall */
    {0x2193, "<D>"}, /* downstep */
    {0x2191, "<U>"}, /* upstep */
    {0x02E5, "<T>"}, /* extra high level */
    {0x02E7, "<M>"}, /* mid level */
    {0x02E9, "<B>"}, /* extra low level */

    {0x025D, "3`:"}, /* non-IPA %% */

    /* Affricates (6) complete */
    {0x02A3, "d_z"},
    {0x02A4, "d_Z"},
    {0x02A5, "d_z\\\\"},
    {0x02A6, "t_s"},
    {0x02A7, "t_S"},
    {0x02A8, "t_s\\\\"}
};
978
979
CnvIPAPnt(const char16_t IPnt,char * XPnt)980 void CnvIPAPnt( const char16_t IPnt, char * XPnt )
981 {
982 char16_t ThisPnt = IPnt; /* local copy of single IPA codepoint */
983 int idx; /* index into table */
984
985 /* Convert an individual IPA codepoint.
986 A single IPA code could map to a string.
987 Search the table. If it is not found, use the same character.
988 Since most codepoints can be contained within 16 bits,
989 they are represented as wide chars. */
990 XPnt[0] = 0; /* clear the result string */
991
992 /* Search the table for the conversion. */
993 for (idx = 0; idx < phn_cnt; idx ++) { /* for each item in table */
994 if (IPnt == PhnAry[idx].strIPA) { /* matches IPA code */
995 strcat( XPnt, (const char *)&(PhnAry[idx].strXSAMPA) ); /* copy the XSAMPA string */
996 return;
997 }
998 }
999 strcat(XPnt, (const char *)&ThisPnt); /* just copy it */
1000 }
1001
1002
1003 /** cnvIpaToXsampa
1004 * Convert an IPA character string to an XSAMPA character string.
1005 * @ipaString - input IPA string to convert
1006 * @outXsampaString - converted XSAMPA string is passed back in this parameter
1007 * return size of the new string
1008 */
1009
cnvIpaToXsampa(const char16_t * ipaString,size_t ipaStringSize,char ** outXsampaString)1010 int cnvIpaToXsampa( const char16_t * ipaString, size_t ipaStringSize, char ** outXsampaString )
1011 {
1012 size_t xsize; /* size of result */
1013 size_t ipidx; /* index into IPA string */
1014 char * XPnt; /* short XSAMPA char sequence */
1015
1016 /* Convert an IPA string to an XSAMPA string and store the xsampa string in *outXsampaString.
1017 It is the responsibility of the caller to free the allocated string.
1018 Increment through the string. For each base & combination convert it to the XSAMP equivalent.
1019 Because of the XSAMPA limitations, not all IPA characters will be covered. */
1020 XPnt = (char *) malloc(6);
1021 xsize = (4 * ipaStringSize) + 8; /* assume more than double size */
1022 *outXsampaString = (char *) malloc( xsize );/* allocate return string */
1023 *outXsampaString[0] = 0;
1024 xsize = 0; /* clear final */
1025
1026 for (ipidx = 0; ipidx < ipaStringSize; ipidx ++) { /* for each IPA code */
1027 CnvIPAPnt( ipaString[ipidx], XPnt ); /* get converted character */
1028 strcat((char *)*outXsampaString, XPnt ); /* concatenate XSAMPA */
1029 }
1030 free(XPnt);
1031 xsize = strlen(*outXsampaString); /* get the final length */
1032 return xsize;
1033 }
1034
1035
1036 /* Google Engine API function implementations */
1037
1038 /** init
1039 * Allocates Pico memory block and initializes the Pico system.
1040 * synthDoneCBPtr - Pointer to callback function which will receive generated samples
1041 * config - the engine configuration parameters, here only contains the non-system path
1042 * for the lingware location
1043 * return tts_result
1044 */
init(synthDoneCB_t synthDoneCBPtr,const char * config)1045 tts_result TtsEngine::init( synthDoneCB_t synthDoneCBPtr, const char *config )
1046 {
1047 if (synthDoneCBPtr == NULL) {
1048 ALOGE("Callback pointer is NULL");
1049 return TTS_FAILURE;
1050 }
1051
1052 picoMemArea = malloc( PICO_MEM_SIZE );
1053 if (!picoMemArea) {
1054 ALOGE("Failed to allocate memory for Pico system");
1055 return TTS_FAILURE;
1056 }
1057
1058 pico_Status ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem );
1059 if (PICO_OK != ret) {
1060 ALOGE("Failed to initialize Pico system");
1061 free( picoMemArea );
1062 picoMemArea = NULL;
1063 return TTS_FAILURE;
1064 }
1065
1066 picoSynthDoneCBPtr = synthDoneCBPtr;
1067
1068 picoCurrentLangIndex = -1;
1069
1070 // was the initialization given an alternative path for the lingware location?
1071 if ((config != NULL) && (strlen(config) > 0)) {
1072 pico_alt_lingware_path = (char*)malloc(strlen(config));
1073 strcpy((char*)pico_alt_lingware_path, config);
1074 ALOGV("Alternative lingware path %s", pico_alt_lingware_path);
1075 } else {
1076 pico_alt_lingware_path = (char*)malloc(strlen(PICO_LINGWARE_PATH) + 1);
1077 strcpy((char*)pico_alt_lingware_path, PICO_LINGWARE_PATH);
1078 ALOGV("Using predefined lingware path %s", pico_alt_lingware_path);
1079 }
1080
1081 return TTS_SUCCESS;
1082 }
1083
1084
1085 /** shutdown
1086 * Unloads all Pico resources; terminates Pico system and frees Pico memory block.
1087 * return tts_result
1088 */
shutdown(void)1089 tts_result TtsEngine::shutdown( void )
1090 {
1091 cleanResources();
1092
1093 if (picoSystem) {
1094 pico_terminate(&picoSystem);
1095 picoSystem = NULL;
1096 }
1097 if (picoMemArea) {
1098 free(picoMemArea);
1099 picoMemArea = NULL;
1100 }
1101
1102 cleanFiles();
1103 return TTS_SUCCESS;
1104 }
1105
1106
1107 /** loadLanguage
1108 * Load a new language.
1109 * @lang - string with ISO 3 letter language code.
1110 * @country - string with ISO 3 letter country code .
1111 * @variant - string with language variant for that language and country pair.
1112 * return tts_result
1113 */
loadLanguage(const char * lang,const char * country,const char * variant)1114 tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant)
1115 {
1116 return TTS_FAILURE;
1117 //return setProperty("language", value, size);
1118 }
1119
1120
1121 /** setLanguage
1122 * Load a new language (locale). Use the ISO 639-3 language codes.
1123 * @lang - string with ISO 639-3 language code.
1124 * @country - string with ISO 3 letter country code.
1125 * @variant - string with language variant for that language and country pair.
1126 * return tts_result
1127 */
setLanguage(const char * lang,const char * country,const char * variant)1128 tts_result TtsEngine::setLanguage( const char * lang, const char * country, const char * variant )
1129 {
1130 //ALOGI("TtsEngine::setLanguage %s %s %s", lang, country, variant);
1131 int langIndex;
1132 int countryIndex;
1133 int i;
1134
1135 if (lang == NULL)
1136 {
1137 ALOGE("TtsEngine::setLanguage called with NULL language");
1138 return TTS_FAILURE;
1139 }
1140
1141 /* We look for a match on the language first
1142 then we look for a match on the country.
1143 If no match on the language:
1144 return an error.
1145 If match on the language, but no match on the country:
1146 load the language found for the language match.
1147 If match on the language, and match on the country:
1148 load the language found for the country match. */
1149
1150 /* Find a match on the language. */
1151 langIndex = -1; /* no match */
1152 for (i = 0; i < picoNumSupportedVocs; i ++)
1153 {
1154 if (strcmp(lang, picoSupportedLangIso3[i]) == 0)
1155 {
1156 langIndex = i;
1157 break;
1158 }
1159 }
1160 if (langIndex < 0)
1161 {
1162 /* The language isn't supported. */
1163 ALOGE("TtsEngine::setLanguage called with unsupported language");
1164 return TTS_FAILURE;
1165 }
1166
1167 /* Find a match on the country, if there is one. */
1168 if (country != NULL)
1169 {
1170 countryIndex = -1;
1171 for (i = langIndex; i < picoNumSupportedVocs; i ++)
1172 {
1173 if ( (strcmp(lang, picoSupportedLangIso3[i]) == 0)
1174 && (strcmp(country, picoSupportedCountryIso3[i]) == 0))
1175 {
1176 countryIndex = i;
1177 break;
1178 }
1179 }
1180
1181 if (countryIndex < 0)
1182 {
1183 /* We didn't find a match on the country, but we had a match on the language.
1184 Use that language. */
1185 ALOGI("TtsEngine::setLanguage found matching language(%s) but not matching country(%s).",
1186 lang, country);
1187 }
1188 else
1189 {
1190 /* We have a match on both the language and the country. */
1191 langIndex = countryIndex;
1192 }
1193 }
1194
1195 return doLanguageSwitchFromLangIndex( langIndex ); /* switch the language */
1196 }
1197
1198
1199 /** isLanguageAvailable
1200 * Returns the level of support for a language.
1201 * @lang - string with ISO 3 letter language code.
1202 * @country - string with ISO 3 letter country code .
1203 * @variant - string with language variant for that language and country pair.
1204 * return tts_support_result
1205 */
isLanguageAvailable(const char * lang,const char * country,const char * variant)1206 tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char *country,
1207 const char *variant) {
1208 int langIndex = -1;
1209 int countryIndex = -1;
1210 //-------------------------
1211 // language matching
1212 // if no language specified
1213 if (lang == NULL) {
1214 ALOGE("TtsEngine::isLanguageAvailable called with no language");
1215 return TTS_LANG_NOT_SUPPORTED;
1216 }
1217
1218 // find a match on the language
1219 for (int i = 0; i < picoNumSupportedVocs; i++)
1220 {
1221 if (strcmp(lang, picoSupportedLangIso3[i]) == 0) {
1222 langIndex = i;
1223 break;
1224 }
1225 }
1226 if (langIndex < 0) {
1227 // language isn't supported
1228 ALOGV("TtsEngine::isLanguageAvailable called with unsupported language");
1229 return TTS_LANG_NOT_SUPPORTED;
1230 }
1231
1232 //-------------------------
1233 // country matching
1234 // if no country specified
1235 if ((country == NULL) || (strlen(country) == 0)) {
1236 // check installation of matched language
1237 return (hasResourcesForLanguage(langIndex) ? TTS_LANG_AVAILABLE : TTS_LANG_MISSING_DATA);
1238 }
1239
1240 // find a match on the country
1241 for (int i = langIndex; i < picoNumSupportedVocs; i++) {
1242 if ((strcmp(lang, picoSupportedLangIso3[i]) == 0)
1243 && (strcmp(country, picoSupportedCountryIso3[i]) == 0)) {
1244 countryIndex = i;
1245 break;
1246 }
1247 }
1248 if (countryIndex < 0) {
1249 // we didn't find a match on the country, but we had a match on the language
1250 // check installation of matched language
1251 return (hasResourcesForLanguage(langIndex) ? TTS_LANG_AVAILABLE : TTS_LANG_MISSING_DATA);
1252 } else {
1253 // we have a match on the language and the country
1254 langIndex = countryIndex;
1255 // check installation of matched language + country
1256 return (hasResourcesForLanguage(langIndex) ? TTS_LANG_COUNTRY_AVAILABLE : TTS_LANG_MISSING_DATA);
1257 }
1258
1259 // no variants supported in this library, TTS_LANG_COUNTRY_VAR_AVAILABLE cannot be returned.
1260 }
1261
1262
1263 /** getLanguage
1264 * Get the currently loaded language - if any.
1265 * @lang - string with current ISO 3 letter language code, empty string if no loaded language.
1266 * @country - string with current ISO 3 letter country code, empty string if no loaded language.
1267 * @variant - string with current language variant, empty string if no loaded language.
1268 * return tts_result
1269 */
getLanguage(char * language,char * country,char * variant)1270 tts_result TtsEngine::getLanguage(char *language, char *country, char *variant)
1271 {
1272 if (picoCurrentLangIndex == -1) {
1273 strcpy(language, "\0");
1274 strcpy(country, "\0");
1275 strcpy(variant, "\0");
1276 } else {
1277 strcpy(language, picoSupportedLangIso3[picoCurrentLangIndex]);
1278 strcpy(country, picoSupportedCountryIso3[picoCurrentLangIndex]);
1279 // no variant in this implementation
1280 strcpy(variant, "\0");
1281 }
1282 return TTS_SUCCESS;
1283 }
1284
1285
1286 /** setAudioFormat
1287 * sets the audio format to use for synthesis, returns what is actually used.
1288 * @encoding - reference to encoding format
1289 * @rate - reference to sample rate
1290 * @channels - reference to number of channels
1291 * return tts_result
1292 * */
setAudioFormat(tts_audio_format & encoding,uint32_t & rate,int & channels)1293 tts_result TtsEngine::setAudioFormat(tts_audio_format& encoding, uint32_t& rate,
1294 int& channels)
1295 {
1296 // ignore the input parameters, the enforced audio parameters are fixed here
1297 encoding = TTS_AUDIO_FORMAT_PCM_16_BIT;
1298 rate = 16000;
1299 channels = 1;
1300 return TTS_SUCCESS;
1301 }
1302
1303
1304 /** setProperty
1305 * Set property. The supported properties are: language, rate, pitch and volume.
1306 * @property - name of property to set
1307 * @value - value to set
1308 * @size - size of value
1309 * return tts_result
1310 */
setProperty(const char * property,const char * value,const size_t size)1311 tts_result TtsEngine::setProperty( const char * property, const char * value, const size_t size )
1312 {
1313 int rate;
1314 int pitch;
1315 int volume;
1316
1317 /* Set a specific property for the engine.
1318 Supported properties include: language (locale), rate, pitch, volume. */
1319 /* Sanity check */
1320 if (property == NULL) {
1321 ALOGE("setProperty called with property NULL");
1322 return TTS_PROPERTY_UNSUPPORTED;
1323 }
1324
1325 if (value == NULL) {
1326 ALOGE("setProperty called with value NULL");
1327 return TTS_VALUE_INVALID;
1328 }
1329
1330 if (strncmp(property, "language", 8) == 0) {
1331 /* Verify it's in correct format. */
1332 if (strlen(value) != 2 && strlen(value) != 6) {
1333 ALOGE("change language called with incorrect format");
1334 return TTS_VALUE_INVALID;
1335 }
1336
1337 /* Try to switch to specified language. */
1338 if (doLanguageSwitch(value) == TTS_FAILURE) {
1339 ALOGE("failed to load language");
1340 return TTS_FAILURE;
1341 } else {
1342 return TTS_SUCCESS;
1343 }
1344 } else if (strncmp(property, "rate", 4) == 0) {
1345 rate = atoi(value);
1346 if (rate < PICO_MIN_RATE) {
1347 rate = PICO_MIN_RATE;
1348 }
1349 if (rate > PICO_MAX_RATE) {
1350 rate = PICO_MAX_RATE;
1351 }
1352 picoProp_currRate = rate;
1353 return TTS_SUCCESS;
1354 } else if (strncmp(property, "pitch", 5) == 0) {
1355 pitch = atoi(value);
1356 if (pitch < PICO_MIN_PITCH) {
1357 pitch = PICO_MIN_PITCH;
1358 }
1359 if (pitch > PICO_MAX_PITCH) {
1360 pitch = PICO_MAX_PITCH;
1361 }
1362 picoProp_currPitch = pitch;
1363 return TTS_SUCCESS;
1364 } else if (strncmp(property, "volume", 6) == 0) {
1365 volume = atoi(value);
1366 if (volume < PICO_MIN_VOLUME) {
1367 volume = PICO_MIN_VOLUME;
1368 }
1369 if (volume > PICO_MAX_VOLUME) {
1370 volume = PICO_MAX_VOLUME;
1371 }
1372 picoProp_currVolume = volume;
1373 return TTS_SUCCESS;
1374 }
1375
1376 return TTS_PROPERTY_UNSUPPORTED;
1377 }
1378
1379
1380 /** getProperty
1381 * Get the property. Supported properties are: language, rate, pitch and volume.
1382 * @property - name of property to get
1383 * @value - buffer which will receive value of property
1384 * @iosize - size of value - if size is too small on return this will contain actual size needed
1385 * return tts_result
1386 */
getProperty(const char * property,char * value,size_t * iosize)1387 tts_result TtsEngine::getProperty( const char * property, char * value, size_t * iosize )
1388 {
1389 /* Get the property for the engine.
1390 This property was previously set by setProperty or by default. */
1391 /* sanity check */
1392 if (property == NULL) {
1393 ALOGE("getProperty called with property NULL");
1394 return TTS_PROPERTY_UNSUPPORTED;
1395 }
1396
1397 if (value == NULL) {
1398 ALOGE("getProperty called with value NULL");
1399 return TTS_VALUE_INVALID;
1400 }
1401
1402 if (strncmp(property, "language", 8) == 0) {
1403 if (picoProp_currLang == NULL) {
1404 strcpy(value, "");
1405 } else {
1406 if (*iosize < strlen(picoProp_currLang)+1) {
1407 *iosize = strlen(picoProp_currLang) + 1;
1408 return TTS_PROPERTY_SIZE_TOO_SMALL;
1409 }
1410 strcpy(value, picoProp_currLang);
1411 }
1412 return TTS_SUCCESS;
1413 } else if (strncmp(property, "rate", 4) == 0) {
1414 char tmprate[4];
1415 sprintf(tmprate, "%d", picoProp_currRate);
1416 if (*iosize < strlen(tmprate)+1) {
1417 *iosize = strlen(tmprate) + 1;
1418 return TTS_PROPERTY_SIZE_TOO_SMALL;
1419 }
1420 strcpy(value, tmprate);
1421 return TTS_SUCCESS;
1422 } else if (strncmp(property, "pitch", 5) == 0) {
1423 char tmppitch[4];
1424 sprintf(tmppitch, "%d", picoProp_currPitch);
1425 if (*iosize < strlen(tmppitch)+1) {
1426 *iosize = strlen(tmppitch) + 1;
1427 return TTS_PROPERTY_SIZE_TOO_SMALL;
1428 }
1429 strcpy(value, tmppitch);
1430 return TTS_SUCCESS;
1431 } else if (strncmp(property, "volume", 6) == 0) {
1432 char tmpvol[4];
1433 sprintf(tmpvol, "%d", picoProp_currVolume);
1434 if (*iosize < strlen(tmpvol)+1) {
1435 *iosize = strlen(tmpvol) + 1;
1436 return TTS_PROPERTY_SIZE_TOO_SMALL;
1437 }
1438 strcpy(value, tmpvol);
1439 return TTS_SUCCESS;
1440 }
1441
1442 /* Unknown property */
1443 ALOGE("Unsupported property");
1444 return TTS_PROPERTY_UNSUPPORTED;
1445 }
1446
1447
1448 /** synthesizeText
1449 * Synthesizes a text string.
1450 * The text string could be annotated with SSML tags.
1451 * @text - text to synthesize
1452 * @buffer - buffer which will receive generated samples
1453 * @bufferSize - size of buffer
1454 * @userdata - pointer to user data which will be passed back to callback function
1455 * return tts_result
1456 */
synthesizeText(const char * text,int8_t * buffer,size_t bufferSize,void * userdata)1457 tts_result TtsEngine::synthesizeText( const char * text, int8_t * buffer, size_t bufferSize, void * userdata )
1458 {
1459 int err;
1460 int cbret;
1461 pico_Char * inp = NULL;
1462 char * expanded_text = NULL;
1463 pico_Char * local_text = NULL;
1464 short outbuf[MAX_OUTBUF_SIZE/2];
1465 pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type;
1466 pico_Status ret;
1467 SvoxSsmlParser * parser = NULL;
1468
1469 picoSynthAbort = 0;
1470 if (text == NULL) {
1471 ALOGE("synthesizeText called with NULL string");
1472 return TTS_FAILURE;
1473 }
1474
1475 if (strlen(text) == 0) {
1476 return TTS_SUCCESS;
1477 }
1478
1479 if (buffer == NULL) {
1480 ALOGE("synthesizeText called with NULL buffer");
1481 return TTS_FAILURE;
1482 }
1483
1484 if ( (strncmp(text, "<speak", 6) == 0) || (strncmp(text, "<?xml", 5) == 0) ) {
1485 /* SSML input */
1486 parser = new SvoxSsmlParser();
1487 if (parser && parser->initSuccessful()) {
1488 err = parser->parseDocument(text, 1);
1489 if (err == XML_STATUS_ERROR) {
1490 /* Note: for some reason expat always thinks the input document has an error
1491 at the end, even when the XML document is perfectly formed */
1492 ALOGI("Warning: SSML document parsed with errors");
1493 }
1494 char * parsed_text = parser->getParsedDocument();
1495 if (parsed_text) {
1496 /* Add property tags to the string - if any. */
1497 local_text = (pico_Char *) doAddProperties( parsed_text );
1498 if (!local_text) {
1499 ALOGE("Failed to allocate memory for text string");
1500 delete parser;
1501 return TTS_FAILURE;
1502 }
1503 char * lang = parser->getParsedDocumentLanguage();
1504 if (lang != NULL) {
1505 if (doLanguageSwitch(lang) == TTS_FAILURE) {
1506 ALOGE("Failed to switch to language (%s) specified in SSML document.", lang);
1507 delete parser;
1508 return TTS_FAILURE;
1509 }
1510 } else {
1511 // lang is NULL, pick a language so the synthesis can be performed
1512 if (picoCurrentLangIndex == -1) {
1513 // no current language loaded, pick the first one and load it
1514 if (doLanguageSwitchFromLangIndex(0) == TTS_FAILURE) {
1515 ALOGE("Failed to switch to default language.");
1516 delete parser;
1517 return TTS_FAILURE;
1518 }
1519 }
1520 //ALOGI("No language in SSML, using current language (%s).", picoProp_currLang);
1521 }
1522 delete parser;
1523 } else {
1524 ALOGE("Failed to parse SSML document");
1525 delete parser;
1526 return TTS_FAILURE;
1527 }
1528 } else {
1529 ALOGE("Failed to create SSML parser");
1530 if (parser) {
1531 delete parser;
1532 }
1533 return TTS_FAILURE;
1534 }
1535 } else {
1536 /* camelCase pre-processing */
1537 expanded_text = doCamelCase(text);
1538 /* Add property tags to the string - if any. */
1539 local_text = (pico_Char *) doAddProperties( expanded_text );
1540 if (expanded_text) {
1541 free( expanded_text );
1542 }
1543 if (!local_text) {
1544 ALOGE("Failed to allocate memory for text string");
1545 return TTS_FAILURE;
1546 }
1547 }
1548
1549 text_remaining = strlen((const char *) local_text) + 1;
1550
1551 inp = (pico_Char *) local_text;
1552
1553 size_t bufused = 0;
1554
1555 /* synthesis loop */
1556 while (text_remaining) {
1557 if (picoSynthAbort) {
1558 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1559 break;
1560 }
1561
1562 /* Feed the text into the engine. */
1563 ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent );
1564 if (ret != PICO_OK) {
1565 ALOGE("Error synthesizing string '%s': [%d]", text, ret);
1566 if (local_text) {
1567 free( local_text );
1568 }
1569 return TTS_FAILURE;
1570 }
1571
1572 text_remaining -= bytes_sent;
1573 inp += bytes_sent;
1574 do {
1575 if (picoSynthAbort) {
1576 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1577 break;
1578 }
1579 /* Retrieve the samples and add them to the buffer. */
1580 ret = pico_getData( picoEngine, (void *) outbuf, MAX_OUTBUF_SIZE, &bytes_recv,
1581 &out_data_type );
1582 if (bytes_recv) {
1583 if ((bufused + bytes_recv) <= bufferSize) {
1584 memcpy(buffer+bufused, (int8_t *) outbuf, bytes_recv);
1585 bufused += bytes_recv;
1586 } else {
1587 /* The buffer filled; pass this on to the callback function. */
1588 cbret = picoSynthDoneCBPtr(userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer,
1589 bufused, TTS_SYNTH_PENDING);
1590 if (cbret == TTS_CALLBACK_HALT) {
1591 ALOGI("Halt requested by caller. Halting.");
1592 picoSynthAbort = 1;
1593 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1594 break;
1595 }
1596 bufused = 0;
1597 memcpy(buffer, (int8_t *) outbuf, bytes_recv);
1598 bufused += bytes_recv;
1599 }
1600 }
1601 } while (PICO_STEP_BUSY == ret);
1602
1603 /* This chunk of synthesis is finished; pass the remaining samples.
1604 Use 16 KHz, 16-bit samples. */
1605 if (!picoSynthAbort) {
1606 picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused,
1607 TTS_SYNTH_PENDING);
1608 }
1609 picoSynthAbort = 0;
1610
1611 if (ret != PICO_STEP_IDLE) {
1612 if (ret != 0){
1613 ALOGE("Error occurred during synthesis [%d]", ret);
1614 }
1615 if (local_text) {
1616 free(local_text);
1617 }
1618 ALOGV("Synth loop: sending TTS_SYNTH_DONE after error");
1619 picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused,
1620 TTS_SYNTH_DONE);
1621 pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1622 return TTS_FAILURE;
1623 }
1624 }
1625
1626 /* Synthesis is done; notify the caller */
1627 ALOGV("Synth loop: sending TTS_SYNTH_DONE after all done, or was asked to stop");
1628 picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused,
1629 TTS_SYNTH_DONE);
1630
1631 if (local_text) {
1632 free( local_text );
1633 }
1634 return TTS_SUCCESS;
1635 }
1636
1637
1638
1639 /** stop
1640 * Aborts the running synthesis.
1641 * return tts_result
1642 */
stop(void)1643 tts_result TtsEngine::stop( void )
1644 {
1645 picoSynthAbort = 1;
1646 return TTS_SUCCESS;
1647 }
1648
1649
1650 #ifdef __cplusplus
1651 extern "C" {
1652 #endif
1653
getTtsEngine(void)1654 TtsEngine * getTtsEngine( void )
1655 {
1656 return new TtsEngine();
1657 }
1658
1659 #ifdef __cplusplus
1660 }
1661 #endif
1662