1 /* com_svox_picottsengine.cpp
2 
3  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *   This is the Manager layer.  It sits on top of the native Pico engine
18  *   and provides the interface to the defined Google TTS engine API.
19  *   The Google engine API is the boundary to allow a TTS engine to be swapped.
 *   The Manager layer also provides the SSML tag interpretation.
21  *   The supported SSML tags are mapped to corresponding tags natively supported by Pico.
22  *   Native Pico functions always begin with picoXXX.
23  *
 *   In the Pico engine, the language cannot be changed independently of the voice.
25  *   If either the voice or locale/language are changed, a new resource is loaded.
26  *
27  *   Only a subset of SSML 1.0 tags are supported.
28  *   Some SSML tags involve significant complexity.
29  *   If the language is changed through an SSML tag, there is a latency for the load.
30  *
31  */
32 //#define LOG_NDEBUG 0
33 
34 #include <stdio.h>
35 #include <unistd.h>
36 #include <stdlib.h>
37 
38 #define LOG_TAG "SVOX Pico Engine"
39 
40 #include <utils/Log.h>
41 #include <utils/String16.h>                     /* for strlen16 */
42 #include <android_runtime/AndroidRuntime.h>
43 #include <TtsEngine.h>
44 
45 #include <cutils/jstring.h>
46 #include <picoapi.h>
47 #include <picodefs.h>
48 
49 #include "svox_ssml_parser.h"
50 
51 using namespace android;
52 
/* adaptation layer defines */
/* size of the memory area handed to pico_initialize() */
#define PICO_MEM_SIZE       2500000
/* speaking rate    */
#define PICO_MIN_RATE        20
#define PICO_MAX_RATE       500
#define PICO_DEF_RATE       100
/* speaking pitch   */
#define PICO_MIN_PITCH       50
#define PICO_MAX_PITCH      200
#define PICO_DEF_PITCH      100
/* speaking volume  */
#define PICO_MIN_VOLUME       0
#define PICO_MAX_VOLUME     500
#define PICO_DEF_VOLUME     100

/* string constants */
#define MAX_OUTBUF_SIZE     128
const char * PICO_SYSTEM_LINGWARE_PATH      = "/system/tts/lang_pico/";
const char * PICO_LINGWARE_PATH             = "/sdcard/svox/";
const char * PICO_VOICE_NAME                = "PicoVoice";
/* Pico markup; the open tags are printf-style formats taking one int level. */
const char * PICO_SPEED_OPEN_TAG            = "<speed level='%d'>";
const char * PICO_SPEED_CLOSE_TAG           = "</speed>";
const char * PICO_PITCH_OPEN_TAG            = "<pitch level='%d'>";
const char * PICO_PITCH_CLOSE_TAG           = "</pitch>";
const char * PICO_VOLUME_OPEN_TAG           = "<volume level='%d'>";
const char * PICO_VOLUME_CLOSE_TAG          = "</volume>";
const char * PICO_PHONEME_OPEN_TAG          = "<phoneme ph='";
const char * PICO_PHONEME_CLOSE_TAG         = "'/>";

/* supported voices
   Pico does not separately specify the voice and locale.
   The tables below are parallel: the same index selects one voice in every table. */
const char * picoSupportedLangIso3[]        = { "eng",              "eng",              "deu",              "spa",              "fra",              "ita" };
const char * picoSupportedCountryIso3[]     = { "USA",              "GBR",              "DEU",              "ESP",              "FRA",              "ITA" };
const char * picoSupportedLang[]            = { "en-US",            "en-GB",            "de-DE",            "es-ES",            "fr-FR",            "it-IT" };
const char * picoInternalLang[]             = { "en-US",            "en-GB",            "de-DE",            "es-ES",            "fr-FR",            "it-IT" };
const char * picoInternalTaLingware[]       = { "en-US_ta.bin",     "en-GB_ta.bin",     "de-DE_ta.bin",     "es-ES_ta.bin",     "fr-FR_ta.bin",     "it-IT_ta.bin" };
const char * picoInternalSgLingware[]       = { "en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin", "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin" };
const char * picoInternalUtppLingware[]     = { "en-US_utpp.bin",   "en-GB_utpp.bin",   "de-DE_utpp.bin",   "es-ES_utpp.bin",   "fr-FR_utpp.bin",   "it-IT_utpp.bin" };

/* element count of a table defined in this file */
#define PICO_TABLE_LEN(table)   (sizeof(table) / sizeof((table)[0]))

/* The voice count follows the table instead of being maintained by hand. */
const int picoNumSupportedVocs              = (int) PICO_TABLE_LEN(picoSupportedLang);

/* Compile-time check (negative array size on failure): a voice added to one table
   must be added to all of them, otherwise a valid index would read past a shorter table. */
typedef char picoVoiceTablesHaveSameLength[
        ((PICO_TABLE_LEN(picoSupportedLangIso3)    == PICO_TABLE_LEN(picoSupportedLang)) &&
         (PICO_TABLE_LEN(picoSupportedCountryIso3) == PICO_TABLE_LEN(picoSupportedLang)) &&
         (PICO_TABLE_LEN(picoInternalLang)         == PICO_TABLE_LEN(picoSupportedLang)) &&
         (PICO_TABLE_LEN(picoInternalTaLingware)   == PICO_TABLE_LEN(picoSupportedLang)) &&
         (PICO_TABLE_LEN(picoInternalSgLingware)   == PICO_TABLE_LEN(picoSupportedLang)) &&
         (PICO_TABLE_LEN(picoInternalUtppLingware) == PICO_TABLE_LEN(picoSupportedLang))) ? 1 : -1 ];

/* supported properties */
const char * picoSupportedProperties[]      = { "language", "rate", "pitch", "volume" };
const int    picoNumSupportedProperties     = (int) PICO_TABLE_LEN(picoSupportedProperties);
96 
97 
98 /* adapation layer global variables */
99 synthDoneCB_t * picoSynthDoneCBPtr;
100 void *          picoMemArea         = NULL;
101 pico_System     picoSystem          = NULL;
102 pico_Resource   picoTaResource      = NULL;
103 pico_Resource   picoSgResource      = NULL;
104 pico_Resource   picoUtppResource    = NULL;
105 pico_Engine     picoEngine          = NULL;
106 pico_Char *     picoTaFileName      = NULL;
107 pico_Char *     picoSgFileName      = NULL;
108 pico_Char *     picoUtppFileName    = NULL;
109 pico_Char *     picoTaResourceName  = NULL;
110 pico_Char *     picoSgResourceName  = NULL;
111 pico_Char *     picoUtppResourceName = NULL;
112 int     picoSynthAbort = 0;
113 char *  picoProp_currLang   = NULL;                 /* current language */
114 int     picoProp_currRate   = PICO_DEF_RATE;        /* current rate     */
115 int     picoProp_currPitch  = PICO_DEF_PITCH;       /* current pitch    */
116 int     picoProp_currVolume = PICO_DEF_VOLUME;      /* current volume   */
117 
118 int picoCurrentLangIndex = -1;
119 
120 char * pico_alt_lingware_path = NULL;
121 
122 
123 /* internal helper functions */
124 
125 /** checkForLocale
126  *  Check whether the requested locale is among the supported locales.
127  *  @locale -  the locale to check, either in xx or xx-YY format
128  *  return index of the locale, or -1 if not supported.
129 */
checkForLocale(const char * locale)130 static int checkForLocale( const char * locale )
131 {
132      int found = -1;                                         /* language not found   */
133      int i;
134      if (locale == NULL) {
135         ALOGE("checkForLocale called with NULL language");
136         return found;
137      }
138 
139     /* Verify that the requested locale is a locale that we support.    */
140     for (i = 0; i < picoNumSupportedVocs; i ++) {
141         if (strcmp(locale, picoSupportedLang[i]) == 0) { /* in array */
142             found = i;
143             break;
144         }
145     };
146 
147     /* The exact locale was not found.    */
148     if (found < 0) {
149         /* We didn't find an exact match; it may have been specified with only the first 2 characters.
150            This could overmatch ISO 639-3 language codes.%%                                   */
151 
152         /* check whether the current language matches the locale's language */
153         if ((picoCurrentLangIndex > -1) &&
154                 (strncmp(locale, picoSupportedLang[picoCurrentLangIndex], 2) == 0)) {
155             /* the current language matches the requested language, let's use it */
156             found = picoCurrentLangIndex;
157         } else {
158             /* check whether we can find a match at least on the language */
159             for (i = 0; i < picoNumSupportedVocs; i ++) {
160                 if (strncmp(locale, picoSupportedLang[i], 2) == 0) {
161                     found = i;
162                     break;
163                 }
164             }
165         }
166 
167         if (found < 0) {
168             ALOGE("TtsEngine::set language called with unsupported locale %s", locale);
169         }
170     };
171     return found;
172 }
173 
174 
175 /** cleanResources
176  *  Unloads any loaded Pico resources.
177 */
cleanResources(void)178 static void cleanResources( void )
179 {
180     if (picoEngine) {
181         pico_disposeEngine( picoSystem, &picoEngine );
182         pico_releaseVoiceDefinition( picoSystem, (pico_Char *) PICO_VOICE_NAME );
183         picoEngine = NULL;
184     }
185     if (picoUtppResource) {
186         pico_unloadResource( picoSystem, &picoUtppResource );
187         picoUtppResource = NULL;
188     }
189     if (picoTaResource) {
190         pico_unloadResource( picoSystem, &picoTaResource );
191         picoTaResource = NULL;
192     }
193     if (picoSgResource) {
194         pico_unloadResource( picoSystem, &picoSgResource );
195         picoSgResource = NULL;
196     }
197 
198     if (picoSystem) {
199         pico_terminate(&picoSystem);
200         picoSystem = NULL;
201     }
202     picoCurrentLangIndex = -1;
203 }
204 
205 
206 /** cleanFiles
207  *  Frees any memory allocated for file and resource strings.
208 */
cleanFiles(void)209 static void cleanFiles( void )
210 {
211     if (picoProp_currLang) {
212         free( picoProp_currLang );
213         picoProp_currLang = NULL;
214     }
215 
216     if (picoTaFileName) {
217         free( picoTaFileName );
218         picoTaFileName = NULL;
219     }
220 
221     if (picoSgFileName) {
222         free( picoSgFileName );
223         picoSgFileName = NULL;
224     }
225 
226     if (picoUtppFileName) {
227         free( picoUtppFileName );
228         picoUtppFileName = NULL;
229     }
230 
231     if (picoTaResourceName) {
232         free( picoTaResourceName );
233         picoTaResourceName = NULL;
234     }
235 
236     if (picoSgResourceName) {
237         free( picoSgResourceName );
238         picoSgResourceName = NULL;
239     }
240 
241     if (picoUtppResourceName) {
242         free( picoUtppResourceName );
243         picoUtppResourceName = NULL;
244     }
245 }
246 
247 /** hasResourcesForLanguage
248  *  Check to see if the resources required to load the language at the specified index
249  *  are properly installed
250  *  @langIndex - the index of the language to check the resources for. The index is valid.
251  *  return true if the required resources are installed, false otherwise
252  */
hasResourcesForLanguage(int langIndex)253 static bool hasResourcesForLanguage(int langIndex) {
254     FILE * pFile;
255     char* fileName = (char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE);
256 
257     /* check resources on system (under PICO_SYSTEM_LINGWARE_PATH). */
258     strcpy((char*)fileName, PICO_SYSTEM_LINGWARE_PATH);
259     strcat((char*)fileName, (const char*)picoInternalTaLingware[langIndex]);
260     pFile = fopen(fileName, "r");
261     if (pFile != NULL) {
262         /* "ta" file found. */
263         fclose (pFile);
264         /* now look for "sg" file. */
265         strcpy((char*)fileName, PICO_SYSTEM_LINGWARE_PATH);
266         strcat((char*)fileName, (const char*)picoInternalSgLingware[langIndex]);
267         pFile = fopen(fileName, "r");
268         if (pFile != NULL) {
269             /* "sg" file found, no need to continue checking, return success. */
270             fclose(pFile);
271             free(fileName);
272             return true;
273         }
274     }
275 
276     /* resources not found on system, check resources on alternative location */
277     /* (under pico_alt_lingware_path).                                            */
278     strcpy((char*)fileName, pico_alt_lingware_path);
279     strcat((char*)fileName, (const char*)picoInternalTaLingware[langIndex]);
280     pFile = fopen(fileName, "r");
281     if (pFile == NULL) {
282         free(fileName);
283         return false;
284     } else {
285         fclose (pFile);
286     }
287 
288     strcpy((char*)fileName, pico_alt_lingware_path);
289     strcat((char*)fileName, (const char*)picoInternalSgLingware[langIndex]);
290     pFile = fopen(fileName, "r");
291     if (pFile == NULL) {
292         free(fileName);
293         return false;
294     } else {
295         fclose(pFile);
296         free(fileName);
297         return true;
298     }
299 }
300 
301 /** doLanguageSwitchFromLangIndex
302  *  Switch to the requested locale.
303  *  If the locale is already loaded, it returns immediately.
304  *  If another locale is already is loaded, it will first be unloaded and the new one then loaded.
305  *  If no locale is loaded, the requested locale will be loaded.
306  *  @langIndex -  the index of the locale/voice to load, which is guaranteed to be supported.
307  *  return TTS_SUCCESS or TTS_FAILURE
308  */
doLanguageSwitchFromLangIndex(int langIndex)309 static tts_result doLanguageSwitchFromLangIndex( int langIndex )
310 {
311     int ret;                                        /* function result code */
312 
313     if (langIndex>=0) {
314         /* If we already have a loaded locale, check whether it is the same one as requested.   */
315         if (picoProp_currLang && (strcmp(picoProp_currLang, picoSupportedLang[langIndex]) == 0)) {
316             //ALOGI("Language already loaded (%s == %s)", picoProp_currLang,
317             //        picoSupportedLang[langIndex]);
318             return TTS_SUCCESS;
319         }
320     }
321 
322     /* It is not the same locale; unload the current one first. Also invalidates the system object*/
323     cleanResources();
324 
325     /* Allocate memory for file and resource names.     */
326     cleanFiles();
327 
328     if (picoSystem==NULL) {
329         /*re-init system object*/
330         ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem );
331         if (PICO_OK != ret) {
332             ALOGE("Failed to initialize the pico system object\n");
333             return TTS_FAILURE;
334         }
335     }
336 
337     picoProp_currLang   = (char *)      malloc( 10 );
338     picoTaFileName      = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
339     picoSgFileName      = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
340     picoUtppFileName    = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE );
341     picoTaResourceName  = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE );
342     picoSgResourceName  = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE );
343     picoUtppResourceName =(pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE );
344 
345     if (
346         (picoProp_currLang==NULL) || (picoTaFileName==NULL) || (picoSgFileName==NULL) ||
347         (picoUtppFileName==NULL) || (picoTaResourceName==NULL) || (picoSgResourceName==NULL) ||
348         (picoUtppResourceName==NULL)
349         ) {
350         ALOGE("Failed to allocate memory for internal strings\n");
351         cleanResources();
352         return TTS_FAILURE;
353     }
354 
355     /* Find where to load the resource files from: system or alternative location              */
356     /* based on availability of the Ta file. Try the alternative location first, this is where */
357     /* more recent language file updates would be installed (under pico_alt_lingware_path).        */
358     bool bUseSystemPath = true;
359     FILE * pFile;
360     char* tmpFileName = (char*)malloc(PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE);
361     strcpy((char*)tmpFileName, pico_alt_lingware_path);
362     strcat((char*)tmpFileName, (const char*)picoInternalTaLingware[langIndex]);
363     pFile = fopen(tmpFileName, "r");
364     if (pFile != NULL) {
365         /* "ta" file found under pico_alt_lingware_path, don't use the system path. */
366         fclose (pFile);
367         bUseSystemPath = false;
368     }
369     free(tmpFileName);
370 
371     /* Set the path and file names for resource files.  */
372     if (bUseSystemPath) {
373         strcpy((char *) picoTaFileName,   PICO_SYSTEM_LINGWARE_PATH);
374         strcpy((char *) picoSgFileName,   PICO_SYSTEM_LINGWARE_PATH);
375         strcpy((char *) picoUtppFileName, PICO_SYSTEM_LINGWARE_PATH);
376     } else {
377         strcpy((char *) picoTaFileName,   pico_alt_lingware_path);
378         strcpy((char *) picoSgFileName,   pico_alt_lingware_path);
379         strcpy((char *) picoUtppFileName, pico_alt_lingware_path);
380     }
381     strcat((char *) picoTaFileName,   (const char *) picoInternalTaLingware[langIndex]);
382     strcat((char *) picoSgFileName,   (const char *) picoInternalSgLingware[langIndex]);
383     strcat((char *) picoUtppFileName, (const char *) picoInternalUtppLingware[langIndex]);
384 
385     /* Load the text analysis Lingware resource file.   */
386     ret = pico_loadResource( picoSystem, picoTaFileName, &picoTaResource );
387     if (PICO_OK != ret) {
388         ALOGE("Failed to load textana resource for %s [%d]", picoSupportedLang[langIndex], ret);
389         cleanResources();
390         cleanFiles();
391         return TTS_FAILURE;
392     }
393 
394     /* Load the signal generation Lingware resource file.   */
395     ret = pico_loadResource( picoSystem, picoSgFileName, &picoSgResource );
396     if (PICO_OK != ret) {
397         ALOGE("Failed to load siggen resource for %s [%d]", picoSupportedLang[langIndex], ret);
398         cleanResources();
399         cleanFiles();
400         return TTS_FAILURE;
401     }
402 
403     /* Load the utpp Lingware resource file if exists - NOTE: this file is optional
404        and is currently not used. Loading is only attempted for future compatibility.
405        If this file is not present the loading will still succeed.                      */
406     ret = pico_loadResource( picoSystem, picoUtppFileName, &picoUtppResource );
407     if ((PICO_OK != ret) && (ret != PICO_EXC_CANT_OPEN_FILE)) {
408         ALOGE("Failed to load utpp resource for %s [%d]", picoSupportedLang[langIndex], ret);
409         cleanResources();
410         cleanFiles();
411         return TTS_FAILURE;
412     }
413 
414     /* Get the text analysis resource name.     */
415     ret = pico_getResourceName( picoSystem, picoTaResource, (char *) picoTaResourceName );
416     if (PICO_OK != ret) {
417         ALOGE("Failed to get textana resource name for %s [%d]", picoSupportedLang[langIndex], ret);
418         cleanResources();
419         cleanFiles();
420         return TTS_FAILURE;
421     }
422 
423     /* Get the signal generation resource name. */
424     ret = pico_getResourceName( picoSystem, picoSgResource, (char *) picoSgResourceName );
425     if ((PICO_OK == ret) && (picoUtppResource != NULL)) {
426         /* Get utpp resource name - optional: see note above.   */
427         ret = pico_getResourceName( picoSystem, picoUtppResource, (char *) picoUtppResourceName );
428         if (PICO_OK != ret)  {
429             ALOGE("Failed to get utpp resource name for %s [%d]", picoSupportedLang[langIndex], ret);
430             cleanResources();
431             cleanFiles();
432             return TTS_FAILURE;
433         }
434     }
435     if (PICO_OK != ret) {
436         ALOGE("Failed to get siggen resource name for %s [%d]", picoSupportedLang[langIndex], ret);
437         cleanResources();
438         cleanFiles();
439         return TTS_FAILURE;
440     }
441 
442     /* Create a voice definition.   */
443     ret = pico_createVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME );
444     if (PICO_OK != ret) {
445         ALOGE("Failed to create voice for %s [%d]", picoSupportedLang[langIndex], ret);
446         cleanResources();
447         cleanFiles();
448         return TTS_FAILURE;
449     }
450 
451     /* Add the text analysis resource to the voice. */
452     ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoTaResourceName );
453     if (PICO_OK != ret) {
454         ALOGE("Failed to add textana resource to voice for %s [%d]", picoSupportedLang[langIndex], ret);
455         cleanResources();
456         cleanFiles();
457         return TTS_FAILURE;
458     }
459 
460     /* Add the signal generation resource to the voice. */
461     ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoSgResourceName );
462     if ((PICO_OK == ret) && (picoUtppResource != NULL)) {
463         /* Add utpp resource to voice - optional: see note above.   */
464         ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoUtppResourceName );
465         if (PICO_OK != ret) {
466             ALOGE("Failed to add utpp resource to voice for %s [%d]", picoSupportedLang[langIndex], ret);
467             cleanResources();
468             cleanFiles();
469             return TTS_FAILURE;
470         }
471     }
472 
473     if (PICO_OK != ret) {
474         ALOGE("Failed to add siggen resource to voice for %s [%d]", picoSupportedLang[langIndex], ret);
475         cleanResources();
476         cleanFiles();
477         return TTS_FAILURE;
478     }
479 
480     ret = pico_newEngine( picoSystem, (const pico_Char *) PICO_VOICE_NAME, &picoEngine );
481     if (PICO_OK != ret) {
482         ALOGE("Failed to create engine for %s [%d]", picoSupportedLang[langIndex], ret);
483         cleanResources();
484         cleanFiles();
485         return TTS_FAILURE;
486     }
487 
488     /* Set the current locale/voice.    */
489     strcpy( picoProp_currLang, picoSupportedLang[langIndex] );
490     picoCurrentLangIndex = langIndex;
491     ALOGI("loaded %s successfully", picoProp_currLang);
492     return TTS_SUCCESS;
493 }
494 
495 
496 /** doLanguageSwitch
497  *  Switch to the requested locale.
498  *  If this locale is already loaded, it returns immediately.
499  *  If another locale is already loaded, this will first be unloaded
500  *  and the new one then loaded.
501  *  If no locale is loaded, the requested will be loaded.
502  *  @locale -  the locale to check, either in xx or xx-YY format (i.e "en" or "en-US")
503  *  return TTS_SUCCESS or TTS_FAILURE
504 */
doLanguageSwitch(const char * locale)505 static tts_result doLanguageSwitch( const char * locale )
506 {
507     int loclIndex;                              /* locale index */
508 
509     /* Load the new locale. */
510     loclIndex = checkForLocale( locale );
511     if (loclIndex < 0)  {
512         ALOGE("Tried to swith to non-supported locale %s", locale);
513         return TTS_FAILURE;
514     }
515     //ALOGI("Found supported locale %s", picoSupportedLang[loclIndex]);
516     return doLanguageSwitchFromLangIndex( loclIndex );
517 }
518 
519 
520 /** doAddProperties
521  *  Add <speed>, <pitch> and <volume> tags to the text,
522  *  if the properties have been set to non-default values, and return the new string.
523  *  The calling function is responsible for freeing the returned string.
524  *  @str - text to apply tags to
525  *  return new string with tags applied
526 */
doAddProperties(const char * str)527 static char * doAddProperties( const char * str )
528 {
529     char *  data = NULL;
530     int     haspitch, hasspeed, hasvol;                 /* parameters           */
531     int     textlen;                                    /* property string length   */
532     haspitch = 0; hasspeed = 0; hasvol = 0;
533     textlen = strlen(str) + 1;
534     if (picoProp_currPitch != PICO_DEF_PITCH) {          /* non-default pitch    */
535         textlen += strlen(PICO_PITCH_OPEN_TAG) + 5;
536         textlen += strlen(PICO_PITCH_CLOSE_TAG);
537         haspitch = 1;
538     }
539     if (picoProp_currRate != PICO_DEF_RATE) {            /* non-default rate     */
540         textlen += strlen(PICO_SPEED_OPEN_TAG) + 5;
541         textlen += strlen(PICO_SPEED_CLOSE_TAG);
542         hasspeed = 1;
543     }
544 
545     if (picoProp_currVolume != PICO_DEF_VOLUME) {        /* non-default volume   */
546         textlen += strlen(PICO_VOLUME_OPEN_TAG) + 5;
547         textlen += strlen(PICO_VOLUME_CLOSE_TAG);
548         hasvol = 1;
549     }
550 
551     /* Compose the property strings.    */
552     data = (char *) malloc( textlen );                  /* allocate string      */
553     if (!data) {
554         return NULL;
555     }
556     memset(data, 0, textlen);                           /* clear it             */
557     if (haspitch) {
558         char* tmp = (char*)malloc(strlen(PICO_PITCH_OPEN_TAG) + strlen(PICO_PITCH_CLOSE_TAG) + 5);
559         sprintf(tmp, PICO_PITCH_OPEN_TAG, picoProp_currPitch);
560         strcat(data, tmp);
561         free(tmp);
562     }
563 
564     if (hasspeed) {
565         char* tmp = (char*)malloc(strlen(PICO_SPEED_OPEN_TAG) + strlen(PICO_SPEED_CLOSE_TAG) + 5);
566         sprintf(tmp, PICO_SPEED_OPEN_TAG, picoProp_currRate);
567         strcat(data, tmp);
568         free(tmp);
569     }
570 
571     if (hasvol) {
572         char* tmp = (char*)malloc(strlen(PICO_VOLUME_OPEN_TAG) + strlen(PICO_VOLUME_CLOSE_TAG) + 5);
573         sprintf(tmp, PICO_VOLUME_OPEN_TAG, picoProp_currVolume);
574         strcat(data, tmp);
575         free(tmp);
576     }
577 
578     strcat(data, str);
579     if (hasvol) {
580         strcat(data, PICO_VOLUME_CLOSE_TAG);
581     }
582 
583     if (hasspeed) {
584         strcat(data, PICO_SPEED_CLOSE_TAG);
585     }
586 
587     if (haspitch) {
588         strcat(data, PICO_PITCH_CLOSE_TAG);
589     }
590     return data;
591 }
592 
593 
/** get_tok
 *  Finds the next token in a string, tokens being separated by the characters in the
 *  separator set (a blank). The string terminator also counts as a separator, because
 *  strchr matches it.
 *  @str - text to be processed
 *  @pos - position of first character to be searched in str
 *  @textlen - position at which the search stops (this position itself is not examined)
 *  @tokstart - address of a variable to receive the start of the token found
 *  @toklen - address of a variable to receive the length of the token found
 *  return : 1=token found, 0=token not found (tokstart and toklen are then left untouched)
 *  notes : the token separator set could be enlarged adding characters to the separator string
*/
static int  get_tok(const char * str , int pos, int textlen, int *tokstart, int *toklen)
{
    static const char separators[] = " ";
    int cursor = pos;

    /* step over the separators in front of the token */
    for (; cursor < textlen; cursor++) {
        if (strchr(separators, str[cursor]) == NULL) {
            break;
        }
    }
    if (cursor == textlen) {
        /* nothing but separators up to the end of the search range */
        return 0;
    }

    /* the token runs up to the next separator or the end of the search range */
    const int first = cursor;
    for (; cursor < textlen; cursor++) {
        if (strchr(separators, str[cursor]) != NULL) {
            break;
        }
    }

    *tokstart = first;
    *toklen = cursor - first;
    return 1;
}/*get_tok*/
624 
625 
/** get_sub_tok
 *  Finds the next subtoken in a token having a compound structure with camel case like "xxxYyyy".
 *  A subtoken is a (possibly empty) run of capital letters followed by a (possibly empty)
 *  run of other characters, and always starts at pos.
 *  @str - text to be processed
 *  @pos - position of first character to be searched in str
 *  @textlen - position at which the search stops in str (this position itself is not examined)
 *  @tokstart - address of a variable to receive the start of the sub token found
 *  @toklen - address of a variable to receive the length of the sub token found
 *  return : 1=sub token found, 0=sub token not found (pos is already at textlen)
 *  notes : the sub token separator set could be enlarged adding characters to the capitals string
*/
static int  get_sub_tok(const char * str , int pos, int textlen, int *tokstart, int *toklen) {

    static const char capitals[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    if (pos == textlen) {
        return 0;
    }

    int end = pos;

    /* leading capitals belong to the subtoken ("XXXYyyy" stays in one piece) */
    while ((end < textlen) && (strchr(capitals, str[end]) != NULL)) {
        ++end;
    }
    /* so does everything up to the next capital, which opens the following subtoken */
    while ((end < textlen) && (strchr(capitals, str[end]) == NULL)) {
        ++end;
    }

    *tokstart = pos;
    *toklen = end - pos;
    return 1;
}/*get_sub_tok*/
668 
669 
670 /** doCamelCase
671  *  Searches for tokens having a compound structure with camel case and transforms them as follows :
672  *        "XxxxYyyy" -->> "Xxxx Yyyy",
673  *        "xxxYyyy"  -->> "xxx Yyyy",
674  *        "XXXYyyy"  -->> "XXXYyyy"
675  *        etc....
676  *  The calling function is responsible for freeing the returned string.
677  *  @str - text to be processed
678  *  return new string with text processed
679 */
doCamelCase(const char * str)680 static char * doCamelCase( const char * str )
681 {
682     int     textlen;             /* input string length   */
683     int     totlen;              /* output string length   */
684     int     tlen_2, nsubtok;     /* nuber of subtokens   */
685     int     toklen, tokstart;    /*legnth and start of generic token*/
686     int     stoklen, stokstart;  /*legnth and start of generic sub-token*/
687     int     pos, tokpos, outpos; /*postion of current char in input string and token and output*/
688     char    *data;               /*pointer of the returned string*/
689 
690     pos = 0;
691     tokpos = 0;
692     toklen = 0;
693     stoklen = 0;
694     tlen_2 = 0;
695     totlen = 0;
696 
697     textlen = strlen(str) + 1;
698 
699     /*counting characters after sub token splitting including spaces*/
700     //while ((pos<textlen) && (str[pos]!=0)) {
701     while (get_tok(str, pos, textlen, &tokstart, &toklen)) {
702         tokpos = tokstart;
703         tlen_2 = 0;
704         nsubtok = 0;
705         while (get_sub_tok(str, tokpos, tokstart+toklen, &stokstart, &stoklen)) {
706             totlen += stoklen;
707             tlen_2 += stoklen;
708             tokpos = stokstart + stoklen;
709             nsubtok += 1;
710         }
711         totlen += nsubtok;    /*add spaces between subtokens*/
712         pos = tokstart + tlen_2;
713     }
714     //}
715     /* Allocate the return string */
716 
717     data = (char *) malloc( totlen );                  /* allocate string      */
718     if (!data) {
719         return NULL;
720     }
721     memset(data, 0, totlen);                           /* clear it             */
722     outpos = 0;
723     pos = 0;
724     /*copying characters*/
725     //while ((pos<textlen) && (str[pos]!=0)) {
726     while (get_tok  (str, pos, textlen, &tokstart, &toklen)) {
727         tokpos = tokstart;
728         tlen_2 = 0;
729         nsubtok = 0;
730         while (get_sub_tok(str, tokpos, tokstart+toklen, &stokstart, &stoklen)) {
731             strncpy(&(data[outpos]), &(str[stokstart]), stoklen);
732             outpos += stoklen;
733             strncpy(&(data[outpos]), " ", 1);
734             tlen_2 += stoklen;
735             outpos += 1;
736             tokpos = stokstart + stoklen;
737         }
738         pos=tokstart+tlen_2;
739     }
740     //}
741     if (outpos == 0) {
742         outpos = 1;
743     }
744     data[outpos-1] = 0;
745     return data;
746 }/*doCamelCase*/
747 
748 
749 /** createPhonemeString
750  *  Wrap all individual words in <phoneme> tags.
751  *  The Pico <phoneme> tag only supports one word in each tag,
752  *  therefore they must be individually wrapped!
753  *  @xsampa - text to convert to Pico phomene string
754  *  @length - length of the input string
755  *  return new string with tags applied
756 */
createPhonemeString(const char * xsampa,int length)757 extern char * createPhonemeString( const char * xsampa, int length )
758 {
759     char *  convstring = NULL;
760     int     origStrLen = strlen(xsampa);
761     int     numWords   = 1;
762     int     start, totalLength, i, j;
763 
764     for (i = 0; i < origStrLen; i ++) {
765         if ((xsampa[i] == ' ') || (xsampa[i] == '#')) {
766             numWords ++;
767         }
768     }
769 
770     if (numWords == 1) {
771         convstring = new char[origStrLen + 17];
772         convstring[0] = '\0';
773         strcat(convstring, PICO_PHONEME_OPEN_TAG);
774         strcat(convstring, xsampa);
775         strcat(convstring, PICO_PHONEME_CLOSE_TAG);
776     } else {
777         char * words[numWords];
778         start = 0; totalLength = 0; i = 0; j = 0;
779         for (i=0, j=0; i < origStrLen; i++) {
780             if ((xsampa[i] == ' ') || (xsampa[i] == '#')) {
781                 words[j]    = new char[i+1-start+17];
782                 words[j][0] = '\0';
783                 strcat( words[j], PICO_PHONEME_OPEN_TAG);
784                 strncat(words[j], xsampa+start, i-start);
785                 strcat( words[j], PICO_PHONEME_CLOSE_TAG);
786                 start = i + 1;
787                 j++;
788                 totalLength += strlen(words[j-1]);
789             }
790         }
791         words[j]    = new char[i+1-start+17];
792         words[j][0] = '\0';
793         strcat(words[j], PICO_PHONEME_OPEN_TAG);
794         strcat(words[j], xsampa+start);
795         strcat(words[j], PICO_PHONEME_CLOSE_TAG);
796         totalLength += strlen(words[j]);
797         convstring = new char[totalLength + 1];
798         convstring[0] = '\0';
799         for (i=0 ; i < numWords ; i++) {
800             strcat(convstring, words[i]);
801             delete [] words[i];
802         }
803     }
804 
805     return convstring;
806 }
807 
/* One row of the IPA -> XSAMPA conversion table.
   The XSAMPA sequences stored here use as many as 5 characters to represent
   a single IPA code, hence the 6-byte field (5 characters plus the NUL).   */
typedef struct tagPhnArr
{
    char16_t    strIPA;             /* IPA Unicode symbol       */
    char        strXSAMPA[6];       /* SAMPA sequence           */
} PArr;

/* Number of rows in PhnAry; must match the initializer list below
   (23 + 60 + 37 + 15 + 6 entries).                                         */
#define phn_cnt (134+7)

PArr    PhnAry[phn_cnt] = {

    /* XSAMPA conversion table
       This maps a single IPA symbol to a sequence representing XSAMPA.
       This relies upon a direct one-to-one correspondence
       including diphthongs and affricates.

       NOTE(review): most backslash-bearing sequences are stored with two
       backslash characters ("\\\\" in source), but the entries for 0x0284,
       0x029B, 0x01C2 and 0x01C1 are stored with a single one ("\\").
       Presumably deliberate (0x01C1 would not fit in 6 bytes otherwise);
       confirm against the consumer of these strings before changing.       */

    /* Vowels (23) complete     */
    {0x025B,        "E"},
    {0x0251,        "A"},
    {0x0254,        "O"},
    {0x00F8,        "2"},
    {0x0153,        "9"},
    {0x0276,        "&"},
    {0x0252,        "Q"},
    {0x028C,        "V"},
    {0x0264,        "7"},
    {0x026F,        "M"},
    {0x0268,        "1"},
    {0x0289,        "}"},
    {0x026A,        "I"},
    {0x028F,        "Y"},
    {0x028A,        "U"},
    {0x0259,        "@"},
    {0x0275,        "8"},
    {0x0250,        "6"},
    {0x00E6,        "{"},
    {0x025C,        "3"},
    {0x025A,        "@`"},
    {0x025E,        "3\\\\"},
    {0x0258,        "@\\\\"},

    /* Consonants (60) complete */
    {0x0288,        "t`"},
    {0x0256,        "d`"},
    {0x025F,        "J\\\\"},
    {0x0261,        "g"},
    {0x0262,        "G\\\\"},
    {0x0294,        "?"},
    {0x0271,        "F"},
    {0x0273,        "n`"},
    {0x0272,        "J"},
    {0x014B,        "N"},
    {0x0274,        "N\\\\"},
    {0x0299,        "B\\\\"},
    {0x0280,        "R\\\\"},
    {0x027E,        "4"},
    {0x027D,        "r`"},
    {0x0278,        "p\\\\"},
    {0x03B2,        "B"},
    {0x03B8,        "T"},
    {0x00F0,        "D"},
    {0x0283,        "S"},
    {0x0292,        "Z"},
    {0x0282,        "s`"},
    {0x0290,        "z`"},
    {0x00E7,        "C"},
    {0x029D,        "j\\\\"},
    {0x0263,        "G"},
    {0x03C7,        "X"},
    {0x0281,        "R"},
    {0x0127,        "X\\\\"},
    {0x0295,        "?\\\\"},
    {0x0266,        "h\\\\"},
    {0x026C,        "K"},
    {0x026E,        "K\\\\"},
    {0x028B,        "P"},
    {0x0279,        "r\\\\"},
    {0x027B,        "r\\\\'"},
    {0x0270,        "M\\\\"},
    {0x026D,        "l`"},
    {0x028E,        "L"},
    {0x029F,        "L\\\\"},
    {0x0253,        "b_<"},
    {0x0257,        "d_<"},
    {0x0284,        "J\\_<"},
    {0x0260,        "g_<"},
    {0x029B,        "G\\_<"},
    {0x028D,        "W"},
    {0x0265,        "H"},
    {0x029C,        "H\\\\"},
    {0x02A1,        ">\\\\"},
    {0x02A2,        "<\\\\"},
    {0x0267,        "x\\\\"},       /* hooktop heng     */
    {0x0298,        "O\\\\"},
    {0x01C0,        "|\\\\"},
    {0x01C3,        "!\\\\"},
    {0x01C2,        "=\\"},
    {0x01C1,        "|\\|\\"},
    {0x027A,        "l\\\\"},
    {0x0255,        "s\\\\"},
    {0x0291,        "z\\\\"},
    {0x026B,        "l_G"},


    /* Diacritics (37) complete */
    {0x02BC,        "_>"},
    {0x0325,        "_0"},
    {0x030A,        "_0"},
    {0x032C,        "_v"},
    {0x02B0,        "_h"},
    {0x0324,        "_t"},
    {0x0330,        "_k"},
    {0x033C,        "_N"},
    {0x032A,        "_d"},
    {0x033A,        "_a"},
    {0x033B,        "_m"},
    {0x0339,        "_O"},
    {0x031C,        "_c"},
    {0x031F,        "_+"},
    {0x0320,        "_-"},
    {0x0308,        "_\""},         /* centralized      */
    {0x033D,        "_x"},
    {0x0318,        "_A"},
    {0x0319,        "_q"},
    {0x02DE,        "`"},
    {0x02B7,        "_w"},
    {0x02B2,        "_j"},
    {0x02E0,        "_G"},
    {0x02E4,        "_?\\\\"},      /* pharyngealized   */
    {0x0303,        "~"},           /* nasalized        */
    {0x207F,        "_n"},
    {0x02E1,        "_l"},
    {0x031A,        "_}"},
    {0x0334,        "_e"},
    {0x031D,        "_r"},          /* raised  equivalent to 02D4 */
    {0x02D4,        "_r"},          /* raised  equivalent to 031D */
    {0x031E,        "_o"},          /* lowered equivalent to 02D5 */
    {0x02D5,        "_o"},          /* lowered equivalent to 031E */
    {0x0329,        "="},           /* syllabic         */
    {0x032F,        "_^"},          /* non-syllabic     */
    {0x0361,        "_"},           /* top tie bar      */
    {0x035C,        "_"},

    /* Suprasegmental (15) incomplete */
    {0x02C8,        "\""},          /* primary   stress */
    {0x02CC,        "%"},           /* secondary stress */
    {0x02D0,        ":"},           /* long             */
    {0x02D1,        ":\\\\"},       /* half-long        */
    {0x0306,        "_X"},          /* extra short      */

    {0x2016,        "||"},          /* major group      */
    {0x203F,        "-\\\\"},       /* bottom tie bar   */
    {0x2197,        "<R>"},         /* global rise      */
    {0x2198,        "<F>"},         /* global fall      */
    {0x2193,        "<D>"},         /* downstep         */
    {0x2191,        "<U>"},         /* upstep           */
    {0x02E5,        "<T>"},         /* extra high level */
    {0x02E7,        "<M>"},         /* mid level        */
    {0x02E9,        "<B>"},         /* extra low level  */

    {0x025D,        "3`:"},         /* non-IPA  %%      */

    /* Affricates (6) complete  */
    {0x02A3,        "d_z"},
    {0x02A4,        "d_Z"},
    {0x02A5,        "d_z\\\\"},
    {0x02A6,        "t_s"},
    {0x02A7,        "t_S"},
    {0x02A8,        "t_s\\\\"}
    };


/** CnvIPAPnt
 *  Convert an individual IPA codepoint to its XSAMPA sequence.
 *  A single IPA code could map to a string.  The table is searched; if the
 *  codepoint is not found, the same character is passed through:
 *  ASCII as a single byte, anything else as its UTF-8 encoding.  A lone
 *  UTF-16 surrogate half cannot be encoded and yields an empty string.
 *  @IPnt - IPA codepoint (one UTF-16 code unit)
 *  @XPnt - receives the NUL-terminated result; must provide room for at
 *          least 6 bytes (5 characters plus the terminator)
*/
void CnvIPAPnt( const char16_t IPnt, char * XPnt )
{
    const unsigned int  cp = IPnt;                      /* widened copy for bit arithmetic  */
    int                 idx;                            /* index into table                 */

    if (XPnt == NULL) {
        return;
    }
    XPnt[0] = 0;                                        /* clear the result string  */

    /* Search the table for the conversion. */
    for (idx = 0; idx < phn_cnt; idx ++) {              /* for each item in table   */
        if (IPnt == PhnAry[idx].strIPA) {               /* matches IPA code         */
            strcpy( XPnt, PhnAry[idx].strXSAMPA );      /* copy the XSAMPA string   */
            return;
        }
    }

    /* Not in the table: just copy the character.  The code unit must be
       encoded explicitly; its in-memory bytes are not a NUL-terminated
       string and must not be handed to the str* functions.                 */
    if (cp < 0x80) {
        XPnt[0] = (char) cp;
        XPnt[1] = 0;
    } else if (cp < 0x800) {
        XPnt[0] = (char) (0xC0 | (cp >> 6));
        XPnt[1] = (char) (0x80 | (cp & 0x3F));
        XPnt[2] = 0;
    } else if ((cp >= 0xD800) && (cp <= 0xDFFF)) {
        /* surrogate half: nothing sensible to emit, result stays empty     */
    } else {
        XPnt[0] = (char) (0xE0 | (cp >> 12));
        XPnt[1] = (char) (0x80 | ((cp >> 6) & 0x3F));
        XPnt[2] = (char) (0x80 | (cp & 0x3F));
        XPnt[3] = 0;
    }
}
1001 
1002 
1003 /** cnvIpaToXsampa
1004  *  Convert an IPA character string to an XSAMPA character string.
1005  *  @ipaString - input IPA string to convert
1006  *  @outXsampaString - converted XSAMPA string is passed back in this parameter
1007  *  return size of the new string
1008 */
1009 
cnvIpaToXsampa(const char16_t * ipaString,size_t ipaStringSize,char ** outXsampaString)1010 int cnvIpaToXsampa( const char16_t * ipaString, size_t ipaStringSize, char ** outXsampaString )
1011 {
1012     size_t xsize;                                  /* size of result               */
1013     size_t ipidx;                                  /* index into IPA string        */
1014     char * XPnt;                                   /* short XSAMPA char sequence   */
1015 
1016     /* Convert an IPA string to an XSAMPA string and store the xsampa string in *outXsampaString.
1017        It is the responsibility of the caller to free the allocated string.
1018        Increment through the string.  For each base & combination convert it to the XSAMP equivalent.
1019        Because of the XSAMPA limitations, not all IPA characters will be covered.       */
1020     XPnt = (char *) malloc(6);
1021     xsize   = (4 * ipaStringSize) + 8;          /* assume more than double size */
1022     *outXsampaString = (char *) malloc( xsize );/* allocate return string   */
1023     *outXsampaString[0] = 0;
1024     xsize = 0;                                  /* clear final              */
1025 
1026     for (ipidx = 0; ipidx < ipaStringSize; ipidx ++) { /* for each IPA code        */
1027         CnvIPAPnt( ipaString[ipidx], XPnt );           /* get converted character  */
1028         strcat((char *)*outXsampaString, XPnt );       /* concatenate XSAMPA       */
1029     }
1030     free(XPnt);
1031     xsize = strlen(*outXsampaString);                  /* get the final length     */
1032     return xsize;
1033 }
1034 
1035 
1036 /* Google Engine API function implementations */
1037 
1038 /** init
1039  *  Allocates Pico memory block and initializes the Pico system.
1040  *  synthDoneCBPtr - Pointer to callback function which will receive generated samples
1041  *  config - the engine configuration parameters, here only contains the non-system path
1042  *      for the lingware location
1043  *  return tts_result
1044 */
init(synthDoneCB_t synthDoneCBPtr,const char * config)1045 tts_result TtsEngine::init( synthDoneCB_t synthDoneCBPtr, const char *config )
1046 {
1047     if (synthDoneCBPtr == NULL) {
1048         ALOGE("Callback pointer is NULL");
1049         return TTS_FAILURE;
1050     }
1051 
1052     picoMemArea = malloc( PICO_MEM_SIZE );
1053     if (!picoMemArea) {
1054         ALOGE("Failed to allocate memory for Pico system");
1055         return TTS_FAILURE;
1056     }
1057 
1058     pico_Status ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem );
1059     if (PICO_OK != ret) {
1060         ALOGE("Failed to initialize Pico system");
1061         free( picoMemArea );
1062         picoMemArea = NULL;
1063         return TTS_FAILURE;
1064     }
1065 
1066     picoSynthDoneCBPtr = synthDoneCBPtr;
1067 
1068     picoCurrentLangIndex = -1;
1069 
1070     // was the initialization given an alternative path for the lingware location?
1071     if ((config != NULL) && (strlen(config) > 0)) {
1072         pico_alt_lingware_path = (char*)malloc(strlen(config));
1073         strcpy((char*)pico_alt_lingware_path, config);
1074         ALOGV("Alternative lingware path %s", pico_alt_lingware_path);
1075     } else {
1076         pico_alt_lingware_path = (char*)malloc(strlen(PICO_LINGWARE_PATH) + 1);
1077         strcpy((char*)pico_alt_lingware_path, PICO_LINGWARE_PATH);
1078         ALOGV("Using predefined lingware path %s", pico_alt_lingware_path);
1079     }
1080 
1081     return TTS_SUCCESS;
1082 }
1083 
1084 
1085 /** shutdown
1086  *  Unloads all Pico resources; terminates Pico system and frees Pico memory block.
1087  *  return tts_result
1088 */
shutdown(void)1089 tts_result TtsEngine::shutdown( void )
1090 {
1091     cleanResources();
1092 
1093     if (picoSystem) {
1094         pico_terminate(&picoSystem);
1095         picoSystem = NULL;
1096     }
1097     if (picoMemArea) {
1098         free(picoMemArea);
1099         picoMemArea = NULL;
1100     }
1101 
1102     cleanFiles();
1103     return TTS_SUCCESS;
1104 }
1105 
1106 
1107 /** loadLanguage
1108  *  Load a new language.
1109  *  @lang - string with ISO 3 letter language code.
1110  *  @country - string with ISO 3 letter country code .
1111  *  @variant - string with language variant for that language and country pair.
1112  *  return tts_result
1113 */
loadLanguage(const char * lang,const char * country,const char * variant)1114 tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant)
1115 {
1116     return TTS_FAILURE;
1117     //return setProperty("language", value, size);
1118 }
1119 
1120 
1121 /** setLanguage
1122  *  Load a new language (locale).  Use the ISO 639-3 language codes.
1123  *  @lang - string with ISO 639-3 language code.
1124  *  @country - string with ISO 3 letter country code.
1125  *  @variant - string with language variant for that language and country pair.
1126  *  return tts_result
1127  */
setLanguage(const char * lang,const char * country,const char * variant)1128 tts_result TtsEngine::setLanguage( const char * lang, const char * country, const char * variant )
1129 {
1130     //ALOGI("TtsEngine::setLanguage %s %s %s", lang, country, variant);
1131     int langIndex;
1132     int countryIndex;
1133     int i;
1134 
1135     if (lang == NULL)
1136         {
1137         ALOGE("TtsEngine::setLanguage called with NULL language");
1138         return TTS_FAILURE;
1139         }
1140 
1141     /* We look for a match on the language first
1142        then we look for a match on the country.
1143        If no match on the language:
1144              return an error.
1145        If match on the language, but no match on the country:
1146              load the language found for the language match.
1147        If match on the language, and match on the country:
1148              load the language found for the country match.     */
1149 
1150     /* Find a match on the language.    */
1151     langIndex = -1;                                     /* no match */
1152     for (i = 0; i < picoNumSupportedVocs; i ++)
1153         {
1154         if (strcmp(lang, picoSupportedLangIso3[i]) == 0)
1155             {
1156             langIndex = i;
1157             break;
1158             }
1159         }
1160     if (langIndex < 0)
1161         {
1162         /* The language isn't supported.    */
1163         ALOGE("TtsEngine::setLanguage called with unsupported language");
1164         return TTS_FAILURE;
1165         }
1166 
1167     /* Find a match on the country, if there is one.    */
1168     if (country != NULL)
1169         {
1170         countryIndex = -1;
1171         for (i = langIndex; i < picoNumSupportedVocs; i ++)
1172             {
1173             if (   (strcmp(lang,    picoSupportedLangIso3[i])    == 0)
1174                 && (strcmp(country, picoSupportedCountryIso3[i]) == 0))
1175                 {
1176                 countryIndex = i;
1177                 break;
1178                 }
1179             }
1180 
1181         if (countryIndex < 0)
1182             {
1183             /* We didn't find a match on the country, but we had a match on the language.
1184                Use that language.                                                       */
1185             ALOGI("TtsEngine::setLanguage found matching language(%s) but not matching country(%s).",
1186                     lang, country);
1187             }
1188         else
1189             {
1190             /* We have a match on both the language and the country.    */
1191             langIndex = countryIndex;
1192             }
1193         }
1194 
1195     return doLanguageSwitchFromLangIndex( langIndex );      /* switch the language  */
1196 }
1197 
1198 
1199 /** isLanguageAvailable
1200  *  Returns the level of support for a language.
1201  *  @lang - string with ISO 3 letter language code.
1202  *  @country - string with ISO 3 letter country code .
1203  *  @variant - string with language variant for that language and country pair.
1204  *  return tts_support_result
1205 */
isLanguageAvailable(const char * lang,const char * country,const char * variant)1206 tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char *country,
1207             const char *variant) {
1208     int langIndex = -1;
1209     int countryIndex = -1;
1210     //-------------------------
1211     // language matching
1212     // if no language specified
1213     if (lang == NULL)  {
1214         ALOGE("TtsEngine::isLanguageAvailable called with no language");
1215         return TTS_LANG_NOT_SUPPORTED;
1216     }
1217 
1218     // find a match on the language
1219     for (int i = 0; i < picoNumSupportedVocs; i++)
1220     {
1221         if (strcmp(lang, picoSupportedLangIso3[i]) == 0) {
1222             langIndex = i;
1223             break;
1224         }
1225     }
1226     if (langIndex < 0) {
1227         // language isn't supported
1228         ALOGV("TtsEngine::isLanguageAvailable called with unsupported language");
1229         return TTS_LANG_NOT_SUPPORTED;
1230     }
1231 
1232     //-------------------------
1233     // country matching
1234     // if no country specified
1235     if ((country == NULL) || (strlen(country) == 0)) {
1236         // check installation of matched language
1237         return (hasResourcesForLanguage(langIndex) ? TTS_LANG_AVAILABLE : TTS_LANG_MISSING_DATA);
1238     }
1239 
1240     // find a match on the country
1241     for (int i = langIndex; i < picoNumSupportedVocs; i++) {
1242         if ((strcmp(lang, picoSupportedLangIso3[i]) == 0)
1243                 && (strcmp(country, picoSupportedCountryIso3[i]) == 0)) {
1244             countryIndex = i;
1245             break;
1246         }
1247     }
1248     if (countryIndex < 0)  {
1249         // we didn't find a match on the country, but we had a match on the language
1250         // check installation of matched language
1251         return (hasResourcesForLanguage(langIndex) ? TTS_LANG_AVAILABLE : TTS_LANG_MISSING_DATA);
1252     } else {
1253         // we have a match on the language and the country
1254         langIndex = countryIndex;
1255         // check installation of matched language + country
1256         return (hasResourcesForLanguage(langIndex) ? TTS_LANG_COUNTRY_AVAILABLE : TTS_LANG_MISSING_DATA);
1257     }
1258 
1259     // no variants supported in this library, TTS_LANG_COUNTRY_VAR_AVAILABLE cannot be returned.
1260 }
1261 
1262 
1263 /** getLanguage
1264  *  Get the currently loaded language - if any.
1265  *  @lang - string with current ISO 3 letter language code, empty string if no loaded language.
1266  *  @country - string with current ISO 3 letter country code, empty string if no loaded language.
1267  *  @variant - string with current language variant, empty string if no loaded language.
1268  *  return tts_result
1269 */
getLanguage(char * language,char * country,char * variant)1270 tts_result TtsEngine::getLanguage(char *language, char *country, char *variant)
1271 {
1272     if (picoCurrentLangIndex == -1) {
1273         strcpy(language, "\0");
1274         strcpy(country, "\0");
1275         strcpy(variant, "\0");
1276     } else {
1277         strcpy(language, picoSupportedLangIso3[picoCurrentLangIndex]);
1278         strcpy(country, picoSupportedCountryIso3[picoCurrentLangIndex]);
1279         // no variant in this implementation
1280         strcpy(variant, "\0");
1281     }
1282     return TTS_SUCCESS;
1283 }
1284 
1285 
1286 /** setAudioFormat
1287  * sets the audio format to use for synthesis, returns what is actually used.
1288  * @encoding - reference to encoding format
1289  * @rate - reference to sample rate
1290  * @channels - reference to number of channels
1291  * return tts_result
1292  * */
setAudioFormat(tts_audio_format & encoding,uint32_t & rate,int & channels)1293 tts_result TtsEngine::setAudioFormat(tts_audio_format& encoding, uint32_t& rate,
1294             int& channels)
1295 {
1296     // ignore the input parameters, the enforced audio parameters are fixed here
1297     encoding = TTS_AUDIO_FORMAT_PCM_16_BIT;
1298     rate = 16000;
1299     channels = 1;
1300     return TTS_SUCCESS;
1301 }
1302 
1303 
1304 /** setProperty
1305  *  Set property. The supported properties are:  language, rate, pitch and volume.
1306  *  @property - name of property to set
1307  *  @value - value to set
1308  *  @size - size of value
1309  *  return tts_result
1310 */
setProperty(const char * property,const char * value,const size_t size)1311 tts_result TtsEngine::setProperty( const char * property, const char * value, const size_t size )
1312 {
1313     int rate;
1314     int pitch;
1315     int volume;
1316 
1317     /* Set a specific property for the engine.
1318        Supported properties include: language (locale), rate, pitch, volume.    */
1319     /* Sanity check */
1320     if (property == NULL) {
1321         ALOGE("setProperty called with property NULL");
1322         return TTS_PROPERTY_UNSUPPORTED;
1323     }
1324 
1325     if (value == NULL) {
1326         ALOGE("setProperty called with value NULL");
1327         return TTS_VALUE_INVALID;
1328     }
1329 
1330     if (strncmp(property, "language", 8) == 0) {
1331         /* Verify it's in correct format.   */
1332         if (strlen(value) != 2 && strlen(value) != 6) {
1333             ALOGE("change language called with incorrect format");
1334             return TTS_VALUE_INVALID;
1335         }
1336 
1337         /* Try to switch to specified language. */
1338         if (doLanguageSwitch(value) == TTS_FAILURE) {
1339             ALOGE("failed to load language");
1340             return TTS_FAILURE;
1341         } else {
1342             return TTS_SUCCESS;
1343         }
1344     } else if (strncmp(property, "rate", 4) == 0) {
1345         rate = atoi(value);
1346         if (rate < PICO_MIN_RATE) {
1347             rate = PICO_MIN_RATE;
1348         }
1349         if (rate > PICO_MAX_RATE) {
1350             rate = PICO_MAX_RATE;
1351         }
1352         picoProp_currRate = rate;
1353         return TTS_SUCCESS;
1354     } else if (strncmp(property, "pitch", 5) == 0) {
1355         pitch = atoi(value);
1356         if (pitch < PICO_MIN_PITCH) {
1357             pitch = PICO_MIN_PITCH;
1358         }
1359         if (pitch > PICO_MAX_PITCH) {
1360             pitch = PICO_MAX_PITCH;
1361         }
1362         picoProp_currPitch = pitch;
1363         return TTS_SUCCESS;
1364     } else if (strncmp(property, "volume", 6) == 0) {
1365         volume = atoi(value);
1366         if (volume < PICO_MIN_VOLUME) {
1367             volume = PICO_MIN_VOLUME;
1368         }
1369         if (volume > PICO_MAX_VOLUME) {
1370             volume = PICO_MAX_VOLUME;
1371         }
1372         picoProp_currVolume = volume;
1373         return TTS_SUCCESS;
1374     }
1375 
1376     return TTS_PROPERTY_UNSUPPORTED;
1377 }
1378 
1379 
1380 /** getProperty
1381  *  Get the property.  Supported properties are:  language, rate, pitch and volume.
1382  *  @property - name of property to get
1383  *  @value    - buffer which will receive value of property
1384  *  @iosize   - size of value - if size is too small on return this will contain actual size needed
1385  *  return tts_result
1386 */
getProperty(const char * property,char * value,size_t * iosize)1387 tts_result TtsEngine::getProperty( const char * property, char * value, size_t * iosize )
1388 {
1389     /* Get the property for the engine.
1390        This property was previously set by setProperty or by default.       */
1391     /* sanity check */
1392     if (property == NULL) {
1393         ALOGE("getProperty called with property NULL");
1394         return TTS_PROPERTY_UNSUPPORTED;
1395     }
1396 
1397     if (value == NULL) {
1398         ALOGE("getProperty called with value NULL");
1399         return TTS_VALUE_INVALID;
1400     }
1401 
1402     if (strncmp(property, "language", 8) == 0) {
1403         if (picoProp_currLang == NULL) {
1404             strcpy(value, "");
1405         } else {
1406             if (*iosize < strlen(picoProp_currLang)+1)  {
1407                 *iosize = strlen(picoProp_currLang) + 1;
1408                 return TTS_PROPERTY_SIZE_TOO_SMALL;
1409             }
1410             strcpy(value, picoProp_currLang);
1411         }
1412         return TTS_SUCCESS;
1413     } else if (strncmp(property, "rate", 4) == 0) {
1414         char tmprate[4];
1415         sprintf(tmprate, "%d", picoProp_currRate);
1416         if (*iosize < strlen(tmprate)+1) {
1417             *iosize = strlen(tmprate) + 1;
1418             return TTS_PROPERTY_SIZE_TOO_SMALL;
1419         }
1420         strcpy(value, tmprate);
1421         return TTS_SUCCESS;
1422     } else if (strncmp(property, "pitch", 5) == 0) {
1423         char tmppitch[4];
1424         sprintf(tmppitch, "%d", picoProp_currPitch);
1425         if (*iosize < strlen(tmppitch)+1) {
1426             *iosize = strlen(tmppitch) + 1;
1427             return TTS_PROPERTY_SIZE_TOO_SMALL;
1428         }
1429         strcpy(value, tmppitch);
1430         return TTS_SUCCESS;
1431     } else if (strncmp(property, "volume", 6) == 0) {
1432         char tmpvol[4];
1433         sprintf(tmpvol, "%d", picoProp_currVolume);
1434         if (*iosize < strlen(tmpvol)+1) {
1435             *iosize = strlen(tmpvol) + 1;
1436             return TTS_PROPERTY_SIZE_TOO_SMALL;
1437         }
1438         strcpy(value, tmpvol);
1439         return TTS_SUCCESS;
1440     }
1441 
1442     /* Unknown property */
1443     ALOGE("Unsupported property");
1444     return TTS_PROPERTY_UNSUPPORTED;
1445 }
1446 
1447 
1448 /** synthesizeText
1449  *  Synthesizes a text string.
1450  *  The text string could be annotated with SSML tags.
1451  *  @text     - text to synthesize
1452  *  @buffer   - buffer which will receive generated samples
1453  *  @bufferSize - size of buffer
1454  *  @userdata - pointer to user data which will be passed back to callback function
1455  *  return tts_result
1456 */
synthesizeText(const char * text,int8_t * buffer,size_t bufferSize,void * userdata)1457 tts_result TtsEngine::synthesizeText( const char * text, int8_t * buffer, size_t bufferSize, void * userdata )
1458 {
1459     int         err;
1460     int         cbret;
1461     pico_Char * inp = NULL;
1462     char *      expanded_text = NULL;
1463     pico_Char * local_text = NULL;
1464     short       outbuf[MAX_OUTBUF_SIZE/2];
1465     pico_Int16  bytes_sent, bytes_recv, text_remaining, out_data_type;
1466     pico_Status ret;
1467     SvoxSsmlParser * parser = NULL;
1468 
1469     picoSynthAbort = 0;
1470     if (text == NULL) {
1471         ALOGE("synthesizeText called with NULL string");
1472         return TTS_FAILURE;
1473     }
1474 
1475     if (strlen(text) == 0) {
1476         return TTS_SUCCESS;
1477     }
1478 
1479     if (buffer == NULL) {
1480         ALOGE("synthesizeText called with NULL buffer");
1481         return TTS_FAILURE;
1482     }
1483 
1484     if ( (strncmp(text, "<speak", 6) == 0) || (strncmp(text, "<?xml", 5) == 0) ) {
1485         /* SSML input */
1486         parser = new SvoxSsmlParser();
1487         if (parser && parser->initSuccessful()) {
1488             err = parser->parseDocument(text, 1);
1489             if (err == XML_STATUS_ERROR) {
1490                 /* Note: for some reason expat always thinks the input document has an error
1491                    at the end, even when the XML document is perfectly formed */
1492                 ALOGI("Warning: SSML document parsed with errors");
1493             }
1494             char * parsed_text = parser->getParsedDocument();
1495             if (parsed_text) {
1496                 /* Add property tags to the string - if any.    */
1497                 local_text = (pico_Char *) doAddProperties( parsed_text );
1498                 if (!local_text) {
1499                     ALOGE("Failed to allocate memory for text string");
1500                     delete parser;
1501                     return TTS_FAILURE;
1502                 }
1503                 char * lang = parser->getParsedDocumentLanguage();
1504                 if (lang != NULL) {
1505                     if (doLanguageSwitch(lang) == TTS_FAILURE) {
1506                         ALOGE("Failed to switch to language (%s) specified in SSML document.", lang);
1507                         delete parser;
1508                         return TTS_FAILURE;
1509                     }
1510                 } else {
1511                     // lang is NULL, pick a language so the synthesis can be performed
1512                     if (picoCurrentLangIndex == -1) {
1513                         // no current language loaded, pick the first one and load it
1514                         if (doLanguageSwitchFromLangIndex(0) == TTS_FAILURE) {
1515                             ALOGE("Failed to switch to default language.");
1516                             delete parser;
1517                             return TTS_FAILURE;
1518                         }
1519                     }
1520                     //ALOGI("No language in SSML, using current language (%s).", picoProp_currLang);
1521                 }
1522                 delete parser;
1523             } else {
1524                 ALOGE("Failed to parse SSML document");
1525                 delete parser;
1526                 return TTS_FAILURE;
1527             }
1528         } else {
1529             ALOGE("Failed to create SSML parser");
1530             if (parser) {
1531                 delete parser;
1532             }
1533             return TTS_FAILURE;
1534         }
1535     } else {
1536         /* camelCase pre-processing */
1537         expanded_text = doCamelCase(text);
1538         /* Add property tags to the string - if any.    */
1539         local_text = (pico_Char *) doAddProperties( expanded_text );
1540         if (expanded_text) {
1541             free( expanded_text );
1542         }
1543         if (!local_text) {
1544             ALOGE("Failed to allocate memory for text string");
1545             return TTS_FAILURE;
1546         }
1547     }
1548 
1549     text_remaining = strlen((const char *) local_text) + 1;
1550 
1551     inp = (pico_Char *) local_text;
1552 
1553     size_t bufused = 0;
1554 
1555     /* synthesis loop   */
1556     while (text_remaining) {
1557         if (picoSynthAbort) {
1558             ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1559             break;
1560         }
1561 
1562         /* Feed the text into the engine.   */
1563         ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent );
1564         if (ret != PICO_OK) {
1565             ALOGE("Error synthesizing string '%s': [%d]", text, ret);
1566             if (local_text) {
1567                 free( local_text );
1568             }
1569             return TTS_FAILURE;
1570         }
1571 
1572         text_remaining -= bytes_sent;
1573         inp += bytes_sent;
1574         do {
1575             if (picoSynthAbort) {
1576                 ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1577                 break;
1578             }
1579             /* Retrieve the samples and add them to the buffer. */
1580             ret = pico_getData( picoEngine, (void *) outbuf, MAX_OUTBUF_SIZE, &bytes_recv,
1581                     &out_data_type );
1582             if (bytes_recv) {
1583                 if ((bufused + bytes_recv) <= bufferSize) {
1584                     memcpy(buffer+bufused, (int8_t *) outbuf, bytes_recv);
1585                     bufused += bytes_recv;
1586                 } else {
1587                     /* The buffer filled; pass this on to the callback function.    */
1588                     cbret = picoSynthDoneCBPtr(userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer,
1589                             bufused, TTS_SYNTH_PENDING);
1590                     if (cbret == TTS_CALLBACK_HALT) {
1591                         ALOGI("Halt requested by caller. Halting.");
1592                         picoSynthAbort = 1;
1593                         ret = pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1594                         break;
1595                     }
1596                     bufused = 0;
1597                     memcpy(buffer, (int8_t *) outbuf, bytes_recv);
1598                     bufused += bytes_recv;
1599                 }
1600             }
1601         } while (PICO_STEP_BUSY == ret);
1602 
1603         /* This chunk of synthesis is finished; pass the remaining samples.
1604            Use 16 KHz, 16-bit samples.                                              */
1605         if (!picoSynthAbort) {
1606             picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused,
1607                     TTS_SYNTH_PENDING);
1608         }
1609         picoSynthAbort = 0;
1610 
1611         if (ret != PICO_STEP_IDLE) {
1612             if (ret != 0){
1613                 ALOGE("Error occurred during synthesis [%d]", ret);
1614             }
1615             if (local_text) {
1616                 free(local_text);
1617             }
1618             ALOGV("Synth loop: sending TTS_SYNTH_DONE after error");
1619             picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused,
1620                     TTS_SYNTH_DONE);
1621             pico_resetEngine( picoEngine, PICO_RESET_SOFT );
1622             return TTS_FAILURE;
1623         }
1624     }
1625 
1626     /* Synthesis is done; notify the caller */
1627     ALOGV("Synth loop: sending TTS_SYNTH_DONE after all done, or was asked to stop");
1628     picoSynthDoneCBPtr( userdata, 16000, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, buffer, bufused,
1629             TTS_SYNTH_DONE);
1630 
1631     if (local_text) {
1632         free( local_text );
1633     }
1634     return TTS_SUCCESS;
1635 }
1636 
1637 
1638 
1639 /** stop
1640  *  Aborts the running synthesis.
1641  *  return tts_result
1642 */
stop(void)1643 tts_result TtsEngine::stop( void )
1644 {
1645     picoSynthAbort = 1;
1646     return TTS_SUCCESS;
1647 }
1648 
1649 
1650 #ifdef __cplusplus
1651 extern "C" {
1652 #endif
1653 
getTtsEngine(void)1654 TtsEngine * getTtsEngine( void )
1655 {
1656     return new TtsEngine();
1657 }
1658 
1659 #ifdef __cplusplus
1660 }
1661 #endif
1662