1 /*
2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /**
17  * @file picosa.c
18  *
19  * sentence analysis - POS disambiguation
20  *
21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22  * All rights reserved.
23  *
24  * History:
25  * - 2009-04-20 -- initial version
26  *
27  */
28 
29 #include "picoos.h"
30 #include "picodbg.h"
31 #include "picobase.h"
32 #include "picokdt.h"
33 #include "picoklex.h"
34 #include "picoktab.h"
35 #include "picokfst.h"
36 #include "picotrns.h"
37 #include "picodata.h"
38 #include "picosa.h"
39 
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43 #if 0
44 }
45 #endif
46 
47 
/* PU saStep states (values are the ones stored in sa_subobj_t.procState) */
#define SA_STEPSTATE_COLLECT             0
#define SA_STEPSTATE_FEED                2
#define SA_STEPSTATE_PROCESS_POSD       10
#define SA_STEPSTATE_PROCESS_WPHO       11
#define SA_STEPSTATE_PROCESS_TRNS_PARSE 12
#define SA_STEPSTATE_PROCESS_TRNS_FST   13

/* size requested for the transducer's AltDesc work buffer when the
   sub-object is created */
#define SA_MAX_ALTDESC_SIZE (30*(PICOTRNS_MAX_NUM_POSSYM + 2))

/* NOTE(review): presumably the size of a message string buffer; no use is
   visible in this part of the file - confirm */
#define SA_MSGSTR_SIZE 32
59 
60 /*  subobject    : SentAnaUnit
61  *  shortcut     : sa
62  *  context size : one phrase, max. 30 non-PUNC items, for non-processed items
63  *                 one item if internal input empty
64  */
65 
66 /** @addtogroup picosa
67 
68   internal buffers:
69 
70   - headx: array for extended item heads of fixed size (head plus
71     index for content, plus two fields for boundary strength/type)
72 
73   - cbuf1, cbuf2: buffers for item contents (referenced by index in
74     headx). Future: replace these two buffers by a single double-sided
75     buffer (double shrink-grow type)
76 
77   0. bottom up filling of items in headx and cbuf1
78 
79   1. POS disambiguation (right-to-left, top-to-bottom):
80   - number and sequence of items unchanged
81   - item content can only get smaller (reducing nr of results in WORDINDEX)
82   -> info stays in "headx, cbuf1" and changed in place                      \n
83      WORDGRAPH(POSes,NA)graph             -> WORDGRAPH(POS,NA)graph         \n
84      WORDINDEX(POSes,NA)POS1ind1...POSNindN  -> WORDINDEX(POS,NA)POS|ind    \n
85 
86   2. lex-index lookup and G2P (both directions possible, left-to-right done):
87   - number and sequence of items unchanged, item head info and content
88     changes
89   -> headx changed in place; cbuf1 to cbuf2                                 \n
90      WORDGRAPH(POS,NA)graph    -> WORDPHON(POS,NA)phon                      \n
91      WORDINDEX(POS,NA)POS|ind  -> WORDPHON(POS,NA)phon                      \n
92 
93   3. phrasing (right-to-left):
94 
95      Previous (before introducing SBEG)\n
96      ----------------------------------
97                                            1|          2|             3|    4|    \n
98      e.g. from      WP WP WP       WP WP PUNC  WP WP PUNC  WP WP WP PUNC FLUSH    \n
99      e.g. to  BINIT WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND WP WP WP BSEND BTERM   \n
100               |1                         |2          |3             |4            \n
101 
102      3-level bound state: to keep track of bound strength from end of
103      previous punc-phrase, then BOUND item output as first item
104      (strength from prev punc-phrase and type from current
105      punc-phrase).
106 
107      trailing PUNC item       bound states
108                               INIT         SEND         PHR1
109        PUNC(SENTEND, T)       B(I,T)>SEND  B(S,T)>SEND  B(P1,T)>SEND
110        PUNC(SENTEND, Q)       B(I,Q)>SEND  B(S,Q)>SEND  B(P1,Q)>SEND
111        PUNC(SENTEND, E)       B(I,E)>SEND  B(S,E)>SEND  B(P1,E)>SEND
112        PUNC(PHRASEEND, P)     B(I,P)>PHR1  B(S,P)>PHR1  B(P1,P)>PHR1
113        PUNC(PHRASEEND, FORC)  B(I,P)>PHR1  B(S,P)>PHR1  B(P1,P)>PHR1
114        PUNC(FLUSH, T)         B(I,T)..     B(S,T)..     B(P1,T)..
115                                 B(T,NA)      B(T,NA)      B(T,NA)
116                                 >INIT        >INIT        >INIT
117 
118      PHR2/3 case:
119      trailing PUNC item       bound states
120                           INIT              SEND              PHR1
121        PUNC(SENTEND, T)   B(I,P)B(P,T)>SEND B(S,P)B(P,T)>SEND B(P1,P)B(P,T)>SEND
122        PUNC(SENTEND, Q)   B(I,P)B(P,Q)>SEND B(S,P)B(P,Q)>SEND B(P1,P)B(P,Q)>SEND
123        PUNC(SENTEND, E)   B(I,P)B(P,E)>SEND B(S,P)B(P,E)>SEND B(P1,P)B(P,E)>SEND
124        PUNC(PHRASEEND, P) B(I,P)B(P,P)>PHR1 B(S,P)B(P,P)>PHR1 B(P1,P)B(P,P)>PHR1
125        PUNC(PHREND, FORC) B(I,P)B(P,P)>PHR1 B(S,P)B(P,P)>PHR1 B(P1,P)B(P,P)>PHR1
126        PUNC(FLUSH, T)     B(I,P)B(P,T)..    B(S,T)B(P,T)..    B(P1,T)B(P,T)..
127                             B(T,NA)             B(T,NA)             B(T,NA)
128                             >INIT               >INIT               >INIT
129 
130      Current
131      --------
132      e.g. from      WP WP WP       WP WP PUNC  WP WP PUNC        WP WP WP PUNC  FLUSH
133      e.g. to  BSBEG WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND BSBEG WP WP WP BSEND BTERM
134               |1                         |2                |3                   |4
135 
136      2-level bound state: The internal buffer contains one primary phrase (sometimes forced, if buffer
     almost full), with the trailing PUNC item included (last item).
     If the trailing PUNC is a primary phrase separator, the
139        item is not output, but instead, the bound state is set to PPHR, so that the correct BOUND can
140        be output at the start of the next primary phrase.
141      Otherwise,
142        the item is converted to the corresponding BOUND and output. the bound state is set to SSEP,
143        so that a BOUND of type SBEG is output at the start of the next primary phrase.
144 
145      trailing PUNC item       bound states
146                               SSEP           PPHR
147        PUNC(SENTEND, X)       B(B,X)>SSEP    B(P1,X)>SSEP  (X = T | Q | E)
148        PUNC(FLUSH, T)         B(B,T)>SSEP*    B(P1,T)>SSEP
149        PUNC(PHRASEEND, P)     B(B,P)>PPHR    B(P1,P)>PPHR
150        PUNC(PHRASEEND, FORC)  B(B,P)>PPHR    B(P1,P)>PPHR
151 
152 *    If more than one sentence separators follow each other (e.g. SEND-FLUSH, SEND-SEND) then
153      all but the first will be treated as an (empty) phrase containing just this item.
154      If this (single) item is a flush, creation of SBEG is suppressed.
155 
156 
157   - dtphr phrasing tree (rather subphrasing tree it should be called)
158     determines
159       BOUND_PHR2
160       BOUND_PHR3
  - boundary strengths are determined for every word (except the
162     first one) from right-to-left. The boundary types mark the phrase
163     type of the phrase following the boundary.
164   - number of items actually changed (new BOUND items added): because
165     of fixed size without content, two fields are contained in headx
166     to indicate if a BOUND needs to be added to the LEFT of the item.
167     -> headx further extended with boundary strength and type info to
168     indicate that to the left of the headx ele a BOUND needs to be
169     inserted when outputting.
170 
171   4. accentuation:
172   - number of items unchanged, content unchanged, only head info changes
173   -> changed in place in headx
174 */
175 
176 
177 typedef struct {
178     picodata_itemhead_t head;
179     picoos_uint16 cind;
180 } picosa_headx_t;
181 
182 
183 typedef struct sa_subobj {
184     picoos_uint8 procState; /* for next processing step decision */
185 
186     picoos_uint8 inspaceok;      /* flag: headx/cbuf1 has space for an item */
187     picoos_uint8 needsmoreitems; /* flag: need more items */
188     picoos_uint8 phonesTransduced; /* flag: */
189 
190     picoos_uint8 tmpbuf[PICODATA_MAX_ITEMSIZE];  /* tmp. location for an item */
191 
192     picosa_headx_t headx[PICOSA_MAXNR_HEADX];
193     picoos_uint16 headxBottom; /* bottom */
194     picoos_uint16 headxLen;    /* length, 0 if empty */
195 
196     picoos_uint8 cbuf1[PICOSA_MAXSIZE_CBUF];
197     picoos_uint16 cbuf1BufSize; /* actually allocated size */
198     picoos_uint16 cbuf1Len;     /* length, 0 if empty */
199 
200     picoos_uint8 cbuf2[PICOSA_MAXSIZE_CBUF];
201     picoos_uint16 cbuf2BufSize; /* actually allocated size */
202     picoos_uint16 cbuf2Len;     /* length, 0 if empty */
203 
204     picotrns_possym_t phonBufA[PICOTRNS_MAX_NUM_POSSYM+1];
205     picotrns_possym_t phonBufB[PICOTRNS_MAX_NUM_POSSYM+1];
206     picotrns_possym_t * phonBuf;
207     picotrns_possym_t * phonBufOut;
208     picoos_uint16 phonReadPos, phonWritePos; /* next pos to read from phonBufIn, next pos to write to phonBufIn */
209     picoos_uint16 nextReadPos; /* position of (potential) next item to read from */
210 
211 
212     /* buffer for internal calculation of transducer */
213     picotrns_AltDesc altDescBuf;
214     /* the number of AltDesc in the buffer */
215     picoos_uint16 maxAltDescLen;
216 
217     /* tab knowledge base */
218     picoktab_Graphs tabgraphs;
219     picoktab_Phones tabphones;
220     picoktab_Pos tabpos;
221     picoktab_FixedIds fixedIds;
222 
223     /* dtposd knowledge base */
224     picokdt_DtPosD dtposd;
225 
226     /* dtg2p knowledge base */
227     picokdt_DtG2P dtg2p;
228 
229     /* lex knowledge base */
230     picoklex_Lex lex;
231 
232     /* ulex knowledge bases */
233     picoos_uint8 numUlex;
234     picoklex_Lex ulex[PICOKNOW_MAX_NUM_ULEX];
235 
236     /* fst knowledge bases */
237     picoos_uint8 numFsts;
238     picokfst_FST fst[PICOKNOW_MAX_NUM_WPHO_FSTS];
239     picoos_uint8 curFst; /* the fst to be applied next */
240 
241 
242 } sa_subobj_t;
243 
244 
saInitialize(register picodata_ProcessingUnit this,picoos_int32 resetMode)245 static pico_status_t saInitialize(register picodata_ProcessingUnit this, picoos_int32 resetMode) {
246     sa_subobj_t * sa;
247     picoos_uint16 i;
248     picokfst_FST fst;
249     picoknow_kb_id_t fstKbIds[PICOKNOW_MAX_NUM_WPHO_FSTS] = PICOKNOW_KBID_WPHO_ARRAY;
250     picoklex_Lex ulex;
251     picoknow_kb_id_t ulexKbIds[PICOKNOW_MAX_NUM_ULEX] = PICOKNOW_KBID_ULEX_ARRAY;
252 
253     PICODBG_DEBUG(("calling"));
254 
255     if (NULL == this || NULL == this->subObj) {
256         return picoos_emRaiseException(this->common->em,
257                                        PICO_ERR_NULLPTR_ACCESS, NULL, NULL);
258     }
259     sa = (sa_subobj_t *) this->subObj;
260 
261     /*  sa->common = this->common; */
262 
263     sa->procState = SA_STEPSTATE_COLLECT;
264 
265     sa->inspaceok = TRUE;
266     sa->needsmoreitems = TRUE;
267 
268     sa->headxBottom = 0;
269     sa->headxLen = 0;
270     sa->cbuf1BufSize = PICOSA_MAXSIZE_CBUF;
271     sa->cbuf2BufSize = PICOSA_MAXSIZE_CBUF;
272     sa->cbuf1Len = 0;
273     sa->cbuf2Len = 0;
274 
275     /* init headx, cbuf1, cbuf2 */
276     for (i = 0; i < PICOSA_MAXNR_HEADX; i++){
277         sa->headx[i].head.type = 0;
278         sa->headx[i].head.info1 = PICODATA_ITEMINFO1_NA;
279         sa->headx[i].head.info2 = PICODATA_ITEMINFO2_NA;
280         sa->headx[i].head.len = 0;
281         sa->headx[i].cind = 0;
282     }
283     for (i = 0; i < PICOSA_MAXSIZE_CBUF; i++) {
284         sa->cbuf1[i] = 0;
285         sa->cbuf2[i] = 0;
286     }
287 
288 
289     /* possym buffer */
290     sa->phonesTransduced = FALSE;
291     sa->phonBuf = sa->phonBufA;
292     sa->phonBufOut = sa->phonBufB;
293     sa->phonReadPos = 0;
294     sa->phonWritePos = 0;
295     sa->nextReadPos = 0;
296 
297     if (resetMode == PICO_RESET_SOFT) {
298         /*following initializations needed only at startup or after a full reset*/
299         return PICO_OK;
300     }
301 
302     /* kb fst[] */
303     sa->numFsts = 0;
304     for (i = 0; i<PICOKNOW_MAX_NUM_WPHO_FSTS; i++) {
305         fst = picokfst_getFST(this->voice->kbArray[fstKbIds[i]]);
306         if (NULL != fst) {
307             sa->fst[sa->numFsts++] = fst;
308         }
309     }
310     sa->curFst = 0;
311     PICODBG_DEBUG(("got %i fsts", sa->numFsts));
312     /* kb fixedIds */
313     sa->fixedIds = picoktab_getFixedIds(this->voice->kbArray[PICOKNOW_KBID_FIXED_IDS]);
314 
315     /* kb tabgraphs */
316     sa->tabgraphs =
317         picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]);
318     if (sa->tabgraphs == NULL) {
319         return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
320                                        NULL, NULL);
321     }
322     PICODBG_DEBUG(("got tabgraphs"));
323 
324     /* kb tabphones */
325     sa->tabphones =
326         picoktab_getPhones(this->voice->kbArray[PICOKNOW_KBID_TAB_PHONES]);
327     if (sa->tabphones == NULL) {
328         return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
329                                        NULL, NULL);
330     }
331     PICODBG_DEBUG(("got tabphones"));
332 
333 #ifdef PICO_DEBU
334     {
335         picoos_uint16 itmp;
336         for (itmp = 0; itmp < 256; itmp++) {
337             if (picoktab_hasVowelProp(sa->tabphones, itmp)) {
338                 PICODBG_DEBUG(("tabphones hasVowel: %d", itmp));
339             }
340             if (picoktab_hasDiphthProp(sa->tabphones, itmp)) {
341                 PICODBG_DEBUG(("tabphones hasDiphth: %d", itmp));
342             }
343             if (picoktab_hasGlottProp(sa->tabphones, itmp)) {
344                 PICODBG_DEBUG(("tabphones hasGlott: %d", itmp));
345             }
346             if (picoktab_hasNonsyllvowelProp(sa->tabphones, itmp)) {
347                 PICODBG_DEBUG(("tabphones hasNonsyllvowel: %d", itmp));
348             }
349             if (picoktab_hasSyllconsProp(sa->tabphones, itmp)) {
350                 PICODBG_DEBUG(("tabphones hasSyllcons: %d", itmp));
351             }
352             if (picoktab_isPrimstress(sa->tabphones, itmp)) {
353                 PICODBG_DEBUG(("tabphones isPrimstress: %d", itmp));
354             }
355             if (picoktab_isSecstress(sa->tabphones, itmp)) {
356                 PICODBG_DEBUG(("tabphones isSecstress: %d", itmp));
357             }
358             if (picoktab_isSyllbound(sa->tabphones, itmp)) {
359                 PICODBG_DEBUG(("tabphones isSyllbound: %d", itmp));
360             }
361             if (picoktab_isPause(sa->tabphones, itmp)) {
362                 PICODBG_DEBUG(("tabphones isPause: %d", itmp));
363             }
364         }
365 
366         PICODBG_DEBUG(("tabphones primstressID: %d",
367                        picoktab_getPrimstressID(sa->tabphones)));
368         PICODBG_DEBUG(("tabphones secstressID: %d",
369                        picoktab_getSecstressID(sa->tabphones)));
370         PICODBG_DEBUG(("tabphones syllboundID: %d",
371                        picoktab_getSyllboundID(sa->tabphones)));
372         PICODBG_DEBUG(("tabphones pauseID: %d",
373                        picoktab_getPauseID(sa->tabphones)));
374     }
375 #endif
376 
377     /* kb tabpos */
378     sa->tabpos =
379         picoktab_getPos(this->voice->kbArray[PICOKNOW_KBID_TAB_POS]);
380     if (sa->tabpos == NULL) {
381         return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
382                                        NULL, NULL);
383     }
384     PICODBG_DEBUG(("got tabpos"));
385 
386     /* kb dtposd */
387     sa->dtposd = picokdt_getDtPosD(this->voice->kbArray[PICOKNOW_KBID_DT_POSD]);
388     if (sa->dtposd == NULL) {
389         return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
390                                        NULL, NULL);
391     }
392     PICODBG_DEBUG(("got dtposd"));
393 
394     /* kb dtg2p */
395     sa->dtg2p = picokdt_getDtG2P(this->voice->kbArray[PICOKNOW_KBID_DT_G2P]);
396     if (sa->dtg2p == NULL) {
397         return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
398                                        NULL, NULL);
399     }
400     PICODBG_DEBUG(("got dtg2p"));
401 
402     /* kb lex */
403     sa->lex = picoklex_getLex(this->voice->kbArray[PICOKNOW_KBID_LEX_MAIN]);
404     if (sa->lex == NULL) {
405         return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
406                                        NULL, NULL);
407     }
408     PICODBG_DEBUG(("got lex"));
409 
410     /* kb ulex[] */
411     sa->numUlex = 0;
412     for (i = 0; i<PICOKNOW_MAX_NUM_ULEX; i++) {
413         ulex = picoklex_getLex(this->voice->kbArray[ulexKbIds[i]]);
414         if (NULL != ulex) {
415             sa->ulex[sa->numUlex++] = ulex;
416         }
417     }
418     PICODBG_DEBUG(("got %i user lexica", sa->numUlex));
419 
420     return PICO_OK;
421 }
422 
423 static picodata_step_result_t saStep(register picodata_ProcessingUnit this,
424                                      picoos_int16 mode,
425                                      picoos_uint16 *numBytesOutput);
426 
saTerminate(register picodata_ProcessingUnit this)427 static pico_status_t saTerminate(register picodata_ProcessingUnit this) {
428     return PICO_OK;
429 }
430 
saSubObjDeallocate(register picodata_ProcessingUnit this,picoos_MemoryManager mm)431 static pico_status_t saSubObjDeallocate(register picodata_ProcessingUnit this,
432                                         picoos_MemoryManager mm) {
433     sa_subobj_t * sa;
434     if (NULL != this) {
435         sa = (sa_subobj_t *) this->subObj;
436         picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf);
437         picoos_deallocate(mm, (void *) &this->subObj);
438     }
439     return PICO_OK;
440 }
441 
442 
picosa_newSentAnaUnit(picoos_MemoryManager mm,picoos_Common common,picodata_CharBuffer cbIn,picodata_CharBuffer cbOut,picorsrc_Voice voice)443 picodata_ProcessingUnit picosa_newSentAnaUnit(picoos_MemoryManager mm,
444                                               picoos_Common common,
445                                               picodata_CharBuffer cbIn,
446                                               picodata_CharBuffer cbOut,
447                                               picorsrc_Voice voice) {
448     picodata_ProcessingUnit this;
449     sa_subobj_t * sa;
450     this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice);
451     if (this == NULL) {
452         return NULL;
453     }
454 
455     this->initialize = saInitialize;
456     PICODBG_DEBUG(("set this->step to saStep"));
457     this->step = saStep;
458     this->terminate = saTerminate;
459     this->subDeallocate = saSubObjDeallocate;
460 
461     this->subObj = picoos_allocate(mm, sizeof(sa_subobj_t));
462     if (this->subObj == NULL) {
463         picoos_deallocate(mm, (void *)&this);
464         picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
465         return NULL;
466     }
467 
468     sa = (sa_subobj_t *) this->subObj;
469 
470     sa->altDescBuf = picotrns_allocate_alt_desc_buf(mm, SA_MAX_ALTDESC_SIZE, &sa->maxAltDescLen);
471     if (NULL == sa->altDescBuf) {
472         picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf);
473         picoos_deallocate(mm, (void *)&sa);
474         picoos_deallocate(mm, (void *)&this);
475         picoos_emRaiseException(common->em,PICO_EXC_OUT_OF_MEM, NULL, NULL);
476     }
477 
478 
479     saInitialize(this, PICO_RESET_FULL);
480     return this;
481 }
482 
483 
484 /* ***********************************************************************/
485 /* PROCESS_POSD disambiguation functions */
486 /* ***********************************************************************/
487 
488 /* find next POS to the right of 'ind' and return its POS and index */
saPosDItemSeqGetPosRight(register picodata_ProcessingUnit this,register sa_subobj_t * sa,const picoos_uint16 ind,const picoos_uint16 top,picoos_uint16 * rightind)489 static picoos_uint8 saPosDItemSeqGetPosRight(register picodata_ProcessingUnit this,
490                                             register sa_subobj_t *sa,
491                                             const picoos_uint16 ind,
492                                             const picoos_uint16 top,
493                                             picoos_uint16 *rightind) {
494     picoos_uint8 val;
495     picoos_int32 i;
496 
497     val = PICOKDT_EPSILON;
498     for (i = ind + 1; ((val == PICOKDT_EPSILON) && (i < top)); i++) {
499         if ((sa->headx[i].head.type == PICODATA_ITEM_WORDGRAPH) ||
500                 (sa->headx[i].head.type == PICODATA_ITEM_WORDINDEX)  ||
501                 (sa->headx[i].head.type == PICODATA_ITEM_WORDPHON) ) {
502             val = sa->headx[i].head.info1;
503         }
504     }
505     *rightind = i - 1;
506     return val;
507 }
508 
509 
510 /* left-to-right, for each WORDGRAPH/WORDINDEX/WORDPHON do posd */
saDisambPos(register picodata_ProcessingUnit this,register sa_subobj_t * sa)511 static pico_status_t saDisambPos(register picodata_ProcessingUnit this,
512                                  register sa_subobj_t *sa) {
513     picokdt_classify_result_t dtres;
514     picoos_uint8 half_nratt_posd = PICOKDT_NRATT_POSD >> 1;
515     picoos_uint16 valbuf[PICOKDT_NRATT_POSD]; /* only [0..half_nratt_posd] can be >2^8 */
516     picoos_uint16 prevout;   /* direct dt output (hist.) or POS of prev word */
517     picoos_uint16 lastprev3; /* last index of POS(es) found to the left */
518     picoos_uint16 curPOS;     /* POS(es) of current word */
519     picoos_int32 first;    /* index of first item with POS(es) */
520     picoos_int32 ci;
521     picoos_uint8 okay;       /* two uses: processing okay and lexind resovled */
522     picoos_uint8 i;
523     picoos_uint16 inval;
524     picoos_uint16 fallback;
525 
526     /* set initial values */
527     okay = TRUE;
528     prevout = PICOKDT_HISTORY_ZERO;
529     curPOS = PICODATA_ITEMINFO1_ERR;
530     first = 0;
531 
532     while ((first < sa->headxLen) &&
533            (sa->headx[first].head.type != PICODATA_ITEM_WORDGRAPH) &&
534            (sa->headx[first].head.type != PICODATA_ITEM_WORDINDEX) &&
535            (sa->headx[first].head.type != PICODATA_ITEM_WORDPHON)) {
536         first++;
537     }
538     if (first >= sa->headxLen) {
539         /* phrase not containing an item with POSes info, e.g. single flush */
540         PICODBG_DEBUG(("no item with POSes found"));
541         return PICO_OK;
542     }
543 
544     lastprev3 = first;
545 
546     for (i = 0; i <= half_nratt_posd; i++) {
547         valbuf[i] = PICOKDT_HISTORY_ZERO;
548     }
549     /* set POS(es) of current word, will be shifted afterwards */
550     valbuf[half_nratt_posd+1] = sa->headx[first].head.info1;
551     for (i = half_nratt_posd+2; i < PICOKDT_NRATT_POSD; i++) {
552     /* find next POS to the right and set valbuf[i] */
553         valbuf[i] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3);
554     }
555 
556     PICODBG_TRACE(("headxLen: %d", sa->headxLen));
557 
558     /* process from left to right all items in headx */
559     for (ci = first; ci < sa->headxLen; ci++) {
560         okay = TRUE;
561 
562         PICODBG_TRACE(("iter: %d, type: %c", ci, sa->headx[ci].head.type));
563 
564         /* if not (WORDGRAPH or WORDINDEX) */
565         if ((sa->headx[ci].head.type != PICODATA_ITEM_WORDGRAPH) &&
566                 (sa->headx[ci].head.type != PICODATA_ITEM_WORDINDEX)  &&
567                 (sa->headx[ci].head.type != PICODATA_ITEM_WORDPHON)) {
568             continue;
569         }
570 
571         PICODBG_TRACE(("iter: %d, curPOS: %d", ci, sa->headx[ci].head.info1));
572 
573         /* no continue so far => at [ci] we have a WORDGRAPH / WORDINDEX item */
574         /* shift all elements one position to the left */
575         /* shift predicted values (history) */
576         for (i=1; i<half_nratt_posd; i++) {
577             valbuf[i-1] = valbuf[i];
578         }
579         /* insert previously predicted value (now history) */
580         valbuf[half_nratt_posd-1] = prevout;
581         /* shift not yet predicted values */
582         for (i=half_nratt_posd+1; i<PICOKDT_NRATT_POSD; i++) {
583             valbuf[i-1] = valbuf[i];
584         }
585         /* find next POS to the right and set valbuf[PICOKDT_NRATT_POSD-1] */
586         valbuf[PICOKDT_NRATT_POSD-1] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3);
587 
588         /* just to be on the safe side; the following should never happen */
589         if (sa->headx[ci].head.info1 != valbuf[half_nratt_posd]) {
590             PICODBG_WARN(("syncing POS"));
591             picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
592                                   NULL, NULL);
593             valbuf[half_nratt_posd] = sa->headx[ci].head.info1;
594         }
595 
596         curPOS = valbuf[half_nratt_posd];
597 
598         /* Check if POS disambiguation not needed */
599         if (picoktab_isUniquePos(sa->tabpos, (picoos_uint8) curPOS)) {
600             /* not needed */
601             inval = 0;
602             fallback = 0;
603             if (!picokdt_dtPosDreverseMapOutFixed(sa->dtposd, curPOS,
604                                        &prevout, &fallback)) {
605                 if (fallback) {
606                     prevout = fallback;
607 
608                 } else {
609                     PICODBG_ERROR(("problem doing reverse output mapping"));
610                     prevout = curPOS;
611                 }
612             }
613             PICODBG_DEBUG(("keeping: %d", sa->headx[ci].head.info1));
614             continue;
615         }
616 
617         /* assuming PICOKDT_NRATT_POSD == 7 */
618         PICODBG_DEBUG(("%d: [%d %d %d %d %d %d %d]",
619                        ci, valbuf[0], valbuf[1], valbuf[2],
620                        valbuf[3], valbuf[4], valbuf[5], valbuf[6]));
621 
622         /* no continue so far => POS disambiguation needed */
623         /* construct input vector, which is set in dtposd */
624         if (!picokdt_dtPosDconstructInVec(sa->dtposd, valbuf)) {
625             /* error constructing invec */
626             PICODBG_WARN(("problem with invec"));
627             picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
628                                   NULL, NULL);
629             okay = FALSE;
630         }
631         /* classify */
632         if (okay && (!picokdt_dtPosDclassify(sa->dtposd, &prevout))) {
633             /* error doing classification */
634             PICODBG_WARN(("problem classifying"));
635             picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
636                                   NULL, NULL);
637             okay = FALSE;
638         }
639         /* decompose */
640         if (okay && (!picokdt_dtPosDdecomposeOutClass(sa->dtposd, &dtres))) {
641             /* error decomposing */
642             PICODBG_WARN(("problem decomposing"));
643             picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
644                                   NULL, NULL);
645             okay = FALSE;
646         }
647         if (okay && dtres.set) {
648             PICODBG_DEBUG(("in: %d, out: %d", valbuf[3], dtres.class));
649         } else {
650             PICODBG_WARN(("problem disambiguating POS"));
651             dtres.class = PICODATA_ITEMINFO1_ERR;
652         }
653 
654         if (dtres.class > 255) {
655             PICODBG_WARN(("dt result outside valid range, setting pos to ERR"));
656             dtres.class = PICODATA_ITEMINFO1_ERR;
657         }
658 
659         sa->headx[ci].head.info1 = (picoos_uint8)dtres.class;
660         if (sa->headx[ci].head.type == PICODATA_ITEM_WORDINDEX) {
661             /* find pos/ind entry in cbuf matching unique,
662                disambiguated POS, adapt current headx cind/len
663                accordingly */
664             PICODBG_DEBUG(("select phon based on POS disambiguation"));
665             okay = FALSE;
666             for (i = 0; i < sa->headx[ci].head.len; i += PICOKLEX_POSIND_SIZE) {
667                 PICODBG_DEBUG(("comparing POS at cind + %d", i));
668                 if (picoktab_isPartOfPosGroup(sa->tabpos,
669                             (picoos_uint8)dtres.class,
670                             sa->cbuf1[sa->headx[ci].cind + i])) {
671                     PICODBG_DEBUG(("found match for entry %d",
672                                    i/PICOKLEX_POSIND_SIZE + 1));
673                     sa->headx[ci].cind += i;
674                     okay = TRUE;
675                     break;
676                 }
677             }
678             /* not finding a match is possible if posd predicts a POS that
679                is not part of any of the input POSes -> no warning */
680 #if defined(PICO_DEBUG)
681             if (!okay) {
682                 PICODBG_DEBUG(("no match found, selecting 1st entry"));
683             }
684 #endif
685             sa->headx[ci].head.len = PICOKLEX_POSIND_SIZE;
686         }
687     }
688     return PICO_OK;
689 }
690 
691 
692 /* ***********************************************************************/
693 /* PROCESS_WPHO functions, copy, lexindex, and g2p */
694 /* ***********************************************************************/
695 
696 /* ************** copy ***************/
697 
saCopyItemContent1to2(register picodata_ProcessingUnit this,register sa_subobj_t * sa,picoos_uint16 ind)698 static pico_status_t saCopyItemContent1to2(register picodata_ProcessingUnit this,
699                                            register sa_subobj_t *sa,
700                                            picoos_uint16 ind) {
701     picoos_uint16 i;
702     picoos_uint16 cind1;
703 
704     /* set headx.cind, and copy content, head unchanged */
705     cind1 = sa->headx[ind].cind;
706     sa->headx[ind].cind = sa->cbuf2Len;
707 
708     /* check cbufLen */
709     if (sa->headx[ind].head.len > (sa->cbuf2BufSize - sa->cbuf2Len)) {
710         sa->headx[ind].head.len = sa->cbuf2BufSize - sa->cbuf2Len;
711         PICODBG_WARN(("phones skipped"));
712         picoos_emRaiseWarning(this->common->em,
713                               PICO_WARN_INCOMPLETE, NULL, NULL);
714         if (sa->headx[ind].head.len == 0) {
715             sa->headx[ind].cind = 0;
716         }
717     }
718 
719     for (i = 0; i < sa->headx[ind].head.len; i++) {
720         sa->cbuf2[sa->cbuf2Len] = sa->cbuf1[cind1 + i];
721         sa->cbuf2Len++;
722     }
723 
724     PICODBG_DEBUG(("%c item, len: %d",
725                    sa->headx[ind].head.type, sa->headx[ind].head.len));
726 
727     return PICO_OK;
728 }
729 
730 
731 /* ************** lexindex ***************/
732 
saLexIndLookup(register picodata_ProcessingUnit this,register sa_subobj_t * sa,picoklex_Lex lex,picoos_uint16 ind)733 static pico_status_t saLexIndLookup(register picodata_ProcessingUnit this,
734                                     register sa_subobj_t *sa,
735                                     picoklex_Lex lex,
736                                     picoos_uint16 ind) {
737     picoos_uint8 pos;
738     picoos_uint8 *phones;
739     picoos_uint8 plen;
740     picoos_uint16 i;
741 
742     if (picoklex_lexIndLookup(lex, &(sa->cbuf1[sa->headx[ind].cind + 1]),
743                               PICOKLEX_IND_SIZE, &pos, &phones, &plen)) {
744         sa->headx[ind].cind = sa->cbuf2Len;
745 
746         /* check cbufLen */
747         if (plen > (sa->cbuf2BufSize - sa->cbuf2Len)) {
748             plen = sa->cbuf2BufSize - sa->cbuf2Len;
749             PICODBG_WARN(("phones skipped"));
750             picoos_emRaiseWarning(this->common->em,
751                                   PICO_WARN_INCOMPLETE, NULL, NULL);
752             if (plen == 0) {
753                 sa->headx[ind].cind = 0;
754             }
755         }
756 
757         /* set item head, info1, info2 unchanged */
758         sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON;
759         sa->headx[ind].head.len = plen;
760 
761         for (i = 0; i < plen; i++) {
762             sa->cbuf2[sa->cbuf2Len] = phones[i];
763             sa->cbuf2Len++;
764         }
765 
766         PICODBG_DEBUG(("%c item, pos: %d, plen: %d",
767                        PICODATA_ITEM_WORDPHON, pos, plen));
768 
769     } else {
770         PICODBG_WARN(("lexIndLookup problem"));
771         picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM,
772                               NULL, NULL);
773     }
774     return PICO_OK;
775 }
776 
777 
778 
779 /* ************** g2p ***************/
780 
781 
782 /* Name    :   saGetNvowel
783    Function:   returns vowel info in a word or word seq
784    Input   :   sInChar         the grapheme string to be converted in phoneme
785                inLen           number of bytes in grapheme buffer
786                inPos           start position of current grapheme (0..inLen-1)
787    Output  :   nVow            number of vowels in the word
788                nVord           vowel order in the word
789    Returns :   TRUE: processing successful;  FALSE: errors
790 */
saGetNrVowel(register picodata_ProcessingUnit this,register sa_subobj_t * sa,const picoos_uint8 * sInChar,const picoos_uint16 inLen,const picoos_uint8 inPos,picoos_uint8 * nVow,picoos_uint8 * nVord)791 static picoos_uint8 saGetNrVowel(register picodata_ProcessingUnit this,
792                                  register sa_subobj_t *sa,
793                                  const picoos_uint8 *sInChar,
794                                  const picoos_uint16 inLen,
795                                  const picoos_uint8 inPos,
796                                  picoos_uint8 *nVow,
797                                  picoos_uint8 *nVord) {
798     picoos_uint32 nCount;
799     picoos_uint32 pos;
800     picoos_uint8 cstr[PICOBASE_UTF8_MAXLEN + 1];
801 
802     /*defaults*/
803     *nVow = 0;
804     *nVord = 0;
805     /*1:check wether the current char is a vowel*/
806     pos = inPos;
807     if (!picobase_get_next_utf8char(sInChar, inLen, &pos, cstr) ||
808         !picoktab_hasVowellikeProp(sa->tabgraphs, cstr, PICOBASE_UTF8_MAXLEN)) {
809         return FALSE;
810     }
811     /*2:count number of vowels in current word and find vowel order*/
812     for (nCount = 0; nCount < inLen; ) {
813       if (!picobase_get_next_utf8char(sInChar, inLen, &nCount, cstr)) {
814             return FALSE;
815       }
816         if (picoktab_hasVowellikeProp(sa->tabgraphs, cstr,
817                                       PICOBASE_UTF8_MAXLEN)) {
818             (*nVow)++;
819             if (nCount == pos) {
820                 (*nVord) = (*nVow);
821         }
822         }
823     }
824     return TRUE;
825 }
826 
827 
828 /* do g2p for a full word, right-to-left */
saDoG2P(register picodata_ProcessingUnit this,register sa_subobj_t * sa,const picoos_uint8 * graph,const picoos_uint8 graphlen,const picoos_uint8 pos,picoos_uint8 * phones,const picoos_uint16 phonesmaxlen,picoos_uint16 * plen)829 static picoos_uint8 saDoG2P(register picodata_ProcessingUnit this,
830                             register sa_subobj_t *sa,
831                             const picoos_uint8 *graph,
832                             const picoos_uint8 graphlen,
833                             const picoos_uint8 pos,
834                             picoos_uint8 *phones,
835                             const picoos_uint16 phonesmaxlen,
836                             picoos_uint16 *plen) {
837     picoos_uint16 outNp1Ch; /*last 3 outputs produced*/
838     picoos_uint16 outNp2Ch;
839     picoos_uint16 outNp3Ch;
840     picoos_uint8 nPrimary;
841     picoos_uint8 nCount;
842     picoos_uint32 utfpos;
843     picoos_uint16 nOutVal;
844     picoos_uint8 okay;
845     picoos_uint16 phonesind;
846     picoos_uint8 nrvow;
847     picoos_uint8 ordvow;
848     picokdt_classify_vecresult_t dtresv;
849     picoos_uint16 i;
850 
851     *plen = 0;
852     okay = TRUE;
853 
854     /* use sa->tmpbuf[PICOSA_MAXITEMSIZE] to temporarly store the
855        phones which are predicted in reverse order. Once all are
856        available put them in phones in usuable order. phonesind is
857        used to fille item in reverse order starting at the end of
858        tmpbuf. */
859     phonesind = PICOSA_MAXITEMSIZE - 1;
860 
861     /* prepare the data for loop operations */
862     outNp1Ch = PICOKDT_HISTORY_ZERO;
863     outNp2Ch = PICOKDT_HISTORY_ZERO;
864     outNp3Ch = PICOKDT_HISTORY_ZERO;
865 
866     /* inner loop */
867     nPrimary = 0;
868 
869     /* ************************************************/
870     /* go backward grapheme by grapheme, it's utf8... */
871     /* ************************************************/
872 
873     /* set start nCount to position of start of last utfchar */
874     /* ! watch out! somethimes starting at 1, sometimes at 0,
875        ! sometimes counting per byte, sometimes per UTF8 char */
876     /* nCount is (start position + 1) of utf8 char */
877     utfpos = graphlen;
878     if (picobase_get_prev_utf8charpos(graph, 0, &utfpos)) {
879         nCount = utfpos + 1;
880     } else {
881         /* should not occurr */
882         PICODBG_ERROR(("invalid utf8 string, graphlen: %d", graphlen));
883         return FALSE;
884     }
885 
886     while (nCount > 0) {
887         PICODBG_TRACE(("right-to-left g2p, count: %d", nCount));
888         okay = TRUE;
889 
890         if (!saGetNrVowel(this, sa, graph, graphlen, nCount-1, &nrvow,
891                           &ordvow)) {
892             nrvow = 0;
893             ordvow = 0;
894         }
895 
896         /* prepare input vector, set inside tree object invec,
897          * g2pBuildVector will call the constructInVec tree method */
898         if (!picokdt_dtG2PconstructInVec(sa->dtg2p,
899                                          graph, /*grapheme start*/
900                                          graphlen, /*grapheme length*/
901                                          nCount-1, /*grapheme current position*/
902                                          pos, /*Word POS*/
903                                          nrvow, /*nr vowels if vowel, 0 else */
904                                          ordvow, /*ord of vowel if vowel, 0 el*/
905                                          &nPrimary,  /*primary stress flag*/
906                                          outNp1Ch, /*Right phoneme context +1*/
907                                          outNp2Ch, /*Right phoneme context +2*/
908                                          outNp3Ch)) { /*Right phon context +3*/
909             /*Errors in preparing the input vector : skip processing*/
910             PICODBG_WARN(("problem with invec"));
911             picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
912                                   NULL, NULL);
913             okay = FALSE;
914         }
915 
916         /* classify using the invec in the tree object and save the direct
917            tree output also in the tree object */
918         if (okay && (!picokdt_dtG2Pclassify(sa->dtg2p, &nOutVal))) {
919             /* error doing classification */
920             PICODBG_WARN(("problem classifying"));
921             picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
922                                   NULL, NULL);
923             okay = FALSE;
924         }
925 
926         /* decompose the invec in the tree object and return result in dtresv */
927         if (okay && (!picokdt_dtG2PdecomposeOutClass(sa->dtg2p, &dtresv))) {
928             /* error decomposing */
929             PICODBG_WARN(("problem decomposing"));
930             picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
931                                   NULL, NULL);
932             okay = FALSE;
933         }
934 
935         if (okay) {
936             if ((dtresv.nr == 0) || (dtresv.classvec[0] == PICOKDT_EPSILON)) {
937                 /* no phones to be added */
938                 PICODBG_TRACE(("epsilon, no phone added %c", graph[nCount-1]));
939                 ;
940             } else {
941                 /* add decomposed output to tmpbuf, reverse order */
942                 for (i = dtresv.nr; ((((PICOSA_MAXITEMSIZE - 1) -
943                                        phonesind)<phonesmaxlen) &&
944                                      (i > 0)); ) {
945                     i--;
946                     PICODBG_TRACE(("%c %d",graph[nCount-1],dtresv.classvec[i]));
947                     if (dtresv.classvec[i] > 255) {
948                         PICODBG_WARN(("dt result outside valid range, "
949                                       "skipping phone"));
950                         continue;
951                     }
952                     sa->tmpbuf[phonesind--] = (picoos_uint8)dtresv.classvec[i];
953                     if (!nPrimary) {
954                         if (picoktab_isPrimstress(sa->tabphones,
955                           (picoos_uint8)dtresv.classvec[i])) {
956                             nPrimary = 1;
957             }
958                     }
959                     (*plen)++;
960                 }
961                 if (i > 0) {
962                     PICODBG_WARN(("phones skipped"));
963                     picoos_emRaiseWarning(this->common->em,
964                                           PICO_WARN_INCOMPLETE, NULL, NULL);
965                 }
966             }
967         }
968 
969         /*shift tree output history and update*/
970         outNp3Ch = outNp2Ch;
971         outNp2Ch = outNp1Ch;
972         outNp1Ch = nOutVal;
973 
974         /* go backward one utf8 char */
975         /* nCount is in +1 domain */
976         if (nCount <= 1) {
977             /* end of str */
978             nCount = 0;
979         } else {
980             utfpos = nCount - 1;
981             if (!picobase_get_prev_utf8charpos(graph, 0, &utfpos)) {
982                 /* should not occur */
983                 PICODBG_ERROR(("invalid utf8 string, utfpos: %d", utfpos));
984                 return FALSE;
985             } else {
986                 nCount = utfpos + 1;
987             }
988         }
989     }
990 
991     /* a must be: (PICOSA_MAXITEMSIZE-1) - phonesind == *plen */
992     /* now that we have all phone IDs, copy in correct order to phones */
993     /* phonesind point to next free slot in the reverse domainn,
994        ie. inc first */
995     phonesind++;
996     for (i = 0; i < *plen; i++, phonesind++) {
997         phones[i] = sa->tmpbuf[phonesind];
998     }
999     return TRUE;
1000 }
1001 
1002 
1003 /* item in headx[ind]/cbuf1, out: modified headx and cbuf2 */
1004 
saGraphemeToPhoneme(register picodata_ProcessingUnit this,register sa_subobj_t * sa,picoos_uint16 ind)1005 static pico_status_t saGraphemeToPhoneme(register picodata_ProcessingUnit this,
1006                                          register sa_subobj_t *sa,
1007                                          picoos_uint16 ind) {
1008     picoos_uint16 plen;
1009 
1010     PICODBG_TRACE(("starting g2p"));
1011 
1012     if (saDoG2P(this, sa, &(sa->cbuf1[sa->headx[ind].cind]),
1013                 sa->headx[ind].head.len, sa->headx[ind].head.info1,
1014                 &(sa->cbuf2[sa->cbuf2Len]), (sa->cbuf2BufSize - sa->cbuf2Len),
1015                 &plen)) {
1016 
1017         /* check of cbuf2Len done in saDoG2P, phones skipped if needed */
1018         if (plen > 255) {
1019             PICODBG_WARN(("maximum number of phones exceeded (%d), skipping",
1020                           plen));
1021             plen = 255;
1022         }
1023 
1024         /* set item head, info1, info2 unchanged */
1025         sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON;
1026         sa->headx[ind].head.len = (picoos_uint8)plen;
1027         sa->headx[ind].cind = sa->cbuf2Len;
1028         sa->cbuf2Len += plen;
1029         PICODBG_DEBUG(("%c item, plen: %d",
1030                        PICODATA_ITEM_WORDPHON, plen));
1031     } else {
1032         PICODBG_WARN(("problem doing g2p"));
1033         picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM,
1034                               NULL, NULL);
1035     }
1036     return PICO_OK;
1037 }
1038 
1039 
1040 /* ***********************************************************************/
1041 /*                          extract phonemes of an item into a phonBuf   */
1042 /* ***********************************************************************/
1043 
saAddPhoneme(register sa_subobj_t * sa,picoos_uint16 pos,picoos_uint16 sym)1044 static pico_status_t saAddPhoneme(register sa_subobj_t *sa, picoos_uint16 pos, picoos_uint16 sym) {
1045     /* picoos_uint8 plane, unshifted; */
1046 
1047     /* just for debuging */
1048     /*
1049     unshifted = picotrns_unplane(sym,&plane);
1050     PICODBG_DEBUG(("adding %i/%i (%c on plane %i) at phonBuf[%i]",pos,sym,unshifted,plane,sa->phonWritePos));
1051     */
1052     if (PICOTRNS_MAX_NUM_POSSYM <= sa->phonWritePos) {
1053         /* not an error! */
1054         PICODBG_DEBUG(("couldn't add because phon buffer full"));
1055         return PICO_EXC_BUF_OVERFLOW;
1056     } else {
1057         sa->phonBuf[sa->phonWritePos].pos = pos;
1058         sa->phonBuf[sa->phonWritePos].sym = sym;
1059         sa->phonWritePos++;
1060         return PICO_OK;
1061     }
1062 }
1063 
1064 /*
1065 static pico_status_t saAddStartPhoneme(register sa_subobj_t *sa) {
1066     return saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1067             (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId);
1068 }
1069 
1070 
1071 static pico_status_t saAddTermPhoneme(register sa_subobj_t *sa) {
1072     return saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1073             (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId);
1074 }
1075 
1076 */
1077 
saExtractPhonemes(register picodata_ProcessingUnit this,register sa_subobj_t * sa,picoos_uint16 pos,picodata_itemhead_t * head,const picoos_uint8 * content)1078 static pico_status_t saExtractPhonemes(register picodata_ProcessingUnit this,
1079         register sa_subobj_t *sa, picoos_uint16 pos,
1080         picodata_itemhead_t* head, const picoos_uint8* content)
1081 {
1082     pico_status_t rv= PICO_OK;
1083     picoos_uint8 i;
1084     picoos_int16 fstSymbol;
1085 #if defined(PICO_DEBUG)
1086     picoos_char msgstr[SA_MSGSTR_SIZE];
1087 #endif
1088 
1089     PICODBG_TRACE(("doing item %s",
1090                     picodata_head_to_string(head,msgstr,SA_MSGSTR_SIZE)));
1091     /*
1092      Items  considered in a transduction are WORDPHON item. its starting offset within the inBuf is given as
1093      'pos'.
1094      Elements that go into the transduction receive "their" position in the buffer.
1095      */
1096     sa->phonWritePos = 0;
1097     /* WORDPHON(POS,WACC)phon */
1098     rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1099                 (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId);
1100     for (i = 0; i < head->len; i++) {
1101         fstSymbol = /* (PICOKFST_PLANE_PHONEMES << 8) + */content[i];
1102         /*  */
1103         PICODBG_TRACE(("adding phoneme %c",fstSymbol));
1104         rv = saAddPhoneme(sa, pos+PICODATA_ITEM_HEADSIZE+i, fstSymbol);
1105     }
1106     rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1107                 (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId);
1108     sa->nextReadPos = pos + PICODATA_ITEM_HEADSIZE +  head->len;
1109     return rv;
1110 }
1111 
1112 
1113 #define SA_POSSYM_OK           0
1114 #define SA_POSSYM_OUT_OF_RANGE 1
1115 #define SA_POSSYM_END          2
1116 #define SA_POSSYM_INVALID     -3
1117 /* *readPos is the next position in phonBuf to be read, and *writePos is the first position not to be read (may be outside
1118  * buf).
1119  * 'rangeEnd' is the first possym position outside the desired range.
1120  * Possible return values:
1121  * SA_POSSYM_OK            : 'pos' and 'sym' are set to the read possym, *readPos is advanced
1122  * SA_POSSYM_OUT_OF_RANGE  : pos is out of range. 'pos' is set to that of the read possym, 'sym' is undefined
1123  * SA_POSSYM_UNDERFLOW     : no more data in buf. 'pos' is set to PICOTRNS_POS_INVALID,    'sym' is undefined
1124  * SA_POSSYM_INVALID       : "strange" pos.       'pos' is set to PICOTRNS_POS_INVALID,    'sym' is undefined
1125  */
getNextPosSym(sa_subobj_t * sa,picoos_int16 * pos,picoos_int16 * sym,picoos_int16 rangeEnd)1126 static pico_status_t getNextPosSym(sa_subobj_t * sa, picoos_int16 * pos, picoos_int16 * sym,
1127         picoos_int16 rangeEnd) {
1128     /* skip POS_IGNORE */
1129     while ((sa->phonReadPos < sa->phonWritePos) && (PICOTRNS_POS_IGNORE == sa->phonBuf[sa->phonReadPos].pos))  {
1130         PICODBG_DEBUG(("ignoring phone at sa->phonBuf[%i] because it has pos==IGNORE",sa->phonReadPos));
1131         sa->phonReadPos++;
1132     }
1133     if ((sa->phonReadPos < sa->phonWritePos)) {
1134         *pos = sa->phonBuf[sa->phonReadPos].pos;
1135         if ((PICOTRNS_POS_INSERT == *pos) || ((0 <= *pos) && (*pos < rangeEnd))) {
1136             *sym = sa->phonBuf[sa->phonReadPos++].sym;
1137             return SA_POSSYM_OK;
1138         } else if (*pos < 0){ /* *pos is "strange" (e.g. POS_INVALID) */
1139             return SA_POSSYM_INVALID;
1140         } else {
1141             return SA_POSSYM_OUT_OF_RANGE;
1142         }
1143     } else {
1144         /* no more possyms to read */
1145         *pos = PICOTRNS_POS_INVALID;
1146         return SA_POSSYM_END;
1147     }
1148 }
1149 
1150 
1151 
1152 
1153 /* ***********************************************************************/
1154 /*                          saStep function                              */
1155 /* ***********************************************************************/
1156 
1157 /*
1158 complete phrase processed in one step, if not fast enough -> rework
1159 
1160 init, collect into internal buffer, process, and then feed to
1161 output buffer
1162 
1163 init state: INIT ext           ext
1164 state trans:     in hc1  hc2   out
1165 
1166 INIT | putItem   =  0    0    +1      | BUSY  -> COLL (put B-SBEG item,
1167                                                    set do-init to false)
1168 
1169                                     inspace-ok-hc1
1170                                   needs-more-items-(phrase-or-flush)
1171 COLL1 |getItems -n +n             0 1 | ATOMIC -> PPOSD     (got items,
1172                                                       if flush set do-init)
1173 COLL2 |getItems -n +n             1 0 | ATOMIC -> PPOSD (got items, forced)
1174 COLL3 |getItems -n +n             1 1 | IDLE          (got items, need more)
1175 COLL4 |getItems  =  =             1 1 | IDLE             (got no items)
1176 
1177 PPOSD | posd     = ~n~n               | BUSY     -> PWP     (posd done)
1178 PWP   | lex/g2p  = ~n-n  0+n          | BUSY     -> PPHR    (lex/g2p done)
1179 PPHR  | phr      = -n 0 +m=n          | BUSY     -> PACC    (phr done, m>=n)
1180 PACC  | acc      =  0 0 ~m=n          | BUSY     -> FEED    (acc done)
1181 
1182                                   doinit-flag
1183 FEED | putItems  0  0 0 -m-n  +m  0   | BUSY -> COLL    (put items)
1184 FEED | putItems  0  0 0 -m-n  +m  1   | BUSY -> INIT    (put items)
1185 FEED | putItems  0  0 0 -d-d  +d      | OUT_FULL        (put some items)
1186 */
1187 
saStep(register picodata_ProcessingUnit this,picoos_int16 mode,picoos_uint16 * numBytesOutput)1188 static picodata_step_result_t saStep(register picodata_ProcessingUnit this,
1189                                      picoos_int16 mode,
1190                                      picoos_uint16 *numBytesOutput) {
1191     register sa_subobj_t *sa;
1192     pico_status_t rv = PICO_OK;
1193     pico_status_t rvP = PICO_OK;
1194     picoos_uint16 blen = 0;
1195     picoos_uint16 clen = 0;
1196     picoos_uint16 i;
1197     picoklex_Lex lex;
1198 
1199 
1200     if (NULL == this || NULL == this->subObj) {
1201         return PICODATA_PU_ERROR;
1202     }
1203     sa = (sa_subobj_t *) this->subObj;
1204     mode = mode;        /* avoid warning "var not used in this function"*/
1205     *numBytesOutput = 0;
1206     while (1) { /* exit via return */
1207         PICODBG_DEBUG(("doing state %i, hLen|c1Len|c2Len: %d|%d|%d",
1208                        sa->procState, sa->headxLen, sa->cbuf1Len,
1209                        sa->cbuf2Len));
1210 
1211         switch (sa->procState) {
1212 
1213             /* *********************************************************/
1214             /* collect state: get item(s) from charBuf and store in
1215              * internal buffers, need a complete punctuation-phrase
1216              */
1217             case SA_STEPSTATE_COLLECT:
1218 
1219                 while (sa->inspaceok && sa->needsmoreitems
1220                        && (PICO_OK ==
1221                            (rv = picodata_cbGetItem(this->cbIn, sa->tmpbuf,
1222                                             PICOSA_MAXITEMSIZE, &blen)))) {
1223                     rvP = picodata_get_itemparts(sa->tmpbuf,
1224                                             PICOSA_MAXITEMSIZE,
1225                                             &(sa->headx[sa->headxLen].head),
1226                                             &(sa->cbuf1[sa->cbuf1Len]),
1227                                             sa->cbuf1BufSize-sa->cbuf1Len,
1228                                             &clen);
1229                     if (rvP != PICO_OK) {
1230                         PICODBG_ERROR(("problem getting item parts"));
1231                         picoos_emRaiseException(this->common->em, rvP,
1232                                                 NULL, NULL);
1233                         return PICODATA_PU_ERROR;
1234                     }
1235 
1236                     /* if CMD(...FLUSH...) -> PUNC(...FLUSH...),
1237                        construct PUNC-FLUSH item in headx */
1238                     if ((sa->headx[sa->headxLen].head.type ==
1239                          PICODATA_ITEM_CMD) &&
1240                         (sa->headx[sa->headxLen].head.info1 ==
1241                          PICODATA_ITEMINFO1_CMD_FLUSH)) {
1242                         sa->headx[sa->headxLen].head.type =
1243                             PICODATA_ITEM_PUNC;
1244                         sa->headx[sa->headxLen].head.info1 =
1245                             PICODATA_ITEMINFO1_PUNC_FLUSH;
1246                         sa->headx[sa->headxLen].head.info2 =
1247                             PICODATA_ITEMINFO2_PUNC_SENT_T;
1248                         sa->headx[sa->headxLen].head.len = 0;
1249                     }
1250 
1251                     /* convert opening phoneme command to WORDPHON
1252                      * and assign user-POS XX to it (Bug 432) */
1253                     sa->headx[sa->headxLen].cind = sa->cbuf1Len;
1254                     /* maybe overwritten later */
1255                     if ((sa->headx[sa->headxLen].head.type ==
1256                         PICODATA_ITEM_CMD) &&
1257                        (sa->headx[sa->headxLen].head.info1 ==
1258                         PICODATA_ITEMINFO1_CMD_PHONEME)&&
1259                         (sa->headx[sa->headxLen].head.info2 ==
1260                          PICODATA_ITEMINFO2_CMD_START)) {
1261                         picoos_uint8 i;
1262                         picoos_uint8 wordsep = picoktab_getWordboundID(sa->tabphones);
1263                         PICODBG_INFO(("wordsep id is %i",wordsep));
1264                         sa->headx[sa->headxLen].head.type = PICODATA_ITEM_WORDPHON;
1265                         sa->headx[sa->headxLen].head.info1 = PICODATA_POS_XX;
1266                         sa->headx[sa->headxLen].head.info2 = PICODATA_ITEMINFO2_NA;
1267                         /* cut off additional words */
1268                         i = 0;
1269                         while ((i < sa->headx[sa->headxLen].head.len) && (wordsep != sa->cbuf1[sa->headx[sa->headxLen].cind+i])) {
1270                             PICODBG_INFO(("accepting phoneme %i",sa->cbuf1[sa->headx[sa->headxLen].cind+i]));
1271 
1272                             i++;
1273                         }
1274                         if (i < sa->headx[sa->headxLen].head.len) {
1275                             PICODBG_INFO(("cutting off superfluous phonetic words at %i",i));
1276                             sa->headx[sa->headxLen].head.len = i;
1277                         }
1278                     }
1279 
1280                     /* check/set needsmoreitems */
1281                     if (sa->headx[sa->headxLen].head.type ==
1282                         PICODATA_ITEM_PUNC) {
1283                         sa->needsmoreitems = FALSE;
1284                     }
1285 
1286                     /* check/set inspaceok, keep spare slot for forcing */
1287                     if ((sa->headxLen >= (PICOSA_MAXNR_HEADX - 2)) ||
1288                         ((sa->cbuf1BufSize - sa->cbuf1Len) <
1289                          PICOSA_MAXITEMSIZE)) {
1290                         sa->inspaceok = FALSE;
1291                     }
1292 
1293                     if (clen > 0) {
1294                         sa->headx[sa->headxLen].cind = sa->cbuf1Len;
1295                         sa->cbuf1Len += clen;
1296                     } else {
1297                         sa->headx[sa->headxLen].cind = 0;
1298                     }
1299                     sa->headxLen++;
1300                 }
1301 
1302                 if (!sa->needsmoreitems) {
1303                     /* 1, phrase buffered */
1304                     sa->procState = SA_STEPSTATE_PROCESS_POSD;
1305                     return PICODATA_PU_ATOMIC;
1306                 } else if (!sa->inspaceok) {
1307                     /* 2, forced phrase end */
1308                     /* at least one slot is still free, use it to
1309                        force a trailing PUNC item */
1310                     sa->headx[sa->headxLen].head.type = PICODATA_ITEM_PUNC;
1311                     sa->headx[sa->headxLen].head.info1 =
1312                         PICODATA_ITEMINFO1_PUNC_PHRASEEND;
1313                     sa->headx[sa->headxLen].head.info2 =
1314                         PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED;
1315                     sa->headx[sa->headxLen].head.len = 0;
1316                     sa->needsmoreitems = FALSE; /* not really needed for now */
1317                     sa->headxLen++;
1318                     PICODBG_WARN(("forcing phrase end, added PUNC_PHRASEEND"));
1319                     picoos_emRaiseWarning(this->common->em,
1320                                           PICO_WARN_FALLBACK, NULL,
1321                                           (picoos_char *)"forced phrase end");
1322                     sa->procState = SA_STEPSTATE_PROCESS_POSD;
1323                     return PICODATA_PU_ATOMIC;
1324                 } else if (rv == PICO_EOF) {
1325                     /* 3, 4 */
1326                     return PICODATA_PU_IDLE;
1327                 } else if ((rv == PICO_EXC_BUF_UNDERFLOW) ||
1328                            (rv == PICO_EXC_BUF_OVERFLOW)) {
1329                     /* error, no valid item in cb (UNDER) */
1330                     /*        or tmpbuf not large enough, not possible (OVER) */
1331                     /* no exception raised, left for ctrl to handle */
1332                     PICODBG_ERROR(("buffer under/overflow, rv: %d", rv));
1333                     return PICODATA_PU_ERROR;
1334                 } else {
1335                     /* error, only possible if cbGetItem implementation
1336                        changes without this function being adapted*/
1337                     PICODBG_ERROR(("untreated return value, rv: %d", rv));
1338                     return PICODATA_PU_ERROR;
1339                 }
1340                 break;
1341 
1342 
1343             /* *********************************************************/
1344             /* process posd state: process items in headx/cbuf1
1345              * and change in place
1346              */
1347             case SA_STEPSTATE_PROCESS_POSD:
1348                 /* ensure there is an item in inBuf */
1349                 if (sa->headxLen > 0) {
1350                     /* we have a phrase in headx, cbuf1 (can be
1351                        single PUNC item without POS), do pos disamb */
1352                     if (PICO_OK != saDisambPos(this, sa)) {
1353                         picoos_emRaiseException(this->common->em,
1354                                                 PICO_ERR_OTHER, NULL, NULL);
1355                         return PICODATA_PU_ERROR;
1356                     }
1357                     sa->procState = SA_STEPSTATE_PROCESS_WPHO;
1358 
1359                 } else if (sa->headxLen == 0) {    /* no items in inBuf */
1360                     PICODBG_WARN(("no items in inBuf"));
1361                     sa->procState = SA_STEPSTATE_COLLECT;
1362                     return PICODATA_PU_BUSY;
1363                 }
1364 
1365 #if defined (PICO_DEBUG)
1366                 if (1) {
1367                     picoos_uint8 i, j, ittype;
1368                     for (i = 0; i < sa->headxLen; i++) {
1369                         ittype = sa->headx[i].head.type;
1370                         PICODBG_INFO_CTX();
1371                         PICODBG_INFO_MSG(("sa-d: ("));
1372                         PICODBG_INFO_MSG(("'%c',", ittype));
1373                         if ((32 <= sa->headx[i].head.info1) &&
1374                             (sa->headx[i].head.info1 < 127) &&
1375                             (ittype != PICODATA_ITEM_WORDGRAPH) &&
1376                             (ittype != PICODATA_ITEM_WORDINDEX)) {
1377                             PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1));
1378                         } else {
1379                             PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1));
1380                         }
1381                         if ((32 <= sa->headx[i].head.info2) &&
1382                             (sa->headx[i].head.info2 < 127)) {
1383                             PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2));
1384                         } else {
1385                             PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2));
1386                         }
1387                         PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len));
1388 
1389                         for (j = 0; j < sa->headx[i].head.len; j++) {
1390                             if ((ittype == PICODATA_ITEM_WORDGRAPH) ||
1391                                 (ittype == PICODATA_ITEM_CMD)) {
1392                                 PICODBG_INFO_MSG(("%c",
1393                                         sa->cbuf1[sa->headx[i].cind+j]));
1394                             } else {
1395                                 PICODBG_INFO_MSG(("%4d",
1396                                         sa->cbuf1[sa->headx[i].cind+j]));
1397                             }
1398                         }
1399                         PICODBG_INFO_MSG(("\n"));
1400                     }
1401                 }
1402 #endif
1403 
1404                 break;
1405 
1406 
1407             /* *********************************************************/
1408             /* process wpho state: process items in headx/cbuf1 and modify
1409              * headx in place and fill cbuf2
1410              */
1411             case SA_STEPSTATE_PROCESS_WPHO:
1412                 /* ensure there is an item in inBuf */
1413                 if (sa->headxLen > 0) {
1414                     /* we have a phrase in headx, cbuf1 (can be single
1415                        PUNC item), do lex lookup, g2p, or copy */
1416 
1417                     /* check if cbuf2 is empty as it should be */
1418                     if (sa->cbuf2Len > 0) {
1419                         /* enforce emptiness */
1420                         PICODBG_WARN(("forcing empty cbuf2, discarding buf"));
1421                         picoos_emRaiseWarning(this->common->em,
1422                                               PICO_WARN_PU_DISCARD_BUF,
1423                                               NULL, NULL);
1424                     }
1425 
1426                     /* cbuf2 overflow avoided in saGrapheme*, saLexInd*,
1427                        saCopyItem*, phones skipped if needed */
1428                     for (i = 0; i < sa->headxLen; i++) {
1429                         switch (sa->headx[i].head.type) {
1430                             case PICODATA_ITEM_WORDGRAPH:
1431                                 if (PICO_OK != saGraphemeToPhoneme(this, sa,
1432                                                                    i)) {
1433                                     /* not possible, phones skipped if needed */
1434                                     picoos_emRaiseException(this->common->em,
1435                                                             PICO_ERR_OTHER,
1436                                                             NULL, NULL);
1437                                     return PICODATA_PU_ERROR;
1438                                 }
1439                                 break;
1440                             case PICODATA_ITEM_WORDINDEX:
1441                                 if (0 == sa->headx[i].head.info2) {
1442                                   lex = sa->lex;
1443                                 } else {
1444                                     lex = sa->ulex[sa->headx[i].head.info2-1];
1445                                 }
1446                                 if (PICO_OK != saLexIndLookup(this, sa, lex, i)) {
1447                                     /* not possible, phones skipped if needed */
1448                                     picoos_emRaiseException(this->common->em,
1449                                                             PICO_ERR_OTHER,
1450                                                             NULL, NULL);
1451                                     return PICODATA_PU_ERROR;
1452                                 }
1453                                 break;
1454                             default:
1455                                 /* copy item unmodified, ie. headx untouched,
1456                                    content from cbuf1 to cbuf2 */
1457                                 if (PICO_OK != saCopyItemContent1to2(this, sa,
1458                                                                      i)) {
1459                                     /* not possible, phones skipped if needed */
1460                                     picoos_emRaiseException(this->common->em,
1461                                                             PICO_ERR_OTHER,
1462                                                             NULL, NULL);
1463                                     return PICODATA_PU_ERROR;
1464                                 }
1465                                 break;
1466                         }
1467                     }
1468                     /* set cbuf1 to empty */
1469                     sa->cbuf1Len = 0;
1470                     sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE;
1471 
1472                 } else if (sa->headxLen == 0) {    /* no items in inBuf */
1473                     PICODBG_WARN(("no items in inBuf"));
1474                     sa->procState = SA_STEPSTATE_COLLECT;
1475                     return PICODATA_PU_BUSY;
1476                 }
1477 
1478 #if defined (PICO_DEBUG)
1479                 if (1) {
1480                     picoos_uint8 i, j, ittype;
1481                     for (i = 0; i < sa->headxLen; i++) {
1482                         ittype = sa->headx[i].head.type;
1483                         PICODBG_INFO_CTX();
1484                         PICODBG_INFO_MSG(("sa-g: ("));
1485                         PICODBG_INFO_MSG(("'%c',", ittype));
1486                         if ((32 <= sa->headx[i].head.info1) &&
1487                             (sa->headx[i].head.info1 < 127) &&
1488                             (ittype != PICODATA_ITEM_WORDPHON)) {
1489                             PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1));
1490                         } else {
1491                             PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1));
1492                         }
1493                         if ((32 <= sa->headx[i].head.info2) &&
1494                             (sa->headx[i].head.info2 < 127)) {
1495                             PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2));
1496                         } else {
1497                             PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2));
1498                         }
1499                         PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len));
1500 
1501                         for (j = 0; j < sa->headx[i].head.len; j++) {
1502                             if ((ittype == PICODATA_ITEM_CMD)) {
1503                                 PICODBG_INFO_MSG(("%c",
1504                                         sa->cbuf2[sa->headx[i].cind+j]));
1505                             } else {
1506                                 PICODBG_INFO_MSG(("%4d",
1507                                         sa->cbuf2[sa->headx[i].cind+j]));
1508                             }
1509                         }
1510                         PICODBG_INFO_MSG(("\n"));
1511                     }
1512                 }
1513 #endif
1514 
1515                 break;
1516 
1517 
1518                 /* *********************************************************/
1519                 /* transduction parse state: extract phonemes of item in internal outBuf */
1520            case SA_STEPSTATE_PROCESS_TRNS_PARSE:
1521 
1522                 PICODBG_DEBUG(("transduce item (bot, remain): (%d, %d)",
1523                                 sa->headxBottom, sa->headxLen));
1524 
1525                 /* check for termination condition first */
1526                 if (0 == sa->headxLen) {
1527                     /* reset headx, cbuf2 */
1528                     sa->headxBottom = 0;
1529                     sa->cbuf2Len = 0;
1530                     /* reset collect state support variables */
1531                     sa->inspaceok = TRUE;
1532                     sa->needsmoreitems = TRUE;
1533 
1534                     sa->procState = SA_STEPSTATE_COLLECT;
1535                     return PICODATA_PU_BUSY;
1536                 }
1537 
1538                 sa->procState = SA_STEPSTATE_FEED;
1539                 /* copy item unmodified */
1540                 rv = picodata_put_itemparts(
1541                         &(sa->headx[sa->headxBottom].head),
1542                         &(sa->cbuf2[sa->headx[sa->headxBottom].cind]),
1543                         sa->headx[sa->headxBottom].head.len, sa->tmpbuf,
1544                         PICOSA_MAXITEMSIZE, &blen);
1545 
1546                 if (PICODATA_ITEM_WORDPHON == sa->headx[sa->headxBottom].head.type) {
1547                    PICODBG_DEBUG(("PARSE found WORDPHON"));
1548                    rv = saExtractPhonemes(this, sa, 0, &(sa->headx[sa->headxBottom].head),
1549                            &(sa->cbuf2[sa->headx[sa->headxBottom].cind]));
1550                    if (PICO_OK == rv) {
1551                        PICODBG_DEBUG(("PARSE successfully returned from phoneme extraction"));
1552                        sa->procState = SA_STEPSTATE_PROCESS_TRNS_FST;
1553                    } else {
1554                        PICODBG_WARN(("PARSE phone extraction returned exception %i, output WORDPHON untransduced",rv));
1555                    }
1556                } else {
1557                    PICODBG_DEBUG(("PARSE found other item, just copying"));
1558                }
1559                if (SA_STEPSTATE_FEED == sa->procState) {
1560                     PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
1561                             (picoos_uint8 *)"sa-p: ",
1562                             sa->tmpbuf, PICOSA_MAXITEMSIZE);
1563 
1564                 }
1565 
1566                 /* consume item */
1567                 sa->headxBottom++;
1568                 sa->headxLen--;
1569 
1570                 break;
1571 
1572                 /* *********************************************************/
1573                 /* transduce state: copy item in internal outBuf to tmpBuf and transduce */
1574            case SA_STEPSTATE_PROCESS_TRNS_FST:
1575 
1576 
1577 
1578 
1579 
1580                /* if no word-level FSTs: doing trivial syllabification instead */
1581                if (0 == sa->numFsts) {
1582                    PICODBG_DEBUG(("doing trivial sylabification with %i phones", sa->phonWritePos));
1583 #if defined(PICO_DEBUG)
1584                    {
1585                        PICODBG_INFO_CTX();
1586                        PICODBG_INFO_MSG(("sa trying to trivially syllabify: "));
1587                        PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos);
1588                        PICODBG_INFO_MSG(("\n"));
1589                    }
1590 #endif
1591 
1592                    picotrns_trivial_syllabify(sa->tabphones, sa->phonBuf,
1593                            sa->phonWritePos, sa->phonBufOut,
1594                            &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
1595                    PICODBG_DEBUG(("returned from trivial sylabification with %i phones", sa->phonWritePos));
1596 #if defined(PICO_DEBUG)
1597                    {
1598                        PICODBG_INFO_CTX();
1599                        PICODBG_INFO_MSG(("sa returned from syllabification: "));
1600                        PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos);
1601                        PICODBG_INFO_MSG(("\n"));
1602                    }
1603 #endif
1604 
1605                    /* eliminate deep epsilons */
1606                    PICODBG_DEBUG(("doing epsilon elimination with %i phones", sa->phonWritePos));
1607                    picotrns_eliminate_epsilons(sa->phonBufOut,
1608                            sa->phonWritePos, sa->phonBuf,
1609                            &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
1610                    PICODBG_DEBUG(("returning from epsilon elimination with %i phones", sa->phonWritePos));
1611                    sa->phonReadPos = 0;
1612                    sa->phonesTransduced = 1;
1613                    sa->procState = SA_STEPSTATE_FEED;
1614                    break;
1615                }
1616 
1617                /* there are word-level FSTs */
1618                /* termination condition first */
1619                if (sa->curFst >= sa->numFsts) {
1620                    /* reset for next transduction */
1621                    sa->curFst = 0;
1622                    sa->phonReadPos = 0;
1623                    sa->phonesTransduced = 1;
1624                    sa->procState = SA_STEPSTATE_FEED;
1625                    break;
1626                }
1627 
1628                /* transduce from phonBuf to phonBufOut */
1629                {
1630 
1631                    picoos_uint32 nrSteps;
1632 #if defined(PICO_DEBUG)
1633                    {
1634                        PICODBG_INFO_CTX();
1635                        PICODBG_INFO_MSG(("sa trying to transduce: "));
1636                        PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos);
1637                        PICODBG_INFO_MSG(("\n"));
1638                    }
1639 #endif
1640                    picotrns_transduce(sa->fst[sa->curFst], FALSE,
1641                            picotrns_printSolution, sa->phonBuf, sa->phonWritePos, sa->phonBufOut,
1642                            &sa->phonWritePos,
1643                            PICOTRNS_MAX_NUM_POSSYM, sa->altDescBuf,
1644                            sa->maxAltDescLen, &nrSteps);
1645 #if defined(PICO_DEBUG)
1646                    {
1647                        PICODBG_INFO_CTX();
1648                        PICODBG_INFO_MSG(("sa returned from transduction: "));
1649                        PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos);
1650                        PICODBG_INFO_MSG(("\n"));
1651                    }
1652 #endif
1653                }
1654 
1655 
1656 
1657                /*
1658                 The transduction output will contain equivalent items, i.e. (x,y') for each (x,y), plus inserted deep symbols (-1,d).
1659                 In case of deletions, (x,0) might also be omitted...
1660                 */
1661                /* eliminate deep epsilons */
1662                picotrns_eliminate_epsilons(sa->phonBufOut,
1663                        sa->phonWritePos, sa->phonBuf, &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
1664                sa->phonesTransduced = 1;
1665 
1666                sa->curFst++;
1667 
1668                return PICODATA_PU_ATOMIC;
1669                /* break; */
1670 
1671                 /* *********************************************************/
1672                 /* feed state: copy item in internal outBuf to output charBuf */
1673 
1674            case SA_STEPSTATE_FEED:
1675 
1676                PICODBG_DEBUG(("FEED"));
1677 
1678                if (sa->phonesTransduced) {
1679                    /* replace original phones by transduced */
1680                    picoos_uint16 phonWritePos = PICODATA_ITEM_HEADSIZE;
1681                    picoos_uint8 plane;
1682                    picoos_int16 sym, pos;
1683                    while (SA_POSSYM_OK == (rv = getNextPosSym(sa,&pos,&sym,sa->nextReadPos))) {
1684                        PICODBG_TRACE(("FEED inserting phoneme %c into inBuf[%i]",sym,phonWritePos));
1685                        sym = picotrns_unplane(sym, &plane);
1686                        PICODBG_ASSERT((PICOKFST_PLANE_PHONEMES == plane));
1687                        sa->tmpbuf[phonWritePos++] = (picoos_uint8) sym;
1688                    }
1689                    PICODBG_DEBUG(("FEED setting item length to %i",phonWritePos - PICODATA_ITEM_HEADSIZE));
1690                    picodata_set_itemlen(sa->tmpbuf,PICODATA_ITEM_HEADSIZE,phonWritePos - PICODATA_ITEM_HEADSIZE);
1691                    if (SA_POSSYM_INVALID == rv) {
1692                        PICODBG_ERROR(("FEED unexpected symbol or unexpected end of phoneme list"));
1693                        return (picodata_step_result_t)picoos_emRaiseException(this->common->em, PICO_WARN_INCOMPLETE, NULL, NULL);
1694                    }
1695                    sa->phonesTransduced = 0;
1696 
1697                } /* if (sa->phonesTransduced) */
1698 
1699 
1700                 rvP = picodata_cbPutItem(this->cbOut, sa->tmpbuf,
1701                 PICOSA_MAXITEMSIZE, &clen);
1702 
1703                 *numBytesOutput += clen;
1704 
1705                 PICODBG_DEBUG(("put item, status: %d", rvP));
1706 
1707                 if (rvP == PICO_OK) {
1708                 } else if (rvP == PICO_EXC_BUF_OVERFLOW) {
1709                     /* try again next time */
1710                     PICODBG_DEBUG(("feeding overflow"));
1711                     return PICODATA_PU_OUT_FULL;
1712                 } else {
1713                     /* error, should never happen */
1714                     PICODBG_ERROR(("untreated return value, rvP: %d", rvP));
1715                     return PICODATA_PU_ERROR;
1716                 }
1717 
1718                 PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
1719                         (picoos_uint8 *)"sana: ",
1720                         sa->tmpbuf, PICOSA_MAXITEMSIZE);
1721 
1722                 sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE;
1723                 /* return PICODATA_PU_BUSY; */
1724                 break;
1725 
1726             default:
1727                 break;
1728         } /* switch */
1729 
1730     } /* while */
1731 
1732     /* should never be reached */
1733     PICODBG_ERROR(("reached end of function"));
1734     picoos_emRaiseException(this->common->em, PICO_ERR_OTHER, NULL, NULL);
1735     return PICODATA_PU_ERROR;
1736 }
1737 
1738 #ifdef __cplusplus
1739 }
1740 #endif
1741 
1742 
1743 /* end */
1744