1 /*
2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /**
17  * @file picoacph.c
18  *
19  * accentuation and phrasing
20  *
21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22  * All rights reserved.
23  *
24  * History:
25  * - 2009-04-20 -- initial version
26  *
27  */
28 
29 #include "picoos.h"
30 #include "picodbg.h"
31 #include "picobase.h"
32 #include "picodata.h"
33 #include "picoacph.h"
34 #include "picokdt.h"
35 #include "picoklex.h"
36 #include "picoktab.h"
37 
38 #ifdef __cplusplus
39 extern "C" {
40 #endif
41 #if 0
42 }
43 #endif
44 
/* PU acphStep states, stored in acph_subobj_t.procState to select the
   next processing step */
#define SA_STEPSTATE_COLLECT       0
#define SA_STEPSTATE_PROCESS_PHR  12
#define SA_STEPSTATE_PROCESS_ACC  13
#define SA_STEPSTATE_FEED          2


/* boundary strength state, stored in acph_subobj_t.boundStrengthState;
   it selects the BOUND strength given to the first item of the next
   phrase (SBEG after SSEP, PHR1 after PPHR) */
#define SA_BOUNDSTRENGTH_SSEP      0 /* sentence separator */
#define SA_BOUNDSTRENGTH_PPHR      1 /* primary phrase separator */
55 
56 
57 /*  subobject    : AccPhrUnit
58  *  shortcut     : acph
59  *  context size : one phrase, max. 30 non-PUNC items, for non-processed items
60  *                 one item if internal input empty
61  */
62 
63 /**
64  * @addtogroup picoacph
65  *
66  * <b> Pico Accentuation and Phrasing </b>\n
67  *
68   internal buffers:
69 
70   - headx : array for extended item heads of fixed size (head plus
71     index for content, plus two fields for boundary strength/type)
72   - cbuf : buffer for item contents (referenced by index in
73     headx).
74 
75   0. bottom up filling of items in headx and cbuf
76 
77   1. phrasing (right-to-left):
78 
79      e.g. from      WP WP WP       WP WP PUNC  WP WP PUNC        WP WP WP PUNC  FLUSH    \n
80      e.g. to  BSBEG WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND BSBEG WP WP WP BSEND BTERM    \n
81               |1                         |2                |3                   |4        \n
82 
83      2-level bound state: The internal buffer contains one primary phrase (sometimes forced, if buffer
     almost full), with the trailing PUNC item included (last item).\n
     If the trailing PUNC is a primary phrase separator, the
86        item is not output, but instead, the bound state is set to PPHR, so that the correct BOUND can
87        be output at the start of the next primary phrase.\n
88      Otherwise,
89        the item is converted to the corresponding BOUND and output. the bound state is set to SSEP,
90        so that a BOUND of type SBEG is output at the start of the next primary phrase.
91 
92      trailing PUNC item       bound states                                    \n
93                               SSEP           PPHR                            \n
94        PUNC(SENTEND, X)       B(B,X)>SSEP    B(P1,X)>SSEP  (X = T | Q | E)    \n
95        PUNC(FLUSH, T)         B(B,T)>SSEP*    B(P1,T)>SSEP                    \n
96        PUNC(PHRASEEND, P)     B(B,P)>PPHR    B(P1,P)>PPHR                    \n
97        PUNC(PHRASEEND, FORC)  B(B,P)>PPHR    B(P1,P)>PPHR                    \n
98 
99     If more than one sentence separators follow each other (e.g. SEND-FLUSH, SEND-SEND) then
100      all but the first will be treated as an (empty) phrase containing just this item.
101      If this (single) item is a flush, creation of SBEG is suppressed.
102 
103 
104   - dtphr phrasing tree ("subphrasing")
105     determines
106       - BOUND_PHR2
107       - BOUND_PHR3
  - boundary strengths are determined for every word (except the
109     first one) from right-to-left. The boundary types mark the phrase
110     type of the phrase following the boundary.
111   - number of items actually changed (new BOUND items added): because
112     of fixed size without content, two fields are contained in headx
113     to indicate if a BOUND needs to be added to the LEFT of the item.
114     -> headx further extended with boundary strength and type info to
115     indicate that to the left of the headx ele a BOUND needs to be
116     inserted when outputting.
117 
118   2. accentuation:
119   - number of items unchanged, content unchanged, only head info changes
120   -> changed in place in headx
121 */
122 
123 
/* extended item head: the item head itself, the position of the item
   content in cbuf, and the BOUND (strength/type) that has to be inserted
   to the left of the item when outputting */
typedef struct {
    picodata_itemhead_t head;    /* item head (type, info1, info2, len) */
    picoos_uint16 cind;          /* index of the item content in cbuf */
    picoos_uint8 boundstrength;  /* bstrength to the left, 0 if not set */
    picoos_uint8 boundtype;      /* btype for following phrase, 0 if not set */
} picoacph_headx_t;
130 
131 
/* sub-object of the acph processing unit: step state, the internal
   headx/cbuf item buffers and the knowledge bases used for phrasing and
   accentuation; all fields are (re)set in acphInitialize */
typedef struct acph_subobj {
    picoos_uint8 procState; /* for next processing step decision,
                               one of SA_STEPSTATE_* */
    picoos_uint8 boundStrengthState;    /* boundary strength state,
                                           one of SA_BOUNDSTRENGTH_* */

    picoos_uint8 inspaceok;      /* flag: headx/cbuf has space for an item */
    picoos_uint8 needsmoreitems; /* flag: need more items */

    picoos_uint8 tmpbuf[PICODATA_MAX_ITEMSIZE];  /* tmp. location for an item */

    /* extended item heads, one entry per buffered item */
    picoacph_headx_t headx[PICOACPH_MAXNR_HEADX];
    picoos_uint16 headxBottom; /* bottom */
    picoos_uint16 headxLen;    /* length, 0 if empty */

    /* item contents, referenced from headx[].cind */
    picoos_uint8 cbuf[PICOACPH_MAXSIZE_CBUF];
    picoos_uint16 cbufBufSize; /* actually allocated size */
    picoos_uint16 cbufLen;     /* length, 0 if empty */

    /* tab knowledge base */
    picoktab_Phones tabphones;

    /* dtphr knowledge base */
    picokdt_DtPHR dtphr;

    /* dtacc knowledge base */
    picokdt_DtACC dtacc;
} acph_subobj_t;
158 
159 
acphInitialize(register picodata_ProcessingUnit this,picoos_int32 resetMode)160 static pico_status_t acphInitialize(register picodata_ProcessingUnit this, picoos_int32 resetMode) {
161     acph_subobj_t * acph;
162     picoos_uint16 i;
163 
164     PICODBG_DEBUG(("calling"));
165 
166     if (NULL == this || NULL == this->subObj) {
167         return picoos_emRaiseException(this->common->em,
168                                        PICO_ERR_NULLPTR_ACCESS, NULL, NULL);
169     }
170     acph = (acph_subobj_t *) this->subObj;
171     acph->procState = SA_STEPSTATE_COLLECT;
172     acph->boundStrengthState = SA_BOUNDSTRENGTH_SSEP;
173 
174     acph->inspaceok = TRUE;
175     acph->needsmoreitems = TRUE;
176 
177     acph->headxBottom = 0;
178     acph->headxLen = 0;
179     acph->cbufBufSize = PICOACPH_MAXSIZE_CBUF;
180     acph->cbufLen = 0;
181 
182     /* init headx, cbuf */
183     for (i = 0; i < PICOACPH_MAXNR_HEADX; i++){
184         acph->headx[i].head.type = 0;
185         acph->headx[i].head.info1 = 0;
186         acph->headx[i].head.info2 = 0;
187         acph->headx[i].head.len = 0;
188         acph->headx[i].cind = 0;
189         acph->headx[i].boundstrength = 0;
190         acph->headx[i].boundtype = 0;
191     }
192     for (i = 0; i < PICOACPH_MAXSIZE_CBUF; i++) {
193         acph->cbuf[i] = 0;
194     }
195 
196     if (resetMode == PICO_RESET_SOFT) {
197         /*following initializations needed only at startup or after a full reset*/
198         return PICO_OK;
199     }
200 
201     /* kb tabphones */
202     acph->tabphones =
203         picoktab_getPhones(this->voice->kbArray[PICOKNOW_KBID_TAB_PHONES]);
204     if (acph->tabphones == NULL) {
205         return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
206                                        NULL, NULL);
207     }
208     PICODBG_DEBUG(("got tabphones"));
209 
210 #ifdef PICO_DEBUG_1
211     {
212         picoos_uint16 itmp;
213         for (itmp = 0; itmp < 256; itmp++) {
214             if (picoktab_hasVowelProp(acph->tabphones, itmp)) {
215                 PICODBG_DEBUG(("tabphones hasVowel: %d", itmp));
216             }
217             if (picoktab_hasDiphthProp(acph->tabphones, itmp)) {
218                 PICODBG_DEBUG(("tabphones hasDiphth: %d", itmp));
219             }
220             if (picoktab_hasGlottProp(acph->tabphones, itmp)) {
221                 PICODBG_DEBUG(("tabphones hasGlott: %d", itmp));
222             }
223             if (picoktab_hasNonsyllvowelProp(acph->tabphones, itmp)) {
224                 PICODBG_DEBUG(("tabphones hasNonsyllvowel: %d", itmp));
225             }
226             if (picoktab_hasSyllconsProp(acph->tabphones, itmp)) {
227                 PICODBG_DEBUG(("tabphones hasSyllcons: %d", itmp));
228             }
229 
230             if (picoktab_isPrimstress(acph->tabphones, itmp)) {
231                 PICODBG_DEBUG(("tabphones isPrimstress: %d", itmp));
232             }
233             if (picoktab_isSecstress(acph->tabphones, itmp)) {
234                 PICODBG_DEBUG(("tabphones isSecstress: %d", itmp));
235             }
236             if (picoktab_isSyllbound(acph->tabphones, itmp)) {
237                 PICODBG_DEBUG(("tabphones isSyllbound: %d", itmp));
238             }
239             if (picoktab_isPause(acph->tabphones, itmp)) {
240                 PICODBG_DEBUG(("tabphones isPause: %d", itmp));
241             }
242         }
243 
244         PICODBG_DEBUG(("tabphones primstressID: %d",
245                        picoktab_getPrimstressID(acph->tabphones)));
246         PICODBG_DEBUG(("tabphones secstressID: %d",
247                        picoktab_getSecstressID(acph->tabphones)));
248         PICODBG_DEBUG(("tabphones syllboundID: %d",
249                        picoktab_getSyllboundID(acph->tabphones)));
250         PICODBG_DEBUG(("tabphones pauseID: %d",
251                        picoktab_getPauseID(acph->tabphones)));
252     }
253 #endif
254 
255 
256     /* kb dtphr */
257     acph->dtphr = picokdt_getDtPHR(this->voice->kbArray[PICOKNOW_KBID_DT_PHR]);
258     if (acph->dtphr == NULL) {
259         return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
260                                        NULL, NULL);
261     }
262     PICODBG_DEBUG(("got dtphr"));
263 
264     /* kb dtacc */
265     acph->dtacc = picokdt_getDtACC(this->voice->kbArray[PICOKNOW_KBID_DT_ACC]);
266     if (acph->dtacc == NULL) {
267         return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
268                                        NULL, NULL);
269     }
270     PICODBG_DEBUG(("got dtacc"));
271 
272     return PICO_OK;
273 }
274 
/* forward declaration: picoacph_newAccPhrUnit stores acphStep in
   this->step before the definition has been seen */
static picodata_step_result_t acphStep(register picodata_ProcessingUnit this,
                                     picoos_int16 mode,
                                     picoos_uint16 *numBytesOutput);
278 
acphTerminate(register picodata_ProcessingUnit this)279 static pico_status_t acphTerminate(register picodata_ProcessingUnit this)
280 {
281     return PICO_OK;
282 }
283 
acphSubObjDeallocate(register picodata_ProcessingUnit this,picoos_MemoryManager mm)284 static pico_status_t acphSubObjDeallocate(register picodata_ProcessingUnit this,
285                                         picoos_MemoryManager mm) {
286     mm = mm;        /* avoid warning "var not used in this function"*/
287     if (NULL != this) {
288         picoos_deallocate(this->common->mm, (void *) &this->subObj);
289     }
290     return PICO_OK;
291 }
292 
293 
picoacph_newAccPhrUnit(picoos_MemoryManager mm,picoos_Common common,picodata_CharBuffer cbIn,picodata_CharBuffer cbOut,picorsrc_Voice voice)294 picodata_ProcessingUnit picoacph_newAccPhrUnit(picoos_MemoryManager mm,
295                                               picoos_Common common,
296                                               picodata_CharBuffer cbIn,
297                                               picodata_CharBuffer cbOut,
298                                               picorsrc_Voice voice) {
299     picodata_ProcessingUnit this;
300 
301     this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice);
302     if (this == NULL) {
303         return NULL;
304     }
305 
306     this->initialize = acphInitialize;
307     PICODBG_DEBUG(("set this->step to acphStep"));
308     this->step = acphStep;
309     this->terminate = acphTerminate;
310     this->subDeallocate = acphSubObjDeallocate;
311     this->subObj = picoos_allocate(mm, sizeof(acph_subobj_t));
312     if (this->subObj == NULL) {
313         picoos_deallocate(mm, (void *)&this);
314         picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
315         return NULL;
316     }
317 
318     acphInitialize(this, PICO_RESET_FULL);
319     return this;
320 }
321 
322 
323 /* ***********************************************************************/
324 /* PROCESS_PHR/ACC support functions */
325 /* ***********************************************************************/
326 
327 
acphGetNrSylls(register picodata_ProcessingUnit this,register acph_subobj_t * acph,const picoos_uint16 ind)328 static picoos_uint8 acphGetNrSylls(register picodata_ProcessingUnit this,
329                                  register acph_subobj_t *acph,
330                                  const picoos_uint16 ind) {
331     picoos_uint8 i;
332     picoos_uint8 ch;
333     picoos_uint8 count;
334 
335     count = 1;
336     for (i = 0; i < acph->headx[ind].head.len; i++) {
337         ch = acph->cbuf[acph->headx[ind].cind + i];
338         if (picoktab_isSyllbound(acph->tabphones, ch)) {
339             count++;
340         }
341     }
342     return count;
343 }
344 
345 
346 /* ***********************************************************************/
347 /* PROCESS_PHR functions */
348 /* ***********************************************************************/
349 
350 
351 /* find next POS to the left of 'ind' and return its POS and index */
acphPhrItemSeqGetPosLeft(register picodata_ProcessingUnit this,register acph_subobj_t * acph,const picoos_uint16 ind,picoos_uint16 * leftind)352 static picoos_uint8 acphPhrItemSeqGetPosLeft(register picodata_ProcessingUnit this,
353                                            register acph_subobj_t *acph,
354                                            const picoos_uint16 ind,
355                                            picoos_uint16 *leftind) {
356     picoos_uint8 val;
357     picoos_int32 i;
358 
359     val = PICOKDT_EPSILON;
360     for (i = ind - 1; ((val == PICOKDT_EPSILON) && (i >= 0)); i--) {
361         if ((acph->headx[i].head.type == PICODATA_ITEM_WORDPHON)) {
362             val = acph->headx[i].head.info1;
363         }
364     }
365     *leftind = i + 1;
366     return val;
367 }
368 
369 
370 /* right-to-left, for each WORDPHON do phr */
acphSubPhrasing(register picodata_ProcessingUnit this,register acph_subobj_t * acph)371 static pico_status_t acphSubPhrasing(register picodata_ProcessingUnit this,
372                                    register acph_subobj_t *acph) {
373     picokdt_classify_result_t dtres;
374     picoos_uint8 valbuf[5];
375     picoos_uint16 nrwordspre;
376     picoos_uint16 nrwordsfol;
377     picoos_uint16 nrsyllsfol;
378     picoos_uint16 lastprev2; /* last index of POS(es) found to the left */
379     picoos_uint8 curpos;     /* POS(es) of current word */
380     picoos_uint16 upbound;   /* index of last WORDPHON item (with POS) */
381     picoos_uint8 okay;
382     picoos_uint8 nosubphrases;
383     picoos_int32 i;
384 
385     /* set initial values */
386     okay = TRUE;
387     nosubphrases = TRUE;
388     curpos = PICOKDT_EPSILON;   /* needs to be in 2^8 */
389 
390     /* set upbound to last WORDPHON, don't worry about first one */
391     upbound = acph->headxLen - 1;
392     while ((upbound > 0) &&
393            (acph->headx[upbound].head.type != PICODATA_ITEM_WORDPHON)) {
394         upbound--;
395     }
396 
397     /* zero or one WORDPHON, no subphrasing needed, but handling of
398        BOUND strength state is needed */
399     if (upbound <= 0) {
400         /* phrase not containing more than one WORDPHON */
401         PICODBG_DEBUG(("less than two WORDPHON in phrase -> no subphrasing"));
402     }
403 
404     lastprev2 = upbound;
405 
406     /* set initial nr pre/fol words/sylls, upbound is ind of last WORDPHON */
407     nrwordsfol = 0;
408     nrsyllsfol = 0;
409     nrwordspre = 0;
410     for (i = 0; i < upbound; i++) {
411         if (acph->headx[i].head.type == PICODATA_ITEM_WORDPHON) {
412             nrwordspre++;
413         }
414     }
415 
416     nrwordspre++;    /* because we later have a decrement before being used */
417 
418 
419     /* set POS of current word in valbuf[1], will be shifted right afterwards */
420     valbuf[1] = acph->headx[upbound].head.info1;
421     /* find first POS to the left and set valbuf[0] */
422     valbuf[0] = acphPhrItemSeqGetPosLeft(this, acph, lastprev2, &lastprev2);
423     for (i = 2; i < 5; i++) {
424         valbuf[i] = PICOKDT_EPSILON;
425     }
426 
427     PICODBG_TRACE(("headxLen: %d", acph->headxLen));
428 
429     /* at least two WORDPHON items */
430     /* process from right-to-left all items in headx, except for 1st WORDPHON */
431     for (i = upbound; (i > 0) && (nrwordspre > 1); i--) {
432         okay = TRUE;
433 
434         PICODBG_TRACE(("iter: %d, type: %c", i, acph->headx[i].head.type));
435 
436         /* if not (WORDPHON) */
437         if ((acph->headx[i].head.type != PICODATA_ITEM_WORDPHON)) {
438             continue;
439         }
440 
441         PICODBG_TRACE(("iter: %d, curpos: %d", i, acph->headx[i].head.info1));
442 
443         /* get and set POS of current item, must be WORDPHON */
444         curpos = acph->headx[i].head.info1;
445 
446         /* no continue so far => at [i] we have a WORDPHON item */
447         /* shift all POS elements one position to the right */
448         valbuf[4] = valbuf[3];
449         valbuf[3] = valbuf[2];
450         valbuf[2] = valbuf[1];
451         valbuf[1] = valbuf[0];
452         /* find next POS to the left and set valbuf[0] */
453         valbuf[0] = acphPhrItemSeqGetPosLeft(this, acph, lastprev2, &lastprev2);
454 
455         /* better check double than never */
456         if (curpos != valbuf[2]) {
457             PICODBG_WARN(("syncing POS"));
458             picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
459                                   NULL, NULL);
460             valbuf[2] = curpos;
461         }
462 
463         nrwordsfol++;
464         nrsyllsfol += acphGetNrSylls(this, acph, i);
465         nrwordspre--;
466 
467         PICODBG_TRACE(("%d: [%d,%d|%d|%d,%d|%d,%d,%d]",
468                        i, valbuf[0], valbuf[1], valbuf[2], valbuf[3],
469                        valbuf[4], nrwordspre, nrwordsfol, nrsyllsfol));
470 
471         /* no continue so far => subphrasing needed */
472         /* construct input vector, which is set in dtphr */
473         if (!picokdt_dtPHRconstructInVec(acph->dtphr, valbuf[0], valbuf[1],
474                                          valbuf[2], valbuf[3], valbuf[4],
475                                          nrwordspre, nrwordsfol, nrsyllsfol)) {
476             /* error constructing invec */
477             PICODBG_WARN(("problem with invec"));
478             picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
479                                   NULL, NULL);
480             okay = FALSE;
481         }
482         /* classify */
483         if (okay && (!picokdt_dtPHRclassify(acph->dtphr))) {
484             /* error doing classification */
485             PICODBG_WARN(("problem classifying"));
486             picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
487                                   NULL, NULL);
488             okay = FALSE;
489         }
490         /* decompose */
491         if (okay && (!picokdt_dtPHRdecomposeOutClass(acph->dtphr, &dtres))) {
492             /* error decomposing */
493             PICODBG_WARN(("problem decomposing"));
494             picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
495                                   NULL, NULL);
496             okay = FALSE;
497         }
498 
499         if (okay && dtres.set) {
500             PICODBG_DEBUG(("%d - inpos: %d, out: %d", i,valbuf[2],dtres.class));
501         } else {
502             PICODBG_WARN(("problem determining subphrase boundary strength"));
503             dtres.class = PICODATA_ITEMINFO1_ERR;
504         }
505 
506         if (dtres.class > 255) {
507             PICODBG_WARN(("dt class outside valid range, setting to PHR0"));
508             dtres.class = PICODATA_ITEMINFO1_BOUND_PHR0;
509         }
510         acph->headx[i].boundstrength = (picoos_uint8)dtres.class;
511         if ((dtres.class == PICODATA_ITEMINFO1_BOUND_PHR2) ||
512             (dtres.class == PICODATA_ITEMINFO1_BOUND_PHR3)) {
513             if (nosubphrases) {
514                 /* it's the last secondary phrase in the primary phrase */
515                 /* add type info */
516                 switch (acph->headx[acph->headxLen - 1].head.info2) {
517                     case PICODATA_ITEMINFO2_PUNC_SENT_T:
518                         acph->headx[i].boundtype =
519                             PICODATA_ITEMINFO2_BOUNDTYPE_T;
520                         break;
521                     case PICODATA_ITEMINFO2_PUNC_SENT_Q:
522                         acph->headx[i].boundtype =
523                             PICODATA_ITEMINFO2_BOUNDTYPE_Q;
524                         break;
525                     case PICODATA_ITEMINFO2_PUNC_SENT_E:
526                         acph->headx[i].boundtype =
527                             PICODATA_ITEMINFO2_BOUNDTYPE_E;
528                         break;
529                     case PICODATA_ITEMINFO2_PUNC_PHRASE:
530                     case PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED:
531                         acph->headx[i].boundtype =
532                             PICODATA_ITEMINFO2_BOUNDTYPE_P;
533                         break;
534                     default:
535                         PICODBG_WARN(("invalid boundary type, not set"));
536                         break;
537                 }
538                 nosubphrases = FALSE;
539 
540             } else {
541                 acph->headx[i].boundtype =
542                     PICODATA_ITEMINFO2_BOUNDTYPE_P;
543             }
544             /* reset nr following words and sylls counters */
545             nrwordsfol = 0;
546             nrsyllsfol = 0;
547         }
548     }
549 
550     /* process first item, add bound-info */
551     switch (acph->boundStrengthState) {
552         case SA_BOUNDSTRENGTH_SSEP:
553             acph->headx[0].boundstrength =
554                 PICODATA_ITEMINFO1_BOUND_SBEG;
555             break;
556         case SA_BOUNDSTRENGTH_PPHR:
557             acph->headx[0].boundstrength =
558                 PICODATA_ITEMINFO1_BOUND_PHR1;
559             break;
560         default:
561             PICODBG_WARN(("invalid boundary strength, not set"));
562             break;
563     }
564 
565     /* set boundary strength state */
566     switch (acph->headx[acph->headxLen - 1].head.info1) {
567         case PICODATA_ITEMINFO1_PUNC_SENTEND:
568         case PICODATA_ITEMINFO1_PUNC_FLUSH:
569             acph->boundStrengthState = SA_BOUNDSTRENGTH_SSEP;
570             break;
571         case PICODATA_ITEMINFO1_PUNC_PHRASEEND:
572             acph->boundStrengthState = SA_BOUNDSTRENGTH_PPHR;
573             break;
574         default:
575             PICODBG_WARN(("invalid boundary strength state, not changed"));
576             break;
577     }
578 
579     if (nosubphrases) {
580         /* process first item, add type info */
581         switch (acph->headx[acph->headxLen - 1].head.info2) {
582             case PICODATA_ITEMINFO2_PUNC_SENT_T:
583                 acph->headx[0].boundtype =
584                     PICODATA_ITEMINFO2_BOUNDTYPE_T;
585                 break;
586             case PICODATA_ITEMINFO2_PUNC_SENT_Q:
587                 acph->headx[0].boundtype =
588                     PICODATA_ITEMINFO2_BOUNDTYPE_Q;
589                 break;
590             case PICODATA_ITEMINFO2_PUNC_SENT_E:
591                 acph->headx[0].boundtype =
592                     PICODATA_ITEMINFO2_BOUNDTYPE_E;
593                 break;
594             case PICODATA_ITEMINFO2_PUNC_PHRASE:
595             case PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED:
596                 acph->headx[0].boundtype =
597                     PICODATA_ITEMINFO2_BOUNDTYPE_P;
598                 break;
599             default:
600                 PICODBG_WARN(("invalid boundary type, not set"));
601                 break;
602         }
603     } else {
604         acph->headx[0].boundtype =
605             PICODATA_ITEMINFO2_BOUNDTYPE_P;
606     }
607 
608     return PICO_OK;
609 }
610 
611 
612 /* ***********************************************************************/
613 /* PROCESS_ACC functions */
614 /* ***********************************************************************/
615 
616 /* find next POS to the left of 'ind' and return its POS and index */
acphAccItemSeqGetPosLeft(register picodata_ProcessingUnit this,register acph_subobj_t * acph,const picoos_uint16 ind,picoos_uint16 * leftind)617 static picoos_uint8 acphAccItemSeqGetPosLeft(register picodata_ProcessingUnit this,
618                                            register acph_subobj_t *acph,
619                                            const picoos_uint16 ind,
620                                            picoos_uint16 *leftind) {
621     picoos_uint8 val;
622     picoos_int32 i;
623 
624     val = PICOKDT_EPSILON;
625     for (i = ind - 1; ((val == PICOKDT_EPSILON) && (i >= 0)); i--) {
626         if ((acph->headx[i].head.type == PICODATA_ITEM_WORDPHON)) {
627             val = acph->headx[i].head.info1;
628         }
629     }
630     *leftind = i + 1;
631     return val;
632 }
633 
634 
635 /* s1: nr sylls in word before the first primary stressed syll,
636    s2: nr sylls in word after (but excluding) the first primary stressed syll */
acphAccNrSyllParts(register picodata_ProcessingUnit this,register acph_subobj_t * acph,const picoos_uint16 ind,picoos_uint8 * s1,picoos_uint8 * s2)637 static picoos_uint8 acphAccNrSyllParts(register picodata_ProcessingUnit this,
638                                      register acph_subobj_t *acph,
639                                      const picoos_uint16 ind,
640                                      picoos_uint8 *s1,
641                                      picoos_uint8 *s2) {
642     picoos_uint16 pind;
643     picoos_uint16 pend;    /* phone string start+len */
644     picoos_uint8 afterprim;
645 
646     /* check ind is in valid range */
647     if (ind >= acph->headxLen) {
648         return FALSE;
649     }
650 
651     *s1 = 0;
652     *s2 = 0;
653     afterprim = FALSE;
654     pend = acph->headx[ind].cind + acph->headx[ind].head.len;
655     for (pind = acph->headx[ind].cind; pind < pend; pind++) {
656         if (picoktab_isPrimstress(acph->tabphones, acph->cbuf[pind])) {
657             afterprim = TRUE;
658         } else if (picoktab_isSyllbound(acph->tabphones, acph->cbuf[pind])) {
659             if (afterprim) {
660                 (*s2)++;
661             } else {
662                 (*s1)++;
663             }
664         }
665     }
666     if (afterprim) {
667         (*s2)++;
668     } else {
669         (*s1)++;
670     }
671 
672     /* exclude the stressed syllable */
673     if ((*s2) > 0) {
674         (*s2)--;
675     }
676     /* handle the case when there is no primstress */
677     if (!afterprim) {
678         (*s2) = (*s1);
679     }
680     return TRUE;
681 }
682 
683 
acphAccGetNrsRight(register picodata_ProcessingUnit this,register acph_subobj_t * acph,const picoos_uint16 ind,picoos_uint16 * nrwordsfol,picoos_uint16 * nrsyllsfol,picoos_uint16 * footwordsfol,picoos_uint16 * footsyllsfol)684 static picoos_uint8 acphAccGetNrsRight(register picodata_ProcessingUnit this,
685                                      register acph_subobj_t *acph,
686                                      const picoos_uint16 ind,
687                                      picoos_uint16 *nrwordsfol,
688                                      picoos_uint16 *nrsyllsfol,
689                                      picoos_uint16 *footwordsfol,
690                                      picoos_uint16 *footsyllsfol) {
691     picoos_uint16 i;
692     picoos_uint8 s1;
693     picoos_uint8 s2;
694 
695     if (!acphAccNrSyllParts(this, acph, ind, &s1, &s2)) {
696         return FALSE;
697     }
698 
699     *nrwordsfol = 0;
700     *nrsyllsfol = s2;
701     i = ind + 1;
702     while ((i < acph->headxLen) &&
703            (acph->headx[i].boundstrength == PICODATA_ITEMINFO1_BOUND_PHR0)) {
704         if (acph->headx[i].head.type == PICODATA_ITEM_WORDPHON) {
705             (*nrwordsfol)++;
706             *nrsyllsfol += acphGetNrSylls(this, acph, i);
707         }
708         i++;
709     }
710 
711     *footwordsfol = 0;
712     *footsyllsfol = s2;
713     i = ind + 1;
714     while ((i < acph->headxLen) &&
715            (acph->headx[i].head.info2 != PICODATA_ACC1)) {
716         if (acph->headx[i].head.type == PICODATA_ITEM_WORDPHON) {
717             (*footwordsfol)++;
718             *footsyllsfol += acphGetNrSylls(this, acph, i);
719         }
720         i++;
721     }
722     if ((i < acph->headxLen) && (acph->headx[i].head.info2 == PICODATA_ACC1)) {
723         if (!acphAccNrSyllParts(this, acph, i, &s1, &s2)) {
724             return FALSE;
725         }
726         *footsyllsfol += s1;
727     }
728     return TRUE;
729 }
730 
731 
acphAccGetNrsLeft(register picodata_ProcessingUnit this,register acph_subobj_t * acph,const picoos_uint16 ind,picoos_uint16 * nrwordspre,picoos_uint16 * nrsyllspre)732 static picoos_uint8 acphAccGetNrsLeft(register picodata_ProcessingUnit this,
733                                     register acph_subobj_t *acph,
734                                     const picoos_uint16 ind,
735                                     picoos_uint16 *nrwordspre,
736                                     picoos_uint16 *nrsyllspre) {
737     picoos_int32 i;
738     picoos_uint8 s1;
739     picoos_uint8 s2;
740 
741     if (!acphAccNrSyllParts(this, acph, ind, &s1, &s2)) {
742         return FALSE;
743     }
744 
745     *nrwordspre = 0;
746     *nrsyllspre = s1;
747     i = ind - 1;
748     while ((i >= 0) &&
749            (acph->headx[i].boundstrength == PICODATA_ITEMINFO1_BOUND_PHR0)) {
750         if (acph->headx[i].head.type == PICODATA_ITEM_WORDPHON) {
751             (*nrwordspre)++;
752             *nrsyllspre += acphGetNrSylls(this, acph, i);
753         }
754         i--;
755     }
756 
757     if ((acph->headx[i].boundstrength != PICODATA_ITEMINFO1_BOUND_PHR0) &&
758         (acph->headx[i].head.type == PICODATA_ITEM_WORDPHON)) {
759         (*nrwordspre)++;
760         *nrsyllspre += acphGetNrSylls(this, acph, i);
761     }
762     return TRUE;
763 }
764 
765 
766 /* return TRUE if wordphon contains no stress, FALSE otherwise */
acphIsWordWithoutStress(register picodata_ProcessingUnit this,register acph_subobj_t * acph,const picoos_uint16 ind)767 static picoos_uint8 acphIsWordWithoutStress(register picodata_ProcessingUnit this,
768                                           register acph_subobj_t *acph,
769                                           const picoos_uint16 ind) {
770     picoos_uint8 i;
771     picoos_uint16 pos;
772 
773     pos = acph->headx[ind].cind;
774     for (i = 0; i < acph->headx[ind].head.len; i++) {
775         if (picoktab_isPrimstress(acph->tabphones, acph->cbuf[pos + i]) ||
776             picoktab_isSecstress(acph->tabphones, acph->cbuf[pos + i])) {
777             return FALSE;
778         }
779     }
780     return TRUE;
781 }
782 
783 
784 /* right-to-left, for each WORDPHON do acc */
acphAccentuation(register picodata_ProcessingUnit this,register acph_subobj_t * acph)785 static pico_status_t acphAccentuation(register picodata_ProcessingUnit this,
786                                     register acph_subobj_t *acph) {
787     picokdt_classify_result_t dtres;
788     picoos_uint8 valbuf[5];
789     picoos_uint16 hist1;
790     picoos_uint16 hist2;
791     picoos_uint16 nrwordspre;
792     picoos_uint16 nrsyllspre;
793     picoos_uint16 nrwordsfol;
794     picoos_uint16 nrsyllsfol;
795     picoos_uint16 footwordsfol;
796     picoos_uint16 footsyllsfol;
797     picoos_uint16 lastprev2; /* last index of POS(es) found to the left */
798     picoos_uint8 curpos;     /* POS(es) of current word */
799     picoos_uint16 prevout;
800     picoos_uint8 okay;
801     picoos_int32 upbound;   /* index of last WORDPHON item (with POS) */
802     picoos_uint16 i;
803 
804     /* set initial values */
805     okay = TRUE;
806     curpos = PICOKDT_EPSILON;    /* needs to be < 2^8 */
807 
808     /* set upbound to last WORDPHON */
809     upbound = acph->headxLen - 1;
810     while ((upbound >= 0) &&
811            (acph->headx[upbound].head.type != PICODATA_ITEM_WORDPHON)) {
812         upbound--;
813     }
814 
815     if (upbound < 0) {
816         /* phrase containing zero WORDPHON */
817         PICODBG_DEBUG(("no WORDPHON in phrase -> no accentuation"));
818         return PICO_OK;
819     }
820 
821     lastprev2 = upbound;
822 
823     /* set initial history values */
824     prevout = PICOKDT_HISTORY_ZERO;
825     hist1 = PICOKDT_HISTORY_ZERO;
826     hist2 = PICOKDT_HISTORY_ZERO;
827 
828     /* set initial nr pre/fol words/sylls, upbound is ind of last WORDPHON */
829     nrwordsfol = 0;
830     nrsyllsfol = 0;
831     footwordsfol = 0;
832     footsyllsfol = 0;
833     nrwordspre = 0;
834     nrsyllspre = 0;
835 
836     /* set POS of current word in valbuf[1], will be shifted right afterwards */
837     valbuf[1] = acph->headx[upbound].head.info1;
838     /* find first POS to the left and set valbuf[0] */
839     valbuf[0] = acphAccItemSeqGetPosLeft(this, acph, lastprev2, &lastprev2);
840     for (i = 2; i < 5; i++) {
841         valbuf[i] = PICOKDT_EPSILON;
842     }
843 
844     PICODBG_TRACE(("headxLen: %d", acph->headxLen));
845 
846     /* process from right-to-left all items in headx */
847     for (i = upbound+1; i > 0; ) {
848         i--;
849 
850         okay = TRUE;
851 
852         PICODBG_TRACE(("iter: %d, type: %c", i, acph->headx[i].head.type));
853 
854         /* if not (WORDPHON) */
855         if ((acph->headx[i].head.type != PICODATA_ITEM_WORDPHON)) {
856             continue;
857         }
858 
859         PICODBG_TRACE(("iter: %d, curpos: %d", i, acph->headx[i].head.info1));
860 
861         /* get and set POS of current item, must be WORDPHON */
862         curpos = acph->headx[i].head.info1;
863 
864         /* no continue so far => at [i] we have a WORDPHON item */
865         /* shift all POS elements one position to the right */
866         valbuf[4] = valbuf[3];
867         valbuf[3] = valbuf[2];
868         valbuf[2] = valbuf[1];
869         valbuf[1] = valbuf[0];
870         /* find next POS to the left and set valbuf[0] */
871         valbuf[0] = acphAccItemSeqGetPosLeft(this, acph, lastprev2, &lastprev2);
872 
873         /* better check double than never */
874         if (curpos != valbuf[2]) {
875             PICODBG_WARN(("syncing POS"));
876             picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
877                                   NULL, NULL);
878             valbuf[2] = curpos;
879         }
880 
881         /* set history values */
882         hist2 = hist1;
883         hist1 = prevout;
884 
885         /* ************************************************************ */
886         /* many speedups possible by avoiding double calc of attribtues */
887         /* ************************************************************ */
888 
889         /* get distances */
890         if ((!acphAccGetNrsRight(this, acph, i, &nrwordsfol, &nrsyllsfol,
891                                &footwordsfol, &footsyllsfol)) ||
892             (!acphAccGetNrsLeft(this, acph, i, &nrwordspre, &nrsyllspre))) {
893             PICODBG_WARN(("problem setting distances in invec"));
894             picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
895                                   NULL, NULL);
896             okay = FALSE;
897         }
898 
899         PICODBG_TRACE(("%d: [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]", i,
900                        valbuf[0], valbuf[1], valbuf[2], valbuf[3], valbuf[4],
901                        hist1, hist2, nrwordspre, nrsyllspre,
902                        nrwordsfol, nrsyllsfol, footwordsfol, footsyllsfol));
903 
904         /* no continue so far => accentuation needed */
905         /* construct input vector, which is set in dtacc */
906         if (!picokdt_dtACCconstructInVec(acph->dtacc, valbuf[0], valbuf[1],
907                                          valbuf[2], valbuf[3], valbuf[4],
908                                          hist1, hist2, nrwordspre, nrsyllspre,
909                                          nrwordsfol, nrsyllsfol, footwordsfol,
910                                          footsyllsfol)) {
911             /* error constructing invec */
912             PICODBG_WARN(("problem with invec"));
913             picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
914                                   NULL, NULL);
915             okay = FALSE;
916         }
917         /* classify */
918         if (okay && (!picokdt_dtACCclassify(acph->dtacc, &prevout))) {
919             /* error doing classification */
920             PICODBG_WARN(("problem classifying"));
921             picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
922                                   NULL, NULL);
923             okay = FALSE;
924         }
925         /* decompose */
926         if (okay && (!picokdt_dtACCdecomposeOutClass(acph->dtacc, &dtres))) {
927             /* error decomposing */
928             PICODBG_WARN(("problem decomposing"));
929             picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
930                                   NULL, NULL);
931             okay = FALSE;
932         }
933 
934         if (dtres.class > 255) {
935             PICODBG_WARN(("dt class outside valid range, setting to ACC0"));
936             dtres.class = PICODATA_ACC0;
937         }
938 
939         if (okay && dtres.set) {
940             PICODBG_DEBUG(("%d - inpos: %d, out: %d", i,valbuf[2],dtres.class));
941             if (acphIsWordWithoutStress(this, acph, i)) {
942                 if (dtres.class != PICODATA_ACC0) {
943                     acph->headx[i].head.info2 = PICODATA_ACC3;
944                 } else {
945                     acph->headx[i].head.info2 = (picoos_uint8)dtres.class;
946                 }
947             } else {
948                 acph->headx[i].head.info2 = (picoos_uint8)dtres.class;
949             }
950             PICODBG_DEBUG(("%d - after-nostress-corr: %d",
951                            i, acph->headx[i].head.info2));
952         } else {
953             PICODBG_WARN(("problem determining accentuation level"));
954             dtres.class = PICODATA_ITEMINFO1_ERR;
955         }
956     }
957     return PICO_OK;
958 }
959 
960 
961 
962 /* ***********************************************************************/
963 /* acphStep support functions */
964 /* ***********************************************************************/
965 
/**
 * Build a BOUND item with the given strength and type in acph->tmpbuf and
 * write it to the output char buffer of the processing unit.
 *
 * @param this            processing unit (output buffer, exception manager)
 * @param acph            sub-object providing tmpbuf
 * @param strength        value stored in info1 of the BOUND item
 * @param type            value stored in info2 of the BOUND item
 * @param dopuoutfull     out: TRUE if the output buffer reported an
 *                        overflow, FALSE otherwise
 * @param numBytesOutput  in/out: incremented by the length reported by
 *                        picodata_cbPutItem
 * @return TRUE if the item was put, FALSE on overflow (no exception
 *         raised, *dopuoutfull set) or on any other error (exception
 *         raised)
 */
static picoos_uint8 acphPutBoundItem(register picodata_ProcessingUnit this,
                                   register acph_subobj_t *acph,
                                   const picoos_uint8 strength,
                                   const picoos_uint8 type,
                                   picoos_uint8 *dopuoutfull,
                                   picoos_uint16 *numBytesOutput) {
    picodata_itemhead_t boundHead;
    picoos_uint16 itemLen = 0;
    pico_status_t status = PICO_OK;

    *dopuoutfull = FALSE;

    /* head of a content-less BOUND item */
    boundHead.len = 0;
    boundHead.info2 = type;
    boundHead.info1 = strength;
    boundHead.type = PICODATA_ITEM_BOUND;

    /* serialize the item into tmpbuf */
    status = picodata_put_itemparts(&boundHead, NULL, 0, acph->tmpbuf,
                                    PICODATA_MAX_ITEMSIZE, &itemLen);
    if (PICO_OK != status) {
        PICODBG_ERROR(("problem creating BOUND item"));
        picoos_emRaiseException(this->common->em, status, NULL, NULL);
        return FALSE;
    }

    /* hand the serialized item over to the external output buffer */
    status = picodata_cbPutItem(this->cbOut, acph->tmpbuf, itemLen, &itemLen);
    *numBytesOutput += itemLen;

    if (PICO_EXC_BUF_OVERFLOW == status) {
        PICODBG_DEBUG(("overflow in cb output buffer"));
        *dopuoutfull = TRUE;    /* caller reports PU_OUT_FULL */
        return FALSE;
    }
    if (PICO_OK != status) {
        PICODBG_ERROR(("problem putting BOUND item"));
        picoos_emRaiseException(this->common->em, status, NULL, NULL);
        return FALSE;
    }

    PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
                       (picoos_uint8 *)"acph: ", acph->tmpbuf, itemLen);

    return TRUE;
}
1009 
1010 
1011 
1012 /* ***********************************************************************/
1013 /*                          acphStep function                              */
1014 /* ***********************************************************************/
1015 
1016 /*
1017 complete phrase processed in one step, if not fast enough -> rework
1018 
1019 init, collect into internal buffer, process, and then feed to
1020 output buffer
1021 
1022 init state: INIT ext           ext
1023 state trans:     in hc1  hc2   out
1024 
1025 INIT | putItem   =  0    0    +1      | BUSY  -> COLL (put B-SBEG item,
1026                                                    set do-init to false)
1027 
1028                                     inspace-ok-hc1
1029                                   needs-more-items-(phrase-or-flush)
1030 COLL1 |getItems -n +n             0 1 | ATOMIC -> PPOSD     (got items,
1031                                                       if flush set do-init)
1032 COLL2 |getItems -n +n             1 0 | ATOMIC -> PPOSD (got items, forced)
1033 COLL3 |getItems -n +n             1 1 | IDLE          (got items, need more)
1034 COLL4 |getItems  =  =             1 1 | IDLE             (got no items)
1035 
1036 PPOSD | posd     = ~n~n               | BUSY     -> PWP     (posd done)
1037 PWP   | lex/g2p  = ~n-n  0+n          | BUSY     -> PPHR    (lex/g2p done)
1038 PPHR  | phr      = -n 0 +m=n          | BUSY     -> PACC    (phr done, m>=n)
1039 PACC  | acc      =  0 0 ~m=n          | BUSY     -> FEED    (acc done)
1040 
1041                                   doinit-flag
1042 FEED | putItems  0  0 0 -m-n  +m  0   | BUSY -> COLL    (put items)
1043 FEED | putItems  0  0 0 -m-n  +m  1   | BUSY -> INIT    (put items)
1044 FEED | putItems  0  0 0 -d-d  +d      | OUT_FULL        (put some items)
1045 */
1046 
acphStep(register picodata_ProcessingUnit this,picoos_int16 mode,picoos_uint16 * numBytesOutput)1047 static picodata_step_result_t acphStep(register picodata_ProcessingUnit this,
1048                                      picoos_int16 mode,
1049                                      picoos_uint16 *numBytesOutput) {
1050     register acph_subobj_t *acph;
1051     pico_status_t rv = PICO_OK;
1052     pico_status_t rvP = PICO_OK;
1053     picoos_uint16 blen = 0;
1054     picoos_uint16 clen = 0;
1055     picoos_uint16 i;
1056 
1057 
1058     if (NULL == this || NULL == this->subObj) {
1059         return PICODATA_PU_ERROR;
1060     }
1061     acph = (acph_subobj_t *) this->subObj;
1062     mode = mode;        /* avoid warning "var not used in this function"*/
1063     *numBytesOutput = 0;
1064     while (1) { /* exit via return */
1065         PICODBG_DEBUG(("doing state %i, hLen|c1Len: %d|%d",
1066                        acph->procState, acph->headxLen, acph->cbufLen));
1067 
1068         switch (acph->procState) {
1069 
1070             /* *********************************************************/
1071             /* collect state: get item(s) from charBuf and store in
1072              * internal buffers, need a complete punctuation-phrase
1073              */
1074             case SA_STEPSTATE_COLLECT:
1075 
1076                 while (acph->inspaceok && acph->needsmoreitems && (PICO_OK ==
1077                 (rv = picodata_cbGetItem(this->cbIn, acph->tmpbuf,
1078                                 PICODATA_MAX_ITEMSIZE, &blen)))) {
1079                     rvP = picodata_get_itemparts(acph->tmpbuf,
1080                     PICODATA_MAX_ITEMSIZE, &(acph->headx[acph->headxLen].head),
1081                             &(acph->cbuf[acph->cbufLen]), acph->cbufBufSize
1082                                     - acph->cbufLen, &clen);
1083                     if (rvP != PICO_OK) {
1084                         PICODBG_ERROR(("problem getting item parts"));
1085                         picoos_emRaiseException(this->common->em, rvP,
1086                         NULL, NULL);
1087                         return PICODATA_PU_ERROR;
1088                     }
1089 
1090                     /* if CMD(...FLUSH...) -> PUNC(...FLUSH...),
1091                      construct PUNC-FLUSH item in headx */
1092                     if ((acph->headx[acph->headxLen].head.type
1093                             == PICODATA_ITEM_CMD)
1094                             && (acph->headx[acph->headxLen].head.info1
1095                                     == PICODATA_ITEMINFO1_CMD_FLUSH)) {
1096                         acph->headx[acph->headxLen].head.type
1097                                 = PICODATA_ITEM_PUNC;
1098                         acph->headx[acph->headxLen].head.info1
1099                                 = PICODATA_ITEMINFO1_PUNC_FLUSH;
1100                         acph->headx[acph->headxLen].head.info2
1101                                 = PICODATA_ITEMINFO2_PUNC_SENT_T;
1102                         acph->headx[acph->headxLen].head.len = 0;
1103                     }
1104 
1105                     /* check/set needsmoreitems */
1106                     if (acph->headx[acph->headxLen].head.type
1107                             == PICODATA_ITEM_PUNC) {
1108                         acph->needsmoreitems = FALSE;
1109                     }
1110 
1111                     /* check/set inspaceok, keep spare slot for forcing */
1112                     if ((acph->headxLen >= (PICOACPH_MAXNR_HEADX - 2))
1113                             || ((acph->cbufBufSize - acph->cbufLen)
1114                                     < PICODATA_MAX_ITEMSIZE)) {
1115                         acph->inspaceok = FALSE;
1116                     }
1117 
1118                     if (clen > 0) {
1119                         acph->headx[acph->headxLen].cind = acph->cbufLen;
1120                         acph->cbufLen += clen;
1121                     } else {
1122                         acph->headx[acph->headxLen].cind = 0;
1123                     }
1124                     acph->headxLen++;
1125                 }
1126 
1127                 if (!acph->needsmoreitems) {
1128                     /* 1, phrase buffered */
1129                     acph->procState = SA_STEPSTATE_PROCESS_PHR;
1130                     return PICODATA_PU_ATOMIC;
1131                 } else if (!acph->inspaceok) {
1132                     /* 2, forced phrase end */
1133                     /* at least one slot is still free, use it to
1134                        force a trailing PUNC item */
1135                     acph->headx[acph->headxLen].head.type = PICODATA_ITEM_PUNC;
1136                     acph->headx[acph->headxLen].head.info1 =
1137                         PICODATA_ITEMINFO1_PUNC_PHRASEEND;
1138                     acph->headx[acph->headxLen].head.info2 =
1139                         PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED;
1140                     acph->headx[acph->headxLen].head.len = 0;
1141                     acph->needsmoreitems = FALSE; /* not really needed for now */
1142                     acph->headxLen++;
1143                     PICODBG_WARN(("forcing phrase end, added PUNC_PHRASEEND"));
1144                     picoos_emRaiseWarning(this->common->em,
1145                                           PICO_WARN_FALLBACK, NULL,
1146                                           (picoos_char *)"forced phrase end");
1147                     acph->procState = SA_STEPSTATE_PROCESS_PHR;
1148                     return PICODATA_PU_ATOMIC;
1149                 } else if (rv == PICO_EOF) {
1150                     /* 3, 4 */
1151                     return PICODATA_PU_IDLE;
1152                 } else if ((rv == PICO_EXC_BUF_UNDERFLOW) ||
1153                            (rv == PICO_EXC_BUF_OVERFLOW)) {
1154                     /* error, no valid item in cb (UNDER) */
1155                     /*        or tmpbuf not large enough, not possible (OVER) */
1156                     /* no exception raised, left for ctrl to handle */
1157                     PICODBG_ERROR(("buffer under/overflow, rv: %d", rv));
1158                     return PICODATA_PU_ERROR;
1159                 } else {
1160                     /* error, only possible if cbGetItem implementation
1161                        changes without this function being adapted*/
1162                     PICODBG_ERROR(("untreated return value, rv: %d", rv));
1163                     return PICODATA_PU_ERROR;
1164                 }
1165                 break;
1166 
1167 
1168 
1169 
1170             /* *********************************************************/
1171             /* process phr state: process items in headx and modify
1172              * headx in place
1173              */
1174             case SA_STEPSTATE_PROCESS_PHR:
1175                 /* ensure there is an item in inBuf */
1176                 if (acph->headxLen > 0) {
1177                     /* we have a phrase in headx, cbuf1 (can be
1178                        single PUNC item), do phrasing and modify headx */
1179 
1180                     if (PICO_OK != acphSubPhrasing(this, acph)) {
1181                         picoos_emRaiseException(this->common->em,
1182                                                 PICO_ERR_OTHER, NULL, NULL);
1183                         return PICODATA_PU_ERROR;
1184                     }
1185                     acph->procState = SA_STEPSTATE_PROCESS_ACC;
1186                 } else if (acph->headxLen == 0) {    /* no items in inBuf */
1187                     PICODBG_WARN(("no items in inBuf"));
1188                     acph->procState = SA_STEPSTATE_COLLECT;
1189                     return PICODATA_PU_BUSY;
1190                 }
1191 
1192 #if defined (PICO_DEBUG_NOTNEEDED)
1193                 if (1) {
1194                     picoos_uint8 i, j, ittype;
1195                     for (i = 0; i < acph->headxLen; i++) {
1196                         if ((acph->headx[i].boundstrength != 0) &&
1197                             (acph->headx[i].boundstrength !=
1198                              PICODATA_ITEMINFO1_BOUND_PHR0)) {
1199                             PICODBG_INFO(("acph-p: boundstrength '%c', "
1200                                           "boundtype '%c'",
1201                                           acph->headx[i].boundstrength,
1202                                           acph->headx[i].boundtype));
1203                         }
1204 
1205                         ittype = acph->headx[i].head.type;
1206                         PICODBG_INFO_CTX();
1207                         PICODBG_INFO_MSG(("acph-p: ("));
1208                         PICODBG_INFO_MSG(("'%c',", ittype));
1209                         if ((32 <= acph->headx[i].head.info1) &&
1210                             (acph->headx[i].head.info1 < 127) &&
1211                             (ittype != PICODATA_ITEM_WORDPHON)) {
1212                             PICODBG_INFO_MSG(("'%c',",acph->headx[i].head.info1));
1213                         } else {
1214                             PICODBG_INFO_MSG(("%3d,", acph->headx[i].head.info1));
1215                         }
1216                         if ((32 <= acph->headx[i].head.info2) &&
1217                             (acph->headx[i].head.info2 < 127)) {
1218                             PICODBG_INFO_MSG(("'%c',",acph->headx[i].head.info2));
1219                         } else {
1220                             PICODBG_INFO_MSG(("%3d,", acph->headx[i].head.info2));
1221                         }
1222                         PICODBG_INFO_MSG(("%3d)", acph->headx[i].head.len));
1223 
1224                         for (j = 0; j < acph->headx[i].head.len; j++) {
1225                             if ((ittype == PICODATA_ITEM_CMD)) {
1226                                 PICODBG_INFO_MSG(("%c",
1227                                         acph->cbuf[acph->headx[i].cind+j]));
1228                             } else {
1229                                 PICODBG_INFO_MSG(("%4d",
1230                                         acph->cbuf[acph->headx[i].cind+j]));
1231                             }
1232                         }
1233                         PICODBG_INFO_MSG(("\n"));
1234                     }
1235                 }
1236 #endif
1237 
1238                 break;
1239 
1240 
1241             /* *********************************************************/
1242             /* process acc state: process items in headx and modify
1243              * headx in place
1244              */
1245             case SA_STEPSTATE_PROCESS_ACC:
1246                 /* ensure there is an item in inBuf */
1247                 if (acph->headxLen > 0) {
1248                     /* we have a phrase in headx, cbuf (can be
1249                        single PUNC item), do accentuation and modify headx */
1250                     if (PICO_OK != acphAccentuation(this, acph)) {
1251                         picoos_emRaiseException(this->common->em,
1252                                                 PICO_ERR_OTHER, NULL, NULL);
1253                         return PICODATA_PU_ERROR;
1254                     }
1255                     acph->procState = SA_STEPSTATE_FEED;
1256                 } else if (acph->headxLen == 0) {    /* no items in inBuf */
1257                     PICODBG_WARN(("no items in inBuf"));
1258                     acph->procState = SA_STEPSTATE_COLLECT;
1259                     return PICODATA_PU_BUSY;
1260                 }
1261                 break;
1262 
1263 
1264             /* *********************************************************/
1265             /* feed state: copy item in internal outBuf to output charBuf */
1266             case SA_STEPSTATE_FEED: {
1267                 picoos_uint16 indupbound;
1268                 picoos_uint8 dopuoutfull;
1269 
1270                 PICODBG_DEBUG(("put out items (bot, len): (%d, %d)",
1271                                acph->headxBottom, acph->headxLen));
1272 
1273                 indupbound = acph->headxBottom + acph->headxLen;
1274                 dopuoutfull = FALSE;
1275 
1276                 if (acph->headxBottom == 0) {
1277                     /* construct first BOUND item in tmpbuf and put item */
1278                     /* produce BOUND unless it is followed by a term/flush) */
1279                     if (acph->headx[0].head.info1
1280                             != PICODATA_ITEMINFO1_PUNC_FLUSH) {
1281                         if (!acphPutBoundItem(this, acph,
1282                                 acph->headx[0].boundstrength,
1283                                 acph->headx[0].boundtype, &dopuoutfull,
1284                                 numBytesOutput)) {
1285                             if (dopuoutfull) {
1286                                 PICODBG_DEBUG(("feeding overflow"));
1287                                 return PICODATA_PU_OUT_FULL;
1288                             } else {
1289                                 /* ERR-msg and exception done in acphPutBoundItem */
1290                                 return PICODATA_PU_ERROR;
1291                             }
1292                         }
1293                     }
1294                 }
1295 
1296                 /* for all items in headx, cbuf */
1297                 for (i = acph->headxBottom; i < indupbound; i++) {
1298 
1299                     switch (acph->headx[i].head.type) {
1300                         case PICODATA_ITEM_PUNC:
1301                             /* if sentence end, put SEND bound */
1302                             if ((acph->headx[i].head.info1 ==
1303                                  PICODATA_ITEMINFO1_PUNC_SENTEND) &&
1304                                 (i == (indupbound - 1))) {
1305                                 /* construct and put BOUND item */
1306                                 if (!acphPutBoundItem(this, acph,
1307                                             PICODATA_ITEMINFO1_BOUND_SEND,
1308                                             PICODATA_ITEMINFO2_NA,
1309                                             &dopuoutfull, numBytesOutput)) {
1310                                     if (dopuoutfull) {
1311                                         PICODBG_DEBUG(("feeding overflow"));
1312                                         return PICODATA_PU_OUT_FULL;
1313                                     } else {
1314                                         /* ERR-msg and exception done
1315                                            in acphPutBoundItem */
1316                                         return PICODATA_PU_ERROR;
1317                                     }
1318                                 }
1319                             } else if ((acph->headx[i].head.info1 ==
1320                                  PICODATA_ITEMINFO1_PUNC_FLUSH) &&
1321                                 (i == (indupbound - 1))) {
1322                                 /* construct and put BOUND item */
1323                                 if (!acphPutBoundItem(this, acph,
1324                                             PICODATA_ITEMINFO1_BOUND_TERM,
1325                                             PICODATA_ITEMINFO2_NA,
1326                                             &dopuoutfull, numBytesOutput)) {
1327                                     if (dopuoutfull) {
1328                                         PICODBG_DEBUG(("feeding overflow"));
1329                                         return PICODATA_PU_OUT_FULL;
1330                                     } else {
1331                                         /* ERR-msg and exception done
1332                                            in acphPutBoundItem */
1333                                         return PICODATA_PU_ERROR;
1334                                     }
1335                                 }
1336                             }
1337                             /* else, good-bye PUNC, not needed anymore */
1338                             break;
1339                         default:
1340 
1341                             /* PHR2/3 maybe existing, check and add
1342                                BOUND item now, if needed */
1343                             if ((acph->headx[i].boundstrength ==
1344                                  PICODATA_ITEMINFO1_BOUND_PHR2) ||
1345                                 (acph->headx[i].boundstrength ==
1346                                  PICODATA_ITEMINFO1_BOUND_PHR3)) {
1347                                 if (!acphPutBoundItem(this, acph,
1348                                             acph->headx[i].boundstrength,
1349                                             acph->headx[i].boundtype,
1350                                             &dopuoutfull, numBytesOutput)) {
1351                                     if (dopuoutfull) {
1352                                         PICODBG_DEBUG(("feeding overflow"));
1353                                         return PICODATA_PU_OUT_FULL;
1354                                     } else {
1355                                         /* ERR-msg and exception done
1356                                            in acphPutBoundItem */
1357                                         return PICODATA_PU_ERROR;
1358                                     }
1359                                 }
1360                             }
1361 
1362                             /* copy item unmodified */
1363                             rv = picodata_put_itemparts(&(acph->headx[i].head),
1364                                      &(acph->cbuf[acph->headx[i].cind]),
1365                                      acph->headx[i].head.len,
1366                                      acph->tmpbuf, PICODATA_MAX_ITEMSIZE,
1367                                      &blen);
1368 
1369                             rvP = picodata_cbPutItem(this->cbOut, acph->tmpbuf,
1370                                     PICODATA_MAX_ITEMSIZE, &clen);
1371 
1372                             *numBytesOutput += clen;
1373 
1374                             PICODBG_DEBUG(("put item, status: %d", rvP));
1375 
1376                             if (rvP == PICO_OK) {
1377                                 acph->headxBottom++;
1378                                 acph->headxLen--;
1379                             } else if (rvP == PICO_EXC_BUF_OVERFLOW) {
1380                                 /* try again next time, but PHR2/3
1381                                    bound already added if existing,
1382                                    ensure it's not output a 2nd
1383                                    time */
1384                                 PICODBG_DEBUG(("feeding overflow"));
1385                                 acph->headx[i].boundstrength = 0;
1386                                 return PICODATA_PU_OUT_FULL;
1387                             } else {
1388                                 /* error, should never happen */
1389                                 PICODBG_ERROR(("untreated return value, rvP: %d", rvP));
1390                                 return PICODATA_PU_ERROR;
1391                             }
1392 
1393                             PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
1394                                                (picoos_uint8 *)"acph: ",
1395                                                acph->tmpbuf, PICODATA_MAX_ITEMSIZE);
1396 
1397                             break;
1398                     } /*switch*/
1399                 } /*for*/
1400 
1401                 /* reset headx, cbuf */
1402                 acph->headxBottom = 0;
1403                 acph->headxLen = 0;
1404                 acph->cbufLen = 0;
1405                 for (i = 0; i < PICOACPH_MAXNR_HEADX; i++) {
1406                     acph->headx[i].boundstrength = 0;
1407                 }
1408 
1409                 /* reset collect state support variables */
1410                 acph->inspaceok = TRUE;
1411                 acph->needsmoreitems = TRUE;
1412 
1413                 acph->procState = SA_STEPSTATE_COLLECT;
1414                 return PICODATA_PU_BUSY;
1415                 break;
1416             }
1417 
1418             default:
1419                 break;
1420         } /* switch */
1421 
1422     } /* while */
1423 
1424     /* should be never reached */
1425     PICODBG_ERROR(("reached end of function"));
1426     picoos_emRaiseException(this->common->em, PICO_ERR_OTHER, NULL, NULL);
1427     return PICODATA_PU_ERROR;
1428 }
1429 
1430 #ifdef __cplusplus
1431 }
1432 #endif
1433 
1434 
1435 /* end */
1436