1 /*
2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /**
17  * @file picotrns.c
18  *
19  * fst processing
20  *
21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22  * All rights reserved.
23  *
24  * History:
25  * - 2009-04-20 -- initial version
26  *
27  */
28 
29 #include "picoos.h"
30 #include "picodbg.h"
31 /* #include "picodata.h" */
32 /* #include "picoknow.h" */
33 #include "picoktab.h"
34 #include "picokfst.h"
35 #include "picotrns.h"
36 
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40 #if 0
41 }
42 #endif
43 
44 
45 
/**
 * Splits a plane-encoded symbol into its plane and its 8-bit symbol id.
 *
 * @param symIn  symbol with the plane in the high byte and the id in the low byte
 * @param plane  (out) receives the plane; set to 0 when symIn is negative
 * @return the low 8 bits of symIn
 */
picoos_uint8 picotrns_unplane(picoos_int16 symIn, picoos_uint8 * plane) {
    const int isNegative = (symIn < 0);

    /* negative symbols carry no plane information */
    (*plane) = isNegative ? 0 : (symIn >> 8);
    return isNegative ? (picoos_uint8) symIn : (picoos_uint8) (symIn & 0xFF);
}
55 
56 #if defined(PICO_DEBUG)
57 
PICOTRNS_PRINTSYM1(picoknow_KnowledgeBase kbdbg,picoos_int16 insym,picoos_uint8 phonemic)58 void PICOTRNS_PRINTSYM1(picoknow_KnowledgeBase kbdbg, picoos_int16 insym, picoos_uint8 phonemic)
59 {
60 #include "picokdbg.h"
61     picoos_int16 sym;
62     picoos_uint8 plane;
63     picokdbg_Dbg dbg = (NULL == kbdbg) ? NULL :  picokdbg_getDbg(kbdbg);
64     sym = picotrns_unplane(insym, &plane);
65     switch (plane) {
66         case PICOKFST_PLANE_PHONEMES: /* phones */
67             if ((NULL == dbg) || !phonemic) {
68                 PICODBG_INFO_MSG((" %c", sym));
69             } else {
70                 PICODBG_INFO_MSG((" %s", picokdbg_getPhoneSym(dbg, (picoos_uint8) sym)));
71             }
72             break;
73         case PICOKFST_PLANE_ACCENTS: /* accents */
74             PICODBG_INFO_MSG((" {A%c}", sym));
75             break;
76         case PICOKFST_PLANE_XSAMPA: /* xsampa symbols */
77             PICODBG_INFO_MSG((" {XS:(%i)}", sym));
78             break;
79         case PICOKFST_PLANE_POS: /* part of speech */
80             PICODBG_INFO_MSG((" {P:%d}", sym));
81             break;
82         case PICOKFST_PLANE_PB_STRENGTHS: /* phrases */
83             if (sym == 48) {
84                 PICODBG_INFO_MSG((" {WB}", sym));
85             } else if (sym == 115) {
86                 PICODBG_INFO_MSG((" {P0}", sym));
87             } else {
88                 PICODBG_INFO_MSG((" {P%c}", sym));
89             }
90             break;
91         case PICOKFST_PLANE_INTERN: /* intern */
92             PICODBG_INFO_MSG((" [%c]", sym));
93             break;
94     }
95 }
96 
/**
 * Debug helper: prints one symbol with the phonemic flag switched on.
 *
 * @param kbdbg  debug knowledge base, or NULL
 * @param insym  plane-encoded symbol to print
 */
void PICOTRNS_PRINTSYM(picoknow_KnowledgeBase kbdbg, picoos_int16 insym)
{
    const picoos_uint8 phonemic = 1;

    PICOTRNS_PRINTSYM1(kbdbg, insym, phonemic);
}
101 
/**
 * Debug helper: prints every symbol of a pos/sym sequence in order.
 *
 * @param kbdbg     debug knowledge base, or NULL
 * @param seq       sequence of pos/sym pairs
 * @param seqLen    number of elements in seq
 * @param phonemic  forwarded unchanged to PICOTRNS_PRINTSYM1
 */
void PICOTRNS_PRINTSYMSEQ1(picoknow_KnowledgeBase kbdbg, const picotrns_possym_t seq[], const picoos_uint16 seqLen,
                           picoos_uint8 phonemic) {
    picoos_uint16 idx = 0;

    while (idx < seqLen) {
        PICOTRNS_PRINTSYM1(kbdbg, seq[idx].sym, phonemic);
        idx++;
    }
}
109 
/**
 * Debug helper: prints a pos/sym sequence with the phonemic flag switched on.
 *
 * @param kbdbg   debug knowledge base, or NULL
 * @param seq     sequence of pos/sym pairs
 * @param seqLen  number of elements in seq
 */
void PICOTRNS_PRINTSYMSEQ(picoknow_KnowledgeBase kbdbg, const picotrns_possym_t seq[], const picoos_uint16 seqLen) {
    const picoos_uint8 phonemic = 1;

    PICOTRNS_PRINTSYMSEQ1(kbdbg, seq, seqLen, phonemic);
}
113 
picotrns_printSolution(const picotrns_possym_t outSeq[],const picoos_uint16 outSeqLen)114 void picotrns_printSolution(const picotrns_possym_t outSeq[], const picoos_uint16 outSeqLen)
115 {
116     PICODBG_INFO_CTX();
117     PICODBG_INFO_MSG(("solution: "));
118         PICOTRNS_PRINTSYMSEQ(NULL, outSeq, outSeqLen);
119     PICODBG_INFO_MSG(("\n"));
120 }
121 
picotrns_printSolutionAscii(const picotrns_possym_t outSeq[],const picoos_uint16 outSeqLen)122 void picotrns_printSolutionAscii(const picotrns_possym_t outSeq[], const picoos_uint16 outSeqLen)
123 {
124     PICODBG_INFO_CTX();
125     PICODBG_INFO_MSG(("solution: "));
126         PICOTRNS_PRINTSYMSEQ1(NULL, outSeq, outSeqLen,0);
127     PICODBG_INFO_MSG(("\n"));
128 }
129 
130 #endif
131 
132 
133 
134 
135 /* * +CT+ ***/
136 struct picotrns_transductionState {
137     picoos_uint16 phase;   /* transduction phase:
138                               0 = before start
139                               1 = before regular recursion step
140                               2 = before finish
141                               3 = after finish */
142     picoos_uint32 nrSol;   /* nr of solutions so far */
143     picoos_int16  recPos;  /* recursion position; must be signed! */
144 };
145 
146 typedef struct picotrns_altDesc {
147     picokfst_state_t startFSTState;   /**< starting FST state in current recursion position */
148     picoos_int32     inPos;           /**< corresponding position in input string */
149     picokfst_state_t altState;        /**< state of alternatives search;
150                                          - 0 = before pair search
151                                          - 1 = search state is a valid pair search state
152                                          - 2 = before inEps search
153                                          - 3 = search state is a valid inEps trans search state
154                                          - 4 = no more alternatives */
155     picoos_int32     searchState;     /**< pair search state or inEps trans search state */
156     picokfst_symid_t altOutSym;       /**< current output symbol at this recursion position */
157     picoos_int32     altOutRefPos;    /**< output reference position at this recursion position */
158 } picotrns_altDesc_t;
159 
160 
picotrns_allocate_alt_desc_buf(picoos_MemoryManager mm,picoos_uint32 maxByteSize,picoos_uint16 * numAltDescs)161 picotrns_AltDesc picotrns_allocate_alt_desc_buf(picoos_MemoryManager mm, picoos_uint32 maxByteSize, picoos_uint16 * numAltDescs)
162 {
163     picotrns_AltDesc buf;
164     (*numAltDescs) = (picoos_uint32) (maxByteSize / sizeof(picotrns_altDesc_t));
165     buf =  (picotrns_AltDesc) picoos_allocate(mm, (*numAltDescs) * sizeof(picotrns_altDesc_t));
166     if (NULL == buf) {
167         (*numAltDescs) = 0;
168         return NULL;
169     } else {
170         return buf;
171     }
172 }
173 
/**
 * Releases a buffer obtained from picotrns_allocate_alt_desc_buf.
 *
 * @param mm          memory manager the buffer was allocated from
 * @param altDescBuf  address of the buffer handle; handed on to picoos_deallocate
 */
void picotrns_deallocate_alt_desc_buf(picoos_MemoryManager mm, picotrns_AltDesc * altDescBuf)
{
    void * handleAddr = (void *) altDescBuf;

    picoos_deallocate(mm, handleAddr);
}
178 
179 /* copy elements from inSeq to outSeq, ignoring elements with epsilon symbol */
picotrns_eliminate_epsilons(const picotrns_possym_t inSeq[],picoos_uint16 inSeqLen,picotrns_possym_t outSeq[],picoos_uint16 * outSeqLen,picoos_uint16 maxOutSeqLen)180 pico_status_t picotrns_eliminate_epsilons(const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
181         picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen)
182 {
183     picoos_uint16 i, j = 0;
184 
185     for (i=0; i < inSeqLen; i++) {
186         /* it is assumed that PICOKFST_SYMID_EPS is a hardwired value and not shifted */
187         if (PICOKFST_SYMID_EPS != inSeq[i].sym) {
188             if (j < maxOutSeqLen) {
189                 outSeq[j].pos = inSeq[i].pos;
190                 outSeq[j].sym = inSeq[i].sym;
191                 j++;
192             }
193         }
194         *outSeqLen = j;
195     }
196     return PICO_OK;
197 }
198 
199 
/* Overwrites element 'pos' of 'inSeq' with symbol 'sym' and marks it as an
 * inserted symbol (reference position PICOTRNS_POS_INSERT). */
static void insertSym(picotrns_possym_t inSeq[], picoos_uint16 pos, picoos_int16 sym) {
    picotrns_possym_t * slot = &inSeq[pos];

    slot->pos = PICOTRNS_POS_INSERT;
    slot->sym = sym;
}
204 
205 /* copy elements from inSeq to outSeq, inserting syllable separators in some trivial way.
206  * inSeq is assumed to be at most PICOTRNS_MAX_NUM_POSSYM, outSeq at least of size PICOTRNS_MAX_NUM_POSSYM  */
picotrns_trivial_syllabify(picoktab_Phones phones,const picotrns_possym_t inSeq[],const picoos_uint16 inSeqLen,picotrns_possym_t outSeq[],picoos_uint16 * outSeqLen,picoos_uint16 maxOutSeqLen)207 pico_status_t picotrns_trivial_syllabify(picoktab_Phones phones,
208         const picotrns_possym_t inSeq[], const picoos_uint16 inSeqLen,
209         picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen)
210 {
211     picoos_uint16 i = 0, j = 0, out = 0, numInserted = 0;
212     picoos_uint8 vowelFound = FALSE;
213     picoos_uint16 accentpos = 0;
214     picoos_int16 accent = 0;
215 
216     PICODBG_TRACE(("start"));
217 
218 
219     while (i < inSeqLen) {
220         /* make sure that at least one more sylSep can be inserted */
221         if (inSeqLen+numInserted+1 >= maxOutSeqLen) {
222             return PICO_EXC_BUF_OVERFLOW;
223         }
224        /* let j skip consonant cluster */
225         accent = 0;
226         accentpos = 0;
227         while ((j < inSeqLen) && !picoktab_isSyllCarrier(phones,(picoos_uint8)inSeq[j].sym)) {
228             if ((inSeq[j].sym == picoktab_getPrimstressID(phones))
229                     || (inSeq[j].sym == picoktab_getPrimstressID(phones))) {
230                 PICODBG_TRACE(("j skipping stress symbol inSeq[%i].sym = %c", j, inSeq[j].sym));
231                 accent = inSeq[j].sym;
232                 accentpos = j;
233             } else {
234                 PICODBG_TRACE(("j skipping consonant inSeq[%i].sym = %c", j, inSeq[j].sym));
235             }
236             j++;
237         }
238         if (j < inSeqLen) { /* j is at the start of a new vowel */
239             /* copy consonant cluster (moving i) to output, insert syll separator if between vowels */
240             while (i < j-1) {
241                 if ((accent > 0) && (i == accentpos)) {
242                     PICODBG_TRACE(("skipping inSeq[%i].sym = %c (stress)", i, inSeq[i].sym));
243                   i++;
244                 } else {
245                 PICODBG_TRACE(("copying inSeq[%i].sym = %c (consonant) into output buffer", i, inSeq[i].sym));
246                  outSeq[out++] = inSeq[i++];
247                 }
248             }
249             if (vowelFound) { /* we're between vowels */
250                 PICODBG_TRACE(("inserting syllable separator into output buffer"));
251                 insertSym(outSeq,out++,picoktab_getSyllboundID(phones));
252                 if (accent > 0) {
253                     insertSym(outSeq,out++,accent);
254                 }
255                 numInserted++;
256             }
257             if ((accent > 0) && (i == accentpos)) {
258                 PICODBG_TRACE(("skipping inSeq[%i].sym = %c (stress)", i, inSeq[i].sym));
259               i++;
260             } else {
261             PICODBG_TRACE(("copying inSeq[%i].sym = %c (consonant) into output buffer", i, inSeq[i].sym));
262              outSeq[out++] = inSeq[i++];
263             }
264             vowelFound = TRUE;
265             /* now copy vowel cluster */
266             while ((i < inSeqLen) && picoktab_isSyllCarrier(phones,(picoos_uint8)inSeq[i].sym)) {
267                 PICODBG_TRACE(("copying inSeq[%i].sym = %c (vowel) into output buffer", i, inSeq[i].sym));
268                 outSeq[out++] = inSeq[i++];
269             }
270             j = i;
271         } else { /* j is at end of word or end of input */
272             while (i < j) {
273                 PICODBG_TRACE(("copying inSeq[%i].sym = %c (consonant or stress) into output buffer", i, inSeq[i].sym));
274                 outSeq[out++] = inSeq[i++];
275             }
276         }
277         *outSeqLen = out;
278     }
279     PICODBG_ASSERT((out == inSeqLen + numInserted));
280 
281     return PICO_OK;
282 }
283 
284 
285 /* ******** +CT+: full transduction procedure **********/
286 
287 
288 /* Gets next acceptable alternative for output symbol '*outSym' at current recursion position
289    starting from previous alternative in 'altDesc'; possibly uses input symbol
290    given by 'inSeq'/'inSeq'; returns whether alterative was found in '*found';
291    if '*found', the other output values ('*outRefPos', '*endFSTstate', '*nextInPos'*)
292    return the characteristics for next recursion step;
293    if '*found' is false, the output values are undefined. */
294 
GetNextAlternative(picokfst_FST fst,picotrns_AltDesc altDesc,const picotrns_possym_t inSeq[],picoos_uint16 inSeqLen,picokfst_symid_t * outSym,picoos_int32 * outRefPos,picokfst_state_t * endFSTState,picoos_int32 * nextInPos,picoos_bool * found)295 static void GetNextAlternative (picokfst_FST fst, picotrns_AltDesc altDesc,
296                                 const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
297                                 picokfst_symid_t * outSym, picoos_int32 * outRefPos,
298                                 picokfst_state_t * endFSTState, picoos_int32 * nextInPos, picoos_bool * found)
299 {
300 
301     picoos_bool inSymFound;
302     picoos_bool pairFound;
303     picokfst_class_t pairClass;
304     picoos_bool inEpsTransFound;
305     picokfst_symid_t inSym;
306 
307     (*found) = 0;
308     do {
309         switch (altDesc->altState) {
310             case 0:   /* before pair search */
311                 if (altDesc->inPos < inSeqLen) {
312                     inSym = inSeq[altDesc->inPos].sym;
313                     if (inSym == PICOKFST_SYMID_EPS) {
314                         /* very special case: input epsilon simply produces eps in output
315                            without fst state change */
316                         (*found) = 1;
317                         (*outSym) = PICOKFST_SYMID_EPS;
318                         (*outRefPos) = inSeq[altDesc->inPos].pos;
319                         (*endFSTState) = altDesc->startFSTState;
320                         (*nextInPos) = altDesc->inPos + 1;
321                         altDesc->altState = 2;
322                     } else {
323                         /* start search for alternatives using input symbol */
324                         picokfst_kfstStartPairSearch(fst,inSeq[altDesc->inPos].sym,& inSymFound,& altDesc->searchState);
325                         if (!inSymFound) {
326                             altDesc->altState = 2;
327                             PICODBG_INFO_CTX();
328                             PICODBG_INFO_MSG((" didnt find symbol "));
329                             PICOTRNS_PRINTSYM(NULL, inSeq[altDesc->inPos].sym);
330                             PICODBG_INFO_MSG(("\n"));
331 
332                         } else {
333                             altDesc->altState = 1;
334                         }
335                     }
336                 } else {
337                     altDesc->altState = 2;
338                 }
339                 break;
340             case 1:   /* within pair search */
341                 picokfst_kfstGetNextPair(fst,& altDesc->searchState,& pairFound,& (*outSym),& pairClass);
342                 if (pairFound) {
343                     picokfst_kfstGetTrans(fst,altDesc->startFSTState,pairClass,& (*endFSTState));
344                     if ((*endFSTState) > 0) {
345                         (*found) = 1;
346                         (*outRefPos) = inSeq[altDesc->inPos].pos;
347                         (*nextInPos) = altDesc->inPos + 1;
348                     }
349                 } else {
350                     /* no more pair found */
351                     altDesc->altState = 2;
352                 }
353                 break;
354             case 2:   /* before inEps trans search */
355                 picokfst_kfstStartInEpsTransSearch(fst,altDesc->startFSTState,& inEpsTransFound,& altDesc->searchState);
356                 if (inEpsTransFound) {
357                     altDesc->altState = 3;
358                 } else {
359                     altDesc->altState = 4;
360                 }
361                 break;
362             case 3:   /* within inEps trans search */
363                 picokfst_kfstGetNextInEpsTrans(fst,& altDesc->searchState,& inEpsTransFound,& (*outSym),& (*endFSTState));
364                 if (inEpsTransFound) {
365                     (*found) = 1;
366                     (*outRefPos) =  PICOTRNS_POS_INSERT;
367                     (*nextInPos) = altDesc->inPos;
368                 } else {
369                     altDesc->altState = 4;
370                 }
371                 break;
372             case 4:   /* no more alternatives */
373                 break;
374         }
375     } while (! ((*found) || (altDesc->altState == 4)) );  /* i.e., until (*found) || (altState == 4) */
376 }
377 
378 
379 
380 /* Transfers current alternatives path stored in 'altDesc' with current path length 'pathLen'
381    into 'outSeq'/'outSeqLen'. The number of solutions is incremented. */
382 
NoteSolution(picoos_uint32 * nrSol,picotrns_printSolutionFct printSolution,picotrns_altDesc_t altDesc[],picoos_uint16 pathLen,picotrns_possym_t outSeq[],picoos_uint16 * outSeqLen,picoos_uint16 maxOutSeqLen)383 static void NoteSolution (picoos_uint32 * nrSol, picotrns_printSolutionFct printSolution,
384                           picotrns_altDesc_t altDesc[], picoos_uint16 pathLen,
385                           picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen)
386 {
387     register picotrns_AltDesc ap;
388     picoos_uint32 i;
389 
390     (*nrSol)++;
391     (*outSeqLen) = 0;
392     for (i = 0; i < pathLen; i++) {
393         if (i < maxOutSeqLen) {
394             ap = &altDesc[i];
395             outSeq[i].sym = ap->altOutSym;
396             outSeq[i].pos = ap->altOutRefPos;
397             (*outSeqLen)++;
398         }
399     }
400     if (pathLen > maxOutSeqLen) {
401         PICODBG_WARN(("**** output symbol array too small to hold full solution\n"));
402     }
403     if (printSolution != NULL) {
404         printSolution(outSeq,(*outSeqLen));
405     }
406 }
407 
408 
409 
410 /* *
411     general scheme to get all solutions ("position" refers to abstract backtracking recursion depth,
412     which in the current solution is equal to the output symbol position):
413 
414     "set position to first position";
415     "initialize alternatives in first position";
416     REPEAT
417       IF "current state in current position is a solution" THEN
418         "note solution";
419       END;
420       "get first or next acceptable alternative in current position";
421       IF "acceptable alternative found" THEN
422         "note alternative";
423         "go to next position";
424         "initialize alternatives in that position";
425       ELSE
426         "step back to previous position";
427       END;
428     UNTIL "current position is before first position"
429 ***/
430 
431 
432 /* Initializes transduction state for further use in repeated application
433    of 'TransductionStep'. */
434 
StartTransduction(struct picotrns_transductionState * transductionState)435 static void StartTransduction (struct picotrns_transductionState * transductionState)
436 {
437     (*transductionState).phase = 0;
438 }
439 
440 
441 
442 /* Performs one step in the transduction of 'inSeqLen' input symbols with corresponding
443    reference positions in 'inSeq'. '*transductionState' must have been
444    initialized by 'StartTransduction'. Repeat calls to this procedure until '*finished' returns true.
445    The output is returned in 'outSeqLen' symbols and reference positions in 'outSeq'.
446    The output reference positions refer to the corresponding input reference positions.
447    Inserted output symbols receive the reference position -1. If several solutions are possible,
448    only the last found solution is returned.
449    'altDesc' is a temporary workspace which should be at least one cell longer than 'outSeq'.
450    'firstSolOnly' determines whether only the first solution should be found or if
451    the search should go on to find all solutions (mainly for testing purposes).
452 
453    NOTE: current version written for use in single repetitive steps;
454    could be simplified if full transduction can be done as an atomic operation */
455 
TransductionStep(picokfst_FST fst,struct picotrns_transductionState * transductionState,picotrns_altDesc_t altDesc[],picoos_uint16 maxAltDescLen,picoos_bool firstSolOnly,picotrns_printSolutionFct printSolution,const picotrns_possym_t inSeq[],picoos_uint16 inSeqLen,picotrns_possym_t outSeq[],picoos_uint16 * outSeqLen,picoos_uint16 maxOutSeqLen,picoos_bool * finished)456 static void TransductionStep (picokfst_FST fst, struct picotrns_transductionState * transductionState,
457                               picotrns_altDesc_t altDesc[], picoos_uint16 maxAltDescLen,
458                               picoos_bool firstSolOnly, picotrns_printSolutionFct printSolution,
459                               const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
460                               picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen,
461                               picoos_bool * finished)
462 {
463     register picotrns_AltDesc ap;
464     picoos_int32 i;
465     picokfst_state_t endFSTState;
466     picoos_int32 nextInPos;
467     picoos_bool found;
468     picokfst_symid_t outSym;
469     picoos_int32 outRefPos;
470     picoos_int32 tmpRecPos;
471 
472     (*finished) = 0;
473     tmpRecPos = (*transductionState).recPos;
474     switch ((*transductionState).phase) {
475         case 0:   /* before initialization */
476             (*transductionState).nrSol = 0;
477 
478             /* check for initial solution (empty strings are always accepted) */
479             if (inSeqLen == 0) {
480                 NoteSolution(& (*transductionState).nrSol,printSolution,altDesc,0,outSeq,outSeqLen,maxOutSeqLen);
481             }
482 
483             /* initialize first recursion position */
484             tmpRecPos = 0;
485             ap = & altDesc[0];
486             ap->startFSTState = 1;
487             ap->inPos = 0;
488             ap->altState = 0;
489             (*transductionState).phase = 1;
490             break;
491 
492         case 1:   /* before regular recursion step */
493             if ((tmpRecPos < 0) || (firstSolOnly && ((*transductionState).nrSol > 0))) {
494                 /* end reached */
495                 (*transductionState).phase = 2;
496             } else {
497                 /* not finished; do regular step */
498 
499                 /* get first or next acceptable alternative in current position */
500                 GetNextAlternative(fst,& altDesc[tmpRecPos],inSeq,inSeqLen,& outSym,& outRefPos,& endFSTState,& nextInPos,& found);
501                 if (found) {
502                     /* note alternative in current position */
503                     ap = & altDesc[tmpRecPos];
504                     ap->altOutSym = outSym;
505                     ap->altOutRefPos = outRefPos;
506 
507                     /* check for solution after found alternative */
508                     if ((nextInPos == inSeqLen) && picokfst_kfstIsAcceptingState(fst,endFSTState)) {
509                         NoteSolution(& (*transductionState).nrSol,printSolution,altDesc,tmpRecPos+1,
510                                      outSeq,outSeqLen,maxOutSeqLen);
511                     }
512 
513                     /* go to next position if possible, start search for follower alternative symbols */
514                     if (tmpRecPos < maxAltDescLen-1) {
515                         /* got to next position */
516                         tmpRecPos = tmpRecPos + 1;
517 
518                         /* initialize alternatives in new position */
519                         ap = & altDesc[tmpRecPos];
520                         ap->startFSTState = endFSTState;
521                         ap->inPos = nextInPos;
522                         ap->altState = 0;
523 
524                     } else {
525                         /* do not go on due to limited path but still treat alternatives in current position */
526                         PICODBG_WARN(("--- transduction path too long; may fail to find solution\n"));
527                     }
528                 } else {  /* no more acceptable alternative found in current position */
529                     /* backtrack to previous recursion */
530                     tmpRecPos = tmpRecPos - 1;
531                 }
532             }
533             break;
534 
535         case 2:   /* before finish */
536             if ((*transductionState).nrSol == 0) {
537                 PICODBG_WARN(("--- no transduction solution found, using input as output\n"));
538                 i = 0;
539                 while ((i < inSeqLen) && (i < maxOutSeqLen)) {
540                     outSeq[i].sym = inSeq[i].sym;
541                     outSeq[i].pos = inSeq[i].pos;
542                     i++;
543                 }
544                 (*outSeqLen) = i;
545             } else if ((*transductionState).nrSol > 1) {
546                 PICODBG_WARN(("--- more than one transducer solutions found\n"));
547             }
548             (*transductionState).phase = 3;
549             break;
550 
551         case 3:   /* after finish */
552             (*finished) = 1;
553             break;
554     }
555     (*transductionState).recPos = tmpRecPos;
556 }
557 
558 
559 
560 /* see description in header */
picotrns_transduce(picokfst_FST fst,picoos_bool firstSolOnly,picotrns_printSolutionFct printSolution,const picotrns_possym_t inSeq[],picoos_uint16 inSeqLen,picotrns_possym_t outSeq[],picoos_uint16 * outSeqLen,picoos_uint16 maxOutSeqLen,picotrns_AltDesc altDescBuf,picoos_uint16 maxAltDescLen,picoos_uint32 * nrSteps)561 pico_status_t picotrns_transduce (picokfst_FST fst, picoos_bool firstSolOnly,
562                                          picotrns_printSolutionFct printSolution,
563                                          const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
564                                          picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen,
565                                          picotrns_AltDesc altDescBuf, picoos_uint16 maxAltDescLen,
566                                          picoos_uint32 *nrSteps)
567 {
568     struct picotrns_transductionState transductionState;
569     picoos_bool finished;
570 
571 #if defined(PICO_DEBUG)
572     {
573         picoos_uint16 i;
574 
575         PICODBG_INFO_CTX();
576         PICODBG_INFO_MSG(("got input: "));
577         for (i=0; i<inSeqLen; i++) {
578             PICODBG_INFO_MSG((" %d", inSeq[i].sym));
579         }
580         PICODBG_INFO_MSG((" ("));
581         PICOTRNS_PRINTSYMSEQ(NULL,inSeq,inSeqLen);
582         PICODBG_INFO_MSG((")\n"));
583     }
584 #endif
585    StartTransduction(&transductionState);
586     finished = 0;
587     *nrSteps = 0;
588     while (!finished) {
589         TransductionStep(fst,&transductionState,altDescBuf,maxAltDescLen,firstSolOnly,printSolution,
590                          inSeq,inSeqLen,outSeq,outSeqLen,maxOutSeqLen,&finished);
591         (*nrSteps)++;
592     }
593 
594     return PICO_OK;
595 }
596 
597 
598 /**
599  * Data structure for picotrns_SimpleTransducer object.
600  */
601 typedef struct picotrns_simple_transducer {
602     picoos_Common common;
603     picotrns_possym_t possymBufA[PICOTRNS_MAX_NUM_POSSYM+1];
604     picotrns_possym_t possymBufB[PICOTRNS_MAX_NUM_POSSYM+1];
605     picotrns_possym_t * possymBuf; /**< the buffer of the pos/sym pairs */
606     picotrns_possym_t * possymBufTmp;
607     picoos_uint16 possymReadPos, possymWritePos; /* next pos to read from phonBufIn, next pos to write to phonBufIn */
608 
609     /* buffer for internal calculation of transducer */
610     picotrns_AltDesc altDescBuf;
611     /* the number of AltDesc in the buffer */
612     picoos_uint16 maxAltDescLen;
613 } picotrns_simple_transducer_t;
614 
615 
picotrns_stInitialize(picotrns_SimpleTransducer transducer)616 pico_status_t  picotrns_stInitialize(picotrns_SimpleTransducer transducer)
617 {
618     transducer->possymBuf = transducer->possymBufA;
619     transducer->possymBufTmp = transducer->possymBufB;
620     transducer->possymReadPos = 0;
621     transducer->possymWritePos = 0;
622     return PICO_OK;
623 }
624 /** creates a SimpleTranducer with a working buffer of given size
625  *
626  * @param mm      MemoryManager handle
627  * @param common  Common handle
628  * @param maxAltDescLen maximal size for working buffer (in bytes)
629  * @return handle to new SimpleTransducer or NULL if error
630  */
picotrns_newSimpleTransducer(picoos_MemoryManager mm,picoos_Common common,picoos_uint16 maxAltDescLen)631 picotrns_SimpleTransducer picotrns_newSimpleTransducer(picoos_MemoryManager mm,
632                                               picoos_Common common,
633                                               picoos_uint16 maxAltDescLen)
634 {
635     picotrns_SimpleTransducer this;
636     this = picoos_allocate(mm, sizeof(picotrns_simple_transducer_t));
637     if (this == NULL) {
638         picoos_deallocate(mm, (void *)&this);
639         picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
640         return NULL;
641     }
642 
643     /* allocate working buffer */
644     this->altDescBuf = picotrns_allocate_alt_desc_buf(mm, maxAltDescLen, &this->maxAltDescLen);
645     if (this->altDescBuf == NULL) {
646         picoos_deallocate(mm, (void *)&this);
647         picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
648         return NULL;
649     }
650     this->common = common;
651     picotrns_stInitialize(this);
652     return this;
653 }
654 /** disposes a SimpleTransducer
655  *
656  * @param this
657  * @param mm
658  * @return PICO_OK
659  */
picotrns_disposeSimpleTransducer(picotrns_SimpleTransducer * this,picoos_MemoryManager mm)660 pico_status_t picotrns_disposeSimpleTransducer(picotrns_SimpleTransducer * this,
661                                         picoos_MemoryManager mm)
662 {
663     if (NULL != (*this)) {
664         picotrns_deallocate_alt_desc_buf(mm,&(*this)->altDescBuf);
665         picoos_deallocate(mm, (void *) this);
666         (*this) = NULL;
667     }
668     return PICO_OK;
669 }
670 
671 /** transduces the contents previously inserted via @ref picotrns_newSimpleTransducer and @ref
672  *  picotrns_disposeSimpleTransducer.
673  *
674  * @param this
675  * @param fst
676  * @return
677  */
picotrns_stTransduce(picotrns_SimpleTransducer this,picokfst_FST fst)678 pico_status_t picotrns_stTransduce(picotrns_SimpleTransducer this, picokfst_FST fst)
679 {
680     picoos_uint16 outSeqLen;
681     picoos_uint32 nrSteps;
682     pico_status_t status;
683 
684     status = picotrns_transduce(fst,TRUE,NULL,
685             this->possymBuf, this->possymWritePos,
686             this->possymBufTmp,&outSeqLen, PICOTRNS_MAX_NUM_POSSYM,
687             this->altDescBuf,this->maxAltDescLen,&nrSteps);
688     if (PICO_OK != status) {
689         return status;
690     }
691     return picotrns_eliminate_epsilons(this->possymBufTmp,outSeqLen,this->possymBuf,&this->possymWritePos,PICOTRNS_MAX_NUM_POSSYM);
692 }
693 
694 /**
695  * Add chars from NULLC-terminated string \c inStr, shifted to plane \c plane, to internal input buffer of
696  *  \c transducer.
697  *
698  * @param this is an initialized picotrns_SimpleTransducer
699  * @param inStr NULLC-terminated byte sequence
700  * @param plane
701  * @return PICO_OK, if all bytes fit into buffer, or PICO_EXC_BUF_OVERFLOW otherwise
702  */
picotrns_stAddWithPlane(picotrns_SimpleTransducer this,picoos_char * inStr,picoos_uint8 plane)703 pico_status_t picotrns_stAddWithPlane(picotrns_SimpleTransducer this, picoos_char * inStr, picoos_uint8 plane)
704 {
705     while ((*inStr) && (this->possymWritePos < PICOTRNS_MAX_NUM_POSSYM)) {
706         this->possymBuf[this->possymWritePos].pos = PICOTRNS_POS_INSERT;
707         this->possymBuf[this->possymWritePos].sym = (plane << 8) + (*inStr);
708         PICODBG_DEBUG(("inserting pos/sym = %i/'%c' at pos %i",
709                 this->possymBuf[this->possymWritePos].pos,
710                 this->possymBuf[this->possymWritePos].sym,
711                 this->possymWritePos));
712         this->possymWritePos++;
713         inStr++;
714     }
715     if (!(*inStr)) {
716         return PICO_OK;
717     } else {
718         return PICO_EXC_BUF_OVERFLOW;
719     }
720 }
721 
picotrns_stGetSymSequence(picotrns_SimpleTransducer this,picoos_uint8 * outputSymIds,picoos_uint32 maxOutputSymIds)722 pico_status_t picotrns_stGetSymSequence(
723         picotrns_SimpleTransducer this,
724         picoos_uint8 * outputSymIds,
725         picoos_uint32 maxOutputSymIds)
726 {
727     picoos_uint8 plane;
728     picoos_uint32 outputCount = 0;
729     while ((this->possymReadPos < this->possymWritePos) && (outputCount < maxOutputSymIds)) {
730         *outputSymIds++ = picotrns_unplane(this->possymBuf[this->possymReadPos++].sym, &plane);
731         outputCount++;
732     }
733     *outputSymIds = NULLC;
734     if (outputCount <= maxOutputSymIds) {
735         return PICO_OK;
736     } else {
737         return PICO_EXC_BUF_OVERFLOW;
738     }
739 }
740 
741 #ifdef __cplusplus
742 }
743 #endif
744 
745 /* end picotrns.c */
746