1 /*
2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /**
17  * @file picotrns.h
18  *
19  * fst processing
20  *
21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22  * All rights reserved.
23  *
24  * History:
25  * - 2009-04-20 -- initial version
26  *
27  */
28 
29 /** @addtogroup picotrns
30  *
31  * Conventions:
32  *
33  * - The input to the transducer is a list of pos/sym pairs, where pos are arbitrary position markers
34  * - All positions are allowed on input (in particular all those coming as an output of a previous transduction)
35  * - A phone sequence to be transduced has to begin with PICOKNOW_PHON_START_ID and end with PICOKNOW_PHON_TERM_ID
36  *   These special symbols are kept in the transduction output (as first and last symbol)
 * - Symbols inserted by the transduction process always get their position marker pos=PICOTRNS_POS_INSERT
38  * - The order of positions on output must be the same as that on input, i.e. apart from inserted pairs, the
39  *   output position sequence must be a sub-sequence of the input position sequence.
 * - Inserted symbols are always preceded by a positioned pos/sym pair, e.g.
 *   if the sequence pos1/sym1, pos2/sym2 should be transduced to x/sym3, y/sym4, z/sym5, then x must be pos1 or pos2
 *   and not PICOTRNS_POS_INSERT
43  *
44  *   For lingware developers: Insertions are always interpreted "to the right"
45  *     - E.g.: The original sequence is phon1 , command , phon2
46  *          - The input to the transducer is then  pos1/phon1 , pos2/phon2
47  *          - The output is pos1/phon1'  -1/phon_ins pos2/phon2'  [assuming -1 is the special insertion pos]
48  *     - Then the new sequence will be recomposed as phon1' , phon_ins , command , phon2'  [note position of command!]
 *     - To override this behaviour, rules must be formulated such that the transduction output is
50  *     pos1/phon1'  pos2/phon_ins  -1/phon2'
51  */
52 #ifndef PICOTRNS_H_
53 #define PICOTRNS_H_
54 
55 #include "picoos.h"
56 #include "picokfst.h"
57 #include "picoktab.h"
58 
59 #ifdef __cplusplus
60 extern "C" {
61 #endif
62 #if 0
63 }
64 #endif
65 
/* Size bound for pos/sym sequences (see picotrns_trivial_syllabify). */
#define PICOTRNS_MAX_NUM_POSSYM 255

/*
 * Special position markers.
 * Each expansion is wrapped in parentheses so that the cast and its operand
 * stay together whatever operator surrounds the macro; without them e.g.
 * `sizeof PICOTRNS_POS_INSERT` would parse as `sizeof(picoos_int16) - 1`.
 */
#define PICOTRNS_POS_INSERT   ((picoos_int16) -1)  /* position returned by transducer to mark symbols inserted by the transducer */
#define PICOTRNS_POS_INVALID  ((picoos_int16) -2)  /* value to mark an invalid (e.g. uninitialized) position */
#define PICOTRNS_POS_IGNORE   ((picoos_int16) -3)  /* value to mark a pos/sym pair to be ignored (e.g. start/term symbols only used by the transducer) */
71 
72 
73 typedef struct picotrns_possym {
74     picoos_int16 pos;
75     picoos_int16 sym;
76 } picotrns_possym_t;
77 
78 picoos_uint8 picotrns_unplane(picoos_int16 symIn, picoos_uint8 * plane);
79 
80 
/*
 * Debug output helpers.
 * With PICO_DEBUG defined these are real functions; otherwise the two
 * PICOTRNS_PRINT* names expand to nothing and picotrns_printSolution
 * expands to NULL.
 */
#ifdef PICO_DEBUG

void PICOTRNS_PRINTSYM(picoknow_KnowledgeBase kbdbg,
                       picoos_int16 insym);

void PICOTRNS_PRINTSYMSEQ(picoknow_KnowledgeBase kbdbg,
                          const picotrns_possym_t seq[],
                          const picoos_uint16 seqLen);

void picotrns_printSolution(const picotrns_possym_t outSeq[],
                            const picoos_uint16 outSeqLen);

#else /* !PICO_DEBUG */

#define PICOTRNS_PRINTSYM(kbdbg,insym)
#define PICOTRNS_PRINTSYMSEQ(kbdbg,seq,seqLen)
#define picotrns_printSolution NULL

#endif /* PICO_DEBUG */
94 
95 
96 typedef struct picotrns_altDesc * picotrns_AltDesc;
97 
98 
99 picotrns_AltDesc picotrns_allocate_alt_desc_buf(picoos_MemoryManager mm, picoos_uint32 maxByteSize, picoos_uint16 * numAltDescs);
100 
101 void picotrns_deallocate_alt_desc_buf(picoos_MemoryManager mm, picotrns_AltDesc * altDescBuf);
102 
103 
104 /* type of function for printing transduction solutions;
105    only for testing purposes in transduction mode where all solutions
106    are produced */
107 typedef void picotrns_printSolutionFct(const picotrns_possym_t outSeq[], const picoos_uint16 outSeqLen);
108 
109 
110 
111 /** overall transduction; transduces 'inSeq' with 'inSeqLen' elements
112    to '*outSeqLen' elements in 'outSeq';
113  *
114  * @param fst the finite-state transducer used for transduction
115  * @param firstSolOnly determines whether only the first solution (usually)
116    or all solutions should be produced (for testing); only the last found
117    solution is returned in 'outSeq';
118  * @param printSolution if not NULL, every found solution is displayed using
119    the given function
120  * @param inSeq the input sequence
121  * @param inSeqLen the input sequence length
122  * @retval outSeq the output sequence
123  * @retval outSeqLen the output sequence length
124  * @param maxOutSeqLen   must provide the maximum length of 'outSeq'
125  * @param altDescBuf must provide a working array of length 'maxAltDescLen'
126  * @param maxAltDescLen should be chosen at least 'maxOutSeqLen' + 1
127  * @retval nrSteps returns the overall internal number of iterative steps done
128  * @return status of the transduction: PICO_OK, if transduction successful
129    @note if 'outSeq' or 'altDesc' are too small to hold a solution,
130    an error occurs and the input is simply transfered to the output
131    (up to maximum possible length)
132  */
133 extern pico_status_t picotrns_transduce (picokfst_FST fst, picoos_bool firstSolOnly,
134                                          picotrns_printSolutionFct printSolution,
135                                          const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
136                                          picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen,
137                                          picotrns_AltDesc altDescBuf, picoos_uint16 maxAltDescLen,
138                                          picoos_uint32 *nrSteps);
139 
140 
141 
/* transduce 'inSeq' into 'outSeq'. 'inSeq' has to be terminated with the id for symbol '#'; 'outSeq' is terminated in the same way. */
143 /*
144 pico_status_t picotrns_transduce_sequence(picokfst_FST fst, const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
145         picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen);
146 */
147 
148 /* copy elements from inSeq to outSeq, ignoring elements with epsilon symbol */
149 pico_status_t picotrns_eliminate_epsilons(const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
150         picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen);
151 
152 /* copy elements from inSeq to outSeq, inserting syllable separators in some trivial way.
153  * inSeq is assumed to be at most, outSeq at least of size PICOTRNS_MAX_NUM_POSSYM  */
154 pico_status_t picotrns_trivial_syllabify(picoktab_Phones phones,
155         const picotrns_possym_t inSeq[], const picoos_uint16 inSeqLen,
156         picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen);
157 
158 
/**
 * object   : SimpleTransducer
 * shortcut : st
 *
 * Opaque handle; the underlying struct is not defined in this header.
 */
typedef struct picotrns_simple_transducer *picotrns_SimpleTransducer;
164 
165 picotrns_SimpleTransducer picotrns_newSimpleTransducer(picoos_MemoryManager mm,
166                                               picoos_Common common,
167                                               picoos_uint16 maxAltDescLen);
168 
169 pico_status_t picotrns_disposeSimpleTransducer(picotrns_SimpleTransducer * this,
170         picoos_MemoryManager mm);
171 
172 pico_status_t  picotrns_stInitialize(picotrns_SimpleTransducer transducer);
173 
174 pico_status_t picotrns_stAddWithPlane(picotrns_SimpleTransducer this, picoos_char * inStr, picoos_uint8 plane);
175 
176 pico_status_t picotrns_stTransduce(picotrns_SimpleTransducer this, picokfst_FST fst);
177 
178 pico_status_t picotrns_stGetSymSequence(
179         picotrns_SimpleTransducer this,
180         picoos_uint8 * outputSymIds,
181         picoos_uint32 maxOutputSymIds);
182 
183 
184 
185 
186 
187 #ifdef __cplusplus
188 }
189 #endif
190 
191 #endif /*PICOTRNS_H_*/
192