1 /*
2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /**
17  * @file picokfst.h
18  *
19  * FST knowledge loading and access
20  *
21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22  * All rights reserved.
23  *
24  * History:
25  * - 2009-04-20 -- initial version
26  *
27  */
28 #ifndef PICOKFST_H_
29 #define PICOKFST_H_
30 
31 #include "picodefs.h"
32 #include "picodbg.h"
33 #include "picoos.h"
34 #include "picoknow.h"
35 
36 #ifdef __cplusplus
37 extern "C" {
38 #endif
39 #if 0
40 }
41 #endif
42 
43 typedef picoos_int16 picokfst_symid_t; /* type of symbol identifiers */
44 typedef picoos_int16 picokfst_class_t; /* type of symbol pair classes */
45 typedef picoos_int16 picokfst_state_t; /* type of states */
46 
/*
 * Reserved symbol ids.
 *
 * Each replacement list is fully parenthesized so that the cast always stays
 * attached to its operand, whatever expression the macro is expanded into.
 * Without the outer parentheses, "sizeof PICOKFST_SYMID_ILLEG" would parse as
 * "sizeof(picokfst_symid_t) - 1" and "sizeof PICOKFST_SYMID_EPS" would not
 * compile at all.
 */
#define PICOKFST_SYMID_EPS    ((picokfst_symid_t)  0)   /* epsilon symbol id */
#define PICOKFST_SYMID_ILLEG  ((picokfst_symid_t) -1)   /* illegal symbol id */
49 
/**
 * @addtogroup picokfst
 *
 * Symbol planes of the FST symbol id space.
 *
 * Mapping of values to FST symbol ids (relevant for compiling the FST):
 * a value is mapped by adding 256 times the number of its plane. \n
 *
 * Value                   FST symbol id \n
 * -------------------------------------- \n
 * phoneme_id      ->      phoneme_id     +  256 *  PICOKFST_PLANE_PHONEMES \n
 * accentlevel_id  ->      accentlevel_id +  256 *  PICOKFST_PLANE_ACCENTS \n
 * POS_id          ->      POS_id         +  256 *  PICOKFST_PLANE_POS \n
 * pb_strength_id  ->      pb_strength_id +  256 *  PICOKFST_PLANE_PB_STRENGTHS \n
 * phon_term_id    ->      phon_term_id   +  256 *  PICOKFST_PLANE_INTERN \n
 *
 * Note that no enumerator is defined for plane number 3.
 */
enum picokfst_symbol_plane {
    /* phoneme plane */
    PICOKFST_PLANE_PHONEMES     = 0,
    /* "ascii" plane (values > 127 may be used internally) */
    PICOKFST_PLANE_ASCII        = 1,
    /* x-sampa primitives plane (pico-specific table) */
    PICOKFST_PLANE_XSAMPA       = 2,
    /* accent plane */
    PICOKFST_PLANE_ACCENTS      = 4,
    /* part of speech plane */
    PICOKFST_PLANE_POS          = 5,
    /* phrase boundary strength plane */
    PICOKFST_PLANE_PB_STRENGTHS = 6,
    /* internal plane, e.g. phonStartId, phonTermId */
    PICOKFST_PLANE_INTERN       = 7
};
71 
/*
 * Transduction mode flags. Each enumerator is a distinct bit, so the values
 * are meant to be OR-ed together into a bit set, e.g.
 *   picoos_uint8 transductionMode = PICOKFST_TRANSMODE_NEWSYMS | PICOKFST_TRANSMODE_POSUSED;
 *
 * The enum tag is spelled "picofst_" (without 'k'); it is left that way so
 * that existing references keep working.
 */
enum picofst_transduction_mode {
    /* e.g. {#WB},{#PB-S},{#PB-W},{#ACC0},{#ACC1},{#ACC2},{#ACC3}, */
    PICOKFST_TRANSMODE_NEWSYMS = 1 << 0,
    /* FST contains Part Of Speech symbols */
    PICOKFST_TRANSMODE_POSUSED = 1 << 1
};
80 
81 
82 /* ************************************************************/
83 /* function to create specialized kb, */
84 /* to be used by knowledge layer (picorsrc) only */
85 /* ************************************************************/
86 
87 /* calculates a small number of data (e.g. addresses) from kb for fast access.
88  * This data is encapsulated in a picokfst_FST that can later be retrieved
89  * with picokfst_getFST. */
90 pico_status_t picokfst_specializeFSTKnowledgeBase(picoknow_KnowledgeBase this,
91                                                   picoos_Common common);
92 
93 
94 /* ************************************************************/
95 /* FST type and getFST function */
96 /* ************************************************************/
97 
/* FST type: a pointer to struct picokfst_fst, whose definition is not
 * part of this header. */
typedef struct picokfst_fst *picokfst_FST;
100 
101 /* return kb FST for usage in PU */
102 picokfst_FST picokfst_getFST(picoknow_KnowledgeBase this);
103 
104 
105 /* ************************************************************/
106 /* FST access methods */
107 /* ************************************************************/
108 
109 /* returns transduction mode specified with rule sources;
110    result to be interpreted as set of picofst_transduction_mode */
111 picoos_uint8 picokfst_kfstGetTransductionMode(picokfst_FST this);
112 
113 /* returns number of states and number of pair classes in FST;
114    legal states are 1..nrStates, legal classes are 1..nrClasses */
115 void picokfst_kfstGetFSTSizes (picokfst_FST this, picoos_int32 *nrStates, picoos_int32 *nrClasses);
116 
117 /* starts search for all pairs with input symbol 'inSym'; '*inSymFound' returns whether
118    such pairs exist at all; '*searchState' returns a search state to be used in
119    subsequent calls to function 'picokfst_kfstGetNextPair', which must be used
120    to get the symbol pairs */
121 void picokfst_kfstStartPairSearch (picokfst_FST this, picokfst_symid_t inSym,
122                                           picoos_bool * inSymFound, picoos_int32 * searchState);
123 
124 /* gets next pair for input symbol specified with preceding call to 'picokfst_kfstStartPairSearch';
125    '*searchState' maintains the search state, 'pairFound' returns whether any more pair was found,
126    '*outSym' returns the output symbol of the found pair, and '*pairClass' returns the
127    transition class of the found symbol pair */
128 void picokfst_kfstGetNextPair (picokfst_FST this, picoos_int32 * searchState,
129                                       picoos_bool * pairFound,
130                                       picokfst_symid_t * outSym, picokfst_class_t * pairClass);
131 
132 /* attempts to do FST transition from state 'startState' with pair class 'transClass';
133    if such a transition exists, 'endState' returns the end state of the transition (> 0),
134    otherwise 'endState' returns <= 0 */
135 void picokfst_kfstGetTrans (picokfst_FST this, picokfst_state_t startState, picokfst_class_t transClass,
136                                    picokfst_state_t * endState);
137 
138 /* starts search for all pairs with input epsilon symbol and all correponding
139    FST transitions starting in state 'startState'; to be used for fast
140    computation of epsilon closures;
141    '*inEpsTransFound' returns whether any such transition was found at all;
142    if so, '*searchState' returns a search state to be used in subsequent calls
143    to 'picokfst_kfstGetNextInEpsTrans' */
144 void picokfst_kfstStartInEpsTransSearch (picokfst_FST this, picokfst_state_t startState,
145                                                 picoos_bool * inEpsTransFound, picoos_int32 * searchState);
146 
147 /* gets next FST transition with a pair with empty input symbol starting from a state
148    previoulsy specified in 'picokfst_kfstStartInEpsTransSearch';
149    '*searchState' maintains the search state, '*inEpsTransFound' returns
150    whether a new transition with input epsilon was found, '*outSym 'returns
151    the output symbol of the found pair, and '*endState' returns the end state
152    of the found transition with that pair */
153 void picokfst_kfstGetNextInEpsTrans (picokfst_FST this, picoos_int32 * searchState,
154                                             picoos_bool * inEpsTransFound,
155                                             picokfst_symid_t * outSym, picokfst_state_t * endState);
156 
157 /* returns whether 'state' is an accepting state of FST; originally, only
158    state 1 was an accepting state; however, in order to remove the need to
159    always do a last transition with a termination symbol pair, this function
160    defines a state as an accepting state if there is transition to state 1
161    with the terminator symbol pair */
162 picoos_bool picokfst_kfstIsAcceptingState (picokfst_FST this, picokfst_state_t state);
163 
164 #ifdef __cplusplus
165 }
166 #endif
167 
168 
169 #endif /*PICOKFST_H_*/
170