1 /*
2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /**
17  * @file picokfst.c
18  *
19  * FST knowledge loading and access
20  *
21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22  * All rights reserved.
23  *
24  * History:
25  * - 2009-04-20 -- initial version
26  *
27  */
28 #include "picoos.h"
29 #include "picodbg.h"
30 #include "picoknow.h"
31 #include "picokfst.h"
32 
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 #if 0
37 }
38 #endif
39 
40 
41 #define FileHdrSize 4       /* size of FST file header */
42 
43 
44 
45 /* ************************************************************/
46 /* function to create specialized kb, */
47 /* to be used by picorsrc only */
48 /* ************************************************************/
49 
50 /** object       : FSTKnowledgeBase
51  *  shortcut     : kfst
52  *  derived from : picoknow_KnowledgeBase
53  */
54 
55 typedef struct kfst_subobj * kfst_SubObj;
56 
57 typedef struct kfst_subobj{
58     picoos_uint8 * fstStream;         /* the byte stream base address */
59     picoos_int32 hdrLen;              /* length of file header */
60     picoos_int32 transductionMode;    /* transduction mode to be used for FST */
61     picoos_int32 nrClasses;           /* nr of pair/transition classes in FST; class is in [1..nrClasses] */
62     picoos_int32 nrStates;            /* nr of states in FST; state is in [1..nrState] */
63     picoos_int32 termClass;           /* pair class of terminator symbol pair; probably obsolete */
64     picoos_int32 alphaHashTabSize;    /* size of pair alphabet hash table */
65     picoos_int32 alphaHashTabPos;     /* absolute address of the start of the pair alphabet */
66     picoos_int32 transTabEntrySize;   /* size in bytes of each transition table entry */
67     picoos_int32 transTabPos;         /* absolute address of the start of the transition table */
68     picoos_int32 inEpsStateTabPos;    /* absolute address of the start of the input epsilon transition table */
69     picoos_int32 accStateTabPos;      /* absolute address of the table of accepting states */
70 } kfst_subobj_t;
71 
72 
73 
74 /* ************************************************************/
75 /* primitives for reading from byte stream */
76 /* ************************************************************/
77 
78 /* Converts 'nrBytes' bytes starting at position '*pos' in byte stream 'stream' into unsigned number 'num'.
79    '*pos' is modified to the position right after the number */
FixedBytesToUnsignedNum(picoos_uint8 * stream,picoos_uint8 nrBytes,picoos_uint32 * pos,picoos_uint32 * num)80 static void FixedBytesToUnsignedNum (picoos_uint8 * stream, picoos_uint8 nrBytes, picoos_uint32 * pos, picoos_uint32 * num)
81 {
82     picoos_int32 i;
83 
84     (*num) = 0;
85     for (i = 0; i < nrBytes; i++) {
86         (*num) = ((*num) << 8) + (picoos_uint32)stream[*pos];
87         (*pos)++;
88     }
89 }
90 
91 
92 /* Converts 'nrBytes' bytes starting at position '*pos' in byte stream 'stream' into signed number 'num'.
93    '*pos' is modified to the position right after the number */
FixedBytesToSignedNum(picoos_uint8 * stream,picoos_uint8 nrBytes,picoos_uint32 * pos,picoos_int32 * num)94 static void FixedBytesToSignedNum (picoos_uint8 * stream, picoos_uint8 nrBytes, picoos_uint32 * pos, picoos_int32 * num)
95 {
96     picoos_int32 i;
97     picoos_uint32 val;
98 
99     val = 0;
100     for (i = 0; i < nrBytes; i++) {
101         val = (val << 8) + (picoos_uint32)stream[*pos];
102         (*pos)++;
103     }
104     if (val % 2 == 1) {
105         /* negative number */
106         (*num) = -((picoos_int32)((val - 1) / 2)) - 1;
107     } else {
108         /* positive number */
109         (*num) = val / 2;
110     }
111 }
112 
113 
114 /* Converts varying-sized sequence of bytes starting at position '*pos' in byte stream 'stream'
115    into (signed) number 'num'. '*pos' is modified to the position right after the number. */
BytesToNum(picoos_uint8 * stream,picoos_uint32 * pos,picoos_int32 * num)116 static void BytesToNum (picoos_uint8 * stream, picoos_uint32 * pos, picoos_int32 * num)
117 {
118     picoos_uint32 val;
119     picoos_uint32 b;
120 
121     val = 0;
122     b = (picoos_uint32)stream[*pos];
123     (*pos)++;
124     while (b < 128) {
125         val = (val << 7) + b;
126         b = (picoos_uint32)stream[*pos];
127         (*pos)++;
128     }
129     val = (val << 7) + (b - 128);
130     if (val % 2 == 1) {
131         /* negative number */
132         (*num) = -((picoos_int32)((val - 1) / 2)) - 1;
133     } else {
134         /* positive number */
135         (*num) = val / 2;
136     }
137 }
138 
139 
140 /* ************************************************************/
141 /* setting up FST from byte stream */
142 /* ************************************************************/
143 
kfstInitialize(register picoknow_KnowledgeBase this,picoos_Common common)144 static pico_status_t kfstInitialize(register picoknow_KnowledgeBase this,
145         picoos_Common common)
146 {
147     picoos_uint32 curpos;
148     picoos_int32 offs;
149     kfst_subobj_t * kfst;
150 
151     PICODBG_DEBUG(("kfstInitialize -- start\n"));
152 
153     if (NULL == this || NULL == this->subObj) {
154         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, NULL,
155                 NULL);
156     }
157     kfst = (kfst_subobj_t *) this->subObj;
158 
159     /* +CT+ */
160     kfst->fstStream = this->base;
161     PICODBG_TRACE(("base: %d\n",this->base));
162     kfst->hdrLen = FileHdrSize;
163     curpos = kfst->hdrLen;
164     BytesToNum(kfst->fstStream,& curpos,& kfst->transductionMode);
165     BytesToNum(kfst->fstStream,& curpos,& kfst->nrClasses);
166     BytesToNum(kfst->fstStream,& curpos,& kfst->nrStates);
167     BytesToNum(kfst->fstStream,& curpos,& kfst->termClass);
168     BytesToNum(kfst->fstStream,& curpos,& kfst->alphaHashTabSize);
169     BytesToNum(kfst->fstStream,& curpos,& offs);
170     kfst->alphaHashTabPos = kfst->hdrLen + offs;
171     BytesToNum(kfst->fstStream,& curpos,& kfst->transTabEntrySize);
172     BytesToNum(kfst->fstStream,& curpos,& offs);
173     kfst->transTabPos = kfst->hdrLen + offs;
174     BytesToNum(kfst->fstStream,& curpos,& offs);
175     kfst->inEpsStateTabPos = kfst->hdrLen + offs;
176     BytesToNum(kfst->fstStream,& curpos,& offs);
177     kfst->accStateTabPos = kfst->hdrLen + offs;
178     /* -CT- */
179 
180     return PICO_OK;
181 }
182 
183 
kfstSubObjDeallocate(register picoknow_KnowledgeBase this,picoos_MemoryManager mm)184 static pico_status_t kfstSubObjDeallocate(register picoknow_KnowledgeBase this,
185         picoos_MemoryManager mm)
186 {
187     if (NULL != this) {
188         picoos_deallocate(mm, (void *) &this->subObj);
189     }
190     return PICO_OK;
191 }
192 
193 
194 /* calculates a small number of data (e.g. addresses) from kb for fast access.
195  * This data is encapsulated in a picokfst_FST that can later be retrieved
196  * with picokfst_getFST. */
picokfst_specializeFSTKnowledgeBase(picoknow_KnowledgeBase this,picoos_Common common)197 pico_status_t picokfst_specializeFSTKnowledgeBase(picoknow_KnowledgeBase this,
198                                                   picoos_Common common)
199 {
200     pico_status_t status;
201 
202     if (NULL == this) {
203         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, NULL, NULL);
204     }
205     if (0 < this->size) {
206         /* not a dummy kb */
207         this->subDeallocate = kfstSubObjDeallocate;
208 
209         this->subObj = picoos_allocate(common->mm, sizeof(kfst_subobj_t));
210 
211         if (NULL == this->subObj) {
212             return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
213         }
214         status = kfstInitialize(this, common);
215         if (PICO_OK != status) {
216             picoos_deallocate(common->mm,(void **)&this->subObj);
217         }
218     }
219     return PICO_OK;
220 }
221 
222 
223 /* ************************************************************/
224 /* FST type and getFST function */
225 /* ************************************************************/
226 
227 
228 
229 /* return kb FST for usage in PU */
picokfst_getFST(picoknow_KnowledgeBase this)230 picokfst_FST picokfst_getFST(picoknow_KnowledgeBase this)
231 {
232     if (NULL == this) {
233         return NULL;
234     } else {
235         return (picokfst_FST) this->subObj;
236     }
237 }
238 
239 
240 
241 /* ************************************************************/
242 /* FST access methods */
243 /* ************************************************************/
244 
245 
246 /* see description in header file */
picokfst_kfstGetTransductionMode(picokfst_FST this)247 extern picoos_uint8 picokfst_kfstGetTransductionMode(picokfst_FST this)
248 {
249     kfst_SubObj fst = (kfst_SubObj) this;
250     if (fst != NULL) {
251         return fst->transductionMode;
252     } else {
253         return 0;
254     }
255 }
256 
257 
258 /* see description in header file */
picokfst_kfstGetFSTSizes(picokfst_FST this,picoos_int32 * nrStates,picoos_int32 * nrClasses)259 extern void picokfst_kfstGetFSTSizes (picokfst_FST this, picoos_int32 *nrStates, picoos_int32 *nrClasses)
260 {
261     kfst_SubObj fst = (kfst_SubObj) this;
262     if (fst != NULL) {
263         *nrStates = fst->nrStates;
264         *nrClasses = fst->nrClasses;
265     } else {
266         *nrStates = 0;
267         *nrClasses = 0;
268     }
269 }
270 
271 /* see description in header file */
picokfst_kfstStartPairSearch(picokfst_FST this,picokfst_symid_t inSym,picoos_bool * inSymFound,picoos_int32 * searchState)272 extern void picokfst_kfstStartPairSearch (picokfst_FST this, picokfst_symid_t inSym,
273                                           picoos_bool * inSymFound, picoos_int32 * searchState)
274 {
275     picoos_uint32 pos;
276     picoos_int32 offs;
277     picoos_int32 h;
278     picoos_int32 inSymCellPos;
279     picoos_int32 inSymX;
280     picoos_int32 nextSameHashInSymOffs;
281 
282     kfst_SubObj fst = (kfst_SubObj) this;
283     (*searchState) =  -1;
284     (*inSymFound) = 0;
285     h = inSym % fst->alphaHashTabSize;
286     pos = fst->alphaHashTabPos + (h * 4);
287     FixedBytesToSignedNum(fst->fstStream,4,& pos,& offs);
288     if (offs > 0) {
289         inSymCellPos = fst->alphaHashTabPos + offs;
290         pos = inSymCellPos;
291         BytesToNum(fst->fstStream,& pos,& inSymX);
292         BytesToNum(fst->fstStream,& pos,& nextSameHashInSymOffs);
293         while ((inSymX != inSym) && (nextSameHashInSymOffs > 0)) {
294             inSymCellPos = inSymCellPos + nextSameHashInSymOffs;
295             pos = inSymCellPos;
296             BytesToNum(fst->fstStream,& pos,& inSymX);
297             BytesToNum(fst->fstStream,& pos,& nextSameHashInSymOffs);
298         }
299         if (inSymX == inSym) {
300             /* input symbol found; state is set to position after symbol cell */
301             (*searchState) = pos;
302             (*inSymFound) = 1;
303         }
304     }
305 }
306 
307 
308 /* see description in header file */
picokfst_kfstGetNextPair(picokfst_FST this,picoos_int32 * searchState,picoos_bool * pairFound,picokfst_symid_t * outSym,picokfst_class_t * pairClass)309 extern void picokfst_kfstGetNextPair (picokfst_FST this, picoos_int32 * searchState,
310                                       picoos_bool * pairFound,
311                                       picokfst_symid_t * outSym, picokfst_class_t * pairClass)
312 {
313     picoos_uint32 pos;
314     picoos_int32 val;
315 
316     kfst_SubObj fst = (kfst_SubObj) this;
317     if ((*searchState) < 0) {
318         (*pairFound) = 0;
319         (*outSym) = PICOKFST_SYMID_ILLEG;
320         (*pairClass) =  -1;
321     } else {
322         pos = (*searchState);
323         BytesToNum(fst->fstStream,& pos,& val);
324         *outSym = (picokfst_symid_t)val;
325         if ((*outSym) != PICOKFST_SYMID_ILLEG) {
326             BytesToNum(fst->fstStream,& pos,& val);
327             *pairClass = (picokfst_class_t)val;
328             (*pairFound) = 1;
329             (*searchState) = pos;
330         } else {
331             (*pairFound) = 0;
332             (*outSym) = PICOKFST_SYMID_ILLEG;
333             (*pairClass) =  -1;
334             (*searchState) =  -1;
335         }
336     }
337 }
338 
339 
340 
341 /* see description in header file */
picokfst_kfstGetTrans(picokfst_FST this,picokfst_state_t startState,picokfst_class_t transClass,picokfst_state_t * endState)342 extern void picokfst_kfstGetTrans (picokfst_FST this, picokfst_state_t startState, picokfst_class_t transClass,
343                                    picokfst_state_t * endState)
344 {
345 
346     picoos_uint32 pos;
347     picoos_int32 index;
348     picoos_uint32 endStateX;
349 
350     kfst_SubObj fst = (kfst_SubObj) this;
351     if ((startState < 1) || (startState > fst->nrStates) || (transClass < 1) || (transClass > fst->nrClasses)) {
352         (*endState) = 0;
353     } else {
354         index = (startState - 1) * fst->nrClasses + transClass - 1;
355         pos = fst->transTabPos + (index * fst->transTabEntrySize);
356         FixedBytesToUnsignedNum(fst->fstStream,fst->transTabEntrySize,& pos,& endStateX);
357         (*endState) = endStateX;
358     }
359 }
360 
361 
362 /* see description in header file */
picokfst_kfstStartInEpsTransSearch(picokfst_FST this,picokfst_state_t startState,picoos_bool * inEpsTransFound,picoos_int32 * searchState)363 extern void picokfst_kfstStartInEpsTransSearch (picokfst_FST this, picokfst_state_t startState,
364                                                 picoos_bool * inEpsTransFound, picoos_int32 * searchState)
365 {
366 
367     picoos_int32 offs;
368     picoos_uint32 pos;
369 
370     kfst_SubObj fst = (kfst_SubObj) this;
371     (*searchState) =  -1;
372     (*inEpsTransFound) = 0;
373     if ((startState > 0) && (startState <= fst->nrStates)) {
374         pos = fst->inEpsStateTabPos + (startState - 1) * 4;
375         FixedBytesToSignedNum(fst->fstStream,4,& pos,& offs);
376         if (offs > 0) {
377             (*searchState) = fst->inEpsStateTabPos + offs;
378             (*inEpsTransFound) = 1;
379         }
380     }
381 }
382 
383 
384 
385 /* see description in header file */
picokfst_kfstGetNextInEpsTrans(picokfst_FST this,picoos_int32 * searchState,picoos_bool * inEpsTransFound,picokfst_symid_t * outSym,picokfst_state_t * endState)386 extern void picokfst_kfstGetNextInEpsTrans (picokfst_FST this, picoos_int32 * searchState,
387                                             picoos_bool * inEpsTransFound,
388                                             picokfst_symid_t * outSym, picokfst_state_t * endState)
389 {
390     picoos_uint32 pos;
391     picoos_int32 val;
392 
393     kfst_SubObj fst = (kfst_SubObj) this;
394     if ((*searchState) < 0) {
395         (*inEpsTransFound) = 0;
396         (*outSym) = PICOKFST_SYMID_ILLEG;
397         (*endState) = 0;
398     } else {
399         pos = (*searchState);
400         BytesToNum(fst->fstStream,& pos,& val);
401         *outSym = (picokfst_symid_t)val;
402         if ((*outSym) != PICOKFST_SYMID_ILLEG) {
403             BytesToNum(fst->fstStream,& pos,& val);
404             *endState = (picokfst_state_t)val;
405             (*inEpsTransFound) = 1;
406             (*searchState) = pos;
407         } else {
408             (*inEpsTransFound) = 0;
409             (*outSym) = PICOKFST_SYMID_ILLEG;
410             (*endState) = 0;
411             (*searchState) =  -1;
412         }
413     }
414 }
415 
416 
417 /* see description in header file */
picokfst_kfstIsAcceptingState(picokfst_FST this,picokfst_state_t state)418 extern picoos_bool picokfst_kfstIsAcceptingState (picokfst_FST this, picokfst_state_t state)
419 {
420 
421     picoos_uint32 pos;
422     picoos_uint32 val;
423 
424     kfst_SubObj fst = (kfst_SubObj) this;
425     if ((state > 0) && (state <= fst->nrStates)) {
426         pos = fst->accStateTabPos + (state - 1);
427         FixedBytesToUnsignedNum(fst->fstStream,1,& pos,& val);
428         return (val == 1);
429     } else {
430         return 0;
431     }
432 }
433 
434 #ifdef __cplusplus
435 }
436 #endif
437 
438 /* End picofst.c */
439