1 /*
2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /**
17  * @file picokpdf.c
18  *
19  *  knowledge handling for pdf
20  *
21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22  * All rights reserved.
23  *
24  * History:
25  * - 2009-04-20 -- initial version
26  *
27  */
28 
29 #include "picoos.h"
30 #include "picodbg.h"
31 #include "picoknow.h"
32 #include "picokpdf.h"
33 
34 #ifdef __cplusplus
35 extern "C" {
36 #endif
37 #if 0
38 }
39 #endif
40 
41 
42 /* ************************************************************/
43 /* pdf */
44 /* ************************************************************/
45 
46 /*
47  * @addtogroup picokpdf
48  *
49   overview: format of knowledge base pdf file
50 
51   This is the format for the dur pdf file:
52     - Numframes:     1             uint16
53     - Vecsize:       1             uint8
54     - sampperframe:  1             uint8
55     - Phonquantlen:  1             uint8
56     - Phonquant:     Phonquantlen  uint8
57     - Statequantlen: 1             uint8
    - Statequant:    Statequantlen uint8
59     - And then numframes x vecsize uint8
60 
61   This is the format for mul (mgc and lfz) pdf files:
62     - numframes:         1         uint16
63     - vecsize:           1         uint8
64     - numstates:         1         uint8
65     - numframesperstate: numstates uint16
66     - ceporder:          1         uint8
67     - numvuv             1         uint8
68     - numdeltas:         1         uint8
69     - scmeanpow:         1         uint8
    - maxbigpow:         1         uint8
    - amplif:            1         uint8
71     - scmeanpowum  KPDF_NUMSTREAMS * ceporder uint8
72     - scivarpow    KPDF_NUMSTREAMS * ceporder uint8
73 
74     And then numframes x vecsize uint8
75 
76 */
77 
78 
79 /* ************************************************************/
80 /* pdf data defines */
81 /* may not be changed with current implementation */
82 /* ************************************************************/
83 
84 
85 #define KPDF_NUMSTREAMS  3 /* coeff, delta, deltadelta */
86 
87 
88 /* ************************************************************/
89 /* pdf loading */
90 /* ************************************************************/
91 
kpdfDURInitialize(register picoknow_KnowledgeBase this,picoos_Common common)92 static pico_status_t kpdfDURInitialize(register picoknow_KnowledgeBase this,
93                                        picoos_Common common) {
94     picokpdf_pdfdur_t *pdfdur;
95     picoos_uint16 pos;
96 
97     if (NULL == this || NULL == this->subObj) {
98         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
99                                        NULL, NULL);
100     }
101     pdfdur = (picokpdf_pdfdur_t *)this->subObj;
102 
103     pos = 0;
104 
105     pdfdur->numframes = ((picoos_uint16)(this->base[pos+1])) << 8 |
106         this->base[pos];
107     pos += 2;
108     pdfdur->vecsize = this->base[pos++];
109     pdfdur->sampperframe = this->base[pos++];
110     pdfdur->phonquantlen = this->base[pos++];
111     pdfdur->phonquant = &(this->base[pos]);
112     pos += pdfdur->phonquantlen;
113     pdfdur->statequantlen = this->base[pos++];
114     pdfdur->statequant = &(this->base[pos]);
115     pos += pdfdur->statequantlen;
116     pdfdur->content = &(this->base[pos]);
117     PICODBG_DEBUG(("numframes %d, vecsize %d, phonquantlen %d, "
118                    "statequantlen %d", pdfdur->numframes, pdfdur->vecsize,
119                    pdfdur->phonquantlen, pdfdur->statequantlen));
120     if ((picoos_uint32)(pos + (pdfdur->numframes * pdfdur->vecsize)) != this->size) {
121         PICODBG_DEBUG(("header-spec size %d, kb-size %d",
122                        pos + (pdfdur->numframes * pdfdur->vecsize),
123                        this->size));
124         return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
125                                        NULL, NULL);
126     }
127     PICODBG_DEBUG(("dur pdf initialized"));
128     return PICO_OK;
129 }
130 
/**
 * Converts a scale exponent read from the pdf into a shift relative to
 * bigpow.
 *
 * Values above 0x0F are treated as negative exponents in two's complement;
 * their magnitude is added to bigpow. Values up to 0x0F are subtracted
 * from bigpow.
 *
 * @param pow     exponent byte as stored in the pdf
 * @param bigpow  reference exponent
 * @return the converted exponent, truncated to 8 bits; 0 if pow is a
 *         non-negative exponent larger than bigpow (note that 0 is also
 *         the regular result when pow equals bigpow)
 */
static picoos_uint8 convScaleFactorToBig(picoos_uint8 pow, picoos_uint8 bigpow)
{
    if (pow <= 0x0F) {
        if (pow > bigpow) {
            /* error: bigpow is smaller than input pow */
            return 0;
        }
        return (picoos_uint8)(bigpow - pow);
    }
    /* negative pow: add its magnitude (two's complement) to bigpow */
    return (picoos_uint8)(bigpow + (0xFF - pow + 1));
}
143 
kpdfMULInitialize(register picoknow_KnowledgeBase this,picoos_Common common)144 static pico_status_t kpdfMULInitialize(register picoknow_KnowledgeBase this,
145                                        picoos_Common common) {
146     picokpdf_pdfmul_t *pdfmul;
147     picoos_uint16 pos;
148     picoos_uint8 scmeanpow, maxbigpow, nummean;
149     picoos_uint8 i;
150 
151     if (NULL == this || NULL == this->subObj) {
152         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
153                                        NULL, NULL);
154     }
155     pdfmul = (picokpdf_pdfmul_t *)this->subObj;
156 
157     pos = 0;
158 
159     pdfmul->numframes = ((picoos_uint16)(this->base[pos+1])) << 8 |
160         this->base[pos];
161     pos += 2;
162     pdfmul->vecsize = this->base[pos++];
163     pdfmul->numstates = this->base[pos++];
164     {
165         pdfmul->stateoffset[0] = (picoos_uint16) 0;
166         for (i=1; i<pdfmul->numstates; i++) {
167             pdfmul->stateoffset[i] = pdfmul->stateoffset[i-1] + (this->base[pos] | ((picoos_uint16) this->base[pos+1] << 8));
168             pos += 2;
169         }
170         pos += 2; /* we don't need the last number if we only need the offset (i.e. how to get to the vector start) */
171     }
172 
173     pdfmul->ceporder = this->base[pos++];
174     pdfmul->numvuv = this->base[pos++];
175     pdfmul->numdeltas = this->base[pos++];
176     scmeanpow = this->base[pos++];
177     maxbigpow = this->base[pos++];
178     if (maxbigpow < PICOKPDF_BIG_POW) {
179         PICODBG_ERROR(("bigpow %i is larger than maxbigpow %i defined in pdf lingware", PICOKPDF_BIG_POW, maxbigpow));
180         return picoos_emRaiseException(common->em, PICO_EXC_MAX_NUM_EXCEED,NULL,NULL);
181     }
182     pdfmul->bigpow = PICOKPDF_BIG_POW; /* what we have to use is the smaller number! */
183 
184     pdfmul->amplif = this->base[pos++];
185 
186     /* bigpow corrected by scmeanpow, multiply means by 2^meanpow to obtain fixed point representation */
187     pdfmul->meanpow = convScaleFactorToBig(scmeanpow, pdfmul->bigpow);
188     if (0 == pdfmul->meanpow) {
189         PICODBG_ERROR(("error in convScaleFactorToBig"));
190         return picoos_emRaiseException(common->em, PICO_EXC_MAX_NUM_EXCEED,NULL,NULL);
191     }
192     nummean = 3*pdfmul->ceporder;
193 
194     pdfmul->meanpowUm = picoos_allocate(common->mm,nummean*sizeof(picoos_uint8));
195     pdfmul->ivarpow = picoos_allocate(common->mm,nummean*sizeof(picoos_uint8));
196     if ((NULL == pdfmul->meanpowUm) || (NULL == pdfmul->ivarpow)) {
197         picoos_deallocate(common->mm,(void *) &(pdfmul->meanpowUm));
198         picoos_deallocate(common->mm,(void *) &(pdfmul->ivarpow));
199         return picoos_emRaiseException(common->em,PICO_EXC_OUT_OF_MEM,NULL,NULL);
200     }
201 
202     /*     read meanpowUm and convert on the fly */
203     /*     meaning of meanpowUm becomes: multiply means from pdf stream by 2^meanpowUm
204      * to achieve fixed point scaling by big
205      */
206     for (i=0; i<nummean; i++) {
207         pdfmul->meanpowUm[i] = convScaleFactorToBig(this->base[pos++], pdfmul->bigpow);
208     }
209 
210    /*read ivarpow  and convert on the fly */
211     for (i=0; i<nummean; i++) {
212         pdfmul->ivarpow[i] = convScaleFactorToBig(this->base[pos++], pdfmul->bigpow);
213     }
214 
215     /* check numdeltas */
216     if ((pdfmul->numdeltas == 0xFF) && (pdfmul->vecsize != (pdfmul->numvuv + pdfmul->ceporder * 3 * (2+1)))) {
217         PICODBG_ERROR(("header has inconsistent values for vecsize, ceporder, numvuv, and numdeltas"));
218         return picoos_emRaiseException(common->em,PICO_EXC_FILE_CORRUPT,NULL,NULL);
219      }
220 
221 /*     vecsize: 1 uint8 for vuv
222          + ceporder short for static means
223          + numdeltas uint8 and short for sparse delta means
224          + ceporder*3 uint8 for static and delta inverse variances
225 */
226     if ((pdfmul->numdeltas != 0xFF) && (pdfmul->vecsize != pdfmul->numvuv+pdfmul->ceporder*2+pdfmul->numdeltas*3+pdfmul->ceporder*3)) {
227         PICODBG_ERROR(("header has inconsistent values for vecsize, ceporder, numvuv, and numdeltas\n"
228                 "vecsize = %i while numvuv+ceporder*2 + numdeltas*3 + ceporder*3 = %i",
229                 pdfmul->vecsize, pdfmul->numvuv + pdfmul->ceporder*2 + pdfmul->numdeltas * 3 + pdfmul->ceporder * 3));
230         return picoos_emRaiseException(common->em,PICO_EXC_FILE_CORRUPT,NULL,NULL);
231     }
232     pdfmul->content = &(this->base[pos]);
233     PICODBG_DEBUG(("numframes %d, vecsize %d, numstates %d, ceporder %d, "
234                    "numvuv %d, numdeltas %d, meanpow %d, bigpow %d",
235                    pdfmul->numframes, pdfmul->vecsize, pdfmul->numstates,
236                    pdfmul->ceporder, pdfmul->numvuv, pdfmul->numdeltas,
237                    pdfmul->meanpow, pdfmul->bigpow));
238     if ((picoos_uint32)(pos + (pdfmul->numframes * pdfmul->vecsize)) != this->size) {
239         PICODBG_DEBUG(("header-spec size %d, kb-size %d",
240                        pos + (pdfmul->numframes * pdfmul->vecsize),
241                        this->size));
242         return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
243                                        NULL, NULL);
244     }
245     PICODBG_DEBUG(("mul pdf initialized"));
246     return PICO_OK;
247 }
248 
kpdfPHSInitialize(register picoknow_KnowledgeBase this,picoos_Common common)249 static pico_status_t kpdfPHSInitialize(register picoknow_KnowledgeBase this,
250                                        picoos_Common common) {
251     picokpdf_pdfphs_t *pdfphs;
252     picoos_uint16 pos;
253 
254     if (NULL == this || NULL == this->subObj) {
255         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
256                                        NULL, NULL);
257     }
258     pdfphs = (picokpdf_pdfphs_t *)this->subObj;
259 
260     pos = 0;
261 
262     pdfphs->numvectors = ((picoos_uint16)(this->base[pos+1])) << 8 |
263         this->base[pos];
264     pos += 2;
265     pdfphs->indexBase = &(this->base[pos]);
266     pdfphs->contentBase = pdfphs->indexBase + pdfphs->numvectors * sizeof(picoos_uint32);
267     PICODBG_DEBUG(("phs pdf initialized"));
268     return PICO_OK;
269 }
270 
271 
272 
kpdfMULSubObjDeallocate(register picoknow_KnowledgeBase this,picoos_MemoryManager mm)273 static pico_status_t kpdfMULSubObjDeallocate(register picoknow_KnowledgeBase this,
274                                           picoos_MemoryManager mm) {
275 
276 
277     picokpdf_pdfmul_t *pdfmul;
278 
279     if ((NULL != this) && (NULL != this->subObj)) {
280         pdfmul = (picokpdf_pdfmul_t *)this->subObj;
281         picoos_deallocate(mm,(void *) &(pdfmul->meanpowUm));
282         picoos_deallocate(mm,(void *) &(pdfmul->ivarpow));
283         picoos_deallocate(mm, (void *) &(this->subObj));
284     }
285     return PICO_OK;
286 }
287 
kpdfDURSubObjDeallocate(register picoknow_KnowledgeBase this,picoos_MemoryManager mm)288 static pico_status_t kpdfDURSubObjDeallocate(register picoknow_KnowledgeBase this,
289                                           picoos_MemoryManager mm) {
290     if (NULL != this) {
291         picoos_deallocate(mm, (void *) &this->subObj);
292     }
293     return PICO_OK;
294 }
295 
kpdfPHSSubObjDeallocate(register picoknow_KnowledgeBase this,picoos_MemoryManager mm)296 static pico_status_t kpdfPHSSubObjDeallocate(register picoknow_KnowledgeBase this,
297                                           picoos_MemoryManager mm) {
298     if (NULL != this) {
299         picoos_deallocate(mm, (void *) &this->subObj);
300     }
301     return PICO_OK;
302 }
303 
/* we don't offer a specialized constructor for a *KnowledgeBase but
 * instead a "specializer" of an already existing generic
 * picoknow_KnowledgeBase */
307 
picokpdf_specializePdfKnowledgeBase(picoknow_KnowledgeBase this,picoos_Common common,const picokpdf_kpdftype_t kpdftype)308 pico_status_t picokpdf_specializePdfKnowledgeBase(picoknow_KnowledgeBase this,
309                                           picoos_Common common,
310                                           const picokpdf_kpdftype_t kpdftype) {
311     pico_status_t status;
312 
313     if (NULL == this) {
314         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
315                                        NULL, NULL);
316     }
317     switch (kpdftype) {
318         case PICOKPDF_KPDFTYPE_DUR:
319             this->subDeallocate = kpdfDURSubObjDeallocate;
320             this->subObj = picoos_allocate(common->mm,sizeof(picokpdf_pdfdur_t));
321             if (NULL == this->subObj) {
322                 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
323                                                NULL, NULL);
324             }
325             status = kpdfDURInitialize(this, common);
326             break;
327         case PICOKPDF_KPDFTYPE_MUL:
328             this->subDeallocate = kpdfMULSubObjDeallocate;
329             this->subObj = picoos_allocate(common->mm,sizeof(picokpdf_pdfmul_t));
330             if (NULL == this->subObj) {
331                 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
332                                                NULL, NULL);
333             }
334             status = kpdfMULInitialize(this, common);
335             break;
336         case PICOKPDF_KPDFTYPE_PHS:
337             this->subDeallocate = kpdfPHSSubObjDeallocate;
338             this->subObj = picoos_allocate(common->mm,sizeof(picokpdf_pdfphs_t));
339             if (NULL == this->subObj) {
340                 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
341                                                NULL, NULL);
342             }
343             status = kpdfPHSInitialize(this, common);
344             break;
345 
346         default:
347             return picoos_emRaiseException(common->em, PICO_ERR_OTHER,
348                                            NULL, NULL);
349     }
350 
351     if (status != PICO_OK) {
352         picoos_deallocate(common->mm, (void *) &this->subObj);
353         return picoos_emRaiseException(common->em, status, NULL, NULL);
354     }
355     return PICO_OK;
356 }
357 
358 
359 /* ************************************************************/
360 /* pdf getPdf* */
361 /* ************************************************************/
362 
picokpdf_getPdfDUR(picoknow_KnowledgeBase this)363 picokpdf_PdfDUR picokpdf_getPdfDUR(picoknow_KnowledgeBase this) {
364     return ((NULL == this) ? NULL : ((picokpdf_PdfDUR) this->subObj));
365 }
366 
picokpdf_getPdfMUL(picoknow_KnowledgeBase this)367 picokpdf_PdfMUL picokpdf_getPdfMUL(picoknow_KnowledgeBase this) {
368     return ((NULL == this) ? NULL : ((picokpdf_PdfMUL) this->subObj));
369 }
370 
picokpdf_getPdfPHS(picoknow_KnowledgeBase this)371 picokpdf_PdfPHS picokpdf_getPdfPHS(picoknow_KnowledgeBase this) {
372     return ((NULL == this) ? NULL : ((picokpdf_PdfPHS) this->subObj));
373 }
374 
375 
376 #ifdef __cplusplus
377 }
378 #endif
379 
380 
381 /* end */
382