1 /*
2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /**
17  * @file picokdt.c
18  *
19  * knowledge handling for decision trees
20  *
21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22  * All rights reserved.
23  *
24  * History:
25  * - 2009-04-20 -- initial version
26  *
27  */
28 
29 #include "picoos.h"
30 #include "picodbg.h"
31 #include "picobase.h"
32 #include "picoknow.h"
33 #include "picodata.h"
34 #include "picokdt.h"
35 
36 #ifdef __cplusplus
37 extern "C" {
38 #endif
39 #if 0
40 }
41 #endif
42 
43 
44 /* ************************************************************/
45 /* decision tree */
46 /* ************************************************************/
47 
48 /**
49  * @addtogroup picokdt
50   * ---------------------------------------------------\n
51  * <b> Pico KDT support </b>\n
52  * ---------------------------------------------------\n
53    overview extended binary tree file:
54   - dt consists of optional attribute mapping tables and a non-empty
55     tree part
56   - using the attribute mapping tables an attribute value as used
57     throughout the TTS can be mapped to its smaller representation
58     used in the tree
59   - multi-byte values always little endian
60 
61   -------------------------------------------------------------------
62   - bin-file, decision tree knowledge base in binary form
63 
64     - dt-kb = header inputmaptables outputmaptables tree
65 
66 
67     - header = INPMAPTABLEPOS2 OUTMAPTABLEPOS2 TREEPOS2
68 
    - INPMAPTABLEPOS2: two bytes, equals offset in number of bytes from
                     the start of kb to the start of input map tables,
                     may not be 0
    - OUTMAPTABLEPOS2: two bytes, equals offset in number of bytes from
                     the start of kb to the start of outtables,
                     may not be 0
    - TREEPOS2: two bytes, equals offset in number of bytes from the
              start of kb to the start of the tree
77 
78 
79     - inputmaptables = maptables
80     - outputmaptables = maptables
81     - maptables = NRMAPTABLES1 {maptable}=NRMAPTABLES1
82     - maptable = LENTABLE2 TABLETYPE1 (   bytemaptable
83                                       | wordmaptable
84                                       | graphinmaptable
85                                       | bytetovarmaptable )
86     - bytemaptable (in or out, usage varies) =  NRBYTES2   {BYTE1}=NRBYTES2
87     - wordmaptable (in or out, usage varies) =  NRWORDS2   {WORD2}=NRWORDS2
88     - graphinmaptable (in only)              =  NRGRAPHS2  {GRAPH1:4}=NRGRAPHS2
89     - bytetovarmaptable (out only)           =  NRINBYTES2 outvarsearchind
90                                               outvaroutputs
91     - outvarsearchind = {OUTVAROFFSET2}=NRINBYTES2
92     - outvaroutputs = {VARVALID1:}=NRINBYTES2
93 
94     - bytemaptable: fixed size, *Map*Fixed \n
95     - wordmaptable: fixed size, *Map*Fixed \n
96     - graphinmaptable: search value is variable size (UTF8 grapheme), \n
97                      value to be mapped to is fixed size, one byte \n
98     - bytetovarmaptable: search value is fixed size, one byte, values \n
99                        to be mapped to are of variable size (e.g. several \n
100                        phones) \n
101 
102     - NRMAPTABLES1: one byte representing the number of map tables
103     - LENTABLE2: two bytes, equals offset to the next table (or next
104                part of kb, e.g. tree),
105                if LENTABLE2 = 3, and
106                TABLETYPE1 = EMPTY -> empty table, no mapping to be done
107     - TABLETYPE1: one byte, type of map table (byte, word, or graph=utf8)
108     - NRBYTES2: two bytes, number of bytes following in the table (one
109               would be okay, to simplify some implementation also set
110               to 2)
    - BYTE1: one byte, the sequence is used to determine the values
           being mapped to, starting with 0
    - NRWORDS2: two bytes, number of words (two bytes) following in the table
114     - WORD2: two bytes, the sequence is used to determine the values
115            being mapped to, starting with 0
116     - NRGRAPHS2: two bytes, number of graphemes encoded in UTF8 following
117                in table
118     - GRAPH1:4: one to four bytes, UTF8 representation of a grapheme, the
119               sequence of graphemes is used to determine the value being
120               mapped to, starting with 0, the length information is
121               encoded in UTF8, no need for extra length info
122     - NRINBYTES2: two bytes, number of single byte IDs the tree can produce
123     - OUTVAROFFSET2: two bytes, offset from the start of the
124                    outvaroutputs to the start of the following output
125                    phone ID group, ie. the first outvaroffset is the
126                    offset to the start of the second PHONEID
127                    group. Using the previous outvaroffset (or the start
                   of the outvaroutputs) the start and length of the
129                    PHONEID group can be determined and we can get the
130                    sequence of output values we map the chunk value to
131     - VARVALID1:: one to several bytes, one byte each for an output phone ID
132 
133     - tree = treenodeinfos TREEBODYSIZE4 treebody
134     - treenodeinfos = NRVFIELDS1 vfields NRATTRIBUTES1 NRQFIELDS1 qfields
135     - vfields = {VFIELD1}=NRVFIELDS1
136     - qfields = {QFIELD1}=NRATTRIBUTES1xNRQFIELDS1
137     - treebody = "cf. code"
138 
139     - TREEBODYSIZE4: four bytes, size of treebody in number of bytes
140     - NRVFIELDS1: one byte, number of node properties in the following
141                 vector (predefined and fixed sequence of properties)
142     - VFIELD1: number of bits used to represent a node property
143     - NRATTRIBUTES1: one byte, number of attributes (rows) in the
144                    following matrix
145     - NRQFIELDS1: one byte, number (columns) of question-dependent node
146                 properties per attribute in the following matrix
147                 (predefined and fixed sequence of properties)
148     - QFIELD1: number of bits used to represent a question-dependent
149              property in the matrix
150 
151 
152     - Currently,
153         - NRVFIELDS1 is fixed at 2 for all trees, ie.
154         - vfields = 2 aVFIELD1 bVFIELD1
155         - aVFIELD1: nr of bits for questions
156         - bVFIELD1: nr of bits for decisions
157 
158         - NRQFIELDS1 is fixed at 5 for all trees, ie. \n
159         - qfields = NRATTRIBUTES1 5 aQFIELD1 bQFIELD1 cQFIELD1 dQFIELD1 eQFIELD1 \n
160             - aQFIELD1: nr of bits for fork count \n
161             - bQFIELD1: nr of bits for start position for subsets \n
162             - cQFIELD1: nr of bits for group size \n
163             - dQFIELD1: nr of bits for offset to reach output \n
164             - eQFIELD1: nr of bits for threshold (if continuous node) \n
165 */
166 
167 
168 /* ************************************************************/
169 /* decision tree data defines */
170 /* may not be changed with current implementation */
171 /* ************************************************************/
172 
/* --- maptables: byte offsets relative to the start of a maptables part --- */

/* NRMAPTABLES1, the number of map tables that follow */
#define PICOKDT_MTSPOS_NRMAPTABLES   0

/* position of first byte of first maptable (for omt the only table) */
#define PICOKDT_MTPOS_START          1

/* --- maptable: byte offsets relative to the start of one map table --- */

#define PICOKDT_MTPOS_LENTABLE       0   /* LENTABLE2, two bytes */
#define PICOKDT_MTPOS_TABLETYPE      2   /* TABLETYPE1, one byte */
#define PICOKDT_MTPOS_NUMBER         3   /* NRBYTES2/NRWORDS2/..., two bytes */
#define PICOKDT_MTPOS_MAPSTART       5   /* first mapping entry */

/* --- treenodeinfos: byte offsets relative to the start of the tree --- */

#define PICOKDT_NIPOS_NRVFIELDS      0   /* NRVFIELDS1 */
#define PICOKDT_NIPOS_NRATTS         3   /* NRATTRIBUTES1 (after 2 vfields) */
#define PICOKDT_NIPOS_NRQFIELDS      4   /* NRQFIELDS1 */

/* --- fixed number of treenodeinfos fields expected in every tree --- */

#define PICOKDT_NODEINFO_NRVFIELDS   2
#define PICOKDT_NODEINFO_NRQFIELDS   5

/* --- fixed number of bits used --- */

#define PICOKDT_NODETYPE_NRBITS      2
#define PICOKDT_SUBSETTYPE_NRBITS    2
#define PICOKDT_ISDECIDE_NRBITS      1
198 
/* Expected number of input map tables per tree type. Every attribute
   has its own (possibly empty) input map table, so with the current
   implementation each value has to equal the matching PICOKDT_NRATT_*
   constant. kdtDtCheck() compares the value against the table count
   stored in the knowledge base. */
typedef enum {
    PICOKDT_NRINPMT_POSP = 12,
    PICOKDT_NRINPMT_POSD = 7,
    PICOKDT_NRINPMT_G2P = 16,
    PICOKDT_NRINPMT_PHR = 8,
    PICOKDT_NRINPMT_ACC = 13,
    PICOKDT_NRINPMT_PAM = 60
} kdt_nrinpmaptables_t;
210 
/* Expected number of output map tables per tree type: every tree has
   at least one output map table, which may be empty. */
typedef enum {
    PICOKDT_NROUTMT_POSP = 1,
    PICOKDT_NROUTMT_POSD = 1,
    PICOKDT_NROUTMT_G2P = 1,
    PICOKDT_NROUTMT_PHR = 1,
    PICOKDT_NROUTMT_ACC = 1,
    PICOKDT_NROUTMT_PAM = 1
} kdt_nroutmaptables_t;
221 
/* Values of the TABLETYPE1 byte of a map table. */
typedef enum {
    PICOKDT_MTTYPE_EMPTY = 0,      /* empty table, no mapping to be done */
    PICOKDT_MTTYPE_BYTE = 1,       /* bytemaptable */
    PICOKDT_MTTYPE_WORD = 2,       /* wordmaptable */
    PICOKDT_MTTYPE_GRAPH = 3,      /* graphinmaptable (UTF8 graphemes) */
    PICOKDT_MTTYPE_BYTETOVAR = 4   /* bytetovarmaptable */
} kdt_mttype_t;
230 
231 
232 /* ************************************************************/
233 /* decision tree types and loading */
234 /* ************************************************************/
235 /*  object       : Dt*KnowledgeBase
236  *  shortcut     : kdt*
237  *  derived from : picoknow_KnowledgeBase
238  */
239 
240 /* subobj shared by all decision trees */
241 typedef struct {
242     picokdt_kdttype_t type;
243     picoos_uint8 *inpmaptable;
244     picoos_uint8 *outmaptable;
245     picoos_uint8 *tree;
246     picoos_uint32 beg_offset[128];  /* for efficiency */
247 
248     /* tree-internal details for faster processing */
249     picoos_uint8 *vfields;
250     picoos_uint8 *qfields;
251     picoos_uint8  nrattributes;
252     picoos_uint8 *treebody;
253     /*picoos_uint8  nrvfields;*/  /* fix PICOKDT_NODEINFO_NRVFIELDS */
254     /*picoos_uint8  nrqfields;*/  /* fix PICOKDT_NODEINFO_NRQFIELDS */
255 
256     /* direct output vector (no output mapping) */
257     picoos_uint8 dset;    /* TRUE if class set, FALSE otherwise */
258     picoos_uint16 dclass;
259 } kdt_subobj_t;
260 
261 /* subobj specific for each decision tree type */
262 typedef struct {
263     kdt_subobj_t dt;
264     picoos_uint16 invec[PICOKDT_NRATT_POSP];    /* input vector */
265     picoos_uint8 inveclen;  /* nr of ele set in invec; must be =nrattributes */
266 } kdtposp_subobj_t;
267 
268 typedef struct {
269     kdt_subobj_t dt;
270     picoos_uint16 invec[PICOKDT_NRATT_POSD];    /* input vector */
271     picoos_uint8 inveclen;  /* nr of ele set in invec; must be =nrattributes */
272 } kdtposd_subobj_t;
273 
274 typedef struct {
275     kdt_subobj_t dt;
276     picoos_uint16 invec[PICOKDT_NRATT_G2P];    /* input vector */
277     picoos_uint8 inveclen;  /* nr of ele set in invec; must be =nrattributes */
278 } kdtg2p_subobj_t;
279 
280 typedef struct {
281     kdt_subobj_t dt;
282     picoos_uint16 invec[PICOKDT_NRATT_PHR];    /* input vector */
283     picoos_uint8 inveclen;  /* nr of ele set in invec; must be =nrattributes */
284 } kdtphr_subobj_t;
285 
286 typedef struct {
287     kdt_subobj_t dt;
288     picoos_uint16 invec[PICOKDT_NRATT_ACC];    /* input vector */
289     picoos_uint8 inveclen;  /* nr of ele set in invec; must be =nrattributes */
290 } kdtacc_subobj_t;
291 
292 typedef struct {
293     kdt_subobj_t dt;
294     picoos_uint16 invec[PICOKDT_NRATT_PAM];    /* input vector */
295     picoos_uint8 inveclen;  /* nr of ele set in invec; must be =nrattributes */
296 } kdtpam_subobj_t;
297 
298 
kdtDtInitialize(register picoknow_KnowledgeBase this,picoos_Common common,kdt_subobj_t * dtp)299 static pico_status_t kdtDtInitialize(register picoknow_KnowledgeBase this,
300                                      picoos_Common common,
301                                      kdt_subobj_t *dtp) {
302     picoos_uint16 inppos;
303     picoos_uint16 outpos;
304     picoos_uint16 treepos;
305     picoos_uint32 curpos = 0, pos;
306     picoos_uint16 lentable;
307     picoos_uint16 i;
308     picoos_uint8 imtnr;
309 
310     PICODBG_DEBUG(("start"));
311 
312     /* get inmap, outmap, tree offsets */
313     if ((PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, &inppos))
314         && (PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, &outpos))
315         && (PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos,
316                                                  &treepos))) {
317 
318         /* all pos are mandatory, verify */
319         if (inppos && outpos && treepos) {
320             dtp->inpmaptable = this->base + inppos;
321             dtp->outmaptable = this->base + outpos;
322             dtp->tree = this->base + treepos;
323             /* precalc beg offset table */
324             imtnr=dtp->inpmaptable[0];
325             pos=1;
326             dtp->beg_offset[0] = 1;
327             for (i = 0; i < imtnr; i++) {
328                 lentable = ((picoos_uint16)(dtp->inpmaptable[pos+1])) << 8 |
329                     dtp->inpmaptable[pos];
330                 pos += lentable;
331                 dtp->beg_offset[i+1] = pos;
332             }
333         } else {
334             dtp->inpmaptable = NULL;
335             dtp->outmaptable = NULL;
336             dtp->tree = NULL;
337             PICODBG_ERROR(("invalid kb position info"));
338             return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
339                                            NULL, NULL);
340         }
341 
342         /* nr of outmaptables is equal 1 for all trees, verify */
343         if (dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != 1) {
344             PICODBG_ERROR(("wrong number of outmaptables"));
345             return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
346                                            NULL, NULL);
347         }
348 
349         /* check if this is an empty table, ie. len == 3 */
350         if ((dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_LENTABLE]
351              == 3)
352             && (dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_LENTABLE
353                                  + 1] == 0)) {
354             /* verify that this is supposed to be an empty table and
355                set outmaptable to NULL if so */
356             if (dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_TABLETYPE]
357                 == PICOKDT_MTTYPE_EMPTY) {
358                 dtp->outmaptable = NULL;
359             } else {
360                 PICODBG_ERROR(("table length vs. type problem"));
361                 return picoos_emRaiseException(common->em,
362                                                PICO_EXC_FILE_CORRUPT,
363                                                NULL, NULL);
364             }
365         }
366 
367         dtp->vfields = dtp->tree + 1;
368         dtp->qfields = dtp->tree + PICOKDT_NODEINFO_NRVFIELDS + 3;
369         dtp->nrattributes = dtp->tree[PICOKDT_NIPOS_NRATTS];
370         dtp->treebody = dtp->qfields + 4 +
371             (dtp->nrattributes * PICOKDT_NODEINFO_NRQFIELDS); /* TREEBODYSIZE4*/
372 
373         /*dtp->nrvfields = dtp->tree[PICOKDT_NIPOS_NRVFIELDS]; <- is fix */
374         /*dtp->nrqfields = dtp->tree[PICOKDT_NIPOS_NRQFIELDS]; <- is fix */
375         /* verify that nrvfields ad nrqfields are correct */
376         if ((PICOKDT_NODEINFO_NRVFIELDS != dtp->tree[PICOKDT_NIPOS_NRVFIELDS]) ||
377             (PICOKDT_NODEINFO_NRQFIELDS != dtp->tree[PICOKDT_NIPOS_NRQFIELDS])) {
378             PICODBG_ERROR(("problem with nr of vfields (%d) or qfields (%d)",
379                            dtp->tree[PICOKDT_NIPOS_NRVFIELDS],
380                            dtp->tree[PICOKDT_NIPOS_NRQFIELDS]));
381             return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
382                                            NULL, NULL);
383         }
384         dtp->dset = 0;
385         dtp->dclass = 0;
386         PICODBG_DEBUG(("tree init: nratt: %d, posomt: %d, postree: %d",
387                        dtp->nrattributes, (dtp->outmaptable - dtp->inpmaptable),
388                        (dtp->tree - dtp->inpmaptable)));
389         return PICO_OK;
390     } else {
391         PICODBG_ERROR(("problem reading kb in memory"));
392         return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
393                                        NULL, NULL);
394     }
395 }
396 
397 
kdtDtCheck(register picoknow_KnowledgeBase this,picoos_Common common,kdt_subobj_t * dtp,kdt_nratt_t nratt,kdt_nrinpmaptables_t nrinpmt,kdt_nroutmaptables_t nroutmt,kdt_mttype_t mttype)398 static pico_status_t kdtDtCheck(register picoknow_KnowledgeBase this,
399                                 picoos_Common common,
400                                 kdt_subobj_t *dtp,
401                                 kdt_nratt_t nratt,
402                                 kdt_nrinpmaptables_t nrinpmt,
403                                 kdt_nroutmaptables_t nroutmt,
404                                 kdt_mttype_t mttype) {
405     /* check nr attributes */
406     /* check nr inpmaptables */
407     /* check nr outmaptables */
408     /* check outmaptable is word type */
409     if ((nratt != dtp->nrattributes)
410         || (dtp->inpmaptable == NULL)
411         || (dtp->outmaptable == NULL)
412         || (dtp->inpmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != nrinpmt)
413         || (dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != nroutmt)
414         || (dtp->outmaptable[PICOKDT_MTPOS_START+PICOKDT_MTPOS_TABLETYPE]
415             != mttype)) {
416         PICODBG_ERROR(("check failed, nratt %d, nrimt %d, nromt %d, omttype %d",
417                        dtp->nrattributes,
418                        dtp->inpmaptable[PICOKDT_MTSPOS_NRMAPTABLES],
419                        dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES],
420                        dtp->outmaptable[PICOKDT_MTPOS_START +
421                                         PICOKDT_MTPOS_TABLETYPE]));
422         return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
423                                        NULL, NULL);
424     }
425     return PICO_OK;
426 }
427 
428 
429 
kdtPosPInitialize(register picoknow_KnowledgeBase this,picoos_Common common)430 static pico_status_t kdtPosPInitialize(register picoknow_KnowledgeBase this,
431                                        picoos_Common common) {
432     pico_status_t status;
433     kdtposp_subobj_t *dtposp;
434     kdt_subobj_t *dt;
435     picoos_uint8 i;
436 
437     if (NULL == this || NULL == this->subObj) {
438         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
439                                        NULL, NULL);
440     }
441     dtposp = (kdtposp_subobj_t *)this->subObj;
442     dt = &(dtposp->dt);
443     dt->type = PICOKDT_KDTTYPE_POSP;
444     if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) {
445         return status;
446     }
447     if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_POSP,
448                              PICOKDT_NRINPMT_POSP, PICOKDT_NROUTMT_POSP,
449                              PICOKDT_MTTYPE_WORD)) != PICO_OK) {
450         return status;
451     }
452 
453     /* init specialized subobj part */
454     for (i = 0; i < PICOKDT_NRATT_POSP; i++) {
455         dtposp->invec[i] = 0;
456     }
457     dtposp->inveclen = 0;
458     PICODBG_DEBUG(("posp tree initialized"));
459     return PICO_OK;
460 }
461 
462 
kdtPosDInitialize(register picoknow_KnowledgeBase this,picoos_Common common)463 static pico_status_t kdtPosDInitialize(register picoknow_KnowledgeBase this,
464                                        picoos_Common common) {
465     pico_status_t status;
466     kdtposd_subobj_t *dtposd;
467     kdt_subobj_t *dt;
468     picoos_uint8 i;
469 
470     if (NULL == this || NULL == this->subObj) {
471         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
472                                        NULL, NULL);
473     }
474     dtposd = (kdtposd_subobj_t *)this->subObj;
475     dt = &(dtposd->dt);
476     dt->type = PICOKDT_KDTTYPE_POSD;
477     if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) {
478         return status;
479     }
480     if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_POSD,
481                              PICOKDT_NRINPMT_POSD, PICOKDT_NROUTMT_POSD,
482                              PICOKDT_MTTYPE_WORD)) != PICO_OK) {
483         return status;
484     }
485 
486     /* init spezialized subobj part */
487     for (i = 0; i < PICOKDT_NRATT_POSD; i++) {
488         dtposd->invec[i] = 0;
489     }
490     dtposd->inveclen = 0;
491     PICODBG_DEBUG(("posd tree initialized"));
492     return PICO_OK;
493 }
494 
495 
kdtG2PInitialize(register picoknow_KnowledgeBase this,picoos_Common common)496 static pico_status_t kdtG2PInitialize(register picoknow_KnowledgeBase this,
497                                       picoos_Common common) {
498     pico_status_t status;
499     kdtg2p_subobj_t *dtg2p;
500     kdt_subobj_t *dt;
501     picoos_uint8 i;
502 
503     if (NULL == this || NULL == this->subObj) {
504         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
505                                        NULL, NULL);
506     }
507     dtg2p = (kdtg2p_subobj_t *)this->subObj;
508     dt = &(dtg2p->dt);
509     dt->type = PICOKDT_KDTTYPE_G2P;
510     if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) {
511         return status;
512     }
513 
514     if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_G2P,
515                              PICOKDT_NRINPMT_G2P, PICOKDT_NROUTMT_G2P,
516                              PICOKDT_MTTYPE_BYTETOVAR)) != PICO_OK) {
517         return status;
518     }
519 
520     /* init spezialized subobj part */
521     for (i = 0; i < PICOKDT_NRATT_G2P; i++) {
522         dtg2p->invec[i] = 0;
523     }
524     dtg2p->inveclen = 0;
525     PICODBG_DEBUG(("g2p tree initialized"));
526     return PICO_OK;
527 }
528 
529 
kdtPhrInitialize(register picoknow_KnowledgeBase this,picoos_Common common)530 static pico_status_t kdtPhrInitialize(register picoknow_KnowledgeBase this,
531                                       picoos_Common common) {
532     pico_status_t status;
533     kdtphr_subobj_t *dtphr;
534     kdt_subobj_t *dt;
535     picoos_uint8 i;
536 
537     if (NULL == this || NULL == this->subObj) {
538         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
539                                        NULL, NULL);
540     }
541     dtphr = (kdtphr_subobj_t *)this->subObj;
542     dt = &(dtphr->dt);
543     dt->type = PICOKDT_KDTTYPE_PHR;
544     if ((status = kdtDtInitialize(this, common,dt)) != PICO_OK) {
545         return status;
546     }
547 
548     if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_PHR,
549                              PICOKDT_NRINPMT_PHR, PICOKDT_NROUTMT_PHR,
550                              PICOKDT_MTTYPE_WORD)) != PICO_OK) {
551         return status;
552     }
553 
554     /* init spezialized subobj part */
555     for (i = 0; i < PICOKDT_NRATT_PHR; i++) {
556         dtphr->invec[i] = 0;
557     }
558     dtphr->inveclen = 0;
559     PICODBG_DEBUG(("phr tree initialized"));
560     return PICO_OK;
561 }
562 
563 
kdtAccInitialize(register picoknow_KnowledgeBase this,picoos_Common common)564 static pico_status_t kdtAccInitialize(register picoknow_KnowledgeBase this,
565                                       picoos_Common common) {
566     pico_status_t status;
567     kdtacc_subobj_t *dtacc;
568     kdt_subobj_t *dt;
569     picoos_uint8 i;
570 
571     if (NULL == this || NULL == this->subObj) {
572         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
573                                        NULL, NULL);
574     }
575     dtacc = (kdtacc_subobj_t *)this->subObj;
576     dt = &(dtacc->dt);
577     dt->type = PICOKDT_KDTTYPE_ACC;
578     if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) {
579         return status;
580     }
581 
582     if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_ACC,
583                              PICOKDT_NRINPMT_ACC, PICOKDT_NROUTMT_ACC,
584                              PICOKDT_MTTYPE_WORD)) != PICO_OK) {
585         return status;
586     }
587 
588     /* init spezialized subobj part */
589     for (i = 0; i < PICOKDT_NRATT_ACC; i++) {
590         dtacc->invec[i] = 0;
591     }
592     dtacc->inveclen = 0;
593     PICODBG_DEBUG(("acc tree initialized"));
594     return PICO_OK;
595 }
596 
597 
kdtPamInitialize(register picoknow_KnowledgeBase this,picoos_Common common)598 static pico_status_t kdtPamInitialize(register picoknow_KnowledgeBase this,
599                                       picoos_Common common) {
600     pico_status_t status;
601     kdtpam_subobj_t *dtpam;
602     kdt_subobj_t *dt;
603     picoos_uint8 i;
604 
605     if (NULL == this || NULL == this->subObj) {
606         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
607                                        NULL, NULL);
608     }
609     dtpam = (kdtpam_subobj_t *)this->subObj;
610     dt = &(dtpam->dt);
611     dt->type = PICOKDT_KDTTYPE_PAM;
612     if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) {
613         return status;
614     }
615 
616     if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_PAM,
617                              PICOKDT_NRINPMT_PAM, PICOKDT_NROUTMT_PAM,
618                              PICOKDT_MTTYPE_WORD)) != PICO_OK) {
619         return status;
620     }
621 
622     /* init spezialized subobj part */
623     for (i = 0; i < PICOKDT_NRATT_PAM; i++) {
624         dtpam->invec[i] = 0;
625     }
626     dtpam->inveclen = 0;
627     PICODBG_DEBUG(("pam tree initialized"));
628     return PICO_OK;
629 }
630 
631 
kdtSubObjDeallocate(register picoknow_KnowledgeBase this,picoos_MemoryManager mm)632 static pico_status_t kdtSubObjDeallocate(register picoknow_KnowledgeBase this,
633                                          picoos_MemoryManager mm) {
634     if (NULL != this) {
635         picoos_deallocate(mm, (void *) &this->subObj);
636     }
637     return PICO_OK;
638 }
639 
640 
/* we don't offer a specialized constructor for a *KnowledgeBase but
 * instead a "specializer" of an already existing generic
 * picoknow_KnowledgeBase */
644 
picokdt_specializeDtKnowledgeBase(picoknow_KnowledgeBase this,picoos_Common common,const picokdt_kdttype_t kdttype)645 pico_status_t picokdt_specializeDtKnowledgeBase(picoknow_KnowledgeBase this,
646                                                 picoos_Common common,
647                                                 const picokdt_kdttype_t kdttype) {
648     pico_status_t status;
649 
650     if (NULL == this) {
651         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
652                                        NULL, NULL);
653     }
654     this->subDeallocate = kdtSubObjDeallocate;
655     switch (kdttype) {
656         case PICOKDT_KDTTYPE_POSP:
657             this->subObj = picoos_allocate(common->mm,sizeof(kdtposp_subobj_t));
658             if (NULL == this->subObj) {
659                 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
660                                                NULL, NULL);
661             }
662             status = kdtPosPInitialize(this, common);
663             break;
664         case PICOKDT_KDTTYPE_POSD:
665             this->subObj = picoos_allocate(common->mm,sizeof(kdtposd_subobj_t));
666             if (NULL == this->subObj) {
667                 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
668                                                NULL, NULL);
669             }
670             status = kdtPosDInitialize(this, common);
671             break;
672         case PICOKDT_KDTTYPE_G2P:
673             this->subObj = picoos_allocate(common->mm,sizeof(kdtg2p_subobj_t));
674             if (NULL == this->subObj) {
675                 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
676                                                NULL, NULL);
677             }
678             status = kdtG2PInitialize(this, common);
679             break;
680         case PICOKDT_KDTTYPE_PHR:
681             this->subObj = picoos_allocate(common->mm,sizeof(kdtphr_subobj_t));
682             if (NULL == this->subObj) {
683                 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
684                                                NULL, NULL);
685             }
686             status = kdtPhrInitialize(this, common);
687             break;
688         case PICOKDT_KDTTYPE_ACC:
689             this->subObj = picoos_allocate(common->mm,sizeof(kdtacc_subobj_t));
690             if (NULL == this->subObj) {
691                 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
692                                                NULL, NULL);
693             }
694             status = kdtAccInitialize(this, common);
695             break;
696         case PICOKDT_KDTTYPE_PAM:
697             this->subObj = picoos_allocate(common->mm,sizeof(kdtpam_subobj_t));
698             if (NULL == this->subObj) {
699                 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
700                                                NULL, NULL);
701             }
702             status = kdtPamInitialize(this, common);
703             break;
704         default:
705             return picoos_emRaiseException(common->em, PICO_ERR_OTHER,
706                                            NULL, NULL);
707     }
708 
709     if (status != PICO_OK) {
710         picoos_deallocate(common->mm, (void *) &this->subObj);
711         return picoos_emRaiseException(common->em, status, NULL, NULL);
712     }
713     return PICO_OK;
714 }
715 
716 
717 /* ************************************************************/
718 /* decision tree getDt* */
719 /* ************************************************************/
720 
picokdt_getDtPosP(picoknow_KnowledgeBase this)721 picokdt_DtPosP picokdt_getDtPosP(picoknow_KnowledgeBase this) {
722     return ((NULL == this) ? NULL : ((picokdt_DtPosP) this->subObj));
723 }
724 
picokdt_getDtPosD(picoknow_KnowledgeBase this)725 picokdt_DtPosD picokdt_getDtPosD(picoknow_KnowledgeBase this) {
726     return ((NULL == this) ? NULL : ((picokdt_DtPosD) this->subObj));
727 }
728 
picokdt_getDtG2P(picoknow_KnowledgeBase this)729 picokdt_DtG2P  picokdt_getDtG2P (picoknow_KnowledgeBase this) {
730     return ((NULL == this) ? NULL : ((picokdt_DtG2P) this->subObj));
731 }
732 
picokdt_getDtPHR(picoknow_KnowledgeBase this)733 picokdt_DtPHR  picokdt_getDtPHR (picoknow_KnowledgeBase this) {
734     return ((NULL == this) ? NULL : ((picokdt_DtPHR) this->subObj));
735 }
736 
picokdt_getDtACC(picoknow_KnowledgeBase this)737 picokdt_DtACC  picokdt_getDtACC (picoknow_KnowledgeBase this) {
738     return ((NULL == this) ? NULL : ((picokdt_DtACC) this->subObj));
739 }
740 
picokdt_getDtPAM(picoknow_KnowledgeBase this)741 picokdt_DtPAM  picokdt_getDtPAM (picoknow_KnowledgeBase this) {
742     return ((NULL == this) ? NULL : ((picokdt_DtPAM) this->subObj));
743 }
744 
745 
746 
747 /* ************************************************************/
748 /* decision tree support functions, tree */
749 /* ************************************************************/
750 
751 
/* Indices into the vfields array of a dt subobj. Each entry holds a field
   width in bits; kdtAskTree passes this->vfields[...] as the bit count when
   reading the corresponding field from the tree body. */
typedef enum {
    eQuestion  = 0,   /* #bits of the question (attribute index) field */
    eDecide    = 1    /* #bits of the decision (class) field */
} kdt_vfields_ind_t;
756 
/* Indices of the per-attribute entries in the qfields table (second index
   argument of kdtGetQFieldsVal). Each entry is a field width in bits that
   kdtAskTree uses when reading or skipping the corresponding field. */
typedef enum {
    eForkCount = 0,   /* index to #bits for number of forks */
    eBitNo     = 1,   /* index to #bits for index of 1st element */
    eBitCount  = 2,   /* index to #bits for size of the group */
    eJump      = 3,   /* index to #bits for offset to reach output node */
    eCut       = 4    /* for contin. node: #bits for threshold checked */
} kdt_qfields_ind_t;
764 
/* Node type codes as read from the tree body by kdtAskTree. The numeric
   values are compared against data read from the tree and must not be
   changed. */
typedef enum {
    eNTerminal   = 0,   /* has no case in kdtAskTree: zero forks, so -1 */
    eNBinary     = 1,   /* two forks; attribute value is the fork index */
    eNContinuous = 2,   /* two forks; fork 0 if value <= threshold, else 1 */
    eNDiscrete   = 3    /* variable fork count; subsets select the fork */
} kdt_nodetypes_t;
771 
/* Subset encodings used inside discrete nodes, as read from the tree body
   by kdtAskTree. The numeric values are compared against data read from
   the tree and must not be changed. */
typedef enum {
    eOneValue = 0,         /* one value; matches if equal */
    eTwoValues = 1,        /* two values; matches if equal to either one */
    eWithoutBitMask = 2,   /* first + count; matches first..first+count-1 */
    eBitMask = 3           /* first + count + one membership bit per value */
} kdt_subsettypes_t;
778 
779 
780 /* Name    :   kdt_jump
781    Function:   maps the iJump offset to byte + bit coordinates
782    Input   :   iJump   absolute bit offset (0..(nr-bytes-treebody)*8)
783    Output  :   iByteNo the first byte containing the bits to extract
784                        (0..(nr-bytes-treebody))
785                iBitNo  the first bit to be extracted (0..7)
786    Returns :   void
787    Notes   :   updates the iByteNo + iBitNo fields
788 */
kdt_jump(const picoos_uint32 iJump,picoos_uint32 * iByteNo,picoos_int8 * iBitNo)789 static void kdt_jump(const picoos_uint32 iJump,
790                      picoos_uint32 *iByteNo,
791                      picoos_int8 *iBitNo) {
792     picoos_uint32 iByteSize;
793 
794     iByteSize = (iJump / 8 );
795     *iBitNo = (iJump - (iByteSize * 8)) + (7 - *iBitNo);
796     *iByteNo += iByteSize;
797     if (*iBitNo >= 8) {
798         (*iByteNo)++;
799         *iBitNo = 15 - *iBitNo;
800     } else {
801         *iBitNo = 7 - *iBitNo;
802     }
803 }
804 
805 
806 /* replaced inline for speedup */
/* Name    :   kdtIsVal
   Function:   Returns the binary value of the bit pointed to by iByteNo, iBitNo
   Input   :   iByteNo offset to the byte containing the bit to extract
                       (0..sizeof(treebody))
               iBitNo  offset of the bit to be extracted (0..7)
   Returns :   0/1 depending on the bit pointed to
*/
814 /*
815 static picoos_uint8 kdtIsVal(register kdt_subobj_t *this,
816                              picoos_uint32 iByteNo,
817                              picoos_int8 iBitNo) {
818     return ((this->treebody[iByteNo] & ((1)<<iBitNo)) > 0);
819 }
820 */
821 
822 
823 /* @todo : consider replacing inline for speedup */
824 
825 /* Name    :   kdtGetQFieldsVal (was: m_QuestDependentFields)
826    Function:   gets a byte from qfields
827    Input   :   this      handle to a dt subobj
828                attind    index of the attribute
829                qind      index of the byte to be read
830    Returns :   the requested byte
831    Notes   :   check that attind < this->nrattributes needed before calling
832                this function!
833 */
kdtGetQFieldsVal(register kdt_subobj_t * this,const picoos_uint8 attind,const kdt_qfields_ind_t qind)834 static picoos_uint8 kdtGetQFieldsVal(register kdt_subobj_t *this,
835                                      const picoos_uint8 attind,
836                                      const kdt_qfields_ind_t qind) {
837     /* check of qind done in initialize and (for some compilers) with typing */
838     /* check of attind needed before calling this function */
839     return this->qfields[(attind * PICOKDT_NODEINFO_NRQFIELDS) + qind];
840 }
841 
842 
/* Name    :   kdtGetShiftVal (was: get_shift_value)
   Function:   returns the (treebody) value pointed to by iByteNo, iBitNo,
               and with size iSize
   Input   :   this    reference to the processing unit struct
               iSize   number of bits to be extracted (0..N)
               iByteNo offset to the byte containing the bits to extract
                       (0..sizeof(treebody)); updated to point behind the
                       extracted bits
               iBitNo  offset to the first bit to be extracted (0..7);
                       updated to point behind the extracted bits
   Returns :   the value requested (if size==0 --> 0 is returned)
*/
853 /*
854 static picoos_uint32 orig_kdtGetShiftVal(register kdt_subobj_t *this,
855                                     const picoos_int16 iSize,
856                                     picoos_uint32 *iByteNo,
857                                     picoos_int8 *iBitNo) {
858     picoos_uint32 iVal;
859     picoos_int16 i;
860 
861     iVal = 0;
862     for (i = iSize-1; i >= 0; i--) {
863         if ( (this->treebody[*iByteNo] & ((1)<<(*iBitNo))) > 0) {
864             iVal |= ( (1) << i );
865         }
866         (*iBitNo)--;
867         if (*iBitNo < 0) {
868             *iBitNo = 7;
869             (*iByteNo)++;
870         }
871     }
872     return iVal;
873 }
874 */
875 /* refactor */
kdtGetShiftVal(register kdt_subobj_t * this,const picoos_int16 iSize,picoos_uint32 * iByteNo,picoos_int8 * iBitNo)876 static picoos_uint32 kdtGetShiftVal(register kdt_subobj_t *this,
877         const picoos_int16 iSize, picoos_uint32 *iByteNo, picoos_int8 *iBitNo)
878 {
879     picoos_uint32 v, b, iVal;
880     picoos_int16 i, j, len;
881     picoos_uint8 val;
882 
883     if (iSize < 4) {
884         iVal = 0;
885         for (i = iSize - 1; i >= 0; i--) {
886             /* no check that *iByteNo is within valid treebody range */
887             if ((this->treebody[*iByteNo] & ((1) << (*iBitNo))) > 0) {
888                 iVal |= ((1) << i);
889             }
890             (*iBitNo)--;
891             if (*iBitNo < 0) {
892                 *iBitNo = 7;
893                 (*iByteNo)++;
894             }
895         }
896         return iVal;
897     }
898 
899     b = *iByteNo;
900     j = *iBitNo;
901     len = iSize;
902     *iBitNo = j - iSize;
903     v = 0;
904     while (*iBitNo < 0) {
905         *iBitNo += 8;
906         (*iByteNo)++;
907     }
908 
909     val = this->treebody[b++];
910     if (j < 7) {
911         switch (j) {
912             case 0:
913                 val &= 0x01;
914                 break;
915             case 1:
916                 val &= 0x03;
917                 break;
918             case 2:
919                 val &= 0x07;
920                 break;
921             case 3:
922                 val &= 0x0f;
923                 break;
924             case 4:
925                 val &= 0x1f;
926                 break;
927             case 5:
928                 val &= 0x3f;
929                 break;
930             case 6:
931                 val &= 0x7f;
932                 break;
933         }
934     }
935     len -= j + 1;
936     if (len < 0) {
937         val >>= -len;
938     }
939     v = val;
940     while (len > 0) {
941         if (len >= 8) {
942             j = 8;
943         } else {
944             j = len;
945         }
946         v <<= j;
947         val = this->treebody[b++];
948         if (j < 8) {
949             switch (j) {
950                 case 1:
951                     val &= 0x80;
952                     val >>= 7;
953                     break;
954                 case 2:
955                     val &= 0xc0;
956                     val >>= 6;
957                     break;
958                 case 3:
959                     val &= 0xe0;
960                     val >>= 5;
961                     break;
962                 case 4:
963                     val &= 0xf0;
964                     val >>= 4;
965                     break;
966                 case 5:
967                     val &= 0xf8;
968                     val >>= 3;
969                     break;
970                 case 6:
971                     val &= 0xfc;
972                     val >>= 2;
973                     break;
974                 case 7:
975                     val &= 0xfe;
976                     val >>= 1;
977                     break;
978             }
979         }
980         v |= val;
981         len -= j;
982     }
983     return v;
984 }
985 
986 
/* Name    :   kdtAskTree
   Function:   Tree Traversal routine; evaluates the single node that
               starts at the given position of the tree body
   Input   :   this     dt subobj; its treebody, vfields, qfields and
                        nrattributes are read, dset and dclass are written
               invec    attribute values, indexed by the question read
                        from the node
               invecmax number of entries in invec
               iByteNo  offset to the first byte containing the bits
                        to extract (0..sizeof(treebody))
               iBitNo   offset to the first bit to be extracted (0..7)
   Output  :   iByteNo, iBitNo are advanced while the node is read; when 1
               is returned they point to the node to evaluate next
   Returns :   >0    continue, no solution yet found (1 is returned)
               =0    solution found; this->dclass holds the decision and
                     this->dset is TRUE
               <0    error, no solution found (-1 is returned and
                     this->dset is FALSE)
   Notes   :   nodes whose type has no case in the switch below (e.g.
               eNTerminal) have zero forks and therefore end with -1
*/
static picoos_int8 kdtAskTree(register kdt_subobj_t *this,
                              picoos_uint16 *invec,
                              const kdt_nratt_t invecmax,
                              picoos_uint32 *iByteNo,
                              picoos_int8 *iBitNo) {
    picoos_uint32 iNodeType;
    picoos_uint8 iQuestion;
    picoos_int32 iVal;
    picoos_int32 iForks;
    picoos_int32 iID;

    picoos_int32 iCut, iSubsetType, iBitPos, iBitCount, iPos, iJump, iDecision;
    picoos_int32 i;
    picoos_char iIsDecide;

    PICODBG_TRACE(("start"));

    /* get node type, value should be in kdt_nodetypes_t range */
    iNodeType = kdtGetShiftVal(this, PICOKDT_NODETYPE_NRBITS, iByteNo, iBitNo);
    PICODBG_TRACE(("iNodeType: %d", iNodeType));

    /* get attribute to be used in question, check if in range, and get val */
    /* check of vfields argument done in initialize */
    iQuestion = kdtGetShiftVal(this, this->vfields[eQuestion], iByteNo, iBitNo);
    if ((iQuestion < this->nrattributes) && (iQuestion < invecmax)) {
        iVal = invec[iQuestion];
    } else {
        this->dset = FALSE;
        PICODBG_TRACE(("invalid question"));
        return -1;    /* iQuestion invalid */
    }
    /* iForks: number of forks of this node; iID: index of the fork selected
       by iVal, -1 as long as none is selected */
    iForks = 0;
    iID = -1;
    PICODBG_TRACE(("iQuestion: %d", iQuestion));

    switch (iNodeType) {
        case eNBinary: {
            /* the attribute value itself is used as fork index */
            iForks = 2;
            iID = iVal;
            break;
        }
        case eNContinuous: {
            /* fork 0 if the value is <= threshold, fork 1 otherwise */
            iForks = 2;
            iID = 1;
            iCut = kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, eCut),
                                  iByteNo, iBitNo); /*read the threshold*/
            if (iVal <= iCut) {
                iID = 0;
            }
            break;
        }
        case eNDiscrete: {
            iForks =
                kdtGetShiftVal(this,
                               kdtGetQFieldsVal(this, iQuestion, eForkCount),
                               iByteNo, iBitNo);

            /* one subset description per fork except the last one; the
               last fork is the default. Once a fork is selected
               (iID > -1) the remaining descriptions are only skipped. */
            for (i = 0; i < iForks-1; i++) {
                iSubsetType =
                    kdtGetShiftVal(this, PICOKDT_SUBSETTYPE_NRBITS,
                                   iByteNo, iBitNo);

                switch (iSubsetType) {
                    case eOneValue: {
                        /* one value, width eBitNo bits */
                        if (iID > -1) {
                            kdt_jump(kdtGetQFieldsVal(this, iQuestion, eBitNo),
                                     iByteNo, iBitNo);
                            break;
                        }
                        iBitPos =
                            kdtGetShiftVal(this,
                                           kdtGetQFieldsVal(this, iQuestion,
                                                            eBitNo),
                                           iByteNo, iBitNo);
                        if (iVal == iBitPos) {
                            iID = i;
                        }
                        break;
                    }
                    case eTwoValues: {
                        /* two values, read with widths eBitNo and
                           eBitCount bits */
                        if (iID > -1) {
                            kdt_jump((kdtGetQFieldsVal(this, iQuestion, eBitNo) +
                                      kdtGetQFieldsVal(this, iQuestion, eBitCount)),
                                     iByteNo, iBitNo);
                            break;
                        }

                        iBitPos =
                            kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
                                                                  eBitNo),
                                           iByteNo, iBitNo);
                        iBitCount =
                            kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
                                                                  eBitCount),
                                           iByteNo, iBitNo);
                        if ((iVal == iBitPos) || (iVal == iBitCount)) {
                            iID = i;
                        }
                        break;
                    }
                    case eWithoutBitMask: {
                        /* contiguous range: first value and number of
                           values */
                        if (iID > -1) {
                            kdt_jump((kdtGetQFieldsVal(this, iQuestion, eBitNo) +
                                      kdtGetQFieldsVal(this, iQuestion, eBitCount)),
                                     iByteNo, iBitNo);
                            break;
                        }

                        iBitPos =
                            kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
                                                                  eBitNo),
                                           iByteNo, iBitNo);
                        iBitCount =
                            kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
                                                                  eBitCount),
                                           iByteNo, iBitNo);
                        if ((iVal >= iBitPos) && (iVal < (iBitPos + iBitCount))) {
                            iID = i;
                        }
                        break;
                    }
                    case eBitMask: {
                        /* first value, number of values, then one
                           membership bit per value of the range */
                        iBitPos = 0;
                        if (iID > -1) {
                            kdt_jump(kdtGetQFieldsVal(this, iQuestion, eBitNo),
                                     iByteNo, iBitNo);
                        } else {
                            iBitPos =
                                kdtGetShiftVal(this,
                                               kdtGetQFieldsVal(this, iQuestion,
                                                                eBitNo),
                                               iByteNo, iBitNo);
                        }

                        /* the count is always read, it gives the length
                           of the mask that follows */
                        iBitCount =
                            kdtGetShiftVal(this,
                                           kdtGetQFieldsVal(this, iQuestion,
                                                            eBitCount),
                                           iByteNo, iBitNo);
                        if (iID > -1) {
                            kdt_jump(iBitCount, iByteNo, iBitNo);
                            break;
                        }

                        if ((iVal >= iBitPos) && (iVal < (iBitPos + iBitCount))) {
                            /* iPos is assigned here but not read anywhere
                               in this function */
                            iPos = iVal - iBitPos;
                            /* go to the mask bit of iVal, test it, then
                               skip the rest of the mask */
                            kdt_jump((iVal - iBitPos), iByteNo, iBitNo);
                         /* if (kdtIsVal(this, *iByteNo, *iBitNo))*/
                            if ((this->treebody[*iByteNo] & ((1)<<(*iBitNo))) > 0) {
                                iID = i;
                            }
                            kdt_jump((iBitCount - (iVal-iBitPos)), iByteNo, iBitNo);
                        } else {
                            /* value outside the range: skip the mask */
                            kdt_jump(iBitCount, iByteNo, iBitNo);
                        }
                        break;
                    }/*end case eBitMask*/
                }/*end switch (iSubsetType)*/
            }/*end for ( i = 0; i < iForks-1; i++ ) */

            /*default tree branch*/
            if (-1 == iID) {
                iID = iForks-1;
            }
            break;
        }/*end case eNDiscrete*/
    }/*end switch (iNodeType)*/

    /* fork table: per fork one isDecide bit followed by either a jump
       offset (eJump bits) or a decision (vfields[eDecide] bits). The
       entries of forks other than iID are skipped. */
    for (i = 0; i < iForks; i++) {
        iIsDecide = kdtGetShiftVal(this, PICOKDT_ISDECIDE_NRBITS, iByteNo, iBitNo);

        PICODBG_TRACE(("doing forks: %d", i));

        if (!iIsDecide) {
            if (iID == i) {
                /* selected fork leads to another node: move the position
                   forward by the offset stored in the fork entry */
                iJump =
                    kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, eJump),
                                   iByteNo, iBitNo);
                kdt_jump(iJump, iByteNo, iBitNo);
                this->dset = FALSE;
                return 1;    /* to be continued, no solution yet found */
            } else {
                kdt_jump(kdtGetQFieldsVal(this, iQuestion, eJump),
                         iByteNo, iBitNo);
            }
        } else {
            if (iID == i) {
                /* check of vfields argument done in initialize */
                iDecision = kdtGetShiftVal(this, this->vfields[eDecide],
                                           iByteNo, iBitNo);
                this->dclass = iDecision;
                this->dset = TRUE;
                return 0;    /* solution found */
            } else {
                /* check of vfields argument done in initialize */
                kdt_jump(this->vfields[eDecide], iByteNo, iBitNo);
            }
        }/*end if (!iIsDecide)*/
    }/*end for (i = 0; i < iForks; i++ )*/

    /* no fork with index iID exists (this includes iForks == 0) */
    this->dset = FALSE;
    PICODBG_TRACE(("problem determining class"));
    return -1; /* solution not found, problem determining a class */
}
1201 
1202 
1203 
1204 /* ************************************************************/
1205 /* decision tree support functions, mappings */
1206 /* ************************************************************/
1207 
1208 
1209 /* size==1 -> MapInByte, size==2 -> MapInWord,
1210    size determined from table type contained in kb.
1211    if the inmaptable is empty, outval = inval */
1212 
kdtMapInFixed(const kdt_subobj_t * dt,const picoos_uint8 imtnr,const picoos_uint16 inval,picoos_uint16 * outval,picoos_uint16 * outfallbackval)1213 static picoos_uint8 kdtMapInFixed(const kdt_subobj_t *dt,
1214                                   const picoos_uint8 imtnr,
1215                                   const picoos_uint16 inval,
1216                                   picoos_uint16 *outval,
1217                                   picoos_uint16 *outfallbackval) {
1218     picoos_uint8 size;
1219     picoos_uint32 pos;
1220     picoos_uint16 lentable;
1221     picoos_uint16 posbound;
1222     picoos_uint16 i;
1223 
1224     *outval = 0;
1225     *outfallbackval = 0;
1226 
1227     size = 0;
1228     pos = 0;
1229 
1230     /* check what can be checked */
1231     if (imtnr >= dt->inpmaptable[pos++]) {   /* outside tablenr range? */
1232         PICODBG_ERROR(("check failed: nrtab: %d, imtnr: %d",
1233                        dt->inpmaptable[pos-1], imtnr));
1234         return FALSE;
1235     }
1236 
1237     /* go forward to the needed tablenr */
1238     if (imtnr > 0) {
1239         pos = dt->beg_offset[imtnr];
1240     }
1241 
1242     /* get length */
1243     lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
1244         dt->inpmaptable[pos];
1245     posbound = pos + lentable;
1246     pos += 2;
1247 
1248     /* check type of table and set size */
1249     if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_EMPTY) {
1250         /* empty table no mapping needed */
1251         PICODBG_TRACE(("empty table: %d", imtnr));
1252         *outval = inval;
1253         return TRUE;
1254     } else if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_BYTE) {
1255         size = 1;
1256     } else if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_WORD) {
1257         size = 2;
1258     } else {
1259         /* wrong table type */
1260         PICODBG_ERROR(("wrong table type %d", dt->inpmaptable[pos]));
1261         return FALSE;
1262     }
1263     pos++;
1264 
1265     /* set fallback value in case of failed mapping, and set upper bound pos */
1266     *outfallbackval = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
1267         dt->inpmaptable[pos];
1268     pos += 2;
1269 
1270     /* size must be 1 or 2 here, keep 'redundant' so save time */
1271     if (size == 1) {
1272         for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
1273             if (inval == dt->inpmaptable[pos]) {
1274                 *outval = i;
1275                 PICODBG_TRACE(("s1 %d in %d -> out %d", imtnr, inval, *outval));
1276                 return TRUE;
1277             }
1278             pos++;
1279         }
1280     } else if (size == 2) {
1281         posbound--;
1282         for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
1283             if (inval == (((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
1284                           dt->inpmaptable[pos])) {
1285                 *outval = i;
1286                 PICODBG_TRACE(("s2 %d in %d -> out %d", imtnr, inval, *outval));
1287                 return TRUE;
1288             }
1289             pos += 2;
1290         }
1291     } else {
1292         /* impossible size */
1293         PICODBG_ERROR(("wrong size %d", size));
1294         return FALSE;
1295     }
1296 
1297     PICODBG_DEBUG(("no mapping found, fallback: %d", *outfallbackval));
1298     return FALSE;
1299 }
1300 
1301 
/* Name    :   kdtMapInGraph
   Function:   maps one UTF-8 character to the index of its entry in input
               map table number imtnr, which has to be of type GRAPH
   Input   :   dt           dt subobj; inpmaptable is read
               imtnr        number of the input map table to use
               inval        bytes of the character; the length is derived
                            from inval[0] with picobase_det_utf8_length
               invalmaxlen  number of bytes accessible in inval
   Output  :   outval          index of the matching table entry, 0 if the
                               mapping failed
               outfallbackval  16 bit field that follows the table type;
                               it also limits the number of entries that
                               are searched. 0 if it was not reached.
   Returns :   TRUE if the character was found in the table, else FALSE
   Notes   :   the table is located by walking over the length fields of
               the preceding tables; entries are stored back to back with
               their UTF-8 length determined from their first byte
*/
static picoos_uint8 kdtMapInGraph(const kdt_subobj_t *dt,
                                  const picoos_uint8 imtnr,
                                  const picoos_uint8 *inval,
                                  const picoos_uint8 invalmaxlen,
                                  picoos_uint16 *outval,
                                  picoos_uint16 *outfallbackval) {
    /* initialized: the error trace below prints ilen also when an earlier
       check failed and ilen was never computed */
    picoos_uint8 ilen = 0;
    picoos_uint8 tlen;
    picoos_uint8 cont;
    picoos_uint32 pos;
    picoos_uint16 lentable;
    /* 32 bit like pos: pos accumulates the lengths of all preceding
       tables, so pos + lentable can exceed 0xFFFF */
    picoos_uint32 posbound;
    picoos_uint16 i;
    picoos_uint8 j;

    /* defined result on every failure path, as in kdtMapInFixed */
    *outval = 0;
    *outfallbackval = 0;

    pos = 0;
    /* check what can be checked */
    if ((imtnr >= dt->inpmaptable[pos++]) ||     /* outside tablenr range? */
        (invalmaxlen == 0) ||                    /* too short? */
        ((ilen = picobase_det_utf8_length(inval[0])) == 0) ||   /* invalid? */
        (ilen > invalmaxlen)) {                  /* not accessible? */
        PICODBG_ERROR(("check failed: nrtab: %d, imtnr: %d, invalmaxlen: %d, "
                       "ilen: %d",
                       dt->inpmaptable[pos-1], imtnr, invalmaxlen, ilen));
        return FALSE;
    }

    /* go forward to the needed tablenr */
    for (i = 0; i < imtnr; i++) {
        lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
            dt->inpmaptable[pos];
        pos += lentable;
    }

    /* get length and check type of inpmaptable */
    lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
        dt->inpmaptable[pos];
    posbound = pos + lentable;
    pos += 2;

#if defined(PICO_DEBUG)
    if (1) {
        picoos_uint32 id;
        PICODBG_TRACE(("imtnr %d", imtnr));
        for (id = pos-2; id < posbound; id++) {
            PICODBG_TRACE(("imtbyte pos %d, %c %d", id - (pos-2),
                           dt->inpmaptable[id], dt->inpmaptable[id]));
        }
    }
#endif

    /* check type of table */
    if (dt->inpmaptable[pos] != PICOKDT_MTTYPE_GRAPH) {
        /* empty table does not make sense for graph */
        /* wrong table type */
        PICODBG_ERROR(("wrong table type"));
        return FALSE;
    }
    pos++;

    /* set fallback value in case of failed mapping, and set upper bound pos */
    *outfallbackval = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
        dt->inpmaptable[pos];
    pos += 2;

    /* sequential search */
    for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
        tlen = picobase_det_utf8_length(dt->inpmaptable[pos]);
        if ((pos + tlen) > posbound) {
            PICODBG_ERROR(("trying outside imt, posb: %d, pos: %d, tlen: %d",
                           posbound, pos, tlen));
            return FALSE;
        }
        /* only entries of the same byte length can match */
        if (ilen == tlen) {
            cont = TRUE;
            for (j = 0; cont && (j < ilen); j++) {
                if (dt->inpmaptable[pos + j] != inval[j]) {
                    cont = FALSE;
                }
            }
            if (cont && (j == ilen)) {    /* match found */
                *outval = i;
                PICODBG_TRACE(("found mapval, posb %d, pos %d, i %d, tlen %d",
                               posbound, pos, i, tlen));
                return TRUE;
            }
        }
        pos += tlen;
    }
    PICODBG_DEBUG(("outside imt %d, posb/pos/i: %d/%d/%d, fallback: %d",
                   imtnr, posbound, pos, i, *outfallbackval));
    return FALSE;
}
1397 
1398 
1399 /* size==1 -> MapOutByte,    size==2 -> MapOutWord */
kdtMapOutFixed(const kdt_subobj_t * dt,const picoos_uint16 inval,picoos_uint16 * outval)1400 static picoos_uint8 kdtMapOutFixed(const kdt_subobj_t *dt,
1401                                    const picoos_uint16 inval,
1402                                    picoos_uint16 *outval) {
1403     picoos_uint8 size;
1404     picoos_uint16 nr;
1405 
1406     /* no check of lentable vs. nr in initialize done */
1407 
1408     size = 0;
1409 
1410     /* type */
1411     nr = dt->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_TABLETYPE];
1412 
1413     /* check type of table and set size */
1414     if (nr == PICOKDT_MTTYPE_EMPTY) {
1415         /* empty table no mapping needed */
1416         PICODBG_TRACE(("empty table"));
1417         *outval = inval;
1418         return TRUE;
1419     } else if (nr == PICOKDT_MTTYPE_BYTE) {
1420         size = 1;
1421     } else if (nr == PICOKDT_MTTYPE_WORD) {
1422         size = 2;
1423     } else {
1424         /* wrong table type */
1425         PICODBG_ERROR(("wrong table type %d", nr));
1426         return FALSE;
1427     }
1428 
1429     /* number of mapvalues */
1430     nr = ((picoos_uint16)(dt->outmaptable[PICOKDT_MTPOS_START +
1431                                           PICOKDT_MTPOS_NUMBER + 1])) << 8
1432         | dt->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_NUMBER];
1433 
1434     if (inval < nr) {
1435         if (size == 1) {
1436             *outval = dt->outmaptable[PICOKDT_MTPOS_START +
1437                                       PICOKDT_MTPOS_MAPSTART + (size * inval)];
1438         } else {
1439             *outval = ((picoos_uint16)(dt->outmaptable[PICOKDT_MTPOS_START +
1440                           PICOKDT_MTPOS_MAPSTART + (size * inval) + 1])) << 8
1441                                      | dt->outmaptable[PICOKDT_MTPOS_START +
1442                           PICOKDT_MTPOS_MAPSTART + (size * inval)];
1443         }
1444         return TRUE;
1445     } else {
1446         *outval = 0;
1447         return FALSE;
1448     }
1449 }
1450 
1451 
1452 /* size==1 -> ReverseMapOutByte,    size==2 -> ReverseMapOutWord */
1453 /* outmaptable also used to map from decoded tree output domain to
1454    direct tree output domain */
kdtReverseMapOutFixed(const kdt_subobj_t * dt,const picoos_uint16 inval,picoos_uint16 * outval,picoos_uint16 * outfallbackval)1455 static picoos_uint8 kdtReverseMapOutFixed(const kdt_subobj_t *dt,
1456                                           const picoos_uint16 inval,
1457                                           picoos_uint16 *outval,
1458                                           picoos_uint16 *outfallbackval) {
1459     picoos_uint8 size;
1460     picoos_uint32 pos;
1461     picoos_uint16 lentable;
1462     picoos_uint16 posbound;
1463     picoos_uint16 i;
1464 
1465     /* no check of lentable vs. nr in initialize done */
1466 
1467     size = 0;
1468     pos = 0;
1469     *outval = 0;
1470     *outfallbackval = 0;
1471 
1472     if (dt->outmaptable == NULL) {
1473         /* empty table no mapping needed */
1474         PICODBG_TRACE(("empty table"));
1475         *outval = inval;
1476         return TRUE;
1477     }
1478 
1479     /* check what can be checked */
1480     if (dt->outmaptable[pos++] != 1) {   /* only one omt possible */
1481         PICODBG_ERROR(("check failed: nrtab: %d", dt->outmaptable[pos-1]));
1482         return FALSE;
1483     }
1484 
1485     /* get length */
1486     lentable = ((picoos_uint16)(dt->outmaptable[pos+1])) << 8 |
1487         dt->outmaptable[pos];
1488     posbound = pos + lentable;
1489     pos += 2;
1490 
1491     /* check type of table and set size */
1492     /* if (dt->outmaptable[pos] == PICOKDT_MTTYPE_EMPTY), in
1493        ...Initialize the omt is set to NULL if not existing, checked
1494        above */
1495 
1496     if (dt->outmaptable[pos] == PICOKDT_MTTYPE_BYTE) {
1497         size = 1;
1498     } else if (dt->outmaptable[pos] == PICOKDT_MTTYPE_WORD) {
1499         size = 2;
1500     } else {
1501         /* wrong table type */
1502         PICODBG_ERROR(("wrong table type %d", dt->outmaptable[pos]));
1503         return FALSE;
1504     }
1505     pos++;
1506 
1507     /* set fallback value in case of failed mapping, and set upper bound pos */
1508     *outfallbackval = ((picoos_uint16)(dt->outmaptable[pos+1])) << 8 |
1509         dt->outmaptable[pos];
1510     pos += 2;
1511 
1512     /* size must be 1 or 2 here, keep 'redundant' so save time */
1513     if (size == 1) {
1514         for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
1515             if (inval == dt->outmaptable[pos]) {
1516                 *outval = i;
1517                 PICODBG_TRACE(("s1 inval %d -> outval %d", inval, *outval));
1518                 return TRUE;
1519             }
1520             pos++;
1521         }
1522     } else if (size == 2) {
1523         posbound--;
1524         for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
1525             if (inval == (((picoos_uint16)(dt->outmaptable[pos+1])) << 8 |
1526                           dt->outmaptable[pos])) {
1527                 *outval = i;
1528                 PICODBG_TRACE(("s2 inval %d -> outval %d", inval, *outval));
1529                 return TRUE;
1530             }
1531             pos += 2;
1532         }
1533     } else {
1534         /* impossible size */
1535         PICODBG_ERROR(("wrong size %d", size));
1536         return FALSE;
1537     }
1538 
1539     PICODBG_DEBUG(("no mapping found, fallback: %d", *outfallbackval));
1540     return FALSE;
1541 }
1542 
1543 
picokdt_dtPosDreverseMapOutFixed(const picokdt_DtPosD this,const picoos_uint16 inval,picoos_uint16 * outval,picoos_uint16 * outfallbackval)1544 picoos_uint8 picokdt_dtPosDreverseMapOutFixed(const picokdt_DtPosD this,
1545                                           const picoos_uint16 inval,
1546                                           picoos_uint16 *outval,
1547                                           picoos_uint16 *outfallbackval) {
1548 
1549     kdtposd_subobj_t * dtposd = (kdtposd_subobj_t *)this;
1550     kdt_subobj_t * dt = &(dtposd->dt);
1551     return kdtReverseMapOutFixed(dt,inval, outval, outfallbackval);
1552 }
1553 
1554 /* not yet impl. size==1 -> MapOutByteToVar,
1555    fix:  size==2 -> MapOutWordToVar */
kdtMapOutVar(const kdt_subobj_t * dt,const picoos_uint16 inval,picoos_uint8 * nr,picoos_uint16 * outval,const picoos_uint16 outvalmaxlen)1556 static picoos_uint8 kdtMapOutVar(const kdt_subobj_t *dt,
1557                                  const picoos_uint16 inval,
1558                                  picoos_uint8 *nr,
1559                                  picoos_uint16 *outval,
1560                                  const picoos_uint16 outvalmaxlen) {
1561     picoos_uint16 pos;
1562     picoos_uint16 off2ind;
1563     picoos_uint16 lentable;
1564     picoos_uint16 nrinbytes;
1565     picoos_uint8 size;
1566     picoos_uint16 offset1;
1567     picoos_uint16 i;
1568 
1569     if (dt->outmaptable == NULL) {
1570         /* empty table not possible */
1571         PICODBG_ERROR(("no table found"));
1572         return FALSE;
1573     }
1574 
1575     /* nr of tables == 1 already checked in *Initialize, no need here, go
1576        directly to position 1 */
1577     pos = 1;
1578 
1579     /* get length of table */
1580     lentable = (((picoos_uint16)(dt->outmaptable[pos + 1])) << 8 |
1581                 dt->outmaptable[pos]);
1582     pos += 2;
1583 
1584     /* check table type */
1585     if (dt->outmaptable[pos] != PICOKDT_MTTYPE_BYTETOVAR) {
1586         /* wrong table type */
1587         PICODBG_ERROR(("wrong table type %d", dt->outmaptable[pos]));
1588         return FALSE;
1589     }
1590     size = 2;
1591     pos++;
1592 
1593     /* get nr of ele in maptable (= nr of possible invals) */
1594     nrinbytes = (((picoos_uint16)(dt->outmaptable[pos+1])) << 8 |
1595                  dt->outmaptable[pos]);
1596     pos += 2;
1597 
1598     /* check what's checkable */
1599     if (nrinbytes == 0) {
1600         PICODBG_ERROR(("table with length zero"));
1601         return FALSE;
1602     } else if (inval >= nrinbytes) {
1603         PICODBG_ERROR(("inval %d outside valid range %d", inval, nrinbytes));
1604         return FALSE;
1605     }
1606 
1607     PICODBG_TRACE(("inval %d, lentable %d, nrinbytes %d, pos %d", inval,
1608                    lentable, nrinbytes, pos));
1609 
1610     /* set off2ind to the position of the start of offset2-val */
1611     /* offset2 points to start of next ele */
1612     off2ind = pos + (size*inval);
1613 
1614     /* get number of output values, offset2 - offset1 */
1615     if (inval == 0) {
1616         offset1 = 0;
1617     } else {
1618         offset1 = (((picoos_uint16)(dt->outmaptable[off2ind - 1])) << 8 |
1619                    dt->outmaptable[off2ind - 2]);
1620     }
1621     *nr = (((picoos_uint16)(dt->outmaptable[off2ind + 1])) << 8 |
1622            dt->outmaptable[off2ind]) - offset1;
1623 
1624     PICODBG_TRACE(("offset1 %d, nr %d, pos %d", offset1, *nr, pos));
1625 
1626     /* set pos to position of 1st value being mapped to */
1627     pos += (size * nrinbytes) + offset1;
1628 
1629     if ((pos + *nr - 1) > lentable) {
1630         /* outside table, should not happen */
1631         PICODBG_ERROR(("problem with table index, pos %d, nr %d, len %d",
1632                        pos, *nr, lentable));
1633         return FALSE;
1634     }
1635     if (*nr > outvalmaxlen) {
1636         /* not enough space in outval */
1637         PICODBG_ERROR(("overflow in outval, %d > %d", *nr, outvalmaxlen));
1638         return FALSE;
1639     }
1640 
1641     /* finally, copy outmap result to outval */
1642     for (i = 0; i < *nr; i++) {
1643         outval[i] = dt->outmaptable[pos++];
1644     }
1645     return TRUE;
1646 }
1647 
1648 
1649 
1650 /* ************************************************************/
1651 /* decision tree POS prediction (PosP) functions */
1652 /* ************************************************************/
1653 
/* number of prefix and suffix graphemes used to construct the input
   vector; the total number of grapheme attributes is their sum */
#define KDT_POSP_NRGRAPHPREFATT   4
#define KDT_POSP_NRGRAPHSUFFATT   6
#define KDT_POSP_NRGRAPHATT      (KDT_POSP_NRGRAPHPREFATT + \
                                  KDT_POSP_NRGRAPHSUFFATT)

/* positions of specgraph and nrgraphs attributes; they directly follow
   the grapheme attributes in the input vector */
#define KDT_POSP_SPECGRAPHATTPOS (KDT_POSP_NRGRAPHATT)
#define KDT_POSP_NRGRAPHSATTPOS  (KDT_POSP_NRGRAPHATT + 1)
1662 
1663 
1664 /* construct PosP input vector
1665 
1666    PosP invec: 12 elements
1667 
1668    prefix        0-3  prefix graphemes (encoded using tree inpmaptable 0-3)
1669    suffix        4-9  suffix graphemes (encoded using tree inpmaptable 4-9)
1670    isspecchar    10   is a special grapheme (e.g. hyphen) inside the word (0/1)?
1671    nr-utf-graphs 11   number of graphemes (ie. UTF8 chars)
1672 
   if there are fewer than 10 graphemes, each grapheme is used only
   once, with the suffix having higher priority, i.e. elements 0-9 are
   filled as follows:
1676 
1677     #graph
1678     1        0 0 0 0  0 0 0 0 0 1
1679     2        0 0 0 0  0 0 0 0 1 2
1680     3        0 0 0 0  0 0 0 1 2 3
1681     4        0 0 0 0  0 0 1 2 3 4
1682     5        0 0 0 0  0 1 2 3 4 5
1683     6        0 0 0 0  1 2 3 4 5 6
1684     7        1 0 0 0  2 3 4 5 6 7
1685     8        1 2 0 0  3 4 5 6 7 8
1686     9        1 2 3 0  4 5 6 7 8 9
1687     10       1 2 3 4  5 6 7 8 9 10
1688     11       1 2 3 4  6 7 8 9 10 11
1689     ...
1690 
1691     1-6: Fill chbuf
1692     7-10: front to invec 1st part, remove front, add rear
1693     >10: remove front, add rear
1694     no more graph ->
1695     while chbuflen>0:
1696       add rear to the last empty slot in 2nd part of invec, remove rear
1697 */
1698 
1699 
picokdt_dtPosPconstructInVec(const picokdt_DtPosP this,const picoos_uint8 * graph,const picoos_uint16 graphlen,const picoos_uint8 specgraphflag)1700 picoos_uint8 picokdt_dtPosPconstructInVec(const picokdt_DtPosP this,
1701                                           const picoos_uint8 *graph,
1702                                           const picoos_uint16 graphlen,
1703                                           const picoos_uint8 specgraphflag) {
1704     kdtposp_subobj_t *dtposp;
1705 
1706     /* utf8 circular char buffer, used as restricted input deque */
1707     /* 2nd part of graph invec has KDT_POSP_NRGRAPHSUFFATT elements, */
1708     /* max of UTF8_MAXLEN bytes per utf8 char */
1709     picoos_uint8 chbuf[KDT_POSP_NRGRAPHSUFFATT][PICOBASE_UTF8_MAXLEN];
1710     picoos_uint8 chbrear;   /* next free pos */
1711     picoos_uint8 chbfront;  /* next read pos */
1712     picoos_uint8 chblen;    /* empty=0; full=KDT_POSP_NRGRAPHSUFFATT */
1713 
1714     picoos_uint16 poscg;    /* position of current graph (= utf8 char) */
1715     picoos_uint16 lencg = 0;    /* length of current grapheme */
1716     picoos_uint16 nrutfg;   /* number of utf graphemes */
1717     picoos_uint8 invecpos;  /* next element to add in invec */
1718     picoos_uint16 fallback; /* fallback value for failed graph encodings */
1719     picoos_uint8 i;
1720 
1721     dtposp = (kdtposp_subobj_t *)this;
1722     chbrear = 0;
1723     chbfront = 0;
1724     chblen = 0;
1725     poscg = 0;
1726     nrutfg = 0;
1727     invecpos = 0;
1728 
1729     PICODBG_DEBUG(("graphlen %d", graphlen));
1730 
1731     /* not needed, since all elements are set
1732     for (i = 0; i < PICOKDT_NRATT_POSP; i++) {
1733         dtposp->invec[i] = '\x63';
1734     }
1735     */
1736 
1737     dtposp->inveclen = 0;
1738 
1739     while ((poscg < graphlen) &&
1740            ((lencg = picobase_det_utf8_length(graph[poscg])) > 0)) {
1741         if (chblen >= KDT_POSP_NRGRAPHSUFFATT) {      /* chbuf full */
1742             if (invecpos < KDT_POSP_NRGRAPHPREFATT) { /* prefix not full */
1743                 /* att-encode front utf graph and add in invec */
1744                 if (!kdtMapInGraph(&(dtposp->dt), invecpos,
1745                                    chbuf[chbfront], PICOBASE_UTF8_MAXLEN,
1746                                    &(dtposp->invec[invecpos]),
1747                                    &fallback)) {
1748                     if (fallback) {
1749                         dtposp->invec[invecpos] = fallback;
1750                     } else {
1751                         return FALSE;
1752                     }
1753                 }
1754                 invecpos++;
1755             }
1756             /* remove front utf graph */
1757             chbfront++;
1758             chbfront %= KDT_POSP_NRGRAPHSUFFATT;
1759             chblen--;
1760         }
1761         /* add current utf graph to chbuf */
1762         for (i=0; i<lencg; i++) {
1763             chbuf[chbrear][i] = graph[poscg++];
1764         }
1765         if (i < PICOBASE_UTF8_MAXLEN) {
1766             chbuf[chbrear][i] = '\0';
1767         }
1768         chbrear++;
1769         chbrear %= KDT_POSP_NRGRAPHSUFFATT;
1770         chblen++;
1771         /* increase utf graph count */
1772         nrutfg++;
1773     }
1774 
1775     if ((lencg == 0) || (chblen == 0)) {
1776         return FALSE;
1777     } else if (chblen > 0) {
1778 
1779         while (invecpos < KDT_POSP_NRGRAPHPREFATT) { /* fill up prefix */
1780             if (!kdtMapInGraph(&(dtposp->dt), invecpos,
1781                                PICOKDT_OUTSIDEGRAPH_DEFSTR,
1782                                PICOKDT_OUTSIDEGRAPH_DEFLEN,
1783                                &(dtposp->invec[invecpos]), &fallback)) {
1784                 if (fallback) {
1785                     dtposp->invec[invecpos] = fallback;
1786                 } else {
1787                     return FALSE;
1788                 }
1789             }
1790             invecpos++;
1791         }
1792 
1793         for (i = (KDT_POSP_NRGRAPHATT - 1);
1794              i >= KDT_POSP_NRGRAPHPREFATT; i--) {
1795             if (chblen > 0) {
1796                 if (chbrear == 0) {
1797                     chbrear = KDT_POSP_NRGRAPHSUFFATT - 1;
1798                 } else {
1799                     chbrear--;
1800                 }
1801                 if (!kdtMapInGraph(&(dtposp->dt), i, chbuf[chbrear],
1802                                    PICOBASE_UTF8_MAXLEN,
1803                                    &(dtposp->invec[i]), &fallback)) {
1804                     if (fallback) {
1805                         dtposp->invec[i] = fallback;
1806                     } else {
1807                         return FALSE;
1808                     }
1809                 }
1810                 chblen--;
1811             } else {
1812                 if (!kdtMapInGraph(&(dtposp->dt), i,
1813                                    PICOKDT_OUTSIDEGRAPH_DEFSTR,
1814                                    PICOKDT_OUTSIDEGRAPH_DEFLEN,
1815                                    &(dtposp->invec[i]), &fallback)) {
1816                     if (fallback) {
1817                         dtposp->invec[i] = fallback;
1818                     } else {
1819                         return FALSE;
1820                     }
1821                 }
1822             }
1823         }
1824 
1825         /* set isSpecChar attribute, reuse var i */
1826         i = (specgraphflag ? 1 : 0);
1827         if (!kdtMapInFixed(&(dtposp->dt), KDT_POSP_SPECGRAPHATTPOS, i,
1828                            &(dtposp->invec[KDT_POSP_SPECGRAPHATTPOS]),
1829                            &fallback)) {
1830             if (fallback) {
1831                 dtposp->invec[KDT_POSP_SPECGRAPHATTPOS] = fallback;
1832             } else {
1833                 return FALSE;
1834             }
1835         }
1836 
1837         /* set nrGraphs attribute */
1838         if (!kdtMapInFixed(&(dtposp->dt), KDT_POSP_NRGRAPHSATTPOS, nrutfg,
1839                            &(dtposp->invec[KDT_POSP_NRGRAPHSATTPOS]),
1840                            &fallback)) {
1841             if (fallback) {
1842                 dtposp->invec[KDT_POSP_NRGRAPHSATTPOS] = fallback;
1843             } else {
1844                 return FALSE;
1845             }
1846         }
1847         PICODBG_DEBUG(("posp-invec: [%d,%d,%d,%d|%d,%d,%d,%d,%d,%d|%d|%d]",
1848                        dtposp->invec[0], dtposp->invec[1], dtposp->invec[2],
1849                        dtposp->invec[3], dtposp->invec[4], dtposp->invec[5],
1850                        dtposp->invec[6], dtposp->invec[7], dtposp->invec[8],
1851                        dtposp->invec[9], dtposp->invec[10],
1852                        dtposp->invec[11], dtposp->invec[12]));
1853         dtposp->inveclen = PICOKDT_NRINPMT_POSP;
1854         return TRUE;
1855     }
1856 
1857     return FALSE;
1858 }
1859 
1860 
picokdt_dtPosPclassify(const picokdt_DtPosP this)1861 picoos_uint8 picokdt_dtPosPclassify(const picokdt_DtPosP this) {
1862     picoos_uint32 iByteNo;
1863     picoos_int8 iBitNo;
1864     picoos_int8 rv;
1865     kdtposp_subobj_t *dtposp;
1866     kdt_subobj_t *dt;
1867 
1868     dtposp = (kdtposp_subobj_t *)this;
1869     dt = &(dtposp->dt);
1870     iByteNo = 0;
1871     iBitNo = 7;
1872     while ((rv = kdtAskTree(dt, dtposp->invec, PICOKDT_NRATT_POSP,
1873                             &iByteNo, &iBitNo)) > 0) {
1874         PICODBG_TRACE(("asking tree"));
1875     }
1876     PICODBG_DEBUG(("done: %d", dt->dclass));
1877     return ((rv == 0) && dt->dset);
1878 }
1879 
1880 
picokdt_dtPosPdecomposeOutClass(const picokdt_DtPosP this,picokdt_classify_result_t * dtres)1881 picoos_uint8 picokdt_dtPosPdecomposeOutClass(const picokdt_DtPosP this,
1882                                              picokdt_classify_result_t *dtres) {
1883     kdtposp_subobj_t *dtposp;
1884     picoos_uint16 val;
1885 
1886     dtposp = (kdtposp_subobj_t *)this;
1887 
1888     if (dtposp->dt.dset &&
1889         kdtMapOutFixed(&(dtposp->dt), dtposp->dt.dclass, &val)) {
1890         dtres->set = TRUE;
1891         dtres->class = val;
1892         return TRUE;
1893     } else {
1894         dtres->set = FALSE;
1895         return FALSE;
1896     }
1897 }
1898 
1899 
1900 
1901 /* ************************************************************/
1902 /* decision tree POS disambiguation (PosD) functions */
1903 /* ************************************************************/
1904 
1905 
picokdt_dtPosDconstructInVec(const picokdt_DtPosD this,const picoos_uint16 * input)1906 picoos_uint8 picokdt_dtPosDconstructInVec(const picokdt_DtPosD this,
1907                                           const picoos_uint16 * input) {
1908     kdtposd_subobj_t *dtposd;
1909     picoos_uint8 i;
1910     picoos_uint16 fallback = 0;
1911 
1912     dtposd = (kdtposd_subobj_t *)this;
1913     dtposd->inveclen = 0;
1914 
1915     PICODBG_DEBUG(("in: [%d,%d,%d|%d|%d,%d,%d]",
1916                    input[0], input[1], input[2],
1917                    input[3], input[4], input[5],
1918                    input[6]));
1919     for (i = 0; i < PICOKDT_NRATT_POSD; i++) {
1920 
1921         /* do the imt mapping for all inval */
1922         if (!kdtMapInFixed(&(dtposd->dt), i, input[i],
1923                            &(dtposd->invec[i]), &fallback)) {
1924             if (fallback) {
1925                 PICODBG_DEBUG(("*** using fallback for input mapping: %i -> %i", input[i], fallback));
1926                 dtposd->invec[i] = fallback;
1927             } else {
1928                 PICODBG_ERROR(("problem doing input mapping"));
1929                 return FALSE;
1930             }
1931         }
1932     }
1933 
1934     PICODBG_DEBUG(("out: [%d,%d,%d|%d|%d,%d,%d]",
1935                    dtposd->invec[0], dtposd->invec[1], dtposd->invec[2],
1936                    dtposd->invec[3], dtposd->invec[4], dtposd->invec[5],
1937                    dtposd->invec[6]));
1938     dtposd->inveclen = PICOKDT_NRINPMT_POSD;
1939     return TRUE;
1940 }
1941 
1942 
picokdt_dtPosDclassify(const picokdt_DtPosD this,picoos_uint16 * treeout)1943 picoos_uint8 picokdt_dtPosDclassify(const picokdt_DtPosD this,
1944                                     picoos_uint16 *treeout) {
1945     picoos_uint32 iByteNo;
1946     picoos_int8 iBitNo;
1947     picoos_int8 rv;
1948     kdtposd_subobj_t *dtposd;
1949     kdt_subobj_t *dt;
1950 
1951     dtposd = (kdtposd_subobj_t *)this;
1952     dt = &(dtposd->dt);
1953     iByteNo = 0;
1954     iBitNo = 7;
1955     while ((rv = kdtAskTree(dt, dtposd->invec, PICOKDT_NRATT_POSD,
1956                             &iByteNo, &iBitNo)) > 0) {
1957         PICODBG_TRACE(("asking tree"));
1958     }
1959     PICODBG_DEBUG(("done: %d", dt->dclass));
1960     if ((rv == 0) && dt->dset) {
1961         *treeout = dt->dclass;
1962         return TRUE;
1963     } else {
1964         return FALSE;
1965     }
1966 }
1967 
1968 
1969 /* decompose the tree output and return the class in dtres
1970    dtres:         POS classification result
1971    returns:       TRUE if okay, FALSE otherwise
1972 */
picokdt_dtPosDdecomposeOutClass(const picokdt_DtPosD this,picokdt_classify_result_t * dtres)1973 picoos_uint8 picokdt_dtPosDdecomposeOutClass(const picokdt_DtPosD this,
1974                                              picokdt_classify_result_t *dtres) {
1975     kdtposd_subobj_t *dtposd;
1976     picoos_uint16 val;
1977 
1978     dtposd = (kdtposd_subobj_t *)this;
1979 
1980     if (dtposd->dt.dset &&
1981         kdtMapOutFixed(&(dtposd->dt), dtposd->dt.dclass, &val)) {
1982         dtres->set = TRUE;
1983         dtres->class = val;
1984         return TRUE;
1985     } else {
1986         dtres->set = FALSE;
1987         return FALSE;
1988     }
1989 }
1990 
1991 
1992 
1993 /* ************************************************************/
1994 /* decision tree grapheme-to-phoneme (G2P) functions */
1995 /* ************************************************************/
1996 
1997 
1998 /* get the nr'th (starting at 0) utf char in utfgraph */
kdtGetUTF8char(const picoos_uint8 * utfgraph,const picoos_uint16 graphlen,const picoos_uint16 nr,picoos_uint8 * utf8char)1999 static picoos_uint8 kdtGetUTF8char(const picoos_uint8 *utfgraph,
2000                                    const picoos_uint16 graphlen,
2001                                    const picoos_uint16 nr,
2002                                    picoos_uint8 *utf8char) {
2003     picoos_uint16 i;
2004     picoos_uint32 pos;
2005 
2006     pos = 0;
2007     for (i = 0; i < nr; i++) {
2008         if (!picobase_get_next_utf8charpos(utfgraph, graphlen, &pos)) {
2009             return FALSE;
2010         }
2011     }
2012     return picobase_get_next_utf8char(utfgraph, graphlen, &pos, utf8char);
2013 }
2014 
2015 /* determine the utfchar count (starting at 1) of the utfchar starting at pos */
kdtGetUTF8Nr(const picoos_uint8 * utfgraph,const picoos_uint16 graphlen,const picoos_uint16 pos)2016 static picoos_uint16 kdtGetUTF8Nr(const picoos_uint8 *utfgraph,
2017                                   const picoos_uint16 graphlen,
2018                                   const picoos_uint16 pos) {
2019     picoos_uint32 postmp;
2020     picoos_uint16 count;
2021 
2022     count = 0;
2023     postmp = 0;
2024     while ((postmp <= pos) && (count < graphlen)) {
2025         if (!picobase_get_next_utf8charpos(utfgraph, graphlen, &postmp)) {
2026             PICODBG_ERROR(("invalid utf8 string, count: %d, pos: %d, post: %d",
2027                            count, pos, postmp));
2028             return count + 1;
2029         }
2030         count++;
2031     }
2032     return count;
2033 }
2034 
2035 
/* Construct the G2P input vector.

   Attributes 0-4 are the graphemes at context -4/-3/-2/-1 and the current
   grapheme, attributes 5-8 the graphemes at context +1/+2/+3/+4; positions
   outside the word get the 'outside graph' default char, the position
   directly next to the word gets the end-of-word default char. The
   remaining attributes are fixed values taken from the parameters.

   Each attribute is mapped through the tree's input map table; if a
   mapping fails, a non-zero fallback value is used instead. Without a
   fallback the attribute is set to 0, construction continues, and FALSE
   is returned at the end.

   this:            G2P decision tree object
   graph:           utf8 graphemes of the word
   graphlen:        length of graph in bytes
   count:           byte position of the current grapheme in graph, it is
                    converted to a utf8 char count with kdtGetUTF8Nr
   pos:             word POS
   nrvow:           nr of vowel-like graphs in word
   ordvow:          order of current vowel-like graph in word
   primstressflag:  only read here; 1 if primary stress was already set
   phonech1:        phone chunk right context +1
   phonech2:        phone chunk right context +2
   phonech3:        phone chunk right context +3
   returns:         TRUE if all attributes could be mapped (directly or by
                    fallback), FALSE otherwise; inveclen is set in both
                    cases
*/
picoos_uint8 picokdt_dtG2PconstructInVec(const picokdt_DtG2P this,
                                         const picoos_uint8 *graph,
                                         const picoos_uint16 graphlen,
                                         const picoos_uint8 count,
                                         const picoos_uint8 pos,
                                         const picoos_uint8 nrvow,
                                         const picoos_uint8 ordvow,
                                         picoos_uint8 *primstressflag,
                                         const picoos_uint16 phonech1,
                                         const picoos_uint16 phonech2,
                                         const picoos_uint16 phonech3) {
    kdtg2p_subobj_t *dtg2p;
    picoos_uint16 fallback = 0;
    picoos_uint8 iAttr;
    picoos_uint8 utf8char[PICOBASE_UTF8_MAXLEN + 1];
    picoos_uint16 inval;
    /* same width as utfgraphlen, which is assigned to it below */
    picoos_int32 cinv;
    picoos_uint8 retval;
    picoos_int32 utfgraphlen;
    picoos_uint16 utfcount;

    dtg2p = (kdtg2p_subobj_t *)this;
    retval = TRUE;
    inval = 0;

    PICODBG_TRACE(("in:  [%d,%d,%d|%d,%d|%d|%d,%d,%d]", graphlen, count, pos,
                   nrvow, ordvow, *primstressflag, phonech1, phonech2,
                   phonech3));

    dtg2p->inveclen = 0;

    /* many speed-ups possible */

    /* graph attributes */
    /*   count   >     =         <=     count
       iAttr lowbound eow     upbound  delta
         0     4      4       graphlen    5
         1     3      3       graphlen    4
         2     2      2       graphlen    3
         3     1      1       graphlen    2
         4     0      -       graphlen    1

         5     0  graphlen    graphlen-1  0
         6     0  graphlen-1  graphlen-2 -1
         7     0  graphlen-2  graphlen-3 -2
         8     0  graphlen-3  graphlen-4 -3
     */

    /* graph attributes left (context -4/-3/-2/-1) and current, MapInGraph */

    /* word length in utf8 chars; a non-positive result is treated as 0 */
    utfgraphlen = picobase_utf8_length(graph, graphlen);
    if (utfgraphlen <= 0) {
        utfgraphlen = 0;
    }
    /* 1-based utf8 char count of the current grapheme */
    utfcount = kdtGetUTF8Nr(graph, graphlen, count);

    cinv = 4;
    for (iAttr = 0; iAttr < 5; iAttr++) {
        if ((utfcount > cinv) && (utfcount <= utfgraphlen)) {
            /* context position is inside the word */
            if (!kdtGetUTF8char(graph, graphlen, utfcount-cinv-1,
                                utf8char)) {
                PICODBG_WARN(("problem getting UTF char %d", utfcount-cinv-1));
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
                utf8char[1] = '\0';
            }
        } else {
            /* directly before the word: end-of-word char, but never for
               the current grapheme (iAttr 4) */
            if ((utfcount == cinv) && (iAttr != 4)) {
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_EOW_DEFCH;
            } else {
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
            }
            utf8char[1] = '\0';
        }

        if (!kdtMapInGraph(&(dtg2p->dt), iAttr,
                           utf8char, PICOBASE_UTF8_MAXLEN,
                           &(dtg2p->invec[iAttr]),
                           &fallback)) {
            if (fallback) {
                dtg2p->invec[iAttr] = fallback;
            } else {
                PICODBG_WARN(("setting attribute %d to zero", iAttr));
                dtg2p->invec[iAttr] = 0;
                retval = FALSE;
            }
        }
        PICODBG_TRACE(("invec %d %c", iAttr, utf8char[0]));
        cinv--;
    }

    /* graph attributes right (context 1/2/3/4), MapInGraph */
    cinv = utfgraphlen;
    for (iAttr = 5; iAttr < 9; iAttr++) {
        if ((utfcount > 0) && (utfcount <= (cinv - 1))) {
            /* context position is inside the word */
            if (!kdtGetUTF8char(graph, graphlen, utfcount+utfgraphlen-cinv,
                                utf8char)) {
                /* log the index that was actually requested */
                PICODBG_WARN(("problem getting UTF char %d",
                              utfcount+utfgraphlen-cinv));
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
                utf8char[1] = '\0';
            }
        } else {
            /* directly after the word: end-of-word char */
            if (utfcount == cinv) {
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_EOW_DEFCH;
            } else {
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
            }
            utf8char[1] = '\0';
        }
        if (!kdtMapInGraph(&(dtg2p->dt), iAttr,
                           utf8char, PICOBASE_UTF8_MAXLEN,
                           &(dtg2p->invec[iAttr]),
                           &fallback)) {
            if (fallback) {
                dtg2p->invec[iAttr] = fallback;
            } else {
                PICODBG_WARN(("setting attribute %d to zero", iAttr));
                dtg2p->invec[iAttr] = 0;
                retval = FALSE;
            }
        }
        PICODBG_TRACE(("invec %d %c", iAttr, utf8char[0]));
        cinv--;
    }

    /* other attributes, MapInFixed */
    for (iAttr = 9; iAttr < PICOKDT_NRATT_G2P; iAttr++) {
        switch (iAttr) {
            case 9:     /* word POS, Fix1 */
                inval = pos;
                break;
            case 10:    /* nr of vowel-like graphs in word, if vowel, Fix2  */
                inval = nrvow;
                break;
            case 11:    /* order of current vowel-like graph in word, Fix2 */
                inval = ordvow;
                break;
            case 12:    /* primary stress mark, Fix2 */
                if (*primstressflag == 1) {
                    /*already set previously*/
                    inval = 1;
                } else {
                    inval = 0;
                }
                break;
            case 13:    /* phone chunk right context +1, Hist */
                inval = phonech1;
                break;
            case 14:    /* phone chunk right context +2, Hist */
                inval = phonech2;
                break;
            case 15:    /* phone chunk right context +3, Hist */
                inval = phonech3;
                break;
            default:    /* inval keeps its previous value */
                break;
        }

        PICODBG_TRACE(("invec %d %d", iAttr, inval));

        if (!kdtMapInFixed(&(dtg2p->dt), iAttr, inval,
                           &(dtg2p->invec[iAttr]), &fallback)) {
            if (fallback) {
                dtg2p->invec[iAttr] = fallback;
            } else {
                PICODBG_WARN(("setting attribute %d to zero", iAttr));
                dtg2p->invec[iAttr] = 0;
                retval = FALSE;
            }
        }
    }

    /* 16 conversions for the 16 elements invec[0..15] */
    PICODBG_TRACE(("out: [%d,%d,%d,%d|%d|%d,%d,%d,%d|%d,%d,%d,%d|"
                   "%d,%d,%d]", dtg2p->invec[0], dtg2p->invec[1],
                   dtg2p->invec[2], dtg2p->invec[3], dtg2p->invec[4],
                   dtg2p->invec[5], dtg2p->invec[6], dtg2p->invec[7],
                   dtg2p->invec[8], dtg2p->invec[9], dtg2p->invec[10],
                   dtg2p->invec[11], dtg2p->invec[12], dtg2p->invec[13],
                   dtg2p->invec[14], dtg2p->invec[15]));

    dtg2p->inveclen = PICOKDT_NRINPMT_G2P;
    return retval;
}
2221 
2222 
2223 
2224 
picokdt_dtG2Pclassify(const picokdt_DtG2P this,picoos_uint16 * treeout)2225 picoos_uint8 picokdt_dtG2Pclassify(const picokdt_DtG2P this,
2226                                    picoos_uint16 *treeout) {
2227     picoos_uint32 iByteNo;
2228     picoos_int8 iBitNo;
2229     picoos_int8 rv;
2230     kdtg2p_subobj_t *dtg2p;
2231     kdt_subobj_t *dt;
2232 
2233     dtg2p = (kdtg2p_subobj_t *)this;
2234     dt = &(dtg2p->dt);
2235     iByteNo = 0;
2236     iBitNo = 7;
2237     while ((rv = kdtAskTree(dt, dtg2p->invec, PICOKDT_NRATT_G2P,
2238                             &iByteNo, &iBitNo)) > 0) {
2239         PICODBG_TRACE(("asking tree"));
2240     }
2241     PICODBG_TRACE(("done: %d", dt->dclass));
2242     if ((rv == 0) && dt->dset) {
2243         *treeout = dt->dclass;
2244         return TRUE;
2245     } else {
2246         return FALSE;
2247     }
2248 }
2249 
2250 
2251 
picokdt_dtG2PdecomposeOutClass(const picokdt_DtG2P this,picokdt_classify_vecresult_t * dtvres)2252 picoos_uint8 picokdt_dtG2PdecomposeOutClass(const picokdt_DtG2P this,
2253                                   picokdt_classify_vecresult_t *dtvres) {
2254     kdtg2p_subobj_t *dtg2p;
2255 
2256     dtg2p = (kdtg2p_subobj_t *)this;
2257 
2258     if (dtg2p->dt.dset &&
2259         kdtMapOutVar(&(dtg2p->dt), dtg2p->dt.dclass, &(dtvres->nr),
2260                      dtvres->classvec, PICOKDT_MAXSIZE_OUTVEC)) {
2261         return TRUE;
2262     } else {
2263         dtvres->nr = 0;
2264         return FALSE;
2265     }
2266     return TRUE;
2267 }
2268 
2269 
2270 
2271 /* ************************************************************/
2272 /* decision tree phrasing (PHR) functions */
2273 /* ************************************************************/
2274 
picokdt_dtPHRconstructInVec(const picokdt_DtPHR this,const picoos_uint8 pre2,const picoos_uint8 pre1,const picoos_uint8 src,const picoos_uint8 fol1,const picoos_uint8 fol2,const picoos_uint16 nrwordspre,const picoos_uint16 nrwordsfol,const picoos_uint16 nrsyllsfol)2275 picoos_uint8 picokdt_dtPHRconstructInVec(const picokdt_DtPHR this,
2276                                          const picoos_uint8 pre2,
2277                                          const picoos_uint8 pre1,
2278                                          const picoos_uint8 src,
2279                                          const picoos_uint8 fol1,
2280                                          const picoos_uint8 fol2,
2281                                          const picoos_uint16 nrwordspre,
2282                                          const picoos_uint16 nrwordsfol,
2283                                          const picoos_uint16 nrsyllsfol) {
2284     kdtphr_subobj_t *dtphr;
2285     picoos_uint8 i;
2286     picoos_uint16 inval = 0;
2287     picoos_uint16 fallback = 0;
2288 
2289     dtphr = (kdtphr_subobj_t *)this;
2290     PICODBG_DEBUG(("in:  [%d,%d|%d|%d,%d|%d,%d,%d]",
2291                    pre2, pre1, src, fol1, fol2,
2292                    nrwordspre, nrwordsfol, nrsyllsfol));
2293     dtphr->inveclen = 0;
2294 
2295     for (i = 0; i < PICOKDT_NRATT_PHR; i++) {
2296         switch (i) {
2297             case 0: inval = pre2; break;
2298             case 1: inval = pre1; break;
2299             case 2: inval = src; break;
2300             case 3: inval = fol1;  break;
2301             case 4: inval = fol2; break;
2302             case 5: inval = nrwordspre; break;
2303             case 6: inval = nrwordsfol; break;
2304             case 7: inval = nrsyllsfol; break;
2305             default:
2306                 PICODBG_ERROR(("size mismatch"));
2307                 return FALSE;
2308                 break;
2309         }
2310 
2311         /* do the imt mapping for all inval */
2312         if (!kdtMapInFixed(&(dtphr->dt), i, inval,
2313                            &(dtphr->invec[i]), &fallback)) {
2314             if (fallback) {
2315                 dtphr->invec[i] = fallback;
2316             } else {
2317                 PICODBG_ERROR(("problem doing input mapping"));
2318                 return FALSE;
2319             }
2320         }
2321     }
2322 
2323     PICODBG_DEBUG(("out: [%d,%d|%d|%d,%d|%d,%d,%d]",
2324                    dtphr->invec[0], dtphr->invec[1], dtphr->invec[2],
2325                    dtphr->invec[3], dtphr->invec[4], dtphr->invec[5],
2326                    dtphr->invec[6], dtphr->invec[7]));
2327     dtphr->inveclen = PICOKDT_NRINPMT_PHR;
2328     return TRUE;
2329 }
2330 
2331 
picokdt_dtPHRclassify(const picokdt_DtPHR this)2332 picoos_uint8 picokdt_dtPHRclassify(const picokdt_DtPHR this) {
2333     picoos_uint32 iByteNo;
2334     picoos_int8 iBitNo;
2335     picoos_int8 rv;
2336     kdtphr_subobj_t *dtphr;
2337     kdt_subobj_t *dt;
2338 
2339     dtphr = (kdtphr_subobj_t *)this;
2340     dt = &(dtphr->dt);
2341     iByteNo = 0;
2342     iBitNo = 7;
2343     while ((rv = kdtAskTree(dt, dtphr->invec, PICOKDT_NRATT_PHR,
2344                             &iByteNo, &iBitNo)) > 0) {
2345         PICODBG_TRACE(("asking tree"));
2346     }
2347     PICODBG_DEBUG(("done: %d", dt->dclass));
2348     return ((rv == 0) && dt->dset);
2349 }
2350 
2351 
picokdt_dtPHRdecomposeOutClass(const picokdt_DtPHR this,picokdt_classify_result_t * dtres)2352 picoos_uint8 picokdt_dtPHRdecomposeOutClass(const picokdt_DtPHR this,
2353                                             picokdt_classify_result_t *dtres) {
2354     kdtphr_subobj_t *dtphr;
2355     picoos_uint16 val;
2356 
2357     dtphr = (kdtphr_subobj_t *)this;
2358 
2359     if (dtphr->dt.dset &&
2360         kdtMapOutFixed(&(dtphr->dt), dtphr->dt.dclass, &val)) {
2361         dtres->set = TRUE;
2362         dtres->class = val;
2363         return TRUE;
2364     } else {
2365         dtres->set = FALSE;
2366         return FALSE;
2367     }
2368 }
2369 
2370 
2371 
2372 /* ************************************************************/
2373 /* decision tree phono-acoustical model (PAM) functions */
2374 /* ************************************************************/
2375 
picokdt_dtPAMconstructInVec(const picokdt_DtPAM this,const picoos_uint8 * vec,const picoos_uint8 veclen)2376 picoos_uint8 picokdt_dtPAMconstructInVec(const picokdt_DtPAM this,
2377                                          const picoos_uint8 *vec,
2378                                          const picoos_uint8 veclen) {
2379     kdtpam_subobj_t *dtpam;
2380     picoos_uint8 i;
2381     picoos_uint16 fallback = 0;
2382 
2383     dtpam = (kdtpam_subobj_t *)this;
2384 
2385     PICODBG_TRACE(("in0:  %d %d %d %d %d %d %d %d %d %d",
2386                    vec[0], vec[1], vec[2], vec[3], vec[4],
2387                    vec[5], vec[6], vec[7], vec[8], vec[9]));
2388     PICODBG_TRACE(("in1:  %d %d %d %d %d %d %d %d %d %d",
2389                    vec[10], vec[11], vec[12], vec[13], vec[14],
2390                    vec[15], vec[16], vec[17], vec[18], vec[19]));
2391     PICODBG_TRACE(("in2:  %d %d %d %d %d %d %d %d %d %d",
2392                    vec[20], vec[21], vec[22], vec[23], vec[24],
2393                    vec[25], vec[26], vec[27], vec[28], vec[29]));
2394     PICODBG_TRACE(("in3:  %d %d %d %d %d %d %d %d %d %d",
2395                    vec[30], vec[31], vec[32], vec[33], vec[34],
2396                    vec[35], vec[36], vec[37], vec[38], vec[39]));
2397     PICODBG_TRACE(("in4:  %d %d %d %d %d %d %d %d %d %d",
2398                    vec[40], vec[41], vec[42], vec[43], vec[44],
2399                    vec[45], vec[46], vec[47], vec[48], vec[49]));
2400     PICODBG_TRACE(("in5:  %d %d %d %d %d %d %d %d %d %d",
2401                    vec[50], vec[51], vec[52], vec[53], vec[54],
2402                    vec[55], vec[56], vec[57], vec[58], vec[59]));
2403 
2404     dtpam->inveclen = 0;
2405 
2406     /* check veclen */
2407     if (veclen != PICOKDT_NRINPMT_PAM) {
2408         PICODBG_ERROR(("wrong number of input vector elements"));
2409         return FALSE;
2410     }
2411 
2412     for (i = 0; i < PICOKDT_NRATT_PAM; i++) {
2413 
2414         /* do the imt mapping for all vec eles */
2415         if (!kdtMapInFixed(&(dtpam->dt), i, vec[i],
2416                            &(dtpam->invec[i]), &fallback)) {
2417             if (fallback) {
2418                 dtpam->invec[i] = fallback;
2419             } else {
2420                 PICODBG_ERROR(("problem doing input mapping, %d %d", i,vec[i]));
2421                 return FALSE;
2422             }
2423         }
2424     }
2425 
2426     PICODBG_TRACE(("in0:  %d %d %d %d %d %d %d %d %d %d",
2427                    dtpam->invec[0], dtpam->invec[1], dtpam->invec[2],
2428                    dtpam->invec[3], dtpam->invec[4], dtpam->invec[5],
2429                    dtpam->invec[6], dtpam->invec[7], dtpam->invec[8],
2430                    dtpam->invec[9]));
2431     PICODBG_TRACE(("in1:  %d %d %d %d %d %d %d %d %d %d",
2432                    dtpam->invec[10], dtpam->invec[11], dtpam->invec[12],
2433                    dtpam->invec[13], dtpam->invec[14], dtpam->invec[15],
2434                    dtpam->invec[16], dtpam->invec[17], dtpam->invec[18],
2435                    dtpam->invec[19]));
2436     PICODBG_TRACE(("in2:  %d %d %d %d %d %d %d %d %d %d",
2437                    dtpam->invec[20], dtpam->invec[21], dtpam->invec[22],
2438                    dtpam->invec[23], dtpam->invec[24], dtpam->invec[25],
2439                    dtpam->invec[26], dtpam->invec[27], dtpam->invec[28],
2440                    dtpam->invec[29]));
2441     PICODBG_TRACE(("in3:  %d %d %d %d %d %d %d %d %d %d",
2442                    dtpam->invec[30], dtpam->invec[31], dtpam->invec[32],
2443                    dtpam->invec[33], dtpam->invec[34], dtpam->invec[35],
2444                    dtpam->invec[36], dtpam->invec[37], dtpam->invec[38],
2445                    dtpam->invec[39]));
2446     PICODBG_TRACE(("in4:  %d %d %d %d %d %d %d %d %d %d",
2447                    dtpam->invec[40], dtpam->invec[41], dtpam->invec[42],
2448                    dtpam->invec[43], dtpam->invec[44], dtpam->invec[45],
2449                    dtpam->invec[46], dtpam->invec[47], dtpam->invec[48],
2450                    dtpam->invec[49]));
2451     PICODBG_TRACE(("in5:  %d %d %d %d %d %d %d %d %d %d",
2452                    dtpam->invec[50], dtpam->invec[51], dtpam->invec[52],
2453                    dtpam->invec[53], dtpam->invec[54], dtpam->invec[55],
2454                    dtpam->invec[56], dtpam->invec[57], dtpam->invec[58],
2455                    dtpam->invec[59]));
2456 
2457     dtpam->inveclen = PICOKDT_NRINPMT_PAM;
2458     return TRUE;
2459 }
2460 
2461 
picokdt_dtPAMclassify(const picokdt_DtPAM this)2462 picoos_uint8 picokdt_dtPAMclassify(const picokdt_DtPAM this) {
2463     picoos_uint32 iByteNo;
2464     picoos_int8 iBitNo;
2465     picoos_int8 rv;
2466     kdtpam_subobj_t *dtpam;
2467     kdt_subobj_t *dt;
2468 
2469     dtpam = (kdtpam_subobj_t *)this;
2470     dt = &(dtpam->dt);
2471     iByteNo = 0;
2472     iBitNo = 7;
2473     while ((rv = kdtAskTree(dt, dtpam->invec, PICOKDT_NRATT_PAM,
2474                             &iByteNo, &iBitNo)) > 0) {
2475         PICODBG_TRACE(("asking tree"));
2476     }
2477     PICODBG_DEBUG(("done: %d", dt->dclass));
2478     return ((rv == 0) && dt->dset);
2479 }
2480 
2481 
picokdt_dtPAMdecomposeOutClass(const picokdt_DtPAM this,picokdt_classify_result_t * dtres)2482 picoos_uint8 picokdt_dtPAMdecomposeOutClass(const picokdt_DtPAM this,
2483                                             picokdt_classify_result_t *dtres) {
2484     kdtpam_subobj_t *dtpam;
2485     picoos_uint16 val;
2486 
2487     dtpam = (kdtpam_subobj_t *)this;
2488 
2489     if (dtpam->dt.dset &&
2490         kdtMapOutFixed(&(dtpam->dt), dtpam->dt.dclass, &val)) {
2491         dtres->set = TRUE;
2492         dtres->class = val;
2493         return TRUE;
2494     } else {
2495         dtres->set = FALSE;
2496         return FALSE;
2497     }
2498 }
2499 
2500 
2501 
2502 /* ************************************************************/
2503 /* decision tree accentuation (ACC) functions */
2504 /* ************************************************************/
2505 
picokdt_dtACCconstructInVec(const picokdt_DtACC this,const picoos_uint8 pre2,const picoos_uint8 pre1,const picoos_uint8 src,const picoos_uint8 fol1,const picoos_uint8 fol2,const picoos_uint16 hist1,const picoos_uint16 hist2,const picoos_uint16 nrwordspre,const picoos_uint16 nrsyllspre,const picoos_uint16 nrwordsfol,const picoos_uint16 nrsyllsfol,const picoos_uint16 footwordsfol,const picoos_uint16 footsyllsfol)2506 picoos_uint8 picokdt_dtACCconstructInVec(const picokdt_DtACC this,
2507                                          const picoos_uint8 pre2,
2508                                          const picoos_uint8 pre1,
2509                                          const picoos_uint8 src,
2510                                          const picoos_uint8 fol1,
2511                                          const picoos_uint8 fol2,
2512                                          const picoos_uint16 hist1,
2513                                          const picoos_uint16 hist2,
2514                                          const picoos_uint16 nrwordspre,
2515                                          const picoos_uint16 nrsyllspre,
2516                                          const picoos_uint16 nrwordsfol,
2517                                          const picoos_uint16 nrsyllsfol,
2518                                          const picoos_uint16 footwordsfol,
2519                                          const picoos_uint16 footsyllsfol) {
2520     kdtacc_subobj_t *dtacc;
2521     picoos_uint8 i;
2522     picoos_uint16 inval = 0;
2523     picoos_uint16 fallback = 0;
2524 
2525     dtacc = (kdtacc_subobj_t *)this;
2526     PICODBG_DEBUG(("in:  [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]",
2527                    pre2, pre1, src, fol1, fol2, hist1, hist2,
2528                    nrwordspre, nrsyllspre, nrwordsfol, nrsyllsfol,
2529                    footwordsfol, footsyllsfol));
2530     dtacc->inveclen = 0;
2531 
2532     for (i = 0; i < PICOKDT_NRATT_ACC; i++) {
2533         switch (i) {
2534             case 0: inval = pre2; break;
2535             case 1: inval = pre1; break;
2536             case 2: inval = src; break;
2537             case 3: inval = fol1;  break;
2538             case 4: inval = fol2; break;
2539             case 5: inval = hist1; break;
2540             case 6: inval = hist2; break;
2541             case 7: inval = nrwordspre; break;
2542             case 8: inval = nrsyllspre; break;
2543             case 9: inval = nrwordsfol; break;
2544             case 10: inval = nrsyllsfol; break;
2545             case 11: inval = footwordsfol; break;
2546             case 12: inval = footsyllsfol; break;
2547             default:
2548                 PICODBG_ERROR(("size mismatch"));
2549                 return FALSE;
2550                 break;
2551         }
2552 
2553         if (((i == 5) || (i == 6)) && (inval == PICOKDT_HISTORY_ZERO)) {
2554             /* in input to this function the HISTORY_ZERO is used to
2555                mark the no-value-available case. For sparsity reasons
2556                this was not used in the training. For
2557                no-value-available cases, instead, do reverse out
2558                mapping of ACC0 to get tree domain for ACC0  */
2559             if (!kdtReverseMapOutFixed(&(dtacc->dt), PICODATA_ACC0,
2560                                        &inval, &fallback)) {
2561                 if (fallback) {
2562                     inval = fallback;
2563                 } else {
2564                     PICODBG_ERROR(("problem doing reverse output mapping"));
2565                     return FALSE;
2566                 }
2567             }
2568         }
2569 
2570         /* do the imt mapping for all inval */
2571         if (!kdtMapInFixed(&(dtacc->dt), i, inval,
2572                            &(dtacc->invec[i]), &fallback)) {
2573             if (fallback) {
2574                 dtacc->invec[i] = fallback;
2575             } else {
2576                 PICODBG_ERROR(("problem doing input mapping"));
2577                 return FALSE;
2578             }
2579         }
2580     }
2581 
2582     PICODBG_DEBUG(("out: [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]",
2583                    dtacc->invec[0], dtacc->invec[1], dtacc->invec[2],
2584                    dtacc->invec[3], dtacc->invec[4], dtacc->invec[5],
2585                    dtacc->invec[6], dtacc->invec[7], dtacc->invec[8],
2586                    dtacc->invec[9], dtacc->invec[10], dtacc->invec[11],
2587                    dtacc->invec[12]));
2588     dtacc->inveclen = PICOKDT_NRINPMT_ACC;
2589     return TRUE;
2590 }
2591 
2592 
picokdt_dtACCclassify(const picokdt_DtACC this,picoos_uint16 * treeout)2593 picoos_uint8 picokdt_dtACCclassify(const picokdt_DtACC this,
2594                                    picoos_uint16 *treeout) {
2595     picoos_uint32 iByteNo;
2596     picoos_int8 iBitNo;
2597     picoos_int8 rv;
2598     kdtacc_subobj_t *dtacc;
2599     kdt_subobj_t *dt;
2600 
2601     dtacc = (kdtacc_subobj_t *)this;
2602     dt = &(dtacc->dt);
2603     iByteNo = 0;
2604     iBitNo = 7;
2605     while ((rv = kdtAskTree(dt, dtacc->invec, PICOKDT_NRATT_ACC,
2606                             &iByteNo, &iBitNo)) > 0) {
2607         PICODBG_TRACE(("asking tree"));
2608     }
2609     PICODBG_TRACE(("done: %d", dt->dclass));
2610     if ((rv == 0) && dt->dset) {
2611         *treeout = dt->dclass;
2612         return TRUE;
2613     } else {
2614         return FALSE;
2615     }
2616 }
2617 
2618 
picokdt_dtACCdecomposeOutClass(const picokdt_DtACC this,picokdt_classify_result_t * dtres)2619 picoos_uint8 picokdt_dtACCdecomposeOutClass(const picokdt_DtACC this,
2620                                             picokdt_classify_result_t *dtres) {
2621     kdtacc_subobj_t *dtacc;
2622     picoos_uint16 val;
2623 
2624     dtacc = (kdtacc_subobj_t *)this;
2625 
2626     if (dtacc->dt.dset &&
2627         kdtMapOutFixed(&(dtacc->dt), dtacc->dt.dclass, &val)) {
2628         dtres->set = TRUE;
2629         dtres->class = val;
2630         return TRUE;
2631     } else {
2632         dtres->set = FALSE;
2633         return FALSE;
2634     }
2635 }
2636 
2637 #ifdef __cplusplus
2638 }
2639 #endif
2640 
2641 
2642 /* end */
2643