1 /*
2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /**
17  * @file picoklex.h
18  *
19  * knowledge base: lexicon
20  *
21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22  * All rights reserved.
23  *
24  * History:
25  * - 2009-04-20 -- initial version
26  *
27  */
28 
29 #ifndef PICOKLEX_H_
30 #define PICOKLEX_H_
31 
32 #include "picoos.h"
33 #include "picoknow.h"
34 
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38 #if 0
39 }
40 #endif
41 
42 
43 /* ************************************************************/
44 /* function to create specialized kb, */
45 /* to be used by picorsrc only */
46 /* ************************************************************/
47 
48 pico_status_t picoklex_specializeLexKnowledgeBase(picoknow_KnowledgeBase this,
49                                                   picoos_Common common);
50 
51 
52 /* ************************************************************/
53 /* lexicon type and getLex function */
54 /* ************************************************************/
55 
/**
 * Lexicon type: handle (pointer) to a struct picoklex_lex. The struct is
 * only named here, not defined, so users of this header treat the handle
 * as opaque.
 */
typedef struct picoklex_lex * picoklex_Lex;
58 
59 /* return kb lex for usage in PU */
60 picoklex_Lex picoklex_getLex(picoknow_KnowledgeBase this);
61 
62 
63 /* ************************************************************/
64 /* lexicon lookup result type */
65 /* ************************************************************/
66 
/* max nr of results */
#define PICOKLEX_MAX_NRRES   4

/* nr of bytes used for pos and index, needs to fit in uint32, ie. max 4 */
#define PICOKLEX_POSIND_SIZE 4
/* nr of bytes used for index, needs to fit in uint32, ie. max 4 */
#define PICOKLEX_IND_SIZE    3
/* max len (in bytes) of posind; derived from the two constants above
   instead of being hard-coded, so it cannot get out of sync with them
   (evaluates to 16 with the current values) */
#define PICOKLEX_POSIND_MAXLEN (PICOKLEX_MAX_NRRES * PICOKLEX_POSIND_SIZE)
76 
77 
78 /* the lexicon lookup result(s) are stored in field posind, which
79    contains a sequence of
80      POS1-byte, IND1-bytes, POS2-byte, IND2-bytes, etc.
81 
82    the IND-bytes are the byte position(s) in the lexblocks part of the
83    lexicon byte stream, starting at picoklex_lex_t.lexblocks.
84 
85    for lexentries without phones only the POS (there can be only one)
86    is stored in posind, nrres equals one, and phonfound is FALSE.
87 */
88 
89 typedef struct {
90     picoos_uint8 nrres;      /* number of results, 0 of no entry found */
91     picoos_uint8 posindlen;  /* number of posind bytes */
92     picoos_uint8 phonfound;  /* phones found flag, TRUE if found */
93     picoos_uint8 posind[PICOKLEX_POSIND_MAXLEN]; /* sequence of multi-ind,
94                                                     one per result */
95 } picoklex_lexl_result_t;
96 
97 
98 /* ************************************************************/
99 /* lexicon lookup functions */
100 /* ************************************************************/
101 
102 /** lookup lex by graph; result(s) are in lexres, ie. the phones are
103    not returned directly (because they are used later and space can be
104    saved using indices first), lexres contains an index (or several)
105    to the entry for later fast lookup once the phones are needed.
106    PICOKLEX_IND_SIZE bytes are used for the index, these ind bytes are
107    saved in the WORDINDEX items. If at least one entry is found TRUE
108    is returned, FALSE otherwise */
109 picoos_uint8 picoklex_lexLookup(const picoklex_Lex this,
110                                 const picoos_uint8 *graph,
111                                 const picoos_uint16 graphlen,
112                                 picoklex_lexl_result_t *lexres);
113 
114 /** lookup lex entry by index ind; ind is a sequence of bytes with
115    length indlen (must be equal PICOKLEX_IND_SIZE) that is the content
116    of a WORDINDEX item. Returns TRUE if okay, FALSE otherwise */
117 picoos_uint8 picoklex_lexIndLookup(const picoklex_Lex this,
118                                    const picoos_uint8 *ind,
119                                    const picoos_uint8 indlen,
120                                    picoos_uint8 *pos,
121                                    picoos_uint8 **phon,
122                                    picoos_uint8 *phonlen);
123 
124 #ifdef __cplusplus
125 }
126 #endif
127 
128 
129 #endif /*PICOKLEX_H_*/
130