1 /*
2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /**
17  * @file picowa.h
18  *
19  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
20  * All rights reserved.
21  *
22  * History:
23  * - 2009-04-20 -- initial version
24  *
25  */
26 
27 
28 /**
29  * @addtogroup picowa
30  * ---------------------------------------------------\n
31  * <b> Pico Word Analysis </b>\n
32  * ---------------------------------------------------\n
33 itemtype, iteminfo1, iteminfo2, content -> TYPE(INFO1,INFO2)content
34 in the following
35 
36 items input\n
37 ===========
38 
39 processed by wa:
40 - WORDGRAPH(NA,NA)graph
41 - OTHER(NA,NA)string
42 
43 unprocessed:
44 - all other item types are forwarded through the PU without modification:
45   - PUNC
46   - CMD
47 
48 
49 minimal input size (before processing starts)\n
50 ==================
51 
52 processing (ie. lex lookup and POS prediction) is possible with
53 - one item
54 
55 
56 items processed and output\n
57 ==========================
58 
59 processing an input WORDGRAPH results in one of the following items:
60 - WORDGRAPH(POSes,NA)graph
61    - graph not in lex, POSes determined with dtree, or
62    - graph in lex - single entry without phone (:G2P), POSes from lex
63 - WORDINDEX(POSes,NA)pos1|ind1...posN|indN
64    - graph in lex - {1,4} entries with phone, pos1...posN from lex,
65      {1,4} lexentries indices in content, POSes combined with map table
66      in klex
67 
68 processing an input OTHER results in the item being skipped (in the
69 future this can be extended to e.g. spelling)
70 
71 see picotok.h for PUNC and CMD
72 
73 - POSes %d
74   - is the superset of all single POS and POS combinations defined
75   in the lingware as unique symbol
76 - graph, len>0, utf8 graphemes, %s
77 - pos1|ind1, pos2|ind2, ..., posN|indN
78   - pos? are the single, unambiguous POS only, one byte %d
79   - ind? are the lexentry indices, three bytes %d %d %d
80 
81 
82 lexicon (system lexicon, but must also be ensured for user lexica)\n
83 =======
84 
85 - POS GRAPH PHON, all mandatory, but
86   - * PHON can be an empty string -> no pronunciation in the resulting TTS output
87   - * PHON can be :G2P -> use G2P later to add pronunciation
88 - (POS,GRAPH) is a unique key (only one entry allowed)
89 - (GRAPH) is almost a unique key (2-4 entries with the same GRAPH, and
90   differing POS and differing PHON possible)
91   - for one graph we can have 2-4 solutions from the lex which all
92      need to be passed on to the next PU
93   - in this case GRAPH, POS, and PHON all must be available in lex
94   - in this case for each entry only a non-ambiguous, unique POS ID
95      is possible
96 
97 other limitations\n
98 =================
99 
100 - item size: header plus len=256 (valid for Pico in general)
101 - wa uses one item context only -> internal buffer set to 256+4
102  */
103 
104 
105 #ifndef PICOWA_H_
106 #define PICOWA_H_
107 
108 #include "picoos.h"
109 #include "picodata.h"
110 #include "picorsrc.h"
111 
112 #ifdef __cplusplus
113 extern "C" {
114 #endif
115 #if 0
116 }
117 #endif
118 
119 
120 /* maximum length of an item incl. head for input and output buffers:
 * 256 (item content, "len=256") + 4 (item head), matching the
 * "internal buffer set to 256+4" limitation stated in the file comment */
121 #define PICOWA_MAXITEMSIZE 260
122 
123 
/**
 * Creates a word analysis (wa) processing unit.
 *
 * The item types this unit processes (WORDGRAPH, OTHER), the items it
 * forwards unmodified, and the items it outputs are described in the
 * file comment at the top of this header.
 *
 * @param mm     memory manager handle
 *               (NOTE(review): presumably used to allocate the unit and
 *               its internal buffers - confirm in the implementation)
 * @param common common handle declared in picoos
 *               (NOTE(review): exact use is not visible in this header)
 * @param cbIn   char buffer; going by its name, the buffer input items
 *               are read from - confirm in the implementation
 * @param cbOut  char buffer; going by its name, the buffer output items
 *               are written to - confirm in the implementation
 * @param voice  voice handle
 *               (NOTE(review): presumably provides the lex, dtree and
 *               POS map resources mentioned in the file comment - confirm)
 * @return the new processing unit
 *         (NOTE(review): the value returned on failure, e.g. NULL, is
 *         not visible in this header - confirm in the implementation)
 */
124 picodata_ProcessingUnit picowa_newWordAnaUnit(
125         picoos_MemoryManager mm,
126     picoos_Common common,
127         picodata_CharBuffer cbIn,
128         picodata_CharBuffer cbOut,
129         picorsrc_Voice voice);
130 
131 #ifdef __cplusplus
132 }
133 #endif
134 
135 #endif /*PICOWA_H_*/
136