1 /*
2  * Copyright (C) 2008-2012  OMRON SOFTWARE Co., Ltd.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "nj_lib.h"
18 #include "nj_err.h"
19 #include "nj_ext.h"
20 #include "nj_dic.h"
21 #include "njd.h"
22 
/*
 * Layout of one fixed-size record in the stem area.
 *
 * A record is DATA_SIZE bytes long.  The DATA_OFFSET_* values are the byte
 * offsets, inside a record, of the first byte that holds part of each field;
 * several fields are not byte aligned and share a byte with a neighbour.
 */
#define DATA_SIZE (10)
#define DATA_OFFSET_FHINSI          (0)
#define DATA_OFFSET_BHINSI          (1)
#define DATA_OFFSET_HINDO           (2)
#define DATA_OFFSET_CANDIDATE       (3)
#define DATA_OFFSET_CANDIDATE_LEN   (5)
#define DATA_OFFSET_YOMI            (6)
#define DATA_OFFSET_YOMI_LEN        (9)

/*
 * Passed as the last argument of CALCULATE_HINDO().  It equals the largest
 * value DATA_HINDO() can yield (0x3F).
 */
#define YOMINASI_DIC_FREQ_DIV 63

/*
 * Field accessors.  (x) is a byte pointer to the start of a record.
 * Every accessor evaluates (x) more than once except DATA_HINDO and
 * DATA_YOMI_SIZE, so do not pass an expression with side effects.
 */

/* fhinsi: 9 bits = all of byte 0 followed by the top bit of byte 1. */
#define DATA_FHINSI(x)                                                  \
    ( (NJ_UINT16)(0x01FF &                                              \
                  (((NJ_UINT16)*((x)+DATA_OFFSET_FHINSI  ) << 1) |      \
                   (           *((x)+DATA_OFFSET_FHINSI+1) >> 7))) )
/* bhinsi: 9 bits = low 7 bits of byte 1 followed by the top 2 bits of byte 2. */
#define DATA_BHINSI(x)                                                  \
    ( (NJ_UINT16)(0x01FF &                                              \
                  (((NJ_UINT16)*((x)+DATA_OFFSET_BHINSI  ) << 2) |      \
                   (           *((x)+DATA_OFFSET_BHINSI+1) >> 6))) )
/* hindo: the low 6 bits of byte 2. */
#define DATA_HINDO(x)                                                   \
    ((NJ_HINDO)(0x003F & ((NJ_UINT16)*((x)+DATA_OFFSET_HINDO))))
/*
 * candidate: 20 bits = bytes 3 and 4 followed by the high nibble of byte 5.
 * Used in this file as an offset from STRS_AREA_TOP_ADDR(); bit 19 is also
 * tested as NO_CONV_FLG.
 */
#define DATA_CANDIDATE(x)                                               \
    ((NJ_UINT32)(0x000FFFFF &                                           \
                 (((NJ_UINT32)*((x)+DATA_OFFSET_CANDIDATE)   << 12) |   \
                  ((NJ_UINT32)*((x)+DATA_OFFSET_CANDIDATE+1) <<  4) |   \
                  (           *((x)+DATA_OFFSET_CANDIDATE+2) >>  4))))
/*
 * candidate size: 8 bits = low nibble of byte 5 followed by the high nibble
 * of byte 6.  Callers divide it by sizeof(NJ_CHAR) to get a character count.
 */
#define DATA_CANDIDATE_SIZE(x)                                          \
    ((NJ_UINT8)((*((x)+DATA_OFFSET_CANDIDATE_LEN)   << 4) |             \
                (*((x)+DATA_OFFSET_CANDIDATE_LEN+1) >> 4)))
/*
 * yomi: 20 bits = low nibble of byte 6 followed by bytes 7 and 8.
 * Used in this file as an offset from YOMI_AREA_TOP_ADDR().
 */
#define DATA_YOMI(x) \
    ((NJ_UINT32)(0x000FFFFF &                                           \
                 (((NJ_UINT32)*((x)+DATA_OFFSET_YOMI)   << 16) |        \
                  ((NJ_UINT32)*((x)+DATA_OFFSET_YOMI+1) <<  8) |        \
                  (           *((x)+DATA_OFFSET_YOMI+2)      ))))
/* yomi size: byte 9. */
#define DATA_YOMI_SIZE(x)                       \
    ((NJ_UINT8)((*((x)+DATA_OFFSET_YOMI_LEN))))

/*
 * Dictionary header accessors.  (h) is the dictionary handle, treated as a
 * byte pointer.  The *_TOP_ADDR macros add a 32-bit value stored in the
 * header to (h); the other two return 16-bit header values.
 */
#define YOMI_INDX_TOP_ADDR(h) ((NJ_UINT8*)((h)+NJ_INT32_READ((h)+0x1C)))
#define YOMI_INDX_CNT(h) ((NJ_UINT16)(NJ_INT16_READ((h)+0x20)))
#define YOMI_INDX_BYTE(h) ((NJ_UINT16)(NJ_INT16_READ((h)+0x22)))
#define STEM_AREA_TOP_ADDR(h) ((NJ_UINT8*)((h)+NJ_INT32_READ((h)+0x24)))
#define STRS_AREA_TOP_ADDR(h) ((NJ_UINT8*)((h)+NJ_INT32_READ((h)+0x28)))
#define YOMI_AREA_TOP_ADDR(h) ((NJ_UINT8*)((h)+NJ_INT32_READ((h)+0x2C)))

/*
 * Bit 19 of DATA_CANDIDATE().  When it is set, njd_f_get_candidate() passes
 * the expanded reading through nje_convert_hira_to_kata().
 */
#define NO_CONV_FLG ((NJ_UINT32) 0x00080000L)

/* Left shift applied to fhinsi/bhinsi when packing them into stem.info1/info2. */
#define HINSI_OFFSET (7)

/*
 * Value stored in loct.current_info when search_data() finds a record.
 * The expansion is fully parenthesized so it stays a single operand wherever
 * it is used (for example as the operand of sizeof).
 */
#define CURRENT_INFO_SET ((NJ_UINT8)(0x10))
72 
73 static NJ_UINT16 search_data(NJ_SEARCH_CONDITION *condition, NJ_SEARCH_LOCATION_SET *loctset);
74 static NJ_UINT16 convert_to_yomi(NJ_DIC_HANDLE hdl, NJ_UINT8 *index, NJ_UINT16 len, NJ_CHAR *yomi, NJ_UINT16 size);
75 static NJ_UINT16 yomi_strcmp_forward(NJ_DIC_HANDLE hdl, NJ_UINT8 *data, NJ_CHAR *yomi);
76 
search_data(NJ_SEARCH_CONDITION * condition,NJ_SEARCH_LOCATION_SET * loctset)77 static NJ_UINT16 search_data(NJ_SEARCH_CONDITION *condition, NJ_SEARCH_LOCATION_SET *loctset)
78 {
79     NJ_UINT32 offset;
80     NJ_UINT8 *data;
81     NJ_UINT16 i, j;
82     NJ_UINT16 hindo;
83     NJ_UINT8 hit_flg;
84     NJ_UINT8 *tmp_hinsi = NULL;
85 
86 
87     offset = loctset->loct.current;
88     data = STEM_AREA_TOP_ADDR(loctset->loct.handle) + offset;
89 
90     if (GET_LOCATION_STATUS(loctset->loct.status) != NJ_ST_SEARCH_NO_INIT) {
91         data += DATA_SIZE;
92         offset += DATA_SIZE;
93 
94 
95         if (data >= STRS_AREA_TOP_ADDR(loctset->loct.handle)) {
96 
97             loctset->loct.status = NJ_ST_SEARCH_END;
98             return 0;
99         }
100     }
101 
102 
103     tmp_hinsi = condition->hinsi.fore;
104     condition->hinsi.fore = condition->hinsi.yominasi_fore;
105 
106     i = (STRS_AREA_TOP_ADDR(loctset->loct.handle) - data) / DATA_SIZE;
107     for (j = 0; j < i; j++) {
108 
109         if (njd_connect_test(condition, DATA_FHINSI(data), DATA_BHINSI(data))) {
110 
111             hit_flg = 0;
112 
113             if (condition->operation == NJ_CUR_OP_LINK) {
114 
115                 hit_flg = 1;
116             } else {
117 
118 
119 
120                 if (yomi_strcmp_forward(loctset->loct.handle, data, condition->yomi)) {
121 
122                     hit_flg = 1;
123                 }
124             }
125 
126             if (hit_flg) {
127 
128                 loctset->loct.current_info = CURRENT_INFO_SET;
129                 loctset->loct.current = offset;
130                 loctset->loct.status = NJ_ST_SEARCH_READY;
131                 hindo = DATA_HINDO(STEM_AREA_TOP_ADDR(loctset->loct.handle) + loctset->loct.current);
132                 loctset->cache_freq = CALCULATE_HINDO(hindo, loctset->dic_freq.base,
133                                                       loctset->dic_freq.high, YOMINASI_DIC_FREQ_DIV);
134 
135 
136                 condition->hinsi.fore = tmp_hinsi;
137                 return 1;
138             }
139         }
140 
141         data += DATA_SIZE;
142         offset += DATA_SIZE;
143     }
144 
145     loctset->loct.status = NJ_ST_SEARCH_END;
146 
147     condition->hinsi.fore = tmp_hinsi;
148     return 0;
149 }
150 
/*
 * Expands a sequence of index bytes into characters by looking each byte up
 * in the dictionary's yomi index table, and NUL-terminates the result.
 *
 * Each index byte is 1-based: entry (byte - 1) of the table is used, where
 * an entry is YOMI_INDX_BYTE(hdl) bytes wide.  Entries of width 2 are copied
 * with NJ_CHAR_COPY (UTL_CHAR() characters per entry); entries of any other
 * accepted width are copied as a single byte widened to NJ_CHAR.
 *
 * @param hdl    dictionary handle
 * @param index  index bytes to expand
 * @param len    number of index bytes
 * @param yomi   output buffer
 * @param size   size of the output buffer in bytes
 * @return number of characters written, excluding the terminator;
 *         0 if the table's entry width is rejected by
 *         NJ_CHAR_ILLEGAL_DIC_YINDEX (nothing is written);
 *         size / sizeof(NJ_CHAR) if the output would not fit, in which case
 *         the buffer is left unterminated.  Callers detect that case by
 *         checking size < (result + NJ_TERM_LEN) * sizeof(NJ_CHAR).
 */
static NJ_UINT16 convert_to_yomi(NJ_DIC_HANDLE hdl, NJ_UINT8 *index, NJ_UINT16 len, NJ_CHAR *yomi, NJ_UINT16 size)
{
    NJ_UINT8  *table;
    NJ_UINT16 entry_bytes;
    NJ_UINT16 written;
    NJ_UINT16 pos, entry_off;
    NJ_UINT16 nchars, k;

    table = YOMI_INDX_TOP_ADDR(hdl);
    entry_bytes = YOMI_INDX_BYTE(hdl);

    if (NJ_CHAR_ILLEGAL_DIC_YINDEX(entry_bytes)) {
        return 0;
    }

    written = 0;
    for (pos = 0; pos < len; pos++) {
        /* Index bytes are 1-based positions in the table. */
        entry_off = (NJ_UINT16)((index[pos] - 1) * entry_bytes);

        if (entry_bytes != 2) {
            /* Narrow entry: one byte becomes one character. */
            if (((written + 1 + NJ_TERM_LEN) * sizeof(NJ_CHAR)) > size) {
                return (size / sizeof(NJ_CHAR));
            }
            yomi[written] = (NJ_CHAR)(*(table + entry_off));
            written++;
            continue;
        }

        /* Two-byte entry: copy as many characters as UTL_CHAR reports. */
        nchars = UTL_CHAR(table + entry_off);
        if (((written + nchars + NJ_TERM_LEN) * sizeof(NJ_CHAR)) > size) {
            return (size / sizeof(NJ_CHAR));
        }
        for (k = 0; k < nchars; k++) {
            NJ_CHAR_COPY(yomi + written, table + entry_off + k);
            written++;
        }
    }

    yomi[written] = NJ_CHAR_NUL;
    return written;
}
200 
yomi_strcmp_forward(NJ_DIC_HANDLE hdl,NJ_UINT8 * data,NJ_CHAR * yomi)201 static NJ_UINT16 yomi_strcmp_forward(NJ_DIC_HANDLE hdl, NJ_UINT8 *data, NJ_CHAR *yomi)
202 {
203     NJ_UINT8 *area;
204     NJ_CHAR  *stroke;
205     NJ_CHAR   buf[NJ_MAX_LEN + NJ_TERM_LEN];
206     NJ_UINT16 ylen, dic_ylen, j, size;
207 
208 
209 
210     size = sizeof(buf);
211     stroke = buf;
212 
213 
214     area = YOMI_AREA_TOP_ADDR(hdl) + DATA_YOMI(data);
215 
216     if (YOMI_INDX_CNT(hdl) == 0) {
217 
218         dic_ylen = DATA_YOMI_SIZE(data) / sizeof(NJ_CHAR);
219 
220 
221         if (size < ((dic_ylen + NJ_TERM_LEN) * sizeof(NJ_CHAR))) {
222             return 0;
223         }
224         for (j = 0; j < dic_ylen; j++) {
225             NJ_CHAR_COPY(stroke, area);
226             stroke++;
227             area += sizeof(NJ_CHAR);
228         }
229         *stroke = NJ_CHAR_NUL;
230     } else {
231 
232         dic_ylen = convert_to_yomi(hdl, area, DATA_YOMI_SIZE(data), stroke, size);
233 
234 
235         if (size < ((dic_ylen + NJ_TERM_LEN) * sizeof(NJ_CHAR))) {
236             return 0;
237         }
238     }
239 
240 
241     ylen = nj_strlen(yomi);
242 
243 
244     if (dic_ylen < ylen) {
245 
246         return 0;
247     }
248 
249 
250     if (nj_strncmp(yomi, buf, ylen) == 0) {
251 
252         return 1;
253     }
254     return 0;
255 }
256 
njd_f_search_word(NJ_SEARCH_CONDITION * con,NJ_SEARCH_LOCATION_SET * loctset)257 NJ_INT16 njd_f_search_word(NJ_SEARCH_CONDITION *con, NJ_SEARCH_LOCATION_SET *loctset)
258 {
259     NJ_UINT16 ret;
260 
261     switch (con->operation) {
262     case NJ_CUR_OP_LINK:
263 
264 
265         if ((con->hinsi.yominasi_fore == NULL) ||
266             (con->hinsi.foreSize == 0)) {
267             loctset->loct.status = NJ_ST_SEARCH_END;
268             return 0;
269         }
270         break;
271     case NJ_CUR_OP_FORE:
272 
273 
274         if (NJ_CHAR_STRLEN_IS_0(con->yomi)) {
275             loctset->loct.status = NJ_ST_SEARCH_END;
276             return 0;
277         }
278 
279 
280         if ((con->hinsi.yominasi_fore == NULL) ||
281             (con->hinsi.foreSize == 0)) {
282             loctset->loct.status = NJ_ST_SEARCH_END;
283             return 0;
284         }
285         break;
286     default:
287 
288         loctset->loct.status = NJ_ST_SEARCH_END;
289         return 0;
290     }
291 
292 
293     if (con->mode != NJ_CUR_MODE_FREQ) {
294 
295         loctset->loct.status = NJ_ST_SEARCH_END;
296         return 0;
297     }
298 
299 
300     if ((GET_LOCATION_STATUS(loctset->loct.status) == NJ_ST_SEARCH_NO_INIT)
301         || (GET_LOCATION_STATUS(loctset->loct.status) == NJ_ST_SEARCH_READY)) {
302 
303         ret = search_data(con, loctset);
304         if (ret < 1) {
305 
306             loctset->loct.status = NJ_ST_SEARCH_END;
307         }
308         return ret;
309     } else {
310 
311         loctset->loct.status = NJ_ST_SEARCH_END;
312         return 0;
313     }
314 }
315 
njd_f_get_word(NJ_SEARCH_LOCATION_SET * loctset,NJ_WORD * word)316 NJ_INT16 njd_f_get_word(NJ_SEARCH_LOCATION_SET *loctset, NJ_WORD *word)
317 {
318     NJ_UINT8 *data;
319     NJ_CHAR  stroke[NJ_MAX_LEN + NJ_TERM_LEN];
320     NJ_INT16 yomilen, kouholen;
321 
322 
323 
324     if (GET_LOCATION_STATUS(loctset->loct.status) == NJ_ST_SEARCH_END) {
325         return 0;
326     }
327 
328 
329     data = STEM_AREA_TOP_ADDR(loctset->loct.handle) + loctset->loct.current;
330 
331     NJ_SET_YLEN_TO_STEM(word, 1);
332 
333 
334     word->stem.loc = loctset->loct;
335     yomilen = njd_f_get_stroke(word, stroke, sizeof(stroke));
336     if (yomilen <= 0) {
337         return NJ_SET_ERR_VAL(NJ_FUNC_NJD_F_GET_WORD, NJ_ERR_INVALID_RESULT);
338     }
339     word->stem.info1 = yomilen;
340     word->stem.info1 |= (NJ_UINT16)(DATA_FHINSI(data) << HINSI_OFFSET);
341     word->stem.info2 = (NJ_UINT16)(DATA_BHINSI(data) << HINSI_OFFSET);
342     kouholen = (NJ_UINT16)DATA_CANDIDATE_SIZE(data)/sizeof(NJ_CHAR);
343     if (kouholen == 0) {
344 
345         kouholen = yomilen;
346     }
347     word->stem.info2 |= kouholen;
348     word->stem.hindo = CALCULATE_HINDO(DATA_HINDO(data), loctset->dic_freq.base,
349                                        loctset->dic_freq.high, YOMINASI_DIC_FREQ_DIV);
350 
351 
352     word->stem.type = 0;
353 
354     return 1;
355 }
356 
njd_f_get_stroke(NJ_WORD * word,NJ_CHAR * stroke,NJ_UINT16 size)357 NJ_INT16 njd_f_get_stroke(NJ_WORD *word, NJ_CHAR *stroke, NJ_UINT16 size) {
358     NJ_SEARCH_LOCATION *loc;
359     NJ_UINT8 *area, *data;
360     NJ_UINT16 len;
361     NJ_UINT32 j;
362 
363     if (NJ_GET_YLEN_FROM_STEM(word) == 0) {
364         return NJ_SET_ERR_VAL(NJ_FUNC_NJD_F_GET_STROKE, NJ_ERR_INVALID_RESULT);
365     }
366 
367 
368 
369     loc = &word->stem.loc;
370     data = STEM_AREA_TOP_ADDR(loc->handle) + loc->current;
371 
372 
373     area = YOMI_AREA_TOP_ADDR(loc->handle) + DATA_YOMI(data);
374 
375     if (YOMI_INDX_CNT(loc->handle) == 0) {
376 
377         len = DATA_YOMI_SIZE(data)/sizeof(NJ_CHAR);
378 
379 
380         if (size < ((len + NJ_TERM_LEN) * sizeof(NJ_CHAR))) {
381             return NJ_SET_ERR_VAL(NJ_FUNC_NJD_F_GET_STROKE, NJ_ERR_BUFFER_NOT_ENOUGH);
382         }
383 
384         for (j = 0; j < len; j++) {
385             NJ_CHAR_COPY(stroke, area);
386             stroke++;
387             area += sizeof(NJ_CHAR);
388         }
389         *stroke = NJ_CHAR_NUL;
390     } else {
391 
392         len = convert_to_yomi(loc->handle, area, DATA_YOMI_SIZE(data), stroke, size);
393 
394 
395         if (size < ((len + NJ_TERM_LEN) * sizeof(NJ_CHAR))) {
396             return NJ_SET_ERR_VAL(NJ_FUNC_NJD_F_GET_STROKE, NJ_ERR_BUFFER_NOT_ENOUGH);
397         }
398     }
399     return len;
400 }
401 
njd_f_get_candidate(NJ_WORD * word,NJ_CHAR * candidate,NJ_UINT16 size)402 NJ_INT16 njd_f_get_candidate(NJ_WORD *word, NJ_CHAR *candidate, NJ_UINT16 size)
403 {
404     NJ_SEARCH_LOCATION *loc;
405     NJ_UINT8 *data, *area;
406     NJ_CHAR   work[NJ_MAX_LEN + NJ_TERM_LEN];
407     NJ_UINT16 len, j;
408 
409 
410 
411 
412     loc = &word->stem.loc;
413     data = STEM_AREA_TOP_ADDR(loc->handle) + loc->current;
414 
415 
416     len = DATA_CANDIDATE_SIZE(data)/sizeof(NJ_CHAR);
417     if (size < ((len + NJ_TERM_LEN) * sizeof(NJ_CHAR))) {
418         return NJ_SET_ERR_VAL(NJ_FUNC_NJD_F_GET_CANDIDATE, NJ_ERR_BUFFER_NOT_ENOUGH);
419     }
420 
421 
422     if (len == 0) {
423 
424         area = YOMI_AREA_TOP_ADDR(loc->handle) + DATA_YOMI(data);
425         if (YOMI_INDX_CNT(loc->handle) == 0) {
426 
427             len = DATA_YOMI_SIZE(data)/sizeof(NJ_CHAR);
428 
429 
430             if (size < ((len + NJ_TERM_LEN) * sizeof(NJ_CHAR))) {
431                 return NJ_SET_ERR_VAL(NJ_FUNC_NJD_F_GET_STROKE, NJ_ERR_BUFFER_NOT_ENOUGH);
432             }
433             for (j = 0; j < len; j++) {
434                 NJ_CHAR_COPY(candidate + j, area);
435                 area += sizeof(NJ_CHAR);
436             }
437             candidate[len] = NJ_CHAR_NUL;
438             return len;
439         } else {
440 
441             len = convert_to_yomi(loc->handle, area, DATA_YOMI_SIZE(data), work, size);
442 
443 
444             if (size < ((len + NJ_TERM_LEN) * sizeof(NJ_CHAR))) {
445                 return NJ_SET_ERR_VAL(NJ_FUNC_NJD_F_GET_CANDIDATE, NJ_ERR_BUFFER_NOT_ENOUGH);
446             }
447         }
448 
449         if (DATA_CANDIDATE(data) & NO_CONV_FLG) {
450             nje_convert_hira_to_kata(work, candidate, len);
451         } else {
452             for (j = 0; j < len; j++) {
453                 candidate[j] = work[j];
454             }
455         }
456     } else {
457 
458         area = STRS_AREA_TOP_ADDR(loc->handle) + DATA_CANDIDATE(data);
459         for (j = 0; j < len; j++) {
460             NJ_CHAR_COPY(candidate + j, area);
461             area += sizeof(NJ_CHAR);
462         }
463     }
464 
465     candidate[len] = NJ_CHAR_NUL;
466     return len;
467 }
468