1 /*
2  * Copyright (C) 2009, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "LatinIME: dictionary.cpp"
18 
19 #include "suggest/core/dictionary/dictionary.h"
20 
21 #include "defines.h"
22 #include "suggest/core/dictionary/dictionary_utils.h"
23 #include "suggest/core/policy/dictionary_header_structure_policy.h"
24 #include "suggest/core/result/suggestion_results.h"
25 #include "suggest/core/session/dic_traverse_session.h"
26 #include "suggest/core/session/prev_words_info.h"
27 #include "suggest/core/suggest.h"
28 #include "suggest/core/suggest_options.h"
29 #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
30 #include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
31 #include "utils/log_utils.h"
32 #include "utils/time_keeper.h"
33 
34 namespace latinime {
35 
36 const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
37 
Dictionary(JNIEnv * env,DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy)38 Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
39         dictionaryStructureWithBufferPolicy)
40         : mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)),
41           mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
42           mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
43     logDictionaryInfo(env);
44 }
45 
getSuggestions(ProximityInfo * proximityInfo,DicTraverseSession * traverseSession,int * xcoordinates,int * ycoordinates,int * times,int * pointerIds,int * inputCodePoints,int inputSize,const PrevWordsInfo * const prevWordsInfo,const SuggestOptions * const suggestOptions,const float languageWeight,SuggestionResults * const outSuggestionResults) const46 void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
47         int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
48         int inputSize, const PrevWordsInfo *const prevWordsInfo,
49         const SuggestOptions *const suggestOptions, const float languageWeight,
50         SuggestionResults *const outSuggestionResults) const {
51     TimeKeeper::setCurrentTime();
52     traverseSession->init(this, prevWordsInfo, suggestOptions);
53     const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
54     suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
55             ycoordinates, times, pointerIds, inputCodePoints, inputSize,
56             languageWeight, outSuggestionResults);
57     if (DEBUG_DICT) {
58         outSuggestionResults->dumpSuggestions();
59     }
60 }
61 
NgramListenerForPrediction(const PrevWordsInfo * const prevWordsInfo,SuggestionResults * const suggestionResults,const DictionaryStructureWithBufferPolicy * const dictStructurePolicy)62 Dictionary::NgramListenerForPrediction::NgramListenerForPrediction(
63         const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const suggestionResults,
64         const DictionaryStructureWithBufferPolicy *const dictStructurePolicy)
65     : mPrevWordsInfo(prevWordsInfo), mSuggestionResults(suggestionResults),
66       mDictStructurePolicy(dictStructurePolicy) {}
67 
onVisitEntry(const int ngramProbability,const int targetPtNodePos)68 void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability,
69         const int targetPtNodePos) {
70     if (targetPtNodePos == NOT_A_DICT_POS) {
71         return;
72     }
73     if (mPrevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
74             && ngramProbability == NOT_A_PROBABILITY) {
75         return;
76     }
77     int targetWordCodePoints[MAX_WORD_LENGTH];
78     int unigramProbability = 0;
79     const int codePointCount = mDictStructurePolicy->
80             getCodePointsAndProbabilityAndReturnCodePointCount(targetPtNodePos,
81                     MAX_WORD_LENGTH, targetWordCodePoints, &unigramProbability);
82     if (codePointCount <= 0) {
83         return;
84     }
85     const int probability = mDictStructurePolicy->getProbability(
86             unigramProbability, ngramProbability);
87     mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, probability);
88 }
89 
getPredictions(const PrevWordsInfo * const prevWordsInfo,SuggestionResults * const outSuggestionResults) const90 void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
91         SuggestionResults *const outSuggestionResults) const {
92     TimeKeeper::setCurrentTime();
93     NgramListenerForPrediction listener(prevWordsInfo, outSuggestionResults,
94             mDictionaryStructureWithBufferPolicy.get());
95     int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
96     prevWordsInfo->getPrevWordsTerminalPtNodePos(
97             mDictionaryStructureWithBufferPolicy.get(), prevWordsPtNodePos,
98             true /* tryLowerCaseSearch */);
99     mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordsPtNodePos, &listener);
100 }
101 
getProbability(const int * word,int length) const102 int Dictionary::getProbability(const int *word, int length) const {
103     return getNgramProbability(nullptr /* prevWordsInfo */, word, length);
104 }
105 
getMaxProbabilityOfExactMatches(const int * word,int length) const106 int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const {
107     TimeKeeper::setCurrentTime();
108     return DictionaryUtils::getMaxProbabilityOfExactMatches(
109             mDictionaryStructureWithBufferPolicy.get(), word, length);
110 }
111 
getNgramProbability(const PrevWordsInfo * const prevWordsInfo,const int * word,int length) const112 int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
113         int length) const {
114     TimeKeeper::setCurrentTime();
115     int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word,
116             length, false /* forceLowerCaseSearch */);
117     if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
118     if (!prevWordsInfo) {
119         return getDictionaryStructurePolicy()->getProbabilityOfPtNode(
120                 nullptr /* prevWordsPtNodePos */, nextWordPos);
121     }
122     int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
123     prevWordsInfo->getPrevWordsTerminalPtNodePos(
124             mDictionaryStructureWithBufferPolicy.get(), prevWordsPtNodePos,
125             true /* tryLowerCaseSearch */);
126     return getDictionaryStructurePolicy()->getProbabilityOfPtNode(prevWordsPtNodePos, nextWordPos);
127 }
128 
addUnigramEntry(const int * const word,const int length,const UnigramProperty * const unigramProperty)129 bool Dictionary::addUnigramEntry(const int *const word, const int length,
130         const UnigramProperty *const unigramProperty) {
131     if (unigramProperty->representsBeginningOfSentence()
132             && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
133                     ->supportsBeginningOfSentence()) {
134         AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
135         return false;
136     }
137     TimeKeeper::setCurrentTime();
138     return mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
139 }
140 
removeUnigramEntry(const int * const codePoints,const int codePointCount)141 bool Dictionary::removeUnigramEntry(const int *const codePoints, const int codePointCount) {
142     TimeKeeper::setCurrentTime();
143     return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints, codePointCount);
144 }
145 
addNgramEntry(const PrevWordsInfo * const prevWordsInfo,const BigramProperty * const bigramProperty)146 bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
147         const BigramProperty *const bigramProperty) {
148     TimeKeeper::setCurrentTime();
149     return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty);
150 }
151 
removeNgramEntry(const PrevWordsInfo * const prevWordsInfo,const int * const word,const int length)152 bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
153         const int *const word, const int length) {
154     TimeKeeper::setCurrentTime();
155     return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length);
156 }
157 
flush(const char * const filePath)158 bool Dictionary::flush(const char *const filePath) {
159     TimeKeeper::setCurrentTime();
160     return mDictionaryStructureWithBufferPolicy->flush(filePath);
161 }
162 
flushWithGC(const char * const filePath)163 bool Dictionary::flushWithGC(const char *const filePath) {
164     TimeKeeper::setCurrentTime();
165     return mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
166 }
167 
needsToRunGC(const bool mindsBlockByGC)168 bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
169     TimeKeeper::setCurrentTime();
170     return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
171 }
172 
getProperty(const char * const query,const int queryLength,char * const outResult,const int maxResultLength)173 void Dictionary::getProperty(const char *const query, const int queryLength, char *const outResult,
174         const int maxResultLength) {
175     TimeKeeper::setCurrentTime();
176     return mDictionaryStructureWithBufferPolicy->getProperty(query, queryLength, outResult,
177             maxResultLength);
178 }
179 
getWordProperty(const int * const codePoints,const int codePointCount)180 const WordProperty Dictionary::getWordProperty(const int *const codePoints,
181         const int codePointCount) {
182     TimeKeeper::setCurrentTime();
183     return mDictionaryStructureWithBufferPolicy->getWordProperty(
184             codePoints, codePointCount);
185 }
186 
getNextWordAndNextToken(const int token,int * const outCodePoints,int * const outCodePointCount)187 int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
188         int *const outCodePointCount) {
189     TimeKeeper::setCurrentTime();
190     return mDictionaryStructureWithBufferPolicy->getNextWordAndNextToken(
191             token, outCodePoints, outCodePointCount);
192 }
193 
logDictionaryInfo(JNIEnv * const env) const194 void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
195     int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
196     int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
197     int dateStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
198     const DictionaryHeaderStructurePolicy *const headerPolicy =
199             getDictionaryStructurePolicy()->getHeaderStructurePolicy();
200     headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer,
201             HEADER_ATTRIBUTE_BUFFER_SIZE);
202     headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer,
203             HEADER_ATTRIBUTE_BUFFER_SIZE);
204     headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer,
205             HEADER_ATTRIBUTE_BUFFER_SIZE);
206 
207     char dictionaryIdCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
208     char versionStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
209     char dateStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
210     intArrayToCharArray(dictionaryIdCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
211             dictionaryIdCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
212     intArrayToCharArray(versionStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
213             versionStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
214     intArrayToCharArray(dateStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
215             dateStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
216 
217     LogUtils::logToJava(env,
218             "Dictionary info: dictionary = %s ; version = %s ; date = %s",
219             dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer);
220 }
221 
222 } // namespace latinime
223