/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

17 #ifndef LATINIME_DICTIONARY_H
18 #define LATINIME_DICTIONARY_H
19 
20 #include <memory>
21 
22 #include "defines.h"
23 #include "jni.h"
24 #include "dictionary/interface/dictionary_header_structure_policy.h"
25 #include "dictionary/interface/dictionary_structure_with_buffer_policy.h"
26 #include "dictionary/interface/ngram_listener.h"
27 #include "dictionary/property/historical_info.h"
28 #include "dictionary/property/word_property.h"
29 #include "suggest/core/suggest_interface.h"
30 #include "utils/int_array_view.h"
31 
32 namespace latinime {
33 
34 class DictionaryStructureWithBufferPolicy;
35 class DicTraverseSession;
36 class NgramContext;
37 class ProximityInfo;
38 class SuggestionResults;
39 class SuggestOptions;
40 
41 class Dictionary {
42  public:
43     // Taken from SuggestedWords.java
44     static const int KIND_MASK_KIND = 0xFF; // Mask to get only the kind
45     static const int KIND_TYPED = 0; // What user typed
46     static const int KIND_CORRECTION = 1; // Simple correction/suggestion
47     static const int KIND_COMPLETION = 2; // Completion (suggestion with appended chars)
48     static const int KIND_WHITELIST = 3; // Whitelisted word
49     static const int KIND_BLACKLIST = 4; // Blacklisted word
50     static const int KIND_HARDCODED = 5; // Hardcoded suggestion, e.g. punctuation
51     static const int KIND_APP_DEFINED = 6; // Suggested by the application
52     static const int KIND_SHORTCUT = 7; // A shortcut
53     static const int KIND_PREDICTION = 8; // A prediction (== a suggestion with no input)
54     // KIND_RESUMED: A resumed suggestion (comes from a span, currently this type is used only
55     // in java for re-correction)
56     static const int KIND_RESUMED = 9;
57     static const int KIND_OOV_CORRECTION = 10; // Most probable string correction
58 
59     static const int KIND_MASK_FLAGS = 0xFFFFFF00; // Mask to get the flags
60     static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
61     static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
62     static const int KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = 0x20000000;
63     static const int KIND_FLAG_APPROPRIATE_FOR_AUTOCORRECTION = 0x10000000;
64 
65     Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
66             dictionaryStructureWithBufferPolicy);
67 
68     void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
69             int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
70             int inputSize, const NgramContext *const ngramContext,
71             const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel,
72             SuggestionResults *const outSuggestionResults) const;
73 
74     void getPredictions(const NgramContext *const ngramContext,
75             SuggestionResults *const outSuggestionResults) const;
76 
77     int getProbability(const CodePointArrayView codePoints) const;
78 
79     int getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const;
80 
81     int getNgramProbability(const NgramContext *const ngramContext,
82             const CodePointArrayView codePoints) const;
83 
84     bool addUnigramEntry(const CodePointArrayView codePoints,
85             const UnigramProperty *const unigramProperty);
86 
87     bool removeUnigramEntry(const CodePointArrayView codePoints);
88 
89     bool addNgramEntry(const NgramProperty *const ngramProperty);
90 
91     bool removeNgramEntry(const NgramContext *const ngramContext,
92             const CodePointArrayView codePoints);
93 
94     bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
95             const CodePointArrayView codePoints, const bool isValidWord,
96             const HistoricalInfo historicalInfo);
97 
98     bool flush(const char *const filePath);
99 
100     bool flushWithGC(const char *const filePath);
101 
102     bool needsToRunGC(const bool mindsBlockByGC);
103 
104     void getProperty(const char *const query, const int queryLength, char *const outResult,
105             const int maxResultLength);
106 
107     const WordProperty getWordProperty(const CodePointArrayView codePoints);
108 
109     // Method to iterate all words in the dictionary.
110     // The returned token has to be used to get the next word. If token is 0, this method newly
111     // starts iterating the dictionary.
112     int getNextWordAndNextToken(const int token, int *const outCodePoints,
113             int *const outCodePointCount);
114 
getDictionaryStructurePolicy()115     const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
116         return mDictionaryStructureWithBufferPolicy.get();
117     }
118 
119  private:
120     DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
121 
122     typedef std::unique_ptr<SuggestInterface> SuggestInterfacePtr;
123 
124     class NgramListenerForPrediction : public NgramListener {
125      public:
126         NgramListenerForPrediction(const NgramContext *const ngramContext,
127                 const WordIdArrayView prevWordIds, SuggestionResults *const suggestionResults,
128                 const DictionaryStructureWithBufferPolicy *const dictStructurePolicy);
129         virtual void onVisitEntry(const int ngramProbability, const int targetWordId);
130 
131      private:
132         DISALLOW_IMPLICIT_CONSTRUCTORS(NgramListenerForPrediction);
133 
134         const NgramContext *const mNgramContext;
135         const WordIdArrayView mPrevWordIds;
136         SuggestionResults *const mSuggestionResults;
137         const DictionaryStructureWithBufferPolicy *const mDictStructurePolicy;
138     };
139 
140     static const int HEADER_ATTRIBUTE_BUFFER_SIZE;
141 
142     const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
143             mDictionaryStructureWithBufferPolicy;
144     const SuggestInterfacePtr mGestureSuggest;
145     const SuggestInterfacePtr mTypingSuggest;
146 
147     void logDictionaryInfo(JNIEnv *const env) const;
148 };
149 } // namespace latinime
150 #endif // LATINIME_DICTIONARY_H
151