1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.latin;
18 
19 import android.text.TextUtils;
20 import android.util.Log;
21 import android.util.SparseArray;
22 
23 import com.android.inputmethod.annotations.UsedForTesting;
24 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
25 import com.android.inputmethod.latin.common.ComposedData;
26 import com.android.inputmethod.latin.common.Constants;
27 import com.android.inputmethod.latin.common.FileUtils;
28 import com.android.inputmethod.latin.common.InputPointers;
29 import com.android.inputmethod.latin.common.StringUtils;
30 import com.android.inputmethod.latin.makedict.DictionaryHeader;
31 import com.android.inputmethod.latin.makedict.FormatSpec;
32 import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
33 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
34 import com.android.inputmethod.latin.makedict.WordProperty;
35 import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
36 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
37 import com.android.inputmethod.latin.utils.JniUtils;
38 import com.android.inputmethod.latin.utils.WordInputEventForPersonalization;
39 
40 import java.io.File;
41 import java.util.ArrayList;
42 import java.util.Arrays;
43 import java.util.HashMap;
44 import java.util.Locale;
45 import java.util.Map;
46 
47 import javax.annotation.Nonnull;
48 
49 /**
50  * Implements a static, compacted, binary dictionary of standard words.
51  */
52 // TODO: All methods which should be locked need to have a suffix "Locked".
53 public final class BinaryDictionary extends Dictionary {
    // Log tag for this class.
    private static final String TAG = BinaryDictionary.class.getSimpleName();

    // The cutoff returned by native for auto-commit confidence.
    // Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h
    private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000;

    // Length, in code points, of the per-word buffers exchanged with the native side (see
    // getWordProperty(), getNextWordProperty() and the suggestion output in getSuggestions()).
    public static final int DICTIONARY_MAX_WORD_LENGTH = 48;
    public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 3;

    @UsedForTesting
    public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
    @UsedForTesting
    public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
    @UsedForTesting
    public static final String MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
    @UsedForTesting
    public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";

    public static final int NOT_A_VALID_TIMESTAMP = -1;

    // Format to get unigram flags from native side via getWordPropertyNative().
    private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5;
    private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
    private static final int FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX = 1;
    private static final int FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX = 2;
    private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3; // DEPRECATED
    private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4;

    // Format to get probability and historical info from native side via getWordPropertyNative().
    public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4;
    public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0;
    public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1;
    public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2;
    public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3;

    public static final String DICT_FILE_NAME_SUFFIX_FOR_MIGRATION = ".migrate";
    public static final String DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION = ".migrating";

    // Handle returned by openNative() / createOnMemoryNative(). 0 means there is no usable
    // native dictionary (see isValidDictionary()).
    private long mNativeDict;
    // Length given to the file-based constructor; 0 for an on-memory dictionary.
    private final long mDictSize;
    // File name given to the constructor; flush() writes the dictionary to this path.
    private final String mDictFilePath;
    // Forwarded to the native suggest options on every getSuggestions() call.
    private final boolean mUseFullEditDistance;
    // Whether the dictionary is opened in writable mode; always true for on-memory dictionaries.
    private final boolean mIsUpdatable;
    // Set to true by the methods that modify the native dictionary; cleared by loadDictionary().
    private boolean mHasUpdated;

    // Traverse sessions keyed by session id. Accessed while synchronized on the array itself
    // (see getTraverseSession()).
    private final SparseArray<DicTraverseSession> mDicTraverseSessions = new SparseArray<>();
100 
101     // TODO: There should be a way to remove used DicTraverseSession objects from
102     // {@code mDicTraverseSessions}.
getTraverseSession(final int traverseSessionId)103     private DicTraverseSession getTraverseSession(final int traverseSessionId) {
104         synchronized(mDicTraverseSessions) {
105             DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId);
106             if (traverseSession == null) {
107                 traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize);
108                 mDicTraverseSessions.put(traverseSessionId, traverseSession);
109             }
110             return traverseSession;
111         }
112     }
113 
114     /**
115      * Constructs binary dictionary using existing dictionary file.
116      * @param filename the name of the file to read through native code.
117      * @param offset the offset of the dictionary data within the file.
118      * @param length the length of the binary data.
119      * @param useFullEditDistance whether to use the full edit distance in suggestions
120      * @param dictType the dictionary type, as a human-readable string
121      * @param isUpdatable whether to open the dictionary file in writable mode.
122      */
BinaryDictionary(final String filename, final long offset, final long length, final boolean useFullEditDistance, final Locale locale, final String dictType, final boolean isUpdatable)123     public BinaryDictionary(final String filename, final long offset, final long length,
124             final boolean useFullEditDistance, final Locale locale, final String dictType,
125             final boolean isUpdatable) {
126         super(dictType, locale);
127         mDictSize = length;
128         mDictFilePath = filename;
129         mIsUpdatable = isUpdatable;
130         mHasUpdated = false;
131         mUseFullEditDistance = useFullEditDistance;
132         loadDictionary(filename, offset, length, isUpdatable);
133     }
134 
135     /**
136      * Constructs binary dictionary on memory.
137      * @param filename the name of the file used to flush.
138      * @param useFullEditDistance whether to use the full edit distance in suggestions
139      * @param dictType the dictionary type, as a human-readable string
140      * @param formatVersion the format version of the dictionary
141      * @param attributeMap the attributes of the dictionary
142      */
BinaryDictionary(final String filename, final boolean useFullEditDistance, final Locale locale, final String dictType, final long formatVersion, final Map<String, String> attributeMap)143     public BinaryDictionary(final String filename, final boolean useFullEditDistance,
144             final Locale locale, final String dictType, final long formatVersion,
145             final Map<String, String> attributeMap) {
146         super(dictType, locale);
147         mDictSize = 0;
148         mDictFilePath = filename;
149         // On memory dictionary is always updatable.
150         mIsUpdatable = true;
151         mHasUpdated = false;
152         mUseFullEditDistance = useFullEditDistance;
153         final String[] keyArray = new String[attributeMap.size()];
154         final String[] valueArray = new String[attributeMap.size()];
155         int index = 0;
156         for (final String key : attributeMap.keySet()) {
157             keyArray[index] = key;
158             valueArray[index] = attributeMap.get(key);
159             index++;
160         }
161         mNativeDict = createOnMemoryNative(formatVersion, locale.toString(), keyArray, valueArray);
162     }
163 
164 
    static {
        // Runs once when this class is initialized, before any of the native methods declared
        // below can be called.
        // NOTE(review): presumably loads the JNI library that implements them -- confirm in
        // JniUtils.
        JniUtils.loadNativeLibrary();
    }
168 
    // Native (JNI) entry points.
    // The two methods returning long produce the handle stored in mNativeDict; the callers
    // visible in this class pass mNativeDict as the {@code dict} argument of the other methods.
    // Parameters prefixed with "out"/"output" are buffers the callers allocate and read back
    // after the call.

    // Creation of the native dictionary.
    private static native long openNative(String sourceDir, long dictOffset, long dictSize,
            boolean isUpdatable);
    private static native long createOnMemoryNative(long formatVersion,
            String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray);
    // Header, persistence and life cycle.
    private static native void getHeaderInfoNative(long dict, int[] outHeaderSize,
            int[] outFormatVersion, ArrayList<int[]> outAttributeKeys,
            ArrayList<int[]> outAttributeValues);
    private static native boolean flushNative(long dict, String filePath);
    private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
    private static native boolean flushWithGCNative(long dict, String filePath);
    private static native void closeNative(long dict);
    private static native int getFormatVersionNative(long dict);
    // Lookups. Words are passed as code point arrays.
    private static native int getProbabilityNative(long dict, int[] word);
    private static native int getMaxProbabilityOfExactMatchesNative(long dict, int[] word);
    private static native int getNgramProbabilityNative(long dict, int[][] prevWordCodePointArrays,
            boolean[] isBeginningOfSentenceArray, int[] word);
    private static native void getWordPropertyNative(long dict, int[] word,
            boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags,
            int[] outProbabilityInfo, ArrayList<int[][]> outNgramPrevWordsArray,
            ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray,
            ArrayList<int[]> outNgramTargets, ArrayList<int[]> outNgramProbabilityInfo,
            ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities);
    private static native int getNextWordNative(long dict, int token, int[] outCodePoints,
            boolean[] outIsBeginningOfSentence);
    private static native void getSuggestionsNative(long dict, long proximityInfo,
            long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
            int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int prevWordCount, int[] outputSuggestionCount, int[] outputCodePoints,
            int[] outputScores, int[] outputIndices, int[] outputTypes,
            int[] outputAutoCommitFirstWordConfidence,
            float[] inOutWeightOfLangModelVsSpatialModel);
    // Modifications. The callers visible in this class set mHasUpdated after these calls.
    private static native boolean addUnigramEntryNative(long dict, int[] word, int probability,
            int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence,
            boolean isNotAWord, boolean isPossiblyOffensive, int timestamp);
    private static native boolean removeUnigramEntryNative(long dict, int[] word);
    private static native boolean addNgramEntryNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int[] word, int probability, int timestamp);
    private static native boolean removeNgramEntryNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word);
    private static native boolean updateEntriesForWordWithNgramContextNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int[] word, boolean isValidWord, int count, int timestamp);
    private static native int updateEntriesForInputEventsNative(long dict,
            WordInputEventForPersonalization[] inputEvents, int startIndex);
    // Miscellaneous queries and maintenance.
    private static native String getPropertyNative(long dict, String query);
    private static native boolean isCorruptedNative(long dict);
    private static native boolean migrateNative(long dict, String dictFilePath,
            long newFormatVersion);
219 
220     // TODO: Move native dict into session
loadDictionary(final String path, final long startOffset, final long length, final boolean isUpdatable)221     private void loadDictionary(final String path, final long startOffset,
222             final long length, final boolean isUpdatable) {
223         mHasUpdated = false;
224         mNativeDict = openNative(path, startOffset, length, isUpdatable);
225     }
226 
227     // TODO: Check isCorrupted() for main dictionaries.
isCorrupted()228     public boolean isCorrupted() {
229         if (!isValidDictionary()) {
230             return false;
231         }
232         if (!isCorruptedNative(mNativeDict)) {
233             return false;
234         }
235         // TODO: Record the corruption.
236         Log.e(TAG, "BinaryDictionary (" + mDictFilePath + ") is corrupted.");
237         Log.e(TAG, "locale: " + mLocale);
238         Log.e(TAG, "dict size: " + mDictSize);
239         Log.e(TAG, "updatable: " + mIsUpdatable);
240         return true;
241     }
242 
getHeader()243     public DictionaryHeader getHeader() throws UnsupportedFormatException {
244         if (mNativeDict == 0) {
245             return null;
246         }
247         final int[] outHeaderSize = new int[1];
248         final int[] outFormatVersion = new int[1];
249         final ArrayList<int[]> outAttributeKeys = new ArrayList<>();
250         final ArrayList<int[]> outAttributeValues = new ArrayList<>();
251         getHeaderInfoNative(mNativeDict, outHeaderSize, outFormatVersion, outAttributeKeys,
252                 outAttributeValues);
253         final HashMap<String, String> attributes = new HashMap<>();
254         for (int i = 0; i < outAttributeKeys.size(); i++) {
255             final String attributeKey = StringUtils.getStringFromNullTerminatedCodePointArray(
256                     outAttributeKeys.get(i));
257             final String attributeValue = StringUtils.getStringFromNullTerminatedCodePointArray(
258                     outAttributeValues.get(i));
259             attributes.put(attributeKey, attributeValue);
260         }
261         final boolean hasHistoricalInfo = DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals(
262                 attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY));
263         return new DictionaryHeader(outHeaderSize[0], new DictionaryOptions(attributes),
264                 new FormatSpec.FormatOptions(outFormatVersion[0], hasHistoricalInfo));
265     }
266 
267     @Override
getSuggestions(final ComposedData composedData, final NgramContext ngramContext, final long proximityInfoHandle, final SettingsValuesForSuggestion settingsValuesForSuggestion, final int sessionId, final float weightForLocale, final float[] inOutWeightOfLangModelVsSpatialModel)268     public ArrayList<SuggestedWordInfo> getSuggestions(final ComposedData composedData,
269             final NgramContext ngramContext, final long proximityInfoHandle,
270             final SettingsValuesForSuggestion settingsValuesForSuggestion,
271             final int sessionId, final float weightForLocale,
272             final float[] inOutWeightOfLangModelVsSpatialModel) {
273         if (!isValidDictionary()) {
274             return null;
275         }
276         final DicTraverseSession session = getTraverseSession(sessionId);
277         Arrays.fill(session.mInputCodePoints, Constants.NOT_A_CODE);
278         ngramContext.outputToArray(session.mPrevWordCodePointArrays,
279                 session.mIsBeginningOfSentenceArray);
280         final InputPointers inputPointers = composedData.mInputPointers;
281         final boolean isGesture = composedData.mIsBatchMode;
282         final int inputSize;
283         if (!isGesture) {
284             inputSize =
285                     composedData.copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount(
286                         session.mInputCodePoints);
287             if (inputSize < 0) {
288                 return null;
289             }
290         } else {
291             inputSize = inputPointers.getPointerSize();
292         }
293         session.mNativeSuggestOptions.setUseFullEditDistance(mUseFullEditDistance);
294         session.mNativeSuggestOptions.setIsGesture(isGesture);
295         session.mNativeSuggestOptions.setBlockOffensiveWords(
296                 settingsValuesForSuggestion.mBlockPotentiallyOffensive);
297         session.mNativeSuggestOptions.setWeightForLocale(weightForLocale);
298         if (inOutWeightOfLangModelVsSpatialModel != null) {
299             session.mInputOutputWeightOfLangModelVsSpatialModel[0] =
300                     inOutWeightOfLangModelVsSpatialModel[0];
301         } else {
302             session.mInputOutputWeightOfLangModelVsSpatialModel[0] =
303                     Dictionary.NOT_A_WEIGHT_OF_LANG_MODEL_VS_SPATIAL_MODEL;
304         }
305         // TOOD: Pass multiple previous words information for n-gram.
306         getSuggestionsNative(mNativeDict, proximityInfoHandle,
307                 getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(),
308                 inputPointers.getYCoordinates(), inputPointers.getTimes(),
309                 inputPointers.getPointerIds(), session.mInputCodePoints, inputSize,
310                 session.mNativeSuggestOptions.getOptions(), session.mPrevWordCodePointArrays,
311                 session.mIsBeginningOfSentenceArray, ngramContext.getPrevWordCount(),
312                 session.mOutputSuggestionCount, session.mOutputCodePoints, session.mOutputScores,
313                 session.mSpaceIndices, session.mOutputTypes,
314                 session.mOutputAutoCommitFirstWordConfidence,
315                 session.mInputOutputWeightOfLangModelVsSpatialModel);
316         if (inOutWeightOfLangModelVsSpatialModel != null) {
317             inOutWeightOfLangModelVsSpatialModel[0] =
318                     session.mInputOutputWeightOfLangModelVsSpatialModel[0];
319         }
320         final int count = session.mOutputSuggestionCount[0];
321         final ArrayList<SuggestedWordInfo> suggestions = new ArrayList<>();
322         for (int j = 0; j < count; ++j) {
323             final int start = j * DICTIONARY_MAX_WORD_LENGTH;
324             int len = 0;
325             while (len < DICTIONARY_MAX_WORD_LENGTH
326                     && session.mOutputCodePoints[start + len] != 0) {
327                 ++len;
328             }
329             if (len > 0) {
330                 suggestions.add(new SuggestedWordInfo(
331                         new String(session.mOutputCodePoints, start, len),
332                         "" /* prevWordsContext */,
333                         (int)(session.mOutputScores[j] * weightForLocale),
334                         session.mOutputTypes[j],
335                         this /* sourceDict */,
336                         session.mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */,
337                         session.mOutputAutoCommitFirstWordConfidence[0]));
338             }
339         }
340         return suggestions;
341     }
342 
isValidDictionary()343     public boolean isValidDictionary() {
344         return mNativeDict != 0;
345     }
346 
getFormatVersion()347     public int getFormatVersion() {
348         return getFormatVersionNative(mNativeDict);
349     }
350 
351     @Override
isInDictionary(final String word)352     public boolean isInDictionary(final String word) {
353         return getFrequency(word) != NOT_A_PROBABILITY;
354     }
355 
356     @Override
getFrequency(final String word)357     public int getFrequency(final String word) {
358         if (TextUtils.isEmpty(word)) {
359             return NOT_A_PROBABILITY;
360         }
361         final int[] codePoints = StringUtils.toCodePointArray(word);
362         return getProbabilityNative(mNativeDict, codePoints);
363     }
364 
365     @Override
getMaxFrequencyOfExactMatches(final String word)366     public int getMaxFrequencyOfExactMatches(final String word) {
367         if (TextUtils.isEmpty(word)) {
368             return NOT_A_PROBABILITY;
369         }
370         final int[] codePoints = StringUtils.toCodePointArray(word);
371         return getMaxProbabilityOfExactMatchesNative(mNativeDict, codePoints);
372     }
373 
374     @UsedForTesting
isValidNgram(final NgramContext ngramContext, final String word)375     public boolean isValidNgram(final NgramContext ngramContext, final String word) {
376         return getNgramProbability(ngramContext, word) != NOT_A_PROBABILITY;
377     }
378 
getNgramProbability(final NgramContext ngramContext, final String word)379     public int getNgramProbability(final NgramContext ngramContext, final String word) {
380         if (!ngramContext.isValid() || TextUtils.isEmpty(word)) {
381             return NOT_A_PROBABILITY;
382         }
383         final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
384         final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
385         ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
386         final int[] wordCodePoints = StringUtils.toCodePointArray(word);
387         return getNgramProbabilityNative(mNativeDict, prevWordCodePointArrays,
388                 isBeginningOfSentenceArray, wordCodePoints);
389     }
390 
getWordProperty(final String word, final boolean isBeginningOfSentence)391     public WordProperty getWordProperty(final String word, final boolean isBeginningOfSentence) {
392         if (word == null) {
393             return null;
394         }
395         final int[] codePoints = StringUtils.toCodePointArray(word);
396         final int[] outCodePoints = new int[DICTIONARY_MAX_WORD_LENGTH];
397         final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT];
398         final int[] outProbabilityInfo =
399                 new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT];
400         final ArrayList<int[][]> outNgramPrevWordsArray = new ArrayList<>();
401         final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray =
402                 new ArrayList<>();
403         final ArrayList<int[]> outNgramTargets = new ArrayList<>();
404         final ArrayList<int[]> outNgramProbabilityInfo = new ArrayList<>();
405         final ArrayList<int[]> outShortcutTargets = new ArrayList<>();
406         final ArrayList<Integer> outShortcutProbabilities = new ArrayList<>();
407         getWordPropertyNative(mNativeDict, codePoints, isBeginningOfSentence, outCodePoints,
408                 outFlags, outProbabilityInfo, outNgramPrevWordsArray,
409                 outNgramPrevWordIsBeginningOfSentenceArray, outNgramTargets,
410                 outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities);
411         return new WordProperty(codePoints,
412                 outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX],
413                 outFlags[FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX],
414                 outFlags[FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX],
415                 outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo,
416                 outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray,
417                 outNgramTargets, outNgramProbabilityInfo);
418     }
419 
420     public static class GetNextWordPropertyResult {
421         public WordProperty mWordProperty;
422         public int mNextToken;
423 
GetNextWordPropertyResult(final WordProperty wordProperty, final int nextToken)424         public GetNextWordPropertyResult(final WordProperty wordProperty, final int nextToken) {
425             mWordProperty = wordProperty;
426             mNextToken = nextToken;
427         }
428     }
429 
430     /**
431      * Method to iterate all words in the dictionary for makedict.
432      * If token is 0, this method newly starts iterating the dictionary.
433      */
getNextWordProperty(final int token)434     public GetNextWordPropertyResult getNextWordProperty(final int token) {
435         final int[] codePoints = new int[DICTIONARY_MAX_WORD_LENGTH];
436         final boolean[] isBeginningOfSentence = new boolean[1];
437         final int nextToken = getNextWordNative(mNativeDict, token, codePoints,
438                 isBeginningOfSentence);
439         final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
440         return new GetNextWordPropertyResult(
441                 getWordProperty(word, isBeginningOfSentence[0]), nextToken);
442     }
443 
444     // Add a unigram entry to binary dictionary with unigram attributes in native code.
addUnigramEntry( final String word, final int probability, final boolean isBeginningOfSentence, final boolean isNotAWord, final boolean isPossiblyOffensive, final int timestamp)445     public boolean addUnigramEntry(
446             final String word, final int probability, final boolean isBeginningOfSentence,
447             final boolean isNotAWord, final boolean isPossiblyOffensive, final int timestamp) {
448         if (word == null || (word.isEmpty() && !isBeginningOfSentence)) {
449             return false;
450         }
451         final int[] codePoints = StringUtils.toCodePointArray(word);
452         if (!addUnigramEntryNative(mNativeDict, codePoints, probability,
453                 null /* shortcutTargetCodePoints */, 0 /* shortcutProbability */,
454                 isBeginningOfSentence, isNotAWord, isPossiblyOffensive, timestamp)) {
455             return false;
456         }
457         mHasUpdated = true;
458         return true;
459     }
460 
461     // Remove a unigram entry from the binary dictionary in native code.
removeUnigramEntry(final String word)462     public boolean removeUnigramEntry(final String word) {
463         if (TextUtils.isEmpty(word)) {
464             return false;
465         }
466         final int[] codePoints = StringUtils.toCodePointArray(word);
467         if (!removeUnigramEntryNative(mNativeDict, codePoints)) {
468             return false;
469         }
470         mHasUpdated = true;
471         return true;
472     }
473 
474     // Add an n-gram entry to the binary dictionary with timestamp in native code.
addNgramEntry(final NgramContext ngramContext, final String word, final int probability, final int timestamp)475     public boolean addNgramEntry(final NgramContext ngramContext, final String word,
476             final int probability, final int timestamp) {
477         if (!ngramContext.isValid() || TextUtils.isEmpty(word)) {
478             return false;
479         }
480         final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
481         final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
482         ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
483         final int[] wordCodePoints = StringUtils.toCodePointArray(word);
484         if (!addNgramEntryNative(mNativeDict, prevWordCodePointArrays,
485                 isBeginningOfSentenceArray, wordCodePoints, probability, timestamp)) {
486             return false;
487         }
488         mHasUpdated = true;
489         return true;
490     }
491 
492     // Update entries for the word occurrence with the ngramContext.
updateEntriesForWordWithNgramContext(@onnull final NgramContext ngramContext, final String word, final boolean isValidWord, final int count, final int timestamp)493     public boolean updateEntriesForWordWithNgramContext(@Nonnull final NgramContext ngramContext,
494             final String word, final boolean isValidWord, final int count, final int timestamp) {
495         if (TextUtils.isEmpty(word)) {
496             return false;
497         }
498         final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
499         final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
500         ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
501         final int[] wordCodePoints = StringUtils.toCodePointArray(word);
502         if (!updateEntriesForWordWithNgramContextNative(mNativeDict, prevWordCodePointArrays,
503                 isBeginningOfSentenceArray, wordCodePoints, isValidWord, count, timestamp)) {
504             return false;
505         }
506         mHasUpdated = true;
507         return true;
508     }
509 
510     @UsedForTesting
updateEntriesForInputEvents(final WordInputEventForPersonalization[] inputEvents)511     public void updateEntriesForInputEvents(final WordInputEventForPersonalization[] inputEvents) {
512         if (!isValidDictionary()) {
513             return;
514         }
515         int processedEventCount = 0;
516         while (processedEventCount < inputEvents.length) {
517             if (needsToRunGC(true /* mindsBlockByGC */)) {
518                 flushWithGC();
519             }
520             processedEventCount = updateEntriesForInputEventsNative(mNativeDict, inputEvents,
521                     processedEventCount);
522             mHasUpdated = true;
523             if (processedEventCount <= 0) {
524                 return;
525             }
526         }
527     }
528 
reopen()529     private void reopen() {
530         close();
531         final File dictFile = new File(mDictFilePath);
532         // WARNING: Because we pass 0 as the offset and file.length() as the length, this can
533         // only be called for actual files. Right now it's only called by the flush() family of
534         // functions, which require an updatable dictionary, so it's okay. But beware.
535         loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */,
536                 dictFile.length(), mIsUpdatable);
537     }
538 
539     // Flush to dict file if the dictionary has been updated.
flush()540     public boolean flush() {
541         if (!isValidDictionary()) {
542             return false;
543         }
544         if (mHasUpdated) {
545             if (!flushNative(mNativeDict, mDictFilePath)) {
546                 return false;
547             }
548             reopen();
549         }
550         return true;
551     }
552 
553     // Run GC and flush to dict file if the dictionary has been updated.
flushWithGCIfHasUpdated()554     public boolean flushWithGCIfHasUpdated() {
555         if (mHasUpdated) {
556             return flushWithGC();
557         }
558         return true;
559     }
560 
561     // Run GC and flush to dict file.
flushWithGC()562     public boolean flushWithGC() {
563         if (!isValidDictionary()) {
564             return false;
565         }
566         if (!flushWithGCNative(mNativeDict, mDictFilePath)) {
567             return false;
568         }
569         reopen();
570         return true;
571     }
572 
573     /**
574      * Checks whether GC is needed to run or not.
575      * @param mindsBlockByGC Whether to mind operations blocked by GC. We don't need to care about
576      * the blocking in some situations such as in idle time or just before closing.
577      * @return whether GC is needed to run or not.
578      */
needsToRunGC(final boolean mindsBlockByGC)579     public boolean needsToRunGC(final boolean mindsBlockByGC) {
580         if (!isValidDictionary()) {
581             return false;
582         }
583         return needsToRunGCNative(mNativeDict, mindsBlockByGC);
584     }
585 
migrateTo(final int newFormatVersion)586     public boolean migrateTo(final int newFormatVersion) {
587         if (!isValidDictionary()) {
588             return false;
589         }
590         final File isMigratingDir =
591                 new File(mDictFilePath + DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION);
592         if (isMigratingDir.exists()) {
593             isMigratingDir.delete();
594             Log.e(TAG, "Previous migration attempt failed probably due to a crash. "
595                         + "Giving up using the old dictionary (" + mDictFilePath + ").");
596             return false;
597         }
598         if (!isMigratingDir.mkdir()) {
599             Log.e(TAG, "Cannot create a dir (" + isMigratingDir.getAbsolutePath()
600                     + ") to record migration.");
601             return false;
602         }
603         try {
604             final String tmpDictFilePath = mDictFilePath + DICT_FILE_NAME_SUFFIX_FOR_MIGRATION;
605             if (!migrateNative(mNativeDict, tmpDictFilePath, newFormatVersion)) {
606                 return false;
607             }
608             close();
609             final File dictFile = new File(mDictFilePath);
610             final File tmpDictFile = new File(tmpDictFilePath);
611             if (!FileUtils.deleteRecursively(dictFile)) {
612                 return false;
613             }
614             if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) {
615                 return false;
616             }
617             loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */,
618                     dictFile.length(), mIsUpdatable);
619             return true;
620         } finally {
621             isMigratingDir.delete();
622         }
623     }
624 
625     @UsedForTesting
getPropertyForGettingStats(final String query)626     public String getPropertyForGettingStats(final String query) {
627         if (!isValidDictionary()) {
628             return "";
629         }
630         return getPropertyNative(mNativeDict, query);
631     }
632 
633     @Override
shouldAutoCommit(final SuggestedWordInfo candidate)634     public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
635         return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT;
636     }
637 
638     @Override
close()639     public void close() {
640         synchronized (mDicTraverseSessions) {
641             final int sessionsSize = mDicTraverseSessions.size();
642             for (int index = 0; index < sessionsSize; ++index) {
643                 final DicTraverseSession traverseSession = mDicTraverseSessions.valueAt(index);
644                 if (traverseSession != null) {
645                     traverseSession.close();
646                 }
647             }
648             mDicTraverseSessions.clear();
649         }
650         closeInternalLocked();
651     }
652 
closeInternalLocked()653     private synchronized void closeInternalLocked() {
654         if (mNativeDict != 0) {
655             closeNative(mNativeDict);
656             mNativeDict = 0;
657         }
658     }
659 
    // TODO: Manage BinaryDictionary instances without using WeakReference or something.
    /**
     * Releases the native dictionary when this object is finalized.
     *
     * Only {@code closeInternalLocked()} is invoked here, not {@code close()}, so the traverse
     * sessions are not touched by this method.
     *
     * @throws Throwable whatever {@code closeInternalLocked()} or {@code super.finalize()} throws.
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            closeInternalLocked();
        } finally {
            // Chain to the superclass finalizer even if releasing the native handle failed.
            super.finalize();
        }
    }
669 }
670