1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.latin;
18 
19 import android.text.TextUtils;
20 import android.util.Log;
21 import android.util.SparseArray;
22 
23 import com.android.inputmethod.annotations.UsedForTesting;
24 import com.android.inputmethod.keyboard.ProximityInfo;
25 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
26 import com.android.inputmethod.latin.makedict.DictionaryHeader;
27 import com.android.inputmethod.latin.makedict.FormatSpec;
28 import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
29 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
30 import com.android.inputmethod.latin.makedict.WordProperty;
31 import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
32 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
33 import com.android.inputmethod.latin.utils.FileUtils;
34 import com.android.inputmethod.latin.utils.JniUtils;
35 import com.android.inputmethod.latin.utils.LanguageModelParam;
36 import com.android.inputmethod.latin.utils.StringUtils;
37 
38 import java.io.File;
39 import java.util.ArrayList;
40 import java.util.Arrays;
41 import java.util.HashMap;
42 import java.util.Locale;
43 import java.util.Map;
44 
45 /**
46  * Implements a static, compacted, binary dictionary of standard words.
47  */
48 // TODO: All methods which should be locked need to have a suffix "Locked".
49 public final class BinaryDictionary extends Dictionary {
    // Tag used for the log output of this class.
    private static final String TAG = BinaryDictionary.class.getSimpleName();

    // The cutoff returned by native for auto-commit confidence.
    // Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h
    private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000;

    // Query strings exposed for tests.
    // NOTE(review): presumably meant to be passed to getPropertyNative() -- confirm with callers.
    @UsedForTesting
    public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
    @UsedForTesting
    public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
    @UsedForTesting
    public static final String MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
    @UsedForTesting
    public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";

    public static final int NOT_A_VALID_TIMESTAMP = -1;

    // Format to get unigram flags from native side via getWordPropertyNative().
    // The *_INDEX constants are positions in the boolean array of length
    // FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT that is handed to the native call.
    private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5;
    private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
    private static final int FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX = 1;
    private static final int FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX = 2;
    private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
    private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4;

    // Format to get probability and historical info from native side via getWordPropertyNative().
    // The *_INDEX constants are positions in the int array of length
    // FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT that is handed to the native call.
    public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4;
    public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0;
    public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1;
    public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2;
    public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3;

    public static final String DICT_FILE_NAME_SUFFIX_FOR_MIGRATION = ".migrate";
    public static final String DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION = ".migrating";

    // Handle of the native dictionary. A value of 0 means there is no valid dictionary
    // (see isValidDictionary()).
    private long mNativeDict;
    private final Locale mLocale;
    // Length of the dictionary data as given to the constructor; 0 for on-memory dictionaries.
    private final long mDictSize;
    // Path of the dictionary file; this is also the path the flush methods write to.
    private final String mDictFilePath;
    private final boolean mUseFullEditDistance;
    private final boolean mIsUpdatable;
    // Set once an entry has been added or removed; cleared whenever the dictionary is loaded.
    private boolean mHasUpdated;

    // Traverse sessions keyed by session id. Accesses synchronize on the array itself.
    private final SparseArray<DicTraverseSession> mDicTraverseSessions = new SparseArray<>();
94 
95     // TODO: There should be a way to remove used DicTraverseSession objects from
96     // {@code mDicTraverseSessions}.
getTraverseSession(final int traverseSessionId)97     private DicTraverseSession getTraverseSession(final int traverseSessionId) {
98         synchronized(mDicTraverseSessions) {
99             DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId);
100             if (traverseSession == null) {
101                 traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize);
102                 mDicTraverseSessions.put(traverseSessionId, traverseSession);
103             }
104             return traverseSession;
105         }
106     }
107 
    /**
     * Constructs a binary dictionary using an existing dictionary file.
     * @param filename the name of the file to read through native code.
     * @param offset the offset of the dictionary data within the file.
     * @param length the length of the binary data.
     * @param useFullEditDistance whether to use the full edit distance in suggestions
     * @param locale the locale associated with this dictionary
     * @param dictType the dictionary type, as a human-readable string
     * @param isUpdatable whether to open the dictionary file in writable mode.
     */
BinaryDictionary(final String filename, final long offset, final long length, final boolean useFullEditDistance, final Locale locale, final String dictType, final boolean isUpdatable)117     public BinaryDictionary(final String filename, final long offset, final long length,
118             final boolean useFullEditDistance, final Locale locale, final String dictType,
119             final boolean isUpdatable) {
120         super(dictType);
121         mLocale = locale;
122         mDictSize = length;
123         mDictFilePath = filename;
124         mIsUpdatable = isUpdatable;
125         mHasUpdated = false;
126         mUseFullEditDistance = useFullEditDistance;
127         loadDictionary(filename, offset, length, isUpdatable);
128     }
129 
    /**
     * Constructs a binary dictionary in memory.
     * @param filename the name of the file the dictionary is flushed to.
     * @param useFullEditDistance whether to use the full edit distance in suggestions
     * @param locale the locale associated with this dictionary
     * @param dictType the dictionary type, as a human-readable string
     * @param formatVersion the format version of the dictionary
     * @param attributeMap the attributes of the dictionary
     */
BinaryDictionary(final String filename, final boolean useFullEditDistance, final Locale locale, final String dictType, final long formatVersion, final Map<String, String> attributeMap)138     public BinaryDictionary(final String filename, final boolean useFullEditDistance,
139             final Locale locale, final String dictType, final long formatVersion,
140             final Map<String, String> attributeMap) {
141         super(dictType);
142         mLocale = locale;
143         mDictSize = 0;
144         mDictFilePath = filename;
145         // On memory dictionary is always updatable.
146         mIsUpdatable = true;
147         mHasUpdated = false;
148         mUseFullEditDistance = useFullEditDistance;
149         final String[] keyArray = new String[attributeMap.size()];
150         final String[] valueArray = new String[attributeMap.size()];
151         int index = 0;
152         for (final String key : attributeMap.keySet()) {
153             keyArray[index] = key;
154             valueArray[index] = attributeMap.get(key);
155             index++;
156         }
157         mNativeDict = createOnMemoryNative(formatVersion, locale.toString(), keyArray, valueArray);
158     }
159 
160 
    // Runs once when this class is initialized.
    // NOTE(review): presumably loads the JNI library implementing the native methods
    // declared in this class -- confirm in JniUtils.
    static {
        JniUtils.loadNativeLibrary();
    }
164 
    // Native entry points. The two methods below return the handle that this class keeps in
    // mNativeDict; every other native method takes that handle as its first argument.
    private static native long openNative(String sourceDir, long dictOffset, long dictSize,
            boolean isUpdatable);
    private static native long createOnMemoryNative(long formatVersion,
            String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray);
    // Header information is returned through the "out" parameters: single-element arrays
    // for the scalars and lists of code point arrays for the attribute keys and values.
    private static native void getHeaderInfoNative(long dict, int[] outHeaderSize,
            int[] outFormatVersion, ArrayList<int[]> outAttributeKeys,
            ArrayList<int[]> outAttributeValues);
    // Persistence and life cycle.
    private static native boolean flushNative(long dict, String filePath);
    private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
    private static native boolean flushWithGCNative(long dict, String filePath);
    private static native void closeNative(long dict);
    private static native int getFormatVersionNative(long dict);
    // Look-ups. Words are passed as arrays of code points.
    private static native int getProbabilityNative(long dict, int[] word);
    private static native int getMaxProbabilityOfExactMatchesNative(long dict, int[] word);
    private static native int getNgramProbabilityNative(long dict, int[][] prevWordCodePointArrays,
            boolean[] isBeginningOfSentenceArray, int[] word);
    // outFlags and outProbabilityInfo are indexed with the FORMAT_WORD_PROPERTY_* constants.
    private static native void getWordPropertyNative(long dict, int[] word,
            boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags,
            int[] outProbabilityInfo, ArrayList<int[]> outBigramTargets,
            ArrayList<int[]> outBigramProbabilityInfo, ArrayList<int[]> outShortcutTargets,
            ArrayList<Integer> outShortcutProbabilities);
    // Returns the token to pass to the following call; see getNextWordProperty().
    private static native int getNextWordNative(long dict, int token, int[] outCodePoints,
            boolean[] outIsBeginningOfSentence);
    // Results are written into the output* arrays; inOutLanguageWeight is both read and written.
    private static native void getSuggestionsNative(long dict, long proximityInfo,
            long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
            int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int[] outputSuggestionCount, int[] outputCodePoints, int[] outputScores,
            int[] outputIndices, int[] outputTypes, int[] outputAutoCommitFirstWordConfidence,
            float[] inOutLanguageWeight);
    // Modifications. The boolean result is used by the Java wrappers to decide whether the
    // dictionary has to be marked as updated.
    private static native boolean addUnigramEntryNative(long dict, int[] word, int probability,
            int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence,
            boolean isNotAWord, boolean isBlacklisted, int timestamp);
    private static native boolean removeUnigramEntryNative(long dict, int[] word);
    private static native boolean addNgramEntryNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int[] word, int probability, int timestamp);
    private static native boolean removeNgramEntryNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word);
    // The returned value is used by addMultipleDictionaryEntries() as the next start index.
    private static native int addMultipleDictionaryEntriesNative(long dict,
            LanguageModelParam[] languageModelParams, int startIndex);
    private static native String getPropertyNative(long dict, String query);
    private static native boolean isCorruptedNative(long dict);
    private static native boolean migrateNative(long dict, String dictFilePath,
            long newFormatVersion);
210 
211     // TODO: Move native dict into session
loadDictionary(final String path, final long startOffset, final long length, final boolean isUpdatable)212     private final void loadDictionary(final String path, final long startOffset,
213             final long length, final boolean isUpdatable) {
214         mHasUpdated = false;
215         mNativeDict = openNative(path, startOffset, length, isUpdatable);
216     }
217 
218     // TODO: Check isCorrupted() for main dictionaries.
isCorrupted()219     public boolean isCorrupted() {
220         if (!isValidDictionary()) {
221             return false;
222         }
223         if (!isCorruptedNative(mNativeDict)) {
224             return false;
225         }
226         // TODO: Record the corruption.
227         Log.e(TAG, "BinaryDictionary (" + mDictFilePath + ") is corrupted.");
228         Log.e(TAG, "locale: " + mLocale);
229         Log.e(TAG, "dict size: " + mDictSize);
230         Log.e(TAG, "updatable: " + mIsUpdatable);
231         return true;
232     }
233 
getHeader()234     public DictionaryHeader getHeader() throws UnsupportedFormatException {
235         if (mNativeDict == 0) {
236             return null;
237         }
238         final int[] outHeaderSize = new int[1];
239         final int[] outFormatVersion = new int[1];
240         final ArrayList<int[]> outAttributeKeys = new ArrayList<>();
241         final ArrayList<int[]> outAttributeValues = new ArrayList<>();
242         getHeaderInfoNative(mNativeDict, outHeaderSize, outFormatVersion, outAttributeKeys,
243                 outAttributeValues);
244         final HashMap<String, String> attributes = new HashMap<>();
245         for (int i = 0; i < outAttributeKeys.size(); i++) {
246             final String attributeKey = StringUtils.getStringFromNullTerminatedCodePointArray(
247                     outAttributeKeys.get(i));
248             final String attributeValue = StringUtils.getStringFromNullTerminatedCodePointArray(
249                     outAttributeValues.get(i));
250             attributes.put(attributeKey, attributeValue);
251         }
252         final boolean hasHistoricalInfo = DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals(
253                 attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY));
254         return new DictionaryHeader(outHeaderSize[0], new DictionaryOptions(attributes),
255                 new FormatSpec.FormatOptions(outFormatVersion[0], hasHistoricalInfo));
256     }
257 
258     @Override
getSuggestions(final WordComposer composer, final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, final SettingsValuesForSuggestion settingsValuesForSuggestion, final int sessionId, final float[] inOutLanguageWeight)259     public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer,
260             final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo,
261             final SettingsValuesForSuggestion settingsValuesForSuggestion,
262             final int sessionId, final float[] inOutLanguageWeight) {
263         if (!isValidDictionary()) {
264             return null;
265         }
266         final DicTraverseSession session = getTraverseSession(sessionId);
267         Arrays.fill(session.mInputCodePoints, Constants.NOT_A_CODE);
268         prevWordsInfo.outputToArray(session.mPrevWordCodePointArrays,
269                 session.mIsBeginningOfSentenceArray);
270         final InputPointers inputPointers = composer.getInputPointers();
271         final boolean isGesture = composer.isBatchMode();
272         final int inputSize;
273         if (!isGesture) {
274             inputSize = composer.copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount(
275                     session.mInputCodePoints);
276             if (inputSize < 0) {
277                 return null;
278             }
279         } else {
280             inputSize = inputPointers.getPointerSize();
281         }
282         session.mNativeSuggestOptions.setUseFullEditDistance(mUseFullEditDistance);
283         session.mNativeSuggestOptions.setIsGesture(isGesture);
284         session.mNativeSuggestOptions.setBlockOffensiveWords(
285                 settingsValuesForSuggestion.mBlockPotentiallyOffensive);
286         session.mNativeSuggestOptions.setSpaceAwareGestureEnabled(
287                 settingsValuesForSuggestion.mSpaceAwareGestureEnabled);
288         session.mNativeSuggestOptions.setAdditionalFeaturesOptions(
289                 settingsValuesForSuggestion.mAdditionalFeaturesSettingValues);
290         if (inOutLanguageWeight != null) {
291             session.mInputOutputLanguageWeight[0] = inOutLanguageWeight[0];
292         } else {
293             session.mInputOutputLanguageWeight[0] = Dictionary.NOT_A_LANGUAGE_WEIGHT;
294         }
295         // TOOD: Pass multiple previous words information for n-gram.
296         getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(),
297                 getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(),
298                 inputPointers.getYCoordinates(), inputPointers.getTimes(),
299                 inputPointers.getPointerIds(), session.mInputCodePoints, inputSize,
300                 session.mNativeSuggestOptions.getOptions(), session.mPrevWordCodePointArrays,
301                 session.mIsBeginningOfSentenceArray, session.mOutputSuggestionCount,
302                 session.mOutputCodePoints, session.mOutputScores, session.mSpaceIndices,
303                 session.mOutputTypes, session.mOutputAutoCommitFirstWordConfidence,
304                 session.mInputOutputLanguageWeight);
305         if (inOutLanguageWeight != null) {
306             inOutLanguageWeight[0] = session.mInputOutputLanguageWeight[0];
307         }
308         final int count = session.mOutputSuggestionCount[0];
309         final ArrayList<SuggestedWordInfo> suggestions = new ArrayList<>();
310         for (int j = 0; j < count; ++j) {
311             final int start = j * Constants.DICTIONARY_MAX_WORD_LENGTH;
312             int len = 0;
313             while (len < Constants.DICTIONARY_MAX_WORD_LENGTH
314                     && session.mOutputCodePoints[start + len] != 0) {
315                 ++len;
316             }
317             if (len > 0) {
318                 suggestions.add(new SuggestedWordInfo(
319                         new String(session.mOutputCodePoints, start, len),
320                         session.mOutputScores[j], session.mOutputTypes[j], this /* sourceDict */,
321                         session.mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */,
322                         session.mOutputAutoCommitFirstWordConfidence[0]));
323             }
324         }
325         return suggestions;
326     }
327 
isValidDictionary()328     public boolean isValidDictionary() {
329         return mNativeDict != 0;
330     }
331 
getFormatVersion()332     public int getFormatVersion() {
333         return getFormatVersionNative(mNativeDict);
334     }
335 
336     @Override
isInDictionary(final String word)337     public boolean isInDictionary(final String word) {
338         return getFrequency(word) != NOT_A_PROBABILITY;
339     }
340 
341     @Override
getFrequency(final String word)342     public int getFrequency(final String word) {
343         if (TextUtils.isEmpty(word)) return NOT_A_PROBABILITY;
344         int[] codePoints = StringUtils.toCodePointArray(word);
345         return getProbabilityNative(mNativeDict, codePoints);
346     }
347 
348     @Override
getMaxFrequencyOfExactMatches(final String word)349     public int getMaxFrequencyOfExactMatches(final String word) {
350         if (TextUtils.isEmpty(word)) return NOT_A_PROBABILITY;
351         int[] codePoints = StringUtils.toCodePointArray(word);
352         return getMaxProbabilityOfExactMatchesNative(mNativeDict, codePoints);
353     }
354 
355     @UsedForTesting
isValidNgram(final PrevWordsInfo prevWordsInfo, final String word)356     public boolean isValidNgram(final PrevWordsInfo prevWordsInfo, final String word) {
357         return getNgramProbability(prevWordsInfo, word) != NOT_A_PROBABILITY;
358     }
359 
getNgramProbability(final PrevWordsInfo prevWordsInfo, final String word)360     public int getNgramProbability(final PrevWordsInfo prevWordsInfo, final String word) {
361         if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
362             return NOT_A_PROBABILITY;
363         }
364         final int[][] prevWordCodePointArrays = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][];
365         final boolean[] isBeginningOfSentenceArray =
366                 new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
367         prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
368         final int[] wordCodePoints = StringUtils.toCodePointArray(word);
369         return getNgramProbabilityNative(mNativeDict, prevWordCodePointArrays,
370                 isBeginningOfSentenceArray, wordCodePoints);
371     }
372 
getWordProperty(final String word, final boolean isBeginningOfSentence)373     public WordProperty getWordProperty(final String word, final boolean isBeginningOfSentence) {
374         if (word == null) {
375             return null;
376         }
377         final int[] codePoints = StringUtils.toCodePointArray(word);
378         final int[] outCodePoints = new int[Constants.DICTIONARY_MAX_WORD_LENGTH];
379         final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT];
380         final int[] outProbabilityInfo =
381                 new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT];
382         final ArrayList<int[]> outBigramTargets = new ArrayList<>();
383         final ArrayList<int[]> outBigramProbabilityInfo = new ArrayList<>();
384         final ArrayList<int[]> outShortcutTargets = new ArrayList<>();
385         final ArrayList<Integer> outShortcutProbabilities = new ArrayList<>();
386         getWordPropertyNative(mNativeDict, codePoints, isBeginningOfSentence, outCodePoints,
387                 outFlags, outProbabilityInfo, outBigramTargets, outBigramProbabilityInfo,
388                 outShortcutTargets, outShortcutProbabilities);
389         return new WordProperty(codePoints,
390                 outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX],
391                 outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX],
392                 outFlags[FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX],
393                 outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX],
394                 outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo,
395                 outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
396                 outShortcutProbabilities);
397     }
398 
    /**
     * Result of {@link #getNextWordProperty(int)}: the property of the word that was reached,
     * together with the token to pass to the next call.
     */
    public static class GetNextWordPropertyResult {
        // Property of the word found by this iteration step.
        public WordProperty mWordProperty;
        // Token to hand to the next call of getNextWordProperty().
        public int mNextToken;

        public GetNextWordPropertyResult(final WordProperty wordProperty, final int nextToken) {
            mWordProperty = wordProperty;
            mNextToken = nextToken;
        }
    }
408 
409     /**
410      * Method to iterate all words in the dictionary for makedict.
411      * If token is 0, this method newly starts iterating the dictionary.
412      */
getNextWordProperty(final int token)413     public GetNextWordPropertyResult getNextWordProperty(final int token) {
414         final int[] codePoints = new int[Constants.DICTIONARY_MAX_WORD_LENGTH];
415         final boolean[] isBeginningOfSentence = new boolean[1];
416         final int nextToken = getNextWordNative(mNativeDict, token, codePoints,
417                 isBeginningOfSentence);
418         final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
419         return new GetNextWordPropertyResult(
420                 getWordProperty(word, isBeginningOfSentence[0]), nextToken);
421     }
422 
423     // Add a unigram entry to binary dictionary with unigram attributes in native code.
addUnigramEntry(final String word, final int probability, final String shortcutTarget, final int shortcutProbability, final boolean isBeginningOfSentence, final boolean isNotAWord, final boolean isBlacklisted, final int timestamp)424     public boolean addUnigramEntry(final String word, final int probability,
425             final String shortcutTarget, final int shortcutProbability,
426             final boolean isBeginningOfSentence, final boolean isNotAWord,
427             final boolean isBlacklisted, final int timestamp) {
428         if (word == null || (word.isEmpty() && !isBeginningOfSentence)) {
429             return false;
430         }
431         final int[] codePoints = StringUtils.toCodePointArray(word);
432         final int[] shortcutTargetCodePoints = (shortcutTarget != null) ?
433                 StringUtils.toCodePointArray(shortcutTarget) : null;
434         if (!addUnigramEntryNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints,
435                 shortcutProbability, isBeginningOfSentence, isNotAWord, isBlacklisted, timestamp)) {
436             return false;
437         }
438         mHasUpdated = true;
439         return true;
440     }
441 
442     // Remove a unigram entry from the binary dictionary in native code.
removeUnigramEntry(final String word)443     public boolean removeUnigramEntry(final String word) {
444         if (TextUtils.isEmpty(word)) {
445             return false;
446         }
447         final int[] codePoints = StringUtils.toCodePointArray(word);
448         if (!removeUnigramEntryNative(mNativeDict, codePoints)) {
449             return false;
450         }
451         mHasUpdated = true;
452         return true;
453     }
454 
455     // Add an n-gram entry to the binary dictionary with timestamp in native code.
addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word, final int probability, final int timestamp)456     public boolean addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word,
457             final int probability, final int timestamp) {
458         if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
459             return false;
460         }
461         final int[][] prevWordCodePointArrays = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][];
462         final boolean[] isBeginningOfSentenceArray =
463                 new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
464         prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
465         final int[] wordCodePoints = StringUtils.toCodePointArray(word);
466         if (!addNgramEntryNative(mNativeDict, prevWordCodePointArrays,
467                 isBeginningOfSentenceArray, wordCodePoints, probability, timestamp)) {
468             return false;
469         }
470         mHasUpdated = true;
471         return true;
472     }
473 
474     // Remove an n-gram entry from the binary dictionary in native code.
removeNgramEntry(final PrevWordsInfo prevWordsInfo, final String word)475     public boolean removeNgramEntry(final PrevWordsInfo prevWordsInfo, final String word) {
476         if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
477             return false;
478         }
479         final int[][] prevWordCodePointArrays = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][];
480         final boolean[] isBeginningOfSentenceArray =
481                 new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
482         prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
483         final int[] wordCodePoints = StringUtils.toCodePointArray(word);
484         if (!removeNgramEntryNative(mNativeDict, prevWordCodePointArrays,
485                 isBeginningOfSentenceArray, wordCodePoints)) {
486             return false;
487         }
488         mHasUpdated = true;
489         return true;
490     }
491 
addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams)492     public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) {
493         if (!isValidDictionary()) return;
494         int processedParamCount = 0;
495         while (processedParamCount < languageModelParams.length) {
496             if (needsToRunGC(true /* mindsBlockByGC */)) {
497                 flushWithGC();
498             }
499             processedParamCount = addMultipleDictionaryEntriesNative(mNativeDict,
500                     languageModelParams, processedParamCount);
501             mHasUpdated = true;
502             if (processedParamCount <= 0) {
503                 return;
504             }
505         }
506     }
507 
reopen()508     private void reopen() {
509         close();
510         final File dictFile = new File(mDictFilePath);
511         // WARNING: Because we pass 0 as the offset and file.length() as the length, this can
512         // only be called for actual files. Right now it's only called by the flush() family of
513         // functions, which require an updatable dictionary, so it's okay. But beware.
514         loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */,
515                 dictFile.length(), mIsUpdatable);
516     }
517 
518     // Flush to dict file if the dictionary has been updated.
flush()519     public boolean flush() {
520         if (!isValidDictionary()) return false;
521         if (mHasUpdated) {
522             if (!flushNative(mNativeDict, mDictFilePath)) {
523                 return false;
524             }
525             reopen();
526         }
527         return true;
528     }
529 
530     // Run GC and flush to dict file if the dictionary has been updated.
flushWithGCIfHasUpdated()531     public boolean flushWithGCIfHasUpdated() {
532         if (mHasUpdated) {
533             return flushWithGC();
534         }
535         return true;
536     }
537 
538     // Run GC and flush to dict file.
flushWithGC()539     public boolean flushWithGC() {
540         if (!isValidDictionary()) return false;
541         if (!flushWithGCNative(mNativeDict, mDictFilePath)) {
542             return false;
543         }
544         reopen();
545         return true;
546     }
547 
548     /**
549      * Checks whether GC is needed to run or not.
550      * @param mindsBlockByGC Whether to mind operations blocked by GC. We don't need to care about
551      * the blocking in some situations such as in idle time or just before closing.
552      * @return whether GC is needed to run or not.
553      */
needsToRunGC(final boolean mindsBlockByGC)554     public boolean needsToRunGC(final boolean mindsBlockByGC) {
555         if (!isValidDictionary()) return false;
556         return needsToRunGCNative(mNativeDict, mindsBlockByGC);
557     }
558 
migrateTo(final int newFormatVersion)559     public boolean migrateTo(final int newFormatVersion) {
560         if (!isValidDictionary()) {
561             return false;
562         }
563         final File isMigratingDir =
564                 new File(mDictFilePath + DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION);
565         if (isMigratingDir.exists()) {
566             isMigratingDir.delete();
567             Log.e(TAG, "Previous migration attempt failed probably due to a crash. "
568                         + "Giving up using the old dictionary (" + mDictFilePath + ").");
569             return false;
570         }
571         if (!isMigratingDir.mkdir()) {
572             Log.e(TAG, "Cannot create a dir (" + isMigratingDir.getAbsolutePath()
573                     + ") to record migration.");
574             return false;
575         }
576         try {
577             final String tmpDictFilePath = mDictFilePath + DICT_FILE_NAME_SUFFIX_FOR_MIGRATION;
578             if (!migrateNative(mNativeDict, tmpDictFilePath, newFormatVersion)) {
579                 return false;
580             }
581             close();
582             final File dictFile = new File(mDictFilePath);
583             final File tmpDictFile = new File(tmpDictFilePath);
584             if (!FileUtils.deleteRecursively(dictFile)) {
585                 return false;
586             }
587             if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) {
588                 return false;
589             }
590             loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */,
591                     dictFile.length(), mIsUpdatable);
592             return true;
593         } finally {
594             isMigratingDir.delete();
595         }
596     }
597 
598     @UsedForTesting
getPropertyForTest(final String query)599     public String getPropertyForTest(final String query) {
600         if (!isValidDictionary()) return "";
601         return getPropertyNative(mNativeDict, query);
602     }
603 
604     @Override
shouldAutoCommit(final SuggestedWordInfo candidate)605     public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
606         return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT;
607     }
608 
609     @Override
close()610     public void close() {
611         synchronized (mDicTraverseSessions) {
612             final int sessionsSize = mDicTraverseSessions.size();
613             for (int index = 0; index < sessionsSize; ++index) {
614                 final DicTraverseSession traverseSession = mDicTraverseSessions.valueAt(index);
615                 if (traverseSession != null) {
616                     traverseSession.close();
617                 }
618             }
619             mDicTraverseSessions.clear();
620         }
621         closeInternalLocked();
622     }
623 
closeInternalLocked()624     private synchronized void closeInternalLocked() {
625         if (mNativeDict != 0) {
626             closeNative(mNativeDict);
627             mNativeDict = 0;
628         }
629     }
630 
631     // TODO: Manage BinaryDictionary instances without using WeakReference or something.
632     @Override
finalize()633     protected void finalize() throws Throwable {
634         try {
635             closeInternalLocked();
636         } finally {
637             super.finalize();
638         }
639     }
640 }
641