1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import android.text.TextUtils; 20 import android.util.Log; 21 import android.util.SparseArray; 22 23 import com.android.inputmethod.annotations.UsedForTesting; 24 import com.android.inputmethod.keyboard.ProximityInfo; 25 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 26 import com.android.inputmethod.latin.makedict.DictionaryHeader; 27 import com.android.inputmethod.latin.makedict.FormatSpec; 28 import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions; 29 import com.android.inputmethod.latin.makedict.UnsupportedFormatException; 30 import com.android.inputmethod.latin.makedict.WordProperty; 31 import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion; 32 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; 33 import com.android.inputmethod.latin.utils.FileUtils; 34 import com.android.inputmethod.latin.utils.JniUtils; 35 import com.android.inputmethod.latin.utils.LanguageModelParam; 36 import com.android.inputmethod.latin.utils.StringUtils; 37 38 import java.io.File; 39 import java.util.ArrayList; 40 import java.util.Arrays; 41 import java.util.HashMap; 42 import java.util.Locale; 43 import java.util.Map; 44 45 /** 46 * Implements a static, compacted, binary dictionary of standard words. 47 */ 48 // TODO: All methods which should be locked need to have a suffix "Locked". 49 public final class BinaryDictionary extends Dictionary { 50 private static final String TAG = BinaryDictionary.class.getSimpleName(); 51 52 // The cutoff returned by native for auto-commit confidence. 53 // Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h 54 private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000; 55 56 @UsedForTesting 57 public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; 58 @UsedForTesting 59 public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; 60 @UsedForTesting 61 public static final String MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT"; 62 @UsedForTesting 63 public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; 64 65 public static final int NOT_A_VALID_TIMESTAMP = -1; 66 67 // Format to get unigram flags from native side via getWordPropertyNative(). 68 private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5; 69 private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0; 70 private static final int FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX = 1; 71 private static final int FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX = 2; 72 private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3; 73 private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4; 74 75 // Format to get probability and historical info from native side via getWordPropertyNative(). 76 public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4; 77 public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0; 78 public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1; 79 public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2; 80 public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3; 81 82 public static final String DICT_FILE_NAME_SUFFIX_FOR_MIGRATION = ".migrate"; 83 public static final String DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION = ".migrating"; 84 85 private long mNativeDict; 86 private final Locale mLocale; 87 private final long mDictSize; 88 private final String mDictFilePath; 89 private final boolean mUseFullEditDistance; 90 private final boolean mIsUpdatable; 91 private boolean mHasUpdated; 92 93 private final SparseArray<DicTraverseSession> mDicTraverseSessions = new SparseArray<>(); 94 95 // TODO: There should be a way to remove used DicTraverseSession objects from 96 // {@code mDicTraverseSessions}. getTraverseSession(final int traverseSessionId)97 private DicTraverseSession getTraverseSession(final int traverseSessionId) { 98 synchronized(mDicTraverseSessions) { 99 DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId); 100 if (traverseSession == null) { 101 traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize); 102 mDicTraverseSessions.put(traverseSessionId, traverseSession); 103 } 104 return traverseSession; 105 } 106 } 107 108 /** 109 * Constructs binary dictionary using existing dictionary file. 110 * @param filename the name of the file to read through native code. 111 * @param offset the offset of the dictionary data within the file. 112 * @param length the length of the binary data. 113 * @param useFullEditDistance whether to use the full edit distance in suggestions 114 * @param dictType the dictionary type, as a human-readable string 115 * @param isUpdatable whether to open the dictionary file in writable mode. 116 */ BinaryDictionary(final String filename, final long offset, final long length, final boolean useFullEditDistance, final Locale locale, final String dictType, final boolean isUpdatable)117 public BinaryDictionary(final String filename, final long offset, final long length, 118 final boolean useFullEditDistance, final Locale locale, final String dictType, 119 final boolean isUpdatable) { 120 super(dictType); 121 mLocale = locale; 122 mDictSize = length; 123 mDictFilePath = filename; 124 mIsUpdatable = isUpdatable; 125 mHasUpdated = false; 126 mUseFullEditDistance = useFullEditDistance; 127 loadDictionary(filename, offset, length, isUpdatable); 128 } 129 130 /** 131 * Constructs binary dictionary on memory. 132 * @param filename the name of the file used to flush. 133 * @param useFullEditDistance whether to use the full edit distance in suggestions 134 * @param dictType the dictionary type, as a human-readable string 135 * @param formatVersion the format version of the dictionary 136 * @param attributeMap the attributes of the dictionary 137 */ BinaryDictionary(final String filename, final boolean useFullEditDistance, final Locale locale, final String dictType, final long formatVersion, final Map<String, String> attributeMap)138 public BinaryDictionary(final String filename, final boolean useFullEditDistance, 139 final Locale locale, final String dictType, final long formatVersion, 140 final Map<String, String> attributeMap) { 141 super(dictType); 142 mLocale = locale; 143 mDictSize = 0; 144 mDictFilePath = filename; 145 // On memory dictionary is always updatable. 146 mIsUpdatable = true; 147 mHasUpdated = false; 148 mUseFullEditDistance = useFullEditDistance; 149 final String[] keyArray = new String[attributeMap.size()]; 150 final String[] valueArray = new String[attributeMap.size()]; 151 int index = 0; 152 for (final String key : attributeMap.keySet()) { 153 keyArray[index] = key; 154 valueArray[index] = attributeMap.get(key); 155 index++; 156 } 157 mNativeDict = createOnMemoryNative(formatVersion, locale.toString(), keyArray, valueArray); 158 } 159 160 161 static { JniUtils.loadNativeLibrary()162 JniUtils.loadNativeLibrary(); 163 } 164 openNative(String sourceDir, long dictOffset, long dictSize, boolean isUpdatable)165 private static native long openNative(String sourceDir, long dictOffset, long dictSize, 166 boolean isUpdatable); createOnMemoryNative(long formatVersion, String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray)167 private static native long createOnMemoryNative(long formatVersion, 168 String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray); getHeaderInfoNative(long dict, int[] outHeaderSize, int[] outFormatVersion, ArrayList<int[]> outAttributeKeys, ArrayList<int[]> outAttributeValues)169 private static native void getHeaderInfoNative(long dict, int[] outHeaderSize, 170 int[] outFormatVersion, ArrayList<int[]> outAttributeKeys, 171 ArrayList<int[]> outAttributeValues); flushNative(long dict, String filePath)172 private static native boolean flushNative(long dict, String filePath); needsToRunGCNative(long dict, boolean mindsBlockByGC)173 private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC); flushWithGCNative(long dict, String filePath)174 private static native boolean flushWithGCNative(long dict, String filePath); closeNative(long dict)175 private static native void closeNative(long dict); getFormatVersionNative(long dict)176 private static native int getFormatVersionNative(long dict); getProbabilityNative(long dict, int[] word)177 private static native int getProbabilityNative(long dict, int[] word); getMaxProbabilityOfExactMatchesNative(long dict, int[] word)178 private static native int getMaxProbabilityOfExactMatchesNative(long dict, int[] word); getNgramProbabilityNative(long dict, int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word)179 private static native int getNgramProbabilityNative(long dict, int[][] prevWordCodePointArrays, 180 boolean[] isBeginningOfSentenceArray, int[] word); getWordPropertyNative(long dict, int[] word, boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo, ArrayList<int[]> outBigramTargets, ArrayList<int[]> outBigramProbabilityInfo, ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities)181 private static native void getWordPropertyNative(long dict, int[] word, 182 boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags, 183 int[] outProbabilityInfo, ArrayList<int[]> outBigramTargets, 184 ArrayList<int[]> outBigramProbabilityInfo, ArrayList<int[]> outShortcutTargets, 185 ArrayList<Integer> outShortcutProbabilities); getNextWordNative(long dict, int token, int[] outCodePoints, boolean[] outIsBeginningOfSentence)186 private static native int getNextWordNative(long dict, int token, int[] outCodePoints, 187 boolean[] outIsBeginningOfSentence); getSuggestionsNative(long dict, long proximityInfo, long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions, int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] outputSuggestionCount, int[] outputCodePoints, int[] outputScores, int[] outputIndices, int[] outputTypes, int[] outputAutoCommitFirstWordConfidence, float[] inOutLanguageWeight)188 private static native void getSuggestionsNative(long dict, long proximityInfo, 189 long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, 190 int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions, 191 int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, 192 int[] outputSuggestionCount, int[] outputCodePoints, int[] outputScores, 193 int[] outputIndices, int[] outputTypes, int[] outputAutoCommitFirstWordConfidence, 194 float[] inOutLanguageWeight); addUnigramEntryNative(long dict, int[] word, int probability, int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence, boolean isNotAWord, boolean isBlacklisted, int timestamp)195 private static native boolean addUnigramEntryNative(long dict, int[] word, int probability, 196 int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence, 197 boolean isNotAWord, boolean isBlacklisted, int timestamp); removeUnigramEntryNative(long dict, int[] word)198 private static native boolean removeUnigramEntryNative(long dict, int[] word); addNgramEntryNative(long dict, int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word, int probability, int timestamp)199 private static native boolean addNgramEntryNative(long dict, 200 int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, 201 int[] word, int probability, int timestamp); removeNgramEntryNative(long dict, int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word)202 private static native boolean removeNgramEntryNative(long dict, 203 int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word); addMultipleDictionaryEntriesNative(long dict, LanguageModelParam[] languageModelParams, int startIndex)204 private static native int addMultipleDictionaryEntriesNative(long dict, 205 LanguageModelParam[] languageModelParams, int startIndex); getPropertyNative(long dict, String query)206 private static native String getPropertyNative(long dict, String query); isCorruptedNative(long dict)207 private static native boolean isCorruptedNative(long dict); migrateNative(long dict, String dictFilePath, long newFormatVersion)208 private static native boolean migrateNative(long dict, String dictFilePath, 209 long newFormatVersion); 210 211 // TODO: Move native dict into session loadDictionary(final String path, final long startOffset, final long length, final boolean isUpdatable)212 private final void loadDictionary(final String path, final long startOffset, 213 final long length, final boolean isUpdatable) { 214 mHasUpdated = false; 215 mNativeDict = openNative(path, startOffset, length, isUpdatable); 216 } 217 218 // TODO: Check isCorrupted() for main dictionaries. isCorrupted()219 public boolean isCorrupted() { 220 if (!isValidDictionary()) { 221 return false; 222 } 223 if (!isCorruptedNative(mNativeDict)) { 224 return false; 225 } 226 // TODO: Record the corruption. 227 Log.e(TAG, "BinaryDictionary (" + mDictFilePath + ") is corrupted."); 228 Log.e(TAG, "locale: " + mLocale); 229 Log.e(TAG, "dict size: " + mDictSize); 230 Log.e(TAG, "updatable: " + mIsUpdatable); 231 return true; 232 } 233 getHeader()234 public DictionaryHeader getHeader() throws UnsupportedFormatException { 235 if (mNativeDict == 0) { 236 return null; 237 } 238 final int[] outHeaderSize = new int[1]; 239 final int[] outFormatVersion = new int[1]; 240 final ArrayList<int[]> outAttributeKeys = new ArrayList<>(); 241 final ArrayList<int[]> outAttributeValues = new ArrayList<>(); 242 getHeaderInfoNative(mNativeDict, outHeaderSize, outFormatVersion, outAttributeKeys, 243 outAttributeValues); 244 final HashMap<String, String> attributes = new HashMap<>(); 245 for (int i = 0; i < outAttributeKeys.size(); i++) { 246 final String attributeKey = StringUtils.getStringFromNullTerminatedCodePointArray( 247 outAttributeKeys.get(i)); 248 final String attributeValue = StringUtils.getStringFromNullTerminatedCodePointArray( 249 outAttributeValues.get(i)); 250 attributes.put(attributeKey, attributeValue); 251 } 252 final boolean hasHistoricalInfo = DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals( 253 attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY)); 254 return new DictionaryHeader(outHeaderSize[0], new DictionaryOptions(attributes), 255 new FormatSpec.FormatOptions(outFormatVersion[0], hasHistoricalInfo)); 256 } 257 258 @Override getSuggestions(final WordComposer composer, final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, final SettingsValuesForSuggestion settingsValuesForSuggestion, final int sessionId, final float[] inOutLanguageWeight)259 public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, 260 final PrevWordsInfo prevWordsInfo, final ProximityInfo proximityInfo, 261 final SettingsValuesForSuggestion settingsValuesForSuggestion, 262 final int sessionId, final float[] inOutLanguageWeight) { 263 if (!isValidDictionary()) { 264 return null; 265 } 266 final DicTraverseSession session = getTraverseSession(sessionId); 267 Arrays.fill(session.mInputCodePoints, Constants.NOT_A_CODE); 268 prevWordsInfo.outputToArray(session.mPrevWordCodePointArrays, 269 session.mIsBeginningOfSentenceArray); 270 final InputPointers inputPointers = composer.getInputPointers(); 271 final boolean isGesture = composer.isBatchMode(); 272 final int inputSize; 273 if (!isGesture) { 274 inputSize = composer.copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount( 275 session.mInputCodePoints); 276 if (inputSize < 0) { 277 return null; 278 } 279 } else { 280 inputSize = inputPointers.getPointerSize(); 281 } 282 session.mNativeSuggestOptions.setUseFullEditDistance(mUseFullEditDistance); 283 session.mNativeSuggestOptions.setIsGesture(isGesture); 284 session.mNativeSuggestOptions.setBlockOffensiveWords( 285 settingsValuesForSuggestion.mBlockPotentiallyOffensive); 286 session.mNativeSuggestOptions.setSpaceAwareGestureEnabled( 287 settingsValuesForSuggestion.mSpaceAwareGestureEnabled); 288 session.mNativeSuggestOptions.setAdditionalFeaturesOptions( 289 settingsValuesForSuggestion.mAdditionalFeaturesSettingValues); 290 if (inOutLanguageWeight != null) { 291 session.mInputOutputLanguageWeight[0] = inOutLanguageWeight[0]; 292 } else { 293 session.mInputOutputLanguageWeight[0] = Dictionary.NOT_A_LANGUAGE_WEIGHT; 294 } 295 // TOOD: Pass multiple previous words information for n-gram. 296 getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(), 297 getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(), 298 inputPointers.getYCoordinates(), inputPointers.getTimes(), 299 inputPointers.getPointerIds(), session.mInputCodePoints, inputSize, 300 session.mNativeSuggestOptions.getOptions(), session.mPrevWordCodePointArrays, 301 session.mIsBeginningOfSentenceArray, session.mOutputSuggestionCount, 302 session.mOutputCodePoints, session.mOutputScores, session.mSpaceIndices, 303 session.mOutputTypes, session.mOutputAutoCommitFirstWordConfidence, 304 session.mInputOutputLanguageWeight); 305 if (inOutLanguageWeight != null) { 306 inOutLanguageWeight[0] = session.mInputOutputLanguageWeight[0]; 307 } 308 final int count = session.mOutputSuggestionCount[0]; 309 final ArrayList<SuggestedWordInfo> suggestions = new ArrayList<>(); 310 for (int j = 0; j < count; ++j) { 311 final int start = j * Constants.DICTIONARY_MAX_WORD_LENGTH; 312 int len = 0; 313 while (len < Constants.DICTIONARY_MAX_WORD_LENGTH 314 && session.mOutputCodePoints[start + len] != 0) { 315 ++len; 316 } 317 if (len > 0) { 318 suggestions.add(new SuggestedWordInfo( 319 new String(session.mOutputCodePoints, start, len), 320 session.mOutputScores[j], session.mOutputTypes[j], this /* sourceDict */, 321 session.mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */, 322 session.mOutputAutoCommitFirstWordConfidence[0])); 323 } 324 } 325 return suggestions; 326 } 327 isValidDictionary()328 public boolean isValidDictionary() { 329 return mNativeDict != 0; 330 } 331 getFormatVersion()332 public int getFormatVersion() { 333 return getFormatVersionNative(mNativeDict); 334 } 335 336 @Override isInDictionary(final String word)337 public boolean isInDictionary(final String word) { 338 return getFrequency(word) != NOT_A_PROBABILITY; 339 } 340 341 @Override getFrequency(final String word)342 public int getFrequency(final String word) { 343 if (TextUtils.isEmpty(word)) return NOT_A_PROBABILITY; 344 int[] codePoints = StringUtils.toCodePointArray(word); 345 return getProbabilityNative(mNativeDict, codePoints); 346 } 347 348 @Override getMaxFrequencyOfExactMatches(final String word)349 public int getMaxFrequencyOfExactMatches(final String word) { 350 if (TextUtils.isEmpty(word)) return NOT_A_PROBABILITY; 351 int[] codePoints = StringUtils.toCodePointArray(word); 352 return getMaxProbabilityOfExactMatchesNative(mNativeDict, codePoints); 353 } 354 355 @UsedForTesting isValidNgram(final PrevWordsInfo prevWordsInfo, final String word)356 public boolean isValidNgram(final PrevWordsInfo prevWordsInfo, final String word) { 357 return getNgramProbability(prevWordsInfo, word) != NOT_A_PROBABILITY; 358 } 359 getNgramProbability(final PrevWordsInfo prevWordsInfo, final String word)360 public int getNgramProbability(final PrevWordsInfo prevWordsInfo, final String word) { 361 if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { 362 return NOT_A_PROBABILITY; 363 } 364 final int[][] prevWordCodePointArrays = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][]; 365 final boolean[] isBeginningOfSentenceArray = 366 new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 367 prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); 368 final int[] wordCodePoints = StringUtils.toCodePointArray(word); 369 return getNgramProbabilityNative(mNativeDict, prevWordCodePointArrays, 370 isBeginningOfSentenceArray, wordCodePoints); 371 } 372 getWordProperty(final String word, final boolean isBeginningOfSentence)373 public WordProperty getWordProperty(final String word, final boolean isBeginningOfSentence) { 374 if (word == null) { 375 return null; 376 } 377 final int[] codePoints = StringUtils.toCodePointArray(word); 378 final int[] outCodePoints = new int[Constants.DICTIONARY_MAX_WORD_LENGTH]; 379 final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT]; 380 final int[] outProbabilityInfo = 381 new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT]; 382 final ArrayList<int[]> outBigramTargets = new ArrayList<>(); 383 final ArrayList<int[]> outBigramProbabilityInfo = new ArrayList<>(); 384 final ArrayList<int[]> outShortcutTargets = new ArrayList<>(); 385 final ArrayList<Integer> outShortcutProbabilities = new ArrayList<>(); 386 getWordPropertyNative(mNativeDict, codePoints, isBeginningOfSentence, outCodePoints, 387 outFlags, outProbabilityInfo, outBigramTargets, outBigramProbabilityInfo, 388 outShortcutTargets, outShortcutProbabilities); 389 return new WordProperty(codePoints, 390 outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX], 391 outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX], 392 outFlags[FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX], 393 outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], 394 outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo, 395 outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, 396 outShortcutProbabilities); 397 } 398 399 public static class GetNextWordPropertyResult { 400 public WordProperty mWordProperty; 401 public int mNextToken; 402 GetNextWordPropertyResult(final WordProperty wordProperty, final int nextToken)403 public GetNextWordPropertyResult(final WordProperty wordProperty, final int nextToken) { 404 mWordProperty = wordProperty; 405 mNextToken = nextToken; 406 } 407 } 408 409 /** 410 * Method to iterate all words in the dictionary for makedict. 411 * If token is 0, this method newly starts iterating the dictionary. 412 */ getNextWordProperty(final int token)413 public GetNextWordPropertyResult getNextWordProperty(final int token) { 414 final int[] codePoints = new int[Constants.DICTIONARY_MAX_WORD_LENGTH]; 415 final boolean[] isBeginningOfSentence = new boolean[1]; 416 final int nextToken = getNextWordNative(mNativeDict, token, codePoints, 417 isBeginningOfSentence); 418 final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); 419 return new GetNextWordPropertyResult( 420 getWordProperty(word, isBeginningOfSentence[0]), nextToken); 421 } 422 423 // Add a unigram entry to binary dictionary with unigram attributes in native code. addUnigramEntry(final String word, final int probability, final String shortcutTarget, final int shortcutProbability, final boolean isBeginningOfSentence, final boolean isNotAWord, final boolean isBlacklisted, final int timestamp)424 public boolean addUnigramEntry(final String word, final int probability, 425 final String shortcutTarget, final int shortcutProbability, 426 final boolean isBeginningOfSentence, final boolean isNotAWord, 427 final boolean isBlacklisted, final int timestamp) { 428 if (word == null || (word.isEmpty() && !isBeginningOfSentence)) { 429 return false; 430 } 431 final int[] codePoints = StringUtils.toCodePointArray(word); 432 final int[] shortcutTargetCodePoints = (shortcutTarget != null) ? 433 StringUtils.toCodePointArray(shortcutTarget) : null; 434 if (!addUnigramEntryNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints, 435 shortcutProbability, isBeginningOfSentence, isNotAWord, isBlacklisted, timestamp)) { 436 return false; 437 } 438 mHasUpdated = true; 439 return true; 440 } 441 442 // Remove a unigram entry from the binary dictionary in native code. removeUnigramEntry(final String word)443 public boolean removeUnigramEntry(final String word) { 444 if (TextUtils.isEmpty(word)) { 445 return false; 446 } 447 final int[] codePoints = StringUtils.toCodePointArray(word); 448 if (!removeUnigramEntryNative(mNativeDict, codePoints)) { 449 return false; 450 } 451 mHasUpdated = true; 452 return true; 453 } 454 455 // Add an n-gram entry to the binary dictionary with timestamp in native code. addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word, final int probability, final int timestamp)456 public boolean addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word, 457 final int probability, final int timestamp) { 458 if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { 459 return false; 460 } 461 final int[][] prevWordCodePointArrays = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][]; 462 final boolean[] isBeginningOfSentenceArray = 463 new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 464 prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); 465 final int[] wordCodePoints = StringUtils.toCodePointArray(word); 466 if (!addNgramEntryNative(mNativeDict, prevWordCodePointArrays, 467 isBeginningOfSentenceArray, wordCodePoints, probability, timestamp)) { 468 return false; 469 } 470 mHasUpdated = true; 471 return true; 472 } 473 474 // Remove an n-gram entry from the binary dictionary in native code. removeNgramEntry(final PrevWordsInfo prevWordsInfo, final String word)475 public boolean removeNgramEntry(final PrevWordsInfo prevWordsInfo, final String word) { 476 if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) { 477 return false; 478 } 479 final int[][] prevWordCodePointArrays = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][]; 480 final boolean[] isBeginningOfSentenceArray = 481 new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 482 prevWordsInfo.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); 483 final int[] wordCodePoints = StringUtils.toCodePointArray(word); 484 if (!removeNgramEntryNative(mNativeDict, prevWordCodePointArrays, 485 isBeginningOfSentenceArray, wordCodePoints)) { 486 return false; 487 } 488 mHasUpdated = true; 489 return true; 490 } 491 addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams)492 public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) { 493 if (!isValidDictionary()) return; 494 int processedParamCount = 0; 495 while (processedParamCount < languageModelParams.length) { 496 if (needsToRunGC(true /* mindsBlockByGC */)) { 497 flushWithGC(); 498 } 499 processedParamCount = addMultipleDictionaryEntriesNative(mNativeDict, 500 languageModelParams, processedParamCount); 501 mHasUpdated = true; 502 if (processedParamCount <= 0) { 503 return; 504 } 505 } 506 } 507 reopen()508 private void reopen() { 509 close(); 510 final File dictFile = new File(mDictFilePath); 511 // WARNING: Because we pass 0 as the offset and file.length() as the length, this can 512 // only be called for actual files. Right now it's only called by the flush() family of 513 // functions, which require an updatable dictionary, so it's okay. But beware. 514 loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */, 515 dictFile.length(), mIsUpdatable); 516 } 517 518 // Flush to dict file if the dictionary has been updated. flush()519 public boolean flush() { 520 if (!isValidDictionary()) return false; 521 if (mHasUpdated) { 522 if (!flushNative(mNativeDict, mDictFilePath)) { 523 return false; 524 } 525 reopen(); 526 } 527 return true; 528 } 529 530 // Run GC and flush to dict file if the dictionary has been updated. flushWithGCIfHasUpdated()531 public boolean flushWithGCIfHasUpdated() { 532 if (mHasUpdated) { 533 return flushWithGC(); 534 } 535 return true; 536 } 537 538 // Run GC and flush to dict file. flushWithGC()539 public boolean flushWithGC() { 540 if (!isValidDictionary()) return false; 541 if (!flushWithGCNative(mNativeDict, mDictFilePath)) { 542 return false; 543 } 544 reopen(); 545 return true; 546 } 547 548 /** 549 * Checks whether GC is needed to run or not. 550 * @param mindsBlockByGC Whether to mind operations blocked by GC. We don't need to care about 551 * the blocking in some situations such as in idle time or just before closing. 552 * @return whether GC is needed to run or not. 553 */ needsToRunGC(final boolean mindsBlockByGC)554 public boolean needsToRunGC(final boolean mindsBlockByGC) { 555 if (!isValidDictionary()) return false; 556 return needsToRunGCNative(mNativeDict, mindsBlockByGC); 557 } 558 migrateTo(final int newFormatVersion)559 public boolean migrateTo(final int newFormatVersion) { 560 if (!isValidDictionary()) { 561 return false; 562 } 563 final File isMigratingDir = 564 new File(mDictFilePath + DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION); 565 if (isMigratingDir.exists()) { 566 isMigratingDir.delete(); 567 Log.e(TAG, "Previous migration attempt failed probably due to a crash. " 568 + "Giving up using the old dictionary (" + mDictFilePath + ")."); 569 return false; 570 } 571 if (!isMigratingDir.mkdir()) { 572 Log.e(TAG, "Cannot create a dir (" + isMigratingDir.getAbsolutePath() 573 + ") to record migration."); 574 return false; 575 } 576 try { 577 final String tmpDictFilePath = mDictFilePath + DICT_FILE_NAME_SUFFIX_FOR_MIGRATION; 578 if (!migrateNative(mNativeDict, tmpDictFilePath, newFormatVersion)) { 579 return false; 580 } 581 close(); 582 final File dictFile = new File(mDictFilePath); 583 final File tmpDictFile = new File(tmpDictFilePath); 584 if (!FileUtils.deleteRecursively(dictFile)) { 585 return false; 586 } 587 if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) { 588 return false; 589 } 590 loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */, 591 dictFile.length(), mIsUpdatable); 592 return true; 593 } finally { 594 isMigratingDir.delete(); 595 } 596 } 597 598 @UsedForTesting getPropertyForTest(final String query)599 public String getPropertyForTest(final String query) { 600 if (!isValidDictionary()) return ""; 601 return getPropertyNative(mNativeDict, query); 602 } 603 604 @Override shouldAutoCommit(final SuggestedWordInfo candidate)605 public boolean shouldAutoCommit(final SuggestedWordInfo candidate) { 606 return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT; 607 } 608 609 @Override close()610 public void close() { 611 synchronized (mDicTraverseSessions) { 612 final int sessionsSize = mDicTraverseSessions.size(); 613 for (int index = 0; index < sessionsSize; ++index) { 614 final DicTraverseSession traverseSession = mDicTraverseSessions.valueAt(index); 615 if (traverseSession != null) { 616 traverseSession.close(); 617 } 618 } 619 mDicTraverseSessions.clear(); 620 } 621 closeInternalLocked(); 622 } 623 closeInternalLocked()624 private synchronized void closeInternalLocked() { 625 if (mNativeDict != 0) { 626 closeNative(mNativeDict); 627 mNativeDict = 0; 628 } 629 } 630 631 // TODO: Manage BinaryDictionary instances without using WeakReference or something. 632 @Override finalize()633 protected void finalize() throws Throwable { 634 try { 635 closeInternalLocked(); 636 } finally { 637 super.finalize(); 638 } 639 } 640 } 641