1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin.spellcheck; 18 19 import android.content.ContentResolver; 20 import android.database.ContentObserver; 21 import android.os.Binder; 22 import android.provider.UserDictionary.Words; 23 import android.service.textservice.SpellCheckerService.Session; 24 import android.text.TextUtils; 25 import android.util.Log; 26 import android.util.LruCache; 27 import android.view.textservice.SuggestionsInfo; 28 import android.view.textservice.TextInfo; 29 30 import com.android.inputmethod.compat.SuggestionsInfoCompatUtils; 31 import com.android.inputmethod.keyboard.Keyboard; 32 import com.android.inputmethod.keyboard.ProximityInfo; 33 import com.android.inputmethod.latin.Constants; 34 import com.android.inputmethod.latin.PrevWordsInfo; 35 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 36 import com.android.inputmethod.latin.WordComposer; 37 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; 38 import com.android.inputmethod.latin.utils.CoordinateUtils; 39 import com.android.inputmethod.latin.utils.LocaleUtils; 40 import com.android.inputmethod.latin.utils.ScriptUtils; 41 import com.android.inputmethod.latin.utils.StringUtils; 42 import com.android.inputmethod.latin.utils.SuggestionResults; 43 44 import java.util.ArrayList; 45 import java.util.Locale; 46 47 public abstract class AndroidWordLevelSpellCheckerSession extends Session { 48 private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName(); 49 private static final boolean DBG = false; 50 51 public final static String[] EMPTY_STRING_ARRAY = new String[0]; 52 53 // Immutable, but not available in the constructor. 54 private Locale mLocale; 55 // Cache this for performance 56 private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now. 57 private final AndroidSpellCheckerService mService; 58 protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache(); 59 private final ContentObserver mObserver; 60 61 private static final class SuggestionsParams { 62 public final String[] mSuggestions; 63 public final int mFlags; SuggestionsParams(String[] suggestions, int flags)64 public SuggestionsParams(String[] suggestions, int flags) { 65 mSuggestions = suggestions; 66 mFlags = flags; 67 } 68 } 69 70 protected static final class SuggestionsCache { 71 private static final char CHAR_DELIMITER = '\uFFFC'; 72 private static final int MAX_CACHE_SIZE = 50; 73 private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache = 74 new LruCache<>(MAX_CACHE_SIZE); 75 76 // TODO: Support n-gram input generateKey(final String query, final PrevWordsInfo prevWordsInfo)77 private static String generateKey(final String query, final PrevWordsInfo prevWordsInfo) { 78 if (TextUtils.isEmpty(query) || !prevWordsInfo.isValid()) { 79 return query; 80 } 81 return query + CHAR_DELIMITER + prevWordsInfo; 82 } 83 getSuggestionsFromCache(String query, final PrevWordsInfo prevWordsInfo)84 public SuggestionsParams getSuggestionsFromCache(String query, 85 final PrevWordsInfo prevWordsInfo) { 86 return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWordsInfo)); 87 } 88 putSuggestionsToCache( final String query, final PrevWordsInfo prevWordsInfo, final String[] suggestions, final int flags)89 public void putSuggestionsToCache( 90 final String query, final PrevWordsInfo prevWordsInfo, 91 final String[] suggestions, final int flags) { 92 if (suggestions == null || TextUtils.isEmpty(query)) { 93 return; 94 } 95 mUnigramSuggestionsInfoCache.put( 96 generateKey(query, prevWordsInfo), new SuggestionsParams(suggestions, flags)); 97 } 98 clearCache()99 public void clearCache() { 100 mUnigramSuggestionsInfoCache.evictAll(); 101 } 102 } 103 AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service)104 AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) { 105 mService = service; 106 final ContentResolver cres = service.getContentResolver(); 107 108 mObserver = new ContentObserver(null) { 109 @Override 110 public void onChange(boolean self) { 111 mSuggestionsCache.clearCache(); 112 } 113 }; 114 cres.registerContentObserver(Words.CONTENT_URI, true, mObserver); 115 } 116 117 @Override onCreate()118 public void onCreate() { 119 final String localeString = getLocale(); 120 mLocale = LocaleUtils.constructLocaleFromString(localeString); 121 mScript = ScriptUtils.getScriptFromSpellCheckerLocale(mLocale); 122 } 123 124 @Override onClose()125 public void onClose() { 126 final ContentResolver cres = mService.getContentResolver(); 127 cres.unregisterContentObserver(mObserver); 128 } 129 130 private static final int CHECKABILITY_CHECKABLE = 0; 131 private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1; 132 private static final int CHECKABILITY_CONTAINS_PERIOD = 2; 133 private static final int CHECKABILITY_EMAIL_OR_URL = 3; 134 private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4; 135 private static final int CHECKABILITY_TOO_SHORT = 5; 136 /** 137 * Finds out whether a particular string should be filtered out of spell checking. 138 * 139 * This will loosely match URLs, numbers, symbols. To avoid always underlining words that 140 * we know we will never recognize, this accepts a script identifier that should be one 141 * of the SCRIPT_* constants defined above, to rule out quickly characters from very 142 * different languages. 143 * 144 * @param text the string to evaluate. 145 * @param script the identifier for the script this spell checker recognizes 146 * @return one of the FILTER_OUT_* constants above. 147 */ getCheckabilityInScript(final String text, final int script)148 private static int getCheckabilityInScript(final String text, final int script) { 149 if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT; 150 151 // TODO: check if an equivalent processing can't be done more quickly with a 152 // compiled regexp. 153 // Filter by first letter 154 final int firstCodePoint = text.codePointAt(0); 155 // Filter out words that don't start with a letter or an apostrophe 156 if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script) 157 && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE; 158 159 // Filter contents 160 final int length = text.length(); 161 int letterCount = 0; 162 for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { 163 final int codePoint = text.codePointAt(i); 164 // Any word containing a COMMERCIAL_AT is probably an e-mail address 165 // Any word containing a SLASH is probably either an ad-hoc combination of two 166 // words or a URI - in either case we don't want to spell check that 167 if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) { 168 return CHECKABILITY_EMAIL_OR_URL; 169 } 170 // If the string contains a period, native returns strange suggestions (it seems 171 // to return suggestions for everything up to the period only and to ignore the 172 // rest), so we suppress lookup if there is a period. 173 // TODO: investigate why native returns these suggestions and remove this code. 174 if (Constants.CODE_PERIOD == codePoint) { 175 return CHECKABILITY_CONTAINS_PERIOD; 176 } 177 if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount; 178 } 179 // Guestimate heuristic: perform spell checking if at least 3/4 of the characters 180 // in this word are letters 181 return (letterCount * 4 < length * 3) 182 ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE; 183 } 184 185 /** 186 * Helper method to test valid capitalizations of a word. 187 * 188 * If the "text" is lower-case, we test only the exact string. 189 * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased 190 * version of it "text". 191 * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased 192 * version of it "text" and the capitalized version of it "Text". 193 */ isInDictForAnyCapitalization(final String text, final int capitalizeType)194 private boolean isInDictForAnyCapitalization(final String text, final int capitalizeType) { 195 // If the word is in there as is, then it's in the dictionary. If not, we'll test lower 196 // case versions, but only if the word is not already all-lower case or mixed case. 197 if (mService.isValidWord(mLocale, text)) return true; 198 if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false; 199 200 // If we come here, we have a capitalized word (either First- or All-). 201 // Downcase the word and look it up again. If the word is only capitalized, we 202 // tested all possibilities, so if it's still negative we can return false. 203 final String lowerCaseText = text.toLowerCase(mLocale); 204 if (mService.isValidWord(mLocale, lowerCaseText)) return true; 205 if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false; 206 207 // If the lower case version is not in the dictionary, it's still possible 208 // that we have an all-caps version of a word that needs to be capitalized 209 // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans". 210 return mService.isValidWord(mLocale, 211 StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale)); 212 } 213 214 // Note : this must be reentrant 215 /** 216 * Gets a list of suggestions for a specific string. This returns a list of possible 217 * corrections for the text passed as an argument. It may split or group words, and 218 * even perform grammatical analysis. 219 */ onGetSuggestionsInternal(final TextInfo textInfo, final int suggestionsLimit)220 private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo, 221 final int suggestionsLimit) { 222 return onGetSuggestionsInternal(textInfo, null, suggestionsLimit); 223 } 224 onGetSuggestionsInternal( final TextInfo textInfo, final PrevWordsInfo prevWordsInfo, final int suggestionsLimit)225 protected SuggestionsInfo onGetSuggestionsInternal( 226 final TextInfo textInfo, final PrevWordsInfo prevWordsInfo, 227 final int suggestionsLimit) { 228 try { 229 final String inText = textInfo.getText(); 230 final SuggestionsParams cachedSuggestionsParams = 231 mSuggestionsCache.getSuggestionsFromCache(inText, prevWordsInfo); 232 if (cachedSuggestionsParams != null) { 233 if (DBG) { 234 Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags); 235 } 236 return new SuggestionsInfo( 237 cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions); 238 } 239 final int checkability = getCheckabilityInScript(inText, mScript); 240 if (CHECKABILITY_CHECKABLE != checkability) { 241 if (CHECKABILITY_CONTAINS_PERIOD == checkability) { 242 final String[] splitText = inText.split(Constants.REGEXP_PERIOD); 243 boolean allWordsAreValid = true; 244 for (final String word : splitText) { 245 if (!mService.isValidWord(mLocale, word)) { 246 allWordsAreValid = false; 247 break; 248 } 249 } 250 if (allWordsAreValid) { 251 return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO 252 | SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS, 253 new String[] { 254 TextUtils.join(Constants.STRING_SPACE, splitText) }); 255 } 256 } 257 return mService.isValidWord(mLocale, inText) ? 258 AndroidSpellCheckerService.getInDictEmptySuggestions() : 259 AndroidSpellCheckerService.getNotInDictEmptySuggestions( 260 CHECKABILITY_CONTAINS_PERIOD == checkability /* reportAsTypo */); 261 } 262 final String text = inText.replaceAll( 263 AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE); 264 final int capitalizeType = StringUtils.getCapitalizationType(text); 265 boolean isInDict = true; 266 if (!mService.hasMainDictionaryForLocale(mLocale)) { 267 return AndroidSpellCheckerService.getNotInDictEmptySuggestions( 268 false /* reportAsTypo */); 269 } 270 final Keyboard keyboard = mService.getKeyboardForLocale(mLocale); 271 final WordComposer composer = new WordComposer(); 272 final int[] codePoints = StringUtils.toCodePointArray(text); 273 final int[] coordinates; 274 final ProximityInfo proximityInfo; 275 if (null == keyboard) { 276 coordinates = CoordinateUtils.newCoordinateArray(codePoints.length, 277 Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE); 278 proximityInfo = null; 279 } else { 280 coordinates = keyboard.getCoordinates(codePoints); 281 proximityInfo = keyboard.getProximityInfo(); 282 } 283 composer.setComposingWord(codePoints, coordinates); 284 // TODO: Don't gather suggestions if the limit is <= 0 unless necessary 285 final SuggestionResults suggestionResults = mService.getSuggestionResults( 286 mLocale, composer, prevWordsInfo, proximityInfo); 287 final Result result = getResult(capitalizeType, mLocale, suggestionsLimit, 288 mService.getRecommendedThreshold(), text, suggestionResults); 289 isInDict = isInDictForAnyCapitalization(text, capitalizeType); 290 if (DBG) { 291 Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " 292 + suggestionsLimit); 293 Log.i(TAG, "IsInDict = " + isInDict); 294 Log.i(TAG, "LooksLikeTypo = " + (!isInDict)); 295 Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions); 296 if (null != result.mSuggestions) { 297 for (String suggestion : result.mSuggestions) { 298 Log.i(TAG, suggestion); 299 } 300 } 301 } 302 303 final int flags = 304 (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY 305 : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO) 306 | (result.mHasRecommendedSuggestions 307 ? SuggestionsInfoCompatUtils 308 .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS() 309 : 0); 310 final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions); 311 mSuggestionsCache.putSuggestionsToCache(text, prevWordsInfo, result.mSuggestions, 312 flags); 313 return retval; 314 } catch (RuntimeException e) { 315 // Don't kill the keyboard if there is a bug in the spell checker 316 if (DBG) { 317 throw e; 318 } else { 319 Log.e(TAG, "Exception while spellcheking", e); 320 return AndroidSpellCheckerService.getNotInDictEmptySuggestions( 321 false /* reportAsTypo */); 322 } 323 } 324 } 325 326 private static final class Result { 327 public final String[] mSuggestions; 328 public final boolean mHasRecommendedSuggestions; Result(final String[] gatheredSuggestions, final boolean hasRecommendedSuggestions)329 public Result(final String[] gatheredSuggestions, 330 final boolean hasRecommendedSuggestions) { 331 mSuggestions = gatheredSuggestions; 332 mHasRecommendedSuggestions = hasRecommendedSuggestions; 333 } 334 } 335 getResult(final int capitalizeType, final Locale locale, final int suggestionsLimit, final float recommendedThreshold, final String originalText, final SuggestionResults suggestionResults)336 private static Result getResult(final int capitalizeType, final Locale locale, 337 final int suggestionsLimit, final float recommendedThreshold, final String originalText, 338 final SuggestionResults suggestionResults) { 339 if (suggestionResults.isEmpty() || suggestionsLimit <= 0) { 340 return new Result(null /* gatheredSuggestions */, 341 false /* hasRecommendedSuggestions */); 342 } 343 if (DBG) { 344 for (final SuggestedWordInfo suggestedWordInfo : suggestionResults) { 345 Log.i(TAG, "" + suggestedWordInfo.mScore + " " + suggestedWordInfo.mWord); 346 } 347 } 348 final ArrayList<String> suggestions = new ArrayList<>(); 349 for (final SuggestedWordInfo suggestedWordInfo : suggestionResults) { 350 final String suggestion; 351 if (StringUtils.CAPITALIZE_ALL == capitalizeType) { 352 suggestion = suggestedWordInfo.mWord.toUpperCase(locale); 353 } else if (StringUtils.CAPITALIZE_FIRST == capitalizeType) { 354 suggestion = StringUtils.capitalizeFirstCodePoint( 355 suggestedWordInfo.mWord, locale); 356 } else { 357 suggestion = suggestedWordInfo.mWord; 358 } 359 suggestions.add(suggestion); 360 } 361 StringUtils.removeDupes(suggestions); 362 // This returns a String[], while toArray() returns an Object[] which cannot be cast 363 // into a String[]. 364 final String[] gatheredSuggestions = 365 suggestions.subList(0, Math.min(suggestions.size(), suggestionsLimit)) 366 .toArray(EMPTY_STRING_ARRAY); 367 368 final int bestScore = suggestionResults.first().mScore; 369 final String bestSuggestion = suggestions.get(0); 370 final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore( 371 originalText, bestSuggestion.toString(), bestScore); 372 final boolean hasRecommendedSuggestions = (normalizedScore > recommendedThreshold); 373 if (DBG) { 374 Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore); 375 Log.i(TAG, "Normalized score = " + normalizedScore 376 + " (threshold " + recommendedThreshold 377 + ") => hasRecommendedSuggestions = " + hasRecommendedSuggestions); 378 } 379 return new Result(gatheredSuggestions, hasRecommendedSuggestions); 380 } 381 382 /* 383 * The spell checker acts on its own behalf. That is needed, in particular, to be able to 384 * access the dictionary files, which the provider restricts to the identity of Latin IME. 385 * Since it's called externally by the application, the spell checker is using the identity 386 * of the application by default unless we clearCallingIdentity. 387 * That's what the following method does. 388 */ 389 @Override onGetSuggestions(final TextInfo textInfo, final int suggestionsLimit)390 public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, 391 final int suggestionsLimit) { 392 long ident = Binder.clearCallingIdentity(); 393 try { 394 return onGetSuggestionsInternal(textInfo, suggestionsLimit); 395 } finally { 396 Binder.restoreCallingIdentity(ident); 397 } 398 } 399 } 400