1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.latin.spellcheck;
18 
19 import android.content.ContentResolver;
20 import android.database.ContentObserver;
21 import android.os.Binder;
22 import android.provider.UserDictionary.Words;
23 import android.service.textservice.SpellCheckerService.Session;
24 import android.text.TextUtils;
25 import android.util.Log;
26 import android.util.LruCache;
27 import android.view.textservice.SuggestionsInfo;
28 import android.view.textservice.TextInfo;
29 
30 import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
31 import com.android.inputmethod.keyboard.Keyboard;
32 import com.android.inputmethod.keyboard.ProximityInfo;
33 import com.android.inputmethod.latin.Constants;
34 import com.android.inputmethod.latin.PrevWordsInfo;
35 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
36 import com.android.inputmethod.latin.WordComposer;
37 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
38 import com.android.inputmethod.latin.utils.CoordinateUtils;
39 import com.android.inputmethod.latin.utils.LocaleUtils;
40 import com.android.inputmethod.latin.utils.ScriptUtils;
41 import com.android.inputmethod.latin.utils.StringUtils;
42 import com.android.inputmethod.latin.utils.SuggestionResults;
43 
44 import java.util.ArrayList;
45 import java.util.Locale;
46 
47 public abstract class AndroidWordLevelSpellCheckerSession extends Session {
48     private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName();
49     private static final boolean DBG = false;
50 
51     public final static String[] EMPTY_STRING_ARRAY = new String[0];
52 
53     // Immutable, but not available in the constructor.
54     private Locale mLocale;
55     // Cache this for performance
56     private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
57     private final AndroidSpellCheckerService mService;
58     protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
59     private final ContentObserver mObserver;
60 
61     private static final class SuggestionsParams {
62         public final String[] mSuggestions;
63         public final int mFlags;
SuggestionsParams(String[] suggestions, int flags)64         public SuggestionsParams(String[] suggestions, int flags) {
65             mSuggestions = suggestions;
66             mFlags = flags;
67         }
68     }
69 
70     protected static final class SuggestionsCache {
71         private static final char CHAR_DELIMITER = '\uFFFC';
72         private static final int MAX_CACHE_SIZE = 50;
73         private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
74                 new LruCache<>(MAX_CACHE_SIZE);
75 
76         // TODO: Support n-gram input
generateKey(final String query, final PrevWordsInfo prevWordsInfo)77         private static String generateKey(final String query, final PrevWordsInfo prevWordsInfo) {
78             if (TextUtils.isEmpty(query) || !prevWordsInfo.isValid()) {
79                 return query;
80             }
81             return query + CHAR_DELIMITER + prevWordsInfo;
82         }
83 
getSuggestionsFromCache(String query, final PrevWordsInfo prevWordsInfo)84         public SuggestionsParams getSuggestionsFromCache(String query,
85                 final PrevWordsInfo prevWordsInfo) {
86             return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWordsInfo));
87         }
88 
putSuggestionsToCache( final String query, final PrevWordsInfo prevWordsInfo, final String[] suggestions, final int flags)89         public void putSuggestionsToCache(
90                 final String query, final PrevWordsInfo prevWordsInfo,
91                 final String[] suggestions, final int flags) {
92             if (suggestions == null || TextUtils.isEmpty(query)) {
93                 return;
94             }
95             mUnigramSuggestionsInfoCache.put(
96                     generateKey(query, prevWordsInfo), new SuggestionsParams(suggestions, flags));
97         }
98 
clearCache()99         public void clearCache() {
100             mUnigramSuggestionsInfoCache.evictAll();
101         }
102     }
103 
AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service)104     AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) {
105         mService = service;
106         final ContentResolver cres = service.getContentResolver();
107 
108         mObserver = new ContentObserver(null) {
109             @Override
110             public void onChange(boolean self) {
111                 mSuggestionsCache.clearCache();
112             }
113         };
114         cres.registerContentObserver(Words.CONTENT_URI, true, mObserver);
115     }
116 
117     @Override
onCreate()118     public void onCreate() {
119         final String localeString = getLocale();
120         mLocale = LocaleUtils.constructLocaleFromString(localeString);
121         mScript = ScriptUtils.getScriptFromSpellCheckerLocale(mLocale);
122     }
123 
124     @Override
onClose()125     public void onClose() {
126         final ContentResolver cres = mService.getContentResolver();
127         cres.unregisterContentObserver(mObserver);
128     }
129 
130     private static final int CHECKABILITY_CHECKABLE = 0;
131     private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1;
132     private static final int CHECKABILITY_CONTAINS_PERIOD = 2;
133     private static final int CHECKABILITY_EMAIL_OR_URL = 3;
134     private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4;
135     private static final int CHECKABILITY_TOO_SHORT = 5;
136     /**
137      * Finds out whether a particular string should be filtered out of spell checking.
138      *
139      * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
140      * we know we will never recognize, this accepts a script identifier that should be one
141      * of the SCRIPT_* constants defined above, to rule out quickly characters from very
142      * different languages.
143      *
144      * @param text the string to evaluate.
145      * @param script the identifier for the script this spell checker recognizes
146      * @return one of the FILTER_OUT_* constants above.
147      */
getCheckabilityInScript(final String text, final int script)148     private static int getCheckabilityInScript(final String text, final int script) {
149         if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT;
150 
151         // TODO: check if an equivalent processing can't be done more quickly with a
152         // compiled regexp.
153         // Filter by first letter
154         final int firstCodePoint = text.codePointAt(0);
155         // Filter out words that don't start with a letter or an apostrophe
156         if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script)
157                 && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;
158 
159         // Filter contents
160         final int length = text.length();
161         int letterCount = 0;
162         for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
163             final int codePoint = text.codePointAt(i);
164             // Any word containing a COMMERCIAL_AT is probably an e-mail address
165             // Any word containing a SLASH is probably either an ad-hoc combination of two
166             // words or a URI - in either case we don't want to spell check that
167             if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) {
168                 return CHECKABILITY_EMAIL_OR_URL;
169             }
170             // If the string contains a period, native returns strange suggestions (it seems
171             // to return suggestions for everything up to the period only and to ignore the
172             // rest), so we suppress lookup if there is a period.
173             // TODO: investigate why native returns these suggestions and remove this code.
174             if (Constants.CODE_PERIOD == codePoint) {
175                 return CHECKABILITY_CONTAINS_PERIOD;
176             }
177             if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount;
178         }
179         // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
180         // in this word are letters
181         return (letterCount * 4 < length * 3)
182                 ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE;
183     }
184 
185     /**
186      * Helper method to test valid capitalizations of a word.
187      *
188      * If the "text" is lower-case, we test only the exact string.
189      * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased
190      *  version of it "text".
191      * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased
192      *  version of it "text" and the capitalized version of it "Text".
193      */
isInDictForAnyCapitalization(final String text, final int capitalizeType)194     private boolean isInDictForAnyCapitalization(final String text, final int capitalizeType) {
195         // If the word is in there as is, then it's in the dictionary. If not, we'll test lower
196         // case versions, but only if the word is not already all-lower case or mixed case.
197         if (mService.isValidWord(mLocale, text)) return true;
198         if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false;
199 
200         // If we come here, we have a capitalized word (either First- or All-).
201         // Downcase the word and look it up again. If the word is only capitalized, we
202         // tested all possibilities, so if it's still negative we can return false.
203         final String lowerCaseText = text.toLowerCase(mLocale);
204         if (mService.isValidWord(mLocale, lowerCaseText)) return true;
205         if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false;
206 
207         // If the lower case version is not in the dictionary, it's still possible
208         // that we have an all-caps version of a word that needs to be capitalized
209         // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans".
210         return mService.isValidWord(mLocale,
211                 StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale));
212     }
213 
214     // Note : this must be reentrant
215     /**
216      * Gets a list of suggestions for a specific string. This returns a list of possible
217      * corrections for the text passed as an argument. It may split or group words, and
218      * even perform grammatical analysis.
219      */
onGetSuggestionsInternal(final TextInfo textInfo, final int suggestionsLimit)220     private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo,
221             final int suggestionsLimit) {
222         return onGetSuggestionsInternal(textInfo, null, suggestionsLimit);
223     }
224 
onGetSuggestionsInternal( final TextInfo textInfo, final PrevWordsInfo prevWordsInfo, final int suggestionsLimit)225     protected SuggestionsInfo onGetSuggestionsInternal(
226             final TextInfo textInfo, final PrevWordsInfo prevWordsInfo,
227             final int suggestionsLimit) {
228         try {
229             final String inText = textInfo.getText();
230             final SuggestionsParams cachedSuggestionsParams =
231                     mSuggestionsCache.getSuggestionsFromCache(inText, prevWordsInfo);
232             if (cachedSuggestionsParams != null) {
233                 if (DBG) {
234                     Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags);
235                 }
236                 return new SuggestionsInfo(
237                         cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions);
238             }
239             final int checkability = getCheckabilityInScript(inText, mScript);
240             if (CHECKABILITY_CHECKABLE != checkability) {
241                 if (CHECKABILITY_CONTAINS_PERIOD == checkability) {
242                     final String[] splitText = inText.split(Constants.REGEXP_PERIOD);
243                     boolean allWordsAreValid = true;
244                     for (final String word : splitText) {
245                         if (!mService.isValidWord(mLocale, word)) {
246                             allWordsAreValid = false;
247                             break;
248                         }
249                     }
250                     if (allWordsAreValid) {
251                         return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
252                                 | SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS,
253                                 new String[] {
254                                         TextUtils.join(Constants.STRING_SPACE, splitText) });
255                     }
256                 }
257                 return mService.isValidWord(mLocale, inText) ?
258                         AndroidSpellCheckerService.getInDictEmptySuggestions() :
259                         AndroidSpellCheckerService.getNotInDictEmptySuggestions(
260                                 CHECKABILITY_CONTAINS_PERIOD == checkability /* reportAsTypo */);
261             }
262             final String text = inText.replaceAll(
263                     AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE);
264             final int capitalizeType = StringUtils.getCapitalizationType(text);
265             boolean isInDict = true;
266             if (!mService.hasMainDictionaryForLocale(mLocale)) {
267                 return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
268                         false /* reportAsTypo */);
269             }
270             final Keyboard keyboard = mService.getKeyboardForLocale(mLocale);
271             final WordComposer composer = new WordComposer();
272             final int[] codePoints = StringUtils.toCodePointArray(text);
273             final int[] coordinates;
274             final ProximityInfo proximityInfo;
275             if (null == keyboard) {
276                 coordinates = CoordinateUtils.newCoordinateArray(codePoints.length,
277                         Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE);
278                 proximityInfo = null;
279             } else {
280                 coordinates = keyboard.getCoordinates(codePoints);
281                 proximityInfo = keyboard.getProximityInfo();
282             }
283             composer.setComposingWord(codePoints, coordinates);
284             // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
285             final SuggestionResults suggestionResults = mService.getSuggestionResults(
286                     mLocale, composer, prevWordsInfo, proximityInfo);
287             final Result result = getResult(capitalizeType, mLocale, suggestionsLimit,
288                     mService.getRecommendedThreshold(), text, suggestionResults);
289             isInDict = isInDictForAnyCapitalization(text, capitalizeType);
290             if (DBG) {
291                 Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
292                         + suggestionsLimit);
293                 Log.i(TAG, "IsInDict = " + isInDict);
294                 Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
295                 Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions);
296                 if (null != result.mSuggestions) {
297                     for (String suggestion : result.mSuggestions) {
298                         Log.i(TAG, suggestion);
299                     }
300                 }
301             }
302 
303             final int flags =
304                     (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
305                             : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO)
306                     | (result.mHasRecommendedSuggestions
307                             ? SuggestionsInfoCompatUtils
308                                     .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
309                             : 0);
310             final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
311             mSuggestionsCache.putSuggestionsToCache(text, prevWordsInfo, result.mSuggestions,
312                     flags);
313             return retval;
314         } catch (RuntimeException e) {
315             // Don't kill the keyboard if there is a bug in the spell checker
316             if (DBG) {
317                 throw e;
318             } else {
319                 Log.e(TAG, "Exception while spellcheking", e);
320                 return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
321                         false /* reportAsTypo */);
322             }
323         }
324     }
325 
326     private static final class Result {
327         public final String[] mSuggestions;
328         public final boolean mHasRecommendedSuggestions;
Result(final String[] gatheredSuggestions, final boolean hasRecommendedSuggestions)329         public Result(final String[] gatheredSuggestions,
330                 final boolean hasRecommendedSuggestions) {
331             mSuggestions = gatheredSuggestions;
332             mHasRecommendedSuggestions = hasRecommendedSuggestions;
333         }
334     }
335 
getResult(final int capitalizeType, final Locale locale, final int suggestionsLimit, final float recommendedThreshold, final String originalText, final SuggestionResults suggestionResults)336     private static Result getResult(final int capitalizeType, final Locale locale,
337             final int suggestionsLimit, final float recommendedThreshold, final String originalText,
338             final SuggestionResults suggestionResults) {
339         if (suggestionResults.isEmpty() || suggestionsLimit <= 0) {
340             return new Result(null /* gatheredSuggestions */,
341                     false /* hasRecommendedSuggestions */);
342         }
343         if (DBG) {
344             for (final SuggestedWordInfo suggestedWordInfo : suggestionResults) {
345                 Log.i(TAG, "" + suggestedWordInfo.mScore + " " + suggestedWordInfo.mWord);
346             }
347         }
348         final ArrayList<String> suggestions = new ArrayList<>();
349         for (final SuggestedWordInfo suggestedWordInfo : suggestionResults) {
350             final String suggestion;
351             if (StringUtils.CAPITALIZE_ALL == capitalizeType) {
352                 suggestion = suggestedWordInfo.mWord.toUpperCase(locale);
353             } else if (StringUtils.CAPITALIZE_FIRST == capitalizeType) {
354                 suggestion = StringUtils.capitalizeFirstCodePoint(
355                         suggestedWordInfo.mWord, locale);
356             } else {
357                 suggestion = suggestedWordInfo.mWord;
358             }
359             suggestions.add(suggestion);
360         }
361         StringUtils.removeDupes(suggestions);
362         // This returns a String[], while toArray() returns an Object[] which cannot be cast
363         // into a String[].
364         final String[] gatheredSuggestions =
365                 suggestions.subList(0, Math.min(suggestions.size(), suggestionsLimit))
366                         .toArray(EMPTY_STRING_ARRAY);
367 
368         final int bestScore = suggestionResults.first().mScore;
369         final String bestSuggestion = suggestions.get(0);
370         final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
371                 originalText, bestSuggestion.toString(), bestScore);
372         final boolean hasRecommendedSuggestions = (normalizedScore > recommendedThreshold);
373         if (DBG) {
374             Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
375             Log.i(TAG, "Normalized score = " + normalizedScore
376                     + " (threshold " + recommendedThreshold
377                     + ") => hasRecommendedSuggestions = " + hasRecommendedSuggestions);
378         }
379         return new Result(gatheredSuggestions, hasRecommendedSuggestions);
380     }
381 
382     /*
383      * The spell checker acts on its own behalf. That is needed, in particular, to be able to
384      * access the dictionary files, which the provider restricts to the identity of Latin IME.
385      * Since it's called externally by the application, the spell checker is using the identity
386      * of the application by default unless we clearCallingIdentity.
387      * That's what the following method does.
388      */
389     @Override
onGetSuggestions(final TextInfo textInfo, final int suggestionsLimit)390     public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
391             final int suggestionsLimit) {
392         long ident = Binder.clearCallingIdentity();
393         try {
394             return onGetSuggestionsInternal(textInfo, suggestionsLimit);
395         } finally {
396             Binder.restoreCallingIdentity(ident);
397         }
398     }
399 }
400