1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.latin;
18 
19 import android.text.TextUtils;
20 
21 import com.android.inputmethod.annotations.UsedForTesting;
22 import com.android.inputmethod.latin.common.StringUtils;
23 import com.android.inputmethod.latin.define.DecoderSpecificConstants;
24 
25 import java.util.ArrayList;
26 import java.util.Arrays;
27 
28 import javax.annotation.Nonnull;
29 
30 /**
31  * Class to represent information of previous words. This class is used to add n-gram entries
32  * into binary dictionaries, to get predictions, and to get suggestions.
33  */
34 public class NgramContext {
35     @Nonnull
36     public static final NgramContext EMPTY_PREV_WORDS_INFO =
37             new NgramContext(WordInfo.EMPTY_WORD_INFO);
38     @Nonnull
39     public static final NgramContext BEGINNING_OF_SENTENCE =
40             new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);
41 
42     public static final String BEGINNING_OF_SENTENCE_TAG = "<S>";
43 
44     public static final String CONTEXT_SEPARATOR = " ";
45 
getEmptyPrevWordsContext(int maxPrevWordCount)46     public static NgramContext getEmptyPrevWordsContext(int maxPrevWordCount) {
47         return new NgramContext(maxPrevWordCount, WordInfo.EMPTY_WORD_INFO);
48     }
49 
50     /**
51      * Word information used to represent previous words information.
52      */
53     public static class WordInfo {
54         @Nonnull
55         public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
56         @Nonnull
57         public static final WordInfo BEGINNING_OF_SENTENCE_WORD_INFO = new WordInfo();
58 
59         // This is an empty char sequence when mIsBeginningOfSentence is true.
60         public final CharSequence mWord;
61         // TODO: Have sentence separator.
62         // Whether the current context is beginning of sentence or not. This is true when composing
63         // at the beginning of an input field or composing a word after a sentence separator.
64         public final boolean mIsBeginningOfSentence;
65 
66         // Beginning of sentence.
WordInfo()67         private WordInfo() {
68             mWord = "";
69             mIsBeginningOfSentence = true;
70         }
71 
WordInfo(final CharSequence word)72         public WordInfo(final CharSequence word) {
73             mWord = word;
74             mIsBeginningOfSentence = false;
75         }
76 
isValid()77         public boolean isValid() {
78             return mWord != null;
79         }
80 
81         @Override
hashCode()82         public int hashCode() {
83             return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } );
84         }
85 
86         @Override
equals(Object o)87         public boolean equals(Object o) {
88             if (this == o) return true;
89             if (!(o instanceof WordInfo)) return false;
90             final WordInfo wordInfo = (WordInfo)o;
91             if (mWord == null || wordInfo.mWord == null) {
92                 return mWord == wordInfo.mWord
93                         && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
94             }
95             return TextUtils.equals(mWord, wordInfo.mWord)
96                     && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
97         }
98     }
99 
100     // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
101     // have any context for that previous word including the "beginning of sentence context" - we
102     // just don't know what to predict using the information. An example of that is after a comma.
103     // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
104     // WordComposer was reset and before starting a new composing word, but we should never be
105     // calling getSuggetions* in this situation.
106     private final WordInfo[] mPrevWordsInfo;
107     private final int mPrevWordsCount;
108 
109     private final int mMaxPrevWordCount;
110 
111     // Construct from the previous word information.
NgramContext(final WordInfo... prevWordsInfo)112     public NgramContext(final WordInfo... prevWordsInfo) {
113         this(DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, prevWordsInfo);
114     }
115 
NgramContext(final int maxPrevWordCount, final WordInfo... prevWordsInfo)116     public NgramContext(final int maxPrevWordCount, final WordInfo... prevWordsInfo) {
117         mPrevWordsInfo = prevWordsInfo;
118         mPrevWordsCount = prevWordsInfo.length;
119         mMaxPrevWordCount = maxPrevWordCount;
120     }
121 
122     /**
123      * Create next prevWordsInfo using current prevWordsInfo.
124      */
125     @Nonnull
getNextNgramContext(final WordInfo wordInfo)126     public NgramContext getNextNgramContext(final WordInfo wordInfo) {
127         final int nextPrevWordCount = Math.min(mMaxPrevWordCount, mPrevWordsCount + 1);
128         final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
129         prevWordsInfo[0] = wordInfo;
130         System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
131         return new NgramContext(mMaxPrevWordCount, prevWordsInfo);
132     }
133 
134 
135     /**
136      * Extracts the previous words context.
137      *
138      * @return a String with the previous words separated by white space.
139      */
extractPrevWordsContext()140     public String extractPrevWordsContext() {
141         final ArrayList<String> terms = new ArrayList<>();
142         for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) {
143             if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) {
144                 final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i];
145                 if (wordInfo.mIsBeginningOfSentence) {
146                     terms.add(BEGINNING_OF_SENTENCE_TAG);
147                 } else {
148                     final String term = wordInfo.mWord.toString();
149                     if (!term.isEmpty()) {
150                         terms.add(term);
151                     }
152                 }
153             }
154         }
155         return TextUtils.join(CONTEXT_SEPARATOR, terms);
156     }
157 
158     /**
159      * Extracts the previous words context.
160      *
161      * @return a String array with the previous words.
162      */
extractPrevWordsContextArray()163     public String[] extractPrevWordsContextArray() {
164         final ArrayList<String> prevTermList = new ArrayList<>();
165         for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) {
166             if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) {
167                 final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i];
168                 if (wordInfo.mIsBeginningOfSentence) {
169                     prevTermList.add(BEGINNING_OF_SENTENCE_TAG);
170                 } else {
171                     final String term = wordInfo.mWord.toString();
172                     if (!term.isEmpty()) {
173                         prevTermList.add(term);
174                     }
175                 }
176             }
177         }
178         final String[] contextStringArray = prevTermList.toArray(new String[prevTermList.size()]);
179         return contextStringArray;
180     }
181 
isValid()182     public boolean isValid() {
183         return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid();
184     }
185 
isBeginningOfSentenceContext()186     public boolean isBeginningOfSentenceContext() {
187         return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence;
188     }
189 
190     // n is 1-indexed.
191     // TODO: Remove
getNthPrevWord(final int n)192     public CharSequence getNthPrevWord(final int n) {
193         if (n <= 0 || n > mPrevWordsCount) {
194             return null;
195         }
196         return mPrevWordsInfo[n - 1].mWord;
197     }
198 
199     // n is 1-indexed.
200     @UsedForTesting
isNthPrevWordBeginningOfSentence(final int n)201     public boolean isNthPrevWordBeginningOfSentence(final int n) {
202         if (n <= 0 || n > mPrevWordsCount) {
203             return false;
204         }
205         return mPrevWordsInfo[n - 1].mIsBeginningOfSentence;
206     }
207 
outputToArray(final int[][] codePointArrays, final boolean[] isBeginningOfSentenceArray)208     public void outputToArray(final int[][] codePointArrays,
209             final boolean[] isBeginningOfSentenceArray) {
210         for (int i = 0; i < mPrevWordsCount; i++) {
211             final WordInfo wordInfo = mPrevWordsInfo[i];
212             if (wordInfo == null || !wordInfo.isValid()) {
213                 codePointArrays[i] = new int[0];
214                 isBeginningOfSentenceArray[i] = false;
215                 continue;
216             }
217             codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
218             isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
219         }
220     }
221 
getPrevWordCount()222     public int getPrevWordCount() {
223         return mPrevWordsCount;
224     }
225 
226     @Override
hashCode()227     public int hashCode() {
228         int hashValue = 0;
229         for (final WordInfo wordInfo : mPrevWordsInfo) {
230             if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) {
231                 break;
232             }
233             hashValue ^= wordInfo.hashCode();
234         }
235         return hashValue;
236     }
237 
238     @Override
equals(Object o)239     public boolean equals(Object o) {
240         if (this == o) return true;
241         if (!(o instanceof NgramContext)) return false;
242         final NgramContext prevWordsInfo = (NgramContext)o;
243 
244         final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount);
245         for (int i = 0; i < minLength; i++) {
246             if (!mPrevWordsInfo[i].equals(prevWordsInfo.mPrevWordsInfo[i])) {
247                 return false;
248             }
249         }
250         final WordInfo[] longerWordsInfo;
251         final int longerWordsInfoCount;
252         if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) {
253             longerWordsInfo = mPrevWordsInfo;
254             longerWordsInfoCount = mPrevWordsCount;
255         } else {
256             longerWordsInfo = prevWordsInfo.mPrevWordsInfo;
257             longerWordsInfoCount = prevWordsInfo.mPrevWordsCount;
258         }
259         for (int i = minLength; i < longerWordsInfoCount; i++) {
260             if (longerWordsInfo[i] != null
261                     && !WordInfo.EMPTY_WORD_INFO.equals(longerWordsInfo[i])) {
262                 return false;
263             }
264         }
265         return true;
266     }
267 
268     @Override
toString()269     public String toString() {
270         final StringBuffer builder = new StringBuffer();
271         for (int i = 0; i < mPrevWordsCount; i++) {
272             final WordInfo wordInfo = mPrevWordsInfo[i];
273             builder.append("PrevWord[");
274             builder.append(i);
275             builder.append("]: ");
276             if (wordInfo == null) {
277                 builder.append("null. ");
278                 continue;
279             }
280             if (!wordInfo.isValid()) {
281                 builder.append("Empty. ");
282                 continue;
283             }
284             builder.append(wordInfo.mWord);
285             builder.append(", isBeginningOfSentence: ");
286             builder.append(wordInfo.mIsBeginningOfSentence);
287             builder.append(". ");
288         }
289         return builder.toString();
290     }
291 }
292