1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.latin.makedict;
18 
19 import com.android.inputmethod.annotations.UsedForTesting;
20 import com.android.inputmethod.latin.BinaryDictionary;
21 import com.android.inputmethod.latin.Dictionary;
22 import com.android.inputmethod.latin.NgramContext;
23 import com.android.inputmethod.latin.NgramContext.WordInfo;
24 import com.android.inputmethod.latin.common.StringUtils;
25 import com.android.inputmethod.latin.utils.CombinedFormatUtils;
26 
27 import java.util.ArrayList;
28 import java.util.Arrays;
29 
30 import javax.annotation.Nullable;
31 
32 /**
33  * Utility class for a word with a probability.
34  *
35  * This is chiefly used to iterate a dictionary.
36  */
37 public final class WordProperty implements Comparable<WordProperty> {
38     public final String mWord;
39     public final ProbabilityInfo mProbabilityInfo;
40     public final ArrayList<NgramProperty> mNgrams;
41     // TODO: Support mIsBeginningOfSentence.
42     public final boolean mIsBeginningOfSentence;
43     public final boolean mIsNotAWord;
44     public final boolean mIsPossiblyOffensive;
45     public final boolean mHasNgrams;
46 
47     private int mHashCode = 0;
48 
49     // TODO: Support n-gram.
50     @UsedForTesting
WordProperty(final String word, final ProbabilityInfo probabilityInfo, @Nullable final ArrayList<WeightedString> bigrams, final boolean isNotAWord, final boolean isPossiblyOffensive)51     public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
52             @Nullable final ArrayList<WeightedString> bigrams,
53             final boolean isNotAWord, final boolean isPossiblyOffensive) {
54         mWord = word;
55         mProbabilityInfo = probabilityInfo;
56         if (null == bigrams) {
57             mNgrams = null;
58         } else {
59             mNgrams = new ArrayList<>();
60             final NgramContext ngramContext = new NgramContext(new WordInfo(mWord));
61             for (final WeightedString bigramTarget : bigrams) {
62                 mNgrams.add(new NgramProperty(bigramTarget, ngramContext));
63             }
64         }
65         mIsBeginningOfSentence = false;
66         mIsNotAWord = isNotAWord;
67         mIsPossiblyOffensive = isPossiblyOffensive;
68         mHasNgrams = bigrams != null && !bigrams.isEmpty();
69     }
70 
createProbabilityInfoFromArray(final int[] probabilityInfo)71     private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
72       return new ProbabilityInfo(
73               probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX],
74               probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
75               probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX],
76               probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]);
77     }
78 
79     // Construct word property using information from native code.
80     // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
WordProperty(final int[] codePoints, final boolean isNotAWord, final boolean isPossiblyOffensive, final boolean hasBigram, final boolean isBeginningOfSentence, final int[] probabilityInfo, final ArrayList<int[][]> ngramPrevWordsArray, final ArrayList<boolean[]> ngramPrevWordIsBeginningOfSentenceArray, final ArrayList<int[]> ngramTargets, final ArrayList<int[]> ngramProbabilityInfo)81     public WordProperty(final int[] codePoints, final boolean isNotAWord,
82             final boolean isPossiblyOffensive, final boolean hasBigram,
83             final boolean isBeginningOfSentence, final int[] probabilityInfo,
84             final ArrayList<int[][]> ngramPrevWordsArray,
85             final ArrayList<boolean[]> ngramPrevWordIsBeginningOfSentenceArray,
86             final ArrayList<int[]> ngramTargets, final ArrayList<int[]> ngramProbabilityInfo) {
87         mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
88         mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
89         final ArrayList<NgramProperty> ngrams = new ArrayList<>();
90         mIsBeginningOfSentence = isBeginningOfSentence;
91         mIsNotAWord = isNotAWord;
92         mIsPossiblyOffensive = isPossiblyOffensive;
93         mHasNgrams = hasBigram;
94 
95         final int relatedNgramCount = ngramTargets.size();
96         for (int i = 0; i < relatedNgramCount; i++) {
97             final String ngramTargetString =
98                     StringUtils.getStringFromNullTerminatedCodePointArray(ngramTargets.get(i));
99             final WeightedString ngramTarget = new WeightedString(ngramTargetString,
100                     createProbabilityInfoFromArray(ngramProbabilityInfo.get(i)));
101             final int[][] prevWords = ngramPrevWordsArray.get(i);
102             final boolean[] isBeginningOfSentenceArray =
103                     ngramPrevWordIsBeginningOfSentenceArray.get(i);
104             final WordInfo[] wordInfoArray = new WordInfo[prevWords.length];
105             for (int j = 0; j < prevWords.length; j++) {
106                 wordInfoArray[j] = isBeginningOfSentenceArray[j]
107                         ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO
108                         : new WordInfo(StringUtils.getStringFromNullTerminatedCodePointArray(
109                                 prevWords[j]));
110             }
111             final NgramContext ngramContext = new NgramContext(wordInfoArray);
112             ngrams.add(new NgramProperty(ngramTarget, ngramContext));
113         }
114         mNgrams = ngrams.isEmpty() ? null : ngrams;
115     }
116 
117     // TODO: Remove
118     @UsedForTesting
getBigrams()119     public ArrayList<WeightedString> getBigrams() {
120         if (null == mNgrams) {
121             return null;
122         }
123         final ArrayList<WeightedString> bigrams = new ArrayList<>();
124         for (final NgramProperty ngram : mNgrams) {
125             if (ngram.mNgramContext.getPrevWordCount() == 1) {
126                 bigrams.add(ngram.mTargetWord);
127             }
128         }
129         return bigrams;
130     }
131 
getProbability()132     public int getProbability() {
133         return mProbabilityInfo.mProbability;
134     }
135 
computeHashCode(WordProperty word)136     private static int computeHashCode(WordProperty word) {
137         return Arrays.hashCode(new Object[] {
138                 word.mWord,
139                 word.mProbabilityInfo,
140                 word.mNgrams,
141                 word.mIsNotAWord,
142                 word.mIsPossiblyOffensive
143         });
144     }
145 
146     /**
147      * Three-way comparison.
148      *
149      * A Word x is greater than a word y if x has a higher frequency. If they have the same
150      * frequency, they are sorted in lexicographic order.
151      */
152     @Override
compareTo(final WordProperty w)153     public int compareTo(final WordProperty w) {
154         if (getProbability() < w.getProbability()) return 1;
155         if (getProbability() > w.getProbability()) return -1;
156         return mWord.compareTo(w.mWord);
157     }
158 
159     /**
160      * Equality test.
161      *
162      * Words are equal if they have the same frequency, the same spellings, and the same
163      * attributes.
164      */
165     @Override
equals(Object o)166     public boolean equals(Object o) {
167         if (o == this) return true;
168         if (!(o instanceof WordProperty)) return false;
169         WordProperty w = (WordProperty)o;
170         return mProbabilityInfo.equals(w.mProbabilityInfo)
171                 && mWord.equals(w.mWord) && equals(mNgrams, w.mNgrams)
172                 && mIsNotAWord == w.mIsNotAWord && mIsPossiblyOffensive == w.mIsPossiblyOffensive
173                 && mHasNgrams == w.mHasNgrams;
174     }
175 
176     // TDOO: Have a utility method like java.util.Objects.equals.
equals(final ArrayList<T> a, final ArrayList<T> b)177     private static <T> boolean equals(final ArrayList<T> a, final ArrayList<T> b) {
178         if (null == a) {
179             return null == b;
180         }
181         return a.equals(b);
182     }
183 
184     @Override
hashCode()185     public int hashCode() {
186         if (mHashCode == 0) {
187             mHashCode = computeHashCode(this);
188         }
189         return mHashCode;
190     }
191 
192     @UsedForTesting
isValid()193     public boolean isValid() {
194         return getProbability() != Dictionary.NOT_A_PROBABILITY;
195     }
196 
197     @Override
toString()198     public String toString() {
199         return CombinedFormatUtils.formatWordProperty(this);
200     }
201 }
202