1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.latin;
18 
19 import android.test.AndroidTestCase;
20 import android.test.suitebuilder.annotation.LargeTest;
21 import android.text.TextUtils;
22 import android.util.Pair;
23 
24 import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
25 import com.android.inputmethod.latin.makedict.CodePointUtils;
26 import com.android.inputmethod.latin.makedict.FormatSpec;
27 import com.android.inputmethod.latin.makedict.WeightedString;
28 import com.android.inputmethod.latin.makedict.WordProperty;
29 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
30 import com.android.inputmethod.latin.utils.FileUtils;
31 import com.android.inputmethod.latin.utils.LanguageModelParam;
32 
33 import java.io.File;
34 import java.io.IOException;
35 import java.util.ArrayList;
36 import java.util.HashMap;
37 import java.util.HashSet;
38 import java.util.Locale;
39 import java.util.Map;
40 import java.util.Random;
41 
42 // TODO Use the seed passed as an argument for makedict test.
43 @LargeTest
44 public class BinaryDictionaryTests extends AndroidTestCase {
45     private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
46     private static final String TEST_LOCALE = "test";
47     private static final int[] DICT_FORMAT_VERSIONS =
48             new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
49 
canCheckBigramProbability(final int formatVersion)50     private static boolean canCheckBigramProbability(final int formatVersion) {
51         return formatVersion > FormatSpec.VERSION401;
52     }
53 
supportsBeginningOfSentence(final int formatVersion)54     private static boolean supportsBeginningOfSentence(final int formatVersion) {
55         return formatVersion > FormatSpec.VERSION401;
56     }
57 
createEmptyDictionaryAndGetFile(final String dictId, final int formatVersion)58     private File createEmptyDictionaryAndGetFile(final String dictId,
59             final int formatVersion) throws IOException {
60         if (formatVersion == FormatSpec.VERSION4
61                 || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
62                 || formatVersion == FormatSpec.VERSION4_DEV) {
63             return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion);
64         } else {
65             throw new IOException("Dictionary format version " + formatVersion
66                     + " is not supported.");
67         }
68     }
69 
createEmptyVer4DictionaryAndGetFile(final String dictId, final int formatVersion)70     private File createEmptyVer4DictionaryAndGetFile(final String dictId,
71             final int formatVersion) throws IOException {
72         final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
73                 getContext().getCacheDir());
74         file.delete();
75         file.mkdir();
76         Map<String, String> attributeMap = new HashMap<>();
77         if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
78                 Locale.ENGLISH, attributeMap)) {
79             return file;
80         } else {
81             throw new IOException("Empty dictionary " + file.getAbsolutePath()
82                     + " cannot be created. Format version: " + formatVersion);
83         }
84     }
85 
testIsValidDictionary()86     public void testIsValidDictionary() {
87         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
88             testIsValidDictionary(formatVersion);
89         }
90     }
91 
testIsValidDictionary(final int formatVersion)92     private void testIsValidDictionary(final int formatVersion) {
93         File dictFile = null;
94         try {
95             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
96         } catch (IOException e) {
97             fail("IOException while writing an initial dictionary : " + e);
98         }
99         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
100                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
101                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
102         assertTrue("binaryDictionary must be valid for existing valid dictionary file.",
103                 binaryDictionary.isValidDictionary());
104         binaryDictionary.close();
105         assertFalse("binaryDictionary must be invalid after closing.",
106                 binaryDictionary.isValidDictionary());
107         FileUtils.deleteRecursively(dictFile);
108         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */,
109                 dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(),
110                 TEST_LOCALE, true /* isUpdatable */);
111         assertFalse("binaryDictionary must be invalid for not existing dictionary file.",
112                 binaryDictionary.isValidDictionary());
113         binaryDictionary.close();
114     }
115 
testConstructingDictionaryOnMemory()116     public void testConstructingDictionaryOnMemory() {
117         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
118             testConstructingDictionaryOnMemory(formatVersion);
119         }
120     }
121 
testConstructingDictionaryOnMemory(final int formatVersion)122     private void testConstructingDictionaryOnMemory(final int formatVersion) {
123         File dictFile = null;
124         try {
125             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
126         } catch (IOException e) {
127             fail("IOException while writing an initial dictionary : " + e);
128         }
129         FileUtils.deleteRecursively(dictFile);
130         assertFalse(dictFile.exists());
131         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
132                 true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion,
133                 new HashMap<String, String>());
134         assertTrue(binaryDictionary.isValidDictionary());
135         assertEquals(formatVersion, binaryDictionary.getFormatVersion());
136         final int probability = 100;
137         addUnigramWord(binaryDictionary, "word", probability);
138         assertEquals(probability, binaryDictionary.getFrequency("word"));
139         assertFalse(dictFile.exists());
140         binaryDictionary.flush();
141         assertTrue(dictFile.exists());
142         assertTrue(binaryDictionary.isValidDictionary());
143         assertEquals(formatVersion, binaryDictionary.getFormatVersion());
144         assertEquals(probability, binaryDictionary.getFrequency("word"));
145         binaryDictionary.close();
146         dictFile.delete();
147     }
148 
testAddTooLongWord()149     public void testAddTooLongWord() {
150         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
151             testAddTooLongWord(formatVersion);
152         }
153     }
154 
testAddTooLongWord(final int formatVersion)155     private void testAddTooLongWord(final int formatVersion) {
156         File dictFile = null;
157         try {
158             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
159         } catch (IOException e) {
160             fail("IOException while writing an initial dictionary : " + e);
161         }
162         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
163                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
164                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
165 
166         final StringBuffer stringBuilder = new StringBuffer();
167         for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) {
168             stringBuilder.append('a');
169         }
170         final String validLongWord = stringBuilder.toString();
171         stringBuilder.append('a');
172         final String invalidLongWord = stringBuilder.toString();
173         final int probability = 100;
174         addUnigramWord(binaryDictionary, "aaa", probability);
175         addUnigramWord(binaryDictionary, validLongWord, probability);
176         addUnigramWord(binaryDictionary, invalidLongWord, probability);
177         // Too long short cut.
178         binaryDictionary.addUnigramEntry("a", probability, invalidLongWord,
179                 10 /* shortcutProbability */, false /* isBeginningOfSentence */,
180                 false /* isNotAWord */, false /* isBlacklisted */,
181                 BinaryDictionary.NOT_A_VALID_TIMESTAMP);
182         addUnigramWord(binaryDictionary, "abc", probability);
183         final int updatedProbability = 200;
184         // Update.
185         addUnigramWord(binaryDictionary, validLongWord, updatedProbability);
186         addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability);
187         addUnigramWord(binaryDictionary, "abc", updatedProbability);
188 
189         assertEquals(probability, binaryDictionary.getFrequency("aaa"));
190         assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord));
191         assertEquals(BinaryDictionary.NOT_A_PROBABILITY,
192                 binaryDictionary.getFrequency(invalidLongWord));
193         assertEquals(updatedProbability, binaryDictionary.getFrequency("abc"));
194         dictFile.delete();
195     }
196 
addUnigramWord(final BinaryDictionary binaryDictionary, final String word, final int probability)197     private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
198             final int probability) {
199         binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
200                 BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
201                 false /* isBeginningOfSentence */, false /* isNotAWord */,
202                 false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
203     }
204 
addBigramWords(final BinaryDictionary binaryDictionary, final String word0, final String word1, final int probability)205     private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
206             final String word1, final int probability) {
207         binaryDictionary.addNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1, probability,
208                 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
209     }
210 
isValidBigram(final BinaryDictionary binaryDictionary, final String word0, final String word1)211     private static boolean isValidBigram(final BinaryDictionary binaryDictionary,
212             final String word0, final String word1) {
213         return binaryDictionary.isValidNgram(new PrevWordsInfo(new WordInfo(word0)), word1);
214     }
215 
removeBigramEntry(final BinaryDictionary binaryDictionary, final String word0, final String word1)216     private static void removeBigramEntry(final BinaryDictionary binaryDictionary,
217             final String word0, final String word1) {
218         binaryDictionary.removeNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1);
219     }
220 
getBigramProbability(final BinaryDictionary binaryDictionary, final String word0, final String word1)221     private static int getBigramProbability(final BinaryDictionary binaryDictionary,
222             final String word0,  final String word1) {
223         return binaryDictionary.getNgramProbability(new PrevWordsInfo(new WordInfo(word0)), word1);
224     }
225 
testAddUnigramWord()226     public void testAddUnigramWord() {
227         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
228             testAddUnigramWord(formatVersion);
229         }
230     }
231 
testAddUnigramWord(final int formatVersion)232     private void testAddUnigramWord(final int formatVersion) {
233         File dictFile = null;
234         try {
235             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
236         } catch (IOException e) {
237             fail("IOException while writing an initial dictionary : " + e);
238         }
239         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
240                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
241                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
242 
243         final int probability = 100;
244         addUnigramWord(binaryDictionary, "aaa", probability);
245         // Reallocate and create.
246         addUnigramWord(binaryDictionary, "aab", probability);
247         // Insert into children.
248         addUnigramWord(binaryDictionary, "aac", probability);
249         // Make terminal.
250         addUnigramWord(binaryDictionary, "aa", probability);
251         // Create children.
252         addUnigramWord(binaryDictionary, "aaaa", probability);
253         // Reallocate and make termianl.
254         addUnigramWord(binaryDictionary, "a", probability);
255 
256         final int updatedProbability = 200;
257         // Update.
258         addUnigramWord(binaryDictionary, "aaa", updatedProbability);
259 
260         assertEquals(probability, binaryDictionary.getFrequency("aab"));
261         assertEquals(probability, binaryDictionary.getFrequency("aac"));
262         assertEquals(probability, binaryDictionary.getFrequency("aa"));
263         assertEquals(probability, binaryDictionary.getFrequency("aaaa"));
264         assertEquals(probability, binaryDictionary.getFrequency("a"));
265         assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa"));
266 
267         dictFile.delete();
268     }
269 
testRandomlyAddUnigramWord()270     public void testRandomlyAddUnigramWord() {
271         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
272             testRandomlyAddUnigramWord(formatVersion);
273         }
274     }
275 
testRandomlyAddUnigramWord(final int formatVersion)276     private void testRandomlyAddUnigramWord(final int formatVersion) {
277         final int wordCount = 1000;
278         final int codePointSetSize = 50;
279         final long seed = System.currentTimeMillis();
280 
281         File dictFile = null;
282         try {
283             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
284         } catch (IOException e) {
285             fail("IOException while writing an initial dictionary : " + e);
286         }
287         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
288                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
289                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
290 
291         final HashMap<String, Integer> probabilityMap = new HashMap<>();
292         // Test a word that isn't contained within the dictionary.
293         final Random random = new Random(seed);
294         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
295         for (int i = 0; i < wordCount; ++i) {
296             final String word = CodePointUtils.generateWord(random, codePointSet);
297             probabilityMap.put(word, random.nextInt(0xFF));
298         }
299         for (String word : probabilityMap.keySet()) {
300             addUnigramWord(binaryDictionary, word, probabilityMap.get(word));
301         }
302         for (String word : probabilityMap.keySet()) {
303             assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word));
304         }
305         dictFile.delete();
306     }
307 
testAddBigramWords()308     public void testAddBigramWords() {
309         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
310             testAddBigramWords(formatVersion);
311         }
312     }
313 
testAddBigramWords(final int formatVersion)314     private void testAddBigramWords(final int formatVersion) {
315         File dictFile = null;
316         try {
317             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
318         } catch (IOException e) {
319             fail("IOException while writing an initial dictionary : " + e);
320         }
321         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
322                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
323                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
324 
325         final int unigramProbability = 100;
326         final int bigramProbability = 150;
327         final int updatedBigramProbability = 200;
328         addUnigramWord(binaryDictionary, "aaa", unigramProbability);
329         addUnigramWord(binaryDictionary, "abb", unigramProbability);
330         addUnigramWord(binaryDictionary, "bcc", unigramProbability);
331         addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
332         addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
333         addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
334         addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
335 
336         assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
337         assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
338         assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
339         assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
340         if (canCheckBigramProbability(formatVersion)) {
341             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
342             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
343             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
344             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
345         }
346 
347         addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability);
348         if (canCheckBigramProbability(formatVersion)) {
349             assertEquals(updatedBigramProbability,
350                     getBigramProbability(binaryDictionary, "aaa", "abb"));
351         }
352 
353         assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
354         assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
355         assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
356         assertEquals(Dictionary.NOT_A_PROBABILITY,
357                 getBigramProbability(binaryDictionary, "bcc", "aaa"));
358         assertEquals(Dictionary.NOT_A_PROBABILITY,
359                 getBigramProbability(binaryDictionary, "bcc", "bbc"));
360         assertEquals(Dictionary.NOT_A_PROBABILITY,
361                 getBigramProbability(binaryDictionary, "aaa", "aaa"));
362 
363         // Testing bigram link.
364         addUnigramWord(binaryDictionary, "abcde", unigramProbability);
365         addUnigramWord(binaryDictionary, "fghij", unigramProbability);
366         addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability);
367         addUnigramWord(binaryDictionary, "fgh", unigramProbability);
368         addUnigramWord(binaryDictionary, "abc", unigramProbability);
369         addUnigramWord(binaryDictionary, "f", unigramProbability);
370 
371         if (canCheckBigramProbability(formatVersion)) {
372             assertEquals(bigramProbability,
373                     getBigramProbability(binaryDictionary, "abcde", "fghij"));
374         }
375         assertEquals(Dictionary.NOT_A_PROBABILITY,
376                 getBigramProbability(binaryDictionary, "abcde", "fgh"));
377         addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability);
378         if (canCheckBigramProbability(formatVersion)) {
379             assertEquals(updatedBigramProbability,
380                     getBigramProbability(binaryDictionary, "abcde", "fghij"));
381         }
382 
383         dictFile.delete();
384     }
385 
testRandomlyAddBigramWords()386     public void testRandomlyAddBigramWords() {
387         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
388             testRandomlyAddBigramWords(formatVersion);
389         }
390     }
391 
testRandomlyAddBigramWords(final int formatVersion)392     private void testRandomlyAddBigramWords(final int formatVersion) {
393         final int wordCount = 100;
394         final int bigramCount = 1000;
395         final int codePointSetSize = 50;
396         final long seed = System.currentTimeMillis();
397         final Random random = new Random(seed);
398 
399         File dictFile = null;
400         try {
401             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
402         } catch (IOException e) {
403             fail("IOException while writing an initial dictionary : " + e);
404         }
405         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
406                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
407                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
408 
409         final ArrayList<String> words = new ArrayList<>();
410         final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
411         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
412         final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
413         final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
414 
415         for (int i = 0; i < wordCount; ++i) {
416             final String word = CodePointUtils.generateWord(random, codePointSet);
417             words.add(word);
418             final int unigramProbability = random.nextInt(0xFF);
419             unigramProbabilities.put(word, unigramProbability);
420             addUnigramWord(binaryDictionary, word, unigramProbability);
421         }
422 
423         for (int i = 0; i < bigramCount; i++) {
424             final String word0 = words.get(random.nextInt(wordCount));
425             final String word1 = words.get(random.nextInt(wordCount));
426             if (TextUtils.equals(word0, word1)) {
427                 continue;
428             }
429             final Pair<String, String> bigram = new Pair<>(word0, word1);
430             bigramWords.add(bigram);
431             final int unigramProbability = unigramProbabilities.get(word1);
432             final int bigramProbability =
433                     unigramProbability + random.nextInt(0xFF - unigramProbability);
434             bigramProbabilities.put(bigram, bigramProbability);
435             addBigramWords(binaryDictionary, word0, word1, bigramProbability);
436         }
437 
438         for (final Pair<String, String> bigram : bigramWords) {
439             final int bigramProbability = bigramProbabilities.get(bigram);
440             assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
441                     isValidBigram(binaryDictionary, bigram.first, bigram.second));
442             if (canCheckBigramProbability(formatVersion)) {
443                 assertEquals(bigramProbability,
444                         getBigramProbability(binaryDictionary, bigram.first, bigram.second));
445             }
446         }
447 
448         dictFile.delete();
449     }
450 
testRemoveBigramWords()451     public void testRemoveBigramWords() {
452         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
453             testRemoveBigramWords(formatVersion);
454         }
455     }
456 
testRemoveBigramWords(final int formatVersion)457     private void testRemoveBigramWords(final int formatVersion) {
458         File dictFile = null;
459         try {
460             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
461         } catch (IOException e) {
462             fail("IOException while writing an initial dictionary : " + e);
463         }
464         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
465                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
466                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
467         final int unigramProbability = 100;
468         final int bigramProbability = 150;
469         addUnigramWord(binaryDictionary, "aaa", unigramProbability);
470         addUnigramWord(binaryDictionary, "abb", unigramProbability);
471         addUnigramWord(binaryDictionary, "bcc", unigramProbability);
472         addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
473         addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
474         addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
475         addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
476 
477         assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
478         assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
479         assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
480         assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
481 
482         removeBigramEntry(binaryDictionary, "aaa", "abb");
483         assertFalse(isValidBigram(binaryDictionary, "aaa", "abb"));
484         addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
485         assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
486 
487 
488         removeBigramEntry(binaryDictionary, "aaa", "bcc");
489         assertFalse(isValidBigram(binaryDictionary, "aaa", "bcc"));
490         removeBigramEntry(binaryDictionary, "abb", "aaa");
491         assertFalse(isValidBigram(binaryDictionary, "abb", "aaa"));
492         removeBigramEntry(binaryDictionary, "abb", "bcc");
493         assertFalse(isValidBigram(binaryDictionary, "abb", "bcc"));
494 
495         removeBigramEntry(binaryDictionary, "aaa", "abb");
496         // Test remove non-existing bigram operation.
497         removeBigramEntry(binaryDictionary, "aaa", "abb");
498         removeBigramEntry(binaryDictionary, "bcc", "aaa");
499 
500         dictFile.delete();
501     }
502 
testFlushDictionary()503     public void testFlushDictionary() {
504         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
505             testFlushDictionary(formatVersion);
506         }
507     }
508 
testFlushDictionary(final int formatVersion)509     private void testFlushDictionary(final int formatVersion) {
510         File dictFile = null;
511         try {
512             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
513         } catch (IOException e) {
514             fail("IOException while writing an initial dictionary : " + e);
515         }
516         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
517                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
518                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
519 
520         final int probability = 100;
521         addUnigramWord(binaryDictionary, "aaa", probability);
522         addUnigramWord(binaryDictionary, "abcd", probability);
523         // Close without flushing.
524         binaryDictionary.close();
525 
526         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
527                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
528                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
529 
530         assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa"));
531         assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd"));
532 
533         addUnigramWord(binaryDictionary, "aaa", probability);
534         addUnigramWord(binaryDictionary, "abcd", probability);
535         binaryDictionary.flush();
536         binaryDictionary.close();
537 
538         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
539                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
540                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
541 
542         assertEquals(probability, binaryDictionary.getFrequency("aaa"));
543         assertEquals(probability, binaryDictionary.getFrequency("abcd"));
544         addUnigramWord(binaryDictionary, "bcde", probability);
545         binaryDictionary.flush();
546         binaryDictionary.close();
547 
548         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
549                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
550                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
551         assertEquals(probability, binaryDictionary.getFrequency("bcde"));
552         binaryDictionary.close();
553 
554         dictFile.delete();
555     }
556 
testFlushWithGCDictionary()557     public void testFlushWithGCDictionary() {
558         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
559             testFlushWithGCDictionary(formatVersion);
560         }
561     }
562 
testFlushWithGCDictionary(final int formatVersion)563     private void testFlushWithGCDictionary(final int formatVersion) {
564         File dictFile = null;
565         try {
566             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
567         } catch (IOException e) {
568             fail("IOException while writing an initial dictionary : " + e);
569         }
570         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
571                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
572                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
573 
574         final int unigramProbability = 100;
575         final int bigramProbability = 150;
576         addUnigramWord(binaryDictionary, "aaa", unigramProbability);
577         addUnigramWord(binaryDictionary, "abb", unigramProbability);
578         addUnigramWord(binaryDictionary, "bcc", unigramProbability);
579         addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
580         addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
581         addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
582         addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
583         binaryDictionary.flushWithGC();
584         binaryDictionary.close();
585 
586         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
587                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
588                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
589         assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
590         assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
591         assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
592         if (canCheckBigramProbability(formatVersion)) {
593             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
594             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
595             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
596             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
597         }
598         assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
599         assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
600         assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
601         binaryDictionary.flushWithGC();
602         binaryDictionary.close();
603 
604         dictFile.delete();
605     }
606 
testAddBigramWordsAndFlashWithGC()607     public void testAddBigramWordsAndFlashWithGC() {
608         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
609             testAddBigramWordsAndFlashWithGC(formatVersion);
610         }
611     }
612 
613     // TODO: Evaluate performance of GC
testAddBigramWordsAndFlashWithGC(final int formatVersion)614     private void testAddBigramWordsAndFlashWithGC(final int formatVersion) {
615         final int wordCount = 100;
616         final int bigramCount = 1000;
617         final int codePointSetSize = 30;
618         final long seed = System.currentTimeMillis();
619         final Random random = new Random(seed);
620 
621         File dictFile = null;
622         try {
623             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
624         } catch (IOException e) {
625             fail("IOException while writing an initial dictionary : " + e);
626         }
627 
628         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
629                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
630                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
631 
632         final ArrayList<String> words = new ArrayList<>();
633         final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
634         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
635         final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
636         final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
637 
638         for (int i = 0; i < wordCount; ++i) {
639             final String word = CodePointUtils.generateWord(random, codePointSet);
640             words.add(word);
641             final int unigramProbability = random.nextInt(0xFF);
642             unigramProbabilities.put(word, unigramProbability);
643             addUnigramWord(binaryDictionary, word, unigramProbability);
644         }
645 
646         for (int i = 0; i < bigramCount; i++) {
647             final String word0 = words.get(random.nextInt(wordCount));
648             final String word1 = words.get(random.nextInt(wordCount));
649             if (TextUtils.equals(word0, word1)) {
650                 continue;
651             }
652             final Pair<String, String> bigram = new Pair<>(word0, word1);
653             bigramWords.add(bigram);
654             final int unigramProbability = unigramProbabilities.get(word1);
655             final int bigramProbability =
656                     unigramProbability + random.nextInt(0xFF - unigramProbability);
657             bigramProbabilities.put(bigram, bigramProbability);
658             addBigramWords(binaryDictionary, word0, word1, bigramProbability);
659         }
660 
661         binaryDictionary.flushWithGC();
662         binaryDictionary.close();
663         binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
664                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
665                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
666 
667 
668         for (final Pair<String, String> bigram : bigramWords) {
669             final int bigramProbability = bigramProbabilities.get(bigram);
670             assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
671                     isValidBigram(binaryDictionary, bigram.first, bigram.second));
672             if (canCheckBigramProbability(formatVersion)) {
673                 assertEquals(bigramProbability,
674                         getBigramProbability(binaryDictionary, bigram.first, bigram.second));
675             }
676         }
677 
678         dictFile.delete();
679     }
680 
testRandomOperationsAndFlashWithGC()681     public void testRandomOperationsAndFlashWithGC() {
682         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
683             testRandomOperationsAndFlashWithGC(formatVersion);
684         }
685     }
686 
testRandomOperationsAndFlashWithGC(final int formatVersion)687     private void testRandomOperationsAndFlashWithGC(final int formatVersion) {
688         final int flashWithGCIterationCount = 50;
689         final int operationCountInEachIteration = 200;
690         final int initialUnigramCount = 100;
691         final float addUnigramProb = 0.5f;
692         final float addBigramProb = 0.8f;
693         final float removeBigramProb = 0.2f;
694         final int codePointSetSize = 30;
695 
696         final long seed = System.currentTimeMillis();
697         final Random random = new Random(seed);
698 
699         File dictFile = null;
700         try {
701             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
702         } catch (IOException e) {
703             fail("IOException while writing an initial dictionary : " + e);
704         }
705 
706         BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
707                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
708                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
709         final ArrayList<String> words = new ArrayList<>();
710         final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
711         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
712         final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
713         final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
714         for (int i = 0; i < initialUnigramCount; ++i) {
715             final String word = CodePointUtils.generateWord(random, codePointSet);
716             words.add(word);
717             final int unigramProbability = random.nextInt(0xFF);
718             unigramProbabilities.put(word, unigramProbability);
719             addUnigramWord(binaryDictionary, word, unigramProbability);
720         }
721         binaryDictionary.flushWithGC();
722         binaryDictionary.close();
723 
724         for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) {
725             binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
726                     0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
727                     Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
728             for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) {
729                 // Add unigram.
730                 if (random.nextFloat() < addUnigramProb) {
731                     final String word = CodePointUtils.generateWord(random, codePointSet);
732                     words.add(word);
733                     final int unigramProbability = random.nextInt(0xFF);
734                     unigramProbabilities.put(word, unigramProbability);
735                     addUnigramWord(binaryDictionary, word, unigramProbability);
736                 }
737                 // Add bigram.
738                 if (random.nextFloat() < addBigramProb && words.size() > 2) {
739                     final int word0Index = random.nextInt(words.size());
740                     int word1Index = random.nextInt(words.size() - 1);
741                     if (word0Index <= word1Index) {
742                         word1Index++;
743                     }
744                     final String word0 = words.get(word0Index);
745                     final String word1 = words.get(word1Index);
746                     if (TextUtils.equals(word0, word1)) {
747                         continue;
748                     }
749                     final int unigramProbability = unigramProbabilities.get(word1);
750                     final int bigramProbability =
751                             unigramProbability + random.nextInt(0xFF - unigramProbability);
752                     final Pair<String, String> bigram = new Pair<>(word0, word1);
753                     bigramWords.add(bigram);
754                     bigramProbabilities.put(bigram, bigramProbability);
755                     addBigramWords(binaryDictionary, word0, word1, bigramProbability);
756                 }
757                 // Remove bigram.
758                 if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) {
759                     final int bigramIndex = random.nextInt(bigramWords.size());
760                     final Pair<String, String> bigram = bigramWords.get(bigramIndex);
761                     bigramWords.remove(bigramIndex);
762                     bigramProbabilities.remove(bigram);
763                     removeBigramEntry(binaryDictionary, bigram.first, bigram.second);
764                 }
765             }
766 
767             // Test whether the all unigram operations are collectlly handled.
768             for (int i = 0; i < words.size(); i++) {
769                 final String word = words.get(i);
770                 final int unigramProbability = unigramProbabilities.get(word);
771                 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
772             }
773             // Test whether the all bigram operations are collectlly handled.
774             for (int i = 0; i < bigramWords.size(); i++) {
775                 final Pair<String, String> bigram = bigramWords.get(i);
776                 final int probability;
777                 if (bigramProbabilities.containsKey(bigram)) {
778                     final int bigramProbability = bigramProbabilities.get(bigram);
779                     probability = bigramProbability;
780                 } else {
781                     probability = Dictionary.NOT_A_PROBABILITY;
782                 }
783 
784                 if (canCheckBigramProbability(formatVersion)) {
785                     assertEquals(probability,
786                             getBigramProbability(binaryDictionary, bigram.first, bigram.second));
787                 }
788                 assertEquals(probability != Dictionary.NOT_A_PROBABILITY,
789                         isValidBigram(binaryDictionary, bigram.first, bigram.second));
790             }
791             binaryDictionary.flushWithGC();
792             binaryDictionary.close();
793         }
794 
795         dictFile.delete();
796     }
797 
testAddManyUnigramsAndFlushWithGC()798     public void testAddManyUnigramsAndFlushWithGC() {
799         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
800             testAddManyUnigramsAndFlushWithGC(formatVersion);
801         }
802     }
803 
testAddManyUnigramsAndFlushWithGC(final int formatVersion)804     private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) {
805         final int flashWithGCIterationCount = 3;
806         final int codePointSetSize = 50;
807 
808         final long seed = System.currentTimeMillis();
809         final Random random = new Random(seed);
810 
811         File dictFile = null;
812         try {
813             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
814         } catch (IOException e) {
815             fail("IOException while writing an initial dictionary : " + e);
816         }
817 
818         final ArrayList<String> words = new ArrayList<>();
819         final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
820         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
821 
822         BinaryDictionary binaryDictionary;
823         for (int i = 0; i < flashWithGCIterationCount; i++) {
824             binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
825                     0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
826                     Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
827             while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
828                 final String word = CodePointUtils.generateWord(random, codePointSet);
829                 words.add(word);
830                 final int unigramProbability = random.nextInt(0xFF);
831                 unigramProbabilities.put(word, unigramProbability);
832                 addUnigramWord(binaryDictionary, word, unigramProbability);
833             }
834 
835             for (int j = 0; j < words.size(); j++) {
836                 final String word = words.get(j);
837                 final int unigramProbability = unigramProbabilities.get(word);
838                 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
839             }
840 
841             binaryDictionary.flushWithGC();
842             binaryDictionary.close();
843         }
844 
845         dictFile.delete();
846     }
847 
testUnigramAndBigramCount()848     public void testUnigramAndBigramCount() {
849         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
850             testUnigramAndBigramCount(formatVersion);
851         }
852     }
853 
testUnigramAndBigramCount(final int formatVersion)854     private void testUnigramAndBigramCount(final int formatVersion) {
855         final int flashWithGCIterationCount = 10;
856         final int codePointSetSize = 50;
857         final int unigramCountPerIteration = 1000;
858         final int bigramCountPerIteration = 2000;
859         final long seed = System.currentTimeMillis();
860         final Random random = new Random(seed);
861 
862         File dictFile = null;
863         try {
864             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
865         } catch (IOException e) {
866             fail("IOException while writing an initial dictionary : " + e);
867         }
868 
869         final ArrayList<String> words = new ArrayList<>();
870         final HashSet<Pair<String, String>> bigrams = new HashSet<>();
871         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
872 
873         BinaryDictionary binaryDictionary;
874         for (int i = 0; i < flashWithGCIterationCount; i++) {
875             binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
876                     0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
877                     Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
878             for (int j = 0; j < unigramCountPerIteration; j++) {
879                 final String word = CodePointUtils.generateWord(random, codePointSet);
880                 words.add(word);
881                 final int unigramProbability = random.nextInt(0xFF);
882                 addUnigramWord(binaryDictionary, word, unigramProbability);
883             }
884             for (int j = 0; j < bigramCountPerIteration; j++) {
885                 final String word0 = words.get(random.nextInt(words.size()));
886                 final String word1 = words.get(random.nextInt(words.size()));
887                 if (TextUtils.equals(word0, word1)) {
888                     continue;
889                 }
890                 bigrams.add(new Pair<>(word0, word1));
891                 final int bigramProbability = random.nextInt(0xF);
892                 addBigramWords(binaryDictionary, word0, word1, bigramProbability);
893             }
894             assertEquals(new HashSet<>(words).size(), Integer.parseInt(
895                     binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
896             assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
897                     binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
898             binaryDictionary.flushWithGC();
899             assertEquals(new HashSet<>(words).size(), Integer.parseInt(
900                     binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
901             assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
902                     binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
903             binaryDictionary.close();
904         }
905 
906         dictFile.delete();
907     }
908 
testAddMultipleDictionaryEntries()909     public void testAddMultipleDictionaryEntries() {
910         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
911             testAddMultipleDictionaryEntries(formatVersion);
912         }
913     }
914 
testAddMultipleDictionaryEntries(final int formatVersion)915     private void testAddMultipleDictionaryEntries(final int formatVersion) {
916         final int codePointSetSize = 20;
917         final int lmParamCount = 1000;
918         final double bigramContinueRate = 0.9;
919         final long seed = System.currentTimeMillis();
920         final Random random = new Random(seed);
921 
922         File dictFile = null;
923         try {
924             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
925         } catch (IOException e) {
926             fail("IOException while writing an initial dictionary : " + e);
927         }
928 
929         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
930         final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
931         final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
932 
933         final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount];
934         String prevWord = null;
935         for (int i = 0; i < languageModelParams.length; i++) {
936             final String word = CodePointUtils.generateWord(random, codePointSet);
937             final int probability = random.nextInt(0xFF);
938             final int bigramProbability = probability + random.nextInt(0xFF - probability);
939             unigramProbabilities.put(word, probability);
940             if (prevWord == null) {
941                 languageModelParams[i] = new LanguageModelParam(word, probability,
942                         BinaryDictionary.NOT_A_VALID_TIMESTAMP);
943             } else {
944                 languageModelParams[i] = new LanguageModelParam(prevWord, word, probability,
945                         bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
946                 bigramProbabilities.put(new Pair<>(prevWord, word),
947                         bigramProbability);
948             }
949             prevWord = (random.nextDouble() < bigramContinueRate) ? word : null;
950         }
951 
952         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
953                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
954                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
955         binaryDictionary.addMultipleDictionaryEntries(languageModelParams);
956 
957         for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) {
958             assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey()));
959         }
960 
961         for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) {
962             final String word0 = entry.getKey().first;
963             final String word1 = entry.getKey().second;
964             final int bigramProbability = entry.getValue();
965             assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
966                     isValidBigram(binaryDictionary, word0, word1));
967             if (canCheckBigramProbability(formatVersion)) {
968                 assertEquals(bigramProbability,
969                         getBigramProbability(binaryDictionary, word0, word1));
970             }
971         }
972     }
973 
testGetWordProperties()974     public void testGetWordProperties() {
975         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
976             testGetWordProperties(formatVersion);
977         }
978     }
979 
testGetWordProperties(final int formatVersion)980     private void testGetWordProperties(final int formatVersion) {
981         final long seed = System.currentTimeMillis();
982         final Random random = new Random(seed);
983         final int UNIGRAM_COUNT = 1000;
984         final int BIGRAM_COUNT = 1000;
985         final int codePointSetSize = 20;
986         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
987 
988         File dictFile = null;
989         try {
990             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
991         } catch (IOException e) {
992             fail("IOException while writing an initial dictionary : " + e);
993         }
994         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
995                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
996                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
997 
998         final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
999                 false /* isBeginningOfSentence */);
1000         assertFalse(invalidWordProperty.isValid());
1001 
1002         final ArrayList<String> words = new ArrayList<>();
1003         final HashMap<String, Integer> wordProbabilities = new HashMap<>();
1004         final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
1005         final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
1006 
1007         for (int i = 0; i < UNIGRAM_COUNT; i++) {
1008             final String word = CodePointUtils.generateWord(random, codePointSet);
1009             final int unigramProbability = random.nextInt(0xFF);
1010             final boolean isNotAWord = random.nextBoolean();
1011             final boolean isBlacklisted = random.nextBoolean();
1012             // TODO: Add tests for historical info.
1013             binaryDictionary.addUnigramEntry(word, unigramProbability,
1014                     null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
1015                     false /* isBeginningOfSentence */, isNotAWord, isBlacklisted,
1016                     BinaryDictionary.NOT_A_VALID_TIMESTAMP);
1017             if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1018                 binaryDictionary.flushWithGC();
1019             }
1020             words.add(word);
1021             wordProbabilities.put(word, unigramProbability);
1022             final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
1023                     false /* isBeginningOfSentence */);
1024             assertEquals(word, wordProperty.mWord);
1025             assertTrue(wordProperty.isValid());
1026             assertEquals(isNotAWord, wordProperty.mIsNotAWord);
1027             assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
1028             assertEquals(false, wordProperty.mHasBigrams);
1029             assertEquals(false, wordProperty.mHasShortcuts);
1030             assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
1031             assertTrue(wordProperty.mShortcutTargets.isEmpty());
1032         }
1033 
1034         for (int i = 0; i < BIGRAM_COUNT; i++) {
1035             final int word0Index = random.nextInt(wordProbabilities.size());
1036             final int word1Index = random.nextInt(wordProbabilities.size());
1037             if (word0Index == word1Index) {
1038                 continue;
1039             }
1040             final String word0 = words.get(word0Index);
1041             final String word1 = words.get(word1Index);
1042             final int unigramProbability = wordProbabilities.get(word1);
1043             final int bigramProbability =
1044                     unigramProbability + random.nextInt(0xFF - unigramProbability);
1045             addBigramWords(binaryDictionary, word0, word1, bigramProbability);
1046             if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1047                 binaryDictionary.flushWithGC();
1048             }
1049             if (!bigrams.containsKey(word0)) {
1050                 final HashSet<String> bigramWord1s = new HashSet<>();
1051                 bigrams.put(word0, bigramWord1s);
1052             }
1053             bigrams.get(word0).add(word1);
1054             bigramProbabilities.put(new Pair<>(word0, word1), bigramProbability);
1055         }
1056 
1057         for (int i = 0; i < words.size(); i++) {
1058             final String word0 = words.get(i);
1059             if (!bigrams.containsKey(word0)) {
1060                 continue;
1061             }
1062             final HashSet<String> bigramWord1s = bigrams.get(word0);
1063             final WordProperty wordProperty = binaryDictionary.getWordProperty(word0,
1064                     false /* isBeginningOfSentence */);
1065             assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size());
1066             for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
1067                 final String word1 = wordProperty.mBigrams.get(j).mWord;
1068                 assertTrue(bigramWord1s.contains(word1));
1069                 if (canCheckBigramProbability(formatVersion)) {
1070                     final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
1071                     assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
1072                 }
1073             }
1074         }
1075     }
1076 
testIterateAllWords()1077     public void testIterateAllWords() {
1078         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1079             testIterateAllWords(formatVersion);
1080         }
1081     }
1082 
testIterateAllWords(final int formatVersion)1083     private void testIterateAllWords(final int formatVersion) {
1084         final long seed = System.currentTimeMillis();
1085         final Random random = new Random(seed);
1086         final int UNIGRAM_COUNT = 1000;
1087         final int BIGRAM_COUNT = 1000;
1088         final int codePointSetSize = 20;
1089         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
1090 
1091         File dictFile = null;
1092         try {
1093             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1094         } catch (IOException e) {
1095             fail("IOException while writing an initial dictionary : " + e);
1096         }
1097         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1098                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1099                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1100 
1101         final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
1102                 false /* isBeginningOfSentence */);
1103         assertFalse(invalidWordProperty.isValid());
1104 
1105         final ArrayList<String> words = new ArrayList<>();
1106         final HashMap<String, Integer> wordProbabilitiesToCheckLater = new HashMap<>();
1107         final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
1108         final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater =
1109                 new HashMap<>();
1110 
1111         for (int i = 0; i < UNIGRAM_COUNT; i++) {
1112             final String word = CodePointUtils.generateWord(random, codePointSet);
1113             final int unigramProbability = random.nextInt(0xFF);
1114             addUnigramWord(binaryDictionary, word, unigramProbability);
1115             if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1116                 binaryDictionary.flushWithGC();
1117             }
1118             words.add(word);
1119             wordProbabilitiesToCheckLater.put(word, unigramProbability);
1120         }
1121 
1122         for (int i = 0; i < BIGRAM_COUNT; i++) {
1123             final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size());
1124             final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size());
1125             if (word0Index == word1Index) {
1126                 continue;
1127             }
1128             final String word0 = words.get(word0Index);
1129             final String word1 = words.get(word1Index);
1130             final int unigramProbability = wordProbabilitiesToCheckLater.get(word1);
1131             final int bigramProbability =
1132                     unigramProbability + random.nextInt(0xFF - unigramProbability);
1133             addBigramWords(binaryDictionary, word0, word1, bigramProbability);
1134             if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1135                 binaryDictionary.flushWithGC();
1136             }
1137             if (!bigrams.containsKey(word0)) {
1138                 final HashSet<String> bigramWord1s = new HashSet<>();
1139                 bigrams.put(word0, bigramWord1s);
1140             }
1141             bigrams.get(word0).add(word1);
1142             bigramProbabilitiesToCheckLater.put(new Pair<>(word0, word1), bigramProbability);
1143         }
1144 
1145         final HashSet<String> wordSet = new HashSet<>(words);
1146         final HashSet<Pair<String, String>> bigramSet =
1147                 new HashSet<>(bigramProbabilitiesToCheckLater.keySet());
1148         int token = 0;
1149         do {
1150             final BinaryDictionary.GetNextWordPropertyResult result =
1151                     binaryDictionary.getNextWordProperty(token);
1152             final WordProperty wordProperty = result.mWordProperty;
1153             final String word0 = wordProperty.mWord;
1154             assertEquals((int)wordProbabilitiesToCheckLater.get(word0),
1155                     wordProperty.mProbabilityInfo.mProbability);
1156             wordSet.remove(word0);
1157             final HashSet<String> bigramWord1s = bigrams.get(word0);
1158             for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
1159                 final String word1 = wordProperty.mBigrams.get(j).mWord;
1160                 assertTrue(bigramWord1s.contains(word1));
1161                 final Pair<String, String> bigram = new Pair<>(word0, word1);
1162                 if (canCheckBigramProbability(formatVersion)) {
1163                     final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
1164                     assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
1165                 }
1166                 bigramSet.remove(bigram);
1167             }
1168             token = result.mNextToken;
1169         } while (token != 0);
1170         assertTrue(wordSet.isEmpty());
1171         assertTrue(bigramSet.isEmpty());
1172     }
1173 
testAddShortcuts()1174     public void testAddShortcuts() {
1175         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1176             testAddShortcuts(formatVersion);
1177         }
1178     }
1179 
testAddShortcuts(final int formatVersion)1180     private void testAddShortcuts(final int formatVersion) {
1181         File dictFile = null;
1182         try {
1183             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1184         } catch (IOException e) {
1185             fail("IOException while writing an initial dictionary : " + e);
1186         }
1187         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1188                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1189                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1190 
1191         final int unigramProbability = 100;
1192         final int shortcutProbability = 10;
1193         binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
1194                 shortcutProbability, false /* isBeginningOfSentence */,
1195                 false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
1196         WordProperty wordProperty = binaryDictionary.getWordProperty("aaa",
1197                 false /* isBeginningOfSentence */);
1198         assertEquals(1, wordProperty.mShortcutTargets.size());
1199         assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
1200         assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
1201         final int updatedShortcutProbability = 2;
1202         binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
1203                 updatedShortcutProbability, false /* isBeginningOfSentence */,
1204                 false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
1205         wordProperty = binaryDictionary.getWordProperty("aaa",
1206                 false /* isBeginningOfSentence */);
1207         assertEquals(1, wordProperty.mShortcutTargets.size());
1208         assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
1209         assertEquals(updatedShortcutProbability,
1210                 wordProperty.mShortcutTargets.get(0).getProbability());
1211         binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy",
1212                 shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
1213                 false /* isBlacklisted */, 0 /* timestamp */);
1214         final HashMap<String, Integer> shortcutTargets = new HashMap<>();
1215         shortcutTargets.put("zzz", updatedShortcutProbability);
1216         shortcutTargets.put("yyy", shortcutProbability);
1217         wordProperty = binaryDictionary.getWordProperty("aaa",
1218                 false /* isBeginningOfSentence */);
1219         assertEquals(2, wordProperty.mShortcutTargets.size());
1220         for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
1221             assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
1222             assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
1223                     shortcutTarget.getProbability());
1224             shortcutTargets.remove(shortcutTarget.mWord);
1225         }
1226         shortcutTargets.put("zzz", updatedShortcutProbability);
1227         shortcutTargets.put("yyy", shortcutProbability);
1228         binaryDictionary.flushWithGC();
1229         wordProperty = binaryDictionary.getWordProperty("aaa",
1230                 false /* isBeginningOfSentence */);
1231         assertEquals(2, wordProperty.mShortcutTargets.size());
1232         for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
1233             assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
1234             assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
1235                     shortcutTarget.getProbability());
1236             shortcutTargets.remove(shortcutTarget.mWord);
1237         }
1238     }
1239 
testAddManyShortcuts()1240     public void testAddManyShortcuts() {
1241         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1242             testAddManyShortcuts(formatVersion);
1243         }
1244     }
1245 
testAddManyShortcuts(final int formatVersion)1246     private void testAddManyShortcuts(final int formatVersion) {
1247         final long seed = System.currentTimeMillis();
1248         final Random random = new Random(seed);
1249         final int UNIGRAM_COUNT = 1000;
1250         final int SHORTCUT_COUNT = 10000;
1251         final int codePointSetSize = 20;
1252         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
1253 
1254         final ArrayList<String> words = new ArrayList<>();
1255         final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
1256         final HashMap<String, HashMap<String, Integer>> shortcutTargets = new HashMap<>();
1257 
1258         File dictFile = null;
1259         try {
1260             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1261         } catch (IOException e) {
1262             fail("IOException while writing an initial dictionary : " + e);
1263         }
1264         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1265                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1266                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1267 
1268         for (int i = 0; i < UNIGRAM_COUNT; i++) {
1269             final String word = CodePointUtils.generateWord(random, codePointSet);
1270             final int unigramProbability = random.nextInt(0xFF);
1271             addUnigramWord(binaryDictionary, word, unigramProbability);
1272             words.add(word);
1273             unigramProbabilities.put(word, unigramProbability);
1274             if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1275                 binaryDictionary.flushWithGC();
1276             }
1277         }
1278         for (int i = 0; i < SHORTCUT_COUNT; i++) {
1279             final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet);
1280             final int shortcutProbability = random.nextInt(0xF);
1281             final String word = words.get(random.nextInt(words.size()));
1282             final int unigramProbability = unigramProbabilities.get(word);
1283             binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget,
1284                     shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
1285                     false /* isBlacklisted */, 0 /* timestamp */);
1286             if (shortcutTargets.containsKey(word)) {
1287                 final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
1288                 shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
1289             } else {
1290                 final HashMap<String, Integer> shortcutTargetsOfWord = new HashMap<>();
1291                 shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
1292                 shortcutTargets.put(word, shortcutTargetsOfWord);
1293             }
1294             if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1295                 binaryDictionary.flushWithGC();
1296             }
1297         }
1298 
1299         for (final String word : words) {
1300             final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
1301                     false /* isBeginningOfSentence */);
1302             assertEquals((int)unigramProbabilities.get(word),
1303                     wordProperty.mProbabilityInfo.mProbability);
1304             if (!shortcutTargets.containsKey(word)) {
1305                 // The word does not have shortcut targets.
1306                 continue;
1307             }
1308             assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size());
1309             for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
1310                 final String targetCodePonts = shortcutTarget.mWord;
1311                 assertEquals((int)shortcutTargets.get(word).get(targetCodePonts),
1312                         shortcutTarget.getProbability());
1313             }
1314         }
1315     }
1316 
testDictMigration()1317     public void testDictMigration() {
1318         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1319             testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
1320         }
1321     }
1322 
testDictMigration(final int fromFormatVersion, final int toFormatVersion)1323     private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) {
1324         File dictFile = null;
1325         try {
1326             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
1327         } catch (IOException e) {
1328             fail("IOException while writing an initial dictionary : " + e);
1329         }
1330         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1331                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1332                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1333         final int unigramProbability = 100;
1334         addUnigramWord(binaryDictionary, "aaa", unigramProbability);
1335         addUnigramWord(binaryDictionary, "bbb", unigramProbability);
1336         final int bigramProbability = 150;
1337         addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
1338         final int shortcutProbability = 10;
1339         binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability,
1340                 false /* isBeginningOfSentence */, false /* isNotAWord */,
1341                 false /* isBlacklisted */, 0 /* timestamp */);
1342         binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */,
1343                 Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */,
1344                 true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */);
1345         binaryDictionary.addNgramEntry(PrevWordsInfo.BEGINNING_OF_SENTENCE,
1346                 "aaa", bigramProbability, 0 /* timestamp */);
1347         assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
1348         assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
1349         assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
1350         assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
1351         assertTrue(binaryDictionary.migrateTo(toFormatVersion));
1352         assertTrue(binaryDictionary.isValidDictionary());
1353         assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
1354         assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
1355         assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
1356         if (canCheckBigramProbability(toFormatVersion)) {
1357             assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb"));
1358             assertEquals(bigramProbability, binaryDictionary.getNgramProbability(
1359                     PrevWordsInfo.BEGINNING_OF_SENTENCE, "aaa"));
1360         }
1361         assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
1362         WordProperty wordProperty = binaryDictionary.getWordProperty("ccc",
1363                 false /* isBeginningOfSentence */);
1364         assertEquals(1, wordProperty.mShortcutTargets.size());
1365         assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord);
1366         wordProperty = binaryDictionary.getWordProperty("ddd",
1367                 false /* isBeginningOfSentence */);
1368         assertTrue(wordProperty.mIsBlacklistEntry);
1369         assertTrue(wordProperty.mIsNotAWord);
1370     }
1371 
testLargeDictMigration()1372     public void testLargeDictMigration() {
1373         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1374             testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
1375         }
1376     }
1377 
testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion)1378     private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) {
1379         final int UNIGRAM_COUNT = 3000;
1380         final int BIGRAM_COUNT = 3000;
1381         final int codePointSetSize = 50;
1382         final long seed = System.currentTimeMillis();
1383         final Random random = new Random(seed);
1384 
1385         File dictFile = null;
1386         try {
1387             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
1388         } catch (IOException e) {
1389             fail("IOException while writing an initial dictionary : " + e);
1390         }
1391         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1392                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1393                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1394 
1395         final ArrayList<String> words = new ArrayList<>();
1396         final ArrayList<Pair<String, String>> bigrams = new ArrayList<>();
1397         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
1398         final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
1399         final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
1400 
1401         for (int i = 0; i < UNIGRAM_COUNT; i++) {
1402             final String word = CodePointUtils.generateWord(random, codePointSet);
1403             final int unigramProbability = random.nextInt(0xFF);
1404             addUnigramWord(binaryDictionary, word, unigramProbability);
1405             if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1406                 binaryDictionary.flushWithGC();
1407             }
1408             words.add(word);
1409             unigramProbabilities.put(word, unigramProbability);
1410         }
1411 
1412         for (int i = 0; i < BIGRAM_COUNT; i++) {
1413             final int word0Index = random.nextInt(words.size());
1414             final int word1Index = random.nextInt(words.size());
1415             if (word0Index == word1Index) {
1416                 continue;
1417             }
1418             final String word0 = words.get(word0Index);
1419             final String word1 = words.get(word1Index);
1420             final int unigramProbability = unigramProbabilities.get(word1);
1421             final int bigramProbability =
1422                     random.nextInt(0xFF - unigramProbability) + unigramProbability;
1423             addBigramWords(binaryDictionary, word0, word1, bigramProbability);
1424             if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1425                 binaryDictionary.flushWithGC();
1426             }
1427             final Pair<String, String> bigram = new Pair<>(word0, word1);
1428             bigrams.add(bigram);
1429             bigramProbabilities.put(bigram, bigramProbability);
1430         }
1431         assertTrue(binaryDictionary.migrateTo(toFormatVersion));
1432 
1433         for (final String word : words) {
1434             assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word));
1435         }
1436         assertEquals(unigramProbabilities.size(), Integer.parseInt(
1437                 binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
1438 
1439         for (final Pair<String, String> bigram : bigrams) {
1440             if (canCheckBigramProbability(toFormatVersion)) {
1441                 assertEquals((int)bigramProbabilities.get(bigram),
1442                         getBigramProbability(binaryDictionary, bigram.first, bigram.second));
1443             }
1444             assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second));
1445         }
1446         assertEquals(bigramProbabilities.size(), Integer.parseInt(
1447                 binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
1448     }
1449 
testBeginningOfSentence()1450     public void testBeginningOfSentence() {
1451         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1452             if (supportsBeginningOfSentence(formatVersion)) {
1453                 testBeginningOfSentence(formatVersion);
1454             }
1455         }
1456     }
1457 
testBeginningOfSentence(final int formatVersion)1458     private void testBeginningOfSentence(final int formatVersion) {
1459         File dictFile = null;
1460         try {
1461             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1462         } catch (IOException e) {
1463             fail("IOException while writing an initial dictionary : " + e);
1464         }
1465         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1466                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1467                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1468         final int dummyProbability = 0;
1469         final PrevWordsInfo prevWordsInfoBeginningOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
1470         final int bigramProbability = 200;
1471         addUnigramWord(binaryDictionary, "aaa", dummyProbability);
1472         binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability,
1473                 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
1474         assertEquals(bigramProbability,
1475                 binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa"));
1476         binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability,
1477                 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
1478         addUnigramWord(binaryDictionary, "bbb", dummyProbability);
1479         binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "bbb", bigramProbability,
1480                 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
1481         binaryDictionary.flushWithGC();
1482         assertEquals(bigramProbability,
1483                 binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa"));
1484         assertEquals(bigramProbability,
1485                 binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "bbb"));
1486     }
1487 
testGetMaxFrequencyOfExactMatches()1488     public void testGetMaxFrequencyOfExactMatches() {
1489         for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1490             testGetMaxFrequencyOfExactMatches(formatVersion);
1491         }
1492     }
1493 
testGetMaxFrequencyOfExactMatches(final int formatVersion)1494     private void testGetMaxFrequencyOfExactMatches(final int formatVersion) {
1495         File dictFile = null;
1496         try {
1497             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1498         } catch (IOException e) {
1499             fail("IOException while writing an initial dictionary : " + e);
1500         }
1501         final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1502                 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1503                 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1504         addUnigramWord(binaryDictionary, "abc", 10);
1505         addUnigramWord(binaryDictionary, "aBc", 15);
1506         assertEquals(15, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1507         addUnigramWord(binaryDictionary, "ab'c", 20);
1508         assertEquals(20, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1509         addUnigramWord(binaryDictionary, "a-b-c", 25);
1510         assertEquals(25, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1511         addUnigramWord(binaryDictionary, "ab-'-'-'-c", 30);
1512         assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1513         addUnigramWord(binaryDictionary, "ab c", 255);
1514         assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1515     }
1516 }
1517