1 /*
2  *******************************************************************************
3  * Copyright (C) 2012-2014, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  *******************************************************************************
6  */
7 
8 package com.ibm.icu.text;
9 
10 import java.io.IOException;
11 import java.nio.ByteBuffer;
12 
13 import com.ibm.icu.impl.Assert;
14 import com.ibm.icu.impl.ICUBinary;
15 import com.ibm.icu.impl.ICUData;
16 import com.ibm.icu.impl.ICUResourceBundle;
17 import com.ibm.icu.util.UResourceBundle;
18 
19 final class DictionaryData {
20     // disallow instantiation
DictionaryData()21     private DictionaryData() { }
22 
23     public static final int TRIE_TYPE_BYTES = 0;
24     public static final int TRIE_TYPE_UCHARS = 1;
25     public static final int TRIE_TYPE_MASK = 7;
26     public static final int TRIE_HAS_VALUES = 8;
27     public static final int TRANSFORM_NONE = 0;
28     public static final int TRANSFORM_TYPE_OFFSET = 0x1000000;
29     public static final int TRANSFORM_TYPE_MASK = 0x7f000000;
30     public static final int TRANSFORM_OFFSET_MASK = 0x1fffff;
31 
32     public static final int IX_STRING_TRIE_OFFSET = 0;
33     public static final int IX_RESERVED1_OFFSET = 1;
34     public static final int IX_RESERVED2_OFFSET = 2;
35     public static final int IX_TOTAL_SIZE = 3;
36     public static final int IX_TRIE_TYPE = 4;
37     public static final int IX_TRANSFORM = 5;
38     public static final int IX_RESERVED6 = 6;
39     public static final int IX_RESERVED7 = 7;
40     public static final int IX_COUNT = 8;
41 
42     private static final int DATA_FORMAT_ID = 0x44696374;
43 
loadDictionaryFor(String dictType)44     public static DictionaryMatcher loadDictionaryFor(String dictType) throws IOException {
45         ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BRKITR_BASE_NAME);
46         String dictFileName = rb.getStringWithFallback("dictionaries/" + dictType);
47         dictFileName = ICUData.ICU_BRKITR_NAME + '/' + dictFileName;
48         ByteBuffer bytes = ICUBinary.getRequiredData(dictFileName);
49         ICUBinary.readHeader(bytes, DATA_FORMAT_ID, null);
50         int[] indexes = new int[IX_COUNT];
51         // TODO: read indexes[IX_STRING_TRIE_OFFSET] first, then read a variable-length indexes[]
52         for (int i = 0; i < IX_COUNT; i++) {
53             indexes[i] = bytes.getInt();
54         }
55         int offset = indexes[IX_STRING_TRIE_OFFSET];
56         Assert.assrt(offset >= (4 * IX_COUNT));
57         if (offset > (4 * IX_COUNT)) {
58             int diff = offset - (4 * IX_COUNT);
59             ICUBinary.skipBytes(bytes, diff);
60         }
61         int trieType = indexes[IX_TRIE_TYPE] & TRIE_TYPE_MASK;
62         int totalSize = indexes[IX_TOTAL_SIZE] - offset;
63         DictionaryMatcher m = null;
64         if (trieType == TRIE_TYPE_BYTES) {
65             int transform = indexes[IX_TRANSFORM];
66             byte[] data = new byte[totalSize];
67             int i;
68             for (i = 0; i < data.length; i++) {
69                 data[i] = bytes.get();
70             }
71             Assert.assrt(i == totalSize);
72             m = new BytesDictionaryMatcher(data, transform);
73         } else if (trieType == TRIE_TYPE_UCHARS) {
74             Assert.assrt(totalSize % 2 == 0);
75             int num = totalSize / 2;
76             char[] data = new char[totalSize / 2];
77             for (int i = 0; i < num; i++) {
78                 data[i] = bytes.getChar();
79             }
80             m = new CharsDictionaryMatcher(new String(data));
81         } else {
82             m = null;
83         }
84         return m;
85     }
86 }
87