1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html
4 /*
5  ******************************************************************************
6  * Copyright (C) 1996-2015, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  ******************************************************************************
9  */
10 
11 package android.icu.impl;
12 
13 import java.io.IOException;
14 import java.nio.ByteBuffer;
15 import java.util.Arrays;
16 
17 import android.icu.text.UTF16;
18 
19 /**
20  * Trie implementation which stores data in int, 32 bits.
21  * 2015-sep-03: Used only in CharsetSelector which could be switched to {@link Trie2_32}
22  * as long as that does not load ICU4C selector data.
23  *
24  * @author synwee
25  * @see android.icu.impl.Trie
26  * @hide Only a subset of ICU is exposed in Android
27  */
28 public class IntTrie extends Trie
29 {
30     // public constructors ---------------------------------------------
31 
32     /**
33     * <p>Creates a new Trie with the settings for the trie data.</p>
34     * <p>Unserialize the 32-bit-aligned input stream and use the data for the
35     * trie.</p>
36     * @param bytes file buffer to a ICU data file, containing the trie
37     * @param dataManipulate object which provides methods to parse the char
38     *                        data
39     * @throws IOException thrown when data reading fails
40     */
IntTrie(ByteBuffer bytes, DataManipulate dataManipulate)41     public IntTrie(ByteBuffer bytes, DataManipulate dataManipulate)
42                                                     throws IOException
43     {
44         super(bytes, dataManipulate);
45         if (!isIntTrie()) {
46             throw new IllegalArgumentException(
47                                "Data given does not belong to a int trie.");
48         }
49     }
50 
51     /**
52      * Make a dummy IntTrie.
53      * A dummy trie is an empty runtime trie, used when a real data trie cannot
54      * be loaded.
55      *
56      * The trie always returns the initialValue,
57      * or the leadUnitValue for lead surrogate code points.
58      * The Latin-1 part is always set up to be linear.
59      *
60      * @param initialValue the initial value that is set for all code points
61      * @param leadUnitValue the value for lead surrogate code _units_ that do not
62      *                      have associated supplementary data
63      * @param dataManipulate object which provides methods to parse the char data
64      */
65     @SuppressWarnings("all") // No way to ignore dead code warning specifically - see eclipse bug#282770
IntTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate)66     public IntTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
67         super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
68 
69         int dataLength, latin1Length, i, limit;
70         char block;
71 
72         /* calculate the actual size of the dummy trie data */
73 
74         /* max(Latin-1, block 0) */
75         dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
76         if(leadUnitValue!=initialValue) {
77             dataLength+=DATA_BLOCK_LENGTH;
78         }
79         m_data_=new int[dataLength];
80         m_dataLength_=dataLength;
81 
82         m_initialValue_=initialValue;
83 
84         /* fill the index and data arrays */
85 
86         /* indexes are preset to 0 (block 0) */
87 
88         /* Latin-1 data */
89         for(i=0; i<latin1Length; ++i) {
90             m_data_[i]=initialValue;
91         }
92 
93         if(leadUnitValue!=initialValue) {
94             /* indexes for lead surrogate code units to the block after Latin-1 */
95             block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_);
96             i=0xd800>>INDEX_STAGE_1_SHIFT_;
97             limit=0xdc00>>INDEX_STAGE_1_SHIFT_;
98             for(; i<limit; ++i) {
99                 m_index_[i]=block;
100             }
101 
102             /* data for lead surrogate code units */
103             limit=latin1Length+DATA_BLOCK_LENGTH;
104             for(i=latin1Length; i<limit; ++i) {
105                 m_data_[i]=leadUnitValue;
106             }
107         }
108     }
109 
110     // public methods --------------------------------------------------
111 
112     /**
113     * Gets the value associated with the codepoint.
114     * If no value is associated with the codepoint, a default value will be
115     * returned.
116     * @param ch codepoint
117     * @return offset to data
118     */
getCodePointValue(int ch)119     public final int getCodePointValue(int ch)
120     {
121         int offset;
122 
123         // fastpath for U+0000..U+D7FF
124         if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
125             // copy of getRawOffset()
126             offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
127                     + (ch & INDEX_STAGE_3_MASK_);
128             return m_data_[offset];
129         }
130 
131         // handle U+D800..U+10FFFF
132         offset = getCodePointOffset(ch);
133         return (offset >= 0) ? m_data_[offset] : m_initialValue_;
134     }
135 
136     /**
137     * Gets the value to the data which this lead surrogate character points
138     * to.
139     * Returned data may contain folding offset information for the next
140     * trailing surrogate character.
141     * This method does not guarantee correct results for trail surrogates.
142     * @param ch lead surrogate character
143     * @return data value
144     */
getLeadValue(char ch)145     public final int getLeadValue(char ch)
146     {
147         return m_data_[getLeadOffset(ch)];
148     }
149 
150     /**
151     * Get the value associated with the BMP code point.
152     * Lead surrogate code points are treated as normal code points, with
153     * unfolded values that may differ from getLeadValue() results.
154     * @param ch the input BMP code point
155     * @return trie data value associated with the BMP codepoint
156     */
getBMPValue(char ch)157     public final int getBMPValue(char ch)
158     {
159         return m_data_[getBMPOffset(ch)];
160     }
161 
162     /**
163     * Get the value associated with a pair of surrogates.
164     * @param lead a lead surrogate
165     * @param trail a trail surrogate
166     */
getSurrogateValue(char lead, char trail)167     public final int getSurrogateValue(char lead, char trail)
168     {
169         if (!UTF16.isLeadSurrogate(lead) || !UTF16.isTrailSurrogate(trail)) {
170             throw new IllegalArgumentException(
171                 "Argument characters do not form a supplementary character");
172         }
173         // get fold position for the next trail surrogate
174         int offset = getSurrogateOffset(lead, trail);
175 
176         // get the real data from the folded lead/trail units
177         if (offset > 0) {
178             return m_data_[offset];
179         }
180 
181         // return m_initialValue_ if there is an error
182         return m_initialValue_;
183     }
184 
185     /**
186     * Get a value from a folding offset (from the value of a lead surrogate)
187     * and a trail surrogate.
188     * @param leadvalue the value of a lead surrogate that contains the
189     *        folding offset
190     * @param trail surrogate
191     * @return trie data value associated with the trail character
192     */
getTrailValue(int leadvalue, char trail)193     public final int getTrailValue(int leadvalue, char trail)
194     {
195         if (m_dataManipulate_ == null) {
196             throw new NullPointerException(
197                              "The field DataManipulate in this Trie is null");
198         }
199         int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
200         if (offset > 0) {
201             return m_data_[getRawOffset(offset,
202                                          (char)(trail & SURROGATE_MASK_))];
203         }
204         return m_initialValue_;
205     }
206 
207     /**
208      * <p>Gets the latin 1 fast path value.</p>
209      * <p>Note this only works if latin 1 characters have their own linear
210      * array.</p>
211      * @param ch latin 1 characters
212      * @return value associated with latin character
213      */
getLatin1LinearValue(char ch)214     public final int getLatin1LinearValue(char ch)
215     {
216         return m_data_[INDEX_STAGE_3_MASK_ + 1 + ch];
217     }
218 
219     /**
220      * Checks if the argument Trie has the same data as this Trie
221      * @param other Trie to check
222      * @return true if the argument Trie has the same data as this Trie, false
223      *         otherwise
224      */
225     ///CLOVER:OFF
226     @Override
equals(Object other)227     public boolean equals(Object other)
228     {
229         boolean result = super.equals(other);
230         if (result && other instanceof IntTrie) {
231             IntTrie othertrie = (IntTrie)other;
232             if (m_initialValue_ != othertrie.m_initialValue_
233                 || !Arrays.equals(m_data_, othertrie.m_data_)) {
234                 return false;
235             }
236             return true;
237         }
238         return false;
239     }
240 
241     @Override
hashCode()242     public int hashCode() {
243         assert false : "hashCode not designed";
244         return 42;
245     }
246     ///CLOVER:ON
247 
248     // protected methods -----------------------------------------------
249 
250     /**
251     * <p>Parses the input stream and stores its trie content into a index and
252     * data array</p>
253     * @param bytes data buffer containing trie data
254     */
255     @Override
unserialize(ByteBuffer bytes)256     protected final void unserialize(ByteBuffer bytes)
257     {
258         super.unserialize(bytes);
259         // one used for initial value
260         m_data_ = ICUBinary.getInts(bytes, m_dataLength_, 0);
261         m_initialValue_ = m_data_[0];
262     }
263 
264     /**
265     * Gets the offset to the data which the surrogate pair points to.
266     * @param lead lead surrogate
267     * @param trail trailing surrogate
268     * @return offset to data
269     */
270     @Override
getSurrogateOffset(char lead, char trail)271     protected final int getSurrogateOffset(char lead, char trail)
272     {
273         if (m_dataManipulate_ == null) {
274             throw new NullPointerException(
275                              "The field DataManipulate in this Trie is null");
276         }
277         // get fold position for the next trail surrogate
278         int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
279 
280         // get the real data from the folded lead/trail units
281         if (offset > 0) {
282             return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
283         }
284 
285         // return -1 if there is an error, in this case we return the default
286         // value: m_initialValue_
287         return -1;
288     }
289 
290     /**
291     * Gets the value at the argument index.
292     * For use internally in TrieIterator
293     * @param index value at index will be retrieved
294     * @return 32 bit value
295     * @see android.icu.impl.TrieIterator
296     */
297     @Override
getValue(int index)298     protected final int getValue(int index)
299     {
300       return m_data_[index];
301     }
302 
303     /**
304     * Gets the default initial value
305     * @return 32 bit value
306     */
307     @Override
getInitialValue()308     protected final int getInitialValue()
309     {
310         return m_initialValue_;
311     }
312 
313     // package private methods -----------------------------------------
314 
315     /**
316      * Internal constructor for builder use
317      * @param index the index array to be slotted into this trie
318      * @param data the data array to be slotted into this trie
319      * @param initialvalue the initial value for this trie
320      * @param options trie options to use
321      * @param datamanipulate folding implementation
322      */
IntTrie(char index[], int data[], int initialvalue, int options, DataManipulate datamanipulate)323     IntTrie(char index[], int data[], int initialvalue, int options,
324             DataManipulate datamanipulate)
325     {
326         super(index, options, datamanipulate);
327         m_data_ = data;
328         m_dataLength_ = m_data_.length;
329         m_initialValue_ = initialvalue;
330     }
331 
332     // private data members --------------------------------------------
333 
334     /**
335     * Default value
336     */
337     private int m_initialValue_;
338     /**
339     * Array of char data
340     */
341     private int m_data_[];
342 }
343