1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  ******************************************************************************
5  * Copyright (C) 1996-2015, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ******************************************************************************
8  */
9 
10 package com.ibm.icu.impl;
11 
12 import java.io.IOException;
13 import java.nio.ByteBuffer;
14 import java.util.Arrays;
15 
16 import com.ibm.icu.text.UTF16;
17 
18 /**
19  * Trie implementation which stores data in int, 32 bits.
20  * 2015-sep-03: Used only in CharsetSelector which could be switched to {@link Trie2_32}
21  * as long as that does not load ICU4C selector data.
22  *
23  * @author synwee
24  * @see com.ibm.icu.impl.Trie
25  * @since release 2.1, Jan 01 2002
26  */
27 public class IntTrie extends Trie
28 {
29     // public constructors ---------------------------------------------
30 
31     /**
32     * <p>Creates a new Trie with the settings for the trie data.</p>
33     * <p>Unserialize the 32-bit-aligned input stream and use the data for the
34     * trie.</p>
35     * @param bytes file buffer to a ICU data file, containing the trie
36     * @param dataManipulate object which provides methods to parse the char
37     *                        data
38     * @throws IOException thrown when data reading fails
39     */
IntTrie(ByteBuffer bytes, DataManipulate dataManipulate)40     public IntTrie(ByteBuffer bytes, DataManipulate dataManipulate)
41                                                     throws IOException
42     {
43         super(bytes, dataManipulate);
44         if (!isIntTrie()) {
45             throw new IllegalArgumentException(
46                                "Data given does not belong to a int trie.");
47         }
48     }
49 
50     /**
51      * Make a dummy IntTrie.
52      * A dummy trie is an empty runtime trie, used when a real data trie cannot
53      * be loaded.
54      *
55      * The trie always returns the initialValue,
56      * or the leadUnitValue for lead surrogate code points.
57      * The Latin-1 part is always set up to be linear.
58      *
59      * @param initialValue the initial value that is set for all code points
60      * @param leadUnitValue the value for lead surrogate code _units_ that do not
61      *                      have associated supplementary data
62      * @param dataManipulate object which provides methods to parse the char data
63      */
64     @SuppressWarnings("all") // No way to ignore dead code warning specifically - see eclipse bug#282770
IntTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate)65     public IntTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
66         super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
67 
68         int dataLength, latin1Length, i, limit;
69         char block;
70 
71         /* calculate the actual size of the dummy trie data */
72 
73         /* max(Latin-1, block 0) */
74         dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
75         if(leadUnitValue!=initialValue) {
76             dataLength+=DATA_BLOCK_LENGTH;
77         }
78         m_data_=new int[dataLength];
79         m_dataLength_=dataLength;
80 
81         m_initialValue_=initialValue;
82 
83         /* fill the index and data arrays */
84 
85         /* indexes are preset to 0 (block 0) */
86 
87         /* Latin-1 data */
88         for(i=0; i<latin1Length; ++i) {
89             m_data_[i]=initialValue;
90         }
91 
92         if(leadUnitValue!=initialValue) {
93             /* indexes for lead surrogate code units to the block after Latin-1 */
94             block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_);
95             i=0xd800>>INDEX_STAGE_1_SHIFT_;
96             limit=0xdc00>>INDEX_STAGE_1_SHIFT_;
97             for(; i<limit; ++i) {
98                 m_index_[i]=block;
99             }
100 
101             /* data for lead surrogate code units */
102             limit=latin1Length+DATA_BLOCK_LENGTH;
103             for(i=latin1Length; i<limit; ++i) {
104                 m_data_[i]=leadUnitValue;
105             }
106         }
107     }
108 
109     // public methods --------------------------------------------------
110 
111     /**
112     * Gets the value associated with the codepoint.
113     * If no value is associated with the codepoint, a default value will be
114     * returned.
115     * @param ch codepoint
116     * @return offset to data
117     */
getCodePointValue(int ch)118     public final int getCodePointValue(int ch)
119     {
120         int offset;
121 
122         // fastpath for U+0000..U+D7FF
123         if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
124             // copy of getRawOffset()
125             offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
126                     + (ch & INDEX_STAGE_3_MASK_);
127             return m_data_[offset];
128         }
129 
130         // handle U+D800..U+10FFFF
131         offset = getCodePointOffset(ch);
132         return (offset >= 0) ? m_data_[offset] : m_initialValue_;
133     }
134 
135     /**
136     * Gets the value to the data which this lead surrogate character points
137     * to.
138     * Returned data may contain folding offset information for the next
139     * trailing surrogate character.
140     * This method does not guarantee correct results for trail surrogates.
141     * @param ch lead surrogate character
142     * @return data value
143     */
getLeadValue(char ch)144     public final int getLeadValue(char ch)
145     {
146         return m_data_[getLeadOffset(ch)];
147     }
148 
149     /**
150     * Get the value associated with the BMP code point.
151     * Lead surrogate code points are treated as normal code points, with
152     * unfolded values that may differ from getLeadValue() results.
153     * @param ch the input BMP code point
154     * @return trie data value associated with the BMP codepoint
155     */
getBMPValue(char ch)156     public final int getBMPValue(char ch)
157     {
158         return m_data_[getBMPOffset(ch)];
159     }
160 
161     /**
162     * Get the value associated with a pair of surrogates.
163     * @param lead a lead surrogate
164     * @param trail a trail surrogate
165     */
getSurrogateValue(char lead, char trail)166     public final int getSurrogateValue(char lead, char trail)
167     {
168         if (!UTF16.isLeadSurrogate(lead) || !UTF16.isTrailSurrogate(trail)) {
169             throw new IllegalArgumentException(
170                 "Argument characters do not form a supplementary character");
171         }
172         // get fold position for the next trail surrogate
173         int offset = getSurrogateOffset(lead, trail);
174 
175         // get the real data from the folded lead/trail units
176         if (offset > 0) {
177             return m_data_[offset];
178         }
179 
180         // return m_initialValue_ if there is an error
181         return m_initialValue_;
182     }
183 
184     /**
185     * Get a value from a folding offset (from the value of a lead surrogate)
186     * and a trail surrogate.
187     * @param leadvalue the value of a lead surrogate that contains the
188     *        folding offset
189     * @param trail surrogate
190     * @return trie data value associated with the trail character
191     */
getTrailValue(int leadvalue, char trail)192     public final int getTrailValue(int leadvalue, char trail)
193     {
194         if (m_dataManipulate_ == null) {
195             throw new NullPointerException(
196                              "The field DataManipulate in this Trie is null");
197         }
198         int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
199         if (offset > 0) {
200             return m_data_[getRawOffset(offset,
201                                          (char)(trail & SURROGATE_MASK_))];
202         }
203         return m_initialValue_;
204     }
205 
206     /**
207      * <p>Gets the latin 1 fast path value.</p>
208      * <p>Note this only works if latin 1 characters have their own linear
209      * array.</p>
210      * @param ch latin 1 characters
211      * @return value associated with latin character
212      */
getLatin1LinearValue(char ch)213     public final int getLatin1LinearValue(char ch)
214     {
215         return m_data_[INDEX_STAGE_3_MASK_ + 1 + ch];
216     }
217 
218     /**
219      * Checks if the argument Trie has the same data as this Trie
220      * @param other Trie to check
221      * @return true if the argument Trie has the same data as this Trie, false
222      *         otherwise
223      */
224     ///CLOVER:OFF
225     @Override
equals(Object other)226     public boolean equals(Object other)
227     {
228         boolean result = super.equals(other);
229         if (result && other instanceof IntTrie) {
230             IntTrie othertrie = (IntTrie)other;
231             if (m_initialValue_ != othertrie.m_initialValue_
232                 || !Arrays.equals(m_data_, othertrie.m_data_)) {
233                 return false;
234             }
235             return true;
236         }
237         return false;
238     }
239 
240     @Override
hashCode()241     public int hashCode() {
242         assert false : "hashCode not designed";
243         return 42;
244     }
245     ///CLOVER:ON
246 
247     // protected methods -----------------------------------------------
248 
249     /**
250     * <p>Parses the input stream and stores its trie content into a index and
251     * data array</p>
252     * @param bytes data buffer containing trie data
253     */
254     @Override
unserialize(ByteBuffer bytes)255     protected final void unserialize(ByteBuffer bytes)
256     {
257         super.unserialize(bytes);
258         // one used for initial value
259         m_data_ = ICUBinary.getInts(bytes, m_dataLength_, 0);
260         m_initialValue_ = m_data_[0];
261     }
262 
263     /**
264     * Gets the offset to the data which the surrogate pair points to.
265     * @param lead lead surrogate
266     * @param trail trailing surrogate
267     * @return offset to data
268     */
269     @Override
getSurrogateOffset(char lead, char trail)270     protected final int getSurrogateOffset(char lead, char trail)
271     {
272         if (m_dataManipulate_ == null) {
273             throw new NullPointerException(
274                              "The field DataManipulate in this Trie is null");
275         }
276         // get fold position for the next trail surrogate
277         int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
278 
279         // get the real data from the folded lead/trail units
280         if (offset > 0) {
281             return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
282         }
283 
284         // return -1 if there is an error, in this case we return the default
285         // value: m_initialValue_
286         return -1;
287     }
288 
289     /**
290     * Gets the value at the argument index.
291     * For use internally in TrieIterator
292     * @param index value at index will be retrieved
293     * @return 32 bit value
294     * @see com.ibm.icu.impl.TrieIterator
295     */
296     @Override
getValue(int index)297     protected final int getValue(int index)
298     {
299       return m_data_[index];
300     }
301 
302     /**
303     * Gets the default initial value
304     * @return 32 bit value
305     */
306     @Override
getInitialValue()307     protected final int getInitialValue()
308     {
309         return m_initialValue_;
310     }
311 
312     // package private methods -----------------------------------------
313 
314     /**
315      * Internal constructor for builder use
316      * @param index the index array to be slotted into this trie
317      * @param data the data array to be slotted into this trie
318      * @param initialvalue the initial value for this trie
319      * @param options trie options to use
320      * @param datamanipulate folding implementation
321      */
IntTrie(char index[], int data[], int initialvalue, int options, DataManipulate datamanipulate)322     IntTrie(char index[], int data[], int initialvalue, int options,
323             DataManipulate datamanipulate)
324     {
325         super(index, options, datamanipulate);
326         m_data_ = data;
327         m_dataLength_ = m_data_.length;
328         m_initialValue_ = initialvalue;
329     }
330 
331     // private data members --------------------------------------------
332 
333     /**
334     * Default value
335     */
336     private int m_initialValue_;
337     /**
338     * Array of char data
339     */
340     private int m_data_[];
341 }
342