1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2011-2014, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   created on: 2011jan05
9 *   created by: Markus W. Scherer
10 *   ported from ICU4C bytestriebuilder.h/.cpp
11 */
12 
13 package com.ibm.icu.util;
14 
15 import java.nio.ByteBuffer;
16 
17 /**
18  * Builder class for BytesTrie.
19  *
20  * <p>This class is not intended for public subclassing.
21  *
22  * @stable ICU 4.8
23  * @author Markus W. Scherer
24  */
25 public final class BytesTrieBuilder extends StringTrieBuilder {
26     /**
27      * Constructs an empty builder.
28      * @stable ICU 4.8
29      */
BytesTrieBuilder()30     public BytesTrieBuilder() {}
31 
32     // Used in add() to wrap the bytes into a CharSequence for StringTrieBuilder.addImpl().
33     private static final class BytesAsCharSequence implements CharSequence {
BytesAsCharSequence(byte[] sequence, int length)34         public BytesAsCharSequence(byte[] sequence, int length) {
35             s=sequence;
36             len=length;
37         }
charAt(int i)38         public char charAt(int i) { return (char)(s[i]&0xff); }
length()39         public int length() { return len; }
subSequence(int start, int end)40         public CharSequence subSequence(int start, int end) { return null; }
41 
42         private byte[] s;
43         private int len;
44     }
45 
46     /**
47      * Adds a (byte sequence, value) pair.
48      * The byte sequence must be unique.
49      * Bytes 0..length-1 will be copied; the builder does not keep
50      * a reference to the input array.
51      * @param sequence The array that contains the byte sequence, starting at index 0.
52      * @param length The length of the byte sequence.
53      * @param value The value associated with this byte sequence.
54      * @return this
55      * @stable ICU 4.8
56      */
add(byte[] sequence, int length, int value)57     public BytesTrieBuilder add(byte[] sequence, int length, int value) {
58         addImpl(new BytesAsCharSequence(sequence, length), value);
59         return this;
60     }
61 
62     /**
63      * Builds a BytesTrie for the add()ed data.
64      * Once built, no further data can be add()ed until clear() is called.
65      *
66      * <p>A BytesTrie cannot be empty. At least one (byte sequence, value) pair
67      * must have been add()ed.
68      *
69      * <p>Multiple calls to build() or buildByteBuffer() return tries or buffers
70      * which share the builder's byte array, without rebuilding.
71      * <em>The byte array must not be modified via the buildByteBuffer() result object.</em>
72      * After clear() has been called, a new array will be used.
73      * @param buildOption Build option, see StringTrieBuilder.Option.
74      * @return A new BytesTrie for the add()ed data.
75      * @stable ICU 4.8
76      */
build(StringTrieBuilder.Option buildOption)77     public BytesTrie build(StringTrieBuilder.Option buildOption) {
78         buildBytes(buildOption);
79         return new BytesTrie(bytes, bytes.length-bytesLength);
80     }
81 
82     /**
83      * Builds a BytesTrie for the add()ed data and byte-serializes it.
84      * Once built, no further data can be add()ed until clear() is called.
85      *
86      * <p>A BytesTrie cannot be empty. At least one (byte sequence, value) pair
87      * must have been add()ed.
88      *
89      * <p>Multiple calls to build() or buildByteBuffer() return tries or buffers
90      * which share the builder's byte array, without rebuilding.
91      * <em>Do not modify the bytes in the buffer!</em>
92      * After clear() has been called, a new array will be used.
93      *
94      * <p>The serialized BytesTrie is accessible via the buffer's
95      * array()/arrayOffset()+position() or remaining()/get(byte[]) etc.
96      * @param buildOption Build option, see StringTrieBuilder.Option.
97      * @return A ByteBuffer with the byte-serialized BytesTrie for the add()ed data.
98      *         The buffer is not read-only and array() can be called.
99      * @stable ICU 4.8
100      */
buildByteBuffer(StringTrieBuilder.Option buildOption)101     public ByteBuffer buildByteBuffer(StringTrieBuilder.Option buildOption) {
102         buildBytes(buildOption);
103         return ByteBuffer.wrap(bytes, bytes.length-bytesLength, bytesLength);
104     }
105 
buildBytes(StringTrieBuilder.Option buildOption)106     private void buildBytes(StringTrieBuilder.Option buildOption) {
107         // Create and byte-serialize the trie for the elements.
108         if(bytes==null) {
109             bytes=new byte[1024];
110         }
111         buildImpl(buildOption);
112     }
113 
114     /**
115      * Removes all (byte sequence, value) pairs.
116      * New data can then be add()ed and a new trie can be built.
117      * @return this
118      * @stable ICU 4.8
119      */
clear()120     public BytesTrieBuilder clear() {
121         clearImpl();
122         bytes=null;
123         bytesLength=0;
124         return this;
125     }
126 
127     /**
128      * {@inheritDoc}
129      * @internal
130      * @deprecated This API is ICU internal only.
131      */
132     @Deprecated
133     @Override
matchNodesCanHaveValues()134     protected boolean matchNodesCanHaveValues() /*const*/ { return false; }
135 
136     /**
137      * {@inheritDoc}
138      * @internal
139      * @deprecated This API is ICU internal only.
140      */
141     @Deprecated
142     @Override
getMaxBranchLinearSubNodeLength()143     protected int getMaxBranchLinearSubNodeLength() /*const*/ { return BytesTrie.kMaxBranchLinearSubNodeLength; }
144     /**
145      * {@inheritDoc}
146      * @internal
147      * @deprecated This API is ICU internal only.
148      */
149     @Deprecated
150     @Override
getMinLinearMatch()151     protected int getMinLinearMatch() /*const*/ { return BytesTrie.kMinLinearMatch; }
152     /**
153      * {@inheritDoc}
154      * @internal
155      * @deprecated This API is ICU internal only.
156      */
157     @Deprecated
158     @Override
getMaxLinearMatchLength()159     protected int getMaxLinearMatchLength() /*const*/ { return BytesTrie.kMaxLinearMatchLength; }
160 
ensureCapacity(int length)161     private void ensureCapacity(int length) {
162         if(length>bytes.length) {
163             int newCapacity=bytes.length;
164             do {
165                 newCapacity*=2;
166             } while(newCapacity<=length);
167             byte[] newBytes=new byte[newCapacity];
168             System.arraycopy(bytes, bytes.length-bytesLength,
169                              newBytes, newBytes.length-bytesLength, bytesLength);
170             bytes=newBytes;
171         }
172     }
173     /**
174      * {@inheritDoc}
175      * @internal
176      * @deprecated This API is ICU internal only.
177      */
178     @Deprecated
179     @Override
write(int b)180     protected int write(int b) {
181         int newLength=bytesLength+1;
182         ensureCapacity(newLength);
183         bytesLength=newLength;
184         bytes[bytes.length-bytesLength]=(byte)b;
185         return bytesLength;
186     }
187     /**
188      * {@inheritDoc}
189      * @internal
190      * @deprecated This API is ICU internal only.
191      */
192     @Deprecated
193     @Override
write(int offset, int length)194     protected int write(int offset, int length) {
195         int newLength=bytesLength+length;
196         ensureCapacity(newLength);
197         bytesLength=newLength;
198         int bytesOffset=bytes.length-bytesLength;
199         while(length>0) {
200             bytes[bytesOffset++]=(byte)strings.charAt(offset++);
201             --length;
202         }
203         return bytesLength;
204     }
write(byte[] b, int length)205     private int write(byte[] b, int length) {
206         int newLength=bytesLength+length;
207         ensureCapacity(newLength);
208         bytesLength=newLength;
209         System.arraycopy(b, 0, bytes, bytes.length-bytesLength, length);
210         return bytesLength;
211     }
212 
213     // For writeValueAndFinal() and writeDeltaTo().
214     private final byte[] intBytes=new byte[5];
215 
216     /**
217      * {@inheritDoc}
218      * @internal
219      * @deprecated This API is ICU internal only.
220      */
221     @Deprecated
222     @Override
writeValueAndFinal(int i, boolean isFinal)223     protected int writeValueAndFinal(int i, boolean isFinal) {
224         if(0<=i && i<=BytesTrie.kMaxOneByteValue) {
225             return write(((BytesTrie.kMinOneByteValueLead+i)<<1)|(isFinal?1:0));
226         }
227         int length=1;
228         if(i<0 || i>0xffffff) {
229             intBytes[0]=(byte)BytesTrie.kFiveByteValueLead;
230             intBytes[1]=(byte)(i>>24);
231             intBytes[2]=(byte)(i>>16);
232             intBytes[3]=(byte)(i>>8);
233             intBytes[4]=(byte)i;
234             length=5;
235         // } else if(i<=BytesTrie.kMaxOneByteValue) {
236         //     intBytes[0]=(byte)(BytesTrie.kMinOneByteValueLead+i);
237         } else {
238             if(i<=BytesTrie.kMaxTwoByteValue) {
239                 intBytes[0]=(byte)(BytesTrie.kMinTwoByteValueLead+(i>>8));
240             } else {
241                 if(i<=BytesTrie.kMaxThreeByteValue) {
242                     intBytes[0]=(byte)(BytesTrie.kMinThreeByteValueLead+(i>>16));
243                 } else {
244                     intBytes[0]=(byte)BytesTrie.kFourByteValueLead;
245                     intBytes[1]=(byte)(i>>16);
246                     length=2;
247                 }
248                 intBytes[length++]=(byte)(i>>8);
249             }
250             intBytes[length++]=(byte)i;
251         }
252         intBytes[0]=(byte)((intBytes[0]<<1)|(isFinal?1:0));
253         return write(intBytes, length);
254     }
255     /**
256      * {@inheritDoc}
257      * @internal
258      * @deprecated This API is ICU internal only.
259      */
260     @Deprecated
261     @Override
writeValueAndType(boolean hasValue, int value, int node)262     protected int writeValueAndType(boolean hasValue, int value, int node) {
263         int offset=write(node);
264         if(hasValue) {
265             offset=writeValueAndFinal(value, false);
266         }
267         return offset;
268     }
269     /**
270      * {@inheritDoc}
271      * @internal
272      * @deprecated This API is ICU internal only.
273      */
274     @Deprecated
275     @Override
writeDeltaTo(int jumpTarget)276     protected int writeDeltaTo(int jumpTarget) {
277         int i=bytesLength-jumpTarget;
278         assert(i>=0);
279         if(i<=BytesTrie.kMaxOneByteDelta) {
280             return write(i);
281         }
282         int length;
283         if(i<=BytesTrie.kMaxTwoByteDelta) {
284             intBytes[0]=(byte)(BytesTrie.kMinTwoByteDeltaLead+(i>>8));
285             length=1;
286         } else {
287             if(i<=BytesTrie.kMaxThreeByteDelta) {
288                 intBytes[0]=(byte)(BytesTrie.kMinThreeByteDeltaLead+(i>>16));
289                 length=2;
290             } else {
291                 if(i<=0xffffff) {
292                     intBytes[0]=(byte)BytesTrie.kFourByteDeltaLead;
293                     length=3;
294                 } else {
295                     intBytes[0]=(byte)BytesTrie.kFiveByteDeltaLead;
296                     intBytes[1]=(byte)(i>>24);
297                     length=4;
298                 }
299                 intBytes[1]=(byte)(i>>16);
300             }
301             intBytes[1]=(byte)(i>>8);
302         }
303         intBytes[length++]=(byte)i;
304         return write(intBytes, length);
305     }
306 
307     // Byte serialization of the trie.
308     // Grows from the back: bytesLength measures from the end of the buffer!
309     private byte[] bytes;
310     private int bytesLength;
311 }
312