1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2011-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * created on: 2011jan05 9 * created by: Markus W. Scherer 10 * ported from ICU4C bytestriebuilder.h/.cpp 11 */ 12 13 package com.ibm.icu.util; 14 15 import java.nio.ByteBuffer; 16 17 /** 18 * Builder class for BytesTrie. 19 * 20 * <p>This class is not intended for public subclassing. 21 * 22 * @stable ICU 4.8 23 * @author Markus W. Scherer 24 */ 25 public final class BytesTrieBuilder extends StringTrieBuilder { 26 /** 27 * Constructs an empty builder. 28 * @stable ICU 4.8 29 */ BytesTrieBuilder()30 public BytesTrieBuilder() {} 31 32 // Used in add() to wrap the bytes into a CharSequence for StringTrieBuilder.addImpl(). 33 private static final class BytesAsCharSequence implements CharSequence { BytesAsCharSequence(byte[] sequence, int length)34 public BytesAsCharSequence(byte[] sequence, int length) { 35 s=sequence; 36 len=length; 37 } charAt(int i)38 public char charAt(int i) { return (char)(s[i]&0xff); } length()39 public int length() { return len; } subSequence(int start, int end)40 public CharSequence subSequence(int start, int end) { return null; } 41 42 private byte[] s; 43 private int len; 44 } 45 46 /** 47 * Adds a (byte sequence, value) pair. 48 * The byte sequence must be unique. 49 * Bytes 0..length-1 will be copied; the builder does not keep 50 * a reference to the input array. 51 * @param sequence The array that contains the byte sequence, starting at index 0. 52 * @param length The length of the byte sequence. 53 * @param value The value associated with this byte sequence. 54 * @return this 55 * @stable ICU 4.8 56 */ add(byte[] sequence, int length, int value)57 public BytesTrieBuilder add(byte[] sequence, int length, int value) { 58 addImpl(new BytesAsCharSequence(sequence, length), value); 59 return this; 60 } 61 62 /** 63 * Builds a BytesTrie for the add()ed data. 64 * Once built, no further data can be add()ed until clear() is called. 65 * 66 * <p>A BytesTrie cannot be empty. At least one (byte sequence, value) pair 67 * must have been add()ed. 68 * 69 * <p>Multiple calls to build() or buildByteBuffer() return tries or buffers 70 * which share the builder's byte array, without rebuilding. 71 * <em>The byte array must not be modified via the buildByteBuffer() result object.</em> 72 * After clear() has been called, a new array will be used. 73 * @param buildOption Build option, see StringTrieBuilder.Option. 74 * @return A new BytesTrie for the add()ed data. 75 * @stable ICU 4.8 76 */ build(StringTrieBuilder.Option buildOption)77 public BytesTrie build(StringTrieBuilder.Option buildOption) { 78 buildBytes(buildOption); 79 return new BytesTrie(bytes, bytes.length-bytesLength); 80 } 81 82 /** 83 * Builds a BytesTrie for the add()ed data and byte-serializes it. 84 * Once built, no further data can be add()ed until clear() is called. 85 * 86 * <p>A BytesTrie cannot be empty. At least one (byte sequence, value) pair 87 * must have been add()ed. 88 * 89 * <p>Multiple calls to build() or buildByteBuffer() return tries or buffers 90 * which share the builder's byte array, without rebuilding. 91 * <em>Do not modify the bytes in the buffer!</em> 92 * After clear() has been called, a new array will be used. 93 * 94 * <p>The serialized BytesTrie is accessible via the buffer's 95 * array()/arrayOffset()+position() or remaining()/get(byte[]) etc. 96 * @param buildOption Build option, see StringTrieBuilder.Option. 97 * @return A ByteBuffer with the byte-serialized BytesTrie for the add()ed data. 98 * The buffer is not read-only and array() can be called. 99 * @stable ICU 4.8 100 */ buildByteBuffer(StringTrieBuilder.Option buildOption)101 public ByteBuffer buildByteBuffer(StringTrieBuilder.Option buildOption) { 102 buildBytes(buildOption); 103 return ByteBuffer.wrap(bytes, bytes.length-bytesLength, bytesLength); 104 } 105 buildBytes(StringTrieBuilder.Option buildOption)106 private void buildBytes(StringTrieBuilder.Option buildOption) { 107 // Create and byte-serialize the trie for the elements. 108 if(bytes==null) { 109 bytes=new byte[1024]; 110 } 111 buildImpl(buildOption); 112 } 113 114 /** 115 * Removes all (byte sequence, value) pairs. 116 * New data can then be add()ed and a new trie can be built. 117 * @return this 118 * @stable ICU 4.8 119 */ clear()120 public BytesTrieBuilder clear() { 121 clearImpl(); 122 bytes=null; 123 bytesLength=0; 124 return this; 125 } 126 127 /** 128 * {@inheritDoc} 129 * @internal 130 * @deprecated This API is ICU internal only. 131 */ 132 @Deprecated 133 @Override matchNodesCanHaveValues()134 protected boolean matchNodesCanHaveValues() /*const*/ { return false; } 135 136 /** 137 * {@inheritDoc} 138 * @internal 139 * @deprecated This API is ICU internal only. 140 */ 141 @Deprecated 142 @Override getMaxBranchLinearSubNodeLength()143 protected int getMaxBranchLinearSubNodeLength() /*const*/ { return BytesTrie.kMaxBranchLinearSubNodeLength; } 144 /** 145 * {@inheritDoc} 146 * @internal 147 * @deprecated This API is ICU internal only. 148 */ 149 @Deprecated 150 @Override getMinLinearMatch()151 protected int getMinLinearMatch() /*const*/ { return BytesTrie.kMinLinearMatch; } 152 /** 153 * {@inheritDoc} 154 * @internal 155 * @deprecated This API is ICU internal only. 156 */ 157 @Deprecated 158 @Override getMaxLinearMatchLength()159 protected int getMaxLinearMatchLength() /*const*/ { return BytesTrie.kMaxLinearMatchLength; } 160 ensureCapacity(int length)161 private void ensureCapacity(int length) { 162 if(length>bytes.length) { 163 int newCapacity=bytes.length; 164 do { 165 newCapacity*=2; 166 } while(newCapacity<=length); 167 byte[] newBytes=new byte[newCapacity]; 168 System.arraycopy(bytes, bytes.length-bytesLength, 169 newBytes, newBytes.length-bytesLength, bytesLength); 170 bytes=newBytes; 171 } 172 } 173 /** 174 * {@inheritDoc} 175 * @internal 176 * @deprecated This API is ICU internal only. 177 */ 178 @Deprecated 179 @Override write(int b)180 protected int write(int b) { 181 int newLength=bytesLength+1; 182 ensureCapacity(newLength); 183 bytesLength=newLength; 184 bytes[bytes.length-bytesLength]=(byte)b; 185 return bytesLength; 186 } 187 /** 188 * {@inheritDoc} 189 * @internal 190 * @deprecated This API is ICU internal only. 191 */ 192 @Deprecated 193 @Override write(int offset, int length)194 protected int write(int offset, int length) { 195 int newLength=bytesLength+length; 196 ensureCapacity(newLength); 197 bytesLength=newLength; 198 int bytesOffset=bytes.length-bytesLength; 199 while(length>0) { 200 bytes[bytesOffset++]=(byte)strings.charAt(offset++); 201 --length; 202 } 203 return bytesLength; 204 } write(byte[] b, int length)205 private int write(byte[] b, int length) { 206 int newLength=bytesLength+length; 207 ensureCapacity(newLength); 208 bytesLength=newLength; 209 System.arraycopy(b, 0, bytes, bytes.length-bytesLength, length); 210 return bytesLength; 211 } 212 213 // For writeValueAndFinal() and writeDeltaTo(). 214 private final byte[] intBytes=new byte[5]; 215 216 /** 217 * {@inheritDoc} 218 * @internal 219 * @deprecated This API is ICU internal only. 220 */ 221 @Deprecated 222 @Override writeValueAndFinal(int i, boolean isFinal)223 protected int writeValueAndFinal(int i, boolean isFinal) { 224 if(0<=i && i<=BytesTrie.kMaxOneByteValue) { 225 return write(((BytesTrie.kMinOneByteValueLead+i)<<1)|(isFinal?1:0)); 226 } 227 int length=1; 228 if(i<0 || i>0xffffff) { 229 intBytes[0]=(byte)BytesTrie.kFiveByteValueLead; 230 intBytes[1]=(byte)(i>>24); 231 intBytes[2]=(byte)(i>>16); 232 intBytes[3]=(byte)(i>>8); 233 intBytes[4]=(byte)i; 234 length=5; 235 // } else if(i<=BytesTrie.kMaxOneByteValue) { 236 // intBytes[0]=(byte)(BytesTrie.kMinOneByteValueLead+i); 237 } else { 238 if(i<=BytesTrie.kMaxTwoByteValue) { 239 intBytes[0]=(byte)(BytesTrie.kMinTwoByteValueLead+(i>>8)); 240 } else { 241 if(i<=BytesTrie.kMaxThreeByteValue) { 242 intBytes[0]=(byte)(BytesTrie.kMinThreeByteValueLead+(i>>16)); 243 } else { 244 intBytes[0]=(byte)BytesTrie.kFourByteValueLead; 245 intBytes[1]=(byte)(i>>16); 246 length=2; 247 } 248 intBytes[length++]=(byte)(i>>8); 249 } 250 intBytes[length++]=(byte)i; 251 } 252 intBytes[0]=(byte)((intBytes[0]<<1)|(isFinal?1:0)); 253 return write(intBytes, length); 254 } 255 /** 256 * {@inheritDoc} 257 * @internal 258 * @deprecated This API is ICU internal only. 259 */ 260 @Deprecated 261 @Override writeValueAndType(boolean hasValue, int value, int node)262 protected int writeValueAndType(boolean hasValue, int value, int node) { 263 int offset=write(node); 264 if(hasValue) { 265 offset=writeValueAndFinal(value, false); 266 } 267 return offset; 268 } 269 /** 270 * {@inheritDoc} 271 * @internal 272 * @deprecated This API is ICU internal only. 273 */ 274 @Deprecated 275 @Override writeDeltaTo(int jumpTarget)276 protected int writeDeltaTo(int jumpTarget) { 277 int i=bytesLength-jumpTarget; 278 assert(i>=0); 279 if(i<=BytesTrie.kMaxOneByteDelta) { 280 return write(i); 281 } 282 int length; 283 if(i<=BytesTrie.kMaxTwoByteDelta) { 284 intBytes[0]=(byte)(BytesTrie.kMinTwoByteDeltaLead+(i>>8)); 285 length=1; 286 } else { 287 if(i<=BytesTrie.kMaxThreeByteDelta) { 288 intBytes[0]=(byte)(BytesTrie.kMinThreeByteDeltaLead+(i>>16)); 289 length=2; 290 } else { 291 if(i<=0xffffff) { 292 intBytes[0]=(byte)BytesTrie.kFourByteDeltaLead; 293 length=3; 294 } else { 295 intBytes[0]=(byte)BytesTrie.kFiveByteDeltaLead; 296 intBytes[1]=(byte)(i>>24); 297 length=4; 298 } 299 intBytes[1]=(byte)(i>>16); 300 } 301 intBytes[1]=(byte)(i>>8); 302 } 303 intBytes[length++]=(byte)i; 304 return write(intBytes, length); 305 } 306 307 // Byte serialization of the trie. 308 // Grows from the back: bytesLength measures from the end of the buffer! 309 private byte[] bytes; 310 private int bytesLength; 311 } 312