1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2009, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10 package android.icu.impl; 11 12 /** 13 * @author aheninger 14 * 15 * A Trie2Writable is a modifiable, or build-time Trie2. 16 * Functions for reading data from the Trie are all from class Trie2. 17 * @hide Only a subset of ICU is exposed in Android 18 * 19 */ 20 public class Trie2Writable extends Trie2 { 21 22 23 /** 24 * Create a new, empty, writable Trie2. 32-bit data values are used. 25 * 26 * @param initialValueP the initial value that is set for all code points 27 * @param errorValueP the value for out-of-range code points and illegal UTF-8 28 */ Trie2Writable(int initialValueP, int errorValueP)29 public Trie2Writable(int initialValueP, int errorValueP) { 30 // This constructor corresponds to utrie2_open() in ICU4C. 31 init(initialValueP, errorValueP); 32 } 33 34 init(int initialValueP, int errorValueP)35 private void init(int initialValueP, int errorValueP) { 36 this.initialValue = initialValueP; 37 this.errorValue = errorValueP; 38 this.highStart = 0x110000; 39 40 this.data = new int[UNEWTRIE2_INITIAL_DATA_LENGTH]; 41 this.dataCapacity = UNEWTRIE2_INITIAL_DATA_LENGTH; 42 this.initialValue = initialValueP; 43 this.errorValue = errorValueP; 44 this.highStart = 0x110000; 45 this.firstFreeBlock = 0; /* no free block in the list */ 46 this.isCompacted = false; 47 48 /* 49 * preallocate and reset 50 * - ASCII 51 * - the bad-UTF-8-data block 52 * - the null data block 53 */ 54 int i, j; 55 for(i=0; i<0x80; ++i) { 56 data[i] = initialValue; 57 } 58 for(; i<0xc0; ++i) { 59 data[i] = errorValue; 60 } 61 for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) { 62 data[i] = initialValue; 63 } 64 dataNullOffset = UNEWTRIE2_DATA_NULL_OFFSET; 65 dataLength = UNEWTRIE2_DATA_START_OFFSET; 66 67 /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */ 68 for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { 69 index2[i]=j; 70 map[i]=1; 71 } 72 73 /* reference counts for the bad-UTF-8-data block */ 74 for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { 75 map[i]=0; 76 } 77 78 /* 79 * Reference counts for the null data block: all blocks except for the ASCII blocks. 80 * Plus 1 so that we don't drop this block during compaction. 81 * Plus as many as needed for lead surrogate code points. 82 */ 83 /* i==newTrie->dataNullOffset */ 84 map[i++] = 85 (0x110000>>UTRIE2_SHIFT_2) - 86 (0x80>>UTRIE2_SHIFT_2) + 87 1 + 88 UTRIE2_LSCP_INDEX_2_LENGTH; 89 j += UTRIE2_DATA_BLOCK_LENGTH; 90 for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { 91 map[i]=0; 92 } 93 94 /* 95 * set the remaining indexes in the BMP index-2 block 96 * to the null data block 97 */ 98 for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { 99 index2[i]=UNEWTRIE2_DATA_NULL_OFFSET; 100 } 101 102 /* 103 * Fill the index gap with impossible values so that compaction 104 * does not overlap other index-2 blocks with the gap. 105 */ 106 for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) { 107 index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1; 108 } 109 110 /* set the indexes in the null index-2 block */ 111 for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) { 112 index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET; 113 } 114 index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET; 115 index2Length=UNEWTRIE2_INDEX_2_START_OFFSET; 116 117 /* set the index-1 indexes for the linear index-2 block */ 118 for(i=0, j=0; 119 i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; 120 ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH 121 ) { 122 index1[i]=j; 123 } 124 125 /* set the remaining index-1 indexes to the null index-2 block */ 126 for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) { 127 index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET; 128 } 129 130 /* 131 * Preallocate and reset data for U+0080..U+07ff, 132 * for 2-byte UTF-8 which will be compacted in 64-blocks 133 * even if UTRIE2_DATA_BLOCK_LENGTH is smaller. 134 */ 135 for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) { 136 set(i, initialValue); 137 } 138 139 } 140 141 142 /** 143 * Create a new build time (modifiable) Trie2 whose contents are the same as the source Trie2. 144 * 145 * @param source the source Trie2. Its contents will be copied into the new Trie2. 146 */ Trie2Writable(Trie2 source)147 public Trie2Writable(Trie2 source) { 148 init(source.initialValue, source.errorValue); 149 150 for (Range r: source) { 151 setRange(r, true); 152 } 153 } 154 155 isInNullBlock(int c, boolean forLSCP)156 private boolean isInNullBlock(int c, boolean forLSCP) { 157 int i2, block; 158 159 if(Character.isHighSurrogate((char)c) && forLSCP) { 160 i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ 161 (c>>UTRIE2_SHIFT_2); 162 } else { 163 i2=index1[c>>UTRIE2_SHIFT_1]+ 164 ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); 165 } 166 block=index2[i2]; 167 return (block==dataNullOffset); 168 } 169 allocIndex2Block()170 private int allocIndex2Block() { 171 int newBlock, newTop; 172 173 newBlock=index2Length; 174 newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH; 175 if(newTop > index2.length) { 176 throw new IllegalStateException("Internal error in Trie2 creation."); 177 /* 178 * Should never occur. 179 * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect, 180 * or the code writes more values than should be possible. 181 */ 182 } 183 index2Length=newTop; 184 System.arraycopy(index2, index2NullOffset, index2, newBlock, UTRIE2_INDEX_2_BLOCK_LENGTH); 185 return newBlock; 186 } 187 getIndex2Block(int c, boolean forLSCP)188 private int getIndex2Block(int c, boolean forLSCP) { 189 int i1, i2; 190 191 if(c>=0xd800 && c<0xdc00 && forLSCP) { 192 return UTRIE2_LSCP_INDEX_2_OFFSET; 193 } 194 195 i1=c>>UTRIE2_SHIFT_1; 196 i2=index1[i1]; 197 if(i2==index2NullOffset) { 198 i2=allocIndex2Block(); 199 index1[i1]=i2; 200 } 201 return i2; 202 } 203 allocDataBlock(int copyBlock)204 private int allocDataBlock(int copyBlock) { 205 int newBlock, newTop; 206 207 if(firstFreeBlock!=0) { 208 /* get the first free block */ 209 newBlock=firstFreeBlock; 210 firstFreeBlock=-map[newBlock>>UTRIE2_SHIFT_2]; 211 } else { 212 /* get a new block from the high end */ 213 newBlock=dataLength; 214 newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH; 215 if(newTop>dataCapacity) { 216 /* out of memory in the data array */ 217 int capacity; 218 int[] newData; 219 220 if(dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) { 221 capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH; 222 } else if(dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) { 223 capacity=UNEWTRIE2_MAX_DATA_LENGTH; 224 } else { 225 /* 226 * Should never occur. 227 * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect, 228 * or the code writes more values than should be possible. 229 */ 230 throw new IllegalStateException("Internal error in Trie2 creation."); 231 } 232 newData = new int[capacity]; 233 System.arraycopy(data, 0, newData, 0, dataLength); 234 data=newData; 235 dataCapacity=capacity; 236 } 237 dataLength=newTop; 238 } 239 System.arraycopy(data, copyBlock, data, newBlock, UTRIE2_DATA_BLOCK_LENGTH); 240 map[newBlock>>UTRIE2_SHIFT_2]=0; 241 return newBlock; 242 } 243 244 245 /* call when the block's reference counter reaches 0 */ releaseDataBlock(int block)246 private void releaseDataBlock(int block) { 247 /* put this block at the front of the free-block chain */ 248 map[block>>UTRIE2_SHIFT_2]=-firstFreeBlock; 249 firstFreeBlock=block; 250 } 251 252 isWritableBlock(int block)253 private boolean isWritableBlock(int block) { 254 return (block!=dataNullOffset && 1==map[block>>UTRIE2_SHIFT_2]); 255 } 256 setIndex2Entry(int i2, int block)257 private void setIndex2Entry(int i2, int block) { 258 int oldBlock; 259 ++map[block>>UTRIE2_SHIFT_2]; /* increment first, in case block==oldBlock! */ 260 oldBlock=index2[i2]; 261 if(0 == --map[oldBlock>>UTRIE2_SHIFT_2]) { 262 releaseDataBlock(oldBlock); 263 } 264 index2[i2]=block; 265 } 266 267 268 /** 269 * No error checking for illegal arguments. 270 * 271 * @hide draft / provisional / internal are hidden on Android 272 */ getDataBlock(int c, boolean forLSCP)273 private int getDataBlock(int c, boolean forLSCP) { 274 int i2, oldBlock, newBlock; 275 276 i2=getIndex2Block(c, forLSCP); 277 278 i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; 279 oldBlock=index2[i2]; 280 if(isWritableBlock(oldBlock)) { 281 return oldBlock; 282 } 283 284 /* allocate a new data block */ 285 newBlock=allocDataBlock(oldBlock); 286 setIndex2Entry(i2, newBlock); 287 return newBlock; 288 } 289 /** 290 * Set a value for a code point. 291 * 292 * @param c the code point 293 * @param value the value 294 */ set(int c, int value)295 public Trie2Writable set(int c, int value) { 296 if (c<0 || c>0x10ffff) { 297 throw new IllegalArgumentException("Invalid code point."); 298 } 299 set(c, true, value); 300 fHash = 0; 301 return this; 302 } 303 set(int c, boolean forLSCP, int value)304 private Trie2Writable set(int c, boolean forLSCP, int value) { 305 int block; 306 if (isCompacted) { 307 uncompact(); 308 } 309 block = getDataBlock(c, forLSCP); 310 data[block + (c&UTRIE2_DATA_MASK)] = value; 311 return this; 312 } 313 314 315 /* 316 * Uncompact a compacted Trie2Writable. 317 * This is needed if a the WritableTrie2 was compacted in preparation for creating a read-only 318 * Trie2, and then is subsequently altered. 319 * 320 * The structure is a bit awkward - it would be cleaner to leave the original 321 * Trie2 unaltered - but compacting in place was taken directly from the ICU4C code. 322 * 323 * The approach is to create a new (uncompacted) Trie2Writable from this one, then transfer 324 * the guts from the new to the old. 325 */ uncompact()326 private void uncompact() { 327 Trie2Writable tempTrie = new Trie2Writable(this); 328 329 // Members from Trie2Writable 330 this.index1 = tempTrie.index1; 331 this.index2 = tempTrie.index2; 332 this.data = tempTrie.data; 333 this.index2Length = tempTrie.index2Length; 334 this.dataCapacity = tempTrie.dataCapacity; 335 this.isCompacted = tempTrie.isCompacted; 336 337 // Members From Trie2 338 this.header = tempTrie.header; 339 this.index = tempTrie.index; 340 this.data16 = tempTrie.data16; 341 this.data32 = tempTrie.data32; 342 this.indexLength = tempTrie.indexLength; 343 this.dataLength = tempTrie.dataLength; 344 this.index2NullOffset = tempTrie.index2NullOffset; 345 this.initialValue = tempTrie.initialValue; 346 this.errorValue = tempTrie.errorValue; 347 this.highStart = tempTrie.highStart; 348 this.highValueIndex = tempTrie.highValueIndex; 349 this.dataNullOffset = tempTrie.dataNullOffset; 350 } 351 352 writeBlock(int block, int value)353 private void writeBlock(int block, int value) { 354 int limit=block+UTRIE2_DATA_BLOCK_LENGTH; 355 while(block<limit) { 356 data[block++]=value; 357 } 358 } 359 360 /** 361 * initialValue is ignored if overwrite=TRUE 362 * @hide draft / provisional / internal are hidden on Android 363 */ fillBlock(int block, int start, int limit, int value, int initialValue, boolean overwrite)364 private void fillBlock(int block, /*UChar32*/ int start, /*UChar32*/ int limit, 365 int value, int initialValue, boolean overwrite) { 366 int i; 367 int pLimit = block+limit; 368 if(overwrite) { 369 for (i=block+start; i<pLimit; i++) { 370 data[i] = value; 371 } 372 } else { 373 for (i=block+start; i<pLimit; i++) { 374 if(data[i]==initialValue) { 375 data[i]=value; 376 } 377 } 378 } 379 } 380 381 /** 382 * Set a value in a range of code points [start..end]. 383 * All code points c with start<=c<=end will get the value if 384 * overwrite is TRUE or if the old value is the initial value. 385 * 386 * @param start the first code point to get the value 387 * @param end the last code point to get the value (inclusive) 388 * @param value the value 389 * @param overwrite flag for whether old non-initial values are to be overwritten 390 */ setRange(int start, int end, int value, boolean overwrite)391 public Trie2Writable setRange(int start, int end, 392 int value, boolean overwrite) { 393 /* 394 * repeat value in [start..end] 395 * mark index values for repeat-data blocks by setting bit 31 of the index values 396 * fill around existing values if any, if(overwrite) 397 */ 398 int block, rest, repeatBlock; 399 int /*UChar32*/ limit; 400 401 if(start>0x10ffff || start<0 || end>0x10ffff || end<0 || start>end) { 402 throw new IllegalArgumentException("Invalid code point range."); 403 } 404 if(!overwrite && value==initialValue) { 405 return this; /* nothing to do */ 406 } 407 fHash = 0; 408 if(isCompacted) { 409 this.uncompact(); 410 } 411 412 limit=end+1; 413 if((start&UTRIE2_DATA_MASK) != 0) { 414 int /*UChar32*/ nextStart; 415 416 /* set partial block at [start..following block boundary[ */ 417 block=getDataBlock(start, true); 418 419 nextStart=(start+UTRIE2_DATA_BLOCK_LENGTH)&~UTRIE2_DATA_MASK; 420 if(nextStart<=limit) { 421 fillBlock(block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH, 422 value, initialValue, overwrite); 423 start=nextStart; 424 } else { 425 fillBlock(block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK, 426 value, initialValue, overwrite); 427 return this; 428 } 429 } 430 431 /* number of positions in the last, partial block */ 432 rest=limit&UTRIE2_DATA_MASK; 433 434 /* round down limit to a block boundary */ 435 limit&=~UTRIE2_DATA_MASK; 436 437 /* iterate over all-value blocks */ 438 if(value==initialValue) { 439 repeatBlock=dataNullOffset; 440 } else { 441 repeatBlock=-1; 442 } 443 444 while(start<limit) { 445 int i2; 446 boolean setRepeatBlock=false; 447 448 if(value==initialValue && isInNullBlock(start, true)) { 449 start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */ 450 continue; 451 } 452 453 /* get index value */ 454 i2=getIndex2Block(start, true); 455 i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; 456 block=index2[i2]; 457 if(isWritableBlock(block)) { 458 /* already allocated */ 459 if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) { 460 /* 461 * We overwrite all values, and it's not a 462 * protected (ASCII-linear or 2-byte UTF-8) block: 463 * replace with the repeatBlock. 464 */ 465 setRepeatBlock=true; 466 } else { 467 /* !overwrite, or protected block: just write the values into this block */ 468 fillBlock(block, 469 0, UTRIE2_DATA_BLOCK_LENGTH, 470 value, initialValue, overwrite); 471 } 472 } else if(data[block]!=value && (overwrite || block==dataNullOffset)) { 473 /* 474 * Set the repeatBlock instead of the null block or previous repeat block: 475 * 476 * If !isWritableBlock() then all entries in the block have the same value 477 * because it's the null block or a range block (the repeatBlock from a previous 478 * call to utrie2_setRange32()). 479 * No other blocks are used multiple times before compacting. 480 * 481 * The null block is the only non-writable block with the initialValue because 482 * of the repeatBlock initialization above. (If value==initialValue, then 483 * the repeatBlock will be the null data block.) 484 * 485 * We set our repeatBlock if the desired value differs from the block's value, 486 * and if we overwrite any data or if the data is all initial values 487 * (which is the same as the block being the null block, see above). 488 */ 489 setRepeatBlock=true; 490 } 491 if(setRepeatBlock) { 492 if(repeatBlock>=0) { 493 setIndex2Entry(i2, repeatBlock); 494 } else { 495 /* create and set and fill the repeatBlock */ 496 repeatBlock=getDataBlock(start, true); 497 writeBlock(repeatBlock, value); 498 } 499 } 500 501 start+=UTRIE2_DATA_BLOCK_LENGTH; 502 } 503 504 if(rest>0) { 505 /* set partial block at [last block boundary..limit[ */ 506 block=getDataBlock(start, true); 507 fillBlock(block, 0, rest, value, initialValue, overwrite); 508 } 509 510 return this; 511 } 512 513 /** 514 * Set the values from a Trie2.Range. 515 * 516 * All code points within the range will get the value if 517 * overwrite is TRUE or if the old value is the initial value. 518 * 519 * Ranges with the lead surrogate flag set will set the alternate 520 * lead-surrogate values in the Trie, rather than the code point values. 521 * 522 * This function is intended to work with the ranges produced when iterating 523 * the contents of a source Trie. 524 * 525 * @param range contains the range of code points and the value to be set. 526 * @param overwrite flag for whether old non-initial values are to be overwritten 527 */ setRange(Trie2.Range range, boolean overwrite)528 public Trie2Writable setRange(Trie2.Range range, boolean overwrite) { 529 fHash = 0; 530 if (range.leadSurrogate) { 531 for (int c=range.startCodePoint; c<=range.endCodePoint; c++) { 532 if (overwrite || getFromU16SingleLead((char)c) == this.initialValue) { 533 setForLeadSurrogateCodeUnit((char)c, range.value); 534 } 535 } 536 } else { 537 setRange(range.startCodePoint, range.endCodePoint, range.value, overwrite); 538 } 539 return this; 540 } 541 542 /** 543 * Set a value for a UTF-16 code unit. 544 * Note that a Trie2 stores separate values for 545 * supplementary code points in the lead surrogate range 546 * (accessed via the plain set() and get() interfaces) 547 * and for lead surrogate code units. 548 * 549 * The lead surrogate code unit values are set via this function and 550 * read by the function getFromU16SingleLead(). 551 * 552 * For code units outside of the lead surrogate range, this function 553 * behaves identically to set(). 554 * 555 * @param codeUnit A UTF-16 code unit. 556 * @param value the value to be stored in the Trie2. 557 */ setForLeadSurrogateCodeUnit(char codeUnit, int value)558 public Trie2Writable setForLeadSurrogateCodeUnit(char codeUnit, int value) { 559 fHash = 0; 560 set(codeUnit, false, value); 561 return this; 562 } 563 564 565 /** 566 * Get the value for a code point as stored in the Trie2. 567 * 568 * @param codePoint the code point 569 * @return the value 570 */ 571 @Override get(int codePoint)572 public int get(int codePoint) { 573 if (codePoint<0 || codePoint>0x10ffff) { 574 return errorValue; 575 } else { 576 return get(codePoint, true); 577 } 578 } 579 580 get(int c, boolean fromLSCP)581 private int get(int c, boolean fromLSCP) { 582 int i2, block; 583 584 if(c>=highStart && (!(c>=0xd800 && c<0xdc00) || fromLSCP)) { 585 return data[dataLength-UTRIE2_DATA_GRANULARITY]; 586 } 587 588 if((c>=0xd800 && c<0xdc00) && fromLSCP) { 589 i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ 590 (c>>UTRIE2_SHIFT_2); 591 } else { 592 i2=index1[c>>UTRIE2_SHIFT_1]+ 593 ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); 594 } 595 block=index2[i2]; 596 return data[block+(c&UTRIE2_DATA_MASK)]; 597 } 598 599 /** 600 * Get a trie value for a UTF-16 code unit. 601 * 602 * This function returns the same value as get() if the input 603 * character is outside of the lead surrogate range 604 * 605 * There are two values stored in a Trie for inputs in the lead 606 * surrogate range. This function returns the alternate value, 607 * while Trie2.get() returns the main value. 608 * 609 * @param c the code point or lead surrogate value. 610 * @return the value 611 */ 612 @Override getFromU16SingleLead(char c)613 public int getFromU16SingleLead(char c) { 614 return get(c, false); 615 } 616 617 /* compaction --------------------------------------------------------------- */ 618 equal_int(int[] a, int s, int t, int length)619 private boolean equal_int(int[] a, int s, int t, int length) { 620 for (int i=0; i<length; i++) { 621 if (a[s+i] != a[t+i]) { 622 return false; 623 } 624 } 625 return true; 626 } 627 628 findSameIndex2Block(int index2Length, int otherBlock)629 private int findSameIndex2Block(int index2Length, int otherBlock) { 630 int block; 631 632 /* ensure that we do not even partially get past index2Length */ 633 index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH; 634 635 for(block=0; block<=index2Length; ++block) { 636 if(equal_int(index2, block, otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) { 637 return block; 638 } 639 } 640 return -1; 641 } 642 643 findSameDataBlock(int dataLength, int otherBlock, int blockLength)644 private int findSameDataBlock(int dataLength, int otherBlock, int blockLength) { 645 int block; 646 647 /* ensure that we do not even partially get past dataLength */ 648 dataLength-=blockLength; 649 650 for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) { 651 if(equal_int(data, block, otherBlock, blockLength)) { 652 return block; 653 } 654 } 655 return -1; 656 } 657 658 /* 659 * Find the start of the last range in the trie by enumerating backward. 660 * Indexes for supplementary code points higher than this will be omitted. 661 */ findHighStart(int highValue)662 private int findHighStart(int highValue) { 663 664 int value; 665 int c, prev; 666 int i1, i2, j, i2Block, prevI2Block, block, prevBlock; 667 668 669 /* set variables for previous range */ 670 if(highValue==initialValue) { 671 prevI2Block=index2NullOffset; 672 prevBlock=dataNullOffset; 673 } else { 674 prevI2Block=-1; 675 prevBlock=-1; 676 } 677 prev=0x110000; 678 679 /* enumerate index-2 blocks */ 680 i1=UNEWTRIE2_INDEX_1_LENGTH; 681 c=prev; 682 while(c>0) { 683 i2Block=index1[--i1]; 684 if(i2Block==prevI2Block) { 685 /* the index-2 block is the same as the previous one, and filled with highValue */ 686 c-=UTRIE2_CP_PER_INDEX_1_ENTRY; 687 continue; 688 } 689 prevI2Block=i2Block; 690 if(i2Block==index2NullOffset) { 691 /* this is the null index-2 block */ 692 if(highValue!=initialValue) { 693 return c; 694 } 695 c-=UTRIE2_CP_PER_INDEX_1_ENTRY; 696 } else { 697 /* enumerate data blocks for one index-2 block */ 698 for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) { 699 block=index2[i2Block+ --i2]; 700 if(block==prevBlock) { 701 /* the block is the same as the previous one, and filled with highValue */ 702 c-=UTRIE2_DATA_BLOCK_LENGTH; 703 continue; 704 } 705 prevBlock=block; 706 if(block==dataNullOffset) { 707 /* this is the null data block */ 708 if(highValue!=initialValue) { 709 return c; 710 } 711 c-=UTRIE2_DATA_BLOCK_LENGTH; 712 } else { 713 for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) { 714 value=data[block+ --j]; 715 if(value!=highValue) { 716 return c; 717 } 718 --c; 719 } 720 } 721 } 722 } 723 } 724 725 /* deliver last range */ 726 return 0; 727 } 728 729 /* 730 * Compact a build-time trie. 731 * 732 * The compaction 733 * - removes blocks that are identical with earlier ones 734 * - overlaps adjacent blocks as much as possible (if overlap==TRUE) 735 * - moves blocks in steps of the data granularity 736 * - moves and overlaps blocks that overlap with multiple values in the overlap region 737 * 738 * It does not 739 * - try to move and overlap blocks that are not already adjacent 740 */ compactData()741 private void compactData() { 742 int start, newStart, movedStart; 743 int blockLength, overlap; 744 int i, mapIndex, blockCount; 745 746 /* do not compact linear-ASCII data */ 747 newStart=UTRIE2_DATA_START_OFFSET; 748 for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) { 749 map[i]=start; 750 } 751 752 /* 753 * Start with a block length of 64 for 2-byte UTF-8, 754 * then switch to UTRIE2_DATA_BLOCK_LENGTH. 755 */ 756 blockLength=64; 757 blockCount=blockLength>>UTRIE2_SHIFT_2; 758 for(start=newStart; start<dataLength;) { 759 /* 760 * start: index of first entry of current block 761 * newStart: index where the current block is to be moved 762 * (right after current end of already-compacted data) 763 */ 764 if(start==UNEWTRIE2_DATA_0800_OFFSET) { 765 blockLength=UTRIE2_DATA_BLOCK_LENGTH; 766 blockCount=1; 767 } 768 769 /* skip blocks that are not used */ 770 if(map[start>>UTRIE2_SHIFT_2]<=0) { 771 /* advance start to the next block */ 772 start+=blockLength; 773 774 /* leave newStart with the previous block! */ 775 continue; 776 } 777 778 /* search for an identical block */ 779 movedStart=findSameDataBlock(newStart, start, blockLength); 780 if(movedStart >= 0) { 781 /* found an identical block, set the other block's index value for the current block */ 782 for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { 783 map[mapIndex++]=movedStart; 784 movedStart+=UTRIE2_DATA_BLOCK_LENGTH; 785 } 786 787 /* advance start to the next block */ 788 start+=blockLength; 789 790 /* leave newStart with the previous block! */ 791 continue; 792 } 793 794 /* see if the beginning of this block can be overlapped with the end of the previous block */ 795 /* look for maximum overlap (modulo granularity) with the previous, adjacent block */ 796 for(overlap=blockLength-UTRIE2_DATA_GRANULARITY; 797 overlap>0 && !equal_int(data, (newStart-overlap), start, overlap); 798 overlap-=UTRIE2_DATA_GRANULARITY) {} 799 800 if(overlap>0 || newStart<start) { 801 /* some overlap, or just move the whole block */ 802 movedStart=newStart-overlap; 803 for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { 804 map[mapIndex++]=movedStart; 805 movedStart+=UTRIE2_DATA_BLOCK_LENGTH; 806 } 807 808 /* move the non-overlapping indexes to their new positions */ 809 start+=overlap; 810 for(i=blockLength-overlap; i>0; --i) { 811 data[newStart++]=data[start++]; 812 } 813 } else /* no overlap && newStart==start */ { 814 for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { 815 map[mapIndex++]=start; 816 start+=UTRIE2_DATA_BLOCK_LENGTH; 817 } 818 newStart=start; 819 } 820 } 821 822 /* now adjust the index-2 table */ 823 for(i=0; i<index2Length; ++i) { 824 if(i==UNEWTRIE2_INDEX_GAP_OFFSET) { 825 /* Gap indexes are invalid (-1). Skip over the gap. */ 826 i+=UNEWTRIE2_INDEX_GAP_LENGTH; 827 } 828 index2[i]=map[index2[i]>>UTRIE2_SHIFT_2]; 829 } 830 dataNullOffset=map[dataNullOffset>>UTRIE2_SHIFT_2]; 831 832 /* ensure dataLength alignment */ 833 while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) { 834 data[newStart++]=initialValue; 835 } 836 837 if (UTRIE2_DEBUG) { 838 /* we saved some space */ 839 System.out.printf("compacting UTrie2: count of 32-bit data words %d->%d%n", 840 dataLength, newStart); 841 } 842 843 dataLength=newStart; 844 } 845 compactIndex2()846 private void compactIndex2() { 847 int i, start, newStart, movedStart, overlap; 848 849 /* do not compact linear-BMP index-2 blocks */ 850 newStart=UTRIE2_INDEX_2_BMP_LENGTH; 851 for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) { 852 map[i]=start; 853 } 854 855 /* Reduce the index table gap to what will be needed at runtime. */ 856 newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((highStart-0x10000)>>UTRIE2_SHIFT_1); 857 858 for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<index2Length;) { 859 /* 860 * start: index of first entry of current block 861 * newStart: index where the current block is to be moved 862 * (right after current end of already-compacted data) 863 */ 864 865 /* search for an identical block */ 866 if( (movedStart=findSameIndex2Block(newStart, start)) 867 >=0 868 ) { 869 /* found an identical block, set the other block's index value for the current block */ 870 map[start>>UTRIE2_SHIFT_1_2]=movedStart; 871 872 /* advance start to the next block */ 873 start+=UTRIE2_INDEX_2_BLOCK_LENGTH; 874 875 /* leave newStart with the previous block! */ 876 continue; 877 } 878 879 /* see if the beginning of this block can be overlapped with the end of the previous block */ 880 /* look for maximum overlap with the previous, adjacent block */ 881 for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1; 882 overlap>0 && !equal_int(index2, newStart-overlap, start, overlap); 883 --overlap) {} 884 885 if(overlap>0 || newStart<start) { 886 /* some overlap, or just move the whole block */ 887 map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap; 888 889 /* move the non-overlapping indexes to their new positions */ 890 start+=overlap; 891 for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) { 892 index2[newStart++]=index2[start++]; 893 } 894 } else /* no overlap && newStart==start */ { 895 map[start>>UTRIE2_SHIFT_1_2]=start; 896 start+=UTRIE2_INDEX_2_BLOCK_LENGTH; 897 newStart=start; 898 } 899 } 900 901 /* now adjust the index-1 table */ 902 for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) { 903 index1[i]=map[index1[i]>>UTRIE2_SHIFT_1_2]; 904 } 905 index2NullOffset=map[index2NullOffset>>UTRIE2_SHIFT_1_2]; 906 907 /* 908 * Ensure data table alignment: 909 * Needs to be granularity-aligned for 16-bit trie 910 * (so that dataMove will be down-shiftable), 911 * and 2-aligned for uint32_t data. 912 */ 913 while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) { 914 /* Arbitrary value: 0x3fffc not possible for real data. */ 915 index2[newStart++]=0x0000ffff<<UTRIE2_INDEX_SHIFT; 916 } 917 918 if (UTRIE2_DEBUG) { 919 /* we saved some space */ 920 System.out.printf("compacting UTrie2: count of 16-bit index-2 words %d->%d%n", 921 index2Length, newStart); 922 } 923 924 index2Length=newStart; 925 } 926 compactTrie()927 private void compactTrie() { 928 int localHighStart; 929 int suppHighStart; 930 int highValue; 931 932 /* find highStart and round it up */ 933 highValue=get(0x10ffff); 934 localHighStart=findHighStart(highValue); 935 localHighStart=(localHighStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1); 936 if(localHighStart==0x110000) { 937 highValue=errorValue; 938 } 939 940 /* 941 * Set trie->highStart only after utrie2_get32(trie, highStart). 942 * Otherwise utrie2_get32(trie, highStart) would try to read the highValue. 943 */ 944 this.highStart=localHighStart; 945 946 if (UTRIE2_DEBUG) { 947 System.out.printf("UTrie2: highStart U+%04x highValue 0x%x initialValue 0x%x%n", 948 highStart, highValue, initialValue); 949 } 950 951 if(highStart<0x110000) { 952 /* Blank out [highStart..10ffff] to release associated data blocks. */ 953 suppHighStart= highStart<=0x10000 ? 0x10000 : highStart; 954 setRange(suppHighStart, 0x10ffff, initialValue, true); 955 } 956 957 compactData(); 958 if(highStart>0x10000) { 959 compactIndex2(); 960 } else { 961 if (UTRIE2_DEBUG) { 962 System.out.printf("UTrie2: highStart U+%04x count of 16-bit index-2 words %d->%d%n", 963 highStart, index2Length, UTRIE2_INDEX_1_OFFSET); 964 } 965 } 966 967 /* 968 * Store the highValue in the data array and round up the dataLength. 969 * Must be done after compactData() because that assumes that dataLength 970 * is a multiple of UTRIE2_DATA_BLOCK_LENGTH. 971 */ 972 data[dataLength++]=highValue; 973 while((dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) { 974 data[dataLength++]=initialValue; 975 } 976 977 isCompacted=true; 978 } 979 980 981 /** 982 * Produce an optimized, read-only Trie2_16 from this writable Trie. 983 * The data values outside of the range that will fit in a 16 bit 984 * unsigned value will be truncated. 985 */ toTrie2_16()986 public Trie2_16 toTrie2_16() { 987 Trie2_16 frozenTrie = new Trie2_16(); 988 freeze(frozenTrie, ValueWidth.BITS_16); 989 return frozenTrie; 990 } 991 992 993 /** 994 * Produce an optimized, read-only Trie2_32 from this writable Trie. 995 * 996 */ toTrie2_32()997 public Trie2_32 toTrie2_32() { 998 Trie2_32 frozenTrie = new Trie2_32(); 999 freeze(frozenTrie, ValueWidth.BITS_32); 1000 return frozenTrie; 1001 } 1002 1003 1004 /** 1005 * Maximum length of the runtime index array. 1006 * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength. 1007 * (The actual maximum length is lower, 1008 * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.) 1009 */ 1010 private static final int UTRIE2_MAX_INDEX_LENGTH = 0xffff; 1011 1012 /** 1013 * Maximum length of the runtime data array. 1014 * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT, 1015 * and by uint16_t UTrie2Header.shiftedDataLength. 1016 */ 1017 private static final int UTRIE2_MAX_DATA_LENGTH = 0xffff<<UTRIE2_INDEX_SHIFT; 1018 1019 /* Compact the data and then populate an optimized read-only Trie. */ freeze(Trie2 dest, ValueWidth valueBits)1020 private void freeze(Trie2 dest, ValueWidth valueBits) { 1021 int i; 1022 int allIndexesLength; 1023 int dataMove; /* >0 if the data is moved to the end of the index array */ 1024 1025 1026 /* compact if necessary */ 1027 if(!isCompacted) { 1028 compactTrie(); 1029 } 1030 1031 if(highStart<=0x10000) { 1032 allIndexesLength=UTRIE2_INDEX_1_OFFSET; 1033 } else { 1034 allIndexesLength=index2Length; 1035 } 1036 if(valueBits==ValueWidth.BITS_16) { 1037 dataMove=allIndexesLength; 1038 } else { 1039 dataMove=0; 1040 } 1041 1042 /* are indexLength and dataLength within limits? */ 1043 if( /* for unshifted indexLength */ 1044 allIndexesLength>UTRIE2_MAX_INDEX_LENGTH || 1045 /* for unshifted dataNullOffset */ 1046 (dataMove+dataNullOffset)>0xffff || 1047 /* for unshifted 2-byte UTF-8 index-2 values */ 1048 (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff || 1049 /* for shiftedDataLength */ 1050 (dataMove+dataLength)>UTRIE2_MAX_DATA_LENGTH) { 1051 throw new UnsupportedOperationException("Trie2 data is too large."); 1052 } 1053 1054 /* calculate the sizes of, and allocate, the index and data arrays */ 1055 int indexLength = allIndexesLength; 1056 if (valueBits==ValueWidth.BITS_16) { 1057 indexLength += dataLength; 1058 } else { 1059 dest.data32 = new int[dataLength]; 1060 } 1061 dest.index = new char[indexLength]; 1062 1063 dest.indexLength = allIndexesLength; 1064 dest.dataLength = dataLength; 1065 if(highStart<=0x10000) { 1066 dest.index2NullOffset = 0xffff; 1067 } else { 1068 dest.index2NullOffset = UTRIE2_INDEX_2_OFFSET + index2NullOffset; 1069 } 1070 dest.initialValue = initialValue; 1071 dest.errorValue = errorValue; 1072 dest.highStart = highStart; 1073 dest.highValueIndex = dataMove + dataLength - UTRIE2_DATA_GRANULARITY; 1074 dest.dataNullOffset = (dataMove+dataNullOffset); 1075 1076 // Create a header and set the its fields. 1077 // (This is only used in the event that we serialize the Trie, but is 1078 // convenient to do here.) 1079 dest.header = new Trie2.UTrie2Header(); 1080 dest.header.signature = 0x54726932; /* "Tri2" */ 1081 dest.header.options = valueBits==ValueWidth.BITS_16 ? 0 : 1; 1082 dest.header.indexLength = dest.indexLength; 1083 dest.header.shiftedDataLength = dest.dataLength>>UTRIE2_INDEX_SHIFT; 1084 dest.header.index2NullOffset = dest.index2NullOffset; 1085 dest.header.dataNullOffset = dest.dataNullOffset; 1086 dest.header.shiftedHighStart = dest.highStart>>UTRIE2_SHIFT_1; 1087 1088 1089 1090 /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */ 1091 int destIdx = 0; 1092 for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; i++) { 1093 dest.index[destIdx++] = (char)((index2[i]+dataMove) >> UTRIE2_INDEX_SHIFT); 1094 } 1095 if (UTRIE2_DEBUG) { 1096 System.out.println("\n\nIndex2 for BMP limit is " + Integer.toHexString(destIdx)); 1097 } 1098 1099 /* write UTF-8 2-byte index-2 values, not right-shifted */ 1100 for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ 1101 dest.index[destIdx++] = (char)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); 1102 } 1103 for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ 1104 dest.index[destIdx++]=(char)(dataMove+index2[i<<(6-UTRIE2_SHIFT_2)]); 1105 } 1106 if (UTRIE2_DEBUG) { 1107 System.out.println("Index2 for UTF-8 2byte values limit is " + Integer.toHexString(destIdx)); 1108 } 1109 1110 if(highStart>0x10000) { 1111 int index1Length = (highStart-0x10000)>>UTRIE2_SHIFT_1; 1112 int index2Offset = UTRIE2_INDEX_2_BMP_LENGTH + UTRIE2_UTF8_2B_INDEX_2_LENGTH + index1Length; 1113 1114 /* write 16-bit index-1 values for supplementary code points */ 1115 //p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; 1116 for(i=0; i<index1Length; i++) { 1117 //*dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++); 1118 dest.index[destIdx++] = (char)(UTRIE2_INDEX_2_OFFSET + index1[i+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH]); 1119 } 1120 if (UTRIE2_DEBUG) { 1121 System.out.println("Index 1 for supplementals, limit is " + Integer.toHexString(destIdx)); 1122 } 1123 1124 /* 1125 * write the index-2 array values for supplementary code points, 1126 * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove 1127 */ 1128 for(i=0; i<index2Length-index2Offset; i++) { 1129 dest.index[destIdx++] = (char)((dataMove + index2[index2Offset+i])>>UTRIE2_INDEX_SHIFT); 1130 } 1131 if (UTRIE2_DEBUG) { 1132 System.out.println("Index 2 for supplementals, limit is " + Integer.toHexString(destIdx)); 1133 } 1134 } 1135 1136 /* write the 16/32-bit data array */ 1137 switch(valueBits) { 1138 case BITS_16: 1139 /* write 16-bit data values */ 1140 assert(destIdx == dataMove); 1141 dest.data16 = destIdx; 1142 for(i=0; i<dataLength; i++) { 1143 dest.index[destIdx++] = (char)data[i]; 1144 } 1145 break; 1146 case BITS_32: 1147 /* write 32-bit data values */ 1148 for (i=0; i<dataLength; i++) { 1149 dest.data32[i] = this.data[i]; 1150 } 1151 break; 1152 } 1153 // The writable, but compressed, Trie2 stays around unless the caller drops its references to it. 1154 } 1155 1156 1157 /* Start with allocation of 16k data entries. */ 1158 private static final int UNEWTRIE2_INITIAL_DATA_LENGTH = 1<<14; 1159 1160 /* Grow about 8x each time. */ 1161 private static final int UNEWTRIE2_MEDIUM_DATA_LENGTH = 1<<17; 1162 1163 /** The null index-2 block, following the gap in the index-2 table. */ 1164 private static final int UNEWTRIE2_INDEX_2_NULL_OFFSET = UNEWTRIE2_INDEX_GAP_OFFSET + UNEWTRIE2_INDEX_GAP_LENGTH; 1165 1166 /** The start of allocated index-2 blocks. */ 1167 private static final int UNEWTRIE2_INDEX_2_START_OFFSET = UNEWTRIE2_INDEX_2_NULL_OFFSET + UTRIE2_INDEX_2_BLOCK_LENGTH; 1168 1169 /** 1170 * The null data block. 1171 * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller, 1172 * to work with 6-bit trail bytes from 2-byte UTF-8. 1173 */ 1174 private static final int UNEWTRIE2_DATA_NULL_OFFSET = UTRIE2_DATA_START_OFFSET; 1175 1176 /** The start of allocated data blocks. */ 1177 private static final int UNEWTRIE2_DATA_START_OFFSET = UNEWTRIE2_DATA_NULL_OFFSET+0x40; 1178 1179 /** 1180 * The start of data blocks for U+0800 and above. 1181 * Below, compaction uses a block length of 64 for 2-byte UTF-8. 1182 * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH. 1183 * Data values for 0x780 code points beyond ASCII. 1184 */ 1185 private static final int UNEWTRIE2_DATA_0800_OFFSET = UNEWTRIE2_DATA_START_OFFSET+0x780; 1186 1187 // 1188 // Private data members. From struct UNewTrie2 in ICU4C 1189 // 1190 private int[] index1 = new int[UNEWTRIE2_INDEX_1_LENGTH]; 1191 private int[] index2 = new int[UNEWTRIE2_MAX_INDEX_2_LENGTH]; 1192 private int[] data; 1193 1194 private int index2Length; 1195 private int dataCapacity; 1196 private int firstFreeBlock; 1197 private int index2NullOffset; 1198 private boolean isCompacted; 1199 1200 1201 /* 1202 * Multi-purpose per-data-block table. 1203 * 1204 * Before compacting: 1205 * 1206 * Per-data-block reference counters/free-block list. 1207 * 0: unused 1208 * >0: reference counter (number of index-2 entries pointing here) 1209 * <0: next free data block in free-block list 1210 * 1211 * While compacting: 1212 * 1213 * Map of adjusted indexes, used in compactData() and compactIndex2(). 1214 * Maps from original indexes to new ones. 1215 */ 1216 private int[] map = new int[UNEWTRIE2_MAX_DATA_LENGTH>>UTRIE2_SHIFT_2]; 1217 1218 1219 private boolean UTRIE2_DEBUG = false; 1220 1221 } 1222