1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 2009, International Business Machines Corporation and         *
7  * others. All Rights Reserved.                                                *
8  *******************************************************************************
9  */
10 package android.icu.impl;
11 
12 /**
13  * @author aheninger
14  *
15  * A Trie2Writable is a modifiable, or build-time Trie2.
16  * Functions for reading data from the Trie are all from class Trie2.
17  * @hide Only a subset of ICU is exposed in Android
18  *
19  */
20 public class Trie2Writable extends Trie2 {
21 
22 
23     /**
24      * Create a new, empty, writable Trie2. 32-bit data values are used.
25      *
26      * @param initialValueP the initial value that is set for all code points
27      * @param errorValueP the value for out-of-range code points and illegal UTF-8
28      */
Trie2Writable(int initialValueP, int errorValueP)29     public  Trie2Writable(int initialValueP, int errorValueP) {
30         // This constructor corresponds to utrie2_open() in ICU4C.
31         init(initialValueP, errorValueP);
32     }
33 
34 
init(int initialValueP, int errorValueP)35     private void init(int initialValueP, int errorValueP) {
36         this.initialValue = initialValueP;
37         this.errorValue   = errorValueP;
38         this.highStart    = 0x110000;
39 
40         this.data           = new int[UNEWTRIE2_INITIAL_DATA_LENGTH];
41         this.dataCapacity   = UNEWTRIE2_INITIAL_DATA_LENGTH;
42         this.initialValue   = initialValueP;
43         this.errorValue     = errorValueP;
44         this.highStart      = 0x110000;
45         this.firstFreeBlock = 0;  /* no free block in the list */
46         this.isCompacted    = false;
47 
48         /*
49          * preallocate and reset
50          * - ASCII
51          * - the bad-UTF-8-data block
52          * - the null data block
53          */
54         int i, j;
55         for(i=0; i<0x80; ++i) {
56             data[i] = initialValue;
57         }
58         for(; i<0xc0; ++i) {
59             data[i] = errorValue;
60         }
61         for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) {
62             data[i] = initialValue;
63         }
64         dataNullOffset = UNEWTRIE2_DATA_NULL_OFFSET;
65         dataLength     = UNEWTRIE2_DATA_START_OFFSET;
66 
67         /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
68         for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
69             index2[i]=j;
70             map[i]=1;
71         }
72 
73         /* reference counts for the bad-UTF-8-data block */
74         for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
75             map[i]=0;
76         }
77 
78         /*
79          * Reference counts for the null data block: all blocks except for the ASCII blocks.
80          * Plus 1 so that we don't drop this block during compaction.
81          * Plus as many as needed for lead surrogate code points.
82          */
83         /* i==newTrie->dataNullOffset */
84         map[i++] =
85             (0x110000>>UTRIE2_SHIFT_2) -
86             (0x80>>UTRIE2_SHIFT_2) +
87             1 +
88             UTRIE2_LSCP_INDEX_2_LENGTH;
89         j += UTRIE2_DATA_BLOCK_LENGTH;
90         for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
91             map[i]=0;
92         }
93 
94         /*
95          * set the remaining indexes in the BMP index-2 block
96          * to the null data block
97          */
98         for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
99             index2[i]=UNEWTRIE2_DATA_NULL_OFFSET;
100         }
101 
102         /*
103          * Fill the index gap with impossible values so that compaction
104          * does not overlap other index-2 blocks with the gap.
105          */
106         for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
107             index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1;
108         }
109 
110         /* set the indexes in the null index-2 block */
111         for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
112             index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET;
113         }
114         index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET;
115         index2Length=UNEWTRIE2_INDEX_2_START_OFFSET;
116 
117         /* set the index-1 indexes for the linear index-2 block */
118         for(i=0, j=0;
119             i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
120             ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH
121         ) {
122             index1[i]=j;
123         }
124 
125         /* set the remaining index-1 indexes to the null index-2 block */
126         for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
127             index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET;
128         }
129 
130         /*
131          * Preallocate and reset data for U+0080..U+07ff,
132          * for 2-byte UTF-8 which will be compacted in 64-blocks
133          * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
134          */
135         for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) {
136             set(i, initialValue);
137         }
138 
139     }
140 
141 
142     /**
143      * Create a new build time (modifiable) Trie2 whose contents are the same as the source Trie2.
144      *
145      * @param source the source Trie2.  Its contents will be copied into the new Trie2.
146      */
Trie2Writable(Trie2 source)147     public Trie2Writable(Trie2 source) {
148         init(source.initialValue, source.errorValue);
149 
150         for (Range r: source) {
151             setRange(r, true);
152         }
153     }
154 
155 
isInNullBlock(int c, boolean forLSCP)156     private boolean isInNullBlock(int c, boolean forLSCP) {
157         int i2, block;
158 
159         if(Character.isHighSurrogate((char)c) && forLSCP) {
160             i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
161                 (c>>UTRIE2_SHIFT_2);
162         } else {
163             i2=index1[c>>UTRIE2_SHIFT_1]+
164                 ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
165         }
166         block=index2[i2];
167         return (block==dataNullOffset);
168     }
169 
allocIndex2Block()170     private int allocIndex2Block() {
171         int newBlock, newTop;
172 
173         newBlock=index2Length;
174         newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH;
175         if(newTop > index2.length) {
176             throw new IllegalStateException("Internal error in Trie2 creation.");
177             /*
178              * Should never occur.
179              * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
180              * or the code writes more values than should be possible.
181              */
182         }
183         index2Length=newTop;
184         System.arraycopy(index2, index2NullOffset, index2, newBlock, UTRIE2_INDEX_2_BLOCK_LENGTH);
185         return newBlock;
186     }
187 
getIndex2Block(int c, boolean forLSCP)188     private int getIndex2Block(int c, boolean forLSCP) {
189         int i1, i2;
190 
191         if(c>=0xd800 && c<0xdc00 && forLSCP) {
192             return UTRIE2_LSCP_INDEX_2_OFFSET;
193         }
194 
195         i1=c>>UTRIE2_SHIFT_1;
196         i2=index1[i1];
197         if(i2==index2NullOffset) {
198             i2=allocIndex2Block();
199             index1[i1]=i2;
200         }
201         return i2;
202     }
203 
allocDataBlock(int copyBlock)204     private int allocDataBlock(int copyBlock) {
205         int newBlock, newTop;
206 
207         if(firstFreeBlock!=0) {
208             /* get the first free block */
209             newBlock=firstFreeBlock;
210             firstFreeBlock=-map[newBlock>>UTRIE2_SHIFT_2];
211         } else {
212             /* get a new block from the high end */
213             newBlock=dataLength;
214             newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH;
215             if(newTop>dataCapacity) {
216                 /* out of memory in the data array */
217                 int capacity;
218                 int[] newData;
219 
220                 if(dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) {
221                     capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH;
222                 } else if(dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) {
223                     capacity=UNEWTRIE2_MAX_DATA_LENGTH;
224                 } else {
225                     /*
226                      * Should never occur.
227                      * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
228                      * or the code writes more values than should be possible.
229                      */
230                     throw new IllegalStateException("Internal error in Trie2 creation.");
231                 }
232                 newData = new int[capacity];
233                 System.arraycopy(data, 0, newData, 0, dataLength);
234                 data=newData;
235                 dataCapacity=capacity;
236             }
237             dataLength=newTop;
238         }
239         System.arraycopy(data, copyBlock, data, newBlock, UTRIE2_DATA_BLOCK_LENGTH);
240         map[newBlock>>UTRIE2_SHIFT_2]=0;
241         return newBlock;
242     }
243 
244 
245     /* call when the block's reference counter reaches 0 */
releaseDataBlock(int block)246     private void releaseDataBlock(int block) {
247         /* put this block at the front of the free-block chain */
248         map[block>>UTRIE2_SHIFT_2]=-firstFreeBlock;
249         firstFreeBlock=block;
250     }
251 
252 
isWritableBlock(int block)253     private boolean isWritableBlock(int block) {
254         return (block!=dataNullOffset && 1==map[block>>UTRIE2_SHIFT_2]);
255     }
256 
setIndex2Entry(int i2, int block)257     private void setIndex2Entry(int i2, int block) {
258         int oldBlock;
259         ++map[block>>UTRIE2_SHIFT_2];  /* increment first, in case block==oldBlock! */
260         oldBlock=index2[i2];
261         if(0 == --map[oldBlock>>UTRIE2_SHIFT_2]) {
262             releaseDataBlock(oldBlock);
263         }
264         index2[i2]=block;
265     }
266 
267 
268     /**
269      * No error checking for illegal arguments.
270      *
271      * @hide draft / provisional / internal are hidden on Android
272      */
getDataBlock(int c, boolean forLSCP)273     private int getDataBlock(int c, boolean forLSCP) {
274         int i2, oldBlock, newBlock;
275 
276         i2=getIndex2Block(c, forLSCP);
277 
278         i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
279         oldBlock=index2[i2];
280         if(isWritableBlock(oldBlock)) {
281             return oldBlock;
282         }
283 
284         /* allocate a new data block */
285         newBlock=allocDataBlock(oldBlock);
286         setIndex2Entry(i2, newBlock);
287         return newBlock;
288     }
289     /**
290      * Set a value for a code point.
291      *
292      * @param c the code point
293      * @param value the value
294      */
set(int c, int value)295     public Trie2Writable set(int c, int value) {
296         if (c<0 || c>0x10ffff) {
297             throw new IllegalArgumentException("Invalid code point.");
298         }
299         set(c, true, value);
300         fHash = 0;
301         return this;
302     }
303 
set(int c, boolean forLSCP, int value)304     private Trie2Writable set(int c, boolean forLSCP, int value) {
305         int block;
306         if (isCompacted) {
307             uncompact();
308         }
309         block = getDataBlock(c, forLSCP);
310         data[block + (c&UTRIE2_DATA_MASK)] = value;
311         return this;
312     }
313 
314 
315     /*
316      * Uncompact a compacted Trie2Writable.
317      * This is needed if a the WritableTrie2 was compacted in preparation for creating a read-only
318      * Trie2, and then is subsequently altered.
319      *
320      * The structure is a bit awkward - it would be cleaner to leave the original
321      * Trie2 unaltered - but compacting in place was taken directly from the ICU4C code.
322      *
323      * The approach is to create a new (uncompacted) Trie2Writable from this one, then transfer
324      * the guts from the new to the old.
325      */
uncompact()326     private void uncompact() {
327         Trie2Writable tempTrie = new Trie2Writable(this);
328 
329         // Members from Trie2Writable
330         this.index1       = tempTrie.index1;
331         this.index2       = tempTrie.index2;
332         this.data         = tempTrie.data;
333         this.index2Length = tempTrie.index2Length;
334         this.dataCapacity = tempTrie.dataCapacity;
335         this.isCompacted  = tempTrie.isCompacted;
336 
337         // Members From Trie2
338         this.header           = tempTrie.header;
339         this.index            = tempTrie.index;
340         this.data16           = tempTrie.data16;
341         this.data32           = tempTrie.data32;
342         this.indexLength      = tempTrie.indexLength;
343         this.dataLength       = tempTrie.dataLength;
344         this.index2NullOffset = tempTrie.index2NullOffset;
345         this.initialValue     = tempTrie.initialValue;
346         this.errorValue       = tempTrie.errorValue;
347         this.highStart        = tempTrie.highStart;
348         this.highValueIndex   = tempTrie.highValueIndex;
349         this.dataNullOffset   = tempTrie.dataNullOffset;
350     }
351 
352 
writeBlock(int block, int value)353     private void writeBlock(int  block, int value) {
354         int  limit=block+UTRIE2_DATA_BLOCK_LENGTH;
355         while(block<limit) {
356             data[block++]=value;
357         }
358     }
359 
360     /**
361      * initialValue is ignored if overwrite=TRUE
362      * @hide draft / provisional / internal are hidden on Android
363      */
fillBlock(int block, int start, int limit, int value, int initialValue, boolean overwrite)364     private void fillBlock(int block, /*UChar32*/ int start, /*UChar32*/ int limit,
365               int value, int initialValue, boolean overwrite) {
366         int i;
367         int pLimit = block+limit;
368         if(overwrite) {
369             for (i=block+start; i<pLimit; i++) {
370                 data[i] = value;
371             }
372         } else {
373             for (i=block+start; i<pLimit; i++) {
374                 if(data[i]==initialValue) {
375                     data[i]=value;
376                 }
377             }
378         }
379     }
380 
381     /**
382      * Set a value in a range of code points [start..end].
383      * All code points c with start<=c<=end will get the value if
384      * overwrite is TRUE or if the old value is the initial value.
385      *
386      * @param start the first code point to get the value
387      * @param end the last code point to get the value (inclusive)
388      * @param value the value
389      * @param overwrite flag for whether old non-initial values are to be overwritten
390      */
setRange(int start, int end, int value, boolean overwrite)391      public Trie2Writable setRange(int start, int end,
392                            int value, boolean overwrite) {
393          /*
394           * repeat value in [start..end]
395           * mark index values for repeat-data blocks by setting bit 31 of the index values
396           * fill around existing values if any, if(overwrite)
397           */
398          int block, rest, repeatBlock;
399          int /*UChar32*/ limit;
400 
401          if(start>0x10ffff || start<0 || end>0x10ffff || end<0 || start>end) {
402              throw new IllegalArgumentException("Invalid code point range.");
403          }
404          if(!overwrite && value==initialValue) {
405              return this; /* nothing to do */
406          }
407          fHash = 0;
408          if(isCompacted) {
409              this.uncompact();
410          }
411 
412          limit=end+1;
413          if((start&UTRIE2_DATA_MASK) != 0) {
414              int  /*UChar32*/ nextStart;
415 
416              /* set partial block at [start..following block boundary[ */
417              block=getDataBlock(start, true);
418 
419              nextStart=(start+UTRIE2_DATA_BLOCK_LENGTH)&~UTRIE2_DATA_MASK;
420              if(nextStart<=limit) {
421                  fillBlock(block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH,
422                            value, initialValue, overwrite);
423                  start=nextStart;
424              } else {
425                  fillBlock(block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK,
426                            value, initialValue, overwrite);
427                  return this;
428              }
429          }
430 
431          /* number of positions in the last, partial block */
432          rest=limit&UTRIE2_DATA_MASK;
433 
434          /* round down limit to a block boundary */
435          limit&=~UTRIE2_DATA_MASK;
436 
437          /* iterate over all-value blocks */
438          if(value==initialValue) {
439              repeatBlock=dataNullOffset;
440          } else {
441              repeatBlock=-1;
442          }
443 
444          while(start<limit) {
445              int i2;
446              boolean setRepeatBlock=false;
447 
448              if(value==initialValue && isInNullBlock(start, true)) {
449                  start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */
450                  continue;
451              }
452 
453              /* get index value */
454              i2=getIndex2Block(start, true);
455              i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
456              block=index2[i2];
457              if(isWritableBlock(block)) {
458                  /* already allocated */
459                  if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) {
460                      /*
461                       * We overwrite all values, and it's not a
462                       * protected (ASCII-linear or 2-byte UTF-8) block:
463                       * replace with the repeatBlock.
464                       */
465                      setRepeatBlock=true;
466                  } else {
467                      /* !overwrite, or protected block: just write the values into this block */
468                      fillBlock(block,
469                                0, UTRIE2_DATA_BLOCK_LENGTH,
470                                value, initialValue, overwrite);
471                  }
472              } else if(data[block]!=value && (overwrite || block==dataNullOffset)) {
473                  /*
474                   * Set the repeatBlock instead of the null block or previous repeat block:
475                   *
476                   * If !isWritableBlock() then all entries in the block have the same value
477                   * because it's the null block or a range block (the repeatBlock from a previous
478                   * call to utrie2_setRange32()).
479                   * No other blocks are used multiple times before compacting.
480                   *
481                   * The null block is the only non-writable block with the initialValue because
482                   * of the repeatBlock initialization above. (If value==initialValue, then
483                   * the repeatBlock will be the null data block.)
484                   *
485                   * We set our repeatBlock if the desired value differs from the block's value,
486                   * and if we overwrite any data or if the data is all initial values
487                   * (which is the same as the block being the null block, see above).
488                   */
489                  setRepeatBlock=true;
490              }
491              if(setRepeatBlock) {
492                  if(repeatBlock>=0) {
493                      setIndex2Entry(i2, repeatBlock);
494                  } else {
495                      /* create and set and fill the repeatBlock */
496                      repeatBlock=getDataBlock(start, true);
497                      writeBlock(repeatBlock, value);
498                  }
499              }
500 
501              start+=UTRIE2_DATA_BLOCK_LENGTH;
502          }
503 
504          if(rest>0) {
505              /* set partial block at [last block boundary..limit[ */
506              block=getDataBlock(start, true);
507              fillBlock(block, 0, rest, value, initialValue, overwrite);
508          }
509 
510          return this;
511      }
512 
513      /**
514       * Set the values from a Trie2.Range.
515       *
516       * All code points within the range will get the value if
517       * overwrite is TRUE or if the old value is the initial value.
518       *
519       * Ranges with the lead surrogate flag set will set the alternate
520       * lead-surrogate values in the Trie, rather than the code point values.
521       *
522       * This function is intended to work with the ranges produced when iterating
523       * the contents of a source Trie.
524       *
525       * @param range contains the range of code points and the value to be set.
526       * @param overwrite flag for whether old non-initial values are to be overwritten
527       */
setRange(Trie2.Range range, boolean overwrite)528       public Trie2Writable setRange(Trie2.Range range, boolean overwrite) {
529           fHash = 0;
530           if (range.leadSurrogate) {
531               for (int c=range.startCodePoint; c<=range.endCodePoint; c++) {
532                   if (overwrite || getFromU16SingleLead((char)c) == this.initialValue)  {
533                       setForLeadSurrogateCodeUnit((char)c, range.value);
534                   }
535               }
536           } else {
537               setRange(range.startCodePoint, range.endCodePoint, range.value, overwrite);
538           }
539           return this;
540       }
541 
542      /**
543       * Set a value for a UTF-16 code unit.
544       * Note that a Trie2 stores separate values for
545       * supplementary code points in the lead surrogate range
546       * (accessed via the plain set() and get() interfaces)
547       * and for lead surrogate code units.
548       *
549       * The lead surrogate code unit values are set via this function and
550       * read by the function getFromU16SingleLead().
551       *
552       * For code units outside of the lead surrogate range, this function
553       * behaves identically to set().
554       *
555       * @param codeUnit A UTF-16 code unit.
556       * @param value the value to be stored in the Trie2.
557       */
setForLeadSurrogateCodeUnit(char codeUnit, int value)558      public Trie2Writable setForLeadSurrogateCodeUnit(char codeUnit, int value) {
559          fHash = 0;
560          set(codeUnit, false, value);
561          return this;
562      }
563 
564 
565      /**
566       * Get the value for a code point as stored in the Trie2.
567       *
568       * @param codePoint the code point
569       * @return the value
570       */
571     @Override
get(int codePoint)572     public int get(int codePoint) {
573         if (codePoint<0 || codePoint>0x10ffff) {
574             return errorValue;
575         } else {
576             return get(codePoint, true);
577         }
578     }
579 
580 
get(int c, boolean fromLSCP)581     private int get(int c, boolean fromLSCP) {
582         int i2, block;
583 
584         if(c>=highStart && (!(c>=0xd800 && c<0xdc00) || fromLSCP)) {
585             return data[dataLength-UTRIE2_DATA_GRANULARITY];
586         }
587 
588         if((c>=0xd800 && c<0xdc00) && fromLSCP) {
589             i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
590                 (c>>UTRIE2_SHIFT_2);
591         } else {
592             i2=index1[c>>UTRIE2_SHIFT_1]+
593                 ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
594         }
595         block=index2[i2];
596         return data[block+(c&UTRIE2_DATA_MASK)];
597     }
598 
599     /**
600      * Get a trie value for a UTF-16 code unit.
601      *
602      * This function returns the same value as get() if the input
603      * character is outside of the lead surrogate range
604      *
605      * There are two values stored in a Trie for inputs in the lead
606      * surrogate range.  This function returns the alternate value,
607      * while Trie2.get() returns the main value.
608      *
609      * @param c the code point or lead surrogate value.
610      * @return the value
611      */
612     @Override
getFromU16SingleLead(char c)613     public int getFromU16SingleLead(char c) {
614         return get(c, false);
615     }
616 
617     /* compaction --------------------------------------------------------------- */
618 
equal_int(int[] a, int s, int t, int length)619     private boolean equal_int(int[] a, int s, int t, int length) {
620         for (int i=0; i<length; i++) {
621             if (a[s+i] != a[t+i]) {
622                 return false;
623             }
624         }
625         return true;
626     }
627 
628 
findSameIndex2Block(int index2Length, int otherBlock)629     private int findSameIndex2Block(int index2Length, int otherBlock) {
630         int block;
631 
632         /* ensure that we do not even partially get past index2Length */
633         index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH;
634 
635         for(block=0; block<=index2Length; ++block) {
636             if(equal_int(index2, block, otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) {
637                 return block;
638             }
639         }
640         return -1;
641     }
642 
643 
findSameDataBlock(int dataLength, int otherBlock, int blockLength)644     private int findSameDataBlock(int dataLength, int otherBlock, int blockLength) {
645         int block;
646 
647         /* ensure that we do not even partially get past dataLength */
648         dataLength-=blockLength;
649 
650         for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) {
651             if(equal_int(data, block, otherBlock, blockLength)) {
652                 return block;
653             }
654         }
655         return -1;
656     }
657 
658     /*
659      * Find the start of the last range in the trie by enumerating backward.
660      * Indexes for supplementary code points higher than this will be omitted.
661      */
findHighStart(int highValue)662     private int findHighStart(int highValue) {
663 
664         int value;
665         int c, prev;
666         int i1, i2, j, i2Block, prevI2Block, block, prevBlock;
667 
668 
669         /* set variables for previous range */
670         if(highValue==initialValue) {
671             prevI2Block=index2NullOffset;
672             prevBlock=dataNullOffset;
673         } else {
674             prevI2Block=-1;
675             prevBlock=-1;
676         }
677         prev=0x110000;
678 
679         /* enumerate index-2 blocks */
680         i1=UNEWTRIE2_INDEX_1_LENGTH;
681         c=prev;
682         while(c>0) {
683             i2Block=index1[--i1];
684             if(i2Block==prevI2Block) {
685                 /* the index-2 block is the same as the previous one, and filled with highValue */
686                 c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
687                 continue;
688             }
689             prevI2Block=i2Block;
690             if(i2Block==index2NullOffset) {
691                 /* this is the null index-2 block */
692                 if(highValue!=initialValue) {
693                     return c;
694                 }
695                 c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
696             } else {
697                 /* enumerate data blocks for one index-2 block */
698                 for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) {
699                     block=index2[i2Block+ --i2];
700                     if(block==prevBlock) {
701                         /* the block is the same as the previous one, and filled with highValue */
702                         c-=UTRIE2_DATA_BLOCK_LENGTH;
703                         continue;
704                     }
705                     prevBlock=block;
706                     if(block==dataNullOffset) {
707                         /* this is the null data block */
708                         if(highValue!=initialValue) {
709                             return c;
710                         }
711                         c-=UTRIE2_DATA_BLOCK_LENGTH;
712                     } else {
713                         for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) {
714                             value=data[block+ --j];
715                             if(value!=highValue) {
716                                 return c;
717                             }
718                             --c;
719                         }
720                     }
721                 }
722             }
723         }
724 
725         /* deliver last range */
726         return 0;
727     }
728 
729     /*
730      * Compact a build-time trie.
731      *
732      * The compaction
733      * - removes blocks that are identical with earlier ones
734      * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
735      * - moves blocks in steps of the data granularity
736      * - moves and overlaps blocks that overlap with multiple values in the overlap region
737      *
738      * It does not
739      * - try to move and overlap blocks that are not already adjacent
740      */
compactData()741     private void compactData() {
742         int start, newStart, movedStart;
743         int blockLength, overlap;
744         int i, mapIndex, blockCount;
745 
746         /* do not compact linear-ASCII data */
747         newStart=UTRIE2_DATA_START_OFFSET;
748         for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) {
749             map[i]=start;
750         }
751 
752         /*
753          * Start with a block length of 64 for 2-byte UTF-8,
754          * then switch to UTRIE2_DATA_BLOCK_LENGTH.
755          */
756         blockLength=64;
757         blockCount=blockLength>>UTRIE2_SHIFT_2;
758         for(start=newStart; start<dataLength;) {
759             /*
760              * start: index of first entry of current block
761              * newStart: index where the current block is to be moved
762              *           (right after current end of already-compacted data)
763              */
764             if(start==UNEWTRIE2_DATA_0800_OFFSET) {
765                 blockLength=UTRIE2_DATA_BLOCK_LENGTH;
766                 blockCount=1;
767             }
768 
769             /* skip blocks that are not used */
770             if(map[start>>UTRIE2_SHIFT_2]<=0) {
771                 /* advance start to the next block */
772                 start+=blockLength;
773 
774                 /* leave newStart with the previous block! */
775                 continue;
776             }
777 
778             /* search for an identical block */
779             movedStart=findSameDataBlock(newStart, start, blockLength);
780             if(movedStart >= 0) {
781                 /* found an identical block, set the other block's index value for the current block */
782                 for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
783                     map[mapIndex++]=movedStart;
784                     movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
785                 }
786 
787                 /* advance start to the next block */
788                 start+=blockLength;
789 
790                 /* leave newStart with the previous block! */
791                 continue;
792             }
793 
794             /* see if the beginning of this block can be overlapped with the end of the previous block */
795             /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
796             for(overlap=blockLength-UTRIE2_DATA_GRANULARITY;
797                 overlap>0 && !equal_int(data, (newStart-overlap), start, overlap);
798                 overlap-=UTRIE2_DATA_GRANULARITY) {}
799 
800             if(overlap>0 || newStart<start) {
801                 /* some overlap, or just move the whole block */
802                 movedStart=newStart-overlap;
803                 for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
804                     map[mapIndex++]=movedStart;
805                     movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
806                 }
807 
808                 /* move the non-overlapping indexes to their new positions */
809                 start+=overlap;
810                 for(i=blockLength-overlap; i>0; --i) {
811                     data[newStart++]=data[start++];
812                 }
813             } else /* no overlap && newStart==start */ {
814                 for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
815                     map[mapIndex++]=start;
816                     start+=UTRIE2_DATA_BLOCK_LENGTH;
817                 }
818                 newStart=start;
819             }
820         }
821 
822         /* now adjust the index-2 table */
823         for(i=0; i<index2Length; ++i) {
824             if(i==UNEWTRIE2_INDEX_GAP_OFFSET) {
825                 /* Gap indexes are invalid (-1). Skip over the gap. */
826                 i+=UNEWTRIE2_INDEX_GAP_LENGTH;
827             }
828             index2[i]=map[index2[i]>>UTRIE2_SHIFT_2];
829         }
830         dataNullOffset=map[dataNullOffset>>UTRIE2_SHIFT_2];
831 
832         /* ensure dataLength alignment */
833         while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) {
834             data[newStart++]=initialValue;
835         }
836 
837         if  (UTRIE2_DEBUG) {
838             /* we saved some space */
839             System.out.printf("compacting UTrie2: count of 32-bit data words %d->%d%n",
840                 dataLength, newStart);
841         }
842 
843         dataLength=newStart;
844     }
845 
compactIndex2()846     private void compactIndex2() {
847         int i, start, newStart, movedStart, overlap;
848 
849         /* do not compact linear-BMP index-2 blocks */
850         newStart=UTRIE2_INDEX_2_BMP_LENGTH;
851         for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
852             map[i]=start;
853         }
854 
855         /* Reduce the index table gap to what will be needed at runtime. */
856         newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((highStart-0x10000)>>UTRIE2_SHIFT_1);
857 
858         for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<index2Length;) {
859             /*
860              * start: index of first entry of current block
861              * newStart: index where the current block is to be moved
862              *           (right after current end of already-compacted data)
863              */
864 
865             /* search for an identical block */
866             if( (movedStart=findSameIndex2Block(newStart, start))
867                  >=0
868             ) {
869                 /* found an identical block, set the other block's index value for the current block */
870                 map[start>>UTRIE2_SHIFT_1_2]=movedStart;
871 
872                 /* advance start to the next block */
873                 start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
874 
875                 /* leave newStart with the previous block! */
876                 continue;
877             }
878 
879             /* see if the beginning of this block can be overlapped with the end of the previous block */
880             /* look for maximum overlap with the previous, adjacent block */
881             for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1;
882                 overlap>0 && !equal_int(index2, newStart-overlap, start, overlap);
883                 --overlap) {}
884 
885             if(overlap>0 || newStart<start) {
886                 /* some overlap, or just move the whole block */
887                 map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap;
888 
889                 /* move the non-overlapping indexes to their new positions */
890                 start+=overlap;
891                 for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) {
892                     index2[newStart++]=index2[start++];
893                 }
894             } else /* no overlap && newStart==start */ {
895                 map[start>>UTRIE2_SHIFT_1_2]=start;
896                 start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
897                 newStart=start;
898             }
899         }
900 
901         /* now adjust the index-1 table */
902         for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
903             index1[i]=map[index1[i]>>UTRIE2_SHIFT_1_2];
904         }
905         index2NullOffset=map[index2NullOffset>>UTRIE2_SHIFT_1_2];
906 
907         /*
908          * Ensure data table alignment:
909          * Needs to be granularity-aligned for 16-bit trie
910          * (so that dataMove will be down-shiftable),
911          * and 2-aligned for uint32_t data.
912          */
913         while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) {
914             /* Arbitrary value: 0x3fffc not possible for real data. */
915             index2[newStart++]=0x0000ffff<<UTRIE2_INDEX_SHIFT;
916         }
917 
918         if (UTRIE2_DEBUG) {
919             /* we saved some space */
920             System.out.printf("compacting UTrie2: count of 16-bit index-2 words %d->%d%n",
921                     index2Length, newStart);
922         }
923 
924         index2Length=newStart;
925     }
926 
compactTrie()927     private void compactTrie() {
928         int localHighStart;
929         int suppHighStart;
930         int highValue;
931 
932         /* find highStart and round it up */
933         highValue=get(0x10ffff);
934         localHighStart=findHighStart(highValue);
935         localHighStart=(localHighStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1);
936         if(localHighStart==0x110000) {
937             highValue=errorValue;
938         }
939 
940         /*
941          * Set trie->highStart only after utrie2_get32(trie, highStart).
942          * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
943          */
944         this.highStart=localHighStart;
945 
946         if (UTRIE2_DEBUG) {
947             System.out.printf("UTrie2: highStart U+%04x  highValue 0x%x  initialValue 0x%x%n",
948                 highStart, highValue, initialValue);
949         }
950 
951         if(highStart<0x110000) {
952             /* Blank out [highStart..10ffff] to release associated data blocks. */
953             suppHighStart= highStart<=0x10000 ? 0x10000 : highStart;
954             setRange(suppHighStart, 0x10ffff, initialValue, true);
955         }
956 
957         compactData();
958         if(highStart>0x10000) {
959             compactIndex2();
960         } else {
961             if (UTRIE2_DEBUG) {
962                  System.out.printf("UTrie2: highStart U+%04x  count of 16-bit index-2 words %d->%d%n",
963                          highStart, index2Length, UTRIE2_INDEX_1_OFFSET);
964             }
965         }
966 
967         /*
968          * Store the highValue in the data array and round up the dataLength.
969          * Must be done after compactData() because that assumes that dataLength
970          * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
971          */
972         data[dataLength++]=highValue;
973         while((dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) {
974             data[dataLength++]=initialValue;
975         }
976 
977         isCompacted=true;
978     }
979 
980 
981     /**
982      * Produce an optimized, read-only Trie2_16 from this writable Trie.
983      * The data values outside of the range that will fit in a 16 bit
984      * unsigned value will be truncated.
985      */
toTrie2_16()986     public Trie2_16 toTrie2_16() {
987         Trie2_16 frozenTrie = new Trie2_16();
988         freeze(frozenTrie, ValueWidth.BITS_16);
989         return frozenTrie;
990     }
991 
992 
993     /**
994      * Produce an optimized, read-only Trie2_32 from this writable Trie.
995      *
996      */
toTrie2_32()997     public Trie2_32 toTrie2_32() {
998         Trie2_32 frozenTrie = new Trie2_32();
999         freeze(frozenTrie, ValueWidth.BITS_32);
1000         return frozenTrie;
1001     }
1002 
1003 
1004     /**
1005      * Maximum length of the runtime index array.
1006      * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
1007      * (The actual maximum length is lower,
1008      * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
1009      */
1010     private static final int UTRIE2_MAX_INDEX_LENGTH = 0xffff;
1011 
1012     /**
1013      * Maximum length of the runtime data array.
1014      * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
1015      * and by uint16_t UTrie2Header.shiftedDataLength.
1016      */
1017     private static final int  UTRIE2_MAX_DATA_LENGTH = 0xffff<<UTRIE2_INDEX_SHIFT;
1018 
1019     /* Compact the data and then populate an optimized read-only Trie.  */
freeze(Trie2 dest, ValueWidth valueBits)1020     private void freeze(Trie2 dest, ValueWidth valueBits) {
1021         int  i;
1022         int  allIndexesLength;
1023         int  dataMove;  /* >0 if the data is moved to the end of the index array */
1024 
1025 
1026         /* compact if necessary */
1027         if(!isCompacted) {
1028             compactTrie();
1029         }
1030 
1031         if(highStart<=0x10000) {
1032             allIndexesLength=UTRIE2_INDEX_1_OFFSET;
1033         } else {
1034             allIndexesLength=index2Length;
1035         }
1036         if(valueBits==ValueWidth.BITS_16) {
1037             dataMove=allIndexesLength;
1038         } else {
1039             dataMove=0;
1040         }
1041 
1042         /* are indexLength and dataLength within limits? */
1043         if( /* for unshifted indexLength */
1044             allIndexesLength>UTRIE2_MAX_INDEX_LENGTH ||
1045             /* for unshifted dataNullOffset */
1046             (dataMove+dataNullOffset)>0xffff ||
1047             /* for unshifted 2-byte UTF-8 index-2 values */
1048             (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff ||
1049             /* for shiftedDataLength */
1050             (dataMove+dataLength)>UTRIE2_MAX_DATA_LENGTH) {
1051                 throw new UnsupportedOperationException("Trie2 data is too large.");
1052         }
1053 
1054         /* calculate the sizes of, and allocate, the index and data arrays */
1055         int indexLength = allIndexesLength;
1056         if (valueBits==ValueWidth.BITS_16) {
1057             indexLength += dataLength;
1058         } else {
1059             dest.data32 = new int[dataLength];
1060         }
1061         dest.index = new char[indexLength];
1062 
1063         dest.indexLength = allIndexesLength;
1064         dest.dataLength  = dataLength;
1065         if(highStart<=0x10000) {
1066             dest.index2NullOffset = 0xffff;
1067         } else {
1068             dest.index2NullOffset = UTRIE2_INDEX_2_OFFSET + index2NullOffset;
1069         }
1070         dest.initialValue   = initialValue;
1071         dest.errorValue     = errorValue;
1072         dest.highStart      = highStart;
1073         dest.highValueIndex = dataMove + dataLength - UTRIE2_DATA_GRANULARITY;
1074         dest.dataNullOffset = (dataMove+dataNullOffset);
1075 
1076         // Create a header and set the its fields.
1077         //   (This is only used in the event that we serialize the Trie, but is
1078         //    convenient to do here.)
1079         dest.header = new Trie2.UTrie2Header();
1080         dest.header.signature         = 0x54726932; /* "Tri2" */
1081         dest.header.options           = valueBits==ValueWidth.BITS_16 ? 0 : 1;
1082         dest.header.indexLength       = dest.indexLength;
1083         dest.header.shiftedDataLength = dest.dataLength>>UTRIE2_INDEX_SHIFT;
1084         dest.header.index2NullOffset  = dest.index2NullOffset;
1085         dest.header.dataNullOffset    = dest.dataNullOffset;
1086         dest.header.shiftedHighStart  = dest.highStart>>UTRIE2_SHIFT_1;
1087 
1088 
1089 
1090         /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
1091         int destIdx = 0;
1092         for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; i++) {
1093             dest.index[destIdx++] = (char)((index2[i]+dataMove) >> UTRIE2_INDEX_SHIFT);
1094         }
1095         if (UTRIE2_DEBUG) {
1096             System.out.println("\n\nIndex2 for BMP limit is " + Integer.toHexString(destIdx));
1097         }
1098 
1099         /* write UTF-8 2-byte index-2 values, not right-shifted */
1100         for(i=0; i<(0xc2-0xc0); ++i) {                                  /* C0..C1 */
1101             dest.index[destIdx++] = (char)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
1102         }
1103         for(; i<(0xe0-0xc0); ++i) {                                     /* C2..DF */
1104             dest.index[destIdx++]=(char)(dataMove+index2[i<<(6-UTRIE2_SHIFT_2)]);
1105         }
1106         if (UTRIE2_DEBUG) {
1107             System.out.println("Index2 for UTF-8 2byte values limit is " + Integer.toHexString(destIdx));
1108         }
1109 
1110         if(highStart>0x10000) {
1111             int index1Length = (highStart-0x10000)>>UTRIE2_SHIFT_1;
1112             int index2Offset = UTRIE2_INDEX_2_BMP_LENGTH + UTRIE2_UTF8_2B_INDEX_2_LENGTH + index1Length;
1113 
1114             /* write 16-bit index-1 values for supplementary code points */
1115             //p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
1116             for(i=0; i<index1Length; i++) {
1117                 //*dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++);
1118                 dest.index[destIdx++] = (char)(UTRIE2_INDEX_2_OFFSET + index1[i+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH]);
1119             }
1120             if (UTRIE2_DEBUG) {
1121                 System.out.println("Index 1 for supplementals, limit is " + Integer.toHexString(destIdx));
1122             }
1123 
1124             /*
1125              * write the index-2 array values for supplementary code points,
1126              * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
1127              */
1128             for(i=0; i<index2Length-index2Offset; i++) {
1129                 dest.index[destIdx++] = (char)((dataMove + index2[index2Offset+i])>>UTRIE2_INDEX_SHIFT);
1130             }
1131             if (UTRIE2_DEBUG) {
1132                 System.out.println("Index 2 for supplementals, limit is " + Integer.toHexString(destIdx));
1133             }
1134         }
1135 
1136         /* write the 16/32-bit data array */
1137         switch(valueBits) {
1138         case BITS_16:
1139             /* write 16-bit data values */
1140             assert(destIdx == dataMove);
1141             dest.data16 = destIdx;
1142             for(i=0; i<dataLength; i++) {
1143                 dest.index[destIdx++] = (char)data[i];
1144             }
1145             break;
1146         case BITS_32:
1147             /* write 32-bit data values */
1148             for (i=0; i<dataLength; i++) {
1149                 dest.data32[i] = this.data[i];
1150             }
1151             break;
1152         }
1153         // The writable, but compressed, Trie2 stays around unless the caller drops its references to it.
1154     }
1155 
1156 
1157     /* Start with allocation of 16k data entries. */
1158     private static final int UNEWTRIE2_INITIAL_DATA_LENGTH = 1<<14;
1159 
1160     /* Grow about 8x each time. */
1161     private static final int UNEWTRIE2_MEDIUM_DATA_LENGTH = 1<<17;
1162 
1163     /** The null index-2 block, following the gap in the index-2 table. */
1164     private static final int UNEWTRIE2_INDEX_2_NULL_OFFSET = UNEWTRIE2_INDEX_GAP_OFFSET + UNEWTRIE2_INDEX_GAP_LENGTH;
1165 
1166     /** The start of allocated index-2 blocks. */
1167     private static final int UNEWTRIE2_INDEX_2_START_OFFSET = UNEWTRIE2_INDEX_2_NULL_OFFSET + UTRIE2_INDEX_2_BLOCK_LENGTH;
1168 
1169     /**
1170      * The null data block.
1171      * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
1172      * to work with 6-bit trail bytes from 2-byte UTF-8.
1173      */
1174     private static final int UNEWTRIE2_DATA_NULL_OFFSET = UTRIE2_DATA_START_OFFSET;
1175 
1176     /** The start of allocated data blocks. */
1177     private static final int UNEWTRIE2_DATA_START_OFFSET = UNEWTRIE2_DATA_NULL_OFFSET+0x40;
1178 
1179     /**
1180      * The start of data blocks for U+0800 and above.
1181      * Below, compaction uses a block length of 64 for 2-byte UTF-8.
1182      * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
1183      * Data values for 0x780 code points beyond ASCII.
1184      */
1185     private static final int UNEWTRIE2_DATA_0800_OFFSET = UNEWTRIE2_DATA_START_OFFSET+0x780;
1186 
1187     //
1188     // Private data members.  From struct UNewTrie2 in ICU4C
1189     //
1190     private  int[]   index1 = new int[UNEWTRIE2_INDEX_1_LENGTH];
1191     private  int[]   index2 = new int[UNEWTRIE2_MAX_INDEX_2_LENGTH];
1192     private  int[]   data;
1193 
1194     private  int     index2Length;
1195     private  int     dataCapacity;
1196     private  int     firstFreeBlock;
1197     private  int     index2NullOffset;
1198     private  boolean isCompacted;
1199 
1200 
1201     /*
1202      * Multi-purpose per-data-block table.
1203      *
1204      * Before compacting:
1205      *
1206      * Per-data-block reference counters/free-block list.
1207      *  0: unused
1208      * >0: reference counter (number of index-2 entries pointing here)
1209      * <0: next free data block in free-block list
1210      *
1211      * While compacting:
1212      *
1213      * Map of adjusted indexes, used in compactData() and compactIndex2().
1214      * Maps from original indexes to new ones.
1215      */
1216      private  int[]   map = new int[UNEWTRIE2_MAX_DATA_LENGTH>>UTRIE2_SHIFT_2];
1217 
1218 
1219      private boolean UTRIE2_DEBUG = false;
1220 
1221 }
1222