1 /*
2 ******************************************************************************
3 *
4 *   Copyright (C) 2001-2014, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *   file name:  utrie2.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2008aug16 (starting from a copy of utrie.c)
14 *   created by: Markus W. Scherer
15 *
16 *   This is a common implementation of a Unicode trie.
17 *   It is a kind of compressed, serializable table of 16- or 32-bit values associated with
18 *   Unicode code points (0..0x10ffff).
19 *   This is the second common version of a Unicode trie (hence the name UTrie2).
20 *   See utrie2.h for a comparison.
21 *
22 *   This file contains only the runtime and enumeration code, for read-only access.
23 *   See utrie2_builder.c for the builder code.
24 */
25 #ifdef UTRIE2_DEBUG
26 #   include <stdio.h>
27 #endif
28 
29 #include "unicode/utypes.h"
30 #include "unicode/utf.h"
31 #include "unicode/utf8.h"
32 #include "unicode/utf16.h"
33 #include "cmemory.h"
34 #include "utrie2.h"
35 #include "utrie2_impl.h"
36 #include "uassert.h"
37 
38 /* Public UTrie2 API implementation ----------------------------------------- */
39 
40 static uint32_t
get32(const UNewTrie2 * trie,UChar32 c,UBool fromLSCP)41 get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) {
42     int32_t i2, block;
43 
44     if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) {
45         return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY];
46     }
47 
48     if(U_IS_LEAD(c) && fromLSCP) {
49         i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
50             (c>>UTRIE2_SHIFT_2);
51     } else {
52         i2=trie->index1[c>>UTRIE2_SHIFT_1]+
53             ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
54     }
55     block=trie->index2[i2];
56     return trie->data[block+(c&UTRIE2_DATA_MASK)];
57 }
58 
59 U_CAPI uint32_t U_EXPORT2
utrie2_get32(const UTrie2 * trie,UChar32 c)60 utrie2_get32(const UTrie2 *trie, UChar32 c) {
61     if(trie->data16!=NULL) {
62         return UTRIE2_GET16(trie, c);
63     } else if(trie->data32!=NULL) {
64         return UTRIE2_GET32(trie, c);
65     } else if((uint32_t)c>0x10ffff) {
66         return trie->errorValue;
67     } else {
68         return get32(trie->newTrie, c, TRUE);
69     }
70 }
71 
72 U_CAPI uint32_t U_EXPORT2
utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 * trie,UChar32 c)73 utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) {
74     if(!U_IS_LEAD(c)) {
75         return trie->errorValue;
76     }
77     if(trie->data16!=NULL) {
78         return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c);
79     } else if(trie->data32!=NULL) {
80         return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c);
81     } else {
82         return get32(trie->newTrie, c, FALSE);
83     }
84 }
85 
86 static inline int32_t
u8Index(const UTrie2 * trie,UChar32 c,int32_t i)87 u8Index(const UTrie2 *trie, UChar32 c, int32_t i) {
88     int32_t idx=
89         _UTRIE2_INDEX_FROM_CP(
90             trie,
91             trie->data32==NULL ? trie->indexLength : 0,
92             c);
93     return (idx<<3)|i;
94 }
95 
96 U_CAPI int32_t U_EXPORT2
utrie2_internalU8NextIndex(const UTrie2 * trie,UChar32 c,const uint8_t * src,const uint8_t * limit)97 utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c,
98                            const uint8_t *src, const uint8_t *limit) {
99     int32_t i, length;
100     i=0;
101     /* support 64-bit pointers by avoiding cast of arbitrary difference */
102     if((limit-src)<=7) {
103         length=(int32_t)(limit-src);
104     } else {
105         length=7;
106     }
107     c=utf8_nextCharSafeBody(src, &i, length, c, -1);
108     return u8Index(trie, c, i);
109 }
110 
111 U_CAPI int32_t U_EXPORT2
utrie2_internalU8PrevIndex(const UTrie2 * trie,UChar32 c,const uint8_t * start,const uint8_t * src)112 utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
113                            const uint8_t *start, const uint8_t *src) {
114     int32_t i, length;
115     /* support 64-bit pointers by avoiding cast of arbitrary difference */
116     if((src-start)<=7) {
117         i=length=(int32_t)(src-start);
118     } else {
119         i=length=7;
120         start=src-7;
121     }
122     c=utf8_prevCharSafeBody(start, 0, &i, c, -1);
123     i=length-i;  /* number of bytes read backward from src */
124     return u8Index(trie, c, i);
125 }
126 
127 U_CAPI UTrie2 * U_EXPORT2
utrie2_openFromSerialized(UTrie2ValueBits valueBits,const void * data,int32_t length,int32_t * pActualLength,UErrorCode * pErrorCode)128 utrie2_openFromSerialized(UTrie2ValueBits valueBits,
129                           const void *data, int32_t length, int32_t *pActualLength,
130                           UErrorCode *pErrorCode) {
131     const UTrie2Header *header;
132     const uint16_t *p16;
133     int32_t actualLength;
134 
135     UTrie2 tempTrie;
136     UTrie2 *trie;
137 
138     if(U_FAILURE(*pErrorCode)) {
139         return 0;
140     }
141 
142     if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) ||
143         valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
144     ) {
145         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
146         return 0;
147     }
148 
149     /* enough data for a trie header? */
150     if(length<(int32_t)sizeof(UTrie2Header)) {
151         *pErrorCode=U_INVALID_FORMAT_ERROR;
152         return 0;
153     }
154 
155     /* check the signature */
156     header=(const UTrie2Header *)data;
157     if(header->signature!=UTRIE2_SIG) {
158         *pErrorCode=U_INVALID_FORMAT_ERROR;
159         return 0;
160     }
161 
162     /* get the options */
163     if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) {
164         *pErrorCode=U_INVALID_FORMAT_ERROR;
165         return 0;
166     }
167 
168     /* get the length values and offsets */
169     uprv_memset(&tempTrie, 0, sizeof(tempTrie));
170     tempTrie.indexLength=header->indexLength;
171     tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT;
172     tempTrie.index2NullOffset=header->index2NullOffset;
173     tempTrie.dataNullOffset=header->dataNullOffset;
174 
175     tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1;
176     tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY;
177     if(valueBits==UTRIE2_16_VALUE_BITS) {
178         tempTrie.highValueIndex+=tempTrie.indexLength;
179     }
180 
181     /* calculate the actual length */
182     actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2;
183     if(valueBits==UTRIE2_16_VALUE_BITS) {
184         actualLength+=tempTrie.dataLength*2;
185     } else {
186         actualLength+=tempTrie.dataLength*4;
187     }
188     if(length<actualLength) {
189         *pErrorCode=U_INVALID_FORMAT_ERROR;  /* not enough bytes */
190         return 0;
191     }
192 
193     /* allocate the trie */
194     trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
195     if(trie==NULL) {
196         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
197         return 0;
198     }
199     uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
200     trie->memory=(uint32_t *)data;
201     trie->length=actualLength;
202     trie->isMemoryOwned=FALSE;
203 
204     /* set the pointers to its index and data arrays */
205     p16=(const uint16_t *)(header+1);
206     trie->index=p16;
207     p16+=trie->indexLength;
208 
209     /* get the data */
210     switch(valueBits) {
211     case UTRIE2_16_VALUE_BITS:
212         trie->data16=p16;
213         trie->data32=NULL;
214         trie->initialValue=trie->index[trie->dataNullOffset];
215         trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET];
216         break;
217     case UTRIE2_32_VALUE_BITS:
218         trie->data16=NULL;
219         trie->data32=(const uint32_t *)p16;
220         trie->initialValue=trie->data32[trie->dataNullOffset];
221         trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET];
222         break;
223     default:
224         *pErrorCode=U_INVALID_FORMAT_ERROR;
225         return 0;
226     }
227 
228     if(pActualLength!=NULL) {
229         *pActualLength=actualLength;
230     }
231     return trie;
232 }
233 
234 U_CAPI UTrie2 * U_EXPORT2
utrie2_openDummy(UTrie2ValueBits valueBits,uint32_t initialValue,uint32_t errorValue,UErrorCode * pErrorCode)235 utrie2_openDummy(UTrie2ValueBits valueBits,
236                  uint32_t initialValue, uint32_t errorValue,
237                  UErrorCode *pErrorCode) {
238     UTrie2 *trie;
239     UTrie2Header *header;
240     uint32_t *p;
241     uint16_t *dest16;
242     int32_t indexLength, dataLength, length, i;
243     int32_t dataMove;  /* >0 if the data is moved to the end of the index array */
244 
245     if(U_FAILURE(*pErrorCode)) {
246         return 0;
247     }
248 
249     if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) {
250         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
251         return 0;
252     }
253 
254     /* calculate the total length of the dummy trie data */
255     indexLength=UTRIE2_INDEX_1_OFFSET;
256     dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY;
257     length=(int32_t)sizeof(UTrie2Header)+indexLength*2;
258     if(valueBits==UTRIE2_16_VALUE_BITS) {
259         length+=dataLength*2;
260     } else {
261         length+=dataLength*4;
262     }
263 
264     /* allocate the trie */
265     trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
266     if(trie==NULL) {
267         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
268         return 0;
269     }
270     uprv_memset(trie, 0, sizeof(UTrie2));
271     trie->memory=uprv_malloc(length);
272     if(trie->memory==NULL) {
273         uprv_free(trie);
274         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
275         return 0;
276     }
277     trie->length=length;
278     trie->isMemoryOwned=TRUE;
279 
280     /* set the UTrie2 fields */
281     if(valueBits==UTRIE2_16_VALUE_BITS) {
282         dataMove=indexLength;
283     } else {
284         dataMove=0;
285     }
286 
287     trie->indexLength=indexLength;
288     trie->dataLength=dataLength;
289     trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET;
290     trie->dataNullOffset=(uint16_t)dataMove;
291     trie->initialValue=initialValue;
292     trie->errorValue=errorValue;
293     trie->highStart=0;
294     trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET;
295 
296     /* set the header fields */
297     header=(UTrie2Header *)trie->memory;
298 
299     header->signature=UTRIE2_SIG; /* "Tri2" */
300     header->options=(uint16_t)valueBits;
301 
302     header->indexLength=(uint16_t)indexLength;
303     header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT);
304     header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET;
305     header->dataNullOffset=(uint16_t)dataMove;
306     header->shiftedHighStart=0;
307 
308     /* fill the index and data arrays */
309     dest16=(uint16_t *)(header+1);
310     trie->index=dest16;
311 
312     /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */
313     for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
314         *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT);  /* null data block */
315     }
316 
317     /* write UTF-8 2-byte index-2 values, not right-shifted */
318     for(i=0; i<(0xc2-0xc0); ++i) {                                  /* C0..C1 */
319         *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
320     }
321     for(; i<(0xe0-0xc0); ++i) {                                     /* C2..DF */
322         *dest16++=(uint16_t)dataMove;
323     }
324 
325     /* write the 16/32-bit data array */
326     switch(valueBits) {
327     case UTRIE2_16_VALUE_BITS:
328         /* write 16-bit data values */
329         trie->data16=dest16;
330         trie->data32=NULL;
331         for(i=0; i<0x80; ++i) {
332             *dest16++=(uint16_t)initialValue;
333         }
334         for(; i<0xc0; ++i) {
335             *dest16++=(uint16_t)errorValue;
336         }
337         /* highValue and reserved values */
338         for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
339             *dest16++=(uint16_t)initialValue;
340         }
341         break;
342     case UTRIE2_32_VALUE_BITS:
343         /* write 32-bit data values */
344         p=(uint32_t *)dest16;
345         trie->data16=NULL;
346         trie->data32=p;
347         for(i=0; i<0x80; ++i) {
348             *p++=initialValue;
349         }
350         for(; i<0xc0; ++i) {
351             *p++=errorValue;
352         }
353         /* highValue and reserved values */
354         for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
355             *p++=initialValue;
356         }
357         break;
358     default:
359         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
360         return 0;
361     }
362 
363     return trie;
364 }
365 
366 U_CAPI void U_EXPORT2
utrie2_close(UTrie2 * trie)367 utrie2_close(UTrie2 *trie) {
368     if(trie!=NULL) {
369         if(trie->isMemoryOwned) {
370             uprv_free(trie->memory);
371         }
372         if(trie->newTrie!=NULL) {
373             uprv_free(trie->newTrie->data);
374             uprv_free(trie->newTrie);
375         }
376         uprv_free(trie);
377     }
378 }
379 
380 U_CAPI int32_t U_EXPORT2
utrie2_getVersion(const void * data,int32_t length,UBool anyEndianOk)381 utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) {
382     uint32_t signature;
383     if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) {
384         return 0;
385     }
386     signature=*(const uint32_t *)data;
387     if(signature==UTRIE2_SIG) {
388         return 2;
389     }
390     if(anyEndianOk && signature==UTRIE2_OE_SIG) {
391         return 2;
392     }
393     if(signature==UTRIE_SIG) {
394         return 1;
395     }
396     if(anyEndianOk && signature==UTRIE_OE_SIG) {
397         return 1;
398     }
399     return 0;
400 }
401 
402 U_CAPI UBool U_EXPORT2
utrie2_isFrozen(const UTrie2 * trie)403 utrie2_isFrozen(const UTrie2 *trie) {
404     return (UBool)(trie->newTrie==NULL);
405 }
406 
407 U_CAPI int32_t U_EXPORT2
utrie2_serialize(const UTrie2 * trie,void * data,int32_t capacity,UErrorCode * pErrorCode)408 utrie2_serialize(const UTrie2 *trie,
409                  void *data, int32_t capacity,
410                  UErrorCode *pErrorCode) {
411     /* argument check */
412     if(U_FAILURE(*pErrorCode)) {
413         return 0;
414     }
415 
416     if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL ||
417         capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)))
418     ) {
419         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
420         return 0;
421     }
422 
423     if(capacity>=trie->length) {
424         uprv_memcpy(data, trie->memory, trie->length);
425     } else {
426         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
427     }
428     return trie->length;
429 }
430 
431 U_CAPI int32_t U_EXPORT2
utrie2_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)432 utrie2_swap(const UDataSwapper *ds,
433             const void *inData, int32_t length, void *outData,
434             UErrorCode *pErrorCode) {
435     const UTrie2Header *inTrie;
436     UTrie2Header trie;
437     int32_t dataLength, size;
438     UTrie2ValueBits valueBits;
439 
440     if(U_FAILURE(*pErrorCode)) {
441         return 0;
442     }
443     if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
444         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
445         return 0;
446     }
447 
448     /* setup and swapping */
449     if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
450         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
451         return 0;
452     }
453 
454     inTrie=(const UTrie2Header *)inData;
455     trie.signature=ds->readUInt32(inTrie->signature);
456     trie.options=ds->readUInt16(inTrie->options);
457     trie.indexLength=ds->readUInt16(inTrie->indexLength);
458     trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
459 
460     valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
461     dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
462 
463     if( trie.signature!=UTRIE2_SIG ||
464         valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
465         trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
466         dataLength<UTRIE2_DATA_START_OFFSET
467     ) {
468         *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
469         return 0;
470     }
471 
472     size=sizeof(UTrie2Header)+trie.indexLength*2;
473     switch(valueBits) {
474     case UTRIE2_16_VALUE_BITS:
475         size+=dataLength*2;
476         break;
477     case UTRIE2_32_VALUE_BITS:
478         size+=dataLength*4;
479         break;
480     default:
481         *pErrorCode=U_INVALID_FORMAT_ERROR;
482         return 0;
483     }
484 
485     if(length>=0) {
486         UTrie2Header *outTrie;
487 
488         if(length<size) {
489             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
490             return 0;
491         }
492 
493         outTrie=(UTrie2Header *)outData;
494 
495         /* swap the header */
496         ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
497         ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
498 
499         /* swap the index and the data */
500         switch(valueBits) {
501         case UTRIE2_16_VALUE_BITS:
502             ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
503             break;
504         case UTRIE2_32_VALUE_BITS:
505             ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
506             ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
507                                      (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
508             break;
509         default:
510             *pErrorCode=U_INVALID_FORMAT_ERROR;
511             return 0;
512         }
513     }
514 
515     return size;
516 }
517 
518 // utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c
519 // to avoid a dependency from utrie2.cpp on utrie.c.
520 
521 /* enumeration -------------------------------------------------------------- */
522 
523 #define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b))
524 
525 /* default UTrie2EnumValue() returns the input value itself */
526 static uint32_t U_CALLCONV
enumSameValue(const void *,uint32_t value)527 enumSameValue(const void * /*context*/, uint32_t value) {
528     return value;
529 }
530 
531 /**
532  * Enumerate all ranges of code points with the same relevant values.
533  * The values are transformed from the raw trie entries by the enumValue function.
534  *
535  * Currently requires start<limit and both start and limit must be multiples
536  * of UTRIE2_DATA_BLOCK_LENGTH.
537  *
538  * Optimizations:
539  * - Skip a whole block if we know that it is filled with a single value,
540  *   and it is the same as we visited just before.
541  * - Handle the null block specially because we know a priori that it is filled
542  *   with a single value.
543  */
544 static void
enumEitherTrie(const UTrie2 * trie,UChar32 start,UChar32 limit,UTrie2EnumValue * enumValue,UTrie2EnumRange * enumRange,const void * context)545 enumEitherTrie(const UTrie2 *trie,
546                UChar32 start, UChar32 limit,
547                UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
548     const uint32_t *data32;
549     const uint16_t *idx;
550 
551     uint32_t value, prevValue, initialValue;
552     UChar32 c, prev, highStart;
553     int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
554 
555     if(enumRange==NULL) {
556         return;
557     }
558     if(enumValue==NULL) {
559         enumValue=enumSameValue;
560     }
561 
562     if(trie->newTrie==NULL) {
563         /* frozen trie */
564         idx=trie->index;
565         U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */
566         data32=trie->data32;
567 
568         index2NullOffset=trie->index2NullOffset;
569         nullBlock=trie->dataNullOffset;
570     } else {
571         /* unfrozen, mutable trie */
572         idx=NULL;
573         data32=trie->newTrie->data;
574         U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */
575 
576         index2NullOffset=trie->newTrie->index2NullOffset;
577         nullBlock=trie->newTrie->dataNullOffset;
578     }
579 
580     highStart=trie->highStart;
581 
582     /* get the enumeration value that corresponds to an initial-value trie data entry */
583     initialValue=enumValue(context, trie->initialValue);
584 
585     /* set variables for previous range */
586     prevI2Block=-1;
587     prevBlock=-1;
588     prev=start;
589     prevValue=0;
590 
591     /* enumerate index-2 blocks */
592     for(c=start; c<limit && c<highStart;) {
593         /* Code point limit for iterating inside this i2Block. */
594         UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY;
595         if(limit<tempLimit) {
596             tempLimit=limit;
597         }
598         if(c<=0xffff) {
599             if(!U_IS_SURROGATE(c)) {
600                 i2Block=c>>UTRIE2_SHIFT_2;
601             } else if(U_IS_SURROGATE_LEAD(c)) {
602                 /*
603                  * Enumerate values for lead surrogate code points, not code units:
604                  * This special block has half the normal length.
605                  */
606                 i2Block=UTRIE2_LSCP_INDEX_2_OFFSET;
607                 tempLimit=MIN_VALUE(0xdc00, limit);
608             } else {
609                 /*
610                  * Switch back to the normal part of the index-2 table.
611                  * Enumerate the second half of the surrogates block.
612                  */
613                 i2Block=0xd800>>UTRIE2_SHIFT_2;
614                 tempLimit=MIN_VALUE(0xe000, limit);
615             }
616         } else {
617             /* supplementary code points */
618             if(idx!=NULL) {
619                 i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+
620                               (c>>UTRIE2_SHIFT_1)];
621             } else {
622                 i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1];
623             }
624             if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) {
625                 /*
626                  * The index-2 block is the same as the previous one, and filled with prevValue.
627                  * Only possible for supplementary code points because the linear-BMP index-2
628                  * table creates unique i2Block values.
629                  */
630                 c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
631                 continue;
632             }
633         }
634         prevI2Block=i2Block;
635         if(i2Block==index2NullOffset) {
636             /* this is the null index-2 block */
637             if(prevValue!=initialValue) {
638                 if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
639                     return;
640                 }
641                 prevBlock=nullBlock;
642                 prev=c;
643                 prevValue=initialValue;
644             }
645             c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
646         } else {
647             /* enumerate data blocks for one index-2 block */
648             int32_t i2, i2Limit;
649             i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
650             if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) {
651                 i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
652             } else {
653                 i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH;
654             }
655             for(; i2<i2Limit; ++i2) {
656                 if(idx!=NULL) {
657                     block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT;
658                 } else {
659                     block=trie->newTrie->index2[i2Block+i2];
660                 }
661                 if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) {
662                     /* the block is the same as the previous one, and filled with prevValue */
663                     c+=UTRIE2_DATA_BLOCK_LENGTH;
664                     continue;
665                 }
666                 prevBlock=block;
667                 if(block==nullBlock) {
668                     /* this is the null data block */
669                     if(prevValue!=initialValue) {
670                         if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
671                             return;
672                         }
673                         prev=c;
674                         prevValue=initialValue;
675                     }
676                     c+=UTRIE2_DATA_BLOCK_LENGTH;
677                 } else {
678                     for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) {
679                         value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
680                         if(value!=prevValue) {
681                             if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
682                                 return;
683                             }
684                             prev=c;
685                             prevValue=value;
686                         }
687                         ++c;
688                     }
689                 }
690             }
691         }
692     }
693 
694     if(c>limit) {
695         c=limit;  /* could be higher if in the index2NullOffset */
696     } else if(c<limit) {
697         /* c==highStart<limit */
698         uint32_t highValue;
699         if(idx!=NULL) {
700             highValue=
701                 data32!=NULL ?
702                     data32[trie->highValueIndex] :
703                     idx[trie->highValueIndex];
704         } else {
705             highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY];
706         }
707         value=enumValue(context, highValue);
708         if(value!=prevValue) {
709             if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
710                 return;
711             }
712             prev=c;
713             prevValue=value;
714         }
715         c=limit;
716     }
717 
718     /* deliver last range */
719     enumRange(context, prev, c-1, prevValue);
720 }
721 
722 U_CAPI void U_EXPORT2
utrie2_enum(const UTrie2 * trie,UTrie2EnumValue * enumValue,UTrie2EnumRange * enumRange,const void * context)723 utrie2_enum(const UTrie2 *trie,
724             UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
725     enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context);
726 }
727 
728 U_CAPI void U_EXPORT2
utrie2_enumForLeadSurrogate(const UTrie2 * trie,UChar32 lead,UTrie2EnumValue * enumValue,UTrie2EnumRange * enumRange,const void * context)729 utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
730                             UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange,
731                             const void *context) {
732     if(!U16_IS_LEAD(lead)) {
733         return;
734     }
735     lead=(lead-0xd7c0)<<10;   /* start code point */
736     enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context);
737 }
738 
739 /* C++ convenience wrappers ------------------------------------------------- */
740 
741 U_NAMESPACE_BEGIN
742 
previous16()743 uint16_t BackwardUTrie2StringIterator::previous16() {
744     codePointLimit=codePointStart;
745     if(start>=codePointStart) {
746         codePoint=U_SENTINEL;
747         return 0;
748     }
749     uint16_t result;
750     UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result);
751     return result;
752 }
753 
next16()754 uint16_t ForwardUTrie2StringIterator::next16() {
755     codePointStart=codePointLimit;
756     if(codePointLimit==limit) {
757         codePoint=U_SENTINEL;
758         return 0;
759     }
760     uint16_t result;
761     UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result);
762     return result;
763 }
764 
765 U_NAMESPACE_END
766