1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 2001-2014, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *   file name:  utrie2.cpp
11 *   encoding:   US-ASCII
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2008aug16 (starting from a copy of utrie.c)
16 *   created by: Markus W. Scherer
17 *
18 *   This is a common implementation of a Unicode trie.
19 *   It is a kind of compressed, serializable table of 16- or 32-bit values associated with
20 *   Unicode code points (0..0x10ffff).
21 *   This is the second common version of a Unicode trie (hence the name UTrie2).
22 *   See utrie2.h for a comparison.
23 *
24 *   This file contains only the runtime and enumeration code, for read-only access.
25 *   See utrie2_builder.c for the builder code.
26 */
27 #ifdef UTRIE2_DEBUG
28 #   include <stdio.h>
29 #endif
30 
31 #include "unicode/utypes.h"
32 #include "unicode/utf.h"
33 #include "unicode/utf8.h"
34 #include "unicode/utf16.h"
35 #include "cmemory.h"
36 #include "utrie2.h"
37 #include "utrie2_impl.h"
38 #include "uassert.h"
39 
40 /* Public UTrie2 API implementation ----------------------------------------- */
41 
42 static uint32_t
get32(const UNewTrie2 * trie,UChar32 c,UBool fromLSCP)43 get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) {
44     int32_t i2, block;
45 
46     if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) {
47         return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY];
48     }
49 
50     if(U_IS_LEAD(c) && fromLSCP) {
51         i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
52             (c>>UTRIE2_SHIFT_2);
53     } else {
54         i2=trie->index1[c>>UTRIE2_SHIFT_1]+
55             ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
56     }
57     block=trie->index2[i2];
58     return trie->data[block+(c&UTRIE2_DATA_MASK)];
59 }
60 
61 U_CAPI uint32_t U_EXPORT2
utrie2_get32(const UTrie2 * trie,UChar32 c)62 utrie2_get32(const UTrie2 *trie, UChar32 c) {
63     if(trie->data16!=NULL) {
64         return UTRIE2_GET16(trie, c);
65     } else if(trie->data32!=NULL) {
66         return UTRIE2_GET32(trie, c);
67     } else if((uint32_t)c>0x10ffff) {
68         return trie->errorValue;
69     } else {
70         return get32(trie->newTrie, c, TRUE);
71     }
72 }
73 
74 U_CAPI uint32_t U_EXPORT2
utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 * trie,UChar32 c)75 utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) {
76     if(!U_IS_LEAD(c)) {
77         return trie->errorValue;
78     }
79     if(trie->data16!=NULL) {
80         return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c);
81     } else if(trie->data32!=NULL) {
82         return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c);
83     } else {
84         return get32(trie->newTrie, c, FALSE);
85     }
86 }
87 
88 static inline int32_t
u8Index(const UTrie2 * trie,UChar32 c,int32_t i)89 u8Index(const UTrie2 *trie, UChar32 c, int32_t i) {
90     int32_t idx=
91         _UTRIE2_INDEX_FROM_CP(
92             trie,
93             trie->data32==NULL ? trie->indexLength : 0,
94             c);
95     return (idx<<3)|i;
96 }
97 
98 U_CAPI int32_t U_EXPORT2
utrie2_internalU8NextIndex(const UTrie2 * trie,UChar32 c,const uint8_t * src,const uint8_t * limit)99 utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c,
100                            const uint8_t *src, const uint8_t *limit) {
101     int32_t i, length;
102     i=0;
103     /* support 64-bit pointers by avoiding cast of arbitrary difference */
104     if((limit-src)<=7) {
105         length=(int32_t)(limit-src);
106     } else {
107         length=7;
108     }
109     c=utf8_nextCharSafeBody(src, &i, length, c, -1);
110     return u8Index(trie, c, i);
111 }
112 
113 U_CAPI int32_t U_EXPORT2
utrie2_internalU8PrevIndex(const UTrie2 * trie,UChar32 c,const uint8_t * start,const uint8_t * src)114 utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
115                            const uint8_t *start, const uint8_t *src) {
116     int32_t i, length;
117     /* support 64-bit pointers by avoiding cast of arbitrary difference */
118     if((src-start)<=7) {
119         i=length=(int32_t)(src-start);
120     } else {
121         i=length=7;
122         start=src-7;
123     }
124     c=utf8_prevCharSafeBody(start, 0, &i, c, -1);
125     i=length-i;  /* number of bytes read backward from src */
126     return u8Index(trie, c, i);
127 }
128 
129 U_CAPI UTrie2 * U_EXPORT2
utrie2_openFromSerialized(UTrie2ValueBits valueBits,const void * data,int32_t length,int32_t * pActualLength,UErrorCode * pErrorCode)130 utrie2_openFromSerialized(UTrie2ValueBits valueBits,
131                           const void *data, int32_t length, int32_t *pActualLength,
132                           UErrorCode *pErrorCode) {
133     const UTrie2Header *header;
134     const uint16_t *p16;
135     int32_t actualLength;
136 
137     UTrie2 tempTrie;
138     UTrie2 *trie;
139 
140     if(U_FAILURE(*pErrorCode)) {
141         return 0;
142     }
143 
144     if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) ||
145         valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
146     ) {
147         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
148         return 0;
149     }
150 
151     /* enough data for a trie header? */
152     if(length<(int32_t)sizeof(UTrie2Header)) {
153         *pErrorCode=U_INVALID_FORMAT_ERROR;
154         return 0;
155     }
156 
157     /* check the signature */
158     header=(const UTrie2Header *)data;
159     if(header->signature!=UTRIE2_SIG) {
160         *pErrorCode=U_INVALID_FORMAT_ERROR;
161         return 0;
162     }
163 
164     /* get the options */
165     if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) {
166         *pErrorCode=U_INVALID_FORMAT_ERROR;
167         return 0;
168     }
169 
170     /* get the length values and offsets */
171     uprv_memset(&tempTrie, 0, sizeof(tempTrie));
172     tempTrie.indexLength=header->indexLength;
173     tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT;
174     tempTrie.index2NullOffset=header->index2NullOffset;
175     tempTrie.dataNullOffset=header->dataNullOffset;
176 
177     tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1;
178     tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY;
179     if(valueBits==UTRIE2_16_VALUE_BITS) {
180         tempTrie.highValueIndex+=tempTrie.indexLength;
181     }
182 
183     /* calculate the actual length */
184     actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2;
185     if(valueBits==UTRIE2_16_VALUE_BITS) {
186         actualLength+=tempTrie.dataLength*2;
187     } else {
188         actualLength+=tempTrie.dataLength*4;
189     }
190     if(length<actualLength) {
191         *pErrorCode=U_INVALID_FORMAT_ERROR;  /* not enough bytes */
192         return 0;
193     }
194 
195     /* allocate the trie */
196     trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
197     if(trie==NULL) {
198         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
199         return 0;
200     }
201     uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
202     trie->memory=(uint32_t *)data;
203     trie->length=actualLength;
204     trie->isMemoryOwned=FALSE;
205 
206     /* set the pointers to its index and data arrays */
207     p16=(const uint16_t *)(header+1);
208     trie->index=p16;
209     p16+=trie->indexLength;
210 
211     /* get the data */
212     switch(valueBits) {
213     case UTRIE2_16_VALUE_BITS:
214         trie->data16=p16;
215         trie->data32=NULL;
216         trie->initialValue=trie->index[trie->dataNullOffset];
217         trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET];
218         break;
219     case UTRIE2_32_VALUE_BITS:
220         trie->data16=NULL;
221         trie->data32=(const uint32_t *)p16;
222         trie->initialValue=trie->data32[trie->dataNullOffset];
223         trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET];
224         break;
225     default:
226         *pErrorCode=U_INVALID_FORMAT_ERROR;
227         return 0;
228     }
229 
230     if(pActualLength!=NULL) {
231         *pActualLength=actualLength;
232     }
233     return trie;
234 }
235 
236 U_CAPI UTrie2 * U_EXPORT2
utrie2_openDummy(UTrie2ValueBits valueBits,uint32_t initialValue,uint32_t errorValue,UErrorCode * pErrorCode)237 utrie2_openDummy(UTrie2ValueBits valueBits,
238                  uint32_t initialValue, uint32_t errorValue,
239                  UErrorCode *pErrorCode) {
240     UTrie2 *trie;
241     UTrie2Header *header;
242     uint32_t *p;
243     uint16_t *dest16;
244     int32_t indexLength, dataLength, length, i;
245     int32_t dataMove;  /* >0 if the data is moved to the end of the index array */
246 
247     if(U_FAILURE(*pErrorCode)) {
248         return 0;
249     }
250 
251     if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) {
252         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
253         return 0;
254     }
255 
256     /* calculate the total length of the dummy trie data */
257     indexLength=UTRIE2_INDEX_1_OFFSET;
258     dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY;
259     length=(int32_t)sizeof(UTrie2Header)+indexLength*2;
260     if(valueBits==UTRIE2_16_VALUE_BITS) {
261         length+=dataLength*2;
262     } else {
263         length+=dataLength*4;
264     }
265 
266     /* allocate the trie */
267     trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
268     if(trie==NULL) {
269         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
270         return 0;
271     }
272     uprv_memset(trie, 0, sizeof(UTrie2));
273     trie->memory=uprv_malloc(length);
274     if(trie->memory==NULL) {
275         uprv_free(trie);
276         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
277         return 0;
278     }
279     trie->length=length;
280     trie->isMemoryOwned=TRUE;
281 
282     /* set the UTrie2 fields */
283     if(valueBits==UTRIE2_16_VALUE_BITS) {
284         dataMove=indexLength;
285     } else {
286         dataMove=0;
287     }
288 
289     trie->indexLength=indexLength;
290     trie->dataLength=dataLength;
291     trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET;
292     trie->dataNullOffset=(uint16_t)dataMove;
293     trie->initialValue=initialValue;
294     trie->errorValue=errorValue;
295     trie->highStart=0;
296     trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET;
297 
298     /* set the header fields */
299     header=(UTrie2Header *)trie->memory;
300 
301     header->signature=UTRIE2_SIG; /* "Tri2" */
302     header->options=(uint16_t)valueBits;
303 
304     header->indexLength=(uint16_t)indexLength;
305     header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT);
306     header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET;
307     header->dataNullOffset=(uint16_t)dataMove;
308     header->shiftedHighStart=0;
309 
310     /* fill the index and data arrays */
311     dest16=(uint16_t *)(header+1);
312     trie->index=dest16;
313 
314     /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */
315     for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
316         *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT);  /* null data block */
317     }
318 
319     /* write UTF-8 2-byte index-2 values, not right-shifted */
320     for(i=0; i<(0xc2-0xc0); ++i) {                                  /* C0..C1 */
321         *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
322     }
323     for(; i<(0xe0-0xc0); ++i) {                                     /* C2..DF */
324         *dest16++=(uint16_t)dataMove;
325     }
326 
327     /* write the 16/32-bit data array */
328     switch(valueBits) {
329     case UTRIE2_16_VALUE_BITS:
330         /* write 16-bit data values */
331         trie->data16=dest16;
332         trie->data32=NULL;
333         for(i=0; i<0x80; ++i) {
334             *dest16++=(uint16_t)initialValue;
335         }
336         for(; i<0xc0; ++i) {
337             *dest16++=(uint16_t)errorValue;
338         }
339         /* highValue and reserved values */
340         for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
341             *dest16++=(uint16_t)initialValue;
342         }
343         break;
344     case UTRIE2_32_VALUE_BITS:
345         /* write 32-bit data values */
346         p=(uint32_t *)dest16;
347         trie->data16=NULL;
348         trie->data32=p;
349         for(i=0; i<0x80; ++i) {
350             *p++=initialValue;
351         }
352         for(; i<0xc0; ++i) {
353             *p++=errorValue;
354         }
355         /* highValue and reserved values */
356         for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
357             *p++=initialValue;
358         }
359         break;
360     default:
361         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
362         return 0;
363     }
364 
365     return trie;
366 }
367 
368 U_CAPI void U_EXPORT2
utrie2_close(UTrie2 * trie)369 utrie2_close(UTrie2 *trie) {
370     if(trie!=NULL) {
371         if(trie->isMemoryOwned) {
372             uprv_free(trie->memory);
373         }
374         if(trie->newTrie!=NULL) {
375             uprv_free(trie->newTrie->data);
376             uprv_free(trie->newTrie);
377         }
378         uprv_free(trie);
379     }
380 }
381 
382 U_CAPI int32_t U_EXPORT2
utrie2_getVersion(const void * data,int32_t length,UBool anyEndianOk)383 utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) {
384     uint32_t signature;
385     if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) {
386         return 0;
387     }
388     signature=*(const uint32_t *)data;
389     if(signature==UTRIE2_SIG) {
390         return 2;
391     }
392     if(anyEndianOk && signature==UTRIE2_OE_SIG) {
393         return 2;
394     }
395     if(signature==UTRIE_SIG) {
396         return 1;
397     }
398     if(anyEndianOk && signature==UTRIE_OE_SIG) {
399         return 1;
400     }
401     return 0;
402 }
403 
404 U_CAPI UBool U_EXPORT2
utrie2_isFrozen(const UTrie2 * trie)405 utrie2_isFrozen(const UTrie2 *trie) {
406     return (UBool)(trie->newTrie==NULL);
407 }
408 
409 U_CAPI int32_t U_EXPORT2
utrie2_serialize(const UTrie2 * trie,void * data,int32_t capacity,UErrorCode * pErrorCode)410 utrie2_serialize(const UTrie2 *trie,
411                  void *data, int32_t capacity,
412                  UErrorCode *pErrorCode) {
413     /* argument check */
414     if(U_FAILURE(*pErrorCode)) {
415         return 0;
416     }
417 
418     if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL ||
419         capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)))
420     ) {
421         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
422         return 0;
423     }
424 
425     if(capacity>=trie->length) {
426         uprv_memcpy(data, trie->memory, trie->length);
427     } else {
428         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
429     }
430     return trie->length;
431 }
432 
433 U_CAPI int32_t U_EXPORT2
utrie2_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)434 utrie2_swap(const UDataSwapper *ds,
435             const void *inData, int32_t length, void *outData,
436             UErrorCode *pErrorCode) {
437     const UTrie2Header *inTrie;
438     UTrie2Header trie;
439     int32_t dataLength, size;
440     UTrie2ValueBits valueBits;
441 
442     if(U_FAILURE(*pErrorCode)) {
443         return 0;
444     }
445     if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
446         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
447         return 0;
448     }
449 
450     /* setup and swapping */
451     if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
452         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
453         return 0;
454     }
455 
456     inTrie=(const UTrie2Header *)inData;
457     trie.signature=ds->readUInt32(inTrie->signature);
458     trie.options=ds->readUInt16(inTrie->options);
459     trie.indexLength=ds->readUInt16(inTrie->indexLength);
460     trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
461 
462     valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
463     dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
464 
465     if( trie.signature!=UTRIE2_SIG ||
466         valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
467         trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
468         dataLength<UTRIE2_DATA_START_OFFSET
469     ) {
470         *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
471         return 0;
472     }
473 
474     size=sizeof(UTrie2Header)+trie.indexLength*2;
475     switch(valueBits) {
476     case UTRIE2_16_VALUE_BITS:
477         size+=dataLength*2;
478         break;
479     case UTRIE2_32_VALUE_BITS:
480         size+=dataLength*4;
481         break;
482     default:
483         *pErrorCode=U_INVALID_FORMAT_ERROR;
484         return 0;
485     }
486 
487     if(length>=0) {
488         UTrie2Header *outTrie;
489 
490         if(length<size) {
491             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
492             return 0;
493         }
494 
495         outTrie=(UTrie2Header *)outData;
496 
497         /* swap the header */
498         ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
499         ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
500 
501         /* swap the index and the data */
502         switch(valueBits) {
503         case UTRIE2_16_VALUE_BITS:
504             ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
505             break;
506         case UTRIE2_32_VALUE_BITS:
507             ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
508             ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
509                                      (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
510             break;
511         default:
512             *pErrorCode=U_INVALID_FORMAT_ERROR;
513             return 0;
514         }
515     }
516 
517     return size;
518 }
519 
520 // utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c
521 // to avoid a dependency from utrie2.cpp on utrie.c.
522 
523 /* enumeration -------------------------------------------------------------- */
524 
525 #define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b))
526 
527 /* default UTrie2EnumValue() returns the input value itself */
528 static uint32_t U_CALLCONV
enumSameValue(const void *,uint32_t value)529 enumSameValue(const void * /*context*/, uint32_t value) {
530     return value;
531 }
532 
533 /**
534  * Enumerate all ranges of code points with the same relevant values.
535  * The values are transformed from the raw trie entries by the enumValue function.
536  *
537  * Currently requires start<limit and both start and limit must be multiples
538  * of UTRIE2_DATA_BLOCK_LENGTH.
539  *
540  * Optimizations:
541  * - Skip a whole block if we know that it is filled with a single value,
542  *   and it is the same as we visited just before.
543  * - Handle the null block specially because we know a priori that it is filled
544  *   with a single value.
545  */
546 static void
enumEitherTrie(const UTrie2 * trie,UChar32 start,UChar32 limit,UTrie2EnumValue * enumValue,UTrie2EnumRange * enumRange,const void * context)547 enumEitherTrie(const UTrie2 *trie,
548                UChar32 start, UChar32 limit,
549                UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
550     const uint32_t *data32;
551     const uint16_t *idx;
552 
553     uint32_t value, prevValue, initialValue;
554     UChar32 c, prev, highStart;
555     int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
556 
557     if(enumRange==NULL) {
558         return;
559     }
560     if(enumValue==NULL) {
561         enumValue=enumSameValue;
562     }
563 
564     if(trie->newTrie==NULL) {
565         /* frozen trie */
566         idx=trie->index;
567         U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */
568         data32=trie->data32;
569 
570         index2NullOffset=trie->index2NullOffset;
571         nullBlock=trie->dataNullOffset;
572     } else {
573         /* unfrozen, mutable trie */
574         idx=NULL;
575         data32=trie->newTrie->data;
576         U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */
577 
578         index2NullOffset=trie->newTrie->index2NullOffset;
579         nullBlock=trie->newTrie->dataNullOffset;
580     }
581 
582     highStart=trie->highStart;
583 
584     /* get the enumeration value that corresponds to an initial-value trie data entry */
585     initialValue=enumValue(context, trie->initialValue);
586 
587     /* set variables for previous range */
588     prevI2Block=-1;
589     prevBlock=-1;
590     prev=start;
591     prevValue=0;
592 
593     /* enumerate index-2 blocks */
594     for(c=start; c<limit && c<highStart;) {
595         /* Code point limit for iterating inside this i2Block. */
596         UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY;
597         if(limit<tempLimit) {
598             tempLimit=limit;
599         }
600         if(c<=0xffff) {
601             if(!U_IS_SURROGATE(c)) {
602                 i2Block=c>>UTRIE2_SHIFT_2;
603             } else if(U_IS_SURROGATE_LEAD(c)) {
604                 /*
605                  * Enumerate values for lead surrogate code points, not code units:
606                  * This special block has half the normal length.
607                  */
608                 i2Block=UTRIE2_LSCP_INDEX_2_OFFSET;
609                 tempLimit=MIN_VALUE(0xdc00, limit);
610             } else {
611                 /*
612                  * Switch back to the normal part of the index-2 table.
613                  * Enumerate the second half of the surrogates block.
614                  */
615                 i2Block=0xd800>>UTRIE2_SHIFT_2;
616                 tempLimit=MIN_VALUE(0xe000, limit);
617             }
618         } else {
619             /* supplementary code points */
620             if(idx!=NULL) {
621                 i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+
622                               (c>>UTRIE2_SHIFT_1)];
623             } else {
624                 i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1];
625             }
626             if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) {
627                 /*
628                  * The index-2 block is the same as the previous one, and filled with prevValue.
629                  * Only possible for supplementary code points because the linear-BMP index-2
630                  * table creates unique i2Block values.
631                  */
632                 c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
633                 continue;
634             }
635         }
636         prevI2Block=i2Block;
637         if(i2Block==index2NullOffset) {
638             /* this is the null index-2 block */
639             if(prevValue!=initialValue) {
640                 if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
641                     return;
642                 }
643                 prevBlock=nullBlock;
644                 prev=c;
645                 prevValue=initialValue;
646             }
647             c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
648         } else {
649             /* enumerate data blocks for one index-2 block */
650             int32_t i2, i2Limit;
651             i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
652             if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) {
653                 i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
654             } else {
655                 i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH;
656             }
657             for(; i2<i2Limit; ++i2) {
658                 if(idx!=NULL) {
659                     block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT;
660                 } else {
661                     block=trie->newTrie->index2[i2Block+i2];
662                 }
663                 if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) {
664                     /* the block is the same as the previous one, and filled with prevValue */
665                     c+=UTRIE2_DATA_BLOCK_LENGTH;
666                     continue;
667                 }
668                 prevBlock=block;
669                 if(block==nullBlock) {
670                     /* this is the null data block */
671                     if(prevValue!=initialValue) {
672                         if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
673                             return;
674                         }
675                         prev=c;
676                         prevValue=initialValue;
677                     }
678                     c+=UTRIE2_DATA_BLOCK_LENGTH;
679                 } else {
680                     for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) {
681                         value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
682                         if(value!=prevValue) {
683                             if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
684                                 return;
685                             }
686                             prev=c;
687                             prevValue=value;
688                         }
689                         ++c;
690                     }
691                 }
692             }
693         }
694     }
695 
696     if(c>limit) {
697         c=limit;  /* could be higher if in the index2NullOffset */
698     } else if(c<limit) {
699         /* c==highStart<limit */
700         uint32_t highValue;
701         if(idx!=NULL) {
702             highValue=
703                 data32!=NULL ?
704                     data32[trie->highValueIndex] :
705                     idx[trie->highValueIndex];
706         } else {
707             highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY];
708         }
709         value=enumValue(context, highValue);
710         if(value!=prevValue) {
711             if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
712                 return;
713             }
714             prev=c;
715             prevValue=value;
716         }
717         c=limit;
718     }
719 
720     /* deliver last range */
721     enumRange(context, prev, c-1, prevValue);
722 }
723 
724 U_CAPI void U_EXPORT2
utrie2_enum(const UTrie2 * trie,UTrie2EnumValue * enumValue,UTrie2EnumRange * enumRange,const void * context)725 utrie2_enum(const UTrie2 *trie,
726             UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
727     enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context);
728 }
729 
730 U_CAPI void U_EXPORT2
utrie2_enumForLeadSurrogate(const UTrie2 * trie,UChar32 lead,UTrie2EnumValue * enumValue,UTrie2EnumRange * enumRange,const void * context)731 utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
732                             UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange,
733                             const void *context) {
734     if(!U16_IS_LEAD(lead)) {
735         return;
736     }
737     lead=(lead-0xd7c0)<<10;   /* start code point */
738     enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context);
739 }
740 
741 /* C++ convenience wrappers ------------------------------------------------- */
742 
743 U_NAMESPACE_BEGIN
744 
previous16()745 uint16_t BackwardUTrie2StringIterator::previous16() {
746     codePointLimit=codePointStart;
747     if(start>=codePointStart) {
748         codePoint=U_SENTINEL;
749         return 0;
750     }
751     uint16_t result;
752     UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result);
753     return result;
754 }
755 
next16()756 uint16_t ForwardUTrie2StringIterator::next16() {
757     codePointStart=codePointLimit;
758     if(codePointLimit==limit) {
759         codePoint=U_SENTINEL;
760         return 0;
761     }
762     uint16_t result;
763     UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result);
764     return result;
765 }
766 
767 U_NAMESPACE_END
768