1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 2007-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 * file name: bmpset.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2007jan29
14 * created by: Markus W. Scherer
15 */
16
17 #include "unicode/utypes.h"
18 #include "unicode/uniset.h"
19 #include "unicode/utf8.h"
20 #include "unicode/utf16.h"
21 #include "cmemory.h"
22 #include "bmpset.h"
23 #include "uassert.h"
24
25 U_NAMESPACE_BEGIN
26
BMPSet(const int32_t * parentList,int32_t parentListLength)27 BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
28 list(parentList), listLength(parentListLength) {
29 uprv_memset(asciiBytes, 0, sizeof(asciiBytes));
30 uprv_memset(table7FF, 0, sizeof(table7FF));
31 uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));
32
33 /*
34 * Set the list indexes for binary searches for
35 * U+0800, U+1000, U+2000, .., U+F000, U+10000.
36 * U+0800 is the first 3-byte-UTF-8 code point. Lower code points are
37 * looked up in the bit tables.
38 * The last pair of indexes is for finding supplementary code points.
39 */
40 list4kStarts[0]=findCodePoint(0x800, 0, listLength-1);
41 int32_t i;
42 for(i=1; i<=0x10; ++i) {
43 list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1);
44 }
45 list4kStarts[0x11]=listLength-1;
46
47 initBits();
48 overrideIllegal();
49 }
50
BMPSet(const BMPSet & otherBMPSet,const int32_t * newParentList,int32_t newParentListLength)51 BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
52 list(newParentList), listLength(newParentListLength) {
53 uprv_memcpy(asciiBytes, otherBMPSet.asciiBytes, sizeof(asciiBytes));
54 uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
55 uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
56 uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));
57 }
58
~BMPSet()59 BMPSet::~BMPSet() {
60 }
61
62 /*
63 * Set bits in a bit rectangle in "vertical" bit organization.
64 * start<limit<=0x800
65 */
set32x64Bits(uint32_t table[64],int32_t start,int32_t limit)66 static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
67 U_ASSERT(start<limit);
68 U_ASSERT(limit<=0x800);
69
70 int32_t lead=start>>6; // Named for UTF-8 2-byte lead byte with upper 5 bits.
71 int32_t trail=start&0x3f; // Named for UTF-8 2-byte trail byte with lower 6 bits.
72
73 // Set one bit indicating an all-one block.
74 uint32_t bits=(uint32_t)1<<lead;
75 if((start+1)==limit) { // Single-character shortcut.
76 table[trail]|=bits;
77 return;
78 }
79
80 int32_t limitLead=limit>>6;
81 int32_t limitTrail=limit&0x3f;
82
83 if(lead==limitLead) {
84 // Partial vertical bit column.
85 while(trail<limitTrail) {
86 table[trail++]|=bits;
87 }
88 } else {
89 // Partial vertical bit column,
90 // followed by a bit rectangle,
91 // followed by another partial vertical bit column.
92 if(trail>0) {
93 do {
94 table[trail++]|=bits;
95 } while(trail<64);
96 ++lead;
97 }
98 if(lead<limitLead) {
99 bits=~((1<<lead)-1);
100 if(limitLead<0x20) {
101 bits&=(1<<limitLead)-1;
102 }
103 for(trail=0; trail<64; ++trail) {
104 table[trail]|=bits;
105 }
106 }
107 // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
108 // In that case, bits=1<<limitLead is undefined but the bits value
109 // is not used because trail<limitTrail is already false.
110 bits=(uint32_t)1<<((limitLead == 0x20) ? (limitLead - 1) : limitLead);
111 for(trail=0; trail<limitTrail; ++trail) {
112 table[trail]|=bits;
113 }
114 }
115 }
116
initBits()117 void BMPSet::initBits() {
118 UChar32 start, limit;
119 int32_t listIndex=0;
120
121 // Set asciiBytes[].
122 do {
123 start=list[listIndex++];
124 if(listIndex<listLength) {
125 limit=list[listIndex++];
126 } else {
127 limit=0x110000;
128 }
129 if(start>=0x80) {
130 break;
131 }
132 do {
133 asciiBytes[start++]=1;
134 } while(start<limit && start<0x80);
135 } while(limit<=0x80);
136
137 // Set table7FF[].
138 while(start<0x800) {
139 set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800);
140 if(limit>0x800) {
141 start=0x800;
142 break;
143 }
144
145 start=list[listIndex++];
146 if(listIndex<listLength) {
147 limit=list[listIndex++];
148 } else {
149 limit=0x110000;
150 }
151 }
152
153 // Set bmpBlockBits[].
154 int32_t minStart=0x800;
155 while(start<0x10000) {
156 if(limit>0x10000) {
157 limit=0x10000;
158 }
159
160 if(start<minStart) {
161 start=minStart;
162 }
163 if(start<limit) { // Else: Another range entirely in a known mixed-value block.
164 if(start&0x3f) {
165 // Mixed-value block of 64 code points.
166 start>>=6;
167 bmpBlockBits[start&0x3f]|=0x10001<<(start>>6);
168 start=(start+1)<<6; // Round up to the next block boundary.
169 minStart=start; // Ignore further ranges in this block.
170 }
171 if(start<limit) {
172 if(start<(limit&~0x3f)) {
173 // Multiple all-ones blocks of 64 code points each.
174 set32x64Bits(bmpBlockBits, start>>6, limit>>6);
175 }
176
177 if(limit&0x3f) {
178 // Mixed-value block of 64 code points.
179 limit>>=6;
180 bmpBlockBits[limit&0x3f]|=0x10001<<(limit>>6);
181 limit=(limit+1)<<6; // Round up to the next block boundary.
182 minStart=limit; // Ignore further ranges in this block.
183 }
184 }
185 }
186
187 if(limit==0x10000) {
188 break;
189 }
190
191 start=list[listIndex++];
192 if(listIndex<listLength) {
193 limit=list[listIndex++];
194 } else {
195 limit=0x110000;
196 }
197 }
198 }
199
200 /*
201 * Override some bits and bytes to the result of contains(FFFD)
202 * for faster validity checking at runtime.
203 * No need to set 0 values where they were reset to 0 in the constructor
204 * and not modified by initBits().
205 * (asciiBytes[] trail bytes, table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
206 * Need to set 0 values for surrogates D800..DFFF.
207 */
overrideIllegal()208 void BMPSet::overrideIllegal() {
209 uint32_t bits, mask;
210 int32_t i;
211
212 if(containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10])) {
213 // contains(FFFD)==TRUE
214 for(i=0x80; i<0xc0; ++i) {
215 asciiBytes[i]=1;
216 }
217
218 bits=3; // Lead bytes 0xC0 and 0xC1.
219 for(i=0; i<64; ++i) {
220 table7FF[i]|=bits;
221 }
222
223 bits=1; // Lead byte 0xE0.
224 for(i=0; i<32; ++i) { // First half of 4k block.
225 bmpBlockBits[i]|=bits;
226 }
227
228 mask=~(0x10001<<0xd); // Lead byte 0xED.
229 bits=1<<0xd;
230 for(i=32; i<64; ++i) { // Second half of 4k block.
231 bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
232 }
233 } else {
234 // contains(FFFD)==FALSE
235 mask=~(0x10001<<0xd); // Lead byte 0xED.
236 for(i=32; i<64; ++i) { // Second half of 4k block.
237 bmpBlockBits[i]&=mask;
238 }
239 }
240 }
241
findCodePoint(UChar32 c,int32_t lo,int32_t hi) const242 int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
243 /* Examples:
244 findCodePoint(c)
245 set list[] c=0 1 3 4 7 8
246 === ============== ===========
247 [] [110000] 0 0 0 0 0 0
248 [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2
249 [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2
250 [:Any:] [0, 110000] 1 1 1 1 1 1
251 */
252
253 // Return the smallest i such that c < list[i]. Assume
254 // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
255 if (c < list[lo])
256 return lo;
257 // High runner test. c is often after the last range, so an
258 // initial check for this condition pays off.
259 if (lo >= hi || c >= list[hi-1])
260 return hi;
261 // invariant: c >= list[lo]
262 // invariant: c < list[hi]
263 for (;;) {
264 int32_t i = (lo + hi) >> 1;
265 if (i == lo) {
266 break; // Found!
267 } else if (c < list[i]) {
268 hi = i;
269 } else {
270 lo = i;
271 }
272 }
273 return hi;
274 }
275
276 UBool
contains(UChar32 c) const277 BMPSet::contains(UChar32 c) const {
278 if((uint32_t)c<=0x7f) {
279 return (UBool)asciiBytes[c];
280 } else if((uint32_t)c<=0x7ff) {
281 return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
282 } else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
283 int lead=c>>12;
284 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
285 if(twoBits<=1) {
286 // All 64 code points with the same bits 15..6
287 // are either in the set or not.
288 return (UBool)twoBits;
289 } else {
290 // Look up the code point in its 4k block of code points.
291 return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
292 }
293 } else if((uint32_t)c<=0x10ffff) {
294 // surrogate or supplementary code point
295 return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
296 } else {
297 // Out-of-range code points get FALSE, consistent with long-standing
298 // behavior of UnicodeSet::contains(c).
299 return FALSE;
300 }
301 }
302
303 /*
304 * Check for sufficient length for trail unit for each surrogate pair.
305 * Handle single surrogates as surrogate code points as usual in ICU.
306 */
307 const UChar *
span(const UChar * s,const UChar * limit,USetSpanCondition spanCondition) const308 BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
309 UChar c, c2;
310
311 if(spanCondition) {
312 // span
313 do {
314 c=*s;
315 if(c<=0x7f) {
316 if(!asciiBytes[c]) {
317 break;
318 }
319 } else if(c<=0x7ff) {
320 if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
321 break;
322 }
323 } else if(c<0xd800 || c>=0xe000) {
324 int lead=c>>12;
325 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
326 if(twoBits<=1) {
327 // All 64 code points with the same bits 15..6
328 // are either in the set or not.
329 if(twoBits==0) {
330 break;
331 }
332 } else {
333 // Look up the code point in its 4k block of code points.
334 if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
335 break;
336 }
337 }
338 } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
339 // surrogate code point
340 if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
341 break;
342 }
343 } else {
344 // surrogate pair
345 if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
346 break;
347 }
348 ++s;
349 }
350 } while(++s<limit);
351 } else {
352 // span not
353 do {
354 c=*s;
355 if(c<=0x7f) {
356 if(asciiBytes[c]) {
357 break;
358 }
359 } else if(c<=0x7ff) {
360 if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
361 break;
362 }
363 } else if(c<0xd800 || c>=0xe000) {
364 int lead=c>>12;
365 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
366 if(twoBits<=1) {
367 // All 64 code points with the same bits 15..6
368 // are either in the set or not.
369 if(twoBits!=0) {
370 break;
371 }
372 } else {
373 // Look up the code point in its 4k block of code points.
374 if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
375 break;
376 }
377 }
378 } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
379 // surrogate code point
380 if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
381 break;
382 }
383 } else {
384 // surrogate pair
385 if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
386 break;
387 }
388 ++s;
389 }
390 } while(++s<limit);
391 }
392 return s;
393 }
394
395 /* Symmetrical with span(). */
396 const UChar *
spanBack(const UChar * s,const UChar * limit,USetSpanCondition spanCondition) const397 BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
398 UChar c, c2;
399
400 if(spanCondition) {
401 // span
402 for(;;) {
403 c=*(--limit);
404 if(c<=0x7f) {
405 if(!asciiBytes[c]) {
406 break;
407 }
408 } else if(c<=0x7ff) {
409 if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
410 break;
411 }
412 } else if(c<0xd800 || c>=0xe000) {
413 int lead=c>>12;
414 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
415 if(twoBits<=1) {
416 // All 64 code points with the same bits 15..6
417 // are either in the set or not.
418 if(twoBits==0) {
419 break;
420 }
421 } else {
422 // Look up the code point in its 4k block of code points.
423 if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
424 break;
425 }
426 }
427 } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
428 // surrogate code point
429 if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
430 break;
431 }
432 } else {
433 // surrogate pair
434 if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
435 break;
436 }
437 --limit;
438 }
439 if(s==limit) {
440 return s;
441 }
442 }
443 } else {
444 // span not
445 for(;;) {
446 c=*(--limit);
447 if(c<=0x7f) {
448 if(asciiBytes[c]) {
449 break;
450 }
451 } else if(c<=0x7ff) {
452 if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
453 break;
454 }
455 } else if(c<0xd800 || c>=0xe000) {
456 int lead=c>>12;
457 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
458 if(twoBits<=1) {
459 // All 64 code points with the same bits 15..6
460 // are either in the set or not.
461 if(twoBits!=0) {
462 break;
463 }
464 } else {
465 // Look up the code point in its 4k block of code points.
466 if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
467 break;
468 }
469 }
470 } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
471 // surrogate code point
472 if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
473 break;
474 }
475 } else {
476 // surrogate pair
477 if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
478 break;
479 }
480 --limit;
481 }
482 if(s==limit) {
483 return s;
484 }
485 }
486 }
487 return limit+1;
488 }
489
490 /*
491 * Precheck for sufficient trail bytes at end of string only once per span.
492 * Check validity.
493 */
494 const uint8_t *
spanUTF8(const uint8_t * s,int32_t length,USetSpanCondition spanCondition) const495 BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
496 const uint8_t *limit=s+length;
497 uint8_t b=*s;
498 if((int8_t)b>=0) {
499 // Initial all-ASCII span.
500 if(spanCondition) {
501 do {
502 if(!asciiBytes[b] || ++s==limit) {
503 return s;
504 }
505 b=*s;
506 } while((int8_t)b>=0);
507 } else {
508 do {
509 if(asciiBytes[b] || ++s==limit) {
510 return s;
511 }
512 b=*s;
513 } while((int8_t)b>=0);
514 }
515 length=(int32_t)(limit-s);
516 }
517
518 if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
519 spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
520 }
521
522 const uint8_t *limit0=limit;
523
524 /*
525 * Make sure that the last 1/2/3/4-byte sequence before limit is complete
526 * or runs into a lead byte.
527 * In the span loop compare s with limit only once
528 * per multi-byte character.
529 *
530 * Give a trailing illegal sequence the same value as the result of contains(FFFD),
531 * including it if that is part of the span, otherwise set limit0 to before
532 * the truncated sequence.
533 */
534 b=*(limit-1);
535 if((int8_t)b<0) {
536 // b>=0x80: lead or trail byte
537 if(b<0xc0) {
538 // single trail byte, check for preceding 3- or 4-byte lead byte
539 if(length>=2 && (b=*(limit-2))>=0xe0) {
540 limit-=2;
541 if(asciiBytes[0x80]!=spanCondition) {
542 limit0=limit;
543 }
544 } else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
545 // 4-byte lead byte with only two trail bytes
546 limit-=3;
547 if(asciiBytes[0x80]!=spanCondition) {
548 limit0=limit;
549 }
550 }
551 } else {
552 // lead byte with no trail bytes
553 --limit;
554 if(asciiBytes[0x80]!=spanCondition) {
555 limit0=limit;
556 }
557 }
558 }
559
560 uint8_t t1, t2, t3;
561
562 while(s<limit) {
563 b=*s;
564 if(b<0xc0) {
565 // ASCII; or trail bytes with the result of contains(FFFD).
566 if(spanCondition) {
567 do {
568 if(!asciiBytes[b]) {
569 return s;
570 } else if(++s==limit) {
571 return limit0;
572 }
573 b=*s;
574 } while(b<0xc0);
575 } else {
576 do {
577 if(asciiBytes[b]) {
578 return s;
579 } else if(++s==limit) {
580 return limit0;
581 }
582 b=*s;
583 } while(b<0xc0);
584 }
585 }
586 ++s; // Advance past the lead byte.
587 if(b>=0xe0) {
588 if(b<0xf0) {
589 if( /* handle U+0000..U+FFFF inline */
590 (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
591 (t2=(uint8_t)(s[1]-0x80)) <= 0x3f
592 ) {
593 b&=0xf;
594 uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
595 if(twoBits<=1) {
596 // All 64 code points with this lead byte and middle trail byte
597 // are either in the set or not.
598 if(twoBits!=(uint32_t)spanCondition) {
599 return s-1;
600 }
601 } else {
602 // Look up the code point in its 4k block of code points.
603 UChar32 c=(b<<12)|(t1<<6)|t2;
604 if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) {
605 return s-1;
606 }
607 }
608 s+=2;
609 continue;
610 }
611 } else if( /* handle U+10000..U+10FFFF inline */
612 (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
613 (t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
614 (t3=(uint8_t)(s[2]-0x80)) <= 0x3f
615 ) {
616 // Give an illegal sequence the same value as the result of contains(FFFD).
617 UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
618 if( ( (0x10000<=c && c<=0x10ffff) ?
619 containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
620 asciiBytes[0x80]
621 ) != spanCondition
622 ) {
623 return s-1;
624 }
625 s+=3;
626 continue;
627 }
628 } else /* 0xc0<=b<0xe0 */ {
629 if( /* handle U+0000..U+07FF inline */
630 (t1=(uint8_t)(*s-0x80)) <= 0x3f
631 ) {
632 if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
633 return s-1;
634 }
635 ++s;
636 continue;
637 }
638 }
639
640 // Give an illegal sequence the same value as the result of contains(FFFD).
641 // Handle each byte of an illegal sequence separately to simplify the code;
642 // no need to optimize error handling.
643 if(asciiBytes[0x80]!=spanCondition) {
644 return s-1;
645 }
646 }
647
648 return limit0;
649 }
650
651 /*
652 * While going backwards through UTF-8 optimize only for ASCII.
653 * Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not
654 * possible to tell from the last byte in a multi-byte sequence how many
655 * preceding bytes there should be. Therefore, going backwards through UTF-8
656 * is much harder than going forward.
657 */
658 int32_t
spanBackUTF8(const uint8_t * s,int32_t length,USetSpanCondition spanCondition) const659 BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
660 if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
661 spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
662 }
663
664 uint8_t b;
665
666 do {
667 b=s[--length];
668 if((int8_t)b>=0) {
669 // ASCII sub-span
670 if(spanCondition) {
671 do {
672 if(!asciiBytes[b]) {
673 return length+1;
674 } else if(length==0) {
675 return 0;
676 }
677 b=s[--length];
678 } while((int8_t)b>=0);
679 } else {
680 do {
681 if(asciiBytes[b]) {
682 return length+1;
683 } else if(length==0) {
684 return 0;
685 }
686 b=s[--length];
687 } while((int8_t)b>=0);
688 }
689 }
690
691 int32_t prev=length;
692 UChar32 c;
693 // trail byte: collect a multi-byte character
694 // (or lead byte in last-trail position)
695 c=utf8_prevCharSafeBody(s, 0, &length, b, -3);
696 // c is a valid code point, not ASCII, not a surrogate
697 if(c<=0x7ff) {
698 if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
699 return prev+1;
700 }
701 } else if(c<=0xffff) {
702 int lead=c>>12;
703 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
704 if(twoBits<=1) {
705 // All 64 code points with the same bits 15..6
706 // are either in the set or not.
707 if(twoBits!=(uint32_t)spanCondition) {
708 return prev+1;
709 }
710 } else {
711 // Look up the code point in its 4k block of code points.
712 if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) {
713 return prev+1;
714 }
715 }
716 } else {
717 if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) {
718 return prev+1;
719 }
720 }
721 } while(length>0);
722 return 0;
723 }
724
725 U_NAMESPACE_END
726