1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 2007-2012, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 * file name: bmpset.cpp
11 * encoding: US-ASCII
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2007jan29
16 * created by: Markus W. Scherer
17 */
18
19 #include "unicode/utypes.h"
20 #include "unicode/uniset.h"
21 #include "unicode/utf8.h"
22 #include "unicode/utf16.h"
23 #include "cmemory.h"
24 #include "bmpset.h"
25 #include "uassert.h"
26
27 U_NAMESPACE_BEGIN
28
BMPSet(const int32_t * parentList,int32_t parentListLength)29 BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
30 list(parentList), listLength(parentListLength) {
31 uprv_memset(asciiBytes, 0, sizeof(asciiBytes));
32 uprv_memset(table7FF, 0, sizeof(table7FF));
33 uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));
34
35 /*
36 * Set the list indexes for binary searches for
37 * U+0800, U+1000, U+2000, .., U+F000, U+10000.
38 * U+0800 is the first 3-byte-UTF-8 code point. Lower code points are
39 * looked up in the bit tables.
40 * The last pair of indexes is for finding supplementary code points.
41 */
42 list4kStarts[0]=findCodePoint(0x800, 0, listLength-1);
43 int32_t i;
44 for(i=1; i<=0x10; ++i) {
45 list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1);
46 }
47 list4kStarts[0x11]=listLength-1;
48
49 initBits();
50 overrideIllegal();
51 }
52
BMPSet(const BMPSet & otherBMPSet,const int32_t * newParentList,int32_t newParentListLength)53 BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
54 list(newParentList), listLength(newParentListLength) {
55 uprv_memcpy(asciiBytes, otherBMPSet.asciiBytes, sizeof(asciiBytes));
56 uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
57 uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
58 uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));
59 }
60
~BMPSet()61 BMPSet::~BMPSet() {
62 }
63
64 /*
65 * Set bits in a bit rectangle in "vertical" bit organization.
66 * start<limit<=0x800
67 */
set32x64Bits(uint32_t table[64],int32_t start,int32_t limit)68 static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
69 U_ASSERT(start<limit);
70 U_ASSERT(limit<=0x800);
71
72 int32_t lead=start>>6; // Named for UTF-8 2-byte lead byte with upper 5 bits.
73 int32_t trail=start&0x3f; // Named for UTF-8 2-byte trail byte with lower 6 bits.
74
75 // Set one bit indicating an all-one block.
76 uint32_t bits=(uint32_t)1<<lead;
77 if((start+1)==limit) { // Single-character shortcut.
78 table[trail]|=bits;
79 return;
80 }
81
82 int32_t limitLead=limit>>6;
83 int32_t limitTrail=limit&0x3f;
84
85 if(lead==limitLead) {
86 // Partial vertical bit column.
87 while(trail<limitTrail) {
88 table[trail++]|=bits;
89 }
90 } else {
91 // Partial vertical bit column,
92 // followed by a bit rectangle,
93 // followed by another partial vertical bit column.
94 if(trail>0) {
95 do {
96 table[trail++]|=bits;
97 } while(trail<64);
98 ++lead;
99 }
100 if(lead<limitLead) {
101 bits=~((1<<lead)-1);
102 if(limitLead<0x20) {
103 bits&=(1<<limitLead)-1;
104 }
105 for(trail=0; trail<64; ++trail) {
106 table[trail]|=bits;
107 }
108 }
109 // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
110 // In that case, bits=1<<limitLead is undefined but the bits value
111 // is not used because trail<limitTrail is already false.
112 bits=(uint32_t)1<<((limitLead == 0x20) ? (limitLead - 1) : limitLead);
113 for(trail=0; trail<limitTrail; ++trail) {
114 table[trail]|=bits;
115 }
116 }
117 }
118
initBits()119 void BMPSet::initBits() {
120 UChar32 start, limit;
121 int32_t listIndex=0;
122
123 // Set asciiBytes[].
124 do {
125 start=list[listIndex++];
126 if(listIndex<listLength) {
127 limit=list[listIndex++];
128 } else {
129 limit=0x110000;
130 }
131 if(start>=0x80) {
132 break;
133 }
134 do {
135 asciiBytes[start++]=1;
136 } while(start<limit && start<0x80);
137 } while(limit<=0x80);
138
139 // Set table7FF[].
140 while(start<0x800) {
141 set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800);
142 if(limit>0x800) {
143 start=0x800;
144 break;
145 }
146
147 start=list[listIndex++];
148 if(listIndex<listLength) {
149 limit=list[listIndex++];
150 } else {
151 limit=0x110000;
152 }
153 }
154
155 // Set bmpBlockBits[].
156 int32_t minStart=0x800;
157 while(start<0x10000) {
158 if(limit>0x10000) {
159 limit=0x10000;
160 }
161
162 if(start<minStart) {
163 start=minStart;
164 }
165 if(start<limit) { // Else: Another range entirely in a known mixed-value block.
166 if(start&0x3f) {
167 // Mixed-value block of 64 code points.
168 start>>=6;
169 bmpBlockBits[start&0x3f]|=0x10001<<(start>>6);
170 start=(start+1)<<6; // Round up to the next block boundary.
171 minStart=start; // Ignore further ranges in this block.
172 }
173 if(start<limit) {
174 if(start<(limit&~0x3f)) {
175 // Multiple all-ones blocks of 64 code points each.
176 set32x64Bits(bmpBlockBits, start>>6, limit>>6);
177 }
178
179 if(limit&0x3f) {
180 // Mixed-value block of 64 code points.
181 limit>>=6;
182 bmpBlockBits[limit&0x3f]|=0x10001<<(limit>>6);
183 limit=(limit+1)<<6; // Round up to the next block boundary.
184 minStart=limit; // Ignore further ranges in this block.
185 }
186 }
187 }
188
189 if(limit==0x10000) {
190 break;
191 }
192
193 start=list[listIndex++];
194 if(listIndex<listLength) {
195 limit=list[listIndex++];
196 } else {
197 limit=0x110000;
198 }
199 }
200 }
201
202 /*
203 * Override some bits and bytes to the result of contains(FFFD)
204 * for faster validity checking at runtime.
205 * No need to set 0 values where they were reset to 0 in the constructor
206 * and not modified by initBits().
207 * (asciiBytes[] trail bytes, table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
208 * Need to set 0 values for surrogates D800..DFFF.
209 */
overrideIllegal()210 void BMPSet::overrideIllegal() {
211 uint32_t bits, mask;
212 int32_t i;
213
214 if(containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10])) {
215 // contains(FFFD)==TRUE
216 for(i=0x80; i<0xc0; ++i) {
217 asciiBytes[i]=1;
218 }
219
220 bits=3; // Lead bytes 0xC0 and 0xC1.
221 for(i=0; i<64; ++i) {
222 table7FF[i]|=bits;
223 }
224
225 bits=1; // Lead byte 0xE0.
226 for(i=0; i<32; ++i) { // First half of 4k block.
227 bmpBlockBits[i]|=bits;
228 }
229
230 mask=~(0x10001<<0xd); // Lead byte 0xED.
231 bits=1<<0xd;
232 for(i=32; i<64; ++i) { // Second half of 4k block.
233 bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
234 }
235 } else {
236 // contains(FFFD)==FALSE
237 mask=~(0x10001<<0xd); // Lead byte 0xED.
238 for(i=32; i<64; ++i) { // Second half of 4k block.
239 bmpBlockBits[i]&=mask;
240 }
241 }
242 }
243
findCodePoint(UChar32 c,int32_t lo,int32_t hi) const244 int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
245 /* Examples:
246 findCodePoint(c)
247 set list[] c=0 1 3 4 7 8
248 === ============== ===========
249 [] [110000] 0 0 0 0 0 0
250 [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2
251 [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2
252 [:Any:] [0, 110000] 1 1 1 1 1 1
253 */
254
255 // Return the smallest i such that c < list[i]. Assume
256 // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
257 if (c < list[lo])
258 return lo;
259 // High runner test. c is often after the last range, so an
260 // initial check for this condition pays off.
261 if (lo >= hi || c >= list[hi-1])
262 return hi;
263 // invariant: c >= list[lo]
264 // invariant: c < list[hi]
265 for (;;) {
266 int32_t i = (lo + hi) >> 1;
267 if (i == lo) {
268 break; // Found!
269 } else if (c < list[i]) {
270 hi = i;
271 } else {
272 lo = i;
273 }
274 }
275 return hi;
276 }
277
278 UBool
contains(UChar32 c) const279 BMPSet::contains(UChar32 c) const {
280 if((uint32_t)c<=0x7f) {
281 return (UBool)asciiBytes[c];
282 } else if((uint32_t)c<=0x7ff) {
283 return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
284 } else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
285 int lead=c>>12;
286 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
287 if(twoBits<=1) {
288 // All 64 code points with the same bits 15..6
289 // are either in the set or not.
290 return (UBool)twoBits;
291 } else {
292 // Look up the code point in its 4k block of code points.
293 return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
294 }
295 } else if((uint32_t)c<=0x10ffff) {
296 // surrogate or supplementary code point
297 return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
298 } else {
299 // Out-of-range code points get FALSE, consistent with long-standing
300 // behavior of UnicodeSet::contains(c).
301 return FALSE;
302 }
303 }
304
305 /*
306 * Check for sufficient length for trail unit for each surrogate pair.
307 * Handle single surrogates as surrogate code points as usual in ICU.
308 */
309 const UChar *
span(const UChar * s,const UChar * limit,USetSpanCondition spanCondition) const310 BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
311 UChar c, c2;
312
313 if(spanCondition) {
314 // span
315 do {
316 c=*s;
317 if(c<=0x7f) {
318 if(!asciiBytes[c]) {
319 break;
320 }
321 } else if(c<=0x7ff) {
322 if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
323 break;
324 }
325 } else if(c<0xd800 || c>=0xe000) {
326 int lead=c>>12;
327 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
328 if(twoBits<=1) {
329 // All 64 code points with the same bits 15..6
330 // are either in the set or not.
331 if(twoBits==0) {
332 break;
333 }
334 } else {
335 // Look up the code point in its 4k block of code points.
336 if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
337 break;
338 }
339 }
340 } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
341 // surrogate code point
342 if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
343 break;
344 }
345 } else {
346 // surrogate pair
347 if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
348 break;
349 }
350 ++s;
351 }
352 } while(++s<limit);
353 } else {
354 // span not
355 do {
356 c=*s;
357 if(c<=0x7f) {
358 if(asciiBytes[c]) {
359 break;
360 }
361 } else if(c<=0x7ff) {
362 if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
363 break;
364 }
365 } else if(c<0xd800 || c>=0xe000) {
366 int lead=c>>12;
367 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
368 if(twoBits<=1) {
369 // All 64 code points with the same bits 15..6
370 // are either in the set or not.
371 if(twoBits!=0) {
372 break;
373 }
374 } else {
375 // Look up the code point in its 4k block of code points.
376 if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
377 break;
378 }
379 }
380 } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
381 // surrogate code point
382 if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
383 break;
384 }
385 } else {
386 // surrogate pair
387 if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
388 break;
389 }
390 ++s;
391 }
392 } while(++s<limit);
393 }
394 return s;
395 }
396
397 /* Symmetrical with span(). */
398 const UChar *
spanBack(const UChar * s,const UChar * limit,USetSpanCondition spanCondition) const399 BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
400 UChar c, c2;
401
402 if(spanCondition) {
403 // span
404 for(;;) {
405 c=*(--limit);
406 if(c<=0x7f) {
407 if(!asciiBytes[c]) {
408 break;
409 }
410 } else if(c<=0x7ff) {
411 if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
412 break;
413 }
414 } else if(c<0xd800 || c>=0xe000) {
415 int lead=c>>12;
416 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
417 if(twoBits<=1) {
418 // All 64 code points with the same bits 15..6
419 // are either in the set or not.
420 if(twoBits==0) {
421 break;
422 }
423 } else {
424 // Look up the code point in its 4k block of code points.
425 if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
426 break;
427 }
428 }
429 } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
430 // surrogate code point
431 if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
432 break;
433 }
434 } else {
435 // surrogate pair
436 if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
437 break;
438 }
439 --limit;
440 }
441 if(s==limit) {
442 return s;
443 }
444 }
445 } else {
446 // span not
447 for(;;) {
448 c=*(--limit);
449 if(c<=0x7f) {
450 if(asciiBytes[c]) {
451 break;
452 }
453 } else if(c<=0x7ff) {
454 if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
455 break;
456 }
457 } else if(c<0xd800 || c>=0xe000) {
458 int lead=c>>12;
459 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
460 if(twoBits<=1) {
461 // All 64 code points with the same bits 15..6
462 // are either in the set or not.
463 if(twoBits!=0) {
464 break;
465 }
466 } else {
467 // Look up the code point in its 4k block of code points.
468 if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
469 break;
470 }
471 }
472 } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
473 // surrogate code point
474 if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
475 break;
476 }
477 } else {
478 // surrogate pair
479 if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
480 break;
481 }
482 --limit;
483 }
484 if(s==limit) {
485 return s;
486 }
487 }
488 }
489 return limit+1;
490 }
491
492 /*
493 * Precheck for sufficient trail bytes at end of string only once per span.
494 * Check validity.
495 */
496 const uint8_t *
spanUTF8(const uint8_t * s,int32_t length,USetSpanCondition spanCondition) const497 BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
498 const uint8_t *limit=s+length;
499 uint8_t b=*s;
500 if((int8_t)b>=0) {
501 // Initial all-ASCII span.
502 if(spanCondition) {
503 do {
504 if(!asciiBytes[b] || ++s==limit) {
505 return s;
506 }
507 b=*s;
508 } while((int8_t)b>=0);
509 } else {
510 do {
511 if(asciiBytes[b] || ++s==limit) {
512 return s;
513 }
514 b=*s;
515 } while((int8_t)b>=0);
516 }
517 length=(int32_t)(limit-s);
518 }
519
520 if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
521 spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
522 }
523
524 const uint8_t *limit0=limit;
525
526 /*
527 * Make sure that the last 1/2/3/4-byte sequence before limit is complete
528 * or runs into a lead byte.
529 * In the span loop compare s with limit only once
530 * per multi-byte character.
531 *
532 * Give a trailing illegal sequence the same value as the result of contains(FFFD),
533 * including it if that is part of the span, otherwise set limit0 to before
534 * the truncated sequence.
535 */
536 b=*(limit-1);
537 if((int8_t)b<0) {
538 // b>=0x80: lead or trail byte
539 if(b<0xc0) {
540 // single trail byte, check for preceding 3- or 4-byte lead byte
541 if(length>=2 && (b=*(limit-2))>=0xe0) {
542 limit-=2;
543 if(asciiBytes[0x80]!=spanCondition) {
544 limit0=limit;
545 }
546 } else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
547 // 4-byte lead byte with only two trail bytes
548 limit-=3;
549 if(asciiBytes[0x80]!=spanCondition) {
550 limit0=limit;
551 }
552 }
553 } else {
554 // lead byte with no trail bytes
555 --limit;
556 if(asciiBytes[0x80]!=spanCondition) {
557 limit0=limit;
558 }
559 }
560 }
561
562 uint8_t t1, t2, t3;
563
564 while(s<limit) {
565 b=*s;
566 if(b<0xc0) {
567 // ASCII; or trail bytes with the result of contains(FFFD).
568 if(spanCondition) {
569 do {
570 if(!asciiBytes[b]) {
571 return s;
572 } else if(++s==limit) {
573 return limit0;
574 }
575 b=*s;
576 } while(b<0xc0);
577 } else {
578 do {
579 if(asciiBytes[b]) {
580 return s;
581 } else if(++s==limit) {
582 return limit0;
583 }
584 b=*s;
585 } while(b<0xc0);
586 }
587 }
588 ++s; // Advance past the lead byte.
589 if(b>=0xe0) {
590 if(b<0xf0) {
591 if( /* handle U+0000..U+FFFF inline */
592 (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
593 (t2=(uint8_t)(s[1]-0x80)) <= 0x3f
594 ) {
595 b&=0xf;
596 uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
597 if(twoBits<=1) {
598 // All 64 code points with this lead byte and middle trail byte
599 // are either in the set or not.
600 if(twoBits!=(uint32_t)spanCondition) {
601 return s-1;
602 }
603 } else {
604 // Look up the code point in its 4k block of code points.
605 UChar32 c=(b<<12)|(t1<<6)|t2;
606 if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) {
607 return s-1;
608 }
609 }
610 s+=2;
611 continue;
612 }
613 } else if( /* handle U+10000..U+10FFFF inline */
614 (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
615 (t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
616 (t3=(uint8_t)(s[2]-0x80)) <= 0x3f
617 ) {
618 // Give an illegal sequence the same value as the result of contains(FFFD).
619 UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
620 if( ( (0x10000<=c && c<=0x10ffff) ?
621 containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
622 asciiBytes[0x80]
623 ) != spanCondition
624 ) {
625 return s-1;
626 }
627 s+=3;
628 continue;
629 }
630 } else /* 0xc0<=b<0xe0 */ {
631 if( /* handle U+0000..U+07FF inline */
632 (t1=(uint8_t)(*s-0x80)) <= 0x3f
633 ) {
634 if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
635 return s-1;
636 }
637 ++s;
638 continue;
639 }
640 }
641
642 // Give an illegal sequence the same value as the result of contains(FFFD).
643 // Handle each byte of an illegal sequence separately to simplify the code;
644 // no need to optimize error handling.
645 if(asciiBytes[0x80]!=spanCondition) {
646 return s-1;
647 }
648 }
649
650 return limit0;
651 }
652
653 /*
654 * While going backwards through UTF-8 optimize only for ASCII.
655 * Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not
656 * possible to tell from the last byte in a multi-byte sequence how many
657 * preceding bytes there should be. Therefore, going backwards through UTF-8
658 * is much harder than going forward.
659 */
660 int32_t
spanBackUTF8(const uint8_t * s,int32_t length,USetSpanCondition spanCondition) const661 BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
662 if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
663 spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
664 }
665
666 uint8_t b;
667
668 do {
669 b=s[--length];
670 if((int8_t)b>=0) {
671 // ASCII sub-span
672 if(spanCondition) {
673 do {
674 if(!asciiBytes[b]) {
675 return length+1;
676 } else if(length==0) {
677 return 0;
678 }
679 b=s[--length];
680 } while((int8_t)b>=0);
681 } else {
682 do {
683 if(asciiBytes[b]) {
684 return length+1;
685 } else if(length==0) {
686 return 0;
687 }
688 b=s[--length];
689 } while((int8_t)b>=0);
690 }
691 }
692
693 int32_t prev=length;
694 UChar32 c;
695 // trail byte: collect a multi-byte character
696 // (or lead byte in last-trail position)
697 c=utf8_prevCharSafeBody(s, 0, &length, b, -3);
698 // c is a valid code point, not ASCII, not a surrogate
699 if(c<=0x7ff) {
700 if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
701 return prev+1;
702 }
703 } else if(c<=0xffff) {
704 int lead=c>>12;
705 uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
706 if(twoBits<=1) {
707 // All 64 code points with the same bits 15..6
708 // are either in the set or not.
709 if(twoBits!=(uint32_t)spanCondition) {
710 return prev+1;
711 }
712 } else {
713 // Look up the code point in its 4k block of code points.
714 if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) {
715 return prev+1;
716 }
717 }
718 } else {
719 if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) {
720 return prev+1;
721 }
722 }
723 } while(length>0);
724 return 0;
725 }
726
727 U_NAMESPACE_END
728