1# This set of tests is for UTF-8 support and Unicode property support, with 2# relevance only for the 8-bit library. 3 4# The next 5 patterns have UTF-8 errors 5 6/[�]/utf 7Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80 8 9/�/utf 10Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end 11 12/���xxx/utf 13Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80 14 15/Â��������/utf 16Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set 17 18/Â��������/match_invalid_utf 19Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set 20 21# Now test subjects 22 23/badutf/utf 24\= Expect UTF-8 errors 25 X\xdf 26Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1 27 XX\xef 28Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2 29 XXX\xef\x80 30Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3 31 X\xf7 32Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 1 33 XX\xf7\x80 34Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2 35 XXX\xf7\x80\x80 36Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3 37 \xfb 38Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0 39 \xfb\x80 40Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0 41 \xfb\x80\x80 42Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 43 \xfb\x80\x80\x80 44Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0 45 \xfd 46Failed: error -7: UTF-8 error: 5 bytes missing at end at offset 0 47 \xfd\x80 48Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0 49 \xfd\x80\x80 50Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0 51 \xfd\x80\x80\x80 52Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 53 \xfd\x80\x80\x80\x80 54Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0 55 \xdf\x7f 56Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0 57 \xef\x7f\x80 58Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0 59 \xef\x80\x7f 60Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0 61 \xf7\x7f\x80\x80 62Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0 63 \xf7\x80\x7f\x80 64Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0 65 \xf7\x80\x80\x7f 66Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0 67 \xfb\x7f\x80\x80\x80 68Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0 69 \xfb\x80\x7f\x80\x80 70Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0 71 \xfb\x80\x80\x7f\x80 72Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0 73 \xfb\x80\x80\x80\x7f 74Failed: error -11: UTF-8 error: byte 5 top bits not 0x80 at offset 0 75 \xfd\x7f\x80\x80\x80\x80 76Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0 77 \xfd\x80\x7f\x80\x80\x80 78Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0 79 \xfd\x80\x80\x7f\x80\x80 80Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0 81 \xfd\x80\x80\x80\x7f\x80 82Failed: error -11: UTF-8 error: byte 5 top bits not 0x80 at offset 0 83 \xfd\x80\x80\x80\x80\x7f 84Failed: error -12: UTF-8 error: byte 6 top bits not 0x80 at offset 0 85 \xed\xa0\x80 86Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 87 \xc0\x8f 88Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 0 89 \xe0\x80\x8f 90Failed: error -18: UTF-8 error: overlong 3-byte sequence at offset 0 91 \xf0\x80\x80\x8f 92Failed: error -19: UTF-8 error: overlong 4-byte sequence at offset 0 93 \xf8\x80\x80\x80\x8f 94Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0 95 \xfc\x80\x80\x80\x80\x8f 96Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0 97 \x80 98Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0 99 \xfe 100Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 101 \xff 102Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 103 104/badutf/utf 105\= Expect UTF-8 errors 106 XX\xfb\x80\x80\x80\x80 107Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 2 108 XX\xfd\x80\x80\x80\x80\x80 109Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 2 110 XX\xf7\xbf\xbf\xbf 111Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2 112 113/shortutf/utf 114\= Expect UTF-8 errors 115 XX\xdf\=ph 116Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2 117 XX\xef\=ph 118Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2 119 XX\xef\x80\=ph 120Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2 121 \xf7\=ph 122Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0 123 \xf7\x80\=ph 124Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 125 \xf7\x80\x80\=ph 126Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0 127 \xfb\=ph 128Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0 129 \xfb\x80\=ph 130Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0 131 \xfb\x80\x80\=ph 132Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 133 \xfb\x80\x80\x80\=ph 134Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0 135 \xfd\=ph 136Failed: error -7: UTF-8 error: 5 bytes missing at end at offset 0 137 \xfd\x80\=ph 138Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0 139 \xfd\x80\x80\=ph 140Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0 141 \xfd\x80\x80\x80\=ph 142Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 143 \xfd\x80\x80\x80\x80\=ph 144Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0 145 146/anything/utf 147\= Expect UTF-8 errors 148 X\xc0\x80 149Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 1 150 XX\xc1\x8f 151Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 2 152 XXX\xe0\x9f\x80 153Failed: error -18: UTF-8 error: overlong 3-byte sequence at offset 3 154 \xf0\x8f\x80\x80 155Failed: error -19: UTF-8 error: overlong 4-byte sequence at offset 0 156 \xf8\x87\x80\x80\x80 157Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0 158 \xfc\x83\x80\x80\x80\x80 159Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0 160 \xfe\x80\x80\x80\x80\x80 161Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 162 \xff\x80\x80\x80\x80\x80 163Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0 164 \xf8\x88\x80\x80\x80 165Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 166 \xf9\x87\x80\x80\x80 167Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 168 \xfc\x84\x80\x80\x80\x80 169Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 170 \xfd\x83\x80\x80\x80\x80 171Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 172\= Expect no match 173 \xc3\x8f 174No match 175 \xe0\xaf\x80 176No match 177 \xe1\x80\x80 178No match 179 \xf0\x9f\x80\x80 180No match 181 \xf1\x8f\x80\x80 182No match 183 \xf8\x88\x80\x80\x80\=no_utf_check 184No match 185 \xf9\x87\x80\x80\x80\=no_utf_check 186No match 187 \xfc\x84\x80\x80\x80\x80\=no_utf_check 188No match 189 \xfd\x83\x80\x80\x80\x80\=no_utf_check 190No match 191 192# Similar tests with offsets 193 194/badutf/utf 195\= Expect UTF-8 errors 196 X\xdfabcd 197Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 198 X\xdfabcd\=offset=1 199Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 200\= Expect no match 201 X\xdfabcd\=offset=2 202No match 203 204/(?<=x)badutf/utf 205\= Expect UTF-8 errors 206 X\xdfabcd 207Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 208 X\xdfabcd\=offset=1 209Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 210 X\xdfabcd\=offset=2 211Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 212 X\xdfabcd\xdf\=offset=3 213Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6 214\= Expect no match 215 X\xdfabcd\=offset=3 216No match 217 218/(?<=xx)badutf/utf 219\= Expect UTF-8 errors 220 X\xdfabcd 221Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 222 X\xdfabcd\=offset=1 223Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 224 X\xdfabcd\=offset=2 225Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 226 X\xdfabcd\=offset=3 227Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 228 229/(?<=xxxx)badutf/utf 230\= Expect UTF-8 errors 231 X\xdfabcd 232Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 233 X\xdfabcd\=offset=1 234Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 235 X\xdfabcd\=offset=2 236Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 237 X\xdfabcd\=offset=3 238Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1 239 X\xdfabc\xdf\=offset=6 240Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5 241 X\xdfabc\xdf\=offset=7 242Failed: error -33: bad offset value 243\= Expect no match 244 X\xdfabcd\=offset=6 245No match 246 247/\x{100}/IB,utf 248------------------------------------------------------------------ 249 Bra 250 \x{100} 251 Ket 252 End 253------------------------------------------------------------------ 254Capture group count = 0 255Options: utf 256First code unit = \xc4 257Last code unit = \x80 258Subject length lower bound = 1 259 260/\x{1000}/IB,utf 261------------------------------------------------------------------ 262 Bra 263 \x{1000} 264 Ket 265 End 266------------------------------------------------------------------ 267Capture group count = 0 268Options: utf 269First code unit = \xe1 270Last code unit = \x80 271Subject length lower bound = 1 272 273/\x{10000}/IB,utf 274------------------------------------------------------------------ 275 Bra 276 \x{10000} 277 Ket 278 End 279------------------------------------------------------------------ 280Capture group count = 0 281Options: utf 282First code unit = \xf0 283Last code unit = \x80 284Subject length lower bound = 1 285 286/\x{100000}/IB,utf 287------------------------------------------------------------------ 288 Bra 289 \x{100000} 290 Ket 291 End 292------------------------------------------------------------------ 293Capture group count = 0 294Options: utf 295First code unit = \xf4 296Last code unit = \x80 297Subject length lower bound = 1 298 299/\x{10ffff}/IB,utf 300------------------------------------------------------------------ 301 Bra 302 \x{10ffff} 303 Ket 304 End 305------------------------------------------------------------------ 306Capture group count = 0 307Options: utf 308First code unit = \xf4 309Last code unit = \xbf 310Subject length lower bound = 1 311 312/[\x{ff}]/IB,utf 313------------------------------------------------------------------ 314 Bra 315 \x{ff} 316 Ket 317 End 318------------------------------------------------------------------ 319Capture group count = 0 320Options: utf 321First code unit = \xc3 322Last code unit = \xbf 323Subject length lower bound = 1 324 325/[\x{100}]/IB,utf 326------------------------------------------------------------------ 327 Bra 328 \x{100} 329 Ket 330 End 331------------------------------------------------------------------ 332Capture group count = 0 333Options: utf 334First code unit = \xc4 335Last code unit = \x80 336Subject length lower bound = 1 337 338/\x80/IB,utf 339------------------------------------------------------------------ 340 Bra 341 \x{80} 342 Ket 343 End 344------------------------------------------------------------------ 345Capture group count = 0 346Options: utf 347First code unit = \xc2 348Last code unit = \x80 349Subject length lower bound = 1 350 351/\xff/IB,utf 352------------------------------------------------------------------ 353 Bra 354 \x{ff} 355 Ket 356 End 357------------------------------------------------------------------ 358Capture group count = 0 359Options: utf 360First code unit = \xc3 361Last code unit = \xbf 362Subject length lower bound = 1 363 364/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf 365------------------------------------------------------------------ 366 Bra 367 \x{d55c}\x{ad6d}\x{c5b4} 368 Ket 369 End 370------------------------------------------------------------------ 371Capture group count = 0 372Options: utf 373First code unit = \xed 374Last code unit = \xb4 375Subject length lower bound = 3 376 \x{D55c}\x{ad6d}\x{C5B4} 377 0: \x{d55c}\x{ad6d}\x{c5b4} 378 379/\x{65e5}\x{672c}\x{8a9e}/IB,utf 380------------------------------------------------------------------ 381 Bra 382 \x{65e5}\x{672c}\x{8a9e} 383 Ket 384 End 385------------------------------------------------------------------ 386Capture group count = 0 387Options: utf 388First code unit = \xe6 389Last code unit = \x9e 390Subject length lower bound = 3 391 \x{65e5}\x{672c}\x{8a9e} 392 0: \x{65e5}\x{672c}\x{8a9e} 393 394/\x{80}/IB,utf 395------------------------------------------------------------------ 396 Bra 397 \x{80} 398 Ket 399 End 400------------------------------------------------------------------ 401Capture group count = 0 402Options: utf 403First code unit = \xc2 404Last code unit = \x80 405Subject length lower bound = 1 406 407/\x{084}/IB,utf 408------------------------------------------------------------------ 409 Bra 410 \x{84} 411 Ket 412 End 413------------------------------------------------------------------ 414Capture group count = 0 415Options: utf 416First code unit = \xc2 417Last code unit = \x84 418Subject length lower bound = 1 419 420/\x{104}/IB,utf 421------------------------------------------------------------------ 422 Bra 423 \x{104} 424 Ket 425 End 426------------------------------------------------------------------ 427Capture group count = 0 428Options: utf 429First code unit = \xc4 430Last code unit = \x84 431Subject length lower bound = 1 432 433/\x{861}/IB,utf 434------------------------------------------------------------------ 435 Bra 436 \x{861} 437 Ket 438 End 439------------------------------------------------------------------ 440Capture group count = 0 441Options: utf 442First code unit = \xe0 443Last code unit = \xa1 444Subject length lower bound = 1 445 446/\x{212ab}/IB,utf 447------------------------------------------------------------------ 448 Bra 449 \x{212ab} 450 Ket 451 End 452------------------------------------------------------------------ 453Capture group count = 0 454Options: utf 455First code unit = \xf0 456Last code unit = \xab 457Subject length lower bound = 1 458 459/[^ab\xC0-\xF0]/IB,utf 460------------------------------------------------------------------ 461 Bra 462 [\x00-`c-\xbf\xf1-\xff] (neg) 463 Ket 464 End 465------------------------------------------------------------------ 466Capture group count = 0 467Options: utf 468Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 469 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 470 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 471 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 472 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 473 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 474 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf 475 \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee 476 \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd 477 \xfe \xff 478Subject length lower bound = 1 479 \x{f1} 480 0: \x{f1} 481 \x{bf} 482 0: \x{bf} 483 \x{100} 484 0: \x{100} 485 \x{1000} 486 0: \x{1000} 487\= Expect no match 488 \x{c0} 489No match 490 \x{f0} 491No match 492 493/Ā{3,4}/IB,utf 494------------------------------------------------------------------ 495 Bra 496 \x{100}{3} 497 \x{100}?+ 498 Ket 499 End 500------------------------------------------------------------------ 501Capture group count = 0 502Options: utf 503First code unit = \xc4 504Last code unit = \x80 505Subject length lower bound = 3 506 \x{100}\x{100}\x{100}\x{100\x{100} 507 0: \x{100}\x{100}\x{100} 508 509/(\x{100}+|x)/IB,utf 510------------------------------------------------------------------ 511 Bra 512 CBra 1 513 \x{100}++ 514 Alt 515 x 516 Ket 517 Ket 518 End 519------------------------------------------------------------------ 520Capture group count = 1 521Options: utf 522Starting code units: x \xc4 523Subject length lower bound = 1 524 525/(\x{100}*a|x)/IB,utf 526------------------------------------------------------------------ 527 Bra 528 CBra 1 529 \x{100}*+ 530 a 531 Alt 532 x 533 Ket 534 Ket 535 End 536------------------------------------------------------------------ 537Capture group count = 1 538Options: utf 539Starting code units: a x \xc4 540Subject length lower bound = 1 541 542/(\x{100}{0,2}a|x)/IB,utf 543------------------------------------------------------------------ 544 Bra 545 CBra 1 546 \x{100}{0,2}+ 547 a 548 Alt 549 x 550 Ket 551 Ket 552 End 553------------------------------------------------------------------ 554Capture group count = 1 555Options: utf 556Starting code units: a x \xc4 557Subject length lower bound = 1 558 559/(\x{100}{1,2}a|x)/IB,utf 560------------------------------------------------------------------ 561 Bra 562 CBra 1 563 \x{100} 564 \x{100}{0,1}+ 565 a 566 Alt 567 x 568 Ket 569 Ket 570 End 571------------------------------------------------------------------ 572Capture group count = 1 573Options: utf 574Starting code units: x \xc4 575Subject length lower bound = 1 576 577/\x{100}/IB,utf 578------------------------------------------------------------------ 579 Bra 580 \x{100} 581 Ket 582 End 583------------------------------------------------------------------ 584Capture group count = 0 585Options: utf 586First code unit = \xc4 587Last code unit = \x80 588Subject length lower bound = 1 589 590/a\x{100}\x{101}*/IB,utf 591------------------------------------------------------------------ 592 Bra 593 a\x{100} 594 \x{101}*+ 595 Ket 596 End 597------------------------------------------------------------------ 598Capture group count = 0 599Options: utf 600First code unit = 'a' 601Last code unit = \x80 602Subject length lower bound = 2 603 604/a\x{100}\x{101}+/IB,utf 605------------------------------------------------------------------ 606 Bra 607 a\x{100} 608 \x{101}++ 609 Ket 610 End 611------------------------------------------------------------------ 612Capture group count = 0 613Options: utf 614First code unit = 'a' 615Last code unit = \x81 616Subject length lower bound = 3 617 618/[^\x{c4}]/IB 619------------------------------------------------------------------ 620 Bra 621 [^\x{c4}] 622 Ket 623 End 624------------------------------------------------------------------ 625Capture group count = 0 626Subject length lower bound = 1 627 628/[\x{100}]/IB,utf 629------------------------------------------------------------------ 630 Bra 631 \x{100} 632 Ket 633 End 634------------------------------------------------------------------ 635Capture group count = 0 636Options: utf 637First code unit = \xc4 638Last code unit = \x80 639Subject length lower bound = 1 640 \x{100} 641 0: \x{100} 642 Z\x{100} 643 0: \x{100} 644 \x{100}Z 645 0: \x{100} 646 647/[\xff]/IB,utf 648------------------------------------------------------------------ 649 Bra 650 \x{ff} 651 Ket 652 End 653------------------------------------------------------------------ 654Capture group count = 0 655Options: utf 656First code unit = \xc3 657Last code unit = \xbf 658Subject length lower bound = 1 659 >\x{ff}< 660 0: \x{ff} 661 662/[^\xff]/IB,utf 663------------------------------------------------------------------ 664 Bra 665 [^\x{ff}] 666 Ket 667 End 668------------------------------------------------------------------ 669Capture group count = 0 670Options: utf 671Subject length lower bound = 1 672 673/\x{100}abc(xyz(?1))/IB,utf 674------------------------------------------------------------------ 675 Bra 676 \x{100}abc 677 CBra 1 678 xyz 679 Recurse 680 Ket 681 Ket 682 End 683------------------------------------------------------------------ 684Capture group count = 1 685Options: utf 686First code unit = \xc4 687Last code unit = 'z' 688Subject length lower bound = 7 689 690/\777/I,utf 691Capture group count = 0 692Options: utf 693First code unit = \xc7 694Last code unit = \xbf 695Subject length lower bound = 1 696 \x{1ff} 697 0: \x{1ff} 698 \777 699 0: \x{1ff} 700 701/\x{100}+\x{200}/IB,utf 702------------------------------------------------------------------ 703 Bra 704 \x{100}++ 705 \x{200} 706 Ket 707 End 708------------------------------------------------------------------ 709Capture group count = 0 710Options: utf 711First code unit = \xc4 712Last code unit = \x80 713Subject length lower bound = 2 714 715/\x{100}+X/IB,utf 716------------------------------------------------------------------ 717 Bra 718 \x{100}++ 719 X 720 Ket 721 End 722------------------------------------------------------------------ 723Capture group count = 0 724Options: utf 725First code unit = \xc4 726Last code unit = 'X' 727Subject length lower bound = 2 728 729/^[\QĀ\E-\QŐ\E/B,utf 730Failed: error 106 at offset 15: missing terminating ] for character class 731 732# This tests the stricter UTF-8 check according to RFC 3629. 733 734/X/utf 735\= Expect UTF-8 errors 736 \x{d800} 737Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 738 \x{da00} 739Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 740 \x{dfff} 741Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0 742 \x{110000} 743Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 0 744 \x{2000000} 745Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0 746 \x{7fffffff} 747Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 748\= Expect no match 749 \x{d800}\=no_utf_check 750No match 751 \x{da00}\=no_utf_check 752No match 753 \x{dfff}\=no_utf_check 754No match 755 \x{110000}\=no_utf_check 756No match 757 \x{2000000}\=no_utf_check 758No match 759 \x{7fffffff}\=no_utf_check 760No match 761 762/(*UTF8)\x{1234}/ 763 abcd\x{1234}pqr 764 0: \x{1234} 765 766/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I 767Capture group count = 0 768Compile options: <none> 769Overall options: utf 770\R matches any Unicode newline 771Forced newline is CRLF 772First code unit = 'a' 773Last code unit = 'b' 774Subject length lower bound = 3 775 776/\h/I,utf 777Capture group count = 0 778Options: utf 779Starting code units: \x09 \x20 \xc2 \xe1 \xe2 \xe3 780Subject length lower bound = 1 781 ABC\x{09} 782 0: \x{09} 783 ABC\x{20} 784 0: 785 ABC\x{a0} 786 0: \x{a0} 787 ABC\x{1680} 788 0: \x{1680} 789 ABC\x{180e} 790 0: \x{180e} 791 ABC\x{2000} 792 0: \x{2000} 793 ABC\x{202f} 794 0: \x{202f} 795 ABC\x{205f} 796 0: \x{205f} 797 ABC\x{3000} 798 0: \x{3000} 799 800/\v/I,utf 801Capture group count = 0 802Options: utf 803Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 804Subject length lower bound = 1 805 ABC\x{0a} 806 0: \x{0a} 807 ABC\x{0b} 808 0: \x{0b} 809 ABC\x{0c} 810 0: \x{0c} 811 ABC\x{0d} 812 0: \x{0d} 813 ABC\x{85} 814 0: \x{85} 815 ABC\x{2028} 816 0: \x{2028} 817 818/\h*A/I,utf 819Capture group count = 0 820Options: utf 821Starting code units: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 822Last code unit = 'A' 823Subject length lower bound = 1 824 CDBABC 825 0: A 826 827/\v+A/I,utf 828Capture group count = 0 829Options: utf 830Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 831Last code unit = 'A' 832Subject length lower bound = 2 833 834/\s?xxx\s/I,utf 835Capture group count = 0 836Options: utf 837Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x 838Last code unit = 'x' 839Subject length lower bound = 4 840 841/\sxxx\s/I,utf,tables=2 842Capture group count = 0 843Options: utf 844Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc2 845Last code unit = 'x' 846Subject length lower bound = 5 847 AB\x{85}xxx\x{a0}XYZ 848 0: \x{85}xxx\x{a0} 849 AB\x{a0}xxx\x{85}XYZ 850 0: \x{a0}xxx\x{85} 851 852/\S \S/I,utf,tables=2 853Capture group count = 0 854Options: utf 855Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f 856 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e 857 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C 858 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 859 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 860 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 861 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 862 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 863 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 864Last code unit = ' ' 865Subject length lower bound = 3 866 \x{a2} \x{84} 867 0: \x{a2} \x{84} 868 A Z 869 0: A Z 870 871/a+/utf 872 a\x{123}aa\=offset=1 873 0: aa 874 a\x{123}aa\=offset=3 875 0: aa 876 a\x{123}aa\=offset=4 877 0: a 878\= Expect bad offset value 879 a\x{123}aa\=offset=6 880Failed: error -33: bad offset value 881\= Expect bad UTF-8 offset 882 a\x{123}aa\=offset=2 883Error -36 (bad UTF-8 offset) 884\= Expect no match 885 a\x{123}aa\=offset=5 886No match 887 888/\x{1234}+/Ii,utf 889Capture group count = 0 890Options: caseless utf 891Starting code units: \xe1 892Subject length lower bound = 1 893 894/\x{1234}+?/Ii,utf 895Capture group count = 0 896Options: caseless utf 897Starting code units: \xe1 898Subject length lower bound = 1 899 900/\x{1234}++/Ii,utf 901Capture group count = 0 902Options: caseless utf 903Starting code units: \xe1 904Subject length lower bound = 1 905 906/\x{1234}{2}/Ii,utf 907Capture group count = 0 908Options: caseless utf 909Starting code units: \xe1 910Subject length lower bound = 2 911 912/[^\x{c4}]/IB,utf 913------------------------------------------------------------------ 914 Bra 915 [^\x{c4}] 916 Ket 917 End 918------------------------------------------------------------------ 919Capture group count = 0 920Options: utf 921Subject length lower bound = 1 922 923/X+\x{200}/IB,utf 924------------------------------------------------------------------ 925 Bra 926 X++ 927 \x{200} 928 Ket 929 End 930------------------------------------------------------------------ 931Capture group count = 0 932Options: utf 933First code unit = 'X' 934Last code unit = \x80 935Subject length lower bound = 2 936 937/\R/I,utf 938Capture group count = 0 939Options: utf 940Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 941Subject length lower bound = 1 942 943/\777/IB,utf 944------------------------------------------------------------------ 945 Bra 946 \x{1ff} 947 Ket 948 End 949------------------------------------------------------------------ 950Capture group count = 0 951Options: utf 952First code unit = \xc7 953Last code unit = \xbf 954Subject length lower bound = 1 955 956/\w+\x{C4}/B,utf 957------------------------------------------------------------------ 958 Bra 959 \w++ 960 \x{c4} 961 Ket 962 End 963------------------------------------------------------------------ 964 a\x{C4}\x{C4} 965 0: a\x{c4} 966 967/\w+\x{C4}/B,utf,tables=2 968------------------------------------------------------------------ 969 Bra 970 \w+ 971 \x{c4} 972 Ket 973 End 974------------------------------------------------------------------ 975 a\x{C4}\x{C4} 976 0: a\x{c4}\x{c4} 977 978/\W+\x{C4}/B,utf 979------------------------------------------------------------------ 980 Bra 981 \W+ 982 \x{c4} 983 Ket 984 End 985------------------------------------------------------------------ 986 !\x{C4} 987 0: !\x{c4} 988 989/\W+\x{C4}/B,utf,tables=2 990------------------------------------------------------------------ 991 Bra 992 \W++ 993 \x{c4} 994 Ket 995 End 996------------------------------------------------------------------ 997 !\x{C4} 998 0: !\x{c4} 999 1000/\W+\x{A1}/B,utf 1001------------------------------------------------------------------ 1002 Bra 1003 \W+ 1004 \x{a1} 1005 Ket 1006 End 1007------------------------------------------------------------------ 1008 !\x{A1} 1009 0: !\x{a1} 1010 1011/\W+\x{A1}/B,utf,tables=2 1012------------------------------------------------------------------ 1013 Bra 1014 \W+ 1015 \x{a1} 1016 Ket 1017 End 1018------------------------------------------------------------------ 1019 !\x{A1} 1020 0: !\x{a1} 1021 1022/X\s+\x{A0}/B,utf 1023------------------------------------------------------------------ 1024 Bra 1025 X 1026 \s++ 1027 \x{a0} 1028 Ket 1029 End 1030------------------------------------------------------------------ 1031 X\x20\x{A0}\x{A0} 1032 0: X \x{a0} 1033 1034/X\s+\x{A0}/B,utf,tables=2 1035------------------------------------------------------------------ 1036 Bra 1037 X 1038 \s+ 1039 \x{a0} 1040 Ket 1041 End 1042------------------------------------------------------------------ 1043 X\x20\x{A0}\x{A0} 1044 0: X \x{a0}\x{a0} 1045 1046/\S+\x{A0}/B,utf 1047------------------------------------------------------------------ 1048 Bra 1049 \S+ 1050 \x{a0} 1051 Ket 1052 End 1053------------------------------------------------------------------ 1054 X\x{A0}\x{A0} 1055 0: X\x{a0}\x{a0} 1056 1057/\S+\x{A0}/B,utf,tables=2 1058------------------------------------------------------------------ 1059 Bra 1060 \S++ 1061 \x{a0} 1062 Ket 1063 End 1064------------------------------------------------------------------ 1065 X\x{A0}\x{A0} 1066 0: X\x{a0} 1067 1068/\x{a0}+\s!/B,utf 1069------------------------------------------------------------------ 1070 Bra 1071 \x{a0}++ 1072 \s 1073 ! 1074 Ket 1075 End 1076------------------------------------------------------------------ 1077 \x{a0}\x20! 1078 0: \x{a0} ! 1079 1080/\x{a0}+\s!/B,utf,tables=2 1081------------------------------------------------------------------ 1082 Bra 1083 \x{a0}+ 1084 \s 1085 ! 1086 Ket 1087 End 1088------------------------------------------------------------------ 1089 \x{a0}\x20! 1090 0: \x{a0} ! 1091 1092/A/utf 1093 \x{ff000041} 1094** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8 1095 \x{7f000041} 1096Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0 1097 1098/(*UTF8)abc/never_utf 1099Failed: error 174 at offset 7: using UTF is disabled by the application 1100 1101/abc/utf,never_utf 1102Failed: error 174 at offset 0: using UTF is disabled by the application 1103 1104/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf 1105------------------------------------------------------------------ 1106 Bra 1107 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} 1108 Ket 1109 End 1110------------------------------------------------------------------ 1111Capture group count = 0 1112Options: caseless utf 1113First code unit = 'A' (caseless) 1114Subject length lower bound = 5 1115 1116/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf 1117------------------------------------------------------------------ 1118 Bra 1119 A\x{391}\x{10427}\x{ff3a}\x{1fb0} 1120 Ket 1121 End 1122------------------------------------------------------------------ 1123Capture group count = 0 1124Options: utf 1125First code unit = 'A' 1126Last code unit = \xb0 1127Subject length lower bound = 5 1128 1129/AB\x{1fb0}/IB,utf 1130------------------------------------------------------------------ 1131 Bra 1132 AB\x{1fb0} 1133 Ket 1134 End 1135------------------------------------------------------------------ 1136Capture group count = 0 1137Options: utf 1138First code unit = 'A' 1139Last code unit = \xb0 1140Subject length lower bound = 3 1141 1142/AB\x{1fb0}/IBi,utf 1143------------------------------------------------------------------ 1144 Bra 1145 /i AB\x{1fb0} 1146 Ket 1147 End 1148------------------------------------------------------------------ 1149Capture group count = 0 1150Options: caseless utf 1151First code unit = 'A' (caseless) 1152Last code unit = 'B' (caseless) 1153Subject length lower bound = 3 1154 1155/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf 1156Capture group count = 0 1157Options: caseless utf 1158Starting code units: \xd0 \xd1 1159Subject length lower bound = 17 1160 \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} 1161 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} 1162 \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} 1163 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} 1164 1165/[ⱥ]/Bi,utf 1166------------------------------------------------------------------ 1167 Bra 1168 /i \x{2c65} 1169 Ket 1170 End 1171------------------------------------------------------------------ 1172 1173/[^ⱥ]/Bi,utf 1174------------------------------------------------------------------ 1175 Bra 1176 /i [^\x{2c65}] 1177 Ket 1178 End 1179------------------------------------------------------------------ 1180 1181/\h/I 1182Capture group count = 0 1183Starting code units: \x09 \x20 \xa0 1184Subject length lower bound = 1 1185 1186/\v/I 1187Capture group count = 0 1188Starting code units: \x0a \x0b \x0c \x0d \x85 1189Subject length lower bound = 1 1190 1191/\R/I 1192Capture group count = 0 1193Starting code units: \x0a \x0b \x0c \x0d \x85 1194Subject length lower bound = 1 1195 1196/[[:blank:]]/B,ucp 1197------------------------------------------------------------------ 1198 Bra 1199 [\x09 \xa0] 1200 Ket 1201 End 1202------------------------------------------------------------------ 1203 1204/\x{212a}+/Ii,utf 1205Capture group count = 0 1206Options: caseless utf 1207Starting code units: K k \xe2 1208Subject length lower bound = 1 1209 KKkk\x{212a} 1210 0: KKkk\x{212a} 1211 1212/s+/Ii,utf 1213Capture group count = 0 1214Options: caseless utf 1215Starting code units: S s \xc5 1216Subject length lower bound = 1 1217 SSss\x{17f} 1218 0: SSss\x{17f} 1219 1220/\x{100}*A/IB,utf 1221------------------------------------------------------------------ 1222 Bra 1223 \x{100}*+ 1224 A 1225 Ket 1226 End 1227------------------------------------------------------------------ 1228Capture group count = 0 1229Options: utf 1230Starting code units: A \xc4 1231Last code unit = 'A' 1232Subject length lower bound = 1 1233 A 1234 0: A 1235 1236/\x{100}*\d(?R)/IB,utf 1237------------------------------------------------------------------ 1238 Bra 1239 \x{100}*+ 1240 \d 1241 Recurse 1242 Ket 1243 End 1244------------------------------------------------------------------ 1245Capture group count = 0 1246Options: utf 1247Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4 1248Subject length lower bound = 1 1249 1250/[Z\x{100}]/IB,utf 1251------------------------------------------------------------------ 1252 Bra 1253 [Z\x{100}] 1254 Ket 1255 End 1256------------------------------------------------------------------ 1257Capture group count = 0 1258Options: utf 1259Starting code units: Z \xc4 1260Subject length lower bound = 1 1261 Z\x{100} 1262 0: Z 1263 \x{100} 1264 0: \x{100} 1265 \x{100}Z 1266 0: \x{100} 1267 1268/[z-\x{100}]/IB,utf 1269------------------------------------------------------------------ 1270 Bra 1271 [z-\xff\x{100}] 1272 Ket 1273 End 1274------------------------------------------------------------------ 1275Capture group count = 0 1276Options: utf 1277Starting code units: z { | } ~ \x7f \xc2 \xc3 \xc4 1278Subject length lower bound = 1 1279 1280/[z\Qa-d]Ā\E]/IB,utf 1281------------------------------------------------------------------ 1282 Bra 1283 [\-\]adz\x{100}] 1284 Ket 1285 End 1286------------------------------------------------------------------ 1287Capture group count = 0 1288Options: utf 1289Starting code units: - ] a d z \xc4 1290Subject length lower bound = 1 1291 \x{100} 1292 0: \x{100} 1293 Ā 1294 0: \x{100} 1295 1296/[ab\x{100}]abc(xyz(?1))/IB,utf 1297------------------------------------------------------------------ 1298 Bra 1299 [ab\x{100}] 1300 abc 1301 CBra 1 1302 xyz 1303 Recurse 1304 Ket 1305 Ket 1306 End 1307------------------------------------------------------------------ 1308Capture group count = 1 1309Options: utf 1310Starting code units: a b \xc4 1311Last code unit = 'z' 1312Subject length lower bound = 7 1313 1314/\x{100}*\s/IB,utf 1315------------------------------------------------------------------ 1316 Bra 1317 \x{100}*+ 1318 \s 1319 Ket 1320 End 1321------------------------------------------------------------------ 1322Capture group count = 0 1323Options: utf 1324Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc4 1325Subject length lower bound = 1 1326 1327/\x{100}*\d/IB,utf 1328------------------------------------------------------------------ 1329 Bra 1330 \x{100}*+ 1331 \d 1332 Ket 1333 End 1334------------------------------------------------------------------ 1335Capture group count = 0 1336Options: utf 1337Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4 1338Subject length lower bound = 1 1339 1340/\x{100}*\w/IB,utf 1341------------------------------------------------------------------ 1342 Bra 1343 \x{100}*+ 1344 \w 1345 Ket 1346 End 1347------------------------------------------------------------------ 1348Capture group count = 0 1349Options: utf 1350Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 1351 Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z 1352 \xc4 1353Subject length lower bound = 1 1354 1355/\x{100}*\D/IB,utf 1356------------------------------------------------------------------ 1357 Bra 1358 \x{100}* 1359 \D 1360 Ket 1361 End 1362------------------------------------------------------------------ 1363Capture group count = 0 1364Options: utf 1365Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 1366 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 1367 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > 1368 ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c 1369 d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 1370 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 1371 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 1372 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef 1373 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe 1374 \xff 1375Subject length lower bound = 1 1376 1377/\x{100}*\S/IB,utf 1378------------------------------------------------------------------ 1379 Bra 1380 \x{100}* 1381 \S 1382 Ket 1383 End 1384------------------------------------------------------------------ 1385Capture group count = 0 1386Options: utf 1387Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f 1388 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e 1389 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C 1390 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 1391 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 1392 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 1393 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 1394 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 1395 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 1396Subject length lower bound = 1 1397 1398/\x{100}*\W/IB,utf 1399------------------------------------------------------------------ 1400 Bra 1401 \x{100}* 1402 \W 1403 Ket 1404 End 1405------------------------------------------------------------------ 1406Capture group count = 0 1407Options: utf 1408Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 1409 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 1410 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > 1411 ? @ [ \ ] ^ ` { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 1412 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 1413 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 1414 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 1415 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 1416Subject length lower bound = 1 1417 1418/[\x{105}-\x{109}]/IBi,utf 1419------------------------------------------------------------------ 1420 Bra 1421 [\x{104}-\x{109}] 1422 Ket 1423 End 1424------------------------------------------------------------------ 1425Capture group count = 0 1426Options: caseless utf 1427Starting code units: \xc4 1428Subject length lower bound = 1 1429 \x{104} 1430 0: \x{104} 1431 \x{105} 1432 0: \x{105} 1433 \x{109} 1434 0: \x{109} 1435\= Expect no match 1436 \x{100} 1437No match 1438 \x{10a} 1439No match 1440 1441/[z-\x{100}]/IBi,utf 1442------------------------------------------------------------------ 1443 Bra 1444 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] 1445 Ket 1446 End 1447------------------------------------------------------------------ 1448Capture group count = 0 1449Options: caseless utf 1450Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2 1451Subject length lower bound = 1 1452 Z 1453 0: Z 1454 z 1455 0: z 1456 \x{39c} 1457 0: \x{39c} 1458 \x{178} 1459 0: \x{178} 1460 | 1461 0: | 1462 \x{80} 1463 0: \x{80} 1464 \x{ff} 1465 0: \x{ff} 1466 \x{100} 1467 0: \x{100} 1468 \x{101} 1469 0: \x{101} 1470\= Expect no match 1471 \x{102} 1472No match 1473 Y 1474No match 1475 y 1476No match 1477 1478/[z-\x{100}]/IBi,utf 1479------------------------------------------------------------------ 1480 Bra 1481 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] 1482 Ket 1483 End 1484------------------------------------------------------------------ 1485Capture group count = 0 1486Options: caseless utf 1487Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xce \xe1 \xe2 1488Subject length lower bound = 1 1489 1490/\x{3a3}B/IBi,utf 1491------------------------------------------------------------------ 1492 Bra 1493 clist 03a3 03c2 03c3 1494 /i B 1495 Ket 1496 End 1497------------------------------------------------------------------ 1498Capture group count = 0 1499Options: caseless utf 1500Starting code units: \xce \xcf 1501Last code unit = 'B' (caseless) 1502Subject length lower bound = 2 1503 1504/abc/utf,replace=� 1505 abc 1506Failed: error -3: UTF-8 error: 1 byte missing at end 1507 1508/(?<=(a)(?-1))x/I,utf 1509Capture group count = 1 1510Max lookbehind = 2 1511Options: utf 1512First code unit = 'x' 1513Subject length lower bound = 1 1514 a\x80zx\=offset=3 1515Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1 1516 1517/[\W\p{Any}]/B 1518------------------------------------------------------------------ 1519 Bra 1520 [\x00-/:-@[-^`{-\xff\p{Any}] 1521 Ket 1522 End 1523------------------------------------------------------------------ 1524 abc 1525 0: a 1526 123 1527 0: 1 1528 1529/[\W\pL]/B 1530------------------------------------------------------------------ 1531 Bra 1532 [\x00-/:-@[-^`{-\xff\p{L}] 1533 Ket 1534 End 1535------------------------------------------------------------------ 1536 abc 1537 0: a 1538\= Expect no match 1539 123 1540No match 1541 1542/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf 1543Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) 1544 1545/[\s[:^ascii:]]/B,ucp 1546------------------------------------------------------------------ 1547 Bra 1548 [\x80-\xff\p{Xsp}] 1549 Ket 1550 End 1551------------------------------------------------------------------ 1552 1553# A special extra option allows excaped surrogate code points in 8-bit mode, 1554# but subjects containing them must not be UTF-checked. 1555 1556/\x{d800}/I,utf,allow_surrogate_escapes 1557Capture group count = 0 1558Options: utf 1559Extra options: allow_surrogate_escapes 1560First code unit = \xed 1561Last code unit = \x80 1562Subject length lower bound = 1 1563 \x{d800}\=no_utf_check 1564 0: \x{d800} 1565 1566/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes 1567 \x{dfff}\x{df01}\=no_utf_check 1568 0: \x{dfff}\x{df01} 1569 1570# This has different starting code units in 8-bit mode. 1571 1572/^[^ab]/IB,utf 1573------------------------------------------------------------------ 1574 Bra 1575 ^ 1576 [\x00-`c-\xff] (neg) 1577 Ket 1578 End 1579------------------------------------------------------------------ 1580Capture group count = 0 1581Compile options: utf 1582Overall options: anchored utf 1583Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 1584 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 1585 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 1586 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 1587 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 1588 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 1589 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf 1590 \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee 1591 \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd 1592 \xfe \xff 1593Subject length lower bound = 1 1594 c 1595 0: c 1596 \x{ff} 1597 0: \x{ff} 1598 \x{100} 1599 0: \x{100} 1600\= Expect no match 1601 aaa 1602No match 1603 1604# Offsets are different in 8-bit mode. 1605 1606/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout 1607 123abcáyzabcdef789abcሴqr 1608 1(2) Old 6 6 "" New 6 8 "<>" 1609 2(2) Old 13 13 "" New 15 17 "<>" 1610 3(2) Old 13 16 "def" New 17 22 "<def>" 1611 4(2) Old 22 22 "" New 28 30 "<>" 1612 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr 1613 1614# Check name length with non-ASCII characters 1615 1616/(?'ABáC678901234567890123456789012'...)/utf 1617 1618/(?'ABáC6789012345678901234567890123'...)/utf 1619Failed: error 148 at offset 36: subpattern name is too long (maximum 32 code units) 1620 1621/(?'ABZC6789012345678901234567890123'...)/utf 1622 1623/(?(n/utf 1624Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) 1625 1626/(?(á/utf 1627Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?) 1628 1629# Invalid UTF-8 tests 1630 1631/.../g,match_invalid_utf 1632 abcd\x80wxzy\x80pqrs 1633 0: abc 1634 0: wxz 1635 0: pqr 1636 abcd\x{80}wxzy\x80pqrs 1637 0: abc 1638 0: d\x{80}w 1639 0: xzy 1640 0: pqr 1641 1642/abc/match_invalid_utf 1643 ab\x80ab\=ph 1644Partial match: ab 1645\= Expect no match 1646 ab\x80cdef\=ph 1647No match 1648 1649/ab$/match_invalid_utf 1650 ab\x80cdeab 1651 0: ab 1652\= Expect no match 1653 ab\x80cde 1654No match 1655 1656/.../g,match_invalid_utf 1657 abcd\x{80}wxzy\x80pqrs 1658 0: abc 1659 0: d\x{80}w 1660 0: xzy 1661 0: pqr 1662 1663/(?<=x)../g,match_invalid_utf 1664 abcd\x{80}wxzy\x80pqrs 1665 0: zy 1666 abcd\x{80}wxzy\x80xpqrs 1667 0: zy 1668 0: pq 1669 1670/X$/match_invalid_utf 1671\= Expect no match 1672 X\xc4 1673No match 1674 1675/(?<=..)X/match_invalid_utf,aftertext 1676 AB\x80AQXYZ 1677 0: X 1678 0+ YZ 1679 AB\x80AQXYZ\=offset=5 1680 0: X 1681 0+ YZ 1682 AB\x80\x80AXYZXC\=offset=5 1683 0: X 1684 0+ C 1685\= Expect no match 1686 AB\x80XYZ 1687No match 1688 AB\x80XYZ\=offset=3 1689No match 1690 AB\xfeXYZ 1691No match 1692 AB\xffXYZ\=offset=3 1693No match 1694 AB\x80AXYZ 1695No match 1696 AB\x80AXYZ\=offset=4 1697No match 1698 AB\x80\x80AXYZ\=offset=5 1699No match 1700 1701/.../match_invalid_utf 1702 AB\xc4CCC 1703 0: CCC 1704\= Expect no match 1705 A\x{d800}B 1706No match 1707 A\x{110000}B 1708No match 1709 A\xc4B 1710No match 1711 1712/\bX/match_invalid_utf 1713 A\x80X 1714 0: X 1715 1716/\BX/match_invalid_utf 1717\= Expect no match 1718 A\x80X 1719No match 1720 1721/(?<=...)X/match_invalid_utf 1722 AAA\x80BBBXYZ 1723 0: X 1724\= Expect no match 1725 AAA\x80BXYZ 1726No match 1727 AAA\x80BBXYZ 1728No match 1729 1730# ------------------------------------- 1731 1732/(*UTF)(?=\x{123})/I 1733Capture group count = 0 1734May match empty string 1735Compile options: <none> 1736Overall options: utf 1737First code unit = \xc4 1738Last code unit = \xa3 1739Subject length lower bound = 1 1740 1741/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf 1742Capture group count = 0 1743Options: utf 1744Starting code units: \xc3 1745Last code unit = 'X' 1746Subject length lower bound = 3 1747 1748/[,]/BI,utf 1749------------------------------------------------------------------ 1750 Bra 1751 [,\x{fff9f}] 1752 Ket 1753 End 1754------------------------------------------------------------------ 1755Capture group count = 0 1756Options: utf 1757Starting code units: , \xf3 1758Subject length lower bound = 1 1759 1760/[\x{fff4}-\x{ffff8}]/I,utf 1761Capture group count = 0 1762Options: utf 1763Starting code units: \xef \xf0 \xf1 \xf2 \xf3 1764Subject length lower bound = 1 1765 1766/[\x{fff4}-\x{afff8}\x{10ffff}]/I,utf 1767Capture group count = 0 1768Options: utf 1769Starting code units: \xef \xf0 \xf1 \xf2 \xf4 1770Subject length lower bound = 1 1771 1772/[\xff\x{ffff}]/I,utf 1773Capture group count = 0 1774Options: utf 1775Starting code units: \xc3 \xef 1776Subject length lower bound = 1 1777 1778/[\xff\x{ff}]/I,utf 1779Capture group count = 0 1780Options: utf 1781Starting code units: \xc3 1782Subject length lower bound = 1 1783 abc\x{ff}def 1784 0: \x{ff} 1785 1786/[\xff\x{ff}]/I 1787Capture group count = 0 1788First code unit = \xff 1789Subject length lower bound = 1 1790 abc\x{ff}def 1791 0: \xff 1792 1793/[Ss]/I 1794Capture group count = 0 1795First code unit = 'S' (caseless) 1796Subject length lower bound = 1 1797 1798/[Ss]/I,utf 1799Capture group count = 0 1800Options: utf 1801Starting code units: S s 1802Subject length lower bound = 1 1803 1804/(?:\x{ff}|\x{3000})/I,utf 1805Capture group count = 0 1806Options: utf 1807Starting code units: \xc3 \xe3 1808Subject length lower bound = 1 1809 1810/x/utf 1811 abxyz 1812 0: x 1813 \x80\=startchar 1814Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0 1815 abc\x80\=startchar 1816Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3 1817 abc\x80\=startchar,offset=3 1818Error -36 (bad UTF-8 offset) 1819 1820/\x{c1}+\x{e1}/iIB,ucp 1821------------------------------------------------------------------ 1822 Bra 1823 /i \x{c1}+ 1824 /i \x{e1} 1825 Ket 1826 End 1827------------------------------------------------------------------ 1828Capture group count = 0 1829Options: caseless ucp 1830First code unit = \xc1 (caseless) 1831Last code unit = \xe1 (caseless) 1832Subject length lower bound = 2 1833 \x{c1}\x{c1}\x{c1} 1834 0: \xc1\xc1\xc1 1835 \x{e1}\x{e1}\x{e1} 1836 0: \xe1\xe1\xe1 1837 1838/a|\x{c1}/iI,ucp 1839Capture group count = 0 1840Options: caseless ucp 1841Starting code units: A a \xc1 \xe1 1842Subject length lower bound = 1 1843 \x{e1}xxx 1844 0: \xe1 1845 1846/a|\x{c1}/iI,utf 1847Capture group count = 0 1848Options: caseless utf 1849Starting code units: A a \xc3 1850Subject length lower bound = 1 1851 \x{e1}xxx 1852 0: \x{e1} 1853 1854/\x{c1}|\x{e1}/iI,ucp 1855Capture group count = 0 1856Options: caseless ucp 1857First code unit = \xc1 (caseless) 1858Subject length lower bound = 1 1859 1860/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended 1861 X\x{e1}Y 1862 1: >\xc1< 1863 1864/X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended 1865 X\x{c1}Y 1866 1: >\xe1< 1867 1868# Without UTF or UCP characters > 127 have only one case in the default locale. 1869 1870/X(\x{e1})Y/replace=>\U$1<,substitute_extended 1871 X\x{e1}Y 1872 1: >\xe1< 1873 1874/A/utf,match_invalid_utf,caseless 1875 \xe5A 1876 0: A 1877 1878/\bch\b/utf,match_invalid_utf 1879 qchq\=ph 1880Partial match: 1881 qchq\=ps 1882Partial match: 1883 1884# End of testinput10 1885