1# This set of tests is for UTF-16 and UTF-32 support, including Unicode 2# properties. It is relevant only to the 16-bit and 32-bit libraries. The 3# output is different for each library, so there are separate output files. 4 5/���xxx/IB,utf,no_utf_check 6** Failed: invalid UTF-8 string cannot be converted to 16-bit string 7 8/abc/utf 9 �] 10** Failed: invalid UTF-8 string cannot be used as input in UTF mode 11 12# Check maximum character size 13 14/\x{ffff}/IB,utf 15------------------------------------------------------------------ 16 Bra 17 \x{ffff} 18 Ket 19 End 20------------------------------------------------------------------ 21Capture group count = 0 22Options: utf 23First code unit = \x{ffff} 24Subject length lower bound = 1 25 26/\x{10000}/IB,utf 27------------------------------------------------------------------ 28 Bra 29 \x{10000} 30 Ket 31 End 32------------------------------------------------------------------ 33Capture group count = 0 34Options: utf 35First code unit = \x{d800} 36Last code unit = \x{dc00} 37Subject length lower bound = 1 38 39/\x{100}/IB,utf 40------------------------------------------------------------------ 41 Bra 42 \x{100} 43 Ket 44 End 45------------------------------------------------------------------ 46Capture group count = 0 47Options: utf 48First code unit = \x{100} 49Subject length lower bound = 1 50 51/\x{1000}/IB,utf 52------------------------------------------------------------------ 53 Bra 54 \x{1000} 55 Ket 56 End 57------------------------------------------------------------------ 58Capture group count = 0 59Options: utf 60First code unit = \x{1000} 61Subject length lower bound = 1 62 63/\x{10000}/IB,utf 64------------------------------------------------------------------ 65 Bra 66 \x{10000} 67 Ket 68 End 69------------------------------------------------------------------ 70Capture group count = 0 71Options: utf 72First code unit = \x{d800} 73Last code unit = \x{dc00} 74Subject length lower bound = 1 75 76/\x{100000}/IB,utf 77------------------------------------------------------------------ 78 Bra 79 \x{100000} 80 Ket 81 End 82------------------------------------------------------------------ 83Capture group count = 0 84Options: utf 85First code unit = \x{dbc0} 86Last code unit = \x{dc00} 87Subject length lower bound = 1 88 89/\x{10ffff}/IB,utf 90------------------------------------------------------------------ 91 Bra 92 \x{10ffff} 93 Ket 94 End 95------------------------------------------------------------------ 96Capture group count = 0 97Options: utf 98First code unit = \x{dbff} 99Last code unit = \x{dfff} 100Subject length lower bound = 1 101 102/[\x{ff}]/IB,utf 103------------------------------------------------------------------ 104 Bra 105 \x{ff} 106 Ket 107 End 108------------------------------------------------------------------ 109Capture group count = 0 110Options: utf 111First code unit = \xff 112Subject length lower bound = 1 113 114/[\x{100}]/IB,utf 115------------------------------------------------------------------ 116 Bra 117 \x{100} 118 Ket 119 End 120------------------------------------------------------------------ 121Capture group count = 0 122Options: utf 123First code unit = \x{100} 124Subject length lower bound = 1 125 126/\x80/IB,utf 127------------------------------------------------------------------ 128 Bra 129 \x{80} 130 Ket 131 End 132------------------------------------------------------------------ 133Capture group count = 0 134Options: utf 135First code unit = \x80 136Subject length lower bound = 1 137 138/\xff/IB,utf 139------------------------------------------------------------------ 140 Bra 141 \x{ff} 142 Ket 143 End 144------------------------------------------------------------------ 145Capture group count = 0 146Options: utf 147First code unit = \xff 148Subject length lower bound = 1 149 150/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf 151------------------------------------------------------------------ 152 Bra 153 \x{d55c}\x{ad6d}\x{c5b4} 154 Ket 155 End 156------------------------------------------------------------------ 157Capture group count = 0 158Options: utf 159First code unit = \x{d55c} 160Last code unit = \x{c5b4} 161Subject length lower bound = 3 162 \x{D55c}\x{ad6d}\x{C5B4} 163 0: \x{d55c}\x{ad6d}\x{c5b4} 164 165/\x{65e5}\x{672c}\x{8a9e}/IB,utf 166------------------------------------------------------------------ 167 Bra 168 \x{65e5}\x{672c}\x{8a9e} 169 Ket 170 End 171------------------------------------------------------------------ 172Capture group count = 0 173Options: utf 174First code unit = \x{65e5} 175Last code unit = \x{8a9e} 176Subject length lower bound = 3 177 \x{65e5}\x{672c}\x{8a9e} 178 0: \x{65e5}\x{672c}\x{8a9e} 179 180/\x{80}/IB,utf 181------------------------------------------------------------------ 182 Bra 183 \x{80} 184 Ket 185 End 186------------------------------------------------------------------ 187Capture group count = 0 188Options: utf 189First code unit = \x80 190Subject length lower bound = 1 191 192/\x{084}/IB,utf 193------------------------------------------------------------------ 194 Bra 195 \x{84} 196 Ket 197 End 198------------------------------------------------------------------ 199Capture group count = 0 200Options: utf 201First code unit = \x84 202Subject length lower bound = 1 203 204/\x{104}/IB,utf 205------------------------------------------------------------------ 206 Bra 207 \x{104} 208 Ket 209 End 210------------------------------------------------------------------ 211Capture group count = 0 212Options: utf 213First code unit = \x{104} 214Subject length lower bound = 1 215 216/\x{861}/IB,utf 217------------------------------------------------------------------ 218 Bra 219 \x{861} 220 Ket 221 End 222------------------------------------------------------------------ 223Capture group count = 0 224Options: utf 225First code unit = \x{861} 226Subject length lower bound = 1 227 228/\x{212ab}/IB,utf 229------------------------------------------------------------------ 230 Bra 231 \x{212ab} 232 Ket 233 End 234------------------------------------------------------------------ 235Capture group count = 0 236Options: utf 237First code unit = \x{d844} 238Last code unit = \x{deab} 239Subject length lower bound = 1 240 241/[^ab\xC0-\xF0]/IB,utf 242------------------------------------------------------------------ 243 Bra 244 [\x00-`c-\xbf\xf1-\xff] (neg) 245 Ket 246 End 247------------------------------------------------------------------ 248Capture group count = 0 249Options: utf 250Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 251 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 252 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 253 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 254 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 255 \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e 256 \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d 257 \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac 258 \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb 259 \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb 260 \xfc \xfd \xfe \xff 261Subject length lower bound = 1 262 \x{f1} 263 0: \x{f1} 264 \x{bf} 265 0: \x{bf} 266 \x{100} 267 0: \x{100} 268 \x{1000} 269 0: \x{1000} 270\= Expect no match 271 \x{c0} 272No match 273 \x{f0} 274No match 275 276/Ā{3,4}/IB,utf 277------------------------------------------------------------------ 278 Bra 279 \x{100}{3} 280 \x{100}?+ 281 Ket 282 End 283------------------------------------------------------------------ 284Capture group count = 0 285Options: utf 286First code unit = \x{100} 287Last code unit = \x{100} 288Subject length lower bound = 3 289 \x{100}\x{100}\x{100}\x{100\x{100} 290 0: \x{100}\x{100}\x{100} 291 292/(\x{100}+|x)/IB,utf 293------------------------------------------------------------------ 294 Bra 295 CBra 1 296 \x{100}++ 297 Alt 298 x 299 Ket 300 Ket 301 End 302------------------------------------------------------------------ 303Capture group count = 1 304Options: utf 305Starting code units: x \xff 306Subject length lower bound = 1 307 308/(\x{100}*a|x)/IB,utf 309------------------------------------------------------------------ 310 Bra 311 CBra 1 312 \x{100}*+ 313 a 314 Alt 315 x 316 Ket 317 Ket 318 End 319------------------------------------------------------------------ 320Capture group count = 1 321Options: utf 322Starting code units: a x \xff 323Subject length lower bound = 1 324 325/(\x{100}{0,2}a|x)/IB,utf 326------------------------------------------------------------------ 327 Bra 328 CBra 1 329 \x{100}{0,2}+ 330 a 331 Alt 332 x 333 Ket 334 Ket 335 End 336------------------------------------------------------------------ 337Capture group count = 1 338Options: utf 339Starting code units: a x \xff 340Subject length lower bound = 1 341 342/(\x{100}{1,2}a|x)/IB,utf 343------------------------------------------------------------------ 344 Bra 345 CBra 1 346 \x{100} 347 \x{100}{0,1}+ 348 a 349 Alt 350 x 351 Ket 352 Ket 353 End 354------------------------------------------------------------------ 355Capture group count = 1 356Options: utf 357Starting code units: x \xff 358Subject length lower bound = 1 359 360/\x{100}/IB,utf 361------------------------------------------------------------------ 362 Bra 363 \x{100} 364 Ket 365 End 366------------------------------------------------------------------ 367Capture group count = 0 368Options: utf 369First code unit = \x{100} 370Subject length lower bound = 1 371 372/a\x{100}\x{101}*/IB,utf 373------------------------------------------------------------------ 374 Bra 375 a\x{100} 376 \x{101}*+ 377 Ket 378 End 379------------------------------------------------------------------ 380Capture group count = 0 381Options: utf 382First code unit = 'a' 383Last code unit = \x{100} 384Subject length lower bound = 2 385 386/a\x{100}\x{101}+/IB,utf 387------------------------------------------------------------------ 388 Bra 389 a\x{100} 390 \x{101}++ 391 Ket 392 End 393------------------------------------------------------------------ 394Capture group count = 0 395Options: utf 396First code unit = 'a' 397Last code unit = \x{101} 398Subject length lower bound = 3 399 400/[^\x{c4}]/IB 401------------------------------------------------------------------ 402 Bra 403 [^\x{c4}] 404 Ket 405 End 406------------------------------------------------------------------ 407Capture group count = 0 408Subject length lower bound = 1 409 410/[\x{100}]/IB,utf 411------------------------------------------------------------------ 412 Bra 413 \x{100} 414 Ket 415 End 416------------------------------------------------------------------ 417Capture group count = 0 418Options: utf 419First code unit = \x{100} 420Subject length lower bound = 1 421 \x{100} 422 0: \x{100} 423 Z\x{100} 424 0: \x{100} 425 \x{100}Z 426 0: \x{100} 427 428/[\xff]/IB,utf 429------------------------------------------------------------------ 430 Bra 431 \x{ff} 432 Ket 433 End 434------------------------------------------------------------------ 435Capture group count = 0 436Options: utf 437First code unit = \xff 438Subject length lower bound = 1 439 >\x{ff}< 440 0: \x{ff} 441 442/[^\xff]/IB,utf 443------------------------------------------------------------------ 444 Bra 445 [^\x{ff}] 446 Ket 447 End 448------------------------------------------------------------------ 449Capture group count = 0 450Options: utf 451Subject length lower bound = 1 452 453/\x{100}abc(xyz(?1))/IB,utf 454------------------------------------------------------------------ 455 Bra 456 \x{100}abc 457 CBra 1 458 xyz 459 Recurse 460 Ket 461 Ket 462 End 463------------------------------------------------------------------ 464Capture group count = 1 465Options: utf 466First code unit = \x{100} 467Last code unit = 'z' 468Subject length lower bound = 7 469 470/\777/I,utf 471Capture group count = 0 472Options: utf 473First code unit = \x{1ff} 474Subject length lower bound = 1 475 \x{1ff} 476 0: \x{1ff} 477 \777 478 0: \x{1ff} 479 480/\x{100}+\x{200}/IB,utf 481------------------------------------------------------------------ 482 Bra 483 \x{100}++ 484 \x{200} 485 Ket 486 End 487------------------------------------------------------------------ 488Capture group count = 0 489Options: utf 490First code unit = \x{100} 491Last code unit = \x{200} 492Subject length lower bound = 2 493 494/\x{100}+X/IB,utf 495------------------------------------------------------------------ 496 Bra 497 \x{100}++ 498 X 499 Ket 500 End 501------------------------------------------------------------------ 502Capture group count = 0 503Options: utf 504First code unit = \x{100} 505Last code unit = 'X' 506Subject length lower bound = 2 507 508/^[\QĀ\E-\QŐ\E/B,utf 509Failed: error 106 at offset 13: missing terminating ] for character class 510 511/X/utf 512 XX\x{d800}\=no_utf_check 513 0: X 514 XX\x{da00}\=no_utf_check 515 0: X 516 XX\x{dc00}\=no_utf_check 517 0: X 518 XX\x{de00}\=no_utf_check 519 0: X 520 XX\x{dfff}\=no_utf_check 521 0: X 522\= Expect UTF error 523 XX\x{d800} 524Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 525 XX\x{da00} 526Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 527 XX\x{dc00} 528Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 529 XX\x{de00} 530Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 531 XX\x{dfff} 532Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 533 XX\x{110000} 534** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 535 XX\x{d800}\x{1234} 536Failed: error -25: UTF-16 error: invalid low surrogate at offset 2 537\= Expect no match 538 XX\x{d800}\=offset=3 539No match 540 541/(?<=.)X/utf 542 XX\x{d800}\=offset=3 543Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 544 545/(*UTF16)\x{11234}/ 546 abcd\x{11234}pqr 547 0: \x{11234} 548 549/(*UTF)\x{11234}/I 550Capture group count = 0 551Compile options: <none> 552Overall options: utf 553First code unit = \x{d804} 554Last code unit = \x{de34} 555Subject length lower bound = 1 556 abcd\x{11234}pqr 557 0: \x{11234} 558 559/(*UTF-32)\x{11234}/ 560Failed: error 160 at offset 5: (*VERB) not recognized or malformed 561 abcd\x{11234}pqr 562 563/(*UTF-32)\x{112}/ 564Failed: error 160 at offset 5: (*VERB) not recognized or malformed 565 abcd\x{11234}pqr 566 567/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I 568Capture group count = 0 569Compile options: <none> 570Overall options: utf 571\R matches any Unicode newline 572Forced newline is CRLF 573First code unit = 'a' 574Last code unit = 'b' 575Subject length lower bound = 3 576 577/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I 578Failed: error 160 at offset 14: (*VERB) not recognized or malformed 579 580/\h/I,utf 581Capture group count = 0 582Options: utf 583Starting code units: \x09 \x20 \xa0 \xff 584Subject length lower bound = 1 585 ABC\x{09} 586 0: \x{09} 587 ABC\x{20} 588 0: 589 ABC\x{a0} 590 0: \x{a0} 591 ABC\x{1680} 592 0: \x{1680} 593 ABC\x{180e} 594 0: \x{180e} 595 ABC\x{2000} 596 0: \x{2000} 597 ABC\x{202f} 598 0: \x{202f} 599 ABC\x{205f} 600 0: \x{205f} 601 ABC\x{3000} 602 0: \x{3000} 603 604/\v/I,utf 605Capture group count = 0 606Options: utf 607Starting code units: \x0a \x0b \x0c \x0d \x85 \xff 608Subject length lower bound = 1 609 ABC\x{0a} 610 0: \x{0a} 611 ABC\x{0b} 612 0: \x{0b} 613 ABC\x{0c} 614 0: \x{0c} 615 ABC\x{0d} 616 0: \x{0d} 617 ABC\x{85} 618 0: \x{85} 619 ABC\x{2028} 620 0: \x{2028} 621 622/\h*A/I,utf 623Capture group count = 0 624Options: utf 625Starting code units: \x09 \x20 A \xa0 \xff 626Last code unit = 'A' 627Subject length lower bound = 1 628 CDBABC 629 0: A 630 \x{2000}ABC 631 0: \x{2000}A 632 633/\R*A/I,bsr=unicode,utf 634Capture group count = 0 635Options: utf 636\R matches any Unicode newline 637Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff 638Last code unit = 'A' 639Subject length lower bound = 1 640 CDBABC 641 0: A 642 \x{2028}A 643 0: \x{2028}A 644 645/\v+A/I,utf 646Capture group count = 0 647Options: utf 648Starting code units: \x0a \x0b \x0c \x0d \x85 \xff 649Last code unit = 'A' 650Subject length lower bound = 2 651 652/\s?xxx\s/I,utf 653Capture group count = 0 654Options: utf 655Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x 656Last code unit = 'x' 657Subject length lower bound = 4 658 659/\sxxx\s/I,utf,tables=2 660Capture group count = 0 661Options: utf 662Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0 663Last code unit = 'x' 664Subject length lower bound = 5 665 AB\x{85}xxx\x{a0}XYZ 666 0: \x{85}xxx\x{a0} 667 AB\x{a0}xxx\x{85}XYZ 668 0: \x{a0}xxx\x{85} 669 670/\S \S/I,utf,tables=2 671Capture group count = 0 672Options: utf 673Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f 674 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e 675 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C 676 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 677 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 678 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 679 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4 680 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 681 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 682 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 683 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 684 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef 685 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe 686 \xff 687Last code unit = ' ' 688Subject length lower bound = 3 689 \x{a2} \x{84} 690 0: \x{a2} \x{84} 691 A Z 692 0: A Z 693 694/a+/utf 695 a\x{123}aa\=offset=1 696 0: aa 697 a\x{123}aa\=offset=2 698 0: aa 699 a\x{123}aa\=offset=3 700 0: a 701\= Expect no match 702 a\x{123}aa\=offset=4 703No match 704\= Expect bad offset error 705 a\x{123}aa\=offset=5 706Failed: error -33: bad offset value 707 a\x{123}aa\=offset=6 708Failed: error -33: bad offset value 709 710/\x{1234}+/Ii,utf 711Capture group count = 0 712Options: caseless utf 713First code unit = \x{1234} 714Subject length lower bound = 1 715 716/\x{1234}+?/Ii,utf 717Capture group count = 0 718Options: caseless utf 719First code unit = \x{1234} 720Subject length lower bound = 1 721 722/\x{1234}++/Ii,utf 723Capture group count = 0 724Options: caseless utf 725First code unit = \x{1234} 726Subject length lower bound = 1 727 728/\x{1234}{2}/Ii,utf 729Capture group count = 0 730Options: caseless utf 731First code unit = \x{1234} 732Last code unit = \x{1234} 733Subject length lower bound = 2 734 735/[^\x{c4}]/IB,utf 736------------------------------------------------------------------ 737 Bra 738 [^\x{c4}] 739 Ket 740 End 741------------------------------------------------------------------ 742Capture group count = 0 743Options: utf 744Subject length lower bound = 1 745 746/X+\x{200}/IB,utf 747------------------------------------------------------------------ 748 Bra 749 X++ 750 \x{200} 751 Ket 752 End 753------------------------------------------------------------------ 754Capture group count = 0 755Options: utf 756First code unit = 'X' 757Last code unit = \x{200} 758Subject length lower bound = 2 759 760/\R/I,utf 761Capture group count = 0 762Options: utf 763Starting code units: \x0a \x0b \x0c \x0d \x85 \xff 764Subject length lower bound = 1 765 766# Check bad offset 767 768/a/utf 769\= Expect bad UTF-16 offset, or no match in 32-bit 770 \x{10000}\=offset=1 771Error -36 (bad UTF-16 offset) 772 \x{10000}ab\=offset=1 773Error -36 (bad UTF-16 offset) 774\= Expect 16-bit match, 32-bit no match 775 \x{10000}ab\=offset=2 776 0: a 777\= Expect no match 778 \x{10000}ab\=offset=3 779No match 780\= Expect no match in 16-bit, bad offset in 32-bit 781 \x{10000}ab\=offset=4 782No match 783\= Expect bad offset 784 \x{10000}ab\=offset=5 785Failed: error -33: bad offset value 786 787/�/utf 788Failed: error -26 at offset 0: UTF-16 error: isolated low surrogate 789 790/\w+\x{C4}/B,utf 791------------------------------------------------------------------ 792 Bra 793 \w++ 794 \x{c4} 795 Ket 796 End 797------------------------------------------------------------------ 798 a\x{C4}\x{C4} 799 0: a\x{c4} 800 801/\w+\x{C4}/B,utf,tables=2 802------------------------------------------------------------------ 803 Bra 804 \w+ 805 \x{c4} 806 Ket 807 End 808------------------------------------------------------------------ 809 a\x{C4}\x{C4} 810 0: a\x{c4}\x{c4} 811 812/\W+\x{C4}/B,utf 813------------------------------------------------------------------ 814 Bra 815 \W+ 816 \x{c4} 817 Ket 818 End 819------------------------------------------------------------------ 820 !\x{C4} 821 0: !\x{c4} 822 823/\W+\x{C4}/B,utf,tables=2 824------------------------------------------------------------------ 825 Bra 826 \W++ 827 \x{c4} 828 Ket 829 End 830------------------------------------------------------------------ 831 !\x{C4} 832 0: !\x{c4} 833 834/\W+\x{A1}/B,utf 835------------------------------------------------------------------ 836 Bra 837 \W+ 838 \x{a1} 839 Ket 840 End 841------------------------------------------------------------------ 842 !\x{A1} 843 0: !\x{a1} 844 845/\W+\x{A1}/B,utf,tables=2 846------------------------------------------------------------------ 847 Bra 848 \W+ 849 \x{a1} 850 Ket 851 End 852------------------------------------------------------------------ 853 !\x{A1} 854 0: !\x{a1} 855 856/X\s+\x{A0}/B,utf 857------------------------------------------------------------------ 858 Bra 859 X 860 \s++ 861 \x{a0} 862 Ket 863 End 864------------------------------------------------------------------ 865 X\x20\x{A0}\x{A0} 866 0: X \x{a0} 867 868/X\s+\x{A0}/B,utf,tables=2 869------------------------------------------------------------------ 870 Bra 871 X 872 \s+ 873 \x{a0} 874 Ket 875 End 876------------------------------------------------------------------ 877 X\x20\x{A0}\x{A0} 878 0: X \x{a0}\x{a0} 879 880/\S+\x{A0}/B,utf 881------------------------------------------------------------------ 882 Bra 883 \S+ 884 \x{a0} 885 Ket 886 End 887------------------------------------------------------------------ 888 X\x{A0}\x{A0} 889 0: X\x{a0}\x{a0} 890 891/\S+\x{A0}/B,utf,tables=2 892------------------------------------------------------------------ 893 Bra 894 \S++ 895 \x{a0} 896 Ket 897 End 898------------------------------------------------------------------ 899 X\x{A0}\x{A0} 900 0: X\x{a0} 901 902/\x{a0}+\s!/B,utf 903------------------------------------------------------------------ 904 Bra 905 \x{a0}++ 906 \s 907 ! 908 Ket 909 End 910------------------------------------------------------------------ 911 \x{a0}\x20! 912 0: \x{a0} ! 913 914/\x{a0}+\s!/B,utf,tables=2 915------------------------------------------------------------------ 916 Bra 917 \x{a0}+ 918 \s 919 ! 920 Ket 921 End 922------------------------------------------------------------------ 923 \x{a0}\x20! 924 0: \x{a0} ! 925 926/(*UTF)abc/never_utf 927Failed: error 174 at offset 6: using UTF is disabled by the application 928 929/abc/utf,never_utf 930Failed: error 174 at offset 0: using UTF is disabled by the application 931 932/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf 933------------------------------------------------------------------ 934 Bra 935 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} 936 Ket 937 End 938------------------------------------------------------------------ 939Capture group count = 0 940Options: caseless utf 941First code unit = 'A' (caseless) 942Last code unit = \x{1fb0} (caseless) 943Subject length lower bound = 5 944 945/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf 946------------------------------------------------------------------ 947 Bra 948 A\x{391}\x{10427}\x{ff3a}\x{1fb0} 949 Ket 950 End 951------------------------------------------------------------------ 952Capture group count = 0 953Options: utf 954First code unit = 'A' 955Last code unit = \x{1fb0} 956Subject length lower bound = 5 957 958/AB\x{1fb0}/IB,utf 959------------------------------------------------------------------ 960 Bra 961 AB\x{1fb0} 962 Ket 963 End 964------------------------------------------------------------------ 965Capture group count = 0 966Options: utf 967First code unit = 'A' 968Last code unit = \x{1fb0} 969Subject length lower bound = 3 970 971/AB\x{1fb0}/IBi,utf 972------------------------------------------------------------------ 973 Bra 974 /i AB\x{1fb0} 975 Ket 976 End 977------------------------------------------------------------------ 978Capture group count = 0 979Options: caseless utf 980First code unit = 'A' (caseless) 981Last code unit = \x{1fb0} (caseless) 982Subject length lower bound = 3 983 984/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf 985Capture group count = 0 986Options: caseless utf 987First code unit = \x{401} (caseless) 988Last code unit = \x{42f} (caseless) 989Subject length lower bound = 17 990 \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} 991 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} 992 \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} 993 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} 994 995/[ⱥ]/Bi,utf 996------------------------------------------------------------------ 997 Bra 998 /i \x{2c65} 999 Ket 1000 End 1001------------------------------------------------------------------ 1002 1003/[^ⱥ]/Bi,utf 1004------------------------------------------------------------------ 1005 Bra 1006 /i [^\x{2c65}] 1007 Ket 1008 End 1009------------------------------------------------------------------ 1010 1011/[[:blank:]]/B,ucp 1012------------------------------------------------------------------ 1013 Bra 1014 [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] 1015 Ket 1016 End 1017------------------------------------------------------------------ 1018 1019/\x{212a}+/Ii,utf 1020Capture group count = 0 1021Options: caseless utf 1022Starting code units: K k \xff 1023Subject length lower bound = 1 1024 KKkk\x{212a} 1025 0: KKkk\x{212a} 1026 1027/s+/Ii,utf 1028Capture group count = 0 1029Options: caseless utf 1030Starting code units: S s \xff 1031Subject length lower bound = 1 1032 SSss\x{17f} 1033 0: SSss\x{17f} 1034 1035# Non-UTF characters should give errors in both 16-bit and 32-bit modes. 1036 1037/\x{110000}/utf 1038Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large 1039 1040/\o{4200000}/utf 1041Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large 1042 1043/\x{100}*A/IB,utf 1044------------------------------------------------------------------ 1045 Bra 1046 \x{100}*+ 1047 A 1048 Ket 1049 End 1050------------------------------------------------------------------ 1051Capture group count = 0 1052Options: utf 1053Starting code units: A \xff 1054Last code unit = 'A' 1055Subject length lower bound = 1 1056 A 1057 0: A 1058 1059/\x{100}*\d(?R)/IB,utf 1060------------------------------------------------------------------ 1061 Bra 1062 \x{100}*+ 1063 \d 1064 Recurse 1065 Ket 1066 End 1067------------------------------------------------------------------ 1068Capture group count = 0 1069Options: utf 1070Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff 1071Subject length lower bound = 1 1072 1073/[Z\x{100}]/IB,utf 1074------------------------------------------------------------------ 1075 Bra 1076 [Z\x{100}] 1077 Ket 1078 End 1079------------------------------------------------------------------ 1080Capture group count = 0 1081Options: utf 1082Starting code units: Z \xff 1083Subject length lower bound = 1 1084 Z\x{100} 1085 0: Z 1086 \x{100} 1087 0: \x{100} 1088 \x{100}Z 1089 0: \x{100} 1090 1091/[z-\x{100}]/IB,utf 1092------------------------------------------------------------------ 1093 Bra 1094 [z-\xff\x{100}] 1095 Ket 1096 End 1097------------------------------------------------------------------ 1098Capture group count = 0 1099Options: utf 1100Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 1101 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 1102 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 1103 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 1104 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 1105 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 1106 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 1107 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 1108 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 1109Subject length lower bound = 1 1110 1111/[z\Qa-d]Ā\E]/IB,utf 1112------------------------------------------------------------------ 1113 Bra 1114 [\-\]adz\x{100}] 1115 Ket 1116 End 1117------------------------------------------------------------------ 1118Capture group count = 0 1119Options: utf 1120Starting code units: - ] a d z \xff 1121Subject length lower bound = 1 1122 \x{100} 1123 0: \x{100} 1124 Ā 1125 0: \x{100} 1126 1127/[ab\x{100}]abc(xyz(?1))/IB,utf 1128------------------------------------------------------------------ 1129 Bra 1130 [ab\x{100}] 1131 abc 1132 CBra 1 1133 xyz 1134 Recurse 1135 Ket 1136 Ket 1137 End 1138------------------------------------------------------------------ 1139Capture group count = 1 1140Options: utf 1141Starting code units: a b \xff 1142Last code unit = 'z' 1143Subject length lower bound = 7 1144 1145/\x{100}*\s/IB,utf 1146------------------------------------------------------------------ 1147 Bra 1148 \x{100}*+ 1149 \s 1150 Ket 1151 End 1152------------------------------------------------------------------ 1153Capture group count = 0 1154Options: utf 1155Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff 1156Subject length lower bound = 1 1157 1158/\x{100}*\d/IB,utf 1159------------------------------------------------------------------ 1160 Bra 1161 \x{100}*+ 1162 \d 1163 Ket 1164 End 1165------------------------------------------------------------------ 1166Capture group count = 0 1167Options: utf 1168Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff 1169Subject length lower bound = 1 1170 1171/\x{100}*\w/IB,utf 1172------------------------------------------------------------------ 1173 Bra 1174 \x{100}*+ 1175 \w 1176 Ket 1177 End 1178------------------------------------------------------------------ 1179Capture group count = 0 1180Options: utf 1181Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 1182 Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z 1183 \xff 1184Subject length lower bound = 1 1185 1186/\x{100}*\D/IB,utf 1187------------------------------------------------------------------ 1188 Bra 1189 \x{100}* 1190 \D 1191 Ket 1192 End 1193------------------------------------------------------------------ 1194Capture group count = 0 1195Options: utf 1196Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 1197 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 1198 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > 1199 ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c 1200 d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 1201 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 1202 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 1203 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf 1204 \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe 1205 \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd 1206 \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc 1207 \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb 1208 \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa 1209 \xfb \xfc \xfd \xfe \xff 1210Subject length lower bound = 1 1211 1212/\x{100}*\S/IB,utf 1213------------------------------------------------------------------ 1214 Bra 1215 \x{100}* 1216 \S 1217 Ket 1218 End 1219------------------------------------------------------------------ 1220Capture group count = 0 1221Options: utf 1222Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f 1223 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e 1224 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C 1225 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 1226 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 1227 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 1228 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 1229 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 1230 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 1231 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf 1232 \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde 1233 \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed 1234 \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc 1235 \xfd \xfe \xff 1236Subject length lower bound = 1 1237 1238/\x{100}*\W/IB,utf 1239------------------------------------------------------------------ 1240 Bra 1241 \x{100}* 1242 \W 1243 Ket 1244 End 1245------------------------------------------------------------------ 1246Capture group count = 0 1247Options: utf 1248Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 1249 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 1250 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > 1251 ? @ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 1252 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 1253 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 1254 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 1255 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 1256 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 1257 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 1258 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 1259 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 1260Subject length lower bound = 1 1261 1262/[\x{105}-\x{109}]/IBi,utf 1263------------------------------------------------------------------ 1264 Bra 1265 [\x{104}-\x{109}] 1266 Ket 1267 End 1268------------------------------------------------------------------ 1269Capture group count = 0 1270Options: caseless utf 1271Starting code units: \xff 1272Subject length lower bound = 1 1273 \x{104} 1274 0: \x{104} 1275 \x{105} 1276 0: \x{105} 1277 \x{109} 1278 0: \x{109} 1279\= Expect no match 1280 \x{100} 1281No match 1282 \x{10a} 1283No match 1284 1285/[z-\x{100}]/IBi,utf 1286------------------------------------------------------------------ 1287 Bra 1288 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] 1289 Ket 1290 End 1291------------------------------------------------------------------ 1292Capture group count = 0 1293Options: caseless utf 1294Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 1295 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 1296 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 1297 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 1298 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 1299 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 1300 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 1301 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef 1302 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe 1303 \xff 1304Subject length lower bound = 1 1305 Z 1306 0: Z 1307 z 1308 0: z 1309 \x{39c} 1310 0: \x{39c} 1311 \x{178} 1312 0: \x{178} 1313 | 1314 0: | 1315 \x{80} 1316 0: \x{80} 1317 \x{ff} 1318 0: \x{ff} 1319 \x{100} 1320 0: \x{100} 1321 \x{101} 1322 0: \x{101} 1323\= Expect no match 1324 \x{102} 1325No match 1326 Y 1327No match 1328 y 1329No match 1330 1331/[z-\x{100}]/IBi,utf 1332------------------------------------------------------------------ 1333 Bra 1334 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] 1335 Ket 1336 End 1337------------------------------------------------------------------ 1338Capture group count = 0 1339Options: caseless utf 1340Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 1341 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 1342 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 1343 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 1344 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 1345 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 1346 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 1347 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef 1348 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe 1349 \xff 1350Subject length lower bound = 1 1351 1352/\x{3a3}B/IBi,utf 1353------------------------------------------------------------------ 1354 Bra 1355 clist 03a3 03c2 03c3 1356 /i B 1357 Ket 1358 End 1359------------------------------------------------------------------ 1360Capture group count = 0 1361Options: caseless utf 1362Starting code units: \xff 1363Last code unit = 'B' (caseless) 1364Subject length lower bound = 2 1365 1366/./utf 1367 \x{110000} 1368** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 1369 1370/(*UTF)ab������z/B 1371------------------------------------------------------------------ 1372 Bra 1373 ab\x{fd}\x{bf}\x{bf}\x{bf}\x{bf}\x{bf}z 1374 Ket 1375 End 1376------------------------------------------------------------------ 1377 1378/ab������z/utf 1379** Failed: character value greater than 0x10ffff cannot be converted to UTF 1380 1381/[\W\p{Any}]/B 1382------------------------------------------------------------------ 1383 Bra 1384 [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffff}] 1385 Ket 1386 End 1387------------------------------------------------------------------ 1388 abc 1389 0: a 1390 123 1391 0: 1 1392 1393/[\W\pL]/B 1394------------------------------------------------------------------ 1395 Bra 1396 [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffff}] 1397 Ket 1398 End 1399------------------------------------------------------------------ 1400 abc 1401 0: a 1402 \x{100} 1403 0: \x{100} 1404 \x{308} 1405 0: \x{308} 1406\= Expect no match 1407 123 1408No match 1409 1410/[\s[:^ascii:]]/B,ucp 1411------------------------------------------------------------------ 1412 Bra 1413 [\x80-\xff\p{Xsp}\x{100}-\x{ffff}] 1414 Ket 1415 End 1416------------------------------------------------------------------ 1417 1418/\pP/ucp 1419 \x{7fffffff} 1420** Character \x{7fffffff} is greater than 0xffff and UTF-16 mode is not enabled. 1421** Truncation will probably give the wrong result. 1422No match 1423 1424# A special extra option allows excaped surrogate code points in 32-bit mode, 1425# but subjects containing them must not be UTF-checked. These patterns give 1426# errors in 16-bit mode. 1427 1428/\x{d800}/I,utf,allow_surrogate_escapes 1429Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode 1430 \x{d800}\=no_utf_check 1431 1432/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes 1433Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode 1434 \x{dfff}\x{df01}\=no_utf_check 1435 1436# This has different starting code units in 8-bit mode. 1437 1438/^[^ab]/IB,utf 1439------------------------------------------------------------------ 1440 Bra 1441 ^ 1442 [\x00-`c-\xff] (neg) 1443 Ket 1444 End 1445------------------------------------------------------------------ 1446Capture group count = 0 1447Compile options: utf 1448Overall options: anchored utf 1449Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 1450 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 1451 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 1452 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 1453 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 1454 \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e 1455 \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d 1456 \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac 1457 \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb 1458 \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca 1459 \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 1460 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 1461 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 1462 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 1463Subject length lower bound = 1 1464 c 1465 0: c 1466 \x{ff} 1467 0: \x{ff} 1468 \x{100} 1469 0: \x{100} 1470\= Expect no match 1471 aaa 1472No match 1473 1474# Offsets are different in 8-bit mode. 1475 1476/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout 1477 123abcáyzabcdef789abcሴqr 1478 1(2) Old 6 6 "" New 6 8 "<>" 1479 2(2) Old 12 12 "" New 14 16 "<>" 1480 3(2) Old 12 15 "def" New 16 21 "<def>" 1481 4(2) Old 21 21 "" New 27 29 "<>" 1482 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr 1483 1484# A few script run tests in non-UTF mode (but they need Unicode support) 1485 1486/^(*script_run:.{4})/ 1487 \x{3041}\x{30a1}\x{3007}\x{3007} Hiragana Katakana Han Han 1488 0: \x{3041}\x{30a1}\x{3007}\x{3007} 1489 \x{30a1}\x{3041}\x{3007}\x{3007} Katakana Hiragana Han Han 1490 0: \x{30a1}\x{3041}\x{3007}\x{3007} 1491 \x{1100}\x{2e80}\x{2e80}\x{1101} Hangul Han Han Hangul 1492 0: \x{1100}\x{2e80}\x{2e80}\x{1101} 1493 1494/^(*sr:.*)/utf,allow_surrogate_escapes 1495Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode 1496 \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana 1497 \x{d800}\x{dfff} Surrogates (Unknown) \=no_utf_check 1498 1499/(?(n/utf 1500Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) 1501 1502/(?(á/utf 1503Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) 1504 1505# Invalid UTF-16/32 tests. 1506 1507/.../g,match_invalid_utf 1508 abcd\x{df00}wxzy\x{df00}pqrs 1509 0: abc 1510 0: wxz 1511 0: pqr 1512 abcd\x{80}wxzy\x{df00}pqrs 1513 0: abc 1514 0: d\x{80}w 1515 0: xzy 1516 0: pqr 1517 1518/abc/match_invalid_utf 1519 ab\x{df00}ab\=ph 1520Partial match: ab 1521\= Expect no match 1522 ab\x{df00}cdef\=ph 1523No match 1524 1525/ab$/match_invalid_utf 1526 ab\x{df00}cdeab 1527 0: ab 1528\= Expect no match 1529 ab\x{df00}cde 1530No match 1531 1532/.../g,match_invalid_utf 1533 abcd\x{80}wxzy\x{df00}pqrs 1534 0: abc 1535 0: d\x{80}w 1536 0: xzy 1537 0: pqr 1538 1539/(?<=x)../g,match_invalid_utf 1540 abcd\x{80}wxzy\x{df00}pqrs 1541 0: zy 1542 abcd\x{80}wxzy\x{df00}xpqrs 1543 0: zy 1544 0: pq 1545 1546/X$/match_invalid_utf 1547\= Expect no match 1548 X\x{df00} 1549No match 1550 1551/(?<=..)X/match_invalid_utf,aftertext 1552 AB\x{df00}AQXYZ 1553 0: X 1554 0+ YZ 1555 AB\x{df00}AQXYZ\=offset=5 1556 0: X 1557 0+ YZ 1558 AB\x{df00}\x{df00}AXYZXC\=offset=5 1559 0: X 1560 0+ C 1561\= Expect no match 1562 AB\x{df00}XYZ 1563No match 1564 AB\x{df00}XYZ\=offset=3 1565No match 1566 AB\x{df00}AXYZ 1567No match 1568 AB\x{df00}AXYZ\=offset=4 1569No match 1570 AB\x{df00}\x{df00}AXYZ\=offset=5 1571No match 1572 1573/.../match_invalid_utf 1574\= Expect no match 1575 A\x{d800}B 1576No match 1577 A\x{110000}B 1578** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 1579 1580/aa/utf,ucp,match_invalid_utf,global 1581 aa\x{d800}aa 1582 0: aa 1583 0: aa 1584 1585/aa/utf,ucp,match_invalid_utf,global 1586 \x{d800}aa 1587 0: aa 1588 1589# ---------------------------------------------------- 1590 1591/(*UTF)(?=\x{123})/I 1592Capture group count = 0 1593May match empty string 1594Compile options: <none> 1595Overall options: utf 1596First code unit = \x{123} 1597Subject length lower bound = 1 1598 1599/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf 1600Capture group count = 0 1601Options: utf 1602First code unit = \xc1 (caseless) 1603Last code unit = \x{145} (caseless) 1604Subject length lower bound = 3 1605 1606/[\xff\x{ffff}]/I,utf 1607Capture group count = 0 1608Options: utf 1609Starting code units: \xff 1610Subject length lower bound = 1 1611 1612/[\xff\x{ff}]/I,utf 1613Capture group count = 0 1614Options: utf 1615Starting code units: \xff 1616Subject length lower bound = 1 1617 1618/[\xff\x{ff}]/I 1619Capture group count = 0 1620Starting code units: \xff 1621Subject length lower bound = 1 1622 1623/[Ss]/I 1624Capture group count = 0 1625First code unit = 'S' (caseless) 1626Subject length lower bound = 1 1627 1628/[Ss]/I,utf 1629Capture group count = 0 1630Options: utf 1631Starting code units: S s 1632Subject length lower bound = 1 1633 1634/(?:\x{ff}|\x{3000})/I,utf 1635Capture group count = 0 1636Options: utf 1637Starting code units: \xff 1638Subject length lower bound = 1 1639 1640# ---------------------------------------------------- 1641# UCP and casing tests 1642 1643/\x{120}/i,I 1644Capture group count = 0 1645Options: caseless 1646First code unit = \x{120} 1647Subject length lower bound = 1 1648 1649/\x{c1}/i,I,ucp 1650Capture group count = 0 1651Options: caseless ucp 1652First code unit = \xc1 (caseless) 1653Subject length lower bound = 1 1654 1655/[\x{120}\x{121}]/iB,ucp 1656------------------------------------------------------------------ 1657 Bra 1658 /i \x{120} 1659 Ket 1660 End 1661------------------------------------------------------------------ 1662 1663/[ab\x{120}]+/iB,ucp 1664------------------------------------------------------------------ 1665 Bra 1666 [ABab\x{120}-\x{121}]++ 1667 Ket 1668 End 1669------------------------------------------------------------------ 1670 aABb\x{121}\x{120} 1671 0: aABb\x{121}\x{120} 1672 1673/\x{c1}/i,no_start_optimize 1674\= Expect no match 1675 \x{e1} 1676No match 1677 1678/\x{120}\x{c1}/i,ucp,no_start_optimize 1679 \x{121}\x{e1} 1680 0: \x{121}\xe1 1681 1682/\x{120}\x{c1}/i,ucp 1683 \x{121}\x{e1} 1684 0: \x{121}\xe1 1685 1686/[^\x{120}]/i,no_start_optimize 1687 \x{121} 1688 0: \x{121} 1689 1690/[^\x{120}]/i,ucp,no_start_optimize 1691\= Expect no match 1692 \x{121} 1693No match 1694 1695/[^\x{120}]/i 1696 \x{121} 1697 0: \x{121} 1698 1699/[^\x{120}]/i,ucp 1700\= Expect no match 1701 \x{121} 1702No match 1703 1704/\x{120}{2}/i,ucp 1705 \x{121}\x{121} 1706 0: \x{121}\x{121} 1707 1708/[^\x{120}]{2}/i,ucp 1709\= Expect no match 1710 \x{121}\x{121} 1711No match 1712 1713/\x{c1}+\x{e1}/iB,ucp 1714------------------------------------------------------------------ 1715 Bra 1716 /i \x{c1}+ 1717 /i \x{e1} 1718 Ket 1719 End 1720------------------------------------------------------------------ 1721 \x{c1}\x{c1}\x{c1} 1722 0: \xc1\xc1\xc1 1723 1724/\x{c1}+\x{e1}/iIB,ucp 1725------------------------------------------------------------------ 1726 Bra 1727 /i \x{c1}+ 1728 /i \x{e1} 1729 Ket 1730 End 1731------------------------------------------------------------------ 1732Capture group count = 0 1733Options: caseless ucp 1734First code unit = \xc1 (caseless) 1735Last code unit = \xe1 (caseless) 1736Subject length lower bound = 2 1737 \x{c1}\x{c1}\x{c1} 1738 0: \xc1\xc1\xc1 1739 \x{e1}\x{e1}\x{e1} 1740 0: \xe1\xe1\xe1 1741 1742/a|\x{c1}/iI,ucp 1743Capture group count = 0 1744Options: caseless ucp 1745Starting code units: A a \xc1 \xe1 1746Subject length lower bound = 1 1747 \x{e1}xxx 1748 0: \xe1 1749 1750/\x{c1}|\x{e1}/iI,ucp 1751Capture group count = 0 1752Options: caseless ucp 1753First code unit = \xc1 (caseless) 1754Subject length lower bound = 1 1755 1756/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended 1757 X\x{e1}Y 1758 1: >\xc1< 1759 1760/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended 1761 X\x{121}Y 1762 1: >\x{120}< 1763 1764/s/i,ucp 1765 \x{17f} 1766 0: \x{17f} 1767 1768/s/i,utf 1769 \x{17f} 1770 0: \x{17f} 1771 1772/[^s]/i,ucp 1773\= Expect no match 1774 \x{17f} 1775No match 1776 1777/[^s]/i,utf 1778\= Expect no match 1779 \x{17f} 1780No match 1781 1782# ---------------------------------------------------- 1783 1784# End of testinput12 1785