1# This set of tests is for UTF-16 and UTF-32 support, including Unicode 2# properties. It is relevant only to the 16-bit and 32-bit libraries. The 3# output is different for each library, so there are separate output files. 4 5/���xxx/IB,utf,no_utf_check 6** Failed: invalid UTF-8 string cannot be converted to 16-bit string 7 8/abc/utf 9 �] 10** Failed: invalid UTF-8 string cannot be used as input in UTF mode 11 12# Check maximum character size 13 14/\x{ffff}/IB,utf 15------------------------------------------------------------------ 16 Bra 17 \x{ffff} 18 Ket 19 End 20------------------------------------------------------------------ 21Capturing subpattern count = 0 22Options: utf 23First code unit = \x{ffff} 24Subject length lower bound = 1 25 26/\x{10000}/IB,utf 27------------------------------------------------------------------ 28 Bra 29 \x{10000} 30 Ket 31 End 32------------------------------------------------------------------ 33Capturing subpattern count = 0 34Options: utf 35First code unit = \x{d800} 36Last code unit = \x{dc00} 37Subject length lower bound = 1 38 39/\x{100}/IB,utf 40------------------------------------------------------------------ 41 Bra 42 \x{100} 43 Ket 44 End 45------------------------------------------------------------------ 46Capturing subpattern count = 0 47Options: utf 48First code unit = \x{100} 49Subject length lower bound = 1 50 51/\x{1000}/IB,utf 52------------------------------------------------------------------ 53 Bra 54 \x{1000} 55 Ket 56 End 57------------------------------------------------------------------ 58Capturing subpattern count = 0 59Options: utf 60First code unit = \x{1000} 61Subject length lower bound = 1 62 63/\x{10000}/IB,utf 64------------------------------------------------------------------ 65 Bra 66 \x{10000} 67 Ket 68 End 69------------------------------------------------------------------ 70Capturing subpattern count = 0 71Options: utf 72First code unit = \x{d800} 73Last code unit = \x{dc00} 74Subject length lower bound = 1 75 76/\x{100000}/IB,utf 77------------------------------------------------------------------ 78 Bra 79 \x{100000} 80 Ket 81 End 82------------------------------------------------------------------ 83Capturing subpattern count = 0 84Options: utf 85First code unit = \x{dbc0} 86Last code unit = \x{dc00} 87Subject length lower bound = 1 88 89/\x{10ffff}/IB,utf 90------------------------------------------------------------------ 91 Bra 92 \x{10ffff} 93 Ket 94 End 95------------------------------------------------------------------ 96Capturing subpattern count = 0 97Options: utf 98First code unit = \x{dbff} 99Last code unit = \x{dfff} 100Subject length lower bound = 1 101 102/[\x{ff}]/IB,utf 103------------------------------------------------------------------ 104 Bra 105 \x{ff} 106 Ket 107 End 108------------------------------------------------------------------ 109Capturing subpattern count = 0 110Options: utf 111First code unit = \xff 112Subject length lower bound = 1 113 114/[\x{100}]/IB,utf 115------------------------------------------------------------------ 116 Bra 117 \x{100} 118 Ket 119 End 120------------------------------------------------------------------ 121Capturing subpattern count = 0 122Options: utf 123First code unit = \x{100} 124Subject length lower bound = 1 125 126/\x80/IB,utf 127------------------------------------------------------------------ 128 Bra 129 \x{80} 130 Ket 131 End 132------------------------------------------------------------------ 133Capturing subpattern count = 0 134Options: utf 135First code unit = \x80 136Subject length lower bound = 1 137 138/\xff/IB,utf 139------------------------------------------------------------------ 140 Bra 141 \x{ff} 142 Ket 143 End 144------------------------------------------------------------------ 145Capturing subpattern count = 0 146Options: utf 147First code unit = \xff 148Subject length lower bound = 1 149 150/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf 151------------------------------------------------------------------ 152 Bra 153 \x{d55c}\x{ad6d}\x{c5b4} 154 Ket 155 End 156------------------------------------------------------------------ 157Capturing subpattern count = 0 158Options: utf 159First code unit = \x{d55c} 160Last code unit = \x{c5b4} 161Subject length lower bound = 3 162 \x{D55c}\x{ad6d}\x{C5B4} 163 0: \x{d55c}\x{ad6d}\x{c5b4} 164 165/\x{65e5}\x{672c}\x{8a9e}/IB,utf 166------------------------------------------------------------------ 167 Bra 168 \x{65e5}\x{672c}\x{8a9e} 169 Ket 170 End 171------------------------------------------------------------------ 172Capturing subpattern count = 0 173Options: utf 174First code unit = \x{65e5} 175Last code unit = \x{8a9e} 176Subject length lower bound = 3 177 \x{65e5}\x{672c}\x{8a9e} 178 0: \x{65e5}\x{672c}\x{8a9e} 179 180/\x{80}/IB,utf 181------------------------------------------------------------------ 182 Bra 183 \x{80} 184 Ket 185 End 186------------------------------------------------------------------ 187Capturing subpattern count = 0 188Options: utf 189First code unit = \x80 190Subject length lower bound = 1 191 192/\x{084}/IB,utf 193------------------------------------------------------------------ 194 Bra 195 \x{84} 196 Ket 197 End 198------------------------------------------------------------------ 199Capturing subpattern count = 0 200Options: utf 201First code unit = \x84 202Subject length lower bound = 1 203 204/\x{104}/IB,utf 205------------------------------------------------------------------ 206 Bra 207 \x{104} 208 Ket 209 End 210------------------------------------------------------------------ 211Capturing subpattern count = 0 212Options: utf 213First code unit = \x{104} 214Subject length lower bound = 1 215 216/\x{861}/IB,utf 217------------------------------------------------------------------ 218 Bra 219 \x{861} 220 Ket 221 End 222------------------------------------------------------------------ 223Capturing subpattern count = 0 224Options: utf 225First code unit = \x{861} 226Subject length lower bound = 1 227 228/\x{212ab}/IB,utf 229------------------------------------------------------------------ 230 Bra 231 \x{212ab} 232 Ket 233 End 234------------------------------------------------------------------ 235Capturing subpattern count = 0 236Options: utf 237First code unit = \x{d844} 238Last code unit = \x{deab} 239Subject length lower bound = 1 240 241/[^ab\xC0-\xF0]/IB,utf 242------------------------------------------------------------------ 243 Bra 244 [\x00-`c-\xbf\xf1-\xff] (neg) 245 Ket 246 End 247------------------------------------------------------------------ 248Capturing subpattern count = 0 249Options: utf 250Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 251 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 252 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 253 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 254 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 255 \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e 256 \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d 257 \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac 258 \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb 259 \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb 260 \xfc \xfd \xfe \xff 261Subject length lower bound = 1 262 \x{f1} 263 0: \x{f1} 264 \x{bf} 265 0: \x{bf} 266 \x{100} 267 0: \x{100} 268 \x{1000} 269 0: \x{1000} 270\= Expect no match 271 \x{c0} 272No match 273 \x{f0} 274No match 275 276/Ā{3,4}/IB,utf 277------------------------------------------------------------------ 278 Bra 279 \x{100}{3} 280 \x{100}?+ 281 Ket 282 End 283------------------------------------------------------------------ 284Capturing subpattern count = 0 285Options: utf 286First code unit = \x{100} 287Last code unit = \x{100} 288Subject length lower bound = 3 289 \x{100}\x{100}\x{100}\x{100\x{100} 290 0: \x{100}\x{100}\x{100} 291 292/(\x{100}+|x)/IB,utf 293------------------------------------------------------------------ 294 Bra 295 CBra 1 296 \x{100}++ 297 Alt 298 x 299 Ket 300 Ket 301 End 302------------------------------------------------------------------ 303Capturing subpattern count = 1 304Options: utf 305Starting code units: x \xff 306Subject length lower bound = 1 307 308/(\x{100}*a|x)/IB,utf 309------------------------------------------------------------------ 310 Bra 311 CBra 1 312 \x{100}*+ 313 a 314 Alt 315 x 316 Ket 317 Ket 318 End 319------------------------------------------------------------------ 320Capturing subpattern count = 1 321Options: utf 322Starting code units: a x \xff 323Subject length lower bound = 1 324 325/(\x{100}{0,2}a|x)/IB,utf 326------------------------------------------------------------------ 327 Bra 328 CBra 1 329 \x{100}{0,2}+ 330 a 331 Alt 332 x 333 Ket 334 Ket 335 End 336------------------------------------------------------------------ 337Capturing subpattern count = 1 338Options: utf 339Starting code units: a x \xff 340Subject length lower bound = 1 341 342/(\x{100}{1,2}a|x)/IB,utf 343------------------------------------------------------------------ 344 Bra 345 CBra 1 346 \x{100} 347 \x{100}{0,1}+ 348 a 349 Alt 350 x 351 Ket 352 Ket 353 End 354------------------------------------------------------------------ 355Capturing subpattern count = 1 356Options: utf 357Starting code units: x \xff 358Subject length lower bound = 1 359 360/\x{100}/IB,utf 361------------------------------------------------------------------ 362 Bra 363 \x{100} 364 Ket 365 End 366------------------------------------------------------------------ 367Capturing subpattern count = 0 368Options: utf 369First code unit = \x{100} 370Subject length lower bound = 1 371 372/a\x{100}\x{101}*/IB,utf 373------------------------------------------------------------------ 374 Bra 375 a\x{100} 376 \x{101}*+ 377 Ket 378 End 379------------------------------------------------------------------ 380Capturing subpattern count = 0 381Options: utf 382First code unit = 'a' 383Last code unit = \x{100} 384Subject length lower bound = 2 385 386/a\x{100}\x{101}+/IB,utf 387------------------------------------------------------------------ 388 Bra 389 a\x{100} 390 \x{101}++ 391 Ket 392 End 393------------------------------------------------------------------ 394Capturing subpattern count = 0 395Options: utf 396First code unit = 'a' 397Last code unit = \x{101} 398Subject length lower bound = 3 399 400/[^\x{c4}]/IB 401------------------------------------------------------------------ 402 Bra 403 [^\x{c4}] 404 Ket 405 End 406------------------------------------------------------------------ 407Capturing subpattern count = 0 408Subject length lower bound = 1 409 410/[\x{100}]/IB,utf 411------------------------------------------------------------------ 412 Bra 413 \x{100} 414 Ket 415 End 416------------------------------------------------------------------ 417Capturing subpattern count = 0 418Options: utf 419First code unit = \x{100} 420Subject length lower bound = 1 421 \x{100} 422 0: \x{100} 423 Z\x{100} 424 0: \x{100} 425 \x{100}Z 426 0: \x{100} 427 428/[\xff]/IB,utf 429------------------------------------------------------------------ 430 Bra 431 \x{ff} 432 Ket 433 End 434------------------------------------------------------------------ 435Capturing subpattern count = 0 436Options: utf 437First code unit = \xff 438Subject length lower bound = 1 439 >\x{ff}< 440 0: \x{ff} 441 442/[^\xff]/IB,utf 443------------------------------------------------------------------ 444 Bra 445 [^\x{ff}] 446 Ket 447 End 448------------------------------------------------------------------ 449Capturing subpattern count = 0 450Options: utf 451Subject length lower bound = 1 452 453/\x{100}abc(xyz(?1))/IB,utf 454------------------------------------------------------------------ 455 Bra 456 \x{100}abc 457 CBra 1 458 xyz 459 Recurse 460 Ket 461 Ket 462 End 463------------------------------------------------------------------ 464Capturing subpattern count = 1 465Options: utf 466First code unit = \x{100} 467Last code unit = 'z' 468Subject length lower bound = 7 469 470/\777/I,utf 471Capturing subpattern count = 0 472Options: utf 473First code unit = \x{1ff} 474Subject length lower bound = 1 475 \x{1ff} 476 0: \x{1ff} 477 \777 478 0: \x{1ff} 479 480/\x{100}+\x{200}/IB,utf 481------------------------------------------------------------------ 482 Bra 483 \x{100}++ 484 \x{200} 485 Ket 486 End 487------------------------------------------------------------------ 488Capturing subpattern count = 0 489Options: utf 490First code unit = \x{100} 491Last code unit = \x{200} 492Subject length lower bound = 2 493 494/\x{100}+X/IB,utf 495------------------------------------------------------------------ 496 Bra 497 \x{100}++ 498 X 499 Ket 500 End 501------------------------------------------------------------------ 502Capturing subpattern count = 0 503Options: utf 504First code unit = \x{100} 505Last code unit = 'X' 506Subject length lower bound = 2 507 508/^[\QĀ\E-\QŐ\E/B,utf 509Failed: error 106 at offset 13: missing terminating ] for character class 510 511/X/utf 512 XX\x{d800}\=no_utf_check 513 0: X 514 XX\x{da00}\=no_utf_check 515 0: X 516 XX\x{dc00}\=no_utf_check 517 0: X 518 XX\x{de00}\=no_utf_check 519 0: X 520 XX\x{dfff}\=no_utf_check 521 0: X 522\= Expect UTF error 523 XX\x{d800} 524Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 525 XX\x{da00} 526Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 527 XX\x{dc00} 528Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 529 XX\x{de00} 530Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 531 XX\x{dfff} 532Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 533 XX\x{110000} 534** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 535 XX\x{d800}\x{1234} 536Failed: error -25: UTF-16 error: invalid low surrogate at offset 3 537\= Expect no match 538 XX\x{d800}\=offset=3 539No match 540 541/(?<=.)X/utf 542 XX\x{d800}\=offset=3 543Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 544 545/(*UTF16)\x{11234}/ 546 abcd\x{11234}pqr 547 0: \x{11234} 548 549/(*UTF)\x{11234}/I 550Capturing subpattern count = 0 551Compile options: <none> 552Overall options: utf 553First code unit = \x{d804} 554Last code unit = \x{de34} 555Subject length lower bound = 1 556 abcd\x{11234}pqr 557 0: \x{11234} 558 559/(*UTF-32)\x{11234}/ 560Failed: error 160 at offset 5: (*VERB) not recognized or malformed 561 abcd\x{11234}pqr 562 563/(*UTF-32)\x{112}/ 564Failed: error 160 at offset 5: (*VERB) not recognized or malformed 565 abcd\x{11234}pqr 566 567/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I 568Capturing subpattern count = 0 569Compile options: <none> 570Overall options: utf 571\R matches any Unicode newline 572Forced newline is CRLF 573First code unit = 'a' 574Last code unit = 'b' 575Subject length lower bound = 3 576 577/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I 578Failed: error 160 at offset 12: (*VERB) not recognized or malformed 579 580/\h/I,utf 581Capturing subpattern count = 0 582Options: utf 583Starting code units: \x09 \x20 \xa0 \xff 584Subject length lower bound = 1 585 ABC\x{09} 586 0: \x{09} 587 ABC\x{20} 588 0: 589 ABC\x{a0} 590 0: \x{a0} 591 ABC\x{1680} 592 0: \x{1680} 593 ABC\x{180e} 594 0: \x{180e} 595 ABC\x{2000} 596 0: \x{2000} 597 ABC\x{202f} 598 0: \x{202f} 599 ABC\x{205f} 600 0: \x{205f} 601 ABC\x{3000} 602 0: \x{3000} 603 604/\v/I,utf 605Capturing subpattern count = 0 606Options: utf 607Starting code units: \x0a \x0b \x0c \x0d \x85 \xff 608Subject length lower bound = 1 609 ABC\x{0a} 610 0: \x{0a} 611 ABC\x{0b} 612 0: \x{0b} 613 ABC\x{0c} 614 0: \x{0c} 615 ABC\x{0d} 616 0: \x{0d} 617 ABC\x{85} 618 0: \x{85} 619 ABC\x{2028} 620 0: \x{2028} 621 622/\h*A/I,utf 623Capturing subpattern count = 0 624Options: utf 625Starting code units: \x09 \x20 A \xa0 \xff 626Last code unit = 'A' 627Subject length lower bound = 1 628 CDBABC 629 0: A 630 \x{2000}ABC 631 0: \x{2000}A 632 633/\R*A/I,bsr=unicode,utf 634Capturing subpattern count = 0 635Options: utf 636\R matches any Unicode newline 637Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff 638Last code unit = 'A' 639Subject length lower bound = 1 640 CDBABC 641 0: A 642 \x{2028}A 643 0: \x{2028}A 644 645/\v+A/I,utf 646Capturing subpattern count = 0 647Options: utf 648Starting code units: \x0a \x0b \x0c \x0d \x85 \xff 649Last code unit = 'A' 650Subject length lower bound = 2 651 652/\s?xxx\s/I,utf 653Capturing subpattern count = 0 654Options: utf 655Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x 656Last code unit = 'x' 657Subject length lower bound = 4 658 659/\sxxx\s/I,utf,tables=2 660Capturing subpattern count = 0 661Options: utf 662Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0 663Last code unit = 'x' 664Subject length lower bound = 5 665 AB\x{85}xxx\x{a0}XYZ 666 0: \x{85}xxx\x{a0} 667 AB\x{a0}xxx\x{85}XYZ 668 0: \x{a0}xxx\x{85} 669 670/\S \S/I,utf,tables=2 671Capturing subpattern count = 0 672Options: utf 673Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f 674 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e 675 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C 676 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 677 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 678 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 679 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4 680 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 681 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 682 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 683 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 684 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef 685 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe 686 \xff 687Last code unit = ' ' 688Subject length lower bound = 3 689 \x{a2} \x{84} 690 0: \x{a2} \x{84} 691 A Z 692 0: A Z 693 694/a+/utf 695 a\x{123}aa\=offset=1 696 0: aa 697 a\x{123}aa\=offset=2 698 0: aa 699 a\x{123}aa\=offset=3 700 0: a 701\= Expect no match 702 a\x{123}aa\=offset=4 703No match 704\= Expect bad offset error 705 a\x{123}aa\=offset=5 706Failed: error -33: bad offset value 707 a\x{123}aa\=offset=6 708Failed: error -33: bad offset value 709 710/\x{1234}+/Ii,utf 711Capturing subpattern count = 0 712Options: caseless utf 713First code unit = \x{1234} 714Subject length lower bound = 1 715 716/\x{1234}+?/Ii,utf 717Capturing subpattern count = 0 718Options: caseless utf 719First code unit = \x{1234} 720Subject length lower bound = 1 721 722/\x{1234}++/Ii,utf 723Capturing subpattern count = 0 724Options: caseless utf 725First code unit = \x{1234} 726Subject length lower bound = 1 727 728/\x{1234}{2}/Ii,utf 729Capturing subpattern count = 0 730Options: caseless utf 731First code unit = \x{1234} 732Last code unit = \x{1234} 733Subject length lower bound = 2 734 735/[^\x{c4}]/IB,utf 736------------------------------------------------------------------ 737 Bra 738 [^\x{c4}] 739 Ket 740 End 741------------------------------------------------------------------ 742Capturing subpattern count = 0 743Options: utf 744Subject length lower bound = 1 745 746/X+\x{200}/IB,utf 747------------------------------------------------------------------ 748 Bra 749 X++ 750 \x{200} 751 Ket 752 End 753------------------------------------------------------------------ 754Capturing subpattern count = 0 755Options: utf 756First code unit = 'X' 757Last code unit = \x{200} 758Subject length lower bound = 2 759 760/\R/I,utf 761Capturing subpattern count = 0 762Options: utf 763Starting code units: \x0a \x0b \x0c \x0d \x85 \xff 764Subject length lower bound = 1 765 766# Check bad offset 767 768/a/utf 769\= Expect bad UTF-16 offset, or no match in 32-bit 770 \x{10000}\=offset=1 771Error -36 (bad UTF-16 offset) 772 \x{10000}ab\=offset=1 773Error -36 (bad UTF-16 offset) 774\= Expect 16-bit match, 32-bit no match 775 \x{10000}ab\=offset=2 776 0: a 777\= Expect no match 778 \x{10000}ab\=offset=3 779No match 780\= Expect no match in 16-bit, bad offset in 32-bit 781 \x{10000}ab\=offset=4 782No match 783\= Expect bad offset 784 \x{10000}ab\=offset=5 785Failed: error -33: bad offset value 786 787/�/utf 788Failed: error -26 at offset 0: UTF-16 error: isolated low surrogate 789 790/\w+\x{C4}/B,utf 791------------------------------------------------------------------ 792 Bra 793 \w++ 794 \x{c4} 795 Ket 796 End 797------------------------------------------------------------------ 798 a\x{C4}\x{C4} 799 0: a\x{c4} 800 801/\w+\x{C4}/B,utf,tables=2 802------------------------------------------------------------------ 803 Bra 804 \w+ 805 \x{c4} 806 Ket 807 End 808------------------------------------------------------------------ 809 a\x{C4}\x{C4} 810 0: a\x{c4}\x{c4} 811 812/\W+\x{C4}/B,utf 813------------------------------------------------------------------ 814 Bra 815 \W+ 816 \x{c4} 817 Ket 818 End 819------------------------------------------------------------------ 820 !\x{C4} 821 0: !\x{c4} 822 823/\W+\x{C4}/B,utf,tables=2 824------------------------------------------------------------------ 825 Bra 826 \W++ 827 \x{c4} 828 Ket 829 End 830------------------------------------------------------------------ 831 !\x{C4} 832 0: !\x{c4} 833 834/\W+\x{A1}/B,utf 835------------------------------------------------------------------ 836 Bra 837 \W+ 838 \x{a1} 839 Ket 840 End 841------------------------------------------------------------------ 842 !\x{A1} 843 0: !\x{a1} 844 845/\W+\x{A1}/B,utf,tables=2 846------------------------------------------------------------------ 847 Bra 848 \W+ 849 \x{a1} 850 Ket 851 End 852------------------------------------------------------------------ 853 !\x{A1} 854 0: !\x{a1} 855 856/X\s+\x{A0}/B,utf 857------------------------------------------------------------------ 858 Bra 859 X 860 \s++ 861 \x{a0} 862 Ket 863 End 864------------------------------------------------------------------ 865 X\x20\x{A0}\x{A0} 866 0: X \x{a0} 867 868/X\s+\x{A0}/B,utf,tables=2 869------------------------------------------------------------------ 870 Bra 871 X 872 \s+ 873 \x{a0} 874 Ket 875 End 876------------------------------------------------------------------ 877 X\x20\x{A0}\x{A0} 878 0: X \x{a0}\x{a0} 879 880/\S+\x{A0}/B,utf 881------------------------------------------------------------------ 882 Bra 883 \S+ 884 \x{a0} 885 Ket 886 End 887------------------------------------------------------------------ 888 X\x{A0}\x{A0} 889 0: X\x{a0}\x{a0} 890 891/\S+\x{A0}/B,utf,tables=2 892------------------------------------------------------------------ 893 Bra 894 \S++ 895 \x{a0} 896 Ket 897 End 898------------------------------------------------------------------ 899 X\x{A0}\x{A0} 900 0: X\x{a0} 901 902/\x{a0}+\s!/B,utf 903------------------------------------------------------------------ 904 Bra 905 \x{a0}++ 906 \s 907 ! 908 Ket 909 End 910------------------------------------------------------------------ 911 \x{a0}\x20! 912 0: \x{a0} ! 913 914/\x{a0}+\s!/B,utf,tables=2 915------------------------------------------------------------------ 916 Bra 917 \x{a0}+ 918 \s 919 ! 920 Ket 921 End 922------------------------------------------------------------------ 923 \x{a0}\x20! 924 0: \x{a0} ! 925 926/(*UTF)abc/never_utf 927Failed: error 174 at offset 6: using UTF is disabled by the application 928 929/abc/utf,never_utf 930Failed: error 174 at offset 0: using UTF is disabled by the application 931 932/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf 933------------------------------------------------------------------ 934 Bra 935 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} 936 Ket 937 End 938------------------------------------------------------------------ 939Capturing subpattern count = 0 940Options: caseless utf 941First code unit = 'A' (caseless) 942Last code unit = \x{1fb0} (caseless) 943Subject length lower bound = 5 944 945/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf 946------------------------------------------------------------------ 947 Bra 948 A\x{391}\x{10427}\x{ff3a}\x{1fb0} 949 Ket 950 End 951------------------------------------------------------------------ 952Capturing subpattern count = 0 953Options: utf 954First code unit = 'A' 955Last code unit = \x{1fb0} 956Subject length lower bound = 5 957 958/AB\x{1fb0}/IB,utf 959------------------------------------------------------------------ 960 Bra 961 AB\x{1fb0} 962 Ket 963 End 964------------------------------------------------------------------ 965Capturing subpattern count = 0 966Options: utf 967First code unit = 'A' 968Last code unit = \x{1fb0} 969Subject length lower bound = 3 970 971/AB\x{1fb0}/IBi,utf 972------------------------------------------------------------------ 973 Bra 974 /i AB\x{1fb0} 975 Ket 976 End 977------------------------------------------------------------------ 978Capturing subpattern count = 0 979Options: caseless utf 980First code unit = 'A' (caseless) 981Last code unit = \x{1fb0} (caseless) 982Subject length lower bound = 3 983 984/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf 985Capturing subpattern count = 0 986Options: caseless utf 987First code unit = \x{401} (caseless) 988Last code unit = \x{42f} (caseless) 989Subject length lower bound = 17 990 \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} 991 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} 992 \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} 993 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} 994 995/[ⱥ]/Bi,utf 996------------------------------------------------------------------ 997 Bra 998 /i \x{2c65} 999 Ket 1000 End 1001------------------------------------------------------------------ 1002 1003/[^ⱥ]/Bi,utf 1004------------------------------------------------------------------ 1005 Bra 1006 /i [^\x{2c65}] 1007 Ket 1008 End 1009------------------------------------------------------------------ 1010 1011/[[:blank:]]/B,ucp 1012------------------------------------------------------------------ 1013 Bra 1014 [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] 1015 Ket 1016 End 1017------------------------------------------------------------------ 1018 1019/\x{212a}+/Ii,utf 1020Capturing subpattern count = 0 1021Options: caseless utf 1022Starting code units: K k \xff 1023Subject length lower bound = 1 1024 KKkk\x{212a} 1025 0: KKkk\x{212a} 1026 1027/s+/Ii,utf 1028Capturing subpattern count = 0 1029Options: caseless utf 1030Starting code units: S s \xff 1031Subject length lower bound = 1 1032 SSss\x{17f} 1033 0: SSss\x{17f} 1034 1035# Non-UTF characters should give errors in both 16-bit and 32-bit modes. 1036 1037/\x{110000}/utf 1038Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large 1039 1040/\o{4200000}/utf 1041Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large 1042 1043/\x{100}*A/IB,utf 1044------------------------------------------------------------------ 1045 Bra 1046 \x{100}*+ 1047 A 1048 Ket 1049 End 1050------------------------------------------------------------------ 1051Capturing subpattern count = 0 1052Options: utf 1053Starting code units: A \xff 1054Last code unit = 'A' 1055Subject length lower bound = 1 1056 A 1057 0: A 1058 1059/\x{100}*\d(?R)/IB,utf 1060------------------------------------------------------------------ 1061 Bra 1062 \x{100}*+ 1063 \d 1064 Recurse 1065 Ket 1066 End 1067------------------------------------------------------------------ 1068Capturing subpattern count = 0 1069Options: utf 1070Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff 1071Subject length lower bound = 1 1072 1073/[Z\x{100}]/IB,utf 1074------------------------------------------------------------------ 1075 Bra 1076 [Z\x{100}] 1077 Ket 1078 End 1079------------------------------------------------------------------ 1080Capturing subpattern count = 0 1081Options: utf 1082Starting code units: Z \xff 1083Subject length lower bound = 1 1084 Z\x{100} 1085 0: Z 1086 \x{100} 1087 0: \x{100} 1088 \x{100}Z 1089 0: \x{100} 1090 1091/[z-\x{100}]/IB,utf 1092------------------------------------------------------------------ 1093 Bra 1094 [z-\xff\x{100}] 1095 Ket 1096 End 1097------------------------------------------------------------------ 1098Capturing subpattern count = 0 1099Options: utf 1100Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 1101 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 1102 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 1103 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 1104 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 1105 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 1106 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 1107 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 1108 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 1109Subject length lower bound = 1 1110 1111/[z\Qa-d]Ā\E]/IB,utf 1112------------------------------------------------------------------ 1113 Bra 1114 [\-\]adz\x{100}] 1115 Ket 1116 End 1117------------------------------------------------------------------ 1118Capturing subpattern count = 0 1119Options: utf 1120Starting code units: - ] a d z \xff 1121Subject length lower bound = 1 1122 \x{100} 1123 0: \x{100} 1124 Ā 1125 0: \x{100} 1126 1127/[ab\x{100}]abc(xyz(?1))/IB,utf 1128------------------------------------------------------------------ 1129 Bra 1130 [ab\x{100}] 1131 abc 1132 CBra 1 1133 xyz 1134 Recurse 1135 Ket 1136 Ket 1137 End 1138------------------------------------------------------------------ 1139Capturing subpattern count = 1 1140Options: utf 1141Starting code units: a b \xff 1142Last code unit = 'z' 1143Subject length lower bound = 7 1144 1145/\x{100}*\s/IB,utf 1146------------------------------------------------------------------ 1147 Bra 1148 \x{100}*+ 1149 \s 1150 Ket 1151 End 1152------------------------------------------------------------------ 1153Capturing subpattern count = 0 1154Options: utf 1155Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff 1156Subject length lower bound = 1 1157 1158/\x{100}*\d/IB,utf 1159------------------------------------------------------------------ 1160 Bra 1161 \x{100}*+ 1162 \d 1163 Ket 1164 End 1165------------------------------------------------------------------ 1166Capturing subpattern count = 0 1167Options: utf 1168Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff 1169Subject length lower bound = 1 1170 1171/\x{100}*\w/IB,utf 1172------------------------------------------------------------------ 1173 Bra 1174 \x{100}*+ 1175 \w 1176 Ket 1177 End 1178------------------------------------------------------------------ 1179Capturing subpattern count = 0 1180Options: utf 1181Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 1182 Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z 1183 \xff 1184Subject length lower bound = 1 1185 1186/\x{100}*\D/IB,utf 1187------------------------------------------------------------------ 1188 Bra 1189 \x{100}* 1190 \D 1191 Ket 1192 End 1193------------------------------------------------------------------ 1194Capturing subpattern count = 0 1195Options: utf 1196Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 1197 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 1198 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > 1199 ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c 1200 d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 1201 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 1202 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 1203 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf 1204 \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe 1205 \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd 1206 \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc 1207 \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb 1208 \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa 1209 \xfb \xfc \xfd \xfe \xff 1210Subject length lower bound = 1 1211 1212/\x{100}*\S/IB,utf 1213------------------------------------------------------------------ 1214 Bra 1215 \x{100}* 1216 \S 1217 Ket 1218 End 1219------------------------------------------------------------------ 1220Capturing subpattern count = 0 1221Options: utf 1222Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f 1223 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e 1224 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C 1225 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 1226 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 1227 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 1228 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 1229 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 1230 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 1231 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf 1232 \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde 1233 \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed 1234 \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc 1235 \xfd \xfe \xff 1236Subject length lower bound = 1 1237 1238/\x{100}*\W/IB,utf 1239------------------------------------------------------------------ 1240 Bra 1241 \x{100}* 1242 \W 1243 Ket 1244 End 1245------------------------------------------------------------------ 1246Capturing subpattern count = 0 1247Options: utf 1248Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 1249 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 1250 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > 1251 ? @ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 1252 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 1253 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 1254 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 1255 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 1256 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 1257 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 1258 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 1259 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 1260Subject length lower bound = 1 1261 1262/[\x{105}-\x{109}]/IBi,utf 1263------------------------------------------------------------------ 1264 Bra 1265 [\x{104}-\x{109}] 1266 Ket 1267 End 1268------------------------------------------------------------------ 1269Capturing subpattern count = 0 1270Options: caseless utf 1271Starting code units: \xff 1272Subject length lower bound = 1 1273 \x{104} 1274 0: \x{104} 1275 \x{105} 1276 0: \x{105} 1277 \x{109} 1278 0: \x{109} 1279\= Expect no match 1280 \x{100} 1281No match 1282 \x{10a} 1283No match 1284 1285/[z-\x{100}]/IBi,utf 1286------------------------------------------------------------------ 1287 Bra 1288 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] 1289 Ket 1290 End 1291------------------------------------------------------------------ 1292Capturing subpattern count = 0 1293Options: caseless utf 1294Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 1295 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 1296 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 1297 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 1298 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 1299 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 1300 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 1301 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef 1302 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe 1303 \xff 1304Subject length lower bound = 1 1305 Z 1306 0: Z 1307 z 1308 0: z 1309 \x{39c} 1310 0: \x{39c} 1311 \x{178} 1312 0: \x{178} 1313 | 1314 0: | 1315 \x{80} 1316 0: \x{80} 1317 \x{ff} 1318 0: \x{ff} 1319 \x{100} 1320 0: \x{100} 1321 \x{101} 1322 0: \x{101} 1323\= Expect no match 1324 \x{102} 1325No match 1326 Y 1327No match 1328 y 1329No match 1330 1331/[z-\x{100}]/IBi,utf 1332------------------------------------------------------------------ 1333 Bra 1334 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] 1335 Ket 1336 End 1337------------------------------------------------------------------ 1338Capturing subpattern count = 0 1339Options: caseless utf 1340Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 1341 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 1342 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 1343 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 1344 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 1345 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 1346 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 1347 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef 1348 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe 1349 \xff 1350Subject length lower bound = 1 1351 1352/\x{3a3}B/IBi,utf 1353------------------------------------------------------------------ 1354 Bra 1355 clist 03a3 03c2 03c3 1356 /i B 1357 Ket 1358 End 1359------------------------------------------------------------------ 1360Capturing subpattern count = 0 1361Options: caseless utf 1362Starting code units: \xff 1363Last code unit = 'B' (caseless) 1364Subject length lower bound = 2 1365 1366/./utf 1367 \x{110000} 1368** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 1369 1370/(*UTF)ab������z/B 1371------------------------------------------------------------------ 1372 Bra 1373 ab\x{fd}\x{bf}\x{bf}\x{bf}\x{bf}\x{bf}z 1374 Ket 1375 End 1376------------------------------------------------------------------ 1377 1378/ab������z/utf 1379** Failed: character value greater than 0x10ffff cannot be converted to UTF 1380 1381/[\W\p{Any}]/B 1382------------------------------------------------------------------ 1383 Bra 1384 [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffff}] 1385 Ket 1386 End 1387------------------------------------------------------------------ 1388 abc 1389 0: a 1390 123 1391 0: 1 1392 1393/[\W\pL]/B 1394------------------------------------------------------------------ 1395 Bra 1396 [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffff}] 1397 Ket 1398 End 1399------------------------------------------------------------------ 1400 abc 1401 0: a 1402 \x{100} 1403 0: \x{100} 1404 \x{308} 1405 0: \x{308} 1406\= Expect no match 1407 123 1408No match 1409 1410/[\s[:^ascii:]]/B,ucp 1411------------------------------------------------------------------ 1412 Bra 1413 [\x80-\xff\p{Xsp}\x{100}-\x{ffff}] 1414 Ket 1415 End 1416------------------------------------------------------------------ 1417 1418/\pP/ucp 1419 \x{7fffffff} 1420** Character \x{7fffffff} is greater than 0xffff and UTF-16 mode is not enabled. 1421** Truncation will probably give the wrong result. 1422No match 1423 1424# A special extra option allows excaped surrogate code points in 32-bit mode, 1425# but subjects containing them must not be UTF-checked. These patterns give 1426# errors in 16-bit mode. 1427 1428/\x{d800}/I,utf,allow_surrogate_escapes 1429Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode 1430 \x{d800}\=no_utf_check 1431 1432/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes 1433Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode 1434 \x{dfff}\x{df01}\=no_utf_check 1435 1436# This has different starting code units in 8-bit mode. 1437 1438/^[^ab]/IB,utf 1439------------------------------------------------------------------ 1440 Bra 1441 ^ 1442 [\x00-`c-\xff] (neg) 1443 Ket 1444 End 1445------------------------------------------------------------------ 1446Capturing subpattern count = 0 1447Compile options: utf 1448Overall options: anchored utf 1449Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 1450 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 1451 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 1452 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 1453 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 1454 \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e 1455 \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d 1456 \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac 1457 \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb 1458 \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca 1459 \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 1460 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 1461 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 1462 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 1463Subject length lower bound = 1 1464 c 1465 0: c 1466 \x{ff} 1467 0: \x{ff} 1468 \x{100} 1469 0: \x{100} 1470\= Expect no match 1471 aaa 1472No match 1473 1474# End of testinput12 1475