1# This set of tests is for UTF-16 and UTF-32 support, including Unicode
2# properties. It is relevant only to the 16-bit and 32-bit libraries. The
3# output is different for each library, so there are separate output files.
4
5/���xxx/IB,utf,no_utf_check
6** Failed: invalid UTF-8 string cannot be converted to 32-bit string
7
8/abc/utf
9    �]
10** Failed: invalid UTF-8 string cannot be used as input in UTF mode
11
12# Check maximum character size
13
14/\x{ffff}/IB,utf
15------------------------------------------------------------------
16        Bra
17        \x{ffff}
18        Ket
19        End
20------------------------------------------------------------------
21Capture group count = 0
22Options: utf
23First code unit = \x{ffff}
24Subject length lower bound = 1
25
26/\x{10000}/IB,utf
27------------------------------------------------------------------
28        Bra
29        \x{10000}
30        Ket
31        End
32------------------------------------------------------------------
33Capture group count = 0
34Options: utf
35First code unit = \x{10000}
36Subject length lower bound = 1
37
38/\x{100}/IB,utf
39------------------------------------------------------------------
40        Bra
41        \x{100}
42        Ket
43        End
44------------------------------------------------------------------
45Capture group count = 0
46Options: utf
47First code unit = \x{100}
48Subject length lower bound = 1
49
50/\x{1000}/IB,utf
51------------------------------------------------------------------
52        Bra
53        \x{1000}
54        Ket
55        End
56------------------------------------------------------------------
57Capture group count = 0
58Options: utf
59First code unit = \x{1000}
60Subject length lower bound = 1
61
62/\x{10000}/IB,utf
63------------------------------------------------------------------
64        Bra
65        \x{10000}
66        Ket
67        End
68------------------------------------------------------------------
69Capture group count = 0
70Options: utf
71First code unit = \x{10000}
72Subject length lower bound = 1
73
74/\x{100000}/IB,utf
75------------------------------------------------------------------
76        Bra
77        \x{100000}
78        Ket
79        End
80------------------------------------------------------------------
81Capture group count = 0
82Options: utf
83First code unit = \x{100000}
84Subject length lower bound = 1
85
86/\x{10ffff}/IB,utf
87------------------------------------------------------------------
88        Bra
89        \x{10ffff}
90        Ket
91        End
92------------------------------------------------------------------
93Capture group count = 0
94Options: utf
95First code unit = \x{10ffff}
96Subject length lower bound = 1
97
98/[\x{ff}]/IB,utf
99------------------------------------------------------------------
100        Bra
101        \x{ff}
102        Ket
103        End
104------------------------------------------------------------------
105Capture group count = 0
106Options: utf
107First code unit = \xff
108Subject length lower bound = 1
109
110/[\x{100}]/IB,utf
111------------------------------------------------------------------
112        Bra
113        \x{100}
114        Ket
115        End
116------------------------------------------------------------------
117Capture group count = 0
118Options: utf
119First code unit = \x{100}
120Subject length lower bound = 1
121
122/\x80/IB,utf
123------------------------------------------------------------------
124        Bra
125        \x{80}
126        Ket
127        End
128------------------------------------------------------------------
129Capture group count = 0
130Options: utf
131First code unit = \x80
132Subject length lower bound = 1
133
134/\xff/IB,utf
135------------------------------------------------------------------
136        Bra
137        \x{ff}
138        Ket
139        End
140------------------------------------------------------------------
141Capture group count = 0
142Options: utf
143First code unit = \xff
144Subject length lower bound = 1
145
146/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
147------------------------------------------------------------------
148        Bra
149        \x{d55c}\x{ad6d}\x{c5b4}
150        Ket
151        End
152------------------------------------------------------------------
153Capture group count = 0
154Options: utf
155First code unit = \x{d55c}
156Last code unit = \x{c5b4}
157Subject length lower bound = 3
158    \x{D55c}\x{ad6d}\x{C5B4}
159 0: \x{d55c}\x{ad6d}\x{c5b4}
160
161/\x{65e5}\x{672c}\x{8a9e}/IB,utf
162------------------------------------------------------------------
163        Bra
164        \x{65e5}\x{672c}\x{8a9e}
165        Ket
166        End
167------------------------------------------------------------------
168Capture group count = 0
169Options: utf
170First code unit = \x{65e5}
171Last code unit = \x{8a9e}
172Subject length lower bound = 3
173    \x{65e5}\x{672c}\x{8a9e}
174 0: \x{65e5}\x{672c}\x{8a9e}
175
176/\x{80}/IB,utf
177------------------------------------------------------------------
178        Bra
179        \x{80}
180        Ket
181        End
182------------------------------------------------------------------
183Capture group count = 0
184Options: utf
185First code unit = \x80
186Subject length lower bound = 1
187
188/\x{084}/IB,utf
189------------------------------------------------------------------
190        Bra
191        \x{84}
192        Ket
193        End
194------------------------------------------------------------------
195Capture group count = 0
196Options: utf
197First code unit = \x84
198Subject length lower bound = 1
199
200/\x{104}/IB,utf
201------------------------------------------------------------------
202        Bra
203        \x{104}
204        Ket
205        End
206------------------------------------------------------------------
207Capture group count = 0
208Options: utf
209First code unit = \x{104}
210Subject length lower bound = 1
211
212/\x{861}/IB,utf
213------------------------------------------------------------------
214        Bra
215        \x{861}
216        Ket
217        End
218------------------------------------------------------------------
219Capture group count = 0
220Options: utf
221First code unit = \x{861}
222Subject length lower bound = 1
223
224/\x{212ab}/IB,utf
225------------------------------------------------------------------
226        Bra
227        \x{212ab}
228        Ket
229        End
230------------------------------------------------------------------
231Capture group count = 0
232Options: utf
233First code unit = \x{212ab}
234Subject length lower bound = 1
235
236/[^ab\xC0-\xF0]/IB,utf
237------------------------------------------------------------------
238        Bra
239        [\x00-`c-\xbf\xf1-\xff] (neg)
240        Ket
241        End
242------------------------------------------------------------------
243Capture group count = 0
244Options: utf
245Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
246  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
247  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
248  5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
249  Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
250  \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
251  \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
252  \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
253  \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
254  \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
255  \xfc \xfd \xfe \xff
256Subject length lower bound = 1
257    \x{f1}
258 0: \x{f1}
259    \x{bf}
260 0: \x{bf}
261    \x{100}
262 0: \x{100}
263    \x{1000}
264 0: \x{1000}
265\= Expect no match
266    \x{c0}
267No match
268    \x{f0}
269No match
270
271/Ā{3,4}/IB,utf
272------------------------------------------------------------------
273        Bra
274        \x{100}{3}
275        \x{100}?+
276        Ket
277        End
278------------------------------------------------------------------
279Capture group count = 0
280Options: utf
281First code unit = \x{100}
282Last code unit = \x{100}
283Subject length lower bound = 3
284  \x{100}\x{100}\x{100}\x{100\x{100}
285 0: \x{100}\x{100}\x{100}
286
287/(\x{100}+|x)/IB,utf
288------------------------------------------------------------------
289        Bra
290        CBra 1
291        \x{100}++
292        Alt
293        x
294        Ket
295        Ket
296        End
297------------------------------------------------------------------
298Capture group count = 1
299Options: utf
300Starting code units: x \xff
301Subject length lower bound = 1
302
303/(\x{100}*a|x)/IB,utf
304------------------------------------------------------------------
305        Bra
306        CBra 1
307        \x{100}*+
308        a
309        Alt
310        x
311        Ket
312        Ket
313        End
314------------------------------------------------------------------
315Capture group count = 1
316Options: utf
317Starting code units: a x \xff
318Subject length lower bound = 1
319
320/(\x{100}{0,2}a|x)/IB,utf
321------------------------------------------------------------------
322        Bra
323        CBra 1
324        \x{100}{0,2}+
325        a
326        Alt
327        x
328        Ket
329        Ket
330        End
331------------------------------------------------------------------
332Capture group count = 1
333Options: utf
334Starting code units: a x \xff
335Subject length lower bound = 1
336
337/(\x{100}{1,2}a|x)/IB,utf
338------------------------------------------------------------------
339        Bra
340        CBra 1
341        \x{100}
342        \x{100}{0,1}+
343        a
344        Alt
345        x
346        Ket
347        Ket
348        End
349------------------------------------------------------------------
350Capture group count = 1
351Options: utf
352Starting code units: x \xff
353Subject length lower bound = 1
354
355/\x{100}/IB,utf
356------------------------------------------------------------------
357        Bra
358        \x{100}
359        Ket
360        End
361------------------------------------------------------------------
362Capture group count = 0
363Options: utf
364First code unit = \x{100}
365Subject length lower bound = 1
366
367/a\x{100}\x{101}*/IB,utf
368------------------------------------------------------------------
369        Bra
370        a\x{100}
371        \x{101}*+
372        Ket
373        End
374------------------------------------------------------------------
375Capture group count = 0
376Options: utf
377First code unit = 'a'
378Last code unit = \x{100}
379Subject length lower bound = 2
380
381/a\x{100}\x{101}+/IB,utf
382------------------------------------------------------------------
383        Bra
384        a\x{100}
385        \x{101}++
386        Ket
387        End
388------------------------------------------------------------------
389Capture group count = 0
390Options: utf
391First code unit = 'a'
392Last code unit = \x{101}
393Subject length lower bound = 3
394
395/[^\x{c4}]/IB
396------------------------------------------------------------------
397        Bra
398        [^\x{c4}]
399        Ket
400        End
401------------------------------------------------------------------
402Capture group count = 0
403Subject length lower bound = 1
404
405/[\x{100}]/IB,utf
406------------------------------------------------------------------
407        Bra
408        \x{100}
409        Ket
410        End
411------------------------------------------------------------------
412Capture group count = 0
413Options: utf
414First code unit = \x{100}
415Subject length lower bound = 1
416    \x{100}
417 0: \x{100}
418    Z\x{100}
419 0: \x{100}
420    \x{100}Z
421 0: \x{100}
422
423/[\xff]/IB,utf
424------------------------------------------------------------------
425        Bra
426        \x{ff}
427        Ket
428        End
429------------------------------------------------------------------
430Capture group count = 0
431Options: utf
432First code unit = \xff
433Subject length lower bound = 1
434    >\x{ff}<
435 0: \x{ff}
436
437/[^\xff]/IB,utf
438------------------------------------------------------------------
439        Bra
440        [^\x{ff}]
441        Ket
442        End
443------------------------------------------------------------------
444Capture group count = 0
445Options: utf
446Subject length lower bound = 1
447
448/\x{100}abc(xyz(?1))/IB,utf
449------------------------------------------------------------------
450        Bra
451        \x{100}abc
452        CBra 1
453        xyz
454        Recurse
455        Ket
456        Ket
457        End
458------------------------------------------------------------------
459Capture group count = 1
460Options: utf
461First code unit = \x{100}
462Last code unit = 'z'
463Subject length lower bound = 7
464
465/\777/I,utf
466Capture group count = 0
467Options: utf
468First code unit = \x{1ff}
469Subject length lower bound = 1
470  \x{1ff}
471 0: \x{1ff}
472  \777
473 0: \x{1ff}
474
475/\x{100}+\x{200}/IB,utf
476------------------------------------------------------------------
477        Bra
478        \x{100}++
479        \x{200}
480        Ket
481        End
482------------------------------------------------------------------
483Capture group count = 0
484Options: utf
485First code unit = \x{100}
486Last code unit = \x{200}
487Subject length lower bound = 2
488
489/\x{100}+X/IB,utf
490------------------------------------------------------------------
491        Bra
492        \x{100}++
493        X
494        Ket
495        End
496------------------------------------------------------------------
497Capture group count = 0
498Options: utf
499First code unit = \x{100}
500Last code unit = 'X'
501Subject length lower bound = 2
502
503/^[\QĀ\E-\QŐ\E/B,utf
504Failed: error 106 at offset 13: missing terminating ] for character class
505
506/X/utf
507    XX\x{d800}\=no_utf_check
508 0: X
509    XX\x{da00}\=no_utf_check
510 0: X
511    XX\x{dc00}\=no_utf_check
512 0: X
513    XX\x{de00}\=no_utf_check
514 0: X
515    XX\x{dfff}\=no_utf_check
516 0: X
517\= Expect UTF error
518    XX\x{d800}
519Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
520    XX\x{da00}
521Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
522    XX\x{dc00}
523Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
524    XX\x{de00}
525Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
526    XX\x{dfff}
527Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
528    XX\x{110000}
529Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 2
530    XX\x{d800}\x{1234}
531Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
532\= Expect no match
533    XX\x{d800}\=offset=3
534No match
535
536/(?<=.)X/utf
537    XX\x{d800}\=offset=3
538Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
539
540/(*UTF16)\x{11234}/
541Failed: error 160 at offset 7: (*VERB) not recognized or malformed
542  abcd\x{11234}pqr
543
544/(*UTF)\x{11234}/I
545Capture group count = 0
546Compile options: <none>
547Overall options: utf
548First code unit = \x{11234}
549Subject length lower bound = 1
550  abcd\x{11234}pqr
551 0: \x{11234}
552
553/(*UTF-32)\x{11234}/
554Failed: error 160 at offset 5: (*VERB) not recognized or malformed
555  abcd\x{11234}pqr
556
557/(*UTF-32)\x{112}/
558Failed: error 160 at offset 5: (*VERB) not recognized or malformed
559  abcd\x{11234}pqr
560
561/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
562Failed: error 160 at offset 14: (*VERB) not recognized or malformed
563
564/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
565Capture group count = 0
566Compile options: <none>
567Overall options: utf
568\R matches any Unicode newline
569Forced newline is CRLF
570First code unit = 'a'
571Last code unit = 'b'
572Subject length lower bound = 3
573
574/\h/I,utf
575Capture group count = 0
576Options: utf
577Starting code units: \x09 \x20 \xa0 \xff
578Subject length lower bound = 1
579    ABC\x{09}
580 0: \x{09}
581    ABC\x{20}
582 0:
583    ABC\x{a0}
584 0: \x{a0}
585    ABC\x{1680}
586 0: \x{1680}
587    ABC\x{180e}
588 0: \x{180e}
589    ABC\x{2000}
590 0: \x{2000}
591    ABC\x{202f}
592 0: \x{202f}
593    ABC\x{205f}
594 0: \x{205f}
595    ABC\x{3000}
596 0: \x{3000}
597
598/\v/I,utf
599Capture group count = 0
600Options: utf
601Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
602Subject length lower bound = 1
603    ABC\x{0a}
604 0: \x{0a}
605    ABC\x{0b}
606 0: \x{0b}
607    ABC\x{0c}
608 0: \x{0c}
609    ABC\x{0d}
610 0: \x{0d}
611    ABC\x{85}
612 0: \x{85}
613    ABC\x{2028}
614 0: \x{2028}
615
616/\h*A/I,utf
617Capture group count = 0
618Options: utf
619Starting code units: \x09 \x20 A \xa0 \xff
620Last code unit = 'A'
621Subject length lower bound = 1
622    CDBABC
623 0: A
624    \x{2000}ABC
625 0: \x{2000}A
626
627/\R*A/I,bsr=unicode,utf
628Capture group count = 0
629Options: utf
630\R matches any Unicode newline
631Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff
632Last code unit = 'A'
633Subject length lower bound = 1
634    CDBABC
635 0: A
636    \x{2028}A
637 0: \x{2028}A
638
639/\v+A/I,utf
640Capture group count = 0
641Options: utf
642Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
643Last code unit = 'A'
644Subject length lower bound = 2
645
646/\s?xxx\s/I,utf
647Capture group count = 0
648Options: utf
649Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x
650Last code unit = 'x'
651Subject length lower bound = 4
652
653/\sxxx\s/I,utf,tables=2
654Capture group count = 0
655Options: utf
656Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0
657Last code unit = 'x'
658Subject length lower bound = 5
659    AB\x{85}xxx\x{a0}XYZ
660 0: \x{85}xxx\x{a0}
661    AB\x{a0}xxx\x{85}XYZ
662 0: \x{a0}xxx\x{85}
663
664/\S \S/I,utf,tables=2
665Capture group count = 0
666Options: utf
667Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
668  \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
669  \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
670  D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
671  i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
672  \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94
673  \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4
674  \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
675  \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
676  \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
677  \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
678  \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
679  \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
680  \xff
681Last code unit = ' '
682Subject length lower bound = 3
683    \x{a2} \x{84}
684 0: \x{a2} \x{84}
685    A Z
686 0: A Z
687
688/a+/utf
689    a\x{123}aa\=offset=1
690 0: aa
691    a\x{123}aa\=offset=2
692 0: aa
693    a\x{123}aa\=offset=3
694 0: a
695\= Expect no match
696    a\x{123}aa\=offset=4
697No match
698\= Expect bad offset error
699    a\x{123}aa\=offset=5
700Failed: error -33: bad offset value
701    a\x{123}aa\=offset=6
702Failed: error -33: bad offset value
703
704/\x{1234}+/Ii,utf
705Capture group count = 0
706Options: caseless utf
707First code unit = \x{1234}
708Subject length lower bound = 1
709
710/\x{1234}+?/Ii,utf
711Capture group count = 0
712Options: caseless utf
713First code unit = \x{1234}
714Subject length lower bound = 1
715
716/\x{1234}++/Ii,utf
717Capture group count = 0
718Options: caseless utf
719First code unit = \x{1234}
720Subject length lower bound = 1
721
722/\x{1234}{2}/Ii,utf
723Capture group count = 0
724Options: caseless utf
725First code unit = \x{1234}
726Last code unit = \x{1234}
727Subject length lower bound = 2
728
729/[^\x{c4}]/IB,utf
730------------------------------------------------------------------
731        Bra
732        [^\x{c4}]
733        Ket
734        End
735------------------------------------------------------------------
736Capture group count = 0
737Options: utf
738Subject length lower bound = 1
739
740/X+\x{200}/IB,utf
741------------------------------------------------------------------
742        Bra
743        X++
744        \x{200}
745        Ket
746        End
747------------------------------------------------------------------
748Capture group count = 0
749Options: utf
750First code unit = 'X'
751Last code unit = \x{200}
752Subject length lower bound = 2
753
754/\R/I,utf
755Capture group count = 0
756Options: utf
757Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
758Subject length lower bound = 1
759
760# Check bad offset
761
762/a/utf
763\= Expect bad UTF-16 offset, or no match in 32-bit
764    \x{10000}\=offset=1
765No match
766    \x{10000}ab\=offset=1
767 0: a
768\= Expect 16-bit match, 32-bit no match
769    \x{10000}ab\=offset=2
770No match
771\= Expect no match
772    \x{10000}ab\=offset=3
773No match
774\= Expect no match in 16-bit, bad offset in 32-bit
775    \x{10000}ab\=offset=4
776Failed: error -33: bad offset value
777\= Expect bad offset
778    \x{10000}ab\=offset=5
779Failed: error -33: bad offset value
780
781/�/utf
782Failed: error -27 at offset 0: UTF-32 error: code points 0xd800-0xdfff are not defined
783
784/\w+\x{C4}/B,utf
785------------------------------------------------------------------
786        Bra
787        \w++
788        \x{c4}
789        Ket
790        End
791------------------------------------------------------------------
792    a\x{C4}\x{C4}
793 0: a\x{c4}
794
795/\w+\x{C4}/B,utf,tables=2
796------------------------------------------------------------------
797        Bra
798        \w+
799        \x{c4}
800        Ket
801        End
802------------------------------------------------------------------
803    a\x{C4}\x{C4}
804 0: a\x{c4}\x{c4}
805
806/\W+\x{C4}/B,utf
807------------------------------------------------------------------
808        Bra
809        \W+
810        \x{c4}
811        Ket
812        End
813------------------------------------------------------------------
814    !\x{C4}
815 0: !\x{c4}
816
817/\W+\x{C4}/B,utf,tables=2
818------------------------------------------------------------------
819        Bra
820        \W++
821        \x{c4}
822        Ket
823        End
824------------------------------------------------------------------
825    !\x{C4}
826 0: !\x{c4}
827
828/\W+\x{A1}/B,utf
829------------------------------------------------------------------
830        Bra
831        \W+
832        \x{a1}
833        Ket
834        End
835------------------------------------------------------------------
836    !\x{A1}
837 0: !\x{a1}
838
839/\W+\x{A1}/B,utf,tables=2
840------------------------------------------------------------------
841        Bra
842        \W+
843        \x{a1}
844        Ket
845        End
846------------------------------------------------------------------
847    !\x{A1}
848 0: !\x{a1}
849
850/X\s+\x{A0}/B,utf
851------------------------------------------------------------------
852        Bra
853        X
854        \s++
855        \x{a0}
856        Ket
857        End
858------------------------------------------------------------------
859    X\x20\x{A0}\x{A0}
860 0: X \x{a0}
861
862/X\s+\x{A0}/B,utf,tables=2
863------------------------------------------------------------------
864        Bra
865        X
866        \s+
867        \x{a0}
868        Ket
869        End
870------------------------------------------------------------------
871    X\x20\x{A0}\x{A0}
872 0: X \x{a0}\x{a0}
873
874/\S+\x{A0}/B,utf
875------------------------------------------------------------------
876        Bra
877        \S+
878        \x{a0}
879        Ket
880        End
881------------------------------------------------------------------
882    X\x{A0}\x{A0}
883 0: X\x{a0}\x{a0}
884
885/\S+\x{A0}/B,utf,tables=2
886------------------------------------------------------------------
887        Bra
888        \S++
889        \x{a0}
890        Ket
891        End
892------------------------------------------------------------------
893    X\x{A0}\x{A0}
894 0: X\x{a0}
895
896/\x{a0}+\s!/B,utf
897------------------------------------------------------------------
898        Bra
899        \x{a0}++
900        \s
901        !
902        Ket
903        End
904------------------------------------------------------------------
905    \x{a0}\x20!
906 0: \x{a0} !
907
908/\x{a0}+\s!/B,utf,tables=2
909------------------------------------------------------------------
910        Bra
911        \x{a0}+
912        \s
913        !
914        Ket
915        End
916------------------------------------------------------------------
917    \x{a0}\x20!
918 0: \x{a0} !
919
920/(*UTF)abc/never_utf
921Failed: error 174 at offset 6: using UTF is disabled by the application
922
923/abc/utf,never_utf
924Failed: error 174 at offset 0: using UTF is disabled by the application
925
926/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
927------------------------------------------------------------------
928        Bra
929     /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
930        Ket
931        End
932------------------------------------------------------------------
933Capture group count = 0
934Options: caseless utf
935First code unit = 'A' (caseless)
936Last code unit = \x{1fb0} (caseless)
937Subject length lower bound = 5
938
939/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
940------------------------------------------------------------------
941        Bra
942        A\x{391}\x{10427}\x{ff3a}\x{1fb0}
943        Ket
944        End
945------------------------------------------------------------------
946Capture group count = 0
947Options: utf
948First code unit = 'A'
949Last code unit = \x{1fb0}
950Subject length lower bound = 5
951
952/AB\x{1fb0}/IB,utf
953------------------------------------------------------------------
954        Bra
955        AB\x{1fb0}
956        Ket
957        End
958------------------------------------------------------------------
959Capture group count = 0
960Options: utf
961First code unit = 'A'
962Last code unit = \x{1fb0}
963Subject length lower bound = 3
964
965/AB\x{1fb0}/IBi,utf
966------------------------------------------------------------------
967        Bra
968     /i AB\x{1fb0}
969        Ket
970        End
971------------------------------------------------------------------
972Capture group count = 0
973Options: caseless utf
974First code unit = 'A' (caseless)
975Last code unit = \x{1fb0} (caseless)
976Subject length lower bound = 3
977
978/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
979Capture group count = 0
980Options: caseless utf
981First code unit = \x{401} (caseless)
982Last code unit = \x{42f} (caseless)
983Subject length lower bound = 17
984    \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
985 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
986    \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
987 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
988
989/[ⱥ]/Bi,utf
990------------------------------------------------------------------
991        Bra
992     /i \x{2c65}
993        Ket
994        End
995------------------------------------------------------------------
996
997/[^ⱥ]/Bi,utf
998------------------------------------------------------------------
999        Bra
1000     /i [^\x{2c65}]
1001        Ket
1002        End
1003------------------------------------------------------------------
1004
1005/[[:blank:]]/B,ucp
1006------------------------------------------------------------------
1007        Bra
1008        [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
1009        Ket
1010        End
1011------------------------------------------------------------------
1012
1013/\x{212a}+/Ii,utf
1014Capture group count = 0
1015Options: caseless utf
1016Starting code units: K k \xff
1017Subject length lower bound = 1
1018    KKkk\x{212a}
1019 0: KKkk\x{212a}
1020
1021/s+/Ii,utf
1022Capture group count = 0
1023Options: caseless utf
1024Starting code units: S s \xff
1025Subject length lower bound = 1
1026    SSss\x{17f}
1027 0: SSss\x{17f}
1028
1029# Non-UTF characters should give errors in both 16-bit and 32-bit modes.
1030
1031/\x{110000}/utf
1032Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
1033
1034/\o{4200000}/utf
1035Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large
1036
1037/\x{100}*A/IB,utf
1038------------------------------------------------------------------
1039        Bra
1040        \x{100}*+
1041        A
1042        Ket
1043        End
1044------------------------------------------------------------------
1045Capture group count = 0
1046Options: utf
1047Starting code units: A \xff
1048Last code unit = 'A'
1049Subject length lower bound = 1
1050    A
1051 0: A
1052
1053/\x{100}*\d(?R)/IB,utf
1054------------------------------------------------------------------
1055        Bra
1056        \x{100}*+
1057        \d
1058        Recurse
1059        Ket
1060        End
1061------------------------------------------------------------------
1062Capture group count = 0
1063Options: utf
1064Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
1065Subject length lower bound = 1
1066
1067/[Z\x{100}]/IB,utf
1068------------------------------------------------------------------
1069        Bra
1070        [Z\x{100}]
1071        Ket
1072        End
1073------------------------------------------------------------------
1074Capture group count = 0
1075Options: utf
1076Starting code units: Z \xff
1077Subject length lower bound = 1
1078    Z\x{100}
1079 0: Z
1080    \x{100}
1081 0: \x{100}
1082    \x{100}Z
1083 0: \x{100}
1084
1085/[z-\x{100}]/IB,utf
1086------------------------------------------------------------------
1087        Bra
1088        [z-\xff\x{100}]
1089        Ket
1090        End
1091------------------------------------------------------------------
1092Capture group count = 0
1093Options: utf
1094Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87
1095  \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96
1096  \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5
1097  \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4
1098  \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3
1099  \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2
1100  \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1
1101  \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0
1102  \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1103Subject length lower bound = 1
1104
1105/[z\Qa-d]Ā\E]/IB,utf
1106------------------------------------------------------------------
1107        Bra
1108        [\-\]adz\x{100}]
1109        Ket
1110        End
1111------------------------------------------------------------------
1112Capture group count = 0
1113Options: utf
1114Starting code units: - ] a d z \xff
1115Subject length lower bound = 1
1116    \x{100}
1117 0: \x{100}
1118    Ā
1119 0: \x{100}
1120
1121/[ab\x{100}]abc(xyz(?1))/IB,utf
1122------------------------------------------------------------------
1123        Bra
1124        [ab\x{100}]
1125        abc
1126        CBra 1
1127        xyz
1128        Recurse
1129        Ket
1130        Ket
1131        End
1132------------------------------------------------------------------
1133Capture group count = 1
1134Options: utf
1135Starting code units: a b \xff
1136Last code unit = 'z'
1137Subject length lower bound = 7
1138
1139/\x{100}*\s/IB,utf
1140------------------------------------------------------------------
1141        Bra
1142        \x{100}*+
1143        \s
1144        Ket
1145        End
1146------------------------------------------------------------------
1147Capture group count = 0
1148Options: utf
1149Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff
1150Subject length lower bound = 1
1151
1152/\x{100}*\d/IB,utf
1153------------------------------------------------------------------
1154        Bra
1155        \x{100}*+
1156        \d
1157        Ket
1158        End
1159------------------------------------------------------------------
1160Capture group count = 0
1161Options: utf
1162Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
1163Subject length lower bound = 1
1164
1165/\x{100}*\w/IB,utf
1166------------------------------------------------------------------
1167        Bra
1168        \x{100}*+
1169        \w
1170        Ket
1171        End
1172------------------------------------------------------------------
1173Capture group count = 0
1174Options: utf
1175Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
1176  Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
1177  \xff
1178Subject length lower bound = 1
1179
1180/\x{100}*\D/IB,utf
1181------------------------------------------------------------------
1182        Bra
1183        \x{100}*
1184        \D
1185        Ket
1186        End
1187------------------------------------------------------------------
1188Capture group count = 0
1189Options: utf
1190Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1191  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1192  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1193  ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
1194  d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
1195  \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91
1196  \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0
1197  \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf
1198  \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe
1199  \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd
1200  \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc
1201  \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb
1202  \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa
1203  \xfb \xfc \xfd \xfe \xff
1204Subject length lower bound = 1
1205
1206/\x{100}*\S/IB,utf
1207------------------------------------------------------------------
1208        Bra
1209        \x{100}*
1210        \S
1211        Ket
1212        End
1213------------------------------------------------------------------
1214Capture group count = 0
1215Options: utf
1216Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
1217  \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
1218  \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
1219  D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
1220  i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
1221  \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93
1222  \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2
1223  \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1
1224  \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0
1225  \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf
1226  \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde
1227  \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed
1228  \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc
1229  \xfd \xfe \xff
1230Subject length lower bound = 1
1231
1232/\x{100}*\W/IB,utf
1233------------------------------------------------------------------
1234        Bra
1235        \x{100}*
1236        \W
1237        Ket
1238        End
1239------------------------------------------------------------------
1240Capture group count = 0
1241Options: utf
1242Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1243  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1244  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1245  ? @ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89
1246  \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98
1247  \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7
1248  \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6
1249  \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5
1250  \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4
1251  \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3
1252  \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2
1253  \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1254Subject length lower bound = 1
1255
1256/[\x{105}-\x{109}]/IBi,utf
1257------------------------------------------------------------------
1258        Bra
1259        [\x{104}-\x{109}]
1260        Ket
1261        End
1262------------------------------------------------------------------
1263Capture group count = 0
1264Options: caseless utf
1265Starting code units: \xff
1266Subject length lower bound = 1
1267    \x{104}
1268 0: \x{104}
1269    \x{105}
1270 0: \x{105}
1271    \x{109}
1272 0: \x{109}
1273\= Expect no match
1274    \x{100}
1275No match
1276    \x{10a}
1277No match
1278
1279/[z-\x{100}]/IBi,utf
1280------------------------------------------------------------------
1281        Bra
1282        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1283        Ket
1284        End
1285------------------------------------------------------------------
1286Capture group count = 0
1287Options: caseless utf
1288Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
1289  \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
1290  \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4
1291  \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
1292  \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
1293  \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
1294  \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
1295  \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
1296  \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
1297  \xff
1298Subject length lower bound = 1
1299    Z
1300 0: Z
1301    z
1302 0: z
1303    \x{39c}
1304 0: \x{39c}
1305    \x{178}
1306 0: \x{178}
1307    |
1308 0: |
1309    \x{80}
1310 0: \x{80}
1311    \x{ff}
1312 0: \x{ff}
1313    \x{100}
1314 0: \x{100}
1315    \x{101}
1316 0: \x{101}
1317\= Expect no match
1318    \x{102}
1319No match
1320    Y
1321No match
1322    y
1323No match
1324
1325/[z-\x{100}]/IBi,utf
1326------------------------------------------------------------------
1327        Bra
1328        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1329        Ket
1330        End
1331------------------------------------------------------------------
1332Capture group count = 0
1333Options: caseless utf
1334Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
1335  \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
1336  \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4
1337  \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
1338  \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
1339  \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
1340  \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
1341  \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
1342  \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
1343  \xff
1344Subject length lower bound = 1
1345
1346/\x{3a3}B/IBi,utf
1347------------------------------------------------------------------
1348        Bra
1349        clist 03a3 03c2 03c3
1350     /i B
1351        Ket
1352        End
1353------------------------------------------------------------------
1354Capture group count = 0
1355Options: caseless utf
1356Starting code units: \xff
1357Last code unit = 'B' (caseless)
1358Subject length lower bound = 2
1359
1360/./utf
1361    \x{110000}
1362Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 0
1363
1364/(*UTF)ab������z/B
1365------------------------------------------------------------------
1366        Bra
1367        ab\x{fd}\x{bf}\x{bf}\x{bf}\x{bf}\x{bf}z
1368        Ket
1369        End
1370------------------------------------------------------------------
1371
1372/ab������z/utf
1373** Failed: character value greater than 0x10ffff cannot be converted to UTF
1374
1375/[\W\p{Any}]/B
1376------------------------------------------------------------------
1377        Bra
1378        [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffffffff}]
1379        Ket
1380        End
1381------------------------------------------------------------------
1382    abc
1383 0: a
1384    123
1385 0: 1
1386
1387/[\W\pL]/B
1388------------------------------------------------------------------
1389        Bra
1390        [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffffffff}]
1391        Ket
1392        End
1393------------------------------------------------------------------
1394    abc
1395 0: a
1396    \x{100}
1397 0: \x{100}
1398    \x{308}
1399 0: \x{308}
1400\= Expect no match
1401    123
1402No match
1403
1404/[\s[:^ascii:]]/B,ucp
1405------------------------------------------------------------------
1406        Bra
1407        [\x80-\xff\p{Xsp}\x{100}-\x{ffffffff}]
1408        Ket
1409        End
1410------------------------------------------------------------------
1411
1412/\pP/ucp
1413    \x{7fffffff}
1414No match
1415
1416# A special extra option allows excaped surrogate code points in 32-bit mode,
1417# but subjects containing them must not be UTF-checked. These patterns give
1418# errors in 16-bit mode.
1419
1420/\x{d800}/I,utf,allow_surrogate_escapes
1421Capture group count = 0
1422Options: utf
1423Extra options: allow_surrogate_escapes
1424First code unit = \x{d800}
1425Subject length lower bound = 1
1426    \x{d800}\=no_utf_check
1427 0: \x{d800}
1428
1429/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
1430    \x{dfff}\x{df01}\=no_utf_check
1431 0: \x{dfff}\x{df01}
1432
1433# This has different starting code units in 8-bit mode.
1434
1435/^[^ab]/IB,utf
1436------------------------------------------------------------------
1437        Bra
1438        ^
1439        [\x00-`c-\xff] (neg)
1440        Ket
1441        End
1442------------------------------------------------------------------
1443Capture group count = 0
1444Compile options: utf
1445Overall options: anchored utf
1446Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1447  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1448  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
1449  5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
1450  Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
1451  \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
1452  \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
1453  \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
1454  \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
1455  \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca
1456  \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9
1457  \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8
1458  \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7
1459  \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1460Subject length lower bound = 1
1461    c
1462 0: c
1463    \x{ff}
1464 0: \x{ff}
1465    \x{100}
1466 0: \x{100}
1467\= Expect no match
1468    aaa
1469No match
1470
1471# Offsets are different in 8-bit mode.
1472
1473/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
1474    123abcáyzabcdef789abcሴqr
1475 1(2) Old 6 6 "" New 6 8 "<>"
1476 2(2) Old 12 12 "" New 14 16 "<>"
1477 3(2) Old 12 15 "def" New 16 21 "<def>"
1478 4(2) Old 21 21 "" New 27 29 "<>"
1479 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
1480
1481# A few script run tests in non-UTF mode (but they need Unicode support)
1482
1483/^(*script_run:.{4})/
1484    \x{3041}\x{30a1}\x{3007}\x{3007}   Hiragana Katakana Han Han
1485 0: \x{3041}\x{30a1}\x{3007}\x{3007}
1486    \x{30a1}\x{3041}\x{3007}\x{3007}   Katakana Hiragana Han Han
1487 0: \x{30a1}\x{3041}\x{3007}\x{3007}
1488    \x{1100}\x{2e80}\x{2e80}\x{1101}   Hangul Han Han Hangul
1489 0: \x{1100}\x{2e80}\x{2e80}\x{1101}
1490
1491/^(*sr:.*)/utf,allow_surrogate_escapes
1492    \x{2e80}\x{3105}\x{2e80}\x{30a1}   Han Bopomofo Han Katakana
1493 0: \x{2e80}\x{3105}\x{2e80}
1494    \x{d800}\x{dfff}                   Surrogates (Unknown) \=no_utf_check
1495 0: \x{d800}
1496
1497/(?(n/utf
1498Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
1499
1500/(?(á/utf
1501Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
1502
1503# Invalid UTF-16/32 tests.
1504
1505/.../g,match_invalid_utf
1506    abcd\x{df00}wxzy\x{df00}pqrs
1507 0: abc
1508 0: wxz
1509 0: pqr
1510    abcd\x{80}wxzy\x{df00}pqrs
1511 0: abc
1512 0: d\x{80}w
1513 0: xzy
1514 0: pqr
1515
1516/abc/match_invalid_utf
1517    ab\x{df00}ab\=ph
1518Partial match: ab
1519\= Expect no match
1520    ab\x{df00}cdef\=ph
1521No match
1522
1523/ab$/match_invalid_utf
1524    ab\x{df00}cdeab
1525 0: ab
1526\= Expect no match
1527    ab\x{df00}cde
1528No match
1529
1530/.../g,match_invalid_utf
1531    abcd\x{80}wxzy\x{df00}pqrs
1532 0: abc
1533 0: d\x{80}w
1534 0: xzy
1535 0: pqr
1536
1537/(?<=x)../g,match_invalid_utf
1538    abcd\x{80}wxzy\x{df00}pqrs
1539 0: zy
1540    abcd\x{80}wxzy\x{df00}xpqrs
1541 0: zy
1542 0: pq
1543
1544/X$/match_invalid_utf
1545\= Expect no match
1546    X\x{df00}
1547No match
1548
1549/(?<=..)X/match_invalid_utf,aftertext
1550    AB\x{df00}AQXYZ
1551 0: X
1552 0+ YZ
1553    AB\x{df00}AQXYZ\=offset=5
1554 0: X
1555 0+ YZ
1556    AB\x{df00}\x{df00}AXYZXC\=offset=5
1557 0: X
1558 0+ C
1559\= Expect no match
1560    AB\x{df00}XYZ
1561No match
1562    AB\x{df00}XYZ\=offset=3
1563No match
1564    AB\x{df00}AXYZ
1565No match
1566    AB\x{df00}AXYZ\=offset=4
1567No match
1568    AB\x{df00}\x{df00}AXYZ\=offset=5
1569No match
1570
1571/.../match_invalid_utf
1572\= Expect no match
1573    A\x{d800}B
1574No match
1575    A\x{110000}B
1576No match
1577
1578/aa/utf,ucp,match_invalid_utf,global
1579    aa\x{d800}aa
1580 0: aa
1581 0: aa
1582
1583/aa/utf,ucp,match_invalid_utf,global
1584    \x{d800}aa
1585 0: aa
1586
1587# ----------------------------------------------------
1588
1589/(*UTF)(?=\x{123})/I
1590Capture group count = 0
1591May match empty string
1592Compile options: <none>
1593Overall options: utf
1594First code unit = \x{123}
1595Subject length lower bound = 1
1596
1597/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf
1598Capture group count = 0
1599Options: utf
1600First code unit = \xc1 (caseless)
1601Last code unit = \x{145} (caseless)
1602Subject length lower bound = 3
1603
1604/[\xff\x{ffff}]/I,utf
1605Capture group count = 0
1606Options: utf
1607Starting code units: \xff
1608Subject length lower bound = 1
1609
1610/[\xff\x{ff}]/I,utf
1611Capture group count = 0
1612Options: utf
1613Starting code units: \xff
1614Subject length lower bound = 1
1615
1616/[\xff\x{ff}]/I
1617Capture group count = 0
1618Starting code units: \xff
1619Subject length lower bound = 1
1620
1621/[Ss]/I
1622Capture group count = 0
1623First code unit = 'S' (caseless)
1624Subject length lower bound = 1
1625
1626/[Ss]/I,utf
1627Capture group count = 0
1628Options: utf
1629Starting code units: S s
1630Subject length lower bound = 1
1631
1632/(?:\x{ff}|\x{3000})/I,utf
1633Capture group count = 0
1634Options: utf
1635Starting code units: \xff
1636Subject length lower bound = 1
1637
1638# ----------------------------------------------------
1639# UCP and casing tests
1640
1641/\x{120}/i,I
1642Capture group count = 0
1643Options: caseless
1644First code unit = \x{120}
1645Subject length lower bound = 1
1646
1647/\x{c1}/i,I,ucp
1648Capture group count = 0
1649Options: caseless ucp
1650First code unit = \xc1 (caseless)
1651Subject length lower bound = 1
1652
1653/[\x{120}\x{121}]/iB,ucp
1654------------------------------------------------------------------
1655        Bra
1656     /i \x{120}
1657        Ket
1658        End
1659------------------------------------------------------------------
1660
1661/[ab\x{120}]+/iB,ucp
1662------------------------------------------------------------------
1663        Bra
1664        [ABab\x{120}-\x{121}]++
1665        Ket
1666        End
1667------------------------------------------------------------------
1668    aABb\x{121}\x{120}
1669 0: aABb\x{121}\x{120}
1670
1671/\x{c1}/i,no_start_optimize
1672\= Expect no match
1673    \x{e1}
1674No match
1675
1676/\x{120}\x{c1}/i,ucp,no_start_optimize
1677    \x{121}\x{e1}
1678 0: \x{121}\xe1
1679
1680/\x{120}\x{c1}/i,ucp
1681    \x{121}\x{e1}
1682 0: \x{121}\xe1
1683
1684/[^\x{120}]/i,no_start_optimize
1685    \x{121}
1686 0: \x{121}
1687
1688/[^\x{120}]/i,ucp,no_start_optimize
1689\= Expect no match
1690    \x{121}
1691No match
1692
1693/[^\x{120}]/i
1694    \x{121}
1695 0: \x{121}
1696
1697/[^\x{120}]/i,ucp
1698\= Expect no match
1699    \x{121}
1700No match
1701
1702/\x{120}{2}/i,ucp
1703    \x{121}\x{121}
1704 0: \x{121}\x{121}
1705
1706/[^\x{120}]{2}/i,ucp
1707\= Expect no match
1708    \x{121}\x{121}
1709No match
1710
1711/\x{c1}+\x{e1}/iB,ucp
1712------------------------------------------------------------------
1713        Bra
1714     /i \x{c1}+
1715     /i \x{e1}
1716        Ket
1717        End
1718------------------------------------------------------------------
1719    \x{c1}\x{c1}\x{c1}
1720 0: \xc1\xc1\xc1
1721
1722/\x{c1}+\x{e1}/iIB,ucp
1723------------------------------------------------------------------
1724        Bra
1725     /i \x{c1}+
1726     /i \x{e1}
1727        Ket
1728        End
1729------------------------------------------------------------------
1730Capture group count = 0
1731Options: caseless ucp
1732First code unit = \xc1 (caseless)
1733Last code unit = \xe1 (caseless)
1734Subject length lower bound = 2
1735    \x{c1}\x{c1}\x{c1}
1736 0: \xc1\xc1\xc1
1737    \x{e1}\x{e1}\x{e1}
1738 0: \xe1\xe1\xe1
1739
1740/a|\x{c1}/iI,ucp
1741Capture group count = 0
1742Options: caseless ucp
1743Starting code units: A a \xc1 \xe1
1744Subject length lower bound = 1
1745    \x{e1}xxx
1746 0: \xe1
1747
1748/\x{c1}|\x{e1}/iI,ucp
1749Capture group count = 0
1750Options: caseless ucp
1751First code unit = \xc1 (caseless)
1752Subject length lower bound = 1
1753
1754/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
1755    X\x{e1}Y
1756 1: >\xc1<
1757
1758/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended
1759    X\x{121}Y
1760 1: >\x{120}<
1761
1762/s/i,ucp
1763    \x{17f}
1764 0: \x{17f}
1765
1766/s/i,utf
1767    \x{17f}
1768 0: \x{17f}
1769
1770/[^s]/i,ucp
1771\= Expect no match
1772    \x{17f}
1773No match
1774
1775/[^s]/i,utf
1776\= Expect no match
1777    \x{17f}
1778No match
1779
1780# ----------------------------------------------------
1781
1782# End of testinput12
1783