1# These test special UTF and UCP features of DFA matching. The output is
2# different for the different widths.
3
4#subject dfa
5
6# ----------------------------------------------------
7# These are a selection of the more comprehensive tests that are run for
8# non-DFA matching.
9
10/X/utf
11    XX\x{d800}
12Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
13    XX\x{d800}\=offset=3
14Error -36 (bad UTF-8 offset)
15    XX\x{d800}\=no_utf_check
16 0: X
17    XX\x{da00}
18Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
19    XX\x{da00}\=no_utf_check
20 0: X
21    XX\x{dc00}
22Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
23    XX\x{dc00}\=no_utf_check
24 0: X
25    XX\x{de00}
26Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
27    XX\x{de00}\=no_utf_check
28 0: X
29    XX\x{dfff}
30Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
31    XX\x{dfff}\=no_utf_check
32 0: X
33    XX\x{110000}
34Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2
35    XX\x{d800}\x{1234}
36Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
37
38/badutf/utf
39    X\xdf
40Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1
41    XX\xef
42Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
43    XXX\xef\x80
44Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
45    X\xf7
46Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 1
47    XX\xf7\x80
48Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
49    XXX\xf7\x80\x80
50Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
51
52/shortutf/utf
53    XX\xdf\=ph
54Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
55    XX\xef\=ph
56Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
57    XX\xef\x80\=ph
58Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
59    \xf7\=ph
60Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
61    \xf7\x80\=ph
62Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
63
64# ----------------------------------------------------
65# UCP and casing tests - except for the first two, these will all fail in 8-bit
66# mode because they are testing UCP without UTF and use characters > 255.
67
68/\x{c1}/i,no_start_optimize
69\= Expect no match
70    \x{e1}
71No match
72
73/\x{c1}+\x{e1}/iB,ucp
74------------------------------------------------------------------
75        Bra
76     /i \x{c1}+
77     /i \x{e1}
78        Ket
79        End
80------------------------------------------------------------------
81    \x{c1}\x{c1}\x{c1}
82 0: \xc1\xc1\xc1
83 1: \xc1\xc1
84    \x{e1}\x{e1}\x{e1}
85 0: \xe1\xe1\xe1
86 1: \xe1\xe1
87
88/\x{120}\x{c1}/i,ucp,no_start_optimize
89Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large
90    \x{121}\x{e1}
91
92/\x{120}\x{c1}/i,ucp
93Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large
94    \x{121}\x{e1}
95
96/[^\x{120}]/i,no_start_optimize
97Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
98    \x{121}
99
100/[^\x{120}]/i,ucp,no_start_optimize
101Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
102\= Expect no match
103    \x{121}
104
105/[^\x{120}]/i
106Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
107    \x{121}
108
109/[^\x{120}]/i,ucp
110Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
111\= Expect no match
112    \x{121}
113
114/\x{120}{2}/i,ucp
115Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large
116    \x{121}\x{121}
117
118/[^\x{120}]{2}/i,ucp
119Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
120\= Expect no match
121    \x{121}\x{121}
122
123# ----------------------------------------------------
124
125# End of testinput14
126