1# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
2# features that are not compatible with the 8-bit library, or which give
3# different output in 16-bit or 32-bit mode. The output for the two widths is
4# different, so they have separate output files.
5
6#forbid_utf
7#newline_default LF ANY ANYCRLF
8
9/[^\x{c4}]/IB
10
11/\x{100}/I
12
13/  (?: [\040\t] |  \(
14(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
15\)  )*                          # optional leading comment
16(?:    (?:
17[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
18(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
19|
20" (?:                      # opening quote...
21[^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
22|                     #    or
23\\ [^\x80-\xff]           #   Escaped something (something != CR)
24)* "  # closing quote
25)                    # initial word
26(?:  (?: [\040\t] |  \(
27(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
28\)  )*  \.  (?: [\040\t] |  \(
29(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
30\)  )*   (?:
31[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
32(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
33|
34" (?:                      # opening quote...
35[^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
36|                     #    or
37\\ [^\x80-\xff]           #   Escaped something (something != CR)
38)* "  # closing quote
39)  )* # further okay, if led by a period
40(?: [\040\t] |  \(
41(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
42\)  )*  @  (?: [\040\t] |  \(
43(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
44\)  )*    (?:
45[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
46(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
47|   \[                         # [
48(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
49\]                        #           ]
50)                           # initial subdomain
51(?:                                  #
52(?: [\040\t] |  \(
53(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
54\)  )*  \.                        # if led by a period...
55(?: [\040\t] |  \(
56(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
57\)  )*   (?:
58[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
59(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
60|   \[                         # [
61(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
62\]                        #           ]
63)                     #   ...further okay
64)*
65# address
66|                     #  or
67(?:
68[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
69(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
70|
71" (?:                      # opening quote...
72[^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
73|                     #    or
74\\ [^\x80-\xff]           #   Escaped something (something != CR)
75)* "  # closing quote
76)             # one word, optionally followed by....
77(?:
78[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037]  |  # atom and space parts, or...
79\(
80(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
81\)       |  # comments, or...
82
83" (?:                      # opening quote...
84[^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
85|                     #    or
86\\ [^\x80-\xff]           #   Escaped something (something != CR)
87)* "  # closing quote
88# quoted strings
89)*
90<  (?: [\040\t] |  \(
91(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
92\)  )*                     # leading <
93(?:  @  (?: [\040\t] |  \(
94(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
95\)  )*    (?:
96[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
97(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
98|   \[                         # [
99(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
100\]                        #           ]
101)                           # initial subdomain
102(?:                                  #
103(?: [\040\t] |  \(
104(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
105\)  )*  \.                        # if led by a period...
106(?: [\040\t] |  \(
107(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
108\)  )*   (?:
109[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
110(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
111|   \[                         # [
112(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
113\]                        #           ]
114)                     #   ...further okay
115)*
116
117(?:  (?: [\040\t] |  \(
118(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
119\)  )*  ,  (?: [\040\t] |  \(
120(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
121\)  )*  @  (?: [\040\t] |  \(
122(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
123\)  )*    (?:
124[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
125(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
126|   \[                         # [
127(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
128\]                        #           ]
129)                           # initial subdomain
130(?:                                  #
131(?: [\040\t] |  \(
132(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
133\)  )*  \.                        # if led by a period...
134(?: [\040\t] |  \(
135(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
136\)  )*   (?:
137[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
138(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
139|   \[                         # [
140(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
141\]                        #           ]
142)                     #   ...further okay
143)*
144)* # further okay, if led by comma
145:                                # closing colon
146(?: [\040\t] |  \(
147(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
148\)  )*  )? #       optional route
149(?:
150[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
151(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
152|
153" (?:                      # opening quote...
154[^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
155|                     #    or
156\\ [^\x80-\xff]           #   Escaped something (something != CR)
157)* "  # closing quote
158)                    # initial word
159(?:  (?: [\040\t] |  \(
160(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
161\)  )*  \.  (?: [\040\t] |  \(
162(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
163\)  )*   (?:
164[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
165(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
166|
167" (?:                      # opening quote...
168[^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
169|                     #    or
170\\ [^\x80-\xff]           #   Escaped something (something != CR)
171)* "  # closing quote
172)  )* # further okay, if led by a period
173(?: [\040\t] |  \(
174(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
175\)  )*  @  (?: [\040\t] |  \(
176(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
177\)  )*    (?:
178[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
179(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
180|   \[                         # [
181(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
182\]                        #           ]
183)                           # initial subdomain
184(?:                                  #
185(?: [\040\t] |  \(
186(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
187\)  )*  \.                        # if led by a period...
188(?: [\040\t] |  \(
189(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
190\)  )*   (?:
191[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
192(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
193|   \[                         # [
194(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
195\]                        #           ]
196)                     #   ...further okay
197)*
198#       address spec
199(?: [\040\t] |  \(
200(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
201\)  )*  > #                  trailing >
202# name and address
203)  (?: [\040\t] |  \(
204(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
205\)  )*                       # optional trailing comment
206/Ix
207
208/[\h]/B
209    >\x09<
210
211/[\h]+/B
212    >\x09\x20\xa0<
213
214/[\v]/B
215
216/[^\h]/B
217
218/\h+/I
219    \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
220    \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
221
222/[\h\x{dc00}]+/IB
223    \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
224    \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
225
226/\H+/I
227    \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
228    \x{2000}\x{200a}\x{1fff}\x{200b}
229    \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
230    \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
231
232/[\H\x{d800}]+/
233    \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
234    \x{2000}\x{200a}\x{1fff}\x{200b}
235    \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
236    \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
237
238/\v+/I
239    \x{2027}\x{2030}\x{2028}\x{2029}
240    \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
241
242/[\v\x{dc00}]+/IB
243    \x{2027}\x{2030}\x{2028}\x{2029}
244    \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
245
246/\V+/I
247    \x{2028}\x{2029}\x{2027}\x{2030}
248    \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
249
250/[\V\x{d800}]+/
251    \x{2028}\x{2029}\x{2027}\x{2030}
252    \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
253
254/\R+/I,bsr=unicode
255    \x{2027}\x{2030}\x{2028}\x{2029}
256    \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
257
258/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
259    \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
260
261/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
262
263/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
264
265/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
266
267/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
268
269/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
270    XX
271
272/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
273    XX
274
275/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
276
277/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
278
279/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
280
281/^\x{ffff}+/i
282    \x{ffff}
283
284/^\x{ffff}?/i
285    \x{ffff}
286
287/^\x{ffff}*/i
288    \x{ffff}
289
290/^\x{ffff}{3}/i
291    \x{ffff}\x{ffff}\x{ffff}
292
293/^\x{ffff}{0,3}/i
294    \x{ffff}
295
296/[^\x00-a]{12,}[^b-\xff]*/B
297
298/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
299
300/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
301
302/^[\x{1234}\x{4321}]{2,4}?/
303    \x{1234}\x{1234}\x{1234}
304
305# Check maximum non-UTF character size for the 16-bit library.
306
307/\x{ffff}/
308    A\x{ffff}B
309
310/\x{10000}/
311
312/\o{20000}/
313
314# Check maximum character size for the 32-bit library. These will all give
315# errors in the 16-bit library.
316
317/\x{110000}/
318
319/\x{7fffffff}/
320
321/\x{80000000}/
322
323/\x{ffffffff}/
324
325/\x{100000000}/
326
327/\o{17777777777}/
328
329/\o{20000000000}/
330
331/\o{37777777777}/
332
333/\o{40000000000}/
334
335/\x{7fffffff}\x{7fffffff}/I
336
337/\x{80000000}\x{80000000}/I
338
339/\x{ffffffff}\x{ffffffff}/I
340
341# Non-UTF characters
342
343/.{2,3}/
344    \x{400000}\x{400001}\x{400002}\x{400003}
345
346/\x{400000}\x{800000}/IBi
347
348# Check character ranges
349
350/[\H]/IB
351
352/[\V]/IB
353
354/(*THEN:\[A]{65501})/expand
355
356# We can use pcre2test's utf8_input modifier to create wide pattern characters,
357# even though this test is run when UTF is not supported.
358
359/ab������z/utf8_input
360    ab������z
361    ab\x{7fffffff}z
362
363/ab�������z/utf8_input
364    ab�������z
365    ab\x{ffffffff}z
366
367/ab�Az/utf8_input
368    ab�Az
369    ab\x{80000041}z
370
371# End of testinput11
372