1# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF) 2# features that are not compatible with the 8-bit library, or which give 3# different output in 16-bit or 32-bit mode. The output for the two widths is 4# different, so they have separate output files. 5 6#forbid_utf 7#newline_default LF ANY ANYCRLF 8 9/[^\x{c4}]/IB 10 11/\x{100}/I 12 13/ (?: [\040\t] | \( 14(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 15\) )* # optional leading comment 16(?: (?: 17[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 18(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom 19| 20" (?: # opening quote... 21[^\\\x80-\xff\n\015"] # Anything except backslash and quote 22| # or 23\\ [^\x80-\xff] # Escaped something (something != CR) 24)* " # closing quote 25) # initial word 26(?: (?: [\040\t] | \( 27(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 28\) )* \. (?: [\040\t] | \( 29(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 30\) )* (?: 31[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 32(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom 33| 34" (?: # opening quote... 35[^\\\x80-\xff\n\015"] # Anything except backslash and quote 36| # or 37\\ [^\x80-\xff] # Escaped something (something != CR) 38)* " # closing quote 39) )* # further okay, if led by a period 40(?: [\040\t] | \( 41(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 42\) )* @ (?: [\040\t] | \( 43(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 44\) )* (?: 45[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 46(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom 47| \[ # [ 48(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff 49\] # ] 50) # initial subdomain 51(?: # 52(?: [\040\t] | \( 53(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 54\) )* \. # if led by a period... 55(?: [\040\t] | \( 56(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 57\) )* (?: 58[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 59(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom 60| \[ # [ 61(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff 62\] # ] 63) # ...further okay 64)* 65# address 66| # or 67(?: 68[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 69(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom 70| 71" (?: # opening quote... 72[^\\\x80-\xff\n\015"] # Anything except backslash and quote 73| # or 74\\ [^\x80-\xff] # Escaped something (something != CR) 75)* " # closing quote 76) # one word, optionally followed by.... 77(?: 78[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or... 79\( 80(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 81\) | # comments, or... 82 83" (?: # opening quote... 84[^\\\x80-\xff\n\015"] # Anything except backslash and quote 85| # or 86\\ [^\x80-\xff] # Escaped something (something != CR) 87)* " # closing quote 88# quoted strings 89)* 90< (?: [\040\t] | \( 91(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 92\) )* # leading < 93(?: @ (?: [\040\t] | \( 94(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 95\) )* (?: 96[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 97(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom 98| \[ # [ 99(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff 100\] # ] 101) # initial subdomain 102(?: # 103(?: [\040\t] | \( 104(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 105\) )* \. # if led by a period... 106(?: [\040\t] | \( 107(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 108\) )* (?: 109[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 110(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom 111| \[ # [ 112(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff 113\] # ] 114) # ...further okay 115)* 116 117(?: (?: [\040\t] | \( 118(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 119\) )* , (?: [\040\t] | \( 120(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 121\) )* @ (?: [\040\t] | \( 122(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 123\) )* (?: 124[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 125(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom 126| \[ # [ 127(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff 128\] # ] 129) # initial subdomain 130(?: # 131(?: [\040\t] | \( 132(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 133\) )* \. # if led by a period... 134(?: [\040\t] | \( 135(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 136\) )* (?: 137[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 138(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom 139| \[ # [ 140(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff 141\] # ] 142) # ...further okay 143)* 144)* # further okay, if led by comma 145: # closing colon 146(?: [\040\t] | \( 147(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 148\) )* )? # optional route 149(?: 150[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 151(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom 152| 153" (?: # opening quote... 154[^\\\x80-\xff\n\015"] # Anything except backslash and quote 155| # or 156\\ [^\x80-\xff] # Escaped something (something != CR) 157)* " # closing quote 158) # initial word 159(?: (?: [\040\t] | \( 160(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 161\) )* \. (?: [\040\t] | \( 162(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 163\) )* (?: 164[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 165(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom 166| 167" (?: # opening quote... 168[^\\\x80-\xff\n\015"] # Anything except backslash and quote 169| # or 170\\ [^\x80-\xff] # Escaped something (something != CR) 171)* " # closing quote 172) )* # further okay, if led by a period 173(?: [\040\t] | \( 174(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 175\) )* @ (?: [\040\t] | \( 176(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 177\) )* (?: 178[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 179(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom 180| \[ # [ 181(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff 182\] # ] 183) # initial subdomain 184(?: # 185(?: [\040\t] | \( 186(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 187\) )* \. # if led by a period... 188(?: [\040\t] | \( 189(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 190\) )* (?: 191[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... 192(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom 193| \[ # [ 194(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff 195\] # ] 196) # ...further okay 197)* 198# address spec 199(?: [\040\t] | \( 200(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 201\) )* > # trailing > 202# name and address 203) (?: [\040\t] | \( 204(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* 205\) )* # optional trailing comment 206/Ix 207 208/[\h]/B 209 >\x09< 210 211/[\h]+/B 212 >\x09\x20\xa0< 213 214/[\v]/B 215 216/[^\h]/B 217 218/\h+/I 219 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 220 \x{3001}\x{2fff}\x{200a}\xa0\x{2000} 221 222/[\h\x{dc00}]+/IB 223 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 224 \x{3001}\x{2fff}\x{200a}\xa0\x{2000} 225 226/\H+/I 227 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} 228 \x{2000}\x{200a}\x{1fff}\x{200b} 229 \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} 230 \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001} 231 232/[\H\x{d800}]+/ 233 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} 234 \x{2000}\x{200a}\x{1fff}\x{200b} 235 \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} 236 \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001} 237 238/\v+/I 239 \x{2027}\x{2030}\x{2028}\x{2029} 240 \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d 241 242/[\v\x{dc00}]+/IB 243 \x{2027}\x{2030}\x{2028}\x{2029} 244 \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d 245 246/\V+/I 247 \x{2028}\x{2029}\x{2027}\x{2030} 248 \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86 249 250/[\V\x{d800}]+/ 251 \x{2028}\x{2029}\x{2027}\x{2030} 252 \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86 253 254/\R+/I,bsr=unicode 255 \x{2027}\x{2030}\x{2028}\x{2029} 256 \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d 257 258/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I 259 \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00} 260 261/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B 262 263/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi 264 265/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B 266 267/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi 268 269/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark 270 XX 271 272/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark 273 XX 274 275/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref 276 277/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref 278 279/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref 280 281/^\x{ffff}+/i 282 \x{ffff} 283 284/^\x{ffff}?/i 285 \x{ffff} 286 287/^\x{ffff}*/i 288 \x{ffff} 289 290/^\x{ffff}{3}/i 291 \x{ffff}\x{ffff}\x{ffff} 292 293/^\x{ffff}{0,3}/i 294 \x{ffff} 295 296/[^\x00-a]{12,}[^b-\xff]*/B 297 298/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B 299 300/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B 301 302/^[\x{1234}\x{4321}]{2,4}?/ 303 \x{1234}\x{1234}\x{1234} 304 305# Check maximum non-UTF character size for the 16-bit library. 306 307/\x{ffff}/ 308 A\x{ffff}B 309 310/\x{10000}/ 311 312/\o{20000}/ 313 314# Check maximum character size for the 32-bit library. These will all give 315# errors in the 16-bit library. 316 317/\x{110000}/ 318 319/\x{7fffffff}/ 320 321/\x{80000000}/ 322 323/\x{ffffffff}/ 324 325/\x{100000000}/ 326 327/\o{17777777777}/ 328 329/\o{20000000000}/ 330 331/\o{37777777777}/ 332 333/\o{40000000000}/ 334 335/\x{7fffffff}\x{7fffffff}/I 336 337/\x{80000000}\x{80000000}/I 338 339/\x{ffffffff}\x{ffffffff}/I 340 341# Non-UTF characters 342 343/.{2,3}/ 344 \x{400000}\x{400001}\x{400002}\x{400003} 345 346/\x{400000}\x{800000}/IBi 347 348# Check character ranges 349 350/[\H]/IB 351 352/[\V]/IB 353 354/(*THEN:\[A]{65501})/expand 355 356# We can use pcre2test's utf8_input modifier to create wide pattern characters, 357# even though this test is run when UTF is not supported. 358 359/ab������z/utf8_input 360 ab������z 361 ab\x{7fffffff}z 362 363/ab�������z/utf8_input 364 ab�������z 365 ab\x{ffffffff}z 366 367/ab�Az/utf8_input 368 ab�Az 369 ab\x{80000041}z 370 371# End of testinput11 372