1# This set of tests is for UTF-16 and UTF-32 support, including Unicode 2# properties. It is relevant only to the 16-bit and 32-bit libraries. The 3# output is different for each library, so there are separate output files. 4 5/���xxx/IB,utf,no_utf_check 6 7/abc/utf 8 �] 9 10# Check maximum character size 11 12/\x{ffff}/IB,utf 13 14/\x{10000}/IB,utf 15 16/\x{100}/IB,utf 17 18/\x{1000}/IB,utf 19 20/\x{10000}/IB,utf 21 22/\x{100000}/IB,utf 23 24/\x{10ffff}/IB,utf 25 26/[\x{ff}]/IB,utf 27 28/[\x{100}]/IB,utf 29 30/\x80/IB,utf 31 32/\xff/IB,utf 33 34/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf 35 \x{D55c}\x{ad6d}\x{C5B4} 36 37/\x{65e5}\x{672c}\x{8a9e}/IB,utf 38 \x{65e5}\x{672c}\x{8a9e} 39 40/\x{80}/IB,utf 41 42/\x{084}/IB,utf 43 44/\x{104}/IB,utf 45 46/\x{861}/IB,utf 47 48/\x{212ab}/IB,utf 49 50/[^ab\xC0-\xF0]/IB,utf 51 \x{f1} 52 \x{bf} 53 \x{100} 54 \x{1000} 55\= Expect no match 56 \x{c0} 57 \x{f0} 58 59/Ā{3,4}/IB,utf 60 \x{100}\x{100}\x{100}\x{100\x{100} 61 62/(\x{100}+|x)/IB,utf 63 64/(\x{100}*a|x)/IB,utf 65 66/(\x{100}{0,2}a|x)/IB,utf 67 68/(\x{100}{1,2}a|x)/IB,utf 69 70/\x{100}/IB,utf 71 72/a\x{100}\x{101}*/IB,utf 73 74/a\x{100}\x{101}+/IB,utf 75 76/[^\x{c4}]/IB 77 78/[\x{100}]/IB,utf 79 \x{100} 80 Z\x{100} 81 \x{100}Z 82 83/[\xff]/IB,utf 84 >\x{ff}< 85 86/[^\xff]/IB,utf 87 88/\x{100}abc(xyz(?1))/IB,utf 89 90/\777/I,utf 91 \x{1ff} 92 \777 93 94/\x{100}+\x{200}/IB,utf 95 96/\x{100}+X/IB,utf 97 98/^[\QĀ\E-\QŐ\E/B,utf 99 100/X/utf 101 XX\x{d800}\=no_utf_check 102 XX\x{da00}\=no_utf_check 103 XX\x{dc00}\=no_utf_check 104 XX\x{de00}\=no_utf_check 105 XX\x{dfff}\=no_utf_check 106\= Expect UTF error 107 XX\x{d800} 108 XX\x{da00} 109 XX\x{dc00} 110 XX\x{de00} 111 XX\x{dfff} 112 XX\x{110000} 113 XX\x{d800}\x{1234} 114\= Expect no match 115 XX\x{d800}\=offset=3 116 117/(?<=.)X/utf 118 XX\x{d800}\=offset=3 119 120/(*UTF16)\x{11234}/ 121 abcd\x{11234}pqr 122 123/(*UTF)\x{11234}/I 124 abcd\x{11234}pqr 125 126/(*UTF-32)\x{11234}/ 127 abcd\x{11234}pqr 128 129/(*UTF-32)\x{112}/ 130 abcd\x{11234}pqr 131 132/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I 133 134/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I 135 136/\h/I,utf 137 ABC\x{09} 138 ABC\x{20} 139 ABC\x{a0} 140 ABC\x{1680} 141 ABC\x{180e} 142 ABC\x{2000} 143 ABC\x{202f} 144 ABC\x{205f} 145 ABC\x{3000} 146 147/\v/I,utf 148 ABC\x{0a} 149 ABC\x{0b} 150 ABC\x{0c} 151 ABC\x{0d} 152 ABC\x{85} 153 ABC\x{2028} 154 155/\h*A/I,utf 156 CDBABC 157 \x{2000}ABC 158 159/\R*A/I,bsr=unicode,utf 160 CDBABC 161 \x{2028}A 162 163/\v+A/I,utf 164 165/\s?xxx\s/I,utf 166 167/\sxxx\s/I,utf,tables=2 168 AB\x{85}xxx\x{a0}XYZ 169 AB\x{a0}xxx\x{85}XYZ 170 171/\S \S/I,utf,tables=2 172 \x{a2} \x{84} 173 A Z 174 175/a+/utf 176 a\x{123}aa\=offset=1 177 a\x{123}aa\=offset=2 178 a\x{123}aa\=offset=3 179\= Expect no match 180 a\x{123}aa\=offset=4 181\= Expect bad offset error 182 a\x{123}aa\=offset=5 183 a\x{123}aa\=offset=6 184 185/\x{1234}+/Ii,utf 186 187/\x{1234}+?/Ii,utf 188 189/\x{1234}++/Ii,utf 190 191/\x{1234}{2}/Ii,utf 192 193/[^\x{c4}]/IB,utf 194 195/X+\x{200}/IB,utf 196 197/\R/I,utf 198 199# Check bad offset 200 201/a/utf 202\= Expect bad UTF-16 offset, or no match in 32-bit 203 \x{10000}\=offset=1 204 \x{10000}ab\=offset=1 205\= Expect 16-bit match, 32-bit no match 206 \x{10000}ab\=offset=2 207\= Expect no match 208 \x{10000}ab\=offset=3 209\= Expect no match in 16-bit, bad offset in 32-bit 210 \x{10000}ab\=offset=4 211\= Expect bad offset 212 \x{10000}ab\=offset=5 213 214/�/utf 215 216/\w+\x{C4}/B,utf 217 a\x{C4}\x{C4} 218 219/\w+\x{C4}/B,utf,tables=2 220 a\x{C4}\x{C4} 221 222/\W+\x{C4}/B,utf 223 !\x{C4} 224 225/\W+\x{C4}/B,utf,tables=2 226 !\x{C4} 227 228/\W+\x{A1}/B,utf 229 !\x{A1} 230 231/\W+\x{A1}/B,utf,tables=2 232 !\x{A1} 233 234/X\s+\x{A0}/B,utf 235 X\x20\x{A0}\x{A0} 236 237/X\s+\x{A0}/B,utf,tables=2 238 X\x20\x{A0}\x{A0} 239 240/\S+\x{A0}/B,utf 241 X\x{A0}\x{A0} 242 243/\S+\x{A0}/B,utf,tables=2 244 X\x{A0}\x{A0} 245 246/\x{a0}+\s!/B,utf 247 \x{a0}\x20! 248 249/\x{a0}+\s!/B,utf,tables=2 250 \x{a0}\x20! 251 252/(*UTF)abc/never_utf 253 254/abc/utf,never_utf 255 256/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf 257 258/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf 259 260/AB\x{1fb0}/IB,utf 261 262/AB\x{1fb0}/IBi,utf 263 264/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf 265 \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} 266 \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} 267 268/[ⱥ]/Bi,utf 269 270/[^ⱥ]/Bi,utf 271 272/[[:blank:]]/B,ucp 273 274/\x{212a}+/Ii,utf 275 KKkk\x{212a} 276 277/s+/Ii,utf 278 SSss\x{17f} 279 280# Non-UTF characters should give errors in both 16-bit and 32-bit modes. 281 282/\x{110000}/utf 283 284/\o{4200000}/utf 285 286/\x{100}*A/IB,utf 287 A 288 289/\x{100}*\d(?R)/IB,utf 290 291/[Z\x{100}]/IB,utf 292 Z\x{100} 293 \x{100} 294 \x{100}Z 295 296/[z-\x{100}]/IB,utf 297 298/[z\Qa-d]Ā\E]/IB,utf 299 \x{100} 300 Ā 301 302/[ab\x{100}]abc(xyz(?1))/IB,utf 303 304/\x{100}*\s/IB,utf 305 306/\x{100}*\d/IB,utf 307 308/\x{100}*\w/IB,utf 309 310/\x{100}*\D/IB,utf 311 312/\x{100}*\S/IB,utf 313 314/\x{100}*\W/IB,utf 315 316/[\x{105}-\x{109}]/IBi,utf 317 \x{104} 318 \x{105} 319 \x{109} 320\= Expect no match 321 \x{100} 322 \x{10a} 323 324/[z-\x{100}]/IBi,utf 325 Z 326 z 327 \x{39c} 328 \x{178} 329 | 330 \x{80} 331 \x{ff} 332 \x{100} 333 \x{101} 334\= Expect no match 335 \x{102} 336 Y 337 y 338 339/[z-\x{100}]/IBi,utf 340 341/\x{3a3}B/IBi,utf 342 343/./utf 344 \x{110000} 345 346/(*UTF)ab������z/B 347 348/ab������z/utf 349 350/[\W\p{Any}]/B 351 abc 352 123 353 354/[\W\pL]/B 355 abc 356 \x{100} 357 \x{308} 358\= Expect no match 359 123 360 361/[\s[:^ascii:]]/B,ucp 362 363/\pP/ucp 364 \x{7fffffff} 365 366# A special extra option allows excaped surrogate code points in 32-bit mode, 367# but subjects containing them must not be UTF-checked. These patterns give 368# errors in 16-bit mode. 369 370/\x{d800}/I,utf,allow_surrogate_escapes 371 \x{d800}\=no_utf_check 372 373/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes 374 \x{dfff}\x{df01}\=no_utf_check 375 376# This has different starting code units in 8-bit mode. 377 378/^[^ab]/IB,utf 379 c 380 \x{ff} 381 \x{100} 382\= Expect no match 383 aaa 384 385# Offsets are different in 8-bit mode. 386 387/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout 388 123abcáyzabcdef789abcሴqr 389 390# A few script run tests in non-UTF mode (but they need Unicode support) 391 392/^(*script_run:.{4})/ 393 \x{3041}\x{30a1}\x{3007}\x{3007} Hiragana Katakana Han Han 394 \x{30a1}\x{3041}\x{3007}\x{3007} Katakana Hiragana Han Han 395 \x{1100}\x{2e80}\x{2e80}\x{1101} Hangul Han Han Hangul 396 397/^(*sr:.*)/utf,allow_surrogate_escapes 398 \x{2e80}\x{3105}\x{2e80}\x{30a1} Han Bopomofo Han Katakana 399 \x{d800}\x{dfff} Surrogates (Unknown) \=no_utf_check 400 401/(?(n/utf 402 403/(?(á/utf 404 405# Invalid UTF-16/32 tests. 406 407/.../g,match_invalid_utf 408 abcd\x{df00}wxzy\x{df00}pqrs 409 abcd\x{80}wxzy\x{df00}pqrs 410 411/abc/match_invalid_utf 412 ab\x{df00}ab\=ph 413\= Expect no match 414 ab\x{df00}cdef\=ph 415 416/ab$/match_invalid_utf 417 ab\x{df00}cdeab 418\= Expect no match 419 ab\x{df00}cde 420 421/.../g,match_invalid_utf 422 abcd\x{80}wxzy\x{df00}pqrs 423 424/(?<=x)../g,match_invalid_utf 425 abcd\x{80}wxzy\x{df00}pqrs 426 abcd\x{80}wxzy\x{df00}xpqrs 427 428/X$/match_invalid_utf 429\= Expect no match 430 X\x{df00} 431 432/(?<=..)X/match_invalid_utf,aftertext 433 AB\x{df00}AQXYZ 434 AB\x{df00}AQXYZ\=offset=5 435 AB\x{df00}\x{df00}AXYZXC\=offset=5 436\= Expect no match 437 AB\x{df00}XYZ 438 AB\x{df00}XYZ\=offset=3 439 AB\x{df00}AXYZ 440 AB\x{df00}AXYZ\=offset=4 441 AB\x{df00}\x{df00}AXYZ\=offset=5 442 443/.../match_invalid_utf 444\= Expect no match 445 A\x{d800}B 446 A\x{110000}B 447 448/aa/utf,ucp,match_invalid_utf,global 449 aa\x{d800}aa 450 451/aa/utf,ucp,match_invalid_utf,global 452 \x{d800}aa 453 454# ---------------------------------------------------- 455 456/(*UTF)(?=\x{123})/I 457 458/[\x{c1}\x{e1}]X[\x{145}\x{146}]/I,utf 459 460/[\xff\x{ffff}]/I,utf 461 462/[\xff\x{ff}]/I,utf 463 464/[\xff\x{ff}]/I 465 466/[Ss]/I 467 468/[Ss]/I,utf 469 470/(?:\x{ff}|\x{3000})/I,utf 471 472# ---------------------------------------------------- 473# UCP and casing tests 474 475/\x{120}/i,I 476 477/\x{c1}/i,I,ucp 478 479/[\x{120}\x{121}]/iB,ucp 480 481/[ab\x{120}]+/iB,ucp 482 aABb\x{121}\x{120} 483 484/\x{c1}/i,no_start_optimize 485\= Expect no match 486 \x{e1} 487 488/\x{120}\x{c1}/i,ucp,no_start_optimize 489 \x{121}\x{e1} 490 491/\x{120}\x{c1}/i,ucp 492 \x{121}\x{e1} 493 494/[^\x{120}]/i,no_start_optimize 495 \x{121} 496 497/[^\x{120}]/i,ucp,no_start_optimize 498\= Expect no match 499 \x{121} 500 501/[^\x{120}]/i 502 \x{121} 503 504/[^\x{120}]/i,ucp 505\= Expect no match 506 \x{121} 507 508/\x{120}{2}/i,ucp 509 \x{121}\x{121} 510 511/[^\x{120}]{2}/i,ucp 512\= Expect no match 513 \x{121}\x{121} 514 515/\x{c1}+\x{e1}/iB,ucp 516 \x{c1}\x{c1}\x{c1} 517 518/\x{c1}+\x{e1}/iIB,ucp 519 \x{c1}\x{c1}\x{c1} 520 \x{e1}\x{e1}\x{e1} 521 522/a|\x{c1}/iI,ucp 523 \x{e1}xxx 524 525/\x{c1}|\x{e1}/iI,ucp 526 527/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended 528 X\x{e1}Y 529 530/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended 531 X\x{121}Y 532 533/s/i,ucp 534 \x{17f} 535 536/s/i,utf 537 \x{17f} 538 539/[^s]/i,ucp 540\= Expect no match 541 \x{17f} 542 543/[^s]/i,utf 544\= Expect no match 545 \x{17f} 546 547# ---------------------------------------------------- 548 549# End of testinput12 550