1// 2// Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. 3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4// 5// This code is free software; you can redistribute it and/or modify it 6// under the terms of the GNU General Public License version 2 only, as 7// published by the Free Software Foundation. 8// 9// This code is distributed in the hope that it will be useful, but WITHOUT 10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12// version 2 for more details (a copy is included in the LICENSE file that 13// accompanied this code). 14// 15// You should have received a copy of the GNU General Public License version 16// 2 along with this work; if not, write to the Free Software Foundation, 17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18// 19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20// or visit www.oracle.com if you need additional information or have any 21// questions. 22// 23// 24// This file contains test cases for regular expressions. 25// A test case consists of three lines: 26// The first line is a pattern used in the test 27// The second line is the input to search for the pattern in 28// The third line is a concatenation of the match, the number of groups, 29// and the contents of the first four subexpressions. 30// Empty lines and lines beginning with comment slashes are ignored. 31// 32// Test unsetting of backed off groups 33^(a)?a 34a 35true a 1 36 37^(a){0,1}a 38a 39true a 1 40 41^(aa(bb)?)+$ 42aabbaa 43true aabbaa 2 aa bb 44 45^(aa(bb){0,1})+$ 46aabbaa 47true aabbaa 2 aa bb 48 49((a|b)?b)+ 50b 51true b 2 b 52 53((a|b){0,1}b)+ 54b 55true b 2 b 56 57(aaa)?aaa 58aaa 59true aaa 1 60 61(aaa){0,1}aaa 62aaa 63true aaa 1 64 65^(a(b)?)+$ 66aba 67true aba 2 a b 68 69^(a(b){0,1})+$ 70aba 71true aba 2 a b 72 73^(a(b(c)?)?)?abc 74abc 75true abc 3 76 77^(a(b(c){0,1}){0,1}){0,1}abc 78abc 79true abc 3 80 81^(a(b(c))).* 82abc 83true abc 3 abc bc c 84 85// use of x modifier 86abc(?x)blah 87abcblah 88true abcblah 0 89 90abc(?x) blah 91abcblah 92true abcblah 0 93 94abc(?x) blah blech 95abcblahblech 96true abcblahblech 0 97 98abc(?x) blah # ignore comment 99abcblah 100true abcblah 0 101 102// Simple alternation 103a|b 104a 105true a 0 106 107a|b 108z 109false 0 110 111a|b 112b 113true b 0 114 115a|b|cd 116cd 117true cd 0 118 119a|ad 120ad 121true a 0 122 123z(a|ac)b 124zacb 125true zacb 1 ac 126 127// Simple char class 128[abc]+ 129ababab 130true ababab 0 131 132[abc]+ 133defg 134false 0 135 136[abc]+[def]+[ghi]+ 137zzzaaddggzzz 138true aaddgg 0 139 140// Range char class 141[a-g]+ 142zzzggg 143true ggg 0 144 145[a-g]+ 146mmm 147false 0 148 149[a-]+ 150za-9z 151true a- 0 152 153[a-\\u4444]+ 154za-9z 155true za 0 156 157// Negated char class 158[^abc]+ 159ababab 160false 0 161 162[^abc]+ 163aaabbbcccdefg 164true defg 0 165 166// Negation with nested char class and intersection 167[^[c]] 168c 169false 0 170 171[^[a-z]] 172e 173false 0 174 175[^[a-z][A-Z]] 176E 177false 0 178 179[^a-d[0-9][m-p]] 180e 181true e 0 182 183[^a-d[0-9][m-p]] 1848 185false 0 186 187[^[a-c]&&[d-f]] 188z 189true z 0 190 191[^a-c&&d-f] 192a 193true a 0 194 195[^a-m&&m-z] 196m 197false 0 198 199[^a-m&&m-z&&a-c] 200m 201true m 0 202 203[^a-cd-f&&[d-f]] 204c 205true c 0 206 207[^[a-c][d-f]&&abc] 208a 209false 0 210 211[^[a-c][d-f]&&abc] 212d 213true d 0 214 215[^[a-c][d-f]&&abc[def]] 216a 217false 0 218 219[^[a-c][d-f]&&abc[def]] 220e 221false 0 222 223[^[a-c]&&[b-d]&&[c-e]] 224a 225true a 0 226 227[^[a-c]&&[b-d]&&[c-e]] 228c 229false 0 230 231// Making sure a ^ not in first position matches literal ^ 232[abc^b] 233b 234true b 0 235 236[abc^b] 237^ 238true ^ 0 239 240// Class union and intersection 241[abc[def]] 242b 243true b 0 244 245[abc[def]] 246e 247true e 0 248 249[a-d[0-9][m-p]] 250a 251true a 0 252 253[a-d[0-9][m-p]] 254o 255true o 0 256 257[a-d[0-9][m-p]] 2584 259true 4 0 260 261[a-d[0-9][m-p]] 262e 263false 0 264 265[a-d[0-9][m-p]] 266u 267false 0 268 269[[a-d][0-9][m-p]] 270b 271true b 0 272 273[[a-d][0-9][m-p]] 274z 275false 0 276 277[a-c[d-f[g-i]]] 278a 279true a 0 280 281[a-c[d-f[g-i]]] 282e 283true e 0 284 285[a-c[d-f[g-i]]] 286h 287true h 0 288 289[a-c[d-f[g-i]]] 290m 291false 0 292 293[a-c[d-f[g-i]]m] 294m 295true m 0 296 297[abc[def]ghi] 298a 299true a 0 300 301[abc[def]ghi] 302d 303true d 0 304 305[abc[def]ghi] 306h 307true h 0 308 309[abc[def]ghi] 310w 311false 0 312 313[a-c&&[d-f]] 314a 315false 0 316 317[a-c&&[d-f]] 318e 319false 0 320 321[a-c&&[d-f]] 322z 323false 0 324 325[[a-c]&&[d-f]] 326a 327false 0 328 329[[a-c]&&[d-f]] 330e 331false 0 332 333[[a-c]&&[d-f]] 334z 335false 0 336 337[a-c&&d-f] 338a 339false 0 340 341[a-m&&m-z] 342m 343true m 0 344 345[a-m&&m-z&&a-c] 346m 347false 0 348 349[a-m&&m-z&&a-z] 350m 351true m 0 352 353[[a-m]&&[m-z]] 354a 355false 0 356 357[[a-m]&&[m-z]] 358m 359true m 0 360 361[[a-m]&&[m-z]] 362z 363false 0 364 365[[a-m]&&[^a-c]] 366a 367false 0 368 369[[a-m]&&[^a-c]] 370d 371true d 0 372 373[a-m&&[^a-c]] 374a 375false 0 376 377[a-m&&[^a-c]] 378d 379true d 0 380 381[a-cd-f&&[d-f]] 382a 383false 0 384 385[a-cd-f&&[d-f]] 386e 387true e 0 388 389[[a-c]&&d-fa-c] 390a 391true a 0 392 393[[a-c]&&[d-f][a-c]] 394a 395true a 0 396 397[[a-c][d-f]&&abc] 398a 399true a 0 400 401[[a-c][d-f]&&abc[def]] 402e 403true e 0 404 405[[a-c]&&[b-d]&&[c-e]] 406a 407false 0 408 409[[a-c]&&[b-d]&&[c-e]] 410c 411true c 0 412 413[[a-c]&&[b-d][c-e]&&[u-z]] 414c 415false 0 416 417[abc[^bcd]] 418a 419true a 0 420 421[abc[^bcd]] 422d 423false 0 424 425[a-c&&a-d&&a-eghi] 426b 427true b 0 428 429[a-c&&a-d&&a-eghi] 430g 431false 0 432 433[[a[b]]&&[b[a]]] 434a 435true a 0 436 437[[a]&&[b][c][a]&&[^d]] 438a 439true a 0 440 441[[a]&&[b][c][a]&&[^d]] 442d 443false 0 444 445[[[a-d]&&[c-f]]] 446a 447false 0 448 449[[[a-d]&&[c-f]]] 450c 451true c 0 452 453[[[a-d]&&[c-f]]&&[c]] 454c 455true c 0 456 457[[[a-d]&&[c-f]]&&[c]&&c] 458c 459true c 0 460 461[[[a-d]&&[c-f]]&&[c]&&c&&c] 462c 463true c 0 464 465[[[a-d]&&[c-f]]&&[c]&&c&&[cde]] 466c 467true c 0 468 469[z[abc&&bcd]] 470c 471true c 0 472 473[z[abc&&bcd]&&[u-z]] 474z 475true z 0 476 477[x[abc&&bcd[z]]&&[u-z]] 478z 479false 0 480 481[x[[wz]abc&&bcd[z]]&&[u-z]] 482z 483true z 0 484 485[[abc]&&[def]abc] 486a 487true a 0 488 489[[abc]&&[def]xyz[abc]] 490a 491true a 0 492 493// Android-changed: This syntax \pL isn't documented. 494// \pL 495\p{L} 496a 497true a 0 498 499// Android-changed: This syntax \pL isn't documented. 500// \pL 501\p{L} 5027 503false 0 504 505\p{L} 506a 507true a 0 508 509\p{LC} 510a 511true a 0 512 513\p{LC} 514A 515true A 0 516 517\p{IsL} 518a 519true a 0 520 521\p{IsLC} 522a 523true a 0 524 525\p{IsLC} 526A 527true A 0 528 529\p{IsLC} 5309 531false 0 532 533\P{IsLC} 5349 535true 9 0 536 537// Guillemet left is initial quote punctuation 538\p{Pi} 539\u00ab 540true \u00ab 0 541 542\P{Pi} 543\u00ac 544true \u00ac 0 545 546// Guillemet right is final quote punctuation 547\p{IsPf} 548\u00bb 549true \u00bb 0 550 551\p{P} 552\u00bb 553true \u00bb 0 554 555\p{P}+ 556\u00bb 557true \u00bb 0 558 559\P{IsPf} 560\u00bc 561true \u00bc 0 562 563\P{IsP} 564\u00bc 565true \u00bc 0 566 567// Android-removed: L1 isn't a known Unicode category. 568// \p{L1} 569// \u00bc 570// true \u00bc 0 571 572// Android-removed: L1 isn't a known Unicode category. 573// \p{L1}+ 574// \u00bc 575// true \u00bc 0 576 577// Android-removed: L1 isn't a known Unicode category. 578// \p{L1} 579// \u02bc 580// false 0 581 582\p{ASCII} 583a 584true a 0 585 586\p{IsASCII} 587a 588true a 0 589 590\p{IsASCII} 591\u0370 592false 0 593 594// Android-changed: This syntax \pL isn't documented. 595// \pLbc 596\p{L}bc 597abc 598true abc 0 599 600a[r\p{InGreek}]c 601a\u0370c 602true a\u0370c 0 603 604a\p{InGreek} 605a\u0370 606true a\u0370 0 607 608a\P{InGreek} 609a\u0370 610false 0 611 612a\P{InGreek} 613ab 614true ab 0 615 616a{^InGreek} 617- 618error 619 620a\p{^InGreek} 621- 622error 623 624a\P{^InGreek} 625- 626error 627 628a\p{InGreek} 629a\u0370 630true a\u0370 0 631 632a[\p{InGreek}]c 633a\u0370c 634true a\u0370c 0 635 636a[\P{InGreek}]c 637a\u0370c 638false 0 639 640a[\P{InGreek}]c 641abc 642true abc 0 643 644a[{^InGreek}]c 645anc 646true anc 0 647 648a[{^InGreek}]c 649azc 650false 0 651 652a[\p{^InGreek}]c 653- 654error 655 656a[\P{^InGreek}]c 657- 658error 659 660a[\p{InGreek}] 661a\u0370 662true a\u0370 0 663 664a[r\p{InGreek}]c 665arc 666true arc 0 667 668a[\p{InGreek}r]c 669arc 670true arc 0 671 672a[r\p{InGreek}]c 673arc 674true arc 0 675 676a[^\p{InGreek}]c 677a\u0370c 678false 0 679 680a[^\P{InGreek}]c 681a\u0370c 682true a\u0370c 0 683 684a[\p{InGreek}&&[^\u0370]]c 685a\u0370c 686false 0 687 688// Test the dot metacharacter 689a.c.+ 690a#c%& 691true a#c%& 0 692 693ab. 694ab\n 695false 0 696 697(?s)ab. 698ab\n 699true ab\n 0 700 701a[\p{L}&&[\P{InGreek}]]c 702a\u6000c 703true a\u6000c 0 704 705a[\p{L}&&[\P{InGreek}]]c 706arc 707true arc 0 708 709a[\p{L}&&[\P{InGreek}]]c 710a\u0370c 711false 0 712 713a\p{InGreek}c 714a\u0370c 715true a\u0370c 0 716 717a\p{Sc} 718a$ 719true a$ 0 720 721// Test the word char escape sequence 722ab\wc 723abcc 724true abcc 0 725 726\W\w\W 727#r# 728true #r# 0 729 730\W\w\W 731rrrr#ggg 732false 0 733 734abc[\w] 735abcd 736true abcd 0 737 738abc[\sdef]* 739abc def 740true abc def 0 741 742abc[\sy-z]* 743abc y z 744true abc y z 0 745 746abc[a-d\sm-p]* 747abcaa mn p 748true abcaa mn p 0 749 750// Test the whitespace escape sequence 751ab\sc 752ab c 753true ab c 0 754 755\s\s\s 756blah err 757false 0 758 759\S\S\s 760blah err 761true ah 0 762 763// Test the digit escape sequence 764ab\dc 765ab9c 766true ab9c 0 767 768\d\d\d 769blah45 770false 0 771 772// Test the caret metacharacter 773^abc 774abcdef 775true abc 0 776 777^abc 778bcdabc 779false 0 780 781// Greedy ? metacharacter 782a?b 783aaaab 784true ab 0 785 786a{0,1}b 787aaaab 788true ab 0 789 790a?b 791b 792true b 0 793 794a{0,1}b 795b 796true b 0 797 798a?b 799aaaccc 800false 0 801 802a{0,1}b 803aaaccc 804false 0 805 806.?b 807aaaab 808true ab 0 809 810.{0,1}b 811aaaab 812true ab 0 813 814// Reluctant ? metacharacter 815a??b 816aaaab 817true ab 0 818 819a{0,1}?b 820aaaab 821true ab 0 822 823a??b 824b 825true b 0 826 827a{0,1}?b 828b 829true b 0 830 831a??b 832aaaccc 833false 0 834 835a{0,1}?b 836aaaccc 837false 0 838 839.??b 840aaaab 841true ab 0 842 843.{0,1}?b 844aaaab 845true ab 0 846 847// Possessive ? metacharacter 848a?+b 849aaaab 850true ab 0 851 852a{0,1}+b 853aaaab 854true ab 0 855 856a?+b 857b 858true b 0 859 860a{0,1}+b 861b 862true b 0 863 864a?+b 865aaaccc 866false 0 867 868a{0,1}+b 869aaaccc 870false 0 871 872.?+b 873aaaab 874true ab 0 875 876.{0,1}+b 877aaaab 878true ab 0 879 880// Greedy + metacharacter 881a+b 882aaaab 883true aaaab 0 884 885a+b 886b 887false 0 888 889a+b 890aaaccc 891false 0 892 893.+b 894aaaab 895true aaaab 0 896 897// Reluctant + metacharacter 898a+?b 899aaaab 900true aaaab 0 901 902a+?b 903b 904false 0 905 906a+?b 907aaaccc 908false 0 909 910.+?b 911aaaab 912true aaaab 0 913 914// Possessive + metacharacter 915a++b 916aaaab 917true aaaab 0 918 919a++b 920b 921false 0 922 923a++b 924aaaccc 925false 0 926 927.++b 928aaaab 929false 0 930 931// Greedy Repetition 932a{2,3} 933a 934false 0 935 936a{2,3} 937aa 938true aa 0 939 940a{2,3} 941aaa 942true aaa 0 943 944a{2,3} 945aaaa 946true aaa 0 947 948a{3,} 949zzzaaaazzz 950true aaaa 0 951 952a{3,} 953zzzaazzz 954false 0 955 956// Reluctant Repetition 957a{2,3}? 958a 959false 0 960 961a{2,3}? 962aa 963true aa 0 964 965a{2,3}? 966aaa 967true aa 0 968 969a{2,3}? 970aaaa 971true aa 0 972 973// Zero width Positive lookahead 974abc(?=d) 975zzzabcd 976true abc 0 977 978abc(?=d) 979zzzabced 980false 0 981 982// Zero width Negative lookahead 983abc(?!d) 984zzabcd 985false 0 986 987abc(?!d) 988zzabced 989true abc 0 990 991// Zero width Positive lookbehind 992\w(?<=a) 993###abc### 994true a 0 995 996\w(?<=a) 997###ert### 998false 0 999 1000// Zero width Negative lookbehind 1001(?<!a)\w 1002###abc### 1003true a 0 1004 1005(?<!a)c 1006bc 1007true c 0 1008 1009(?<!a)c 1010ac 1011false 0 1012 1013// Nondeterministic group 1014(a+b)+ 1015ababab 1016true ababab 1 ab 1017 1018(a|b)+ 1019ccccd 1020false 1 1021 1022// Deterministic group 1023(ab)+ 1024ababab 1025true ababab 1 ab 1026 1027(ab)+ 1028accccd 1029false 1 1030 1031(ab)* 1032ababab 1033true ababab 1 ab 1034 1035(ab)(cd*) 1036zzzabczzz 1037true abc 2 ab c 1038 1039abc(d)*abc 1040abcdddddabc 1041true abcdddddabc 1 d 1042 1043// Escaped metacharacter 1044\* 1045* 1046true * 0 1047 1048\\ 1049\ 1050true \ 0 1051 1052\\ 1053\\\\ 1054true \ 0 1055 1056// Back references 1057(a*)bc\1 1058zzzaabcaazzz 1059true aabcaa 1 aa 1060 1061(a*)bc\1 1062zzzaabcazzz 1063true abca 1 a 1064 1065(gt*)(dde)*(yu)\1\3(vv) 1066zzzgttddeddeyugttyuvvzzz 1067true gttddeddeyugttyuvv 4 gtt dde yu vv 1068 1069// Greedy * metacharacter 1070a*b 1071aaaab 1072true aaaab 0 1073 1074a*b 1075b 1076true b 0 1077 1078a*b 1079aaaccc 1080false 0 1081 1082.*b 1083aaaab 1084true aaaab 0 1085 1086// Reluctant * metacharacter 1087a*?b 1088aaaab 1089true aaaab 0 1090 1091a*?b 1092b 1093true b 0 1094 1095a*?b 1096aaaccc 1097false 0 1098 1099.*?b 1100aaaab 1101true aaaab 0 1102 1103// Possessive * metacharacter 1104a*+b 1105aaaab 1106true aaaab 0 1107 1108a*+b 1109b 1110true b 0 1111 1112a*+b 1113aaaccc 1114false 0 1115 1116.*+b 1117aaaab 1118false 0 1119 1120// Case insensitivity 1121(?i)foobar 1122fOobAr 1123true fOobAr 0 1124 1125f(?i)oobar 1126fOobAr 1127true fOobAr 0 1128 1129foo(?i)bar 1130fOobAr 1131false 0 1132 1133(?i)foo[bar]+ 1134foObAr 1135true foObAr 0 1136 1137(?i)foo[a-r]+ 1138foObAr 1139true foObAr 0 1140 1141// Disable metacharacters- test both length <=3 and >3 1142// So that the BM optimization is part of test 1143\Q***\Eabc 1144***abc 1145true ***abc 0 1146 1147bl\Q***\Eabc 1148bl***abc 1149true bl***abc 0 1150 1151\Q***abc 1152***abc 1153true ***abc 0 1154 1155blah\Q***\Eabc 1156blah***abc 1157true blah***abc 0 1158 1159\Q***abc 1160***abc 1161true ***abc 0 1162 1163\Q*ab 1164*ab 1165true *ab 0 1166 1167blah\Q***abc 1168blah***abc 1169true blah***abc 0 1170 1171bla\Q***abc 1172bla***abc 1173true bla***abc 0 1174 1175// Escapes in char classes 1176[ab\Qdef\E] 1177d 1178true d 0 1179 1180[ab\Q[\E] 1181[ 1182true [ 0 1183 1184[\Q]\E] 1185] 1186true ] 0 1187 1188[\Q\\E] 1189\ 1190true \ 0 1191 1192[\Q(\E] 1193( 1194true ( 0 1195 1196[\n-#] 1197! 1198true ! 0 1199 1200[\n-#] 1201- 1202false 0 1203 1204[\w-#] 1205! 1206false 0 1207 1208[\w-#] 1209a 1210true a 0 1211 1212[\w-#] 1213- 1214true - 0 1215 1216[\w-#] 1217# 1218true # 0 1219 1220[\043]+ 1221blahblah#blech 1222true # 0 1223 1224[\042-\044]+ 1225blahblah#blech 1226true # 0 1227 1228[\u1234-\u1236] 1229blahblah\u1235blech 1230true \u1235 0 1231 1232[^\043]* 1233blahblah#blech 1234true blahblah 0 1235 1236(|f)?+ 1237foo 1238true 1 1239 1240(|f){0,1}+ 1241foo 1242true 1 1243 1244//---------------------------------------------------------------- 1245// Unary numeral primality testing 1246//---------------------------------------------------------------- 1247 1248// Input is 7 (a prime), in unary; reluctant quantifier 1249^(11+?)\1+$ 12501111111 1251false 1 1252 1253^(1{2,}?)\1+$ 12541111111 1255false 1 1256 1257// Input is 8 (a power of two), in unary; reluctant quantifier 1258// group is shortest possible (2) 1259^(11+?)\1+$ 126011111111 1261true 11111111 1 11 1262 1263^(1{2,}?)\1+$ 126411111111 1265true 11111111 1 11 1266 1267// Input is 7 (a prime), in unary; greedy quantifier 1268^(11+)\1+$ 12691111111 1270false 1 1271 1272^(1{2,})\1+$ 12731111111 1274false 1 1275 1276// Input is 8 (a power of two), in unary; greedy quantifier 1277// group is longest possible (4) 1278^(11+)\1+$ 127911111111 1280true 11111111 1 1111 1281 1282^(1{2,})\1+$ 128311111111 1284true 11111111 1 1111 1285