1//
2// Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4//
5// This code is free software; you can redistribute it and/or modify it
6// under the terms of the GNU General Public License version 2 only, as
7// published by the Free Software Foundation.
8//
9// This code is distributed in the hope that it will be useful, but WITHOUT
10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12// version 2 for more details (a copy is included in the LICENSE file that
13// accompanied this code).
14//
15// You should have received a copy of the GNU General Public License version
16// 2 along with this work; if not, write to the Free Software Foundation,
17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18//
19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20// or visit www.oracle.com if you need additional information or have any
21// questions.
22//
23//
24// This file contains test cases for regular expressions.
25// A test case consists of three lines: the pattern used in the test, the
26// input to search for the pattern in, and the expected result.
27// The expected result is "true" or "false", then the match (only if true),
28// the number of groups, and the contents of the first four subexpressions
29// that matched; or just "error" for a pattern that is expected to be rejected.
30// Empty lines and lines beginning with comment slashes are ignored.
31//
32// Test unsetting of backed off groups
33^(a)?a
34a
35true a 1
36
37^(a){0,1}a
38a
39true a 1
40
41^(aa(bb)?)+$
42aabbaa
43true aabbaa 2 aa bb
44
45^(aa(bb){0,1})+$
46aabbaa
47true aabbaa 2 aa bb
48
49((a|b)?b)+
50b
51true b 2 b
52
53((a|b){0,1}b)+
54b
55true b 2 b
56
57(aaa)?aaa
58aaa
59true aaa 1
60
61(aaa){0,1}aaa
62aaa
63true aaa 1
64
65^(a(b)?)+$
66aba
67true aba 2 a b
68
69^(a(b){0,1})+$
70aba
71true aba 2 a b
72
73^(a(b(c)?)?)?abc
74abc
75true abc 3
76
77^(a(b(c){0,1}){0,1}){0,1}abc
78abc
79true abc 3
80
81^(a(b(c))).*
82abc
83true abc 3 abc bc c
84
85// Use of the x modifier: pattern whitespace and # comments are ignored
86abc(?x)blah
87abcblah
88true abcblah 0
89
90abc(?x)  blah
91abcblah
92true abcblah 0
93
94abc(?x)  blah  blech
95abcblahblech
96true abcblahblech 0
97
98abc(?x)  blah # ignore comment
99abcblah
100true abcblah 0
101
102// Simple alternation
103a|b
104a
105true a 0
106
107a|b
108z
109false 0
110
111a|b
112b
113true b 0
114
115a|b|cd
116cd
117true cd 0
118
119a|ad
120ad
121true a 0
122
123z(a|ac)b
124zacb
125true zacb 1 ac
126
127// Simple char class
128[abc]+
129ababab
130true ababab 0
131
132[abc]+
133defg
134false 0
135
136[abc]+[def]+[ghi]+
137zzzaaddggzzz
138true aaddgg 0
139
140// Range char class
141[a-g]+
142zzzggg
143true ggg 0
144
145[a-g]+
146mmm
147false 0
148
149[a-]+
150za-9z
151true a- 0
152
153[a-\\u4444]+
154za-9z
155true za 0
156
157// Negated char class
158[^abc]+
159ababab
160false 0
161
162[^abc]+
163aaabbbcccdefg
164true defg 0
165
166// Negation with nested char class and intersection
167[^[c]]
168c
169false 0
170
171[^[a-z]]
172e
173false 0
174
175[^[a-z][A-Z]]
176E
177false 0
178
179[^a-d[0-9][m-p]]
180e
181true e 0
182
183[^a-d[0-9][m-p]]
1848
185false 0
186
187[^[a-c]&&[d-f]]
188z
189true z 0
190
191[^a-c&&d-f]
192a
193true a 0
194
195[^a-m&&m-z]
196m
197false 0
198
199[^a-m&&m-z&&a-c]
200m
201true m 0
202
203[^a-cd-f&&[d-f]]
204c
205true c 0
206
207[^[a-c][d-f]&&abc]
208a
209false 0
210
211[^[a-c][d-f]&&abc]
212d
213true d 0
214
215[^[a-c][d-f]&&abc[def]]
216a
217false 0
218
219[^[a-c][d-f]&&abc[def]]
220e
221false 0
222
223[^[a-c]&&[b-d]&&[c-e]]
224a
225true a 0
226
227[^[a-c]&&[b-d]&&[c-e]]
228c
229false 0
230
231// Making sure a ^ not in first position matches literal ^
232[abc^b]
233b
234true b 0
235
236[abc^b]
237^
238true ^ 0
239
240// Class union and intersection
241[abc[def]]
242b
243true b 0
244
245[abc[def]]
246e
247true e 0
248
249[a-d[0-9][m-p]]
250a
251true a 0
252
253[a-d[0-9][m-p]]
254o
255true o 0
256
257[a-d[0-9][m-p]]
2584
259true 4 0
260
261[a-d[0-9][m-p]]
262e
263false 0
264
265[a-d[0-9][m-p]]
266u
267false 0
268
269[[a-d][0-9][m-p]]
270b
271true b 0
272
273[[a-d][0-9][m-p]]
274z
275false 0
276
277[a-c[d-f[g-i]]]
278a
279true a 0
280
281[a-c[d-f[g-i]]]
282e
283true e 0
284
285[a-c[d-f[g-i]]]
286h
287true h 0
288
289[a-c[d-f[g-i]]]
290m
291false 0
292
293[a-c[d-f[g-i]]m]
294m
295true m 0
296
297[abc[def]ghi]
298a
299true a 0
300
301[abc[def]ghi]
302d
303true d 0
304
305[abc[def]ghi]
306h
307true h 0
308
309[abc[def]ghi]
310w
311false 0
312
313[a-c&&[d-f]]
314a
315false 0
316
317[a-c&&[d-f]]
318e
319false 0
320
321[a-c&&[d-f]]
322z
323false 0
324
325[[a-c]&&[d-f]]
326a
327false 0
328
329[[a-c]&&[d-f]]
330e
331false 0
332
333[[a-c]&&[d-f]]
334z
335false 0
336
337[a-c&&d-f]
338a
339false 0
340
341[a-m&&m-z]
342m
343true m 0
344
345[a-m&&m-z&&a-c]
346m
347false 0
348
349[a-m&&m-z&&a-z]
350m
351true m 0
352
353[[a-m]&&[m-z]]
354a
355false 0
356
357[[a-m]&&[m-z]]
358m
359true m 0
360
361[[a-m]&&[m-z]]
362z
363false 0
364
365[[a-m]&&[^a-c]]
366a
367false 0
368
369[[a-m]&&[^a-c]]
370d
371true d 0
372
373[a-m&&[^a-c]]
374a
375false 0
376
377[a-m&&[^a-c]]
378d
379true d 0
380
381[a-cd-f&&[d-f]]
382a
383false 0
384
385[a-cd-f&&[d-f]]
386e
387true e 0
388
389[[a-c]&&d-fa-c]
390a
391true a 0
392
393[[a-c]&&[d-f][a-c]]
394a
395true a 0
396
397[[a-c][d-f]&&abc]
398a
399true a 0
400
401[[a-c][d-f]&&abc[def]]
402e
403true e 0
404
405[[a-c]&&[b-d]&&[c-e]]
406a
407false 0
408
409[[a-c]&&[b-d]&&[c-e]]
410c
411true c 0
412
413[[a-c]&&[b-d][c-e]&&[u-z]]
414c
415false 0
416
417[abc[^bcd]]
418a
419true a 0
420
421[abc[^bcd]]
422d
423false 0
424
425[a-c&&a-d&&a-eghi]
426b
427true b 0
428
429[a-c&&a-d&&a-eghi]
430g
431false 0
432
433[[a[b]]&&[b[a]]]
434a
435true a 0
436
437[[a]&&[b][c][a]&&[^d]]
438a
439true a 0
440
441[[a]&&[b][c][a]&&[^d]]
442d
443false 0
444
445[[[a-d]&&[c-f]]]
446a
447false 0
448
449[[[a-d]&&[c-f]]]
450c
451true c 0
452
453[[[a-d]&&[c-f]]&&[c]]
454c
455true c 0
456
457[[[a-d]&&[c-f]]&&[c]&&c]
458c
459true c 0
460
461[[[a-d]&&[c-f]]&&[c]&&c&&c]
462c
463true c 0
464
465[[[a-d]&&[c-f]]&&[c]&&c&&[cde]]
466c
467true c 0
468
469[z[abc&&bcd]]
470c
471true c 0
472
473[z[abc&&bcd]&&[u-z]]
474z
475true z 0
476
477[x[abc&&bcd[z]]&&[u-z]]
478z
479false 0
480
481[x[[wz]abc&&bcd[z]]&&[u-z]]
482z
483true z 0
484
485[[abc]&&[def]abc]
486a
487true a 0
488
489[[abc]&&[def]xyz[abc]]
490a
491true a 0
492
493// Android-changed: This syntax \pL isn't documented.
494// \pL
495\p{L}
496a
497true a 0
498
499// Android-changed: This syntax \pL isn't documented.
500// \pL
501\p{L}
5027
503false 0
504
505\p{L}
506a
507true a 0
508
509\p{LC}
510a
511true a 0
512
513\p{LC}
514A
515true A 0
516
517\p{IsL}
518a
519true a 0
520
521\p{IsLC}
522a
523true a 0
524
525\p{IsLC}
526A
527true A 0
528
529\p{IsLC}
5309
531false 0
532
533\P{IsLC}
5349
535true 9 0
536
537// Guillemet left is initial quote punctuation
538\p{Pi}
539\u00ab
540true \u00ab 0
541
542\P{Pi}
543\u00ac
544true \u00ac 0
545
546// Guillemet right is final quote punctuation
547\p{IsPf}
548\u00bb
549true \u00bb 0
550
551\p{P}
552\u00bb
553true \u00bb 0
554
555\p{P}+
556\u00bb
557true \u00bb 0
558
559\P{IsPf}
560\u00bc
561true \u00bc 0
562
563\P{IsP}
564\u00bc
565true \u00bc 0
566
567// Android-removed: L1 isn't a known Unicode category.
568// \p{L1}
569// \u00bc
570// true \u00bc 0
571
572// Android-removed: L1 isn't a known Unicode category.
573// \p{L1}+
574// \u00bc
575// true \u00bc 0
576
577// Android-removed: L1 isn't a known Unicode category.
578// \p{L1}
579// \u02bc
580// false 0
581
582\p{ASCII}
583a
584true a 0
585
586\p{IsASCII}
587a
588true a 0
589
590\p{IsASCII}
591\u0370
592false 0
593
594// Android-changed: This syntax \pL isn't documented.
595// \pLbc
596\p{L}bc
597abc
598true abc 0
599
600a[r\p{InGreek}]c
601a\u0370c
602true a\u0370c 0
603
604a\p{InGreek}
605a\u0370
606true a\u0370 0
607
608a\P{InGreek}
609a\u0370
610false 0
611
612a\P{InGreek}
613ab
614true ab 0
615
616a{^InGreek}
617-
618error
619
620a\p{^InGreek}
621-
622error
623
624a\P{^InGreek}
625-
626error
627
628a\p{InGreek}
629a\u0370
630true a\u0370 0
631
632a[\p{InGreek}]c
633a\u0370c
634true a\u0370c 0
635
636a[\P{InGreek}]c
637a\u0370c
638false 0
639
640a[\P{InGreek}]c
641abc
642true abc 0
643
644a[{^InGreek}]c
645anc
646true anc 0
647
648a[{^InGreek}]c
649azc
650false 0
651
652a[\p{^InGreek}]c
653-
654error
655
656a[\P{^InGreek}]c
657-
658error
659
660a[\p{InGreek}]
661a\u0370
662true a\u0370 0
663
664a[r\p{InGreek}]c
665arc
666true arc 0
667
668a[\p{InGreek}r]c
669arc
670true arc 0
671
672a[r\p{InGreek}]c
673arc
674true arc 0
675
676a[^\p{InGreek}]c
677a\u0370c
678false 0
679
680a[^\P{InGreek}]c
681a\u0370c
682true a\u0370c 0
683
684a[\p{InGreek}&&[^\u0370]]c
685a\u0370c
686false 0
687
688// Test the dot metacharacter
689a.c.+
690a#c%&
691true a#c%& 0
692
693ab.
694ab\n
695false 0
696
697(?s)ab.
698ab\n
699true ab\n 0
700
701a[\p{L}&&[\P{InGreek}]]c
702a\u6000c
703true a\u6000c 0
704
705a[\p{L}&&[\P{InGreek}]]c
706arc
707true arc 0
708
709a[\p{L}&&[\P{InGreek}]]c
710a\u0370c
711false 0
712
713a\p{InGreek}c
714a\u0370c
715true a\u0370c 0
716
717a\p{Sc}
718a$
719true a$ 0
720
721// Test the word char escape sequence
722ab\wc
723abcc
724true abcc 0
725
726\W\w\W
727#r#
728true #r# 0
729
730\W\w\W
731rrrr#ggg
732false 0
733
734abc[\w]
735abcd
736true abcd 0
737
738abc[\sdef]*
739abc  def
740true abc  def 0
741
742abc[\sy-z]*
743abc y z
744true abc y z 0
745
746abc[a-d\sm-p]*
747abcaa mn  p
748true abcaa mn  p 0
749
750// Test the whitespace escape sequence
751ab\sc
752ab c
753true ab c 0
754
755\s\s\s
756blah  err
757false 0
758
759\S\S\s
760blah  err
761true ah  0
762
763// Test the digit escape sequence
764ab\dc
765ab9c
766true ab9c 0
767
768\d\d\d
769blah45
770false 0
771
772// Test the caret metacharacter
773^abc
774abcdef
775true abc 0
776
777^abc
778bcdabc
779false 0
780
781// Greedy ? metacharacter
782a?b
783aaaab
784true ab 0
785
786a{0,1}b
787aaaab
788true ab 0
789
790a?b
791b
792true b 0
793
794a{0,1}b
795b
796true b 0
797
798a?b
799aaaccc
800false 0
801
802a{0,1}b
803aaaccc
804false 0
805
806.?b
807aaaab
808true ab 0
809
810.{0,1}b
811aaaab
812true ab 0
813
814// Reluctant ? metacharacter
815a??b
816aaaab
817true ab 0
818
819a{0,1}?b
820aaaab
821true ab 0
822
823a??b
824b
825true b 0
826
827a{0,1}?b
828b
829true b 0
830
831a??b
832aaaccc
833false 0
834
835a{0,1}?b
836aaaccc
837false 0
838
839.??b
840aaaab
841true ab 0
842
843.{0,1}?b
844aaaab
845true ab 0
846
847// Possessive ? metacharacter
848a?+b
849aaaab
850true ab 0
851
852a{0,1}+b
853aaaab
854true ab 0
855
856a?+b
857b
858true b 0
859
860a{0,1}+b
861b
862true b 0
863
864a?+b
865aaaccc
866false 0
867
868a{0,1}+b
869aaaccc
870false 0
871
872.?+b
873aaaab
874true ab 0
875
876.{0,1}+b
877aaaab
878true ab 0
879
880// Greedy + metacharacter
881a+b
882aaaab
883true aaaab 0
884
885a+b
886b
887false 0
888
889a+b
890aaaccc
891false 0
892
893.+b
894aaaab
895true aaaab 0
896
897// Reluctant + metacharacter
898a+?b
899aaaab
900true aaaab 0
901
902a+?b
903b
904false 0
905
906a+?b
907aaaccc
908false 0
909
910.+?b
911aaaab
912true aaaab 0
913
914// Possessive + metacharacter
915a++b
916aaaab
917true aaaab 0
918
919a++b
920b
921false 0
922
923a++b
924aaaccc
925false 0
926
927.++b
928aaaab
929false 0
930
931// Greedy Repetition
932a{2,3}
933a
934false 0
935
936a{2,3}
937aa
938true aa 0
939
940a{2,3}
941aaa
942true aaa 0
943
944a{2,3}
945aaaa
946true aaa 0
947
948a{3,}
949zzzaaaazzz
950true aaaa 0
951
952a{3,}
953zzzaazzz
954false 0
955
956// Reluctant Repetition
957a{2,3}?
958a
959false 0
960
961a{2,3}?
962aa
963true aa 0
964
965a{2,3}?
966aaa
967true aa 0
968
969a{2,3}?
970aaaa
971true aa 0
972
973// Zero width Positive lookahead
974abc(?=d)
975zzzabcd
976true abc 0
977
978abc(?=d)
979zzzabced
980false 0
981
982// Zero width Negative lookahead
983abc(?!d)
984zzabcd
985false 0
986
987abc(?!d)
988zzabced
989true abc 0
990
991// Zero width Positive lookbehind
992\w(?<=a)
993###abc###
994true a 0
995
996\w(?<=a)
997###ert###
998false 0
999
1000// Zero width Negative lookbehind
1001(?<!a)\w
1002###abc###
1003true a 0
1004
1005(?<!a)c
1006bc
1007true c 0
1008
1009(?<!a)c
1010ac
1011false 0
1012
1013// Nondeterministic group
1014(a+b)+
1015ababab
1016true ababab 1 ab
1017
1018(a|b)+
1019ccccd
1020false 1
1021
1022// Deterministic group
1023(ab)+
1024ababab
1025true ababab 1 ab
1026
1027(ab)+
1028accccd
1029false 1
1030
1031(ab)*
1032ababab
1033true ababab 1 ab
1034
1035(ab)(cd*)
1036zzzabczzz
1037true abc 2 ab c
1038
1039abc(d)*abc
1040abcdddddabc
1041true abcdddddabc 1 d
1042
1043// Escaped metacharacter
1044\*
1045*
1046true * 0
1047
1048\\
1049\
1050true \ 0
1051
1052\\
1053\\\\
1054true \ 0
1055
1056// Back references
1057(a*)bc\1
1058zzzaabcaazzz
1059true aabcaa 1 aa
1060
1061(a*)bc\1
1062zzzaabcazzz
1063true abca 1 a
1064
1065(gt*)(dde)*(yu)\1\3(vv)
1066zzzgttddeddeyugttyuvvzzz
1067true gttddeddeyugttyuvv 4 gtt dde yu vv
1068
1069// Greedy * metacharacter
1070a*b
1071aaaab
1072true aaaab 0
1073
1074a*b
1075b
1076true b 0
1077
1078a*b
1079aaaccc
1080false 0
1081
1082.*b
1083aaaab
1084true aaaab 0
1085
1086// Reluctant * metacharacter
1087a*?b
1088aaaab
1089true aaaab 0
1090
1091a*?b
1092b
1093true b 0
1094
1095a*?b
1096aaaccc
1097false 0
1098
1099.*?b
1100aaaab
1101true aaaab 0
1102
1103// Possessive * metacharacter
1104a*+b
1105aaaab
1106true aaaab 0
1107
1108a*+b
1109b
1110true b 0
1111
1112a*+b
1113aaaccc
1114false 0
1115
1116.*+b
1117aaaab
1118false 0
1119
1120// Case insensitivity
1121(?i)foobar
1122fOobAr
1123true fOobAr 0
1124
1125f(?i)oobar
1126fOobAr
1127true fOobAr 0
1128
1129foo(?i)bar
1130fOobAr
1131false 0
1132
1133(?i)foo[bar]+
1134foObAr
1135true foObAr 0
1136
1137(?i)foo[a-r]+
1138foObAr
1139true foObAr 0
1140
1141// Disable metacharacters with \Q...\E - test literals of both length <= 3
1142// and > 3 so that the BM (Boyer-Moore) optimization is part of the test
1143\Q***\Eabc
1144***abc
1145true ***abc 0
1146
1147bl\Q***\Eabc
1148bl***abc
1149true bl***abc 0
1150
1151\Q***abc
1152***abc
1153true ***abc 0
1154
1155blah\Q***\Eabc
1156blah***abc
1157true blah***abc 0
1158
1159\Q***abc
1160***abc
1161true ***abc 0
1162
1163\Q*ab
1164*ab
1165true *ab 0
1166
1167blah\Q***abc
1168blah***abc
1169true blah***abc 0
1170
1171bla\Q***abc
1172bla***abc
1173true bla***abc 0
1174
1175// Escapes in char classes
1176[ab\Qdef\E]
1177d
1178true d 0
1179
1180[ab\Q[\E]
1181[
1182true [ 0
1183
1184[\Q]\E]
1185]
1186true ] 0
1187
1188[\Q\\E]
1189\
1190true \ 0
1191
1192[\Q(\E]
1193(
1194true ( 0
1195
1196[\n-#]
1197!
1198true ! 0
1199
1200[\n-#]
1201-
1202false 0
1203
1204[\w-#]
1205!
1206false 0
1207
1208[\w-#]
1209a
1210true a 0
1211
1212[\w-#]
1213-
1214true - 0
1215
1216[\w-#]
1217#
1218true # 0
1219
1220[\043]+
1221blahblah#blech
1222true # 0
1223
1224[\042-\044]+
1225blahblah#blech
1226true # 0
1227
1228[\u1234-\u1236]
1229blahblah\u1235blech
1230true \u1235 0
1231
1232[^\043]*
1233blahblah#blech
1234true blahblah 0
1235
1236(|f)?+
1237foo
1238true  1
1239
1240(|f){0,1}+
1241foo
1242true  1
1243
1244//----------------------------------------------------------------
1245// Unary numeral primality testing
1246//----------------------------------------------------------------
1247
1248// Input is 7 (a prime), in unary; reluctant quantifier
1249^(11+?)\1+$
12501111111
1251false 1
1252
1253^(1{2,}?)\1+$
12541111111
1255false 1
1256
1257// Input is 8 (a power of two), in unary; reluctant quantifier
1258// group is shortest possible (2)
1259^(11+?)\1+$
126011111111
1261true 11111111 1 11
1262
1263^(1{2,}?)\1+$
126411111111
1265true 11111111 1 11
1266
1267// Input is 7 (a prime), in unary; greedy quantifier
1268^(11+)\1+$
12691111111
1270false 1
1271
1272^(1{2,})\1+$
12731111111
1274false 1
1275
1276// Input is 8 (a power of two), in unary; greedy quantifier
1277// group is longest possible (4)
1278^(11+)\1+$
127911111111
1280true 11111111 1 1111
1281
1282^(1{2,})\1+$
128311111111
1284true 11111111 1 1111
1285