1 /*
2  * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 /**
25  * @test
26  * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
27  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
28  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
29  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
30  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
31  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
32  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
33  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
34  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
35  * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
36  * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
37  * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
38  * 8216332 8214245 8237599 8241055 8247546 8258259 8037397 8269753
39  *
40  * @library /test/lib
41  * @library /lib/testlibrary/java/lang
42  * @build jdk.test.lib.RandomFactory
43  * @author Mike McCloskey
44  * @run testng RegExTest
45  * @key randomness
46  */
47 
48 package test.java.util.regex;
49 
50 import java.io.*;
51 import java.math.BigInteger;
52 import java.nio.CharBuffer;
53 import java.nio.file.Files;
54 import java.nio.file.Path;
55 import java.nio.file.Paths;
56 import java.util.ArrayList;
57 import java.util.Arrays;
58 import java.util.HashMap;
59 import java.util.List;
60 import java.util.Map;
61 import java.util.Random;
62 import java.util.Scanner;
63 import java.util.function.Function;
64 import java.util.function.Predicate;
65 import java.util.regex.Matcher;
66 import java.util.regex.MatchResult;
67 import java.util.regex.Pattern;
68 import java.util.regex.PatternSyntaxException;
69 import java.util.stream.IntStream;
70 import java.util.stream.Stream;
71 
72 import org.testng.annotations.Test;
73 import org.testng.Assert;
74 
75 
76 import jdk.test.lib.RandomFactory;
77 import org.testng.annotations.BeforeMethod;
78 import org.testng.annotations.Test;
79 
80 import static org.testng.Assert.assertEquals;
81 import static org.testng.Assert.assertFalse;
82 import static org.testng.Assert.assertNotEquals;
83 import static org.testng.Assert.assertNotNull;
84 import static org.testng.Assert.assertTrue;
85 import static org.testng.Assert.fail;
86 
87 import static org.testng.Assert.assertEquals;
88 import static org.testng.Assert.assertFalse;
89 import static org.testng.Assert.assertNotEquals;
90 import static org.testng.Assert.assertNotSame;
91 import static org.testng.Assert.assertThrows;
92 import static org.testng.Assert.assertTrue;
93 import static org.testng.Assert.fail;
94 import static org.testng.Assert.expectThrows;
95 
96 /**
97  * This is a test class created to check the operation of
98  * the Pattern and Matcher classes.
99  */
100 public class RegExTest {
101 
102     private static final Random generator = RandomFactory.getRandom();
103 
104     // Utility functions
105 
getRandomAlphaString(int length)106     private static String getRandomAlphaString(int length) {
107 
108         StringBuilder buf = new StringBuilder(length);
109         for (int i=0; i<length; i++) {
110             char randChar = (char)(97 + generator.nextInt(26));
111             buf.append(randChar);
112         }
113         return buf.toString();
114     }
115 
check(Matcher m, String expected)116     private static void check(Matcher m, String expected) {
117         m.find();
118         assertEquals(m.group(), expected);
119     }
120 
check(Matcher m, String result, boolean expected)121     private static void check(Matcher m, String result, boolean expected) {
122         m.find();
123         assertEquals(m.group().equals(result), expected);
124     }
125 
check(Pattern p, String s, boolean expected)126     private static void check(Pattern p, String s, boolean expected) {
127         assertEquals(p.matcher(s).find(), expected);
128     }
129 
check(String p, String s, boolean expected)130     private static void check(String p, String s, boolean expected) {
131         Matcher matcher = Pattern.compile(p).matcher(s);
132         assertEquals(matcher.find(), expected);
133     }
134 
check(String p, char c, boolean expected)135     private static void check(String p, char c, boolean expected) {
136         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
137         Pattern pattern = Pattern.compile(propertyPattern);
138         char[] ca = new char[1]; ca[0] = c;
139         Matcher matcher = pattern.matcher(new String(ca));
140         assertTrue(matcher.find());
141     }
142 
check(String p, int codePoint, boolean expected)143     private static void check(String p, int codePoint, boolean expected) {
144         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
145         Pattern pattern = Pattern.compile(propertyPattern);
146         char[] ca = Character.toChars(codePoint);
147         Matcher matcher = pattern.matcher(new String(ca));
148         assertTrue(matcher.find());
149     }
150 
check(String p, int flag, String input, String s, boolean expected)151     private static void check(String p, int flag, String input, String s,
152                               boolean expected)
153     {
154         Pattern pattern = Pattern.compile(p, flag);
155         Matcher matcher = pattern.matcher(input);
156         if (expected)
157             check(matcher, s, expected);
158         else
159             check(pattern, input, expected);
160     }
161 
check(Pattern p, String s, String g, String expected)162     private static void check(Pattern p, String s, String g, String expected) {
163         Matcher m = p.matcher(s);
164         m.find();
165         assertFalse(!m.group(g).equals(expected) ||
166                 s.charAt(m.start(g)) != expected.charAt(0) ||
167                 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1));
168     }
checkReplaceFirst(String p, String s, String r, String expected)169     private static void checkReplaceFirst(String p, String s, String r, String expected)
170     {
171         assertEquals(expected, Pattern.compile(p).matcher(s).replaceFirst(r));
172     }
173 
checkReplaceAll(String p, String s, String r, String expected)174     private static void checkReplaceAll(String p, String s, String r, String expected)
175     {
176         assertEquals(expected, Pattern.compile(p).matcher(s).replaceAll(r));
177     }
178 
checkExpectedFail(String p)179     private static void checkExpectedFail(String p) {
180         assertThrows(PatternSyntaxException.class, () ->
181                 Pattern.compile(p));
182     }
183 
184     /**
185      * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
186      * supplementary characters. This method does NOT fully take care
187      * of the regex syntax.
188      */
toSupplementaries(String s)189     public static String toSupplementaries(String s) {
190         int length = s.length();
191         StringBuilder sb = new StringBuilder(length * 2);
192 
193         for (int i = 0; i < length; ) {
194             char c = s.charAt(i++);
195             if (c == '\\') {
196                 sb.append(c);
197                 if (i < length) {
198                     c = s.charAt(i++);
199                     sb.append(c);
200                     if (c == 'u') {
201                         // assume no syntax error
202                         sb.append(s.charAt(i++));
203                         sb.append(s.charAt(i++));
204                         sb.append(s.charAt(i++));
205                         sb.append(s.charAt(i++));
206                     }
207                 }
208             } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
209                 sb.append('\ud800').append((char)('\udc00'+c));
210             } else {
211                 sb.append(c);
212             }
213         }
214         return sb.toString();
215     }
216 
217     // Regular expression tests
218     //Following three tests execute from a file.
219     @Test
processTestCases()220     public static void processTestCases() throws IOException {
221         processFile("TestCases.txt");
222     }
223 
224     @Test
processBMPTestCases()225     public static void processBMPTestCases() throws IOException {
226         processFile("BMPTestCases.txt");
227     }
228 
229     @Test
processSupplementaryTestCases()230     public static void processSupplementaryTestCases() throws IOException {
231         processFile("SupplementaryTestCases.txt");
232     }
233 
234 
235     @Test
nullArgumentTest()236     public static void nullArgumentTest() {
237 
238         assertThrows(NullPointerException.class, () -> Pattern.compile(null));
239         assertThrows(NullPointerException.class, () -> Pattern.matches(null, null));
240         assertThrows(NullPointerException.class, () -> Pattern.matches("xyz", null));
241         assertThrows(NullPointerException.class, () -> Pattern.quote(null));
242         assertThrows(NullPointerException.class, () -> Pattern.compile("xyz").split(null));
243         assertThrows(NullPointerException.class, () -> Pattern.compile("xyz").matcher(null));
244 
245         final Matcher m = Pattern.compile("xyz").matcher("xyz");
246         m.matches();
247         assertThrows(NullPointerException.class, () -> m.appendTail((StringBuffer) null));
248         assertThrows(NullPointerException.class, () -> m.appendTail((StringBuilder)null));
249         assertThrows(NullPointerException.class, () -> m.replaceAll((String) null));
250         assertThrows(NullPointerException.class, () -> m.replaceAll((Function<MatchResult, String>)null));
251         assertThrows(NullPointerException.class, () -> m.replaceFirst((String)null));
252         assertThrows(NullPointerException.class, () -> m.replaceFirst((Function<MatchResult, String>) null));
253         assertThrows(NullPointerException.class, () -> m.appendReplacement((StringBuffer)null, null));
254         assertThrows(NullPointerException.class, () -> m.appendReplacement((StringBuilder)null, null));
255         assertThrows(NullPointerException.class, () -> m.reset(null));
256         assertThrows(NullPointerException.class, () -> Matcher.quoteReplacement(null));
257         //check(() -> m.usePattern(null));
258 
259     }
260 
261     // This is for bug6635133
262     // Test if surrogate pair in Unicode escapes can be handled correctly.
263     @Test
surrogatesInClassTest()264     public static void surrogatesInClassTest() {
265         Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
266         Matcher matcher = pattern.matcher("\ud834\udd22");
267 
268         assertTrue(matcher.find(), "Surrogate pair in Unicode escape");
269     }
270 
271     // This is for bug6990617
272     // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
273     // char encoding is only 2 or 3 digits instead of 4 and the first quoted
274     // char is an octal digit.
275     @Test
removeQEQuotingTest()276     public static void removeQEQuotingTest() {
277         Pattern pattern =
278             Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
279         Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
280 
281         assertTrue(matcher.find(), "Remove Q/E Quoting");
282     }
283 
284     // This is for bug 4988891
285     // Test toMatchResult to see that it is a copy of the Matcher
286     // that is not affected by subsequent operations on the original
287     @Test
toMatchResultTest()288     public static void toMatchResultTest() {
289         Pattern pattern = Pattern.compile("squid");
290         Matcher matcher = pattern.matcher(
291             "agiantsquidofdestinyasmallsquidoffate");
292         matcher.find();
293 
294         int matcherStart1 = matcher.start();
295         MatchResult mr = matcher.toMatchResult();
296         assertNotSame(mr, matcher, "Matcher toMatchResult is identical object");
297 
298         int resultStart1 = mr.start();
299         assertEquals(matcherStart1, resultStart1, "equal matchers don't have equal start indices");
300         matcher.find();
301 
302         int matcherStart2 = matcher.start();
303         int resultStart2 = mr.start();
304         assertNotEquals(matcherStart2, resultStart2, "Matcher2 and Result2 should not be equal");
305         assertEquals(resultStart1, resultStart2, "Second match result should have the same state");
306         MatchResult mr2 = matcher.toMatchResult();
307         assertNotSame(mr, mr2, "Second Matcher copy should not be identical to the first.");
308         assertEquals(mr2.start(), matcherStart2, "mr2 index should equal matcher index");
309     }
310 
311     // This is for bug 8074678
312     // Test the result of toMatchResult throws ISE if no match is availble
313     // Android-changed: Android throws Exception when calling toMatchResult(), not start().
314     @Test(enabled = false)
toMatchResultTest2()315     public static void toMatchResultTest2() {
316         Matcher matcher = Pattern.compile("nomatch").matcher("hello world");
317         matcher.find();
318         MatchResult mr = matcher.toMatchResult();
319 
320         assertThrows(IllegalStateException.class, mr::start);
321         assertThrows(IllegalStateException.class, () -> mr.start(2));
322         assertThrows(IllegalStateException.class, mr::end);
323         assertThrows(IllegalStateException.class, () -> mr.end(2));
324         assertThrows(IllegalStateException.class, mr::group);
325         assertThrows(IllegalStateException.class, () -> mr.group(2));
326 
327         matcher = Pattern.compile("(match)").matcher("there is a match");
328         matcher.find();
329         MatchResult mr2 = matcher.toMatchResult();
330         assertThrows(IndexOutOfBoundsException.class, () -> mr2.start(2));
331         assertThrows(IndexOutOfBoundsException.class, () -> mr2.end(2));
332         assertThrows(IndexOutOfBoundsException.class, () -> mr2.group(2));
333     }
334 
335     // This is for bug 5013885
336     // Must test a slice to see if it reports hitEnd correctly
337     // FIXME: Investigate why this test fails.
338     // Android-changed: The javadoc isn't clear about the behavior if no match is found. The
339     // behavior could depend on the underlying search engine.
340     @Test(enabled = false)
hitEndTest()341     public static void hitEndTest() {
342         // Basic test of Slice node
343         Pattern p = Pattern.compile("^squidattack");
344         Matcher m = p.matcher("squack");
345         m.find();
346         assertFalse(m.hitEnd(), "Matcher should not be at end of sequence");
347         m.reset("squid");
348         m.find();
349         assertTrue(m.hitEnd(), "Matcher should be at the end of sequence");
350 
351         // Test Slice, SliceA and SliceU nodes
352         for (int i=0; i<3; i++) {
353             int flags = 0;
354             if (i==1) flags = Pattern.CASE_INSENSITIVE;
355             if (i==2) flags = Pattern.UNICODE_CASE;
356             p = Pattern.compile("^abc", flags);
357             m = p.matcher("ad");
358             m.find();
359             assertFalse(m.hitEnd(), "Slice node test");
360             m.reset("ab");
361             m.find();
362             assertTrue(m.hitEnd(), "Slice node test");
363         }
364 
365         // Test Boyer-Moore node
366         p = Pattern.compile("catattack");
367         m = p.matcher("attack");
368         m.find();
369         assertTrue(m.hitEnd(), "Boyer-Moore node test");
370 
371         p = Pattern.compile("catattack");
372         m = p.matcher("attackattackattackcatatta");
373         m.find();
374         assertTrue(m.hitEnd(), "Boyer-More node test");
375 
376         // 8184706: Matching u+0d at EOL against \R should hit-end
377         p = Pattern.compile("...\\R");
378         m = p.matcher("cat" + (char)0x0a);
379         m.find();
380         assertFalse(m.hitEnd());
381 
382         m = p.matcher("cat" + (char)0x0d);
383         m.find();
384         assertTrue(m.hitEnd());
385 
386         m = p.matcher("cat" + (char)0x0d + (char)0x0a);
387         m.find();
388         assertFalse(m.hitEnd());
389     }
390 
391     // This is for bug 4997476
392     // It is weird code submitted by customer demonstrating a regression
393     @Test
wordSearchTest()394     public static void wordSearchTest() {
395         String testString = "word1 word2 word3";
396         Pattern p = Pattern.compile("\\b");
397         Matcher m = p.matcher(testString);
398         int position = 0;
399         int start;
400         while (m.find(position)) {
401             start = m.start();
402             if (start == testString.length())
403                 break;
404             if (m.find(start+1)) {
405                 position = m.start();
406             } else {
407                 position = testString.length();
408             }
409             if (testString.substring(start, position).equals(" "))
410                 continue;
411             assertTrue(testString.substring(start, position-1).startsWith("word"));
412         }
413     }
414 
415     // This is for bug 4994840
416     @Test
caretAtEndTest()417     public static void caretAtEndTest() {
418         // Problem only occurs with multiline patterns
419         // containing a beginning-of-line caret "^" followed
420         // by an expression that also matches the empty string.
421         Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
422         Matcher matcher = pattern.matcher("\r");
423         matcher.find();
424         matcher.find();
425     }
426 
427     // This test is for 4979006
428     // Check to see if word boundary construct properly handles unicode
429     // non spacing marks
430     @Test
unicodeWordBoundsTest()431     public static void unicodeWordBoundsTest() {
432         String spaces = "  ";
433         String wordChar = "a";
434         String nsm = "\u030a";
435 
436         // Android-changed: assert statement has no runtime effect on Android.
437         // assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
438         assertEquals(Character.getType('\u030a'), Character.NON_SPACING_MARK);
439 
440         Pattern pattern = Pattern.compile("\\b");
441         Matcher matcher = pattern.matcher("");
442         // S=other B=word character N=non spacing mark .=word boundary
443         // SS.BB.SS
444         String input = spaces + wordChar + wordChar + spaces;
445         twoFindIndexes(input, matcher, 2, 4);
446         // SS.BBN.SS
447         input = spaces + wordChar +wordChar + nsm + spaces;
448         twoFindIndexes(input, matcher, 2, 5);
449         // SS.BN.SS
450         input = spaces + wordChar + nsm + spaces;
451         twoFindIndexes(input, matcher, 2, 4);
452         // SS.BNN.SS
453         input = spaces + wordChar + nsm + nsm + spaces;
454         twoFindIndexes(input, matcher, 2, 5);
455         // SSN.BB.SS
456         input = spaces + nsm + wordChar + wordChar + spaces;
457         twoFindIndexes(input, matcher, 3, 5);
458         // SS.BNB.SS
459         input = spaces + wordChar + nsm + wordChar + spaces;
460         twoFindIndexes(input, matcher, 2, 5);
461         // SSNNSS
462         input = spaces + nsm + nsm + spaces;
463         matcher.reset(input);
464         assertFalse(matcher.find());
465         // SSN.BBN.SS
466         input = spaces + nsm + wordChar + wordChar + nsm + spaces;
467         twoFindIndexes(input, matcher, 3, 6);
468     }
469 
twoFindIndexes(String input, Matcher matcher, int a, int b)470     private static void twoFindIndexes(String input, Matcher matcher, int a,
471                                        int b)
472     {
473         matcher.reset(input);
474         matcher.find();
475         assertEquals(matcher.start(), a);
476         matcher.find();
477         assertEquals(matcher.start(), b);
478     }
479 
480     // This test is for 6284152
check(String regex, String input, String[] expected)481     private static void check(String regex, String input, String[] expected) {
482         List<String> result = new ArrayList<>();
483         Pattern p = Pattern.compile(regex);
484         Matcher m = p.matcher(input);
485         while (m.find()) {
486             result.add(m.group());
487         }
488         assertEquals(Arrays.asList(expected), result);
489     }
490 
491     @Test
lookbehindTest()492     public static void lookbehindTest() {
493         //Positive
494         check("(?<=%.{0,5})foo\\d",
495               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
496               new String[]{"foo1", "foo2", "foo3"});
497 
498         //boundary at end of the lookbehind sub-regex should work consistently
499         //with the boundary just after the lookbehind sub-regex
500         // Android-changed:
501         // check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
502         // check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
503         check("(?<=.{0,100}\\b)foo", "abcd foo", new String[]{"foo"});
504         check("(?<=.{0,100})\\bfoo", "abcd foo", new String[]{"foo"});
505         check("(?<!abc )\\bfoo", "abc foo", new String[0]);
506         check("(?<!abc \\b)foo", "abc foo", new String[0]);
507 
508         //Negative
509         check("(?<!%.{0,5})foo\\d",
510               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
511               new String[] {"foo4", "foo5"});
512 
513         //Positive greedy
514         check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
515 
516         //Positive reluctant
517         check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
518 
519         //supplementary
520         check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
521               new String[] {"fo\ud800\udc00o"});
522         check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
523               new String[] {"fo\ud800\udc00o"});
524         check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
525               new String[] {"fo\ud800\udc00o"});
526         check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
527               new String[] {"fo\ud800\udc00o"});
528     }
529 
530     // This test is for 4938995
531     // Check to see if weak region boundaries are transparent to
532     // lookahead and lookbehind constructs
533     @Test
boundsTest()534     public static void boundsTest() {
535         String fullMessage = "catdogcat";
536         Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
537         Matcher matcher = pattern.matcher("catdogca");
538         matcher.useTransparentBounds(true);
539 
540         assertFalse(matcher.find());
541         matcher.reset("atdogcat");
542 
543         assertFalse(matcher.find());
544         matcher.reset(fullMessage);
545 
546         assertTrue(matcher.find());
547         matcher.reset(fullMessage);
548         matcher.region(0,9);
549 
550         assertTrue(matcher.find());
551         matcher.reset(fullMessage);
552         matcher.region(0,6);
553 
554         assertTrue(matcher.find());
555         matcher.reset(fullMessage);
556         matcher.region(3,6);
557 
558         assertTrue(matcher.find());
559         matcher.useTransparentBounds(false);
560         assertFalse(matcher.find());
561 
562         // Negative lookahead/lookbehind
563         pattern = Pattern.compile("(?<!cat)dog(?!cat)");
564         matcher = pattern.matcher("dogcat");
565         matcher.useTransparentBounds(true);
566         matcher.region(0,3);
567 
568         assertFalse(matcher.find());
569         matcher.reset("catdog");
570         matcher.region(3,6);
571 
572         assertFalse(matcher.find());
573         matcher.useTransparentBounds(false);
574         matcher.reset("dogcat");
575         matcher.region(0,3);
576 
577         assertTrue(matcher.find());
578         matcher.reset("catdog");
579         matcher.region(3,6);
580         // FIXME: This looks like a bug.
581         // Android-removed: Android fails this case.
582         // assertTrue(matcher.find());
583 
584     }
585 
586     // This test is for 4945394
587     @Test
findFromTest()588     public static void findFromTest() {
589         String message = "This is 40 $0 message.";
590         Pattern pat = Pattern.compile("\\$0");
591         Matcher match = pat.matcher(message);
592         assertTrue(match.find());
593         assertFalse(match.find());
594         assertFalse(match.find());
595     }
596 
597     // This test is for 4872664 and 4892980
598     @Test
negatedCharClassTest()599     public static void negatedCharClassTest() {
600         Pattern pattern = Pattern.compile("[^>]");
601         Matcher matcher = pattern.matcher("\u203A");
602         assertTrue(matcher.matches());
603 
604         pattern = Pattern.compile("[^fr]");
605         matcher = pattern.matcher("a");
606         assertTrue(matcher.find());
607 
608         matcher.reset("\u203A");
609         assertTrue(matcher.find());
610         String s = "for";
611         String[] result = s.split("[^fr]");
612         assertEquals(result[0], "f");
613         assertEquals(result[1], "r");
614         s = "f\u203Ar";
615         result = s.split("[^fr]");
616         assertEquals(result[0], "f");
617         assertEquals(result[1], "r");
618 
619         // Test adding to bits, subtracting a node, then adding to bits again
620         pattern = Pattern.compile("[^f\u203Ar]");
621         matcher = pattern.matcher("a");
622         assertTrue(matcher.find());
623         matcher.reset("f");
624         assertFalse(matcher.find());
625         matcher.reset("\u203A");
626         assertFalse(matcher.find());
627         matcher.reset("r");
628         assertFalse(matcher.find());
629         matcher.reset("\u203B");
630         assertTrue(matcher.find());
631 
632         // Test subtracting a node, adding to bits, subtracting again
633         pattern = Pattern.compile("[^\u203Ar\u203B]");
634         matcher = pattern.matcher("a");
635         assertTrue(matcher.find());
636         matcher.reset("\u203A");
637         assertFalse(matcher.find());
638         matcher.reset("r");
639         assertFalse(matcher.find());
640         matcher.reset("\u203B");
641         assertFalse(matcher.find());
642         matcher.reset("\u203C");
643         assertTrue(matcher.find());
644     }
645 
646     // This test is for 4628291
647     @Test
toStringTest()648     public static void toStringTest() {
649         Pattern pattern = Pattern.compile("b+");
650         assertEquals(pattern.toString(), "b+");
651         Matcher matcher = pattern.matcher("aaabbbccc");
652         String matcherString = matcher.toString(); // unspecified
653         matcher.find();
654         matcher.toString(); // unspecified
655         matcher.region(0,3);
656         matcher.toString(); // unspecified
657         matcher.reset();
658         matcher.toString(); // unspecified
659     }
660 
661     // This test is for 4808962
662     @Test
literalPatternTest()663     public static void literalPatternTest() {
664         int flags = Pattern.LITERAL;
665 
666         Pattern pattern = Pattern.compile("abc\\t$^", flags);
667         check(pattern, "abc\\t$^", true);
668 
669         pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
670         check(pattern, "abc\\t$^", true);
671 
672         pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
673         check(pattern, "\\Qa^$bcabc\\E", true);
674         check(pattern, "a^$bcabc", false);
675 
676         pattern = Pattern.compile("\\\\Q\\\\E");
677         check(pattern, "\\Q\\E", true);
678 
679         pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
680         check(pattern, "abcefg\\Q\\Ehij", true);
681 
682         pattern = Pattern.compile("\\\\\\Q\\\\E");
683         check(pattern, "\\\\\\\\", true);
684 
685         pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
686         check(pattern, "\\Qa^$bcabc\\E", true);
687         check(pattern, "a^$bcabc", false);
688 
689         pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
690         check(pattern, "\\Qabc\\Edef", true);
691         check(pattern, "abcdef", false);
692 
693         pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
694         check(pattern, "abc\\Edef", true);
695         check(pattern, "abcdef", false);
696 
697         pattern = Pattern.compile(Pattern.quote("\\E"));
698         check(pattern, "\\E", true);
699 
700         pattern = Pattern.compile("((((abc.+?:)", flags);
701         check(pattern, "((((abc.+?:)", true);
702 
703         flags |= Pattern.MULTILINE;
704 
705         pattern = Pattern.compile("^cat$", flags);
706         check(pattern, "abc^cat$def", true);
707         check(pattern, "cat", false);
708 
709         flags |= Pattern.CASE_INSENSITIVE;
710 
711         pattern = Pattern.compile("abcdef", flags);
712         check(pattern, "ABCDEF", true);
713         check(pattern, "AbCdEf", true);
714 
715         flags |= Pattern.DOTALL;
716 
717         pattern = Pattern.compile("a...b", flags);
718         check(pattern, "A...b", true);
719         check(pattern, "Axxxb", false);
720 
721         // Android-removed: CANON_EQ isn't supported on Android.
722         // flags |= Pattern.CANON_EQ;
723 
724         //Note: Possible issue
725         // Pattern p = Pattern.compile("testa\u030a", flags);
726         // check(pattern, "testa\u030a", false);
727         // check(pattern, "test\u00e5", false);
728 
729         // Supplementary character test
730         flags = Pattern.LITERAL;
731 
732         pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
733         check(pattern, toSupplementaries("abc\\t$^"), true);
734 
735         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
736         check(pattern, toSupplementaries("abc\\t$^"), true);
737 
738         pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
739         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
740         check(pattern, toSupplementaries("a^$bcabc"), false);
741 
742         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
743         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
744         check(pattern, toSupplementaries("a^$bcabc"), false);
745 
746         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
747         check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
748         check(pattern, toSupplementaries("abcdef"), false);
749 
750         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
751         check(pattern, toSupplementaries("abc\\Edef"), true);
752         check(pattern, toSupplementaries("abcdef"), false);
753 
754         pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
755         check(pattern, toSupplementaries("((((abc.+?:)"), true);
756 
757         flags |= Pattern.MULTILINE;
758 
759         pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
760         check(pattern, toSupplementaries("abc^cat$def"), true);
761         check(pattern, toSupplementaries("cat"), false);
762 
763         flags |= Pattern.DOTALL;
764 
765         // note: this is case-sensitive.
766         pattern = Pattern.compile(toSupplementaries("a...b"), flags);
767         check(pattern, toSupplementaries("a...b"), true);
768         check(pattern, toSupplementaries("axxxb"), false);
769 
770         // Android-removed: CANON_EQ isn't supported on Android.
771         // flags |= Pattern.CANON_EQ;
772 
773         // String t = toSupplementaries("test");
774         //Note: Possible issue
775         // p = Pattern.compile(t + "a\u030a", flags);
776         // check(pattern, t + "a\u030a", false);
777         // check(pattern, t + "\u00e5", false);
778     }
779 
780     // This test is for 4803179
781     // This test is also for 4808962, replacement parts
782     @Test
literalReplacementTest()783     public static void literalReplacementTest() {
784         int flags = Pattern.LITERAL;
785 
786         Pattern pattern = Pattern.compile("abc", flags);
787         Matcher matcher = pattern.matcher("zzzabczzz");
788         String replaceTest = "$0";
789         String result = matcher.replaceAll(replaceTest);
790         assertEquals(result, "zzzabczzz");
791 
792         matcher.reset();
793         String literalReplacement = Matcher.quoteReplacement(replaceTest);
794         result = matcher.replaceAll(literalReplacement);
795         assertEquals(result, "zzz$0zzz");
796 
797         matcher.reset();
798         replaceTest = "\\t$\\$";
799         literalReplacement = Matcher.quoteReplacement(replaceTest);
800         result = matcher.replaceAll(literalReplacement);
801         assertEquals(result, "zzz\\t$\\$zzz");
802 
803         // Supplementary character test
804         pattern = Pattern.compile(toSupplementaries("abc"), flags);
805         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
806         replaceTest = "$0";
807         result = matcher.replaceAll(replaceTest);
808         assertEquals(result, toSupplementaries("zzzabczzz"));
809 
810         matcher.reset();
811         literalReplacement = Matcher.quoteReplacement(replaceTest);
812         result = matcher.replaceAll(literalReplacement);
813         assertEquals(result, toSupplementaries("zzz$0zzz"));
814 
815         matcher.reset();
816         replaceTest = "\\t$\\$";
817         literalReplacement = Matcher.quoteReplacement(replaceTest);
818         result = matcher.replaceAll(literalReplacement);
819         assertEquals(result, toSupplementaries("zzz\\t$\\$zzz"));
820 
821         // IAE should be thrown if backslash or '$' is the last character
822         // in replacement string
823         assertThrows(IllegalArgumentException.class, () -> "\uac00".replaceAll("\uac00", "$"));
824         assertThrows(IllegalArgumentException.class, () -> "\uac00".replaceAll("\uac00", "\\"));
825     }
826 
827     // This test is for 4757029
828     @Test
regionTest()829     public static void regionTest() {
830         Pattern pattern = Pattern.compile("abc");
831         Matcher matcher = pattern.matcher("abcdefabc");
832 
833         matcher.region(0,9);
834         assertTrue(matcher.find());
835         assertTrue(matcher.find());
836         matcher.region(0,3);
837         assertTrue(matcher.find());
838         matcher.region(3,6);
839         assertFalse(matcher.find());
840         matcher.region(0,2);
841         assertFalse(matcher.find());
842 
843         expectRegionFail(matcher, 1, -1);
844         expectRegionFail(matcher, -1, -1);
845         expectRegionFail(matcher, -1, 1);
846         expectRegionFail(matcher, 5, 3);
847         expectRegionFail(matcher, 5, 12);
848         expectRegionFail(matcher, 12, 12);
849 
850         pattern = Pattern.compile("^abc$");
851         matcher = pattern.matcher("zzzabczzz");
852         matcher.region(0,9);
853         assertFalse(matcher.find());
854         matcher.region(3,6);
855         assertTrue(matcher.find());
856         matcher.region(3,6);
857         matcher.useAnchoringBounds(false);
858         assertFalse(matcher.find());
859 
860         // Supplementary character test
861         pattern = Pattern.compile(toSupplementaries("abc"));
862         matcher = pattern.matcher(toSupplementaries("abcdefabc"));
863         matcher.region(0,9*2);
864         assertTrue(matcher.find());
865         assertTrue(matcher.find());
866         matcher.region(0,3*2);
867         assertTrue(matcher.find());
868         matcher.region(1,3*2);
869         assertFalse(matcher.find());
870         matcher.region(3*2,6*2);
871         assertFalse(matcher.find());
872         matcher.region(0,2*2);
873         assertFalse(matcher.find());
874         matcher.region(0,2*2+1);
875         assertFalse(matcher.find());
876 
877         expectRegionFail(matcher, 2, -1);
878         expectRegionFail(matcher, -1, -1);
879         expectRegionFail(matcher, -1, 2);
880         expectRegionFail(matcher, 5*2, 3*2);
881         expectRegionFail(matcher, 5*2, 12*2);
882         expectRegionFail(matcher, 12*2, 12*2);
883 
884         pattern = Pattern.compile(toSupplementaries("^abc$"));
885         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
886         matcher.region(0,9*2);
887         assertFalse(matcher.find());
888         matcher.region(3*2,6*2);
889         assertTrue(matcher.find());
890         matcher.region(3*2+1,6*2);
891         assertFalse(matcher.find());
892         matcher.region(3*2,6*2-1);
893         assertFalse(matcher.find());
894         matcher.region(3*2,6*2);
895         matcher.useAnchoringBounds(false);
896         assertFalse(matcher.find());
897 
898         // JDK-8230829
899         pattern = Pattern.compile("\\ud800\\udc61");
900         matcher = pattern.matcher("\ud800\udc61");
901         matcher.region(0, 1);
902         assertFalse(matcher.find(), "Matched a surrogate pair" +
903                 " that crosses border of region");
904 
905         assertTrue(matcher.hitEnd(), "Expected to hit the end when" +
906                 " matching a surrogate pair crossing region");
907     }
908 
expectRegionFail(Matcher matcher, int index1, int index2)909     private static void expectRegionFail(Matcher matcher, int index1,
910                                          int index2)
911     {
912 
913         try {
914             matcher.region(index1, index2);
915             fail();
916         } catch (IndexOutOfBoundsException | IllegalStateException ioobe) {
917             // Correct result
918         } catch (Exception e) {
919             fail();
920         }
921     }
922 
923     // This test is for 4803197
924     @Test
escapedSegmentTest()925     public static void escapedSegmentTest() {
926 
927         Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
928         check(pattern, "dir1\\dir2", true);
929 
930         pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
931         check(pattern, "dir1\\dir2\\", true);
932 
933         pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
934         check(pattern, "dir1\\dir2\\", true);
935 
936         // Supplementary character test
937         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
938         check(pattern, toSupplementaries("dir1\\dir2"), true);
939 
940         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
941         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
942 
943         pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
944         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
945     }
946 
947     // This test is for 4792284
948     @Test
nonCaptureRepetitionTest()949     public static void nonCaptureRepetitionTest() {
950         String input = "abcdefgh;";
951 
952         String[] patterns = new String[] {
953             "(?:\\w{4})+;",
954             "(?:\\w{8})*;",
955             "(?:\\w{2}){2,4};",
956             "(?:\\w{4}){2,};",   // only matches the
957             ".*?(?:\\w{5})+;",   //     specified minimum
958             ".*?(?:\\w{9})*;",   //     number of reps - OK
959             "(?:\\w{4})+?;",     // lazy repetition - OK
960             "(?:\\w{4})++;",     // possessive repetition - OK
961             "(?:\\w{2,}?)+;",    // non-deterministic - OK
962             "(\\w{4})+;",        // capturing group - OK
963         };
964 
965         for (String pattern : patterns) {
966             // Check find()
967             check(pattern, 0, input, input, true);
968             // Check matches()
969             Pattern p = Pattern.compile(pattern);
970             Matcher m = p.matcher(input);
971 
972             assertTrue(m.matches());
973             assertEquals(m.group(0), input);
974         }
975     }
976 
977     // This test is for 6358731
978     @Test
notCapturedGroupCurlyMatchTest()979     public static void notCapturedGroupCurlyMatchTest() {
980         Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
981         Matcher matcher = pattern.matcher("abcd");
982 
983         boolean condition = !matcher.matches() ||
984              matcher.group(1) != null ||
985              !matcher.group(2).equals("abcd");
986 
987         assertFalse(condition);
988     }
989 
990     // This test is for 4706545
991     // FIXME: The char class doesn't match Character.is*() behavior. Is it worth fixing?
992     // Android-changed: Disable the test for further invesitgation.
993     @Test(enabled = false)
javaCharClassTest()994     public static void javaCharClassTest() {
995         for (int i=0; i<1000; i++) {
996             char c = (char)generator.nextInt();
997             check("{javaLowerCase}", c, Character.isLowerCase(c));
998             check("{javaUpperCase}", c, Character.isUpperCase(c));
999             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1000             check("{javaTitleCase}", c, Character.isTitleCase(c));
1001             check("{javaDigit}", c, Character.isDigit(c));
1002             check("{javaDefined}", c, Character.isDefined(c));
1003             check("{javaLetter}", c, Character.isLetter(c));
1004             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1005             check("{javaJavaIdentifierStart}", c,
1006                   Character.isJavaIdentifierStart(c));
1007             check("{javaJavaIdentifierPart}", c,
1008                   Character.isJavaIdentifierPart(c));
1009             check("{javaUnicodeIdentifierStart}", c,
1010                   Character.isUnicodeIdentifierStart(c));
1011             check("{javaUnicodeIdentifierPart}", c,
1012                   Character.isUnicodeIdentifierPart(c));
1013             check("{javaIdentifierIgnorable}", c,
1014                   Character.isIdentifierIgnorable(c));
1015             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1016             check("{javaWhitespace}", c, Character.isWhitespace(c));
1017             check("{javaISOControl}", c, Character.isISOControl(c));
1018             check("{javaMirrored}", c, Character.isMirrored(c));
1019 
1020         }
1021 
1022         // Supplementary character test
1023         for (int i=0; i<1000; i++) {
1024             int c = generator.nextInt(Character.MAX_CODE_POINT
1025                                       - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1026                         + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1027             check("{javaLowerCase}", c, Character.isLowerCase(c));
1028             check("{javaUpperCase}", c, Character.isUpperCase(c));
1029             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1030             check("{javaTitleCase}", c, Character.isTitleCase(c));
1031             check("{javaDigit}", c, Character.isDigit(c));
1032             check("{javaDefined}", c, Character.isDefined(c));
1033             check("{javaLetter}", c, Character.isLetter(c));
1034             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1035             check("{javaJavaIdentifierStart}", c,
1036                   Character.isJavaIdentifierStart(c));
1037             check("{javaJavaIdentifierPart}", c,
1038                   Character.isJavaIdentifierPart(c));
1039             check("{javaUnicodeIdentifierStart}", c,
1040                   Character.isUnicodeIdentifierStart(c));
1041             check("{javaUnicodeIdentifierPart}", c,
1042                   Character.isUnicodeIdentifierPart(c));
1043             check("{javaIdentifierIgnorable}", c,
1044                   Character.isIdentifierIgnorable(c));
1045             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1046             check("{javaWhitespace}", c, Character.isWhitespace(c));
1047             check("{javaISOControl}", c, Character.isISOControl(c));
1048             check("{javaMirrored}", c, Character.isMirrored(c));
1049         }
1050     }
1051 
1052     // This test is for 4523620
1053     /*
1054     @Test
1055     public static void numOccurrencesTest() throws Exception {
1056         Pattern pattern = Pattern.compile("aaa");
1057 
1058         if (pattern.numOccurrences("aaaaaa", false) != 2)
1059             failCount++;
1060         if (pattern.numOccurrences("aaaaaa", true) != 4)
1061             failCount++;
1062 
1063         pattern = Pattern.compile("^");
1064         if (pattern.numOccurrences("aaaaaa", false) != 1)
1065             failCount++;
1066         if (pattern.numOccurrences("aaaaaa", true) != 1)
1067             failCount++;
1068 
1069         report("Number of Occurrences");
1070     }
1071     */
1072 
1073     // This test is for 4776374
1074     @Test
caretBetweenTerminatorsTest()1075     public static void caretBetweenTerminatorsTest() {
1076         int flags1 = Pattern.DOTALL;
1077         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1078         int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1079         int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1080 
1081         check("^....", flags1, "test\ntest", "test", true);
1082         check(".....^", flags1, "test\ntest", "test", false);
1083         check(".....^", flags1, "test\n", "test", false);
1084         check("....^", flags1, "test\r\n", "test", false);
1085 
1086         check("^....", flags2, "test\ntest", "test", true);
1087         check("....^", flags2, "test\ntest", "test", false);
1088         check(".....^", flags2, "test\n", "test", false);
1089         check("....^", flags2, "test\r\n", "test", false);
1090 
1091         check("^....", flags3, "test\ntest", "test", true);
1092         check(".....^", flags3, "test\ntest", "test\n", true);
1093         check(".....^", flags3, "test\u0085test", "test\u0085", false);
1094         // Android-removed: DOTALL + UNIX_LINES is ambiguous for interpreting non-\\u000a linebreak.
1095         // check(".....^", flags3, "test\n", "test", false);
1096         // check(".....^", flags3, "test\r\n", "test", false);
1097         // check("......^", flags3, "test\r\ntest", "test\r\n", true);
1098 
1099         check("^....", flags4, "test\ntest", "test", true);
1100         check(".....^", flags3, "test\ntest", "test\n", true);
1101         check(".....^", flags4, "test\u0085test", "test\u0085", true);
1102         check(".....^", flags4, "test\n", "test\n", false);
1103         check(".....^", flags4, "test\r\n", "test\r", false);
1104 
1105         // Supplementary character test
1106         String t = toSupplementaries("test");
1107         check("^....", flags1, t+"\n"+t, t, true);
1108         check(".....^", flags1, t+"\n"+t, t, false);
1109         check(".....^", flags1, t+"\n", t, false);
1110         check("....^", flags1, t+"\r\n", t, false);
1111 
1112         check("^....", flags2, t+"\n"+t, t, true);
1113         check("....^", flags2, t+"\n"+t, t, false);
1114         check(".....^", flags2, t+"\n", t, false);
1115         // Android-removed: DOTALL + UNIX_LINES is ambiguous for interpreting non-\\u000a linebreak.
1116         // check("....^", flags2, t+"\r\n", t, false);
1117 
1118         check("^....", flags3, t+"\n"+t, t, true);
1119         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1120         check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1121         // Android-removed: DOTALL + UNIX_LINES is ambiguous for interpreting non-\\u000a linebreak.
1122         // check(".....^", flags3, t+"\n", t, false);
1123         // check(".....^", flags3, t+"\r\n", t, false);
1124         // check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1125         // check("......^", flags3, t+"\r\n\n"+t, t+"\r\n", true);
1126 
1127         check("^....", flags4, t+"\n"+t, t, true);
1128         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1129         check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1130         check(".....^", flags4, t+"\n", t+"\n", false);
1131         check(".....^", flags4, t+"\r\n", t+"\r", false);
1132     }
1133 
1134     // This test is for 4727935
1135     @Test
dollarAtEndTest()1136     public static void dollarAtEndTest() {
1137         int flags1 = Pattern.DOTALL;
1138         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1139         int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1140 
1141         check("....$", flags1, "test\n", "test", true);
1142         check("....$", flags1, "test\r\n", "test", true);
1143         check(".....$", flags1, "test\n", "test\n", true);
1144         check(".....$", flags1, "test\u0085", "test\u0085", true);
1145         check("....$", flags1, "test\u0085", "test", true);
1146 
1147         check("....$", flags2, "test\n", "test", true);
1148         check(".....$", flags2, "test\n", "test\n", true);
1149         check(".....$", flags2, "test\u0085", "test\u0085", true);
1150         check("....$", flags2, "test\u0085", "est\u0085", true);
1151 
1152         check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1153         check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1154         check("....$blah", flags3, "test\nblah", "!!!!", false);
1155         check(".....$blah", flags3, "test\nblah", "!!!!", false);
1156 
1157         // Supplementary character test
1158         String t = toSupplementaries("test");
1159         String b = toSupplementaries("blah");
1160         check("....$", flags1, t+"\n", t, true);
1161         check("....$", flags1, t+"\r\n", t, true);
1162         check(".....$", flags1, t+"\n", t+"\n", true);
1163         check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1164         check("....$", flags1, t+"\u0085", t, true);
1165 
1166         check("....$", flags2, t+"\n", t, true);
1167         check(".....$", flags2, t+"\n", t+"\n", true);
1168         check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1169         check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1170 
1171         check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1172         check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1173         check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1174         check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1175     }
1176 
1177     // This test is for 4711773
1178     @Test
multilineDollarTest()1179     public static void multilineDollarTest() {
1180         Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1181         Matcher matcher = findCR.matcher("first bit\nsecond bit");
1182         matcher.find();
1183         assertEquals(matcher.start(), 9);
1184         matcher.find();
1185         assertEquals(matcher.start(0), 20);
1186 
1187         // Supplementary character test
1188         matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
1189         matcher.find();
1190         assertEquals(matcher.start(0), 9*2);
1191         matcher.find();
1192         assertEquals(matcher.start(0), 20*2);
1193     }
1194 
1195     @Test
reluctantRepetitionTest()1196     public static void reluctantRepetitionTest() {
1197         Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1198         check(p, "1 word word word 2", true);
1199         check(p, "1 wor wo w 2", true);
1200         check(p, "1 word word 2", true);
1201         check(p, "1 word 2", true);
1202         check(p, "1 wo w w 2", true);
1203         check(p, "1 wo w 2", true);
1204         check(p, "1 wor w 2", true);
1205 
1206         p = Pattern.compile("([a-z])+?c");
1207         Matcher m = p.matcher("ababcdefdec");
1208         check(m, "ababc");
1209 
1210         // Supplementary character test
1211         p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1212         m = p.matcher(toSupplementaries("ababcdefdec"));
1213         check(m, toSupplementaries("ababc"));
1214     }
1215 
serializedPattern(Pattern p)1216     public static Pattern serializedPattern(Pattern p) throws Exception {
1217         ByteArrayOutputStream baos = new ByteArrayOutputStream();
1218         ObjectOutputStream oos = new ObjectOutputStream(baos);
1219         oos.writeObject(p);
1220         oos.close();
1221         try (ObjectInputStream ois = new ObjectInputStream(
1222                 new ByteArrayInputStream(baos.toByteArray()))) {
1223             return (Pattern)ois.readObject();
1224         }
1225     }
1226 
1227     @Test
serializeTest()1228     public static void serializeTest() throws Exception {
1229         String patternStr = "(b)";
1230         String matchStr = "b";
1231         Pattern pattern = Pattern.compile(patternStr);
1232         Pattern serializedPattern = serializedPattern(pattern);
1233         Matcher matcher = serializedPattern.matcher(matchStr);
1234         assertTrue(matcher.matches());
1235         assertEquals(matcher.groupCount(), 1);
1236 
1237         pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE);
1238         serializedPattern = serializedPattern(pattern);
1239         assertTrue(serializedPattern.matcher("Ab").matches());
1240         assertFalse(serializedPattern.matcher("AB").matches());
1241     }
1242 
1243     @Test
gTest()1244     public static void gTest() {
1245         Pattern pattern = Pattern.compile("\\G\\w");
1246         Matcher matcher = pattern.matcher("abc#x#x");
1247         matcher.find();
1248         matcher.find();
1249         matcher.find();
1250         assertFalse(matcher.find());
1251 
1252         pattern = Pattern.compile("\\GA*");
1253         matcher = pattern.matcher("1A2AA3");
1254         matcher.find();
1255         assertFalse(matcher.find());
1256 
1257         pattern = Pattern.compile("\\GA*");
1258         matcher = pattern.matcher("1A2AA3");
1259         // FIXME: Looks like find(start) should reset to the start point, but sets to 0.
1260         // Android-removed: Android fails this use case.
1261         // assertTrue(matcher.find(1));
1262         // matcher.find();
1263         // assertFalse(matcher.find());
1264     }
1265 
1266     @Test
zTest()1267     public static void zTest() {
1268         Pattern pattern = Pattern.compile("foo\\Z");
1269         // Positives
1270         check(pattern, "foo\u0085", true);
1271         check(pattern, "foo\u2028", true);
1272         check(pattern, "foo\u2029", true);
1273         check(pattern, "foo\n", true);
1274         check(pattern, "foo\r", true);
1275         check(pattern, "foo\r\n", true);
1276         // Negatives
1277         check(pattern, "fooo", false);
1278         check(pattern, "foo\n\r", false);
1279 
1280         pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1281         // Positives
1282         check(pattern, "foo", true);
1283         check(pattern, "foo\n", true);
1284         // Negatives
1285         // FIXME: Investigate why this fails.
1286         // Android-changed: UNIX_LINES is supported by ICU, but this test failed.
1287         // check(pattern, "foo\r", false);
1288         // check(pattern, "foo\u0085", false);
1289         // check(pattern, "foo\u2028", false);
1290         // check(pattern, "foo\u2029", false);
1291     }
1292 
1293     @Test
replaceFirstTest()1294     public static void replaceFirstTest() {
1295         Pattern pattern = Pattern.compile("(ab)(c*)");
1296         Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1297         assertEquals(matcher.replaceFirst("test"), "testzzzabcczzzabccc");
1298 
1299         matcher.reset("zzzabccczzzabcczzzabccczzz");
1300         assertEquals(matcher.replaceFirst("test"), "zzztestzzzabcczzzabccczzz");
1301 
1302         matcher.reset("zzzabccczzzabcczzzabccczzz");
1303         String result = matcher.replaceFirst("$1");
1304         assertEquals(result,"zzzabzzzabcczzzabccczzz");
1305 
1306         matcher.reset("zzzabccczzzabcczzzabccczzz");
1307         result = matcher.replaceFirst("$2");
1308         assertEquals(result, "zzzccczzzabcczzzabccczzz");
1309 
1310         pattern = Pattern.compile("a*");
1311         matcher = pattern.matcher("aaaaaaaaaa");
1312         assertEquals(matcher.replaceFirst("test"), "test");
1313 
1314         pattern = Pattern.compile("a+");
1315         matcher = pattern.matcher("zzzaaaaaaaaaa");
1316         assertEquals(matcher.replaceFirst("test"), "zzztest");
1317 
1318         // Supplementary character test
1319         pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1320         matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1321         result = matcher.replaceFirst(toSupplementaries("test"));
1322         assertEquals(result, toSupplementaries("testzzzabcczzzabccc"));
1323 
1324         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1325         result = matcher.replaceFirst(toSupplementaries("test"));
1326         assertEquals(result, toSupplementaries("zzztestzzzabcczzzabccczzz"));
1327 
1328         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1329         result = matcher.replaceFirst("$1");
1330         assertEquals(result, toSupplementaries("zzzabzzzabcczzzabccczzz"));
1331 
1332         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1333         result = matcher.replaceFirst("$2");
1334         assertEquals(result, toSupplementaries("zzzccczzzabcczzzabccczzz"));
1335 
1336         pattern = Pattern.compile(toSupplementaries("a*"));
1337         matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1338 
1339         result = matcher.replaceFirst(toSupplementaries("test"));
1340         assertEquals(result,toSupplementaries("test"));
1341 
1342         pattern = Pattern.compile(toSupplementaries("a+"));
1343         matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1344         result = matcher.replaceFirst(toSupplementaries("test"));
1345         assertEquals(result, toSupplementaries("zzztest"));
1346     }
1347 
1348     @Test
unixLinesTest()1349     public static void unixLinesTest() {
1350         Pattern pattern = Pattern.compile(".*");
1351         Matcher matcher = pattern.matcher("aa\u2028blah");
1352         matcher.find();
1353         assertEquals(matcher.group(0), "aa");
1354 
1355         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1356         matcher = pattern.matcher("aa\u2028blah");
1357         matcher.find();
1358         assertEquals(matcher.group(0), "aa\u2028blah");
1359 
1360         pattern = Pattern.compile("[az]$",
1361                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1362         matcher = pattern.matcher("aa\u2028zz");
1363         check(matcher, "a\u2028", false);
1364 
1365         // Supplementary character test
1366         pattern = Pattern.compile(".*");
1367         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1368         matcher.find();
1369         assertEquals(matcher.group(0), toSupplementaries("aa"));
1370 
1371         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1372         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1373         matcher.find();
1374         assertEquals(matcher.group(0), toSupplementaries("aa\u2028blah"));
1375 
1376         pattern = Pattern.compile(toSupplementaries("[az]$"),
1377                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1378         matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1379         check(matcher, toSupplementaries("a\u2028"), false);
1380     }
1381 
1382     @Test
commentsTest()1383     public static void commentsTest() {
1384         int flags = Pattern.COMMENTS;
1385 
1386         Pattern pattern = Pattern.compile("aa \\# aa", flags);
1387         Matcher matcher = pattern.matcher("aa#aa");
1388         assertTrue(matcher.matches());
1389 
1390         pattern = Pattern.compile("aa  # blah", flags);
1391         matcher = pattern.matcher("aa");
1392         assertTrue(matcher.matches());
1393 
1394         pattern = Pattern.compile("aa blah", flags);
1395         matcher = pattern.matcher("aablah");
1396         assertTrue(matcher.matches());
1397 
1398         pattern = Pattern.compile("aa  # blah blech  ", flags);
1399         matcher = pattern.matcher("aa");
1400         assertTrue(matcher.matches());
1401 
1402         pattern = Pattern.compile("aa  # blah\n  ", flags);
1403         matcher = pattern.matcher("aa");
1404         assertTrue(matcher.matches());
1405 
1406         pattern = Pattern.compile("aa  # blah\nbc # blech", flags);
1407         matcher = pattern.matcher("aabc");
1408         assertTrue(matcher.matches());
1409 
1410         pattern = Pattern.compile("aa  # blah\nbc# blech", flags);
1411         matcher = pattern.matcher("aabc");
1412         assertTrue(matcher.matches());
1413 
1414         pattern = Pattern.compile("aa  # blah\nbc\\# blech", flags);
1415         matcher = pattern.matcher("aabc#blech");
1416         assertTrue(matcher.matches());
1417 
1418         // Supplementary character test
1419         pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1420         matcher = pattern.matcher(toSupplementaries("aa#aa"));
1421         assertTrue(matcher.matches());
1422 
1423         pattern = Pattern.compile(toSupplementaries("aa  # blah"), flags);
1424         matcher = pattern.matcher(toSupplementaries("aa"));
1425         assertTrue(matcher.matches());
1426 
1427         pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1428         matcher = pattern.matcher(toSupplementaries("aablah"));
1429         assertTrue(matcher.matches());
1430 
1431         pattern = Pattern.compile(toSupplementaries("aa  # blah blech  "), flags);
1432         matcher = pattern.matcher(toSupplementaries("aa"));
1433         assertTrue(matcher.matches());
1434 
1435         pattern = Pattern.compile(toSupplementaries("aa  # blah\n  "), flags);
1436         matcher = pattern.matcher(toSupplementaries("aa"));
1437         assertTrue(matcher.matches());
1438 
1439         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc # blech"), flags);
1440         matcher = pattern.matcher(toSupplementaries("aabc"));
1441         assertTrue(matcher.matches());
1442 
1443         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc# blech"), flags);
1444         matcher = pattern.matcher(toSupplementaries("aabc"));
1445         assertTrue(matcher.matches());
1446 
1447         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc\\# blech"), flags);
1448         matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1449         assertTrue(matcher.matches());
1450     }
1451 
1452     @Test
caseFoldingTest()1453     public static void caseFoldingTest() { // bug 4504687
1454         int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1455         Pattern pattern = Pattern.compile("aa", flags);
1456         Matcher matcher = pattern.matcher("ab");
1457         assertFalse(matcher.matches());
1458 
1459         pattern = Pattern.compile("aA", flags);
1460         matcher = pattern.matcher("ab");
1461         assertFalse(matcher.matches());
1462 
1463         pattern = Pattern.compile("aa", flags);
1464         matcher = pattern.matcher("aB");
1465         assertFalse(matcher.matches());
1466 
1467         matcher = pattern.matcher("Ab");
1468         assertFalse(matcher.matches());
1469 
1470         // ASCII               "a"
1471         // Latin-1 Supplement  "a" + grave
1472         // Cyrillic            "a"
1473         String[] patterns = new String[] {
1474             //single
1475             "a", "\u00e0", "\u0430",
1476             //slice
1477             "ab", "\u00e0\u00e1", "\u0430\u0431",
1478             //class single
1479             "[a]", "[\u00e0]", "[\u0430]",
1480             //class range
1481             "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1482             //back reference
1483             "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1484         };
1485 
1486         String[] texts = new String[] {
1487             "A", "\u00c0", "\u0410",
1488             "AB", "\u00c0\u00c1", "\u0410\u0411",
1489             "A", "\u00c0", "\u0410",
1490             "B", "\u00c2", "\u0411",
1491             "aA", "\u00e0\u00c0", "\u0430\u0410"
1492         };
1493 
1494         boolean[] expected = new boolean[] {
1495             true, false, false,
1496             true, false, false,
1497             true, false, false,
1498             true, false, false,
1499             true, false, false
1500         };
1501 
1502         // Android-removed: CASE_INSENSITIVE has the same effect as UNICODE_CASE on Android.
1503         /*
1504         flags = Pattern.CASE_INSENSITIVE;
1505         for (int i = 0; i < patterns.length; i++) {
1506             pattern = Pattern.compile(patterns[i], flags);
1507             matcher = pattern.matcher(texts[i]);
1508             assertEquals(matcher.matches(), expected[i], "<1> Failed at " + i);
1509         }
1510         */
1511 
1512         flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1513         for (int i = 0; i < patterns.length; i++) {
1514             pattern = Pattern.compile(patterns[i], flags);
1515             matcher = pattern.matcher(texts[i]);
1516             assertTrue(matcher.matches(), "<2> Failed at " + i);
1517         }
1518         // flag unicode_case alone should do nothing
1519         flags = Pattern.UNICODE_CASE;
1520         for (int i = 0; i < patterns.length; i++) {
1521             pattern = Pattern.compile(patterns[i], flags);
1522             matcher = pattern.matcher(texts[i]);
1523             assertFalse(matcher.matches(), "<3> Failed at " + i);
1524         }
1525 
1526         // Special cases: i, I, u+0131 and u+0130
1527         flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1528         pattern = Pattern.compile("[h-j]+", flags);
1529         // Android-changed: no simple case folding for \u0130 and \u0131 according to Unicode 14.0
1530         // https://www.unicode.org/Public/14.0.0/ucd/CaseFolding.txt
1531         // pattern = Pattern.compile("[h-j]+", flags);
1532         // assertTrue(pattern.matcher("\u0131\u0130").matches());
1533     }
1534 
1535     @Test
appendTest()1536     public static void appendTest() {
1537         Pattern pattern = Pattern.compile("(ab)(cd)");
1538         Matcher matcher = pattern.matcher("abcd");
1539         String result = matcher.replaceAll("$2$1");
1540         assertEquals(result, "cdab");
1541 
1542         String  s1 = "Swap all: first = 123, second = 456";
1543         String  s2 = "Swap one: first = 123, second = 456";
1544         String  r  = "$3$2$1";
1545         pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1546         matcher = pattern.matcher(s1);
1547 
1548         result = matcher.replaceAll(r);
1549         assertEquals(result, "Swap all: 123 = first, 456 = second");
1550 
1551         matcher = pattern.matcher(s2);
1552 
1553         if (matcher.find()) {
1554             StringBuffer sb = new StringBuffer();
1555             matcher.appendReplacement(sb, r);
1556             matcher.appendTail(sb);
1557             result = sb.toString();
1558             assertEquals(result, "Swap one: 123 = first, second = 456");
1559         }
1560 
1561         // Supplementary character test
1562         pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1563         matcher = pattern.matcher(toSupplementaries("abcd"));
1564         result = matcher.replaceAll("$2$1");
1565         assertEquals(result, toSupplementaries("cdab"));
1566 
1567         s1 = toSupplementaries("Swap all: first = 123, second = 456");
1568         s2 = toSupplementaries("Swap one: first = 123, second = 456");
1569         r  = toSupplementaries("$3$2$1");
1570         pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1571         matcher = pattern.matcher(s1);
1572 
1573         result = matcher.replaceAll(r);
1574         assertEquals(result, toSupplementaries("Swap all: 123 = first, 456 = second"));
1575 
1576         matcher = pattern.matcher(s2);
1577 
1578         if (matcher.find()) {
1579             StringBuffer sb = new StringBuffer();
1580             matcher.appendReplacement(sb, r);
1581             matcher.appendTail(sb);
1582             result = sb.toString();
1583             assertEquals(result, toSupplementaries("Swap one: 123 = first, second = 456"));
1584         }
1585     }
1586 
1587     @Test
splitTest()1588     public static void splitTest() {
1589         Pattern pattern = Pattern.compile(":");
1590         String[] result = pattern.split("foo:and:boo", 2);
1591         assertEquals(result[0], "foo");
1592         assertEquals(result[1], "and:boo");
1593         // Supplementary character test
1594         Pattern patternX = Pattern.compile(toSupplementaries("X"));
1595         result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1596         assertEquals(result[0], toSupplementaries("foo"));
1597         assertEquals(result[1], toSupplementaries("andXboo"));
1598 
1599         CharBuffer cb = CharBuffer.allocate(100);
1600         cb.put("foo:and:boo");
1601         cb.flip();
1602         result = pattern.split(cb);
1603         assertEquals(result[0], "foo");
1604         assertEquals(result[1], "and");
1605         assertEquals(result[2], "boo");
1606 
1607         // Supplementary character test
1608         CharBuffer cbs = CharBuffer.allocate(100);
1609         cbs.put(toSupplementaries("fooXandXboo"));
1610         cbs.flip();
1611         result = patternX.split(cbs);
1612         assertEquals(result[0], toSupplementaries("foo"));
1613         assertEquals(result[1], toSupplementaries("and"));
1614         assertEquals(result[2], toSupplementaries("boo"));
1615 
1616         String source = "0123456789";
1617         for (int limit=-2; limit<3; limit++) {
1618             for (int x=0; x<10; x++) {
1619                 result = source.split(Integer.toString(x), limit);
1620                 int expectedLength = limit < 1 ? 2 : limit;
1621 
1622                 if ((limit == 0) && (x == 9)) {
1623                     // expected dropping of ""
1624                     assertEquals(result.length, 1);
1625                     assertEquals(result[0], "012345678");
1626                 } else {
1627                     assertEquals(result.length, expectedLength);
1628 
1629                     if (!result[0].equals(source.substring(0,x))) {
1630                         assertEquals(limit, 1);
1631                         assertEquals(result[0], source.substring(0,10));
1632                     }
1633                     if (expectedLength > 1) { // Check segment 2
1634                         assertEquals(result[1], source.substring(x+1,10));
1635                     }
1636                 }
1637             }
1638         }
1639         // Check the case for no match found
1640         for (int limit=-2; limit<3; limit++) {
1641             result = source.split("e", limit);
1642             assertEquals(result.length, 1);
1643             assertEquals(result[0], source);
1644         }
1645         // Check the case for limit == 0, source = "";
1646         // split() now returns 0-length for empty source "" see #6559590
1647         source = "";
1648         result = source.split("e", 0);
1649         assertEquals(result.length, 1);
1650         assertEquals(result[0], source);
1651 
1652         // Check both split() and splitAsStraem(), especially for zero-lenth
1653         // input and zero-lenth match cases
1654         String[][] input = new String[][] {
1655             { " ",           "Abc Efg Hij" },   // normal non-zero-match
1656             { " ",           " Abc Efg Hij" },  // leading empty str for non-zero-match
1657             { " ",           "Abc  Efg Hij" },  // non-zero-match in the middle
1658             { "(?=\\p{Lu})", "AbcEfgHij" },     // no leading empty str for zero-match
1659             { "(?=\\p{Lu})", "AbcEfg" },
1660             { "(?=\\p{Lu})", "Abc" },
1661             { " ",           "" },              // zero-length input
1662             { ".*",          "" },
1663 
1664             // some tests from PatternStreamTest.java
1665             { "4",       "awgqwefg1fefw4vssv1vvv1" },
1666             { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
1667             { "1",       "awgqwefg1fefw4vssv1vvv1" },
1668             { "1",       "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
1669             { "\u56da",  "1\u56da23\u56da456\u56da7890" },
1670             { "\u56da",  "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
1671             { "\u56da",  "" },
1672             { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
1673             { "o",       "boo:and:foo" },
1674             { "o",       "booooo:and:fooooo" },
1675             { "o",       "fooooo:" },
1676         };
1677 
1678         String[][] expected = new String[][] {
1679             { "Abc", "Efg", "Hij" },
1680             { "", "Abc", "Efg", "Hij" },
1681             { "Abc", "", "Efg", "Hij" },
1682             { "Abc", "Efg", "Hij" },
1683             { "Abc", "Efg" },
1684             { "Abc" },
1685             { "" },
1686             { "" },
1687 
1688             { "awgqwefg1fefw", "vssv1vvv1" },
1689             { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
1690             { "awgqwefg", "fefw4vssv", "vvv" },
1691             { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
1692             { "1", "23", "456", "7890" },
1693             { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
1694             { "" },
1695             { "This", "is", "testing", "", "with", "different", "separators" },
1696             { "b", "", ":and:f" },
1697             { "b", "", "", "", "", ":and:f" },
1698             { "f", "", "", "", "", ":" },
1699         };
1700         for (int i = 0; i < input.length; i++) {
1701             pattern = Pattern.compile(input[i][0]);
1702             assertTrue(Arrays.equals(pattern.split(input[i][1]), expected[i]));
1703 
1704             assertFalse(input[i][1].length() > 0 &&  // splitAsStream() return empty resulting
1705                                              // array for zero-length input for now
1706                 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
1707                                expected[i]));
1708         }
1709     }
1710 
1711     @Test
negationTest()1712     public static void negationTest() {
1713         Pattern pattern = Pattern.compile("[\\[@^]+");
1714         Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1715         assertTrue(matcher.find());
1716         assertEquals(matcher.group(0), "@@@@[[[[^^^^");
1717 
1718         pattern = Pattern.compile("[@\\[^]+");
1719         matcher = pattern.matcher("@@@@[[[[^^^^");
1720         assertTrue(matcher.find());
1721         assertEquals(matcher.group(0), "@@@@[[[[^^^^");
1722 
1723         pattern = Pattern.compile("[@\\[^@]+");
1724         matcher = pattern.matcher("@@@@[[[[^^^^");
1725         assertTrue(matcher.find());
1726         assertEquals(matcher.group(0), "@@@@[[[[^^^^");
1727 
1728         pattern = Pattern.compile("\\)");
1729         matcher = pattern.matcher("xxx)xxx");
1730         assertTrue(matcher.find());
1731     }
1732 
1733     @Test
ampersandTest()1734     public static void ampersandTest() {
1735         Pattern pattern = Pattern.compile("[&@]+");
1736         check(pattern, "@@@@&&&&", true);
1737 
1738         pattern = Pattern.compile("[@&]+");
1739         check(pattern, "@@@@&&&&", true);
1740 
1741         pattern = Pattern.compile("[@\\&]+");
1742         check(pattern, "@@@@&&&&", true);
1743     }
1744 
1745     @Test
octalTest()1746     public static void octalTest() {
1747         Pattern pattern = Pattern.compile("\\u0007");
1748         Matcher matcher = pattern.matcher("\u0007");
1749         assertTrue(matcher.matches());
1750         pattern = Pattern.compile("\\07");
1751         matcher = pattern.matcher("\u0007");
1752         assertTrue(matcher.matches());
1753         pattern = Pattern.compile("\\007");
1754         matcher = pattern.matcher("\u0007");
1755         assertTrue(matcher.matches());
1756         pattern = Pattern.compile("\\0007");
1757         matcher = pattern.matcher("\u0007");
1758         assertTrue(matcher.matches());
1759         pattern = Pattern.compile("\\040");
1760         matcher = pattern.matcher("\u0020");
1761         assertTrue(matcher.matches());
1762         pattern = Pattern.compile("\\0403");
1763         matcher = pattern.matcher("\u00203");
1764         assertTrue(matcher.matches());
1765         pattern = Pattern.compile("\\0103");
1766         matcher = pattern.matcher("\u0043");
1767         assertTrue(matcher.matches());
1768     }
1769 
1770     @Test
longPatternTest()1771     public static void longPatternTest() {
1772         try {
1773             Pattern.compile(
1774                 "a 32-character-long pattern xxxx");
1775             Pattern.compile("a 33-character-long pattern xxxxx");
1776             Pattern.compile("a thirty four character long regex");
1777             StringBuilder patternToBe = new StringBuilder(101);
1778             for (int i=0; i<100; i++)
1779                 patternToBe.append((char)(97 + i%26));
1780             Pattern.compile(patternToBe.toString());
1781         } catch (PatternSyntaxException e) {
1782             fail();
1783         }
1784 
1785         // Supplementary character test
1786         try {
1787             Pattern.compile(
1788                 toSupplementaries("a 32-character-long pattern xxxx"));
1789             Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1790             Pattern.compile(toSupplementaries("a thirty four character long regex"));
1791             StringBuilder patternToBe = new StringBuilder(101*2);
1792             for (int i=0; i<100; i++)
1793                 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1794                                                      + 97 + i%26));
1795             Pattern.compile(patternToBe.toString());
1796         } catch (PatternSyntaxException e) {
1797             fail();
1798         }
1799     }
1800 
1801     @Test
group0Test()1802     public static void group0Test() {
1803         Pattern pattern = Pattern.compile("(tes)ting");
1804         Matcher matcher = pattern.matcher("testing");
1805         check(matcher, "testing");
1806 
1807         matcher.reset("testing");
1808         assertTrue(matcher.lookingAt());
1809         assertEquals(matcher.group(0), "testing");
1810 
1811         matcher.reset("testing");
1812         assertTrue(matcher.matches());
1813         assertEquals(matcher.group(0), "testing");
1814 
1815         pattern = Pattern.compile("(tes)ting");
1816         matcher = pattern.matcher("testing");
1817         assertTrue(matcher.lookingAt());
1818         assertEquals(matcher.group(0), "testing");
1819 
1820         pattern = Pattern.compile("^(tes)ting");
1821         matcher = pattern.matcher("testing");
1822         assertTrue(matcher.matches());
1823         assertEquals(matcher.group(0), "testing");
1824 
1825         // Supplementary character test
1826         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1827         matcher = pattern.matcher(toSupplementaries("testing"));
1828         check(matcher, toSupplementaries("testing"));
1829 
1830         matcher.reset(toSupplementaries("testing"));
1831         assertTrue(matcher.lookingAt());
1832         assertEquals(matcher.group(0), toSupplementaries("testing"));
1833 
1834         matcher.reset(toSupplementaries("testing"));
1835         assertTrue(matcher.matches());
1836         assertEquals(matcher.group(0), toSupplementaries("testing"));
1837 
1838         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1839         matcher = pattern.matcher(toSupplementaries("testing"));
1840         assertTrue(matcher.lookingAt());
1841         assertEquals(matcher.group(0), toSupplementaries("testing"));
1842 
1843         pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
1844         matcher = pattern.matcher(toSupplementaries("testing"));
1845 
1846         assertTrue(matcher.matches());
1847         assertEquals(matcher.group(0), toSupplementaries("testing"));
1848     }
1849 
1850     @Test
findIntTest()1851     public static void findIntTest() {
1852         Pattern p = Pattern.compile("blah");
1853         Matcher m = p.matcher("zzzzblahzzzzzblah");
1854         boolean result = m.find(2);
1855 
1856         assertTrue(result);
1857 
1858         final Pattern p2 = Pattern.compile("$");
1859         final Matcher m2 = p2.matcher("1234567890");
1860         result = m2.find(10);
1861         assertTrue(result);
1862         assertThrows(IndexOutOfBoundsException.class, () -> m2.find(11));
1863 
1864         // Supplementary character test
1865         p = Pattern.compile(toSupplementaries("blah"));
1866         m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
1867         result = m.find(2);
1868         assertTrue(result);
1869     }
1870 
1871     @Test
emptyPatternTest()1872     public static void emptyPatternTest() {
1873         Pattern p = Pattern.compile("");
1874         final Matcher m = p.matcher("foo");
1875 
1876         // Should find empty pattern at beginning of input
1877         boolean result = m.find();
1878         assertTrue(result);
1879         assertEquals(m.start(), 0);
1880 
1881         // Should not match entire input if input is not empty
1882         m.reset();
1883         result = m.matches();
1884         assertFalse(result);
1885 
1886         assertThrows(IllegalStateException.class, () -> m.start(0));
1887 
1888         // Should match entire input if input is empty
1889         m.reset("");
1890         result = m.matches();
1891         assertTrue(result);
1892 
1893         result = Pattern.matches("", "");
1894         assertTrue(result);
1895 
1896         result = Pattern.matches("", "foo");
1897         assertFalse(result);
1898     }
1899 
1900     @Test
charClassTest()1901     public static void charClassTest() {
1902         Pattern pattern = Pattern.compile("blah[ab]]blech");
1903         check(pattern, "blahb]blech", true);
1904 
1905         pattern = Pattern.compile("[abc[def]]");
1906         check(pattern, "b", true);
1907 
1908         // Supplementary character tests
1909         pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
1910         check(pattern, toSupplementaries("blahb]blech"), true);
1911 
1912         pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
1913         check(pattern, toSupplementaries("b"), true);
1914 
1915         // u00ff when UNICODE_CASE
1916         pattern = Pattern.compile("[ab\u00ffcd]",
1917                                   Pattern.CASE_INSENSITIVE|
1918                                   Pattern.UNICODE_CASE);
1919         check(pattern, "ab\u00ffcd", true);
1920         check(pattern, "Ab\u0178Cd", true);
1921 
1922         // u00b5 when UNICODE_CASE
1923         pattern = Pattern.compile("[ab\u00b5cd]",
1924                                   Pattern.CASE_INSENSITIVE|
1925                                   Pattern.UNICODE_CASE);
1926         check(pattern, "ab\u00b5cd", true);
1927         check(pattern, "Ab\u039cCd", true);
1928 
1929         /* Special cases
1930            (1)LatinSmallLetterLongS u+017f
1931            (2)LatinSmallLetterDotlessI u+0131
1932            (3)LatineCapitalLetterIWithDotAbove u+0130
1933            (4)KelvinSign u+212a
1934            (5)AngstromSign u+212b
1935         */
1936         int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1937         // Android-changed: Modified the input and output to match Unicode 14.0
1938         // See https://www.unicode.org/Public/14.0.0/ucd/CaseFolding.txt
1939         // pattern = Pattern.compile("[sik\u00c5]+", flags);
1940         // assertTrue(pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches());
1941         pattern = Pattern.compile("[sk\u00e5]+", flags);
1942         assertTrue(pattern.matcher("\u017f\u212a\u212b").matches());
1943 
1944     }
1945 
1946     @Test
caretTest()1947     public static void caretTest() {
1948         Pattern pattern = Pattern.compile("\\w*");
1949         Matcher matcher = pattern.matcher("a#bc#def##g");
1950         check(matcher, "a");
1951         check(matcher, "");
1952         check(matcher, "bc");
1953         check(matcher, "");
1954         check(matcher, "def");
1955         check(matcher, "");
1956         check(matcher, "");
1957         check(matcher, "g");
1958         check(matcher, "");
1959         assertFalse(matcher.find());
1960 
1961         pattern = Pattern.compile("^\\w*");
1962         matcher = pattern.matcher("a#bc#def##g");
1963         check(matcher, "a");
1964         assertFalse(matcher.find());
1965 
1966         pattern = Pattern.compile("\\w");
1967         matcher = pattern.matcher("abc##x");
1968         check(matcher, "a");
1969         check(matcher, "b");
1970         check(matcher, "c");
1971         check(matcher, "x");
1972         assertFalse(matcher.find());
1973 
1974         pattern = Pattern.compile("^\\w");
1975         matcher = pattern.matcher("abc##x");
1976         check(matcher, "a");
1977         assertFalse(matcher.find());
1978 
1979         pattern = Pattern.compile("\\A\\p{Alpha}{3}");
1980         matcher = pattern.matcher("abcdef-ghi\njklmno");
1981         check(matcher, "abc");
1982         assertFalse(matcher.find());
1983 
1984         pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
1985         matcher = pattern.matcher("abcdef-ghi\njklmno");
1986         check(matcher, "abc");
1987         check(matcher, "jkl");
1988         assertFalse(matcher.find());
1989 
1990         pattern = Pattern.compile("^", Pattern.MULTILINE);
1991         matcher = pattern.matcher("this is some text");
1992         String result = matcher.replaceAll("X");
1993         assertEquals(result, "Xthis is some text");
1994 
1995         pattern = Pattern.compile("^");
1996         matcher = pattern.matcher("this is some text");
1997         result = matcher.replaceAll("X");
1998         assertEquals(result, "Xthis is some text");
1999 
2000         pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2001         matcher = pattern.matcher("this is some text\n");
2002         result = matcher.replaceAll("X");
2003         // Android-changed: Inserting 'X' after the new line \n on Android seems correct.
2004         // assertEquals(result, "Xthis is some text\n");
2005         assertEquals(result, "Xthis is some text\nX");
2006     }
2007 
2008     @Test
groupCaptureTest()2009     public static void groupCaptureTest() {
2010         // Independent group
2011         assertThrows(IndexOutOfBoundsException.class, () -> {
2012                     Pattern pattern = Pattern.compile("x+(?>y+)z+");
2013                     Matcher matcher = pattern.matcher("xxxyyyzzz");
2014                     matcher.find();
2015                     matcher.group(1);
2016        });
2017 
2018         // Pure group
2019         assertThrows(IndexOutOfBoundsException.class, () -> {
2020             Pattern pattern = Pattern.compile("x+(?:y+)z+");
2021             Matcher matcher = pattern.matcher("xxxyyyzzz");
2022             matcher.find();
2023             String blah = matcher.group(1);
2024         });
2025 
2026         // Supplementary character tests
2027         // Independent group
2028         assertThrows(IndexOutOfBoundsException.class, () -> {
2029             Pattern pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2030             Matcher matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2031             matcher.find();
2032             String blah = matcher.group(1);
2033         });
2034 
2035         // Pure group
2036         assertThrows(IndexOutOfBoundsException.class, () -> {
2037             Pattern pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2038             Matcher matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2039             matcher.find();
2040             String blah = matcher.group(1);
2041         });
2042     }
2043 
2044     @Test
backRefTest()2045     public static void backRefTest() {
2046         Pattern pattern = Pattern.compile("(a*)bc\\1");
2047         check(pattern, "zzzaabcazzz", true);
2048 
2049         pattern = Pattern.compile("(a*)bc\\1");
2050         check(pattern, "zzzaabcaazzz", true);
2051 
2052         pattern = Pattern.compile("(abc)(def)\\1");
2053         check(pattern, "abcdefabc", true);
2054 
2055         // Android-changed: Android throws Exception at the compilation for non-existent group.
2056         // The doc says "\1 through \9 are always interpreted as back references".
2057         // pattern = Pattern.compile("(abc)(def)\\3");
2058         pattern = Pattern.compile("(abc)(def)\\2");
2059         check(pattern, "abcdefabc", false);
2060 
2061         // Android-removed: ICU4C checks the existence of the groups. ICU4C behavior sounds good,
2062         // but doesn't match the upstream javadoc.
2063         /*
2064         for (int i = 1; i < 10; i++) {
2065             // Make sure backref 1-9 are always accepted
2066             pattern = Pattern.compile("abcdef\\" + i);
2067             // and fail to match if the target group does not exit
2068             check(pattern, "abcdef", false);
2069         }
2070         */
2071 
2072         // Android-changed: Android doesn't match the upstream javadoc, but this isn't important.
2073         // The doc says "the parser will drop digits until the number is smaller or equal to the
2074         // existing number of groups or it is one digit."
2075         // pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2076         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\1\\Q1\\E");
2077         check(pattern, "abcdefghija", false);
2078         check(pattern, "abcdefghija1", true);
2079 
2080         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2081         check(pattern, "abcdefghijkk", true);
2082 
2083         pattern = Pattern.compile("(a)bcdefghij\\11");
2084         check(pattern, "abcdefghija1", true);
2085 
2086         // Supplementary character tests
2087         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2088         check(pattern, toSupplementaries("zzzaabcazzz"), true);
2089 
2090         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2091         check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2092 
2093         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2094         check(pattern, toSupplementaries("abcdefabc"), true);
2095 
2096         // Android-changed: Android doesn't allow non-existent capture group.
2097         // pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2098         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\2"));
2099         check(pattern, toSupplementaries("abcdefabc"), false);
2100         check(pattern, toSupplementaries("abcdefdef"), true);
2101 
2102         // Android-changed: Android doesn't allow non-existent capture group.
2103         // pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2104         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\1\\Q1\\E"));
2105         check(pattern, toSupplementaries("abcdefghija"), false);
2106         check(pattern, toSupplementaries("abcdefghija1"), true);
2107 
2108         // Android-changed: Android doesn't allow non-existent capture group.
2109         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2110         check(pattern, toSupplementaries("abcdefghijkk"), true);
2111     }
2112 
2113     /**
2114      * Unicode Technical Report #18, section 2.6 End of Line
2115      * There is no empty line to be matched in the sequence \u000D\u000A
2116      * but there is an empty line in the sequence \u000A\u000D.
2117      */
2118     @Test
anchorTest()2119     public static void anchorTest() {
2120         Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2121         Matcher m = p.matcher("blah1\r\nblah2");
2122         m.find();
2123         m.find();
2124         assertEquals(m.group(), "blah2");
2125 
2126         m.reset("blah1\n\rblah2");
2127         m.find();
2128         m.find();
2129         m.find();
2130         assertEquals(m.group(), "blah2");
2131 
2132         // Test behavior of $ with \r\n at end of input
2133         p = Pattern.compile(".+$");
2134         m = p.matcher("blah1\r\n");
2135         assertTrue(m.find());
2136         assertEquals(m.group(), "blah1");
2137         assertFalse(m.find());
2138 
2139         // Test behavior of $ with \r\n at end of input in multiline
2140         p = Pattern.compile(".+$", Pattern.MULTILINE);
2141         m = p.matcher("blah1\r\n");
2142         assertTrue(m.find());
2143         assertFalse(m.find());
2144 
2145         // Test for $ recognition of \u0085 for bug 4527731
2146         p = Pattern.compile(".+$", Pattern.MULTILINE);
2147         m = p.matcher("blah1\u0085");
2148         assertTrue(m.find());
2149 
2150         // Supplementary character test
2151         p = Pattern.compile("^.*$", Pattern.MULTILINE);
2152         m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2153         m.find();
2154         m.find();
2155         assertEquals(m.group(), toSupplementaries("blah2"));
2156 
2157         m.reset(toSupplementaries("blah1\n\rblah2"));
2158         m.find();
2159         m.find();
2160         m.find();
2161 
2162         assertEquals(m.group(), toSupplementaries("blah2"));
2163 
2164         // Test behavior of $ with \r\n at end of input
2165         p = Pattern.compile(".+$");
2166         m = p.matcher(toSupplementaries("blah1\r\n"));
2167         assertTrue(m.find());
2168         assertEquals(m.group(), toSupplementaries("blah1"));
2169         assertFalse(m.find());
2170 
2171         // Test behavior of $ with \r\n at end of input in multiline
2172         p = Pattern.compile(".+$", Pattern.MULTILINE);
2173         m = p.matcher(toSupplementaries("blah1\r\n"));
2174         assertTrue(m.find());
2175         assertFalse(m.find());
2176 
2177         // Test for $ recognition of \u0085 for bug 4527731
2178         p = Pattern.compile(".+$", Pattern.MULTILINE);
2179         m = p.matcher(toSupplementaries("blah1\u0085"));
2180         assertTrue(m.find());
2181     }
2182 
2183     /**
2184      * A basic sanity test of Matcher.lookingAt().
2185      */
2186     @Test
lookingAtTest()2187     public static void lookingAtTest() {
2188         Pattern p = Pattern.compile("(ab)(c*)");
2189         Matcher m = p.matcher("abccczzzabcczzzabccc");
2190 
2191         assertTrue(m.lookingAt());
2192 
2193         assertEquals(m.group(), m.group(0));
2194 
2195         m = p.matcher("zzzabccczzzabcczzzabccczzz");
2196         assertFalse(m.lookingAt());
2197 
2198         // Supplementary character test
2199         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2200         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2201 
2202         assertTrue(m.lookingAt());
2203 
2204         assertEquals(m.group(), m.group(0));
2205 
2206         m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2207         assertFalse(m.lookingAt());
2208     }
2209 
2210     /**
2211      * A basic sanity test of Matcher.matches().
2212      */
2213     @Test
matchesTest()2214     public static void matchesTest() {
2215         // matches()
2216         Pattern p = Pattern.compile("ulb(c*)");
2217         Matcher m = p.matcher("ulbcccccc");
2218         assertTrue(m.matches());
2219 
2220         // find() but not matches()
2221         m.reset("zzzulbcccccc");
2222         assertFalse(m.matches());
2223 
2224         // lookingAt() but not matches()
2225         m.reset("ulbccccccdef");
2226         assertFalse(m.matches());
2227 
2228         // matches()
2229         p = Pattern.compile("a|ad");
2230         m = p.matcher("ad");
2231         assertTrue(m.matches());
2232 
2233         // Supplementary character test
2234         // matches()
2235         p = Pattern.compile(toSupplementaries("ulb(c*)"));
2236         m = p.matcher(toSupplementaries("ulbcccccc"));
2237         assertTrue(m.matches());
2238 
2239         // find() but not matches()
2240         m.reset(toSupplementaries("zzzulbcccccc"));
2241         assertFalse(m.matches());
2242 
2243         // lookingAt() but not matches()
2244         m.reset(toSupplementaries("ulbccccccdef"));
2245         assertFalse(m.matches());
2246 
2247         // matches()
2248         p = Pattern.compile(toSupplementaries("a|ad"));
2249         m = p.matcher(toSupplementaries("ad"));
2250         assertTrue(m.matches());
2251     }
2252 
2253     /**
2254      * A basic sanity test of Pattern.matches().
2255      */
2256     @Test
patternMatchesTest()2257     public static void patternMatchesTest() {
2258         // matches()
2259         assertTrue(Pattern.matches(toSupplementaries("ulb(c*)"),
2260                                     toSupplementaries("ulbcccccc")));
2261 
2262         // find() but not matches()
2263         assertFalse(Pattern.matches(toSupplementaries("ulb(c*)"),
2264                                     toSupplementaries("zzzulbcccccc")));
2265 
2266         // lookingAt() but not matches()
2267         assertFalse(Pattern.matches(toSupplementaries("ulb(c*)"),
2268                                    toSupplementaries("ulbccccccdef")));
2269 
2270         // Supplementary character test
2271         // matches()
2272         assertTrue(Pattern.matches(toSupplementaries("ulb(c*)"),
2273                                    toSupplementaries("ulbcccccc")));
2274 
2275         // find() but not matches()
2276         assertFalse(Pattern.matches(toSupplementaries("ulb(c*)"),
2277                                     toSupplementaries("zzzulbcccccc")));
2278 
2279         // lookingAt() but not matches()
2280         assertFalse(Pattern.matches(toSupplementaries("ulb(c*)"),
2281                                     toSupplementaries("ulbccccccdef")));
2282     }
2283 
    /**
     * Canonical equivalence testing. Tests the ability of the engine
     * to match sequences that are not explicitly specified in the
     * pattern when they are considered equivalent by the Unicode Standard.
     *
     * Every pattern in this test is compiled with Pattern.CANON_EQ. The
     * first part uses explicit matchers and the check() helper; the second
     * part is table-driven (see the description above the data array).
     * The test is currently disabled via the enabled = false attribute.
     */
    // Android-changed: Disable the test because CANON_EQ isn't supported on Android.
    @Test(enabled = false)
    public static void ceTest() {
        // Decomposed char outside char classes
        Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
        Matcher m = p.matcher("test\u00e5");
        assertTrue(m.matches());

        m.reset("testa\u030a");
        assertTrue(m.matches());

        // Composed char outside char classes
        p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
        m = p.matcher("test\u00e5");
        assertTrue(m.matches());

        m.reset("testa\u030a");
        assertTrue(m.find());

        // Decomposed char inside a char class
        p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
        m = p.matcher("test\u00e5");
        assertTrue(m.find());

        m.reset("testa\u030a");
        assertTrue(m.find());

        // Composed char inside a char class
        p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
        m = p.matcher("test\u00e5");
        assertTrue(m.find());

        m.reset("testa\u0300");
        assertTrue(m.find());

        m.reset("testa\u030a");
        assertTrue(m.find());

        // Marks that cannot legally change order and be equivalent
        p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
        check(p, "testa\u0308\u0300", true);
        check(p, "testa\u0300\u0308", false);

        // Marks that can legally change order and be equivalent
        p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
        check(p, "testa\u0308\u0323", true);
        check(p, "testa\u0323\u0308", true);

        // Test all equivalences of the sequence a\u0308\u0323\u0300
        p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
        check(p, "testa\u0308\u0323\u0300", true);
        check(p, "testa\u0323\u0308\u0300", true);
        check(p, "testa\u0308\u0300\u0323", true);
        check(p, "test\u00e4\u0323\u0300", true);
        check(p, "test\u00e4\u0300\u0323", true);

        // Table-driven cases consumed by the loop at the end of this method.
        // Each row is { regex, input, mode, expected }:
        //   regex    - compiled with Pattern.CANON_EQ
        //   input    - text handed to the matcher
        //   mode     - "f" runs find(); anything else (here "m") runs matches()
        //   expected - the boolean result the call must return
        Object[][] data = new Object[][] {

        // JDK-4867170
        { "[\u1f80-\u1f82]", "ab\u1f80cd",             "f", true },
        { "[\u1f80-\u1f82]", "ab\u1f81cd",             "f", true },
        { "[\u1f80-\u1f82]", "ab\u1f82cd",             "f", true },
        { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true },
        { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true },
        { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd",       "f", true },
        { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd",       "f", true },

        { "\\p{IsGreek}",    "ab\u1f80cd",             "f", true },
        { "\\p{IsGreek}",    "ab\u1f81cd",             "f", true },
        { "\\p{IsGreek}",    "ab\u1f82cd",             "f", true },
        { "\\p{IsGreek}",    "ab\u03b1\u0314\u0345cd", "f", true },
        { "\\p{IsGreek}",    "ab\u1f01\u0345cd",       "f", true },

        // backtracking, force to match "\u1f80" instead of "\u1f82"
        { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true },

        { "[\\p{IsGreek}]",  "\u03b1\u0314\u0345",     "m", true },
        { "\\p{IsGreek}",    "\u03b1\u0314\u0345",     "m", true },

        { "[^\u1f80-\u1f82]","\u1f81",                 "m", false },
        { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345",     "m", false },
        { "[^\u1f01\u0345]", "\u1f81",                 "f", false },

        { "[^\u1f81]+",      "\u1f80\u1f82",           "f", true },
        { "[\u1f80]",        "ab\u1f80cd",             "f", true },
        { "\u1f80",          "ab\u1f80cd",             "f", true },
        { "\u1f00\u0345\u0300",  "\u1f82", "m", true },
        { "\u1f80",          "-\u1f00\u0345\u0300-",   "f", true },
        { "\u1f82",          "\u1f00\u0345\u0300",     "m", true },
        { "\u1f82",          "\u1f80\u0300",           "m", true },

        // JDK-7080302       # compile failed
        { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true},

        // JDK-6728861, same cause as above one
        { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true},

        // JDK-6995635
        { "(\u00e9)", "e\u0301", "m", true },

        // JDK-6736245
        // interesting special case: nfc(u2add+u0338) -> (u2add+u0338), NOT u2adc
        { "\u2ADC", "\u2ADC", "m", true},          // NFC
        { "\u2ADC", "\u2ADD\u0338", "m", true},    // NFD

        // JDK-4916384
        // Decomposed hangul (jamos) works inside clazz
        { "[\u1100\u1161]", "\u1100\u1161", "m", true},
        { "[\u1100\u1161]", "\uac00", "m", true},

        { "[\uac00]", "\u1100\u1161", "m", true},
        { "[\uac00]", "\uac00", "m", true},

        // Decomposed hangul (jamos)
        { "\u1100\u1161", "\u1100\u1161", "m", true},
        { "\u1100\u1161", "\uac00", "m", true},

        // Composed hangul
        { "\uac00",  "\u1100\u1161", "m", true },
        { "\uac00",  "\uac00", "m", true },

        /* Need a NFDSlice to nfd the source to solve this issue
           u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f>  -> nfc: <u+1d1ba><u+1d165><u+1d16f>
           u+1d1bc -> nfd: <u+1d1ba><u+1d165>           -> nfc: <u+1d1ba><u+1d165>
           <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>

        // Decomposed supplementary outside char classes
        // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
        // Composed supplementary outside char classes
        // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
        */
        { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true },
        //{ "test\ud834\uddc0",             "test\ud834\uddbc\ud834\udd6f", "m", true }, //problem

        { "test\ud834\uddc0",             "test\ud834\uddc0",             "m", true },
        //{ "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0",             "m", true }, //problem
        };

        // Run every table row: compile the regex with CANON_EQ, apply
        // find() or matches() according to the mode column, and fail with
        // a message naming the row when the result differs from expected.
        for (Object[] d : data) {
            String pn = (String)d[0];
            String tt = (String)d[1];
            boolean isFind = "f".equals((d[2]));
            boolean expected = (boolean)d[3];
            boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find()
                                 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches();
            if (ret != expected) {
                fail("pn: " + pn + "\ntt: " + tt + "\nexpected: " + expected + "\nret: " + ret);
            }
        }
    }
2439 
2440     /**
2441      * A basic sanity test of Matcher.replaceAll().
2442      */
2443     @Test
globalSubstitute()2444     public static void globalSubstitute() {
2445         // Global substitution with a literal
2446         Pattern p = Pattern.compile("(ab)(c*)");
2447         Matcher m = p.matcher("abccczzzabcczzzabccc");
2448         assertEquals(m.replaceAll("test"), "testzzztestzzztest");
2449 
2450         m.reset("zzzabccczzzabcczzzabccczzz");
2451         assertEquals(m.replaceAll("test"), "zzztestzzztestzzztestzzz");
2452 
2453         // Global substitution with groups
2454         m.reset("zzzabccczzzabcczzzabccczzz");
2455         String result = m.replaceAll("$1");
2456         assertEquals(result, "zzzabzzzabzzzabzzz");
2457 
2458         // Supplementary character test
2459         // Global substitution with a literal
2460         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2461         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2462         assertEquals(m.replaceAll(toSupplementaries("test")),
2463                                   toSupplementaries("testzzztestzzztest"));
2464 
2465         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2466         assertEquals(m.replaceAll(toSupplementaries("test")),
2467                               toSupplementaries("zzztestzzztestzzztestzzz"));
2468 
2469         // Global substitution with groups
2470         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2471         result = m.replaceAll("$1");
2472         assertEquals(result,toSupplementaries("zzzabzzzabzzzabzzz"));
2473     }
2474 
2475     /**
2476      * Tests the usage of Matcher.appendReplacement() with literal
2477      * and group substitutions.
2478      */
2479     @Test
stringBufferSubstituteLiteral()2480     public static void stringBufferSubstituteLiteral() {
2481         // SB substitution with literal
2482         final String blah = "zzzblahzzz";
2483         final Pattern p = Pattern.compile("blah");
2484         final Matcher m = p.matcher(blah);
2485         final StringBuffer result = new StringBuffer();
2486 
2487         assertThrows(IllegalStateException.class, () -> m.appendReplacement(result, "blech"));
2488 
2489         m.find();
2490         m.appendReplacement(result, "blech");
2491         assertEquals(result.toString(), "zzzblech");
2492 
2493         m.appendTail(result);
2494         assertEquals(result.toString(), "zzzblechzzz");
2495 
2496     }
2497 
2498     @Test
stringBufferSubtituteWithGroups()2499     public static void stringBufferSubtituteWithGroups() {
2500         // SB substitution with groups
2501         final String blah = "zzzabcdzzz";
2502         final Pattern p = Pattern.compile("(ab)(cd)*");
2503         final Matcher m = p.matcher(blah);
2504         final StringBuffer result = new StringBuffer();
2505         assertThrows(IllegalStateException.class, () -> m.appendReplacement(result, "$1"));
2506         m.find();
2507         m.appendReplacement(result, "$1");
2508         assertEquals(result.toString(), "zzzab");
2509 
2510         m.appendTail(result);
2511         assertEquals(result.toString(), "zzzabzzz");
2512     }
2513 
2514     @Test
stringBufferThreeSubstitution()2515     public static void stringBufferThreeSubstitution() {
2516         // SB substitution with 3 groups
2517         final String blah = "zzzabcdcdefzzz";
2518         final Pattern p = Pattern.compile("(ab)(cd)*(ef)");
2519         final Matcher m = p.matcher(blah);
2520         final StringBuffer result = new StringBuffer();
2521         assertThrows(IllegalStateException.class, () -> m.appendReplacement(result, "$1w$2w$3"));
2522         m.find();
2523         m.appendReplacement(result, "$1w$2w$3");
2524         assertEquals(result.toString(), "zzzabwcdwef");
2525 
2526         m.appendTail(result);
2527         assertEquals(result.toString(), "zzzabwcdwefzzz");
2528 
2529     }
2530 
2531     @Test
stringBufferSubstituteGroupsThreeMatches()2532     public static void stringBufferSubstituteGroupsThreeMatches() {
2533         // SB substitution with groups and three matches
2534         // skipping middle match
2535         final String blah = "zzzabcdzzzabcddzzzabcdzzz";
2536         final Pattern p = Pattern.compile("(ab)(cd*)");
2537         final Matcher m = p.matcher(blah);
2538         final StringBuffer result = new StringBuffer();
2539         assertThrows(IllegalStateException.class, () -> m.appendReplacement(result, "$1"));
2540 
2541         m.find();
2542         m.appendReplacement(result, "$1");
2543         assertEquals(result.toString(), "zzzab");
2544 
2545         m.find();
2546         m.find();
2547         m.appendReplacement(result, "$2");
2548         assertEquals(result.toString(), "zzzabzzzabcddzzzcd");
2549 
2550         m.appendTail(result);
2551         assertEquals(result.toString(), "zzzabzzzabcddzzzcdzzz");
2552 
2553 
2554     }
2555 
2556     @Test
stringBufferEscapedDollar()2557     public static void stringBufferEscapedDollar() {
2558         // Check to make sure escaped $ is ignored
2559         String blah = "zzzabcdcdefzzz";
2560         Pattern p = Pattern.compile("(ab)(cd)*(ef)");
2561         Matcher m = p.matcher(blah);
2562         StringBuffer result = new StringBuffer();
2563         m.find();
2564         m.appendReplacement(result, "$1w\\$2w$3");
2565         assertEquals(result.toString(), "zzzabw$2wef");
2566 
2567         m.appendTail(result);
2568         assertEquals(result.toString(), "zzzabw$2wefzzz");
2569     }
2570 
2571     @Test
stringBufferNonExistentGroup()2572     public static void stringBufferNonExistentGroup() {
2573         // Check to make sure a reference to nonexistent group causes error
2574         final String blah = "zzzabcdcdefzzz";
2575         final Pattern p = Pattern.compile("(ab)(cd)*(ef)");
2576         final Matcher m = p.matcher(blah);
2577         final StringBuffer result = new StringBuffer();
2578         m.find();
2579         assertThrows(IndexOutOfBoundsException.class,
2580                 () -> m.appendReplacement(result, "$1w$5w$3"));
2581     }
2582 
2583     @Test
stringBufferCheckDoubleDigitGroupReferences()2584     public static void stringBufferCheckDoubleDigitGroupReferences() {
2585 
2586         // Check double digit group references
2587         String blah = "zzz123456789101112zzz";
2588         Pattern p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2589         Matcher m = p.matcher(blah);
2590         StringBuffer result = new StringBuffer();
2591         m.find();
2592         m.appendReplacement(result, "$1w$11w$3");
2593         assertEquals(result.toString(), "zzz1w11w3");
2594 
2595     }
2596 
2597     @Test
stringBufferBackoff()2598     public static void stringBufferBackoff() {
2599         // Check to make sure it backs off $15 to $1 if only three groups
2600         String blah = "zzzabcdcdefzzz";
2601         Pattern p = Pattern.compile("(ab)(cd)*(ef)");
2602         Matcher m = p.matcher(blah);
2603         StringBuffer result = new StringBuffer();
2604         m.find();
2605         m.appendReplacement(result, "$1w$15w$3");
2606         assertEquals(result.toString(), "zzzabwab5wef");
2607     }
2608 
2609     @Test
stringBufferSupplementaryCharacter()2610     public static void stringBufferSupplementaryCharacter(){
2611         // Supplementary character test
2612         // SB substitution with literal
2613         final String blah = toSupplementaries("zzzblahzzz");
2614         final Pattern p = Pattern.compile(toSupplementaries("blah"));
2615         final Matcher m = p.matcher(blah);
2616         final StringBuffer result = new StringBuffer();
2617         assertThrows(IllegalStateException.class,
2618                 () -> m.appendReplacement(result, toSupplementaries("blech")));
2619         m.find();
2620         m.appendReplacement(result, toSupplementaries("blech"));
2621         assertEquals(result.toString(), toSupplementaries("zzzblech"));
2622 
2623         m.appendTail(result);
2624         assertEquals(result.toString(), toSupplementaries("zzzblechzzz"));
2625     }
2626 
2627     @Test
stringBufferSubstitutionWithGroups()2628     public static void stringBufferSubstitutionWithGroups() {
2629         // SB substitution with groups
2630         final String blah = toSupplementaries("zzzabcdzzz");
2631         final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2632         final Matcher m = p.matcher(blah);
2633         final StringBuffer result = new StringBuffer();
2634         assertThrows(IllegalStateException.class,
2635                 () -> m.appendReplacement(result, "$1"));
2636         m.find();
2637         m.appendReplacement(result, "$1");
2638         assertEquals(result.toString(), toSupplementaries("zzzab"));
2639 
2640         m.appendTail(result);
2641         assertEquals(result.toString(), toSupplementaries("zzzabzzz"));
2642     }
2643 
2644     @Test
stringBufferSubstituteWithThreeGroups()2645     public static void stringBufferSubstituteWithThreeGroups() {
2646         // SB substitution with 3 groups
2647         final String blah = toSupplementaries("zzzabcdcdefzzz");
2648         final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2649         final Matcher m = p.matcher(blah);
2650         final StringBuffer result = new StringBuffer();
2651         assertThrows(IllegalStateException.class,
2652                 () -> m.appendReplacement(result, toSupplementaries("$1w$2w$3")));
2653 
2654         m.find();
2655         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2656         assertEquals(result.toString(), toSupplementaries("zzzabwcdwef"));
2657 
2658         m.appendTail(result);
2659         assertEquals(result.toString(), toSupplementaries("zzzabwcdwefzzz"));
2660     }
2661 
2662     @Test
stringBufferWithGroupsAndThreeMatches()2663     public static void stringBufferWithGroupsAndThreeMatches() {
2664         // SB substitution with groups and three matches
2665         // skipping middle match
2666         final String blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2667         final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2668         final Matcher m = p.matcher(blah);
2669         final StringBuffer result = new StringBuffer();
2670         assertThrows(IllegalStateException.class, () ->
2671             m.appendReplacement(result, "$1"));
2672 
2673         m.find();
2674         m.appendReplacement(result, "$1");
2675         assertEquals(result.toString(), toSupplementaries("zzzab"));
2676 
2677         m.find();
2678         m.find();
2679         m.appendReplacement(result, "$2");
2680         assertEquals(result.toString(), toSupplementaries("zzzabzzzabcddzzzcd"));
2681 
2682         m.appendTail(result);
2683         assertEquals(result.toString(), toSupplementaries("zzzabzzzabcddzzzcdzzz"));
2684     }
2685 
2686     @Test
stringBufferEnsureDollarIgnored()2687     public static void stringBufferEnsureDollarIgnored() {
2688         // Check to make sure escaped $ is ignored
2689         String blah = toSupplementaries("zzzabcdcdefzzz");
2690         Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2691         Matcher m = p.matcher(blah);
2692         StringBuffer result = new StringBuffer();
2693         m.find();
2694         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2695         assertEquals(result.toString(), toSupplementaries("zzzabw$2wef"));
2696 
2697         m.appendTail(result);
2698         assertEquals(result.toString(), toSupplementaries("zzzabw$2wefzzz"));
2699     }
2700 
2701     @Test
stringBufferCheckNonexistentGroupReference()2702     public static void stringBufferCheckNonexistentGroupReference() {
2703         // Check to make sure a reference to nonexistent group causes error
2704         final String blah = toSupplementaries("zzzabcdcdefzzz");
2705         final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2706         final Matcher m = p.matcher(blah);
2707         final StringBuffer result = new StringBuffer();
2708         m.find();
2709         assertThrows(IndexOutOfBoundsException.class, () ->
2710                 m.appendReplacement(result, toSupplementaries("$1w$5w$3")));
2711     }
2712 
2713     @Test
stringBufferCheckSupplementalDoubleDigitGroupReferences()2714     public static void stringBufferCheckSupplementalDoubleDigitGroupReferences() {
2715         // Check double digit group references
2716         String blah = toSupplementaries("zzz123456789101112zzz");
2717         Pattern p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2718         Matcher m = p.matcher(blah);
2719         StringBuffer result = new StringBuffer();
2720         m.find();
2721         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2722         assertEquals(result.toString(), toSupplementaries("zzz1w11w3"));
2723     }
2724 
2725     @Test
stringBufferBackoffSupplemental()2726     public static void stringBufferBackoffSupplemental() {
2727         // Check to make sure it backs off $15 to $1 if only three groups
2728         String blah = toSupplementaries("zzzabcdcdefzzz");
2729         Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2730         Matcher m = p.matcher(blah);
2731         StringBuffer result = new StringBuffer();
2732         m.find();
2733         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2734         assertEquals(result.toString(), toSupplementaries("zzzabwab5wef"));
2735     }
2736 
2737     // Android-changed: Disable stringBufferCheckAppendException() test due to app compat behavior.
2738     // @Test
stringBufferCheckAppendException()2739     public static void stringBufferCheckAppendException() {
2740         // Check nothing has been appended into the output buffer if
2741         // the replacement string triggers IllegalArgumentException.
2742         Pattern p = Pattern.compile("(abc)");
2743         Matcher m = p.matcher("abcd");
2744         StringBuffer result = new StringBuffer();
2745         m.find();
2746         expectThrows(IllegalArgumentException.class,
2747                 () -> m.appendReplacement(result, ("xyz$g")));
2748         assertEquals(result.length(), 0);
2749 
2750     }
2751     /**
2752      * Tests the usage of Matcher.appendReplacement() with literal
2753      * and group substitutions.
2754      */
2755     @Test
stringBuilderSubstitutionWithLiteral()2756     public static void stringBuilderSubstitutionWithLiteral() {
2757         // SB substitution with literal
2758         final String blah = "zzzblahzzz";
2759         final Pattern p = Pattern.compile("blah");
2760         final Matcher m = p.matcher(blah);
2761         final StringBuilder result = new StringBuilder();
2762         assertThrows(IllegalStateException.class, () ->
2763             m.appendReplacement(result, "blech"));
2764 
2765         m.find();
2766         m.appendReplacement(result, "blech");
2767         assertEquals(result.toString(), "zzzblech");
2768 
2769         m.appendTail(result);
2770         assertEquals(result.toString(), "zzzblechzzz");
2771     }
2772 
2773     @Test
stringBuilderSubstitutionWithGroups()2774     public static void stringBuilderSubstitutionWithGroups() {
2775         // SB substitution with groups
2776         final String blah = "zzzabcdzzz";
2777         final Pattern p = Pattern.compile("(ab)(cd)*");
2778         final Matcher m = p.matcher(blah);
2779         final StringBuilder result = new StringBuilder();
2780         assertThrows(IllegalStateException.class, () ->
2781             m.appendReplacement(result, "$1"));
2782         m.find();
2783         m.appendReplacement(result, "$1");
2784         assertEquals(result.toString(), "zzzab");
2785 
2786         m.appendTail(result);
2787         assertEquals(result.toString(), "zzzabzzz");
2788     }
2789 
2790     @Test
stringBuilderSubstitutionWithThreeGroups()2791     public static void stringBuilderSubstitutionWithThreeGroups() {
2792         // SB substitution with 3 groups
2793         final String blah = "zzzabcdcdefzzz";
2794         final Pattern p = Pattern.compile("(ab)(cd)*(ef)");
2795         final Matcher m = p.matcher(blah);
2796         final StringBuilder result = new StringBuilder();
2797         assertThrows(IllegalStateException.class, () ->
2798             m.appendReplacement(result, "$1w$2w$3"));
2799 
2800         m.find();
2801         m.appendReplacement(result, "$1w$2w$3");
2802         assertEquals(result.toString(), "zzzabwcdwef");
2803 
2804         m.appendTail(result);
2805         assertEquals(result.toString(), "zzzabwcdwefzzz");
2806     }
2807 
2808     @Test
stringBuilderSubstitutionThreeMatch()2809     public static void stringBuilderSubstitutionThreeMatch() {
2810         // SB substitution with groups and three matches
2811         // skipping middle match
2812         final String blah = "zzzabcdzzzabcddzzzabcdzzz";
2813         final Pattern p = Pattern.compile("(ab)(cd*)");
2814         final Matcher m = p.matcher(blah);
2815         final StringBuilder result = new StringBuilder();
2816         assertThrows(IllegalStateException.class, () ->
2817             m.appendReplacement(result, "$1"));
2818         m.find();
2819         m.appendReplacement(result, "$1");
2820         assertEquals(result.toString(), "zzzab");
2821 
2822         m.find();
2823         m.find();
2824         m.appendReplacement(result, "$2");
2825         assertEquals(result.toString(), "zzzabzzzabcddzzzcd");
2826 
2827         m.appendTail(result);
2828         assertEquals(result.toString(), "zzzabzzzabcddzzzcdzzz");
2829     }
2830 
2831     @Test
stringBuilderSubtituteCheckEscapedDollar()2832     public static void stringBuilderSubtituteCheckEscapedDollar() {
2833         // Check to make sure escaped $ is ignored
2834         final String blah = "zzzabcdcdefzzz";
2835         final Pattern p = Pattern.compile("(ab)(cd)*(ef)");
2836         final Matcher m = p.matcher(blah);
2837         final StringBuilder result = new StringBuilder();
2838         m.find();
2839         m.appendReplacement(result, "$1w\\$2w$3");
2840         assertEquals(result.toString(), "zzzabw$2wef");
2841 
2842         m.appendTail(result);
2843         assertEquals(result.toString(), "zzzabw$2wefzzz");
2844     }
2845 
2846     @Test
stringBuilderNonexistentGroupError()2847     public static void stringBuilderNonexistentGroupError() {
2848         // Check to make sure a reference to nonexistent group causes error
2849         final String blah = "zzzabcdcdefzzz";
2850         final Pattern p = Pattern.compile("(ab)(cd)*(ef)");
2851         final Matcher m = p.matcher(blah);
2852         final StringBuilder result = new StringBuilder();
2853         m.find();
2854         assertThrows(IndexOutOfBoundsException.class, () ->
2855             m.appendReplacement(result, "$1w$5w$3"));
2856     }
2857 
2858     @Test
stringBuilderDoubleDigitGroupReferences()2859     public static void stringBuilderDoubleDigitGroupReferences() {
2860         // Check double digit group references
2861         final String blah = "zzz123456789101112zzz";
2862         final Pattern p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2863         final Matcher m = p.matcher(blah);
2864         final StringBuilder result = new StringBuilder();
2865         m.find();
2866         m.appendReplacement(result, "$1w$11w$3");
2867         assertEquals(result.toString(), "zzz1w11w3");
2868     }
2869 
2870     @Test
stringBuilderCheckBackoff()2871     public static void stringBuilderCheckBackoff() {
2872         // Check to make sure it backs off $15 to $1 if only three groups
2873         final String blah = "zzzabcdcdefzzz";
2874         final Pattern p = Pattern.compile("(ab)(cd)*(ef)");
2875         final Matcher m = p.matcher(blah);
2876         final StringBuilder result = new StringBuilder();
2877         m.find();
2878         m.appendReplacement(result, "$1w$15w$3");
2879         assertEquals(result.toString(), "zzzabwab5wef");
2880     }
2881 
2882     @Test
stringBuilderSupplementalLiteralSubstitution()2883     public static void stringBuilderSupplementalLiteralSubstitution() {
2884         // Supplementary character test
2885         // SB substitution with literal
2886         final String blah = toSupplementaries("zzzblahzzz");
2887         final Pattern p = Pattern.compile(toSupplementaries("blah"));
2888         final Matcher m = p.matcher(blah);
2889         final StringBuilder result = new StringBuilder();
2890         assertThrows(IllegalStateException.class,
2891                 () -> m.appendReplacement(result, toSupplementaries("blech")));
2892         m.find();
2893         m.appendReplacement(result, toSupplementaries("blech"));
2894         assertEquals(result.toString(), toSupplementaries("zzzblech"));
2895         m.appendTail(result);
2896         assertEquals(result.toString(), toSupplementaries("zzzblechzzz"));
2897     }
2898 
2899     @Test
stringBuilderSupplementalSubstitutionWithGroups()2900     public static void stringBuilderSupplementalSubstitutionWithGroups() {
2901         // SB substitution with groups
2902         final String blah = toSupplementaries("zzzabcdzzz");
2903         final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2904         final Matcher m = p.matcher(blah);
2905         final StringBuilder result = new StringBuilder();
2906         assertThrows(IllegalStateException.class,
2907                 () -> m.appendReplacement(result, "$1"));
2908         m.find();
2909         m.appendReplacement(result, "$1");
2910         assertEquals(result.toString(), toSupplementaries("zzzab"));
2911 
2912         m.appendTail(result);
2913         assertEquals(result.toString(), toSupplementaries("zzzabzzz"));
2914     }
2915 
2916     @Test
stringBuilderSupplementalSubstitutionThreeGroups()2917     public static void stringBuilderSupplementalSubstitutionThreeGroups() {
2918         // SB substitution with 3 groups
2919         final String blah = toSupplementaries("zzzabcdcdefzzz");
2920         final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2921         final Matcher m = p.matcher(blah);
2922         final StringBuilder result = new StringBuilder();
2923         assertThrows(IllegalStateException.class, () ->
2924             m.appendReplacement(result, toSupplementaries("$1w$2w$3")));
2925         m.find();
2926         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2927         assertEquals(result.toString(), toSupplementaries("zzzabwcdwef"));
2928 
2929         m.appendTail(result);
2930         assertEquals(result.toString(), toSupplementaries("zzzabwcdwefzzz"));
2931     }
2932 
2933     @Test
stringBuilderSubstitutionSupplementalSkipMiddleThreeMatch()2934     public static void stringBuilderSubstitutionSupplementalSkipMiddleThreeMatch() {
2935         // SB substitution with groups and three matches
2936         // skipping middle match
2937         final String blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2938         final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2939         final Matcher m = p.matcher(blah);
2940         final StringBuilder result = new StringBuilder();
2941         assertThrows(IllegalStateException.class, () ->
2942                 m.appendReplacement(result, "$1"));
2943         m.find();
2944         m.appendReplacement(result, "$1");
2945         assertEquals(result.toString(), toSupplementaries("zzzab"));
2946 
2947         m.find();
2948         m.find();
2949         m.appendReplacement(result, "$2");
2950         assertEquals(result.toString(), toSupplementaries("zzzabzzzabcddzzzcd"));
2951 
2952         m.appendTail(result);
2953         assertEquals(result.toString(), toSupplementaries("zzzabzzzabcddzzzcdzzz"));
2954     }
2955 
2956     @Test
stringBuilderSupplementalEscapedDollar()2957     public static void stringBuilderSupplementalEscapedDollar() {
2958         // Check to make sure escaped $ is ignored
2959         final String blah = toSupplementaries("zzzabcdcdefzzz");
2960         final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2961         final Matcher m = p.matcher(blah);
2962         final StringBuilder result = new StringBuilder();
2963         m.find();
2964         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2965         assertEquals(result.toString(), toSupplementaries("zzzabw$2wef"));
2966 
2967         m.appendTail(result);
2968         assertEquals(result.toString(), toSupplementaries("zzzabw$2wefzzz"));
2969     }
2970 
2971     @Test
stringBuilderSupplementalNonExistentGroupError()2972     public static void stringBuilderSupplementalNonExistentGroupError() {
2973         // Check to make sure a reference to nonexistent group causes error
2974         final String blah = toSupplementaries("zzzabcdcdefzzz");
2975         final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2976         final Matcher m = p.matcher(blah);
2977         final StringBuilder result = new StringBuilder();
2978         m.find();
2979         assertThrows(IndexOutOfBoundsException.class, () ->
2980             m.appendReplacement(result, toSupplementaries("$1w$5w$3")));
2981     }
2982 
2983     @Test
stringBuilderSupplementalCheckDoubleDigitGroupReferences()2984     public static void stringBuilderSupplementalCheckDoubleDigitGroupReferences() {
2985         // Check double digit group references
2986         final String blah = toSupplementaries("zzz123456789101112zzz");
2987         final Pattern p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2988         final Matcher m = p.matcher(blah);
2989         final StringBuilder result = new StringBuilder();
2990         m.find();
2991         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2992         assertEquals(result.toString(), toSupplementaries("zzz1w11w3"));
2993     }
2994 
2995     @Test
stringBuilderSupplementalCheckBackoff()2996     public static void stringBuilderSupplementalCheckBackoff() {
2997         // Check to make sure it backs off $15 to $1 if only three groups
2998         final String blah = toSupplementaries("zzzabcdcdefzzz");
2999         final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3000         final Matcher m = p.matcher(blah);
3001         final StringBuilder result = new StringBuilder();
3002         m.find();
3003         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3004         assertEquals(result.toString(), toSupplementaries("zzzabwab5wef"));
3005     }
3006 
3007     @Test
stringBuilderCheckIllegalArgumentException()3008     public static void stringBuilderCheckIllegalArgumentException() {
3009         // Check nothing has been appended into the output buffer if
3010         // the replacement string triggers IllegalArgumentException.
3011         final Pattern p = Pattern.compile("(abc)");
3012         final Matcher m = p.matcher("abcd");
3013         final StringBuilder result = new StringBuilder();
3014         m.find();
3015         // Android-removed: Our implementation is more lenient than upstream.
3016         // assertThrows(IllegalArgumentException.class, () ->
3017         //     m.appendReplacement(result, ("xyz$g")));
3018         // assertEquals(result.length(), 0);
3019     }
3020 
3021     /*
3022      * 5 groups of characters are created to make a substitution string.
3023      * A base string will be created including random lead chars, the
3024      * substitution string, and random trailing chars.
3025      * A pattern containing the 5 groups is searched for and replaced with:
3026      * random group + random string + random group.
3027      * The results are checked for correctness.
3028      */
3029     @Test
substitutionBasher()3030     public static void substitutionBasher() {
3031         for (int runs = 0; runs<1000; runs++) {
3032             // Create a base string to work in
3033             int leadingChars = generator.nextInt(10);
3034             StringBuilder baseBuffer = new StringBuilder(100);
3035             String leadingString = getRandomAlphaString(leadingChars);
3036             baseBuffer.append(leadingString);
3037 
3038             // Create 5 groups of random number of random chars
3039             // Create the string to substitute
3040             // Create the pattern string to search for
3041             StringBuilder bufferToSub = new StringBuilder(25);
3042             StringBuilder bufferToPat = new StringBuilder(50);
3043             String[] groups = new String[5];
3044             for(int i=0; i<5; i++) {
3045                 int aGroupSize = generator.nextInt(5)+1;
3046                 groups[i] = getRandomAlphaString(aGroupSize);
3047                 bufferToSub.append(groups[i]);
3048                 bufferToPat.append('(');
3049                 bufferToPat.append(groups[i]);
3050                 bufferToPat.append(')');
3051             }
3052             String stringToSub = bufferToSub.toString();
3053             String pattern = bufferToPat.toString();
3054 
3055             // Place sub string into working string at random index
3056             baseBuffer.append(stringToSub);
3057 
3058             // Append random chars to end
3059             int trailingChars = generator.nextInt(10);
3060             String trailingString = getRandomAlphaString(trailingChars);
3061             baseBuffer.append(trailingString);
3062             String baseString = baseBuffer.toString();
3063 
3064             // Create test pattern and matcher
3065             Pattern p = Pattern.compile(pattern);
3066             Matcher m = p.matcher(baseString);
3067 
3068             // Reject candidate if pattern happens to start early
3069             m.find();
3070             if (m.start() < leadingChars)
3071                 continue;
3072 
3073             // Reject candidate if more than one match
3074             if (m.find())
3075                 continue;
3076 
3077             // Construct a replacement string with :
3078             // random group + random string + random group
3079             StringBuilder bufferToRep = new StringBuilder();
3080             int groupIndex1 = generator.nextInt(5);
3081             bufferToRep.append("$").append(groupIndex1 + 1);
3082             String randomMidString = getRandomAlphaString(5);
3083             bufferToRep.append(randomMidString);
3084             int groupIndex2 = generator.nextInt(5);
3085             bufferToRep.append("$").append(groupIndex2 + 1);
3086             String replacement = bufferToRep.toString();
3087 
3088             // Do the replacement
3089             String result = m.replaceAll(replacement);
3090 
3091             // Construct expected result
3092             String expectedResult = leadingString +
3093                     groups[groupIndex1] +
3094                     randomMidString +
3095                     groups[groupIndex2] +
3096                     trailingString;
3097 
3098             // Check results
3099             assertEquals(result, expectedResult);
3100         }
3101     }
3102 
3103     /*
3104      * 5 groups of characters are created to make a substitution string.
3105      * A base string will be created including random lead chars, the
3106      * substitution string, and random trailing chars.
3107      * A pattern containing the 5 groups is searched for and replaced with:
3108      * random group + random string + random group.
3109      * The results are checked for correctness.
3110      */
3111     @Test
substitutionBasher2()3112     public static void substitutionBasher2() {
3113         for (int runs = 0; runs<1000; runs++) {
3114             // Create a base string to work in
3115             int leadingChars = generator.nextInt(10);
3116             StringBuilder baseBuffer = new StringBuilder(100);
3117             String leadingString = getRandomAlphaString(leadingChars);
3118             baseBuffer.append(leadingString);
3119 
3120             // Create 5 groups of random number of random chars
3121             // Create the string to substitute
3122             // Create the pattern string to search for
3123             StringBuilder bufferToSub = new StringBuilder(25);
3124             StringBuilder bufferToPat = new StringBuilder(50);
3125             String[] groups = new String[5];
3126             for(int i=0; i<5; i++) {
3127                 int aGroupSize = generator.nextInt(5)+1;
3128                 groups[i] = getRandomAlphaString(aGroupSize);
3129                 bufferToSub.append(groups[i]);
3130                 bufferToPat.append('(');
3131                 bufferToPat.append(groups[i]);
3132                 bufferToPat.append(')');
3133             }
3134             String stringToSub = bufferToSub.toString();
3135             String pattern = bufferToPat.toString();
3136 
3137             // Place sub string into working string at random index
3138             baseBuffer.append(stringToSub);
3139 
3140             // Append random chars to end
3141             int trailingChars = generator.nextInt(10);
3142             String trailingString = getRandomAlphaString(trailingChars);
3143             baseBuffer.append(trailingString);
3144             String baseString = baseBuffer.toString();
3145 
3146             // Create test pattern and matcher
3147             Pattern p = Pattern.compile(pattern);
3148             Matcher m = p.matcher(baseString);
3149 
3150             // Reject candidate if pattern happens to start early
3151             m.find();
3152             if (m.start() < leadingChars)
3153                 continue;
3154 
3155             // Reject candidate if more than one match
3156             if (m.find())
3157                 continue;
3158 
3159             // Construct a replacement string with :
3160             // random group + random string + random group
3161             StringBuilder bufferToRep = new StringBuilder();
3162             int groupIndex1 = generator.nextInt(5);
3163             bufferToRep.append("$").append(groupIndex1 + 1);
3164             String randomMidString = getRandomAlphaString(5);
3165             bufferToRep.append(randomMidString);
3166             int groupIndex2 = generator.nextInt(5);
3167             bufferToRep.append("$").append(groupIndex2 + 1);
3168             String replacement = bufferToRep.toString();
3169 
3170             // Do the replacement
3171             String result = m.replaceAll(replacement);
3172 
3173             // Construct expected result
3174             String expectedResult = leadingString +
3175                     groups[groupIndex1] +
3176                     randomMidString +
3177                     groups[groupIndex2] +
3178                     trailingString;
3179 
3180             // Check results
3181             assertEquals(result, expectedResult);
3182         }
3183     }
3184 
3185     /**
3186      * Checks the handling of some escape sequences that the Pattern
3187      * class should process instead of the java compiler. These are
3188      * not in the file because the escapes should be be processed
3189      * by the Pattern class when the regex is compiled.
3190      */
3191     @Test
escapes()3192     public static void escapes() {
3193         Pattern p = Pattern.compile("\\043");
3194         Matcher m = p.matcher("#");
3195         assertTrue(m.find());
3196 
3197         p = Pattern.compile("\\x23");
3198         m = p.matcher("#");
3199         assertTrue(m.find());
3200 
3201         p = Pattern.compile("\\u0023");
3202         m = p.matcher("#");
3203         assertTrue(m.find());
3204     }
3205 
3206     /**
3207      * Checks the handling of blank input situations. These
3208      * tests are incompatible with my test file format.
3209      */
3210     @Test
blankInput()3211     public static void blankInput() {
3212         Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3213         Matcher m = p.matcher("");
3214         assertFalse(m.find());
3215 
3216         p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3217         m = p.matcher("");
3218         assertTrue(m.find());
3219 
3220         p = Pattern.compile("abc");
3221         m = p.matcher("");
3222         assertFalse(m.find());
3223 
3224         p = Pattern.compile("a*");
3225         m = p.matcher("");
3226         assertTrue(m.find());
3227     }
3228 
3229     /**
3230      * Tests the Boyer-Moore pattern matching of a character sequence
3231      * on randomly generated patterns.
3232      */
3233     @Test
bm()3234     public static void bm() {
3235         doBnM('a');
3236 
3237         doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3238     }
3239 
doBnM(int baseCharacter)3240     private static void doBnM(int baseCharacter) {
3241         for (int i=0; i<100; i++) {
3242             // Create a short pattern to search for
3243             int patternLength = generator.nextInt(7) + 4;
3244             StringBuilder patternBuffer = new StringBuilder(patternLength);
3245             String pattern;
3246             retry: for (;;) {
3247                 for (int x=0; x<patternLength; x++) {
3248                     int ch = baseCharacter + generator.nextInt(26);
3249                     if (Character.isSupplementaryCodePoint(ch)) {
3250                         patternBuffer.append(Character.toChars(ch));
3251                     } else {
3252                         patternBuffer.append((char)ch);
3253                     }
3254                 }
3255                 pattern = patternBuffer.toString();
3256 
3257                 // Avoid patterns that start and end with the same substring
3258                 // See JDK-6854417
3259                 for (int x=1; x < pattern.length(); x++) {
3260                     if (pattern.startsWith(pattern.substring(x)))
3261                         continue retry;
3262                 }
3263                 break;
3264             }
3265             Pattern p = Pattern.compile(pattern);
3266 
3267             // Create a buffer with random ASCII chars that does
3268             // not match the sample
3269             String toSearch;
3270             StringBuffer s;
3271             Matcher m = p.matcher("");
3272             do {
3273                 s = new StringBuffer(100);
3274                 for (int x=0; x<100; x++) {
3275                     int ch = baseCharacter + generator.nextInt(26);
3276                     if (Character.isSupplementaryCodePoint(ch)) {
3277                         s.append(Character.toChars(ch));
3278                     } else {
3279                         s.append((char)ch);
3280                     }
3281                 }
3282                 toSearch = s.toString();
3283                 m.reset(toSearch);
3284             } while (m.find());
3285 
3286             // Insert the pattern at a random spot
3287             int insertIndex = generator.nextInt(99);
3288             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3289                 insertIndex++;
3290             s.insert(insertIndex, pattern);
3291             toSearch = s.toString();
3292 
3293             // Make sure that the pattern is found
3294             m.reset(toSearch);
3295             assertTrue(m.find());
3296 
3297             // Make sure that the match text is the pattern
3298             assertEquals(m.group(), pattern);
3299 
3300             // Make sure match occured at insertion point
3301             assertEquals(m.start(), insertIndex);
3302         }
3303     }
3304 
3305     /**
3306      * Tests the matching of slices on randomly generated patterns.
3307      * The Boyer-Moore optimization is not done on these patterns
3308      * because it uses unicode case folding.
3309      */
3310     @Test
slice()3311     public static void slice() {
3312         doSlice(Character.MAX_VALUE);
3313 
3314         doSlice(Character.MAX_CODE_POINT);
3315     }
3316 
doSlice(int maxCharacter)3317     private static void doSlice(int maxCharacter) {
3318         for (int i=0; i<100; i++) {
3319             // Create a short pattern to search for
3320             int patternLength = generator.nextInt(7) + 4;
3321             StringBuilder patternBuffer = new StringBuilder(patternLength);
3322             for (int x=0; x<patternLength; x++) {
3323                 int randomChar = 0;
3324                 while (!Character.isLetterOrDigit(randomChar))
3325                     randomChar = generator.nextInt(maxCharacter);
3326                 if (Character.isSupplementaryCodePoint(randomChar)) {
3327                     patternBuffer.append(Character.toChars(randomChar));
3328                 } else {
3329                     patternBuffer.append((char) randomChar);
3330                 }
3331             }
3332             String pattern =  patternBuffer.toString();
3333             Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3334 
3335             // Create a buffer with random chars that does not match the sample
3336             String toSearch = null;
3337             StringBuffer s = null;
3338             Matcher m = p.matcher("");
3339             do {
3340                 s = new StringBuffer(100);
3341                 for (int x=0; x<100; x++) {
3342                     int randomChar = 0;
3343                     while (!Character.isLetterOrDigit(randomChar))
3344                         randomChar = generator.nextInt(maxCharacter);
3345                     if (Character.isSupplementaryCodePoint(randomChar)) {
3346                         s.append(Character.toChars(randomChar));
3347                     } else {
3348                         s.append((char) randomChar);
3349                     }
3350                 }
3351                 toSearch = s.toString();
3352                 m.reset(toSearch);
3353             } while (m.find());
3354 
3355             // Insert the pattern at a random spot
3356             int insertIndex = generator.nextInt(99);
3357             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3358                 insertIndex++;
3359             s.insert(insertIndex, pattern);
3360             toSearch = s.toString();
3361 
3362             // Make sure that the pattern is found
3363             m.reset(toSearch);
3364             assertTrue(m.find());
3365 
3366             // Make sure that the match text is the pattern
3367             assertEquals(m.group(), pattern);
3368 
3369             // Make sure match occured at insertion point
3370             assertEquals(m.start(), insertIndex);
3371         }
3372     }
3373 
3374     // Testing examples from a file
3375 
3376     /**
3377      * Goes through the file "TestCases.txt" and creates many patterns
3378      * described in the file, matching the patterns against input lines in
3379      * the file, and comparing the results against the correct results
3380      * also found in the file. The file format is described in comments
3381      * at the head of the file.
3382      */
processFile(String fileName)3383     public static void processFile(String fileName) throws IOException {
3384         // Android-changed: Use resources instead of "test.src" property.
3385         // File testCases = new File(System.getProperty("test.src", "."),
3386         //                           fileName);
3387         // FileInputStream in = new FileInputStream(testCases);
3388         InputStream in = RegExTest.class.getResourceAsStream(fileName);
3389         assertNotNull(in);
3390         BufferedReader r = new BufferedReader(new InputStreamReader(in));
3391 
3392         // Process next test case.
3393         String aLine;
3394         while((aLine = r.readLine()) != null) {
3395             // Read a line for pattern
3396             String patternString = grabLine(r);
3397             Pattern p = null;
3398             try {
3399                 p = compileTestPattern(patternString);
3400             } catch (PatternSyntaxException e) {
3401                 String dataString = grabLine(r);
3402                 String expectedResult = grabLine(r);
3403                 if (expectedResult.startsWith("error"))
3404                     continue;
3405                 String line1 = "----------------------------------------";
3406                 String line2 = "Pattern = " + patternString;
3407                 String line3 = "Data = " + dataString;
3408                 fail(line1 + System.lineSeparator() + line2 + System.lineSeparator() + line3 + System.lineSeparator());
3409                 continue;
3410             }
3411 
3412             // Read a line for input string
3413             String dataString = grabLine(r);
3414             Matcher m = p.matcher(dataString);
3415             StringBuilder result = new StringBuilder();
3416 
3417             // Check for IllegalStateExceptions before a match
3418             preMatchInvariants(m);
3419 
3420             boolean found = m.find();
3421 
3422             if (found)
3423                 postTrueMatchInvariants(m);
3424             else
3425                 postFalseMatchInvariants(m);
3426 
3427             if (found) {
3428                 result.append("true ");
3429                 result.append(m.group(0)).append(" ");
3430             } else {
3431                 result.append("false ");
3432             }
3433 
3434             result.append(m.groupCount());
3435 
3436             if (found) {
3437                 for (int i=1; i<m.groupCount()+1; i++)
3438                     if (m.group(i) != null)
3439                         result.append(" ").append(m.group(i));
3440             }
3441 
3442             // Read a line for the expected result
3443             String expectedResult = grabLine(r);
3444 
3445             assertEquals(result.toString(), expectedResult,
3446                 "Pattern = " + patternString +
3447                 System.lineSeparator() +
3448                 "Data = " + dataString +
3449                 System.lineSeparator() +
3450                 "Expected = " + expectedResult +
3451                 System.lineSeparator() +
3452                 "Actual   = " + result.toString());
3453         }
3454     }
3455 
preMatchInvariants(Matcher m)3456     private static void preMatchInvariants(Matcher m) {
3457         assertThrows(IllegalStateException.class, m::start);
3458         assertThrows(IllegalStateException.class, m::end);
3459         assertThrows(IllegalStateException.class, m::group);
3460     }
3461 
postFalseMatchInvariants(Matcher m)3462     private static void postFalseMatchInvariants(Matcher m) {
3463         assertThrows(IllegalStateException.class, m::group);
3464         assertThrows(IllegalStateException.class, m::start);
3465         assertThrows(IllegalStateException.class, m::end);
3466     }
3467 
postTrueMatchInvariants(Matcher m)3468     private static void postTrueMatchInvariants(Matcher m) {
3469         assertEquals(m.start(), m.start(0));
3470         assertEquals(m.start(), m.start(0));
3471         assertEquals(m.group(), m.group(0));
3472         assertThrows(IndexOutOfBoundsException.class, () -> m.group(50));
3473     }
3474 
compileTestPattern(String patternString)3475     private static Pattern compileTestPattern(String patternString) {
3476         if (!patternString.startsWith("'")) {
3477             return Pattern.compile(patternString);
3478         }
3479         int break1 = patternString.lastIndexOf("'");
3480         String flagString = patternString.substring(break1+1);
3481         patternString = patternString.substring(1, break1);
3482 
3483         if (flagString.equals("i"))
3484             return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3485 
3486         if (flagString.equals("m"))
3487             return Pattern.compile(patternString, Pattern.MULTILINE);
3488 
3489         return Pattern.compile(patternString);
3490     }
3491 
3492     /**
3493      * Reads a line from the input file. Keeps reading lines until a non
3494      * empty non comment line is read. If the line contains a \n then
3495      * these two characters are replaced by a newline char. If a \\uxxxx
3496      * sequence is read then the sequence is replaced by the unicode char.
3497      */
grabLine(BufferedReader r)3498     public static String grabLine(BufferedReader r) throws IOException {
3499         int index = 0;
3500         String line = r.readLine();
3501         while (line.startsWith("//") || line.length() < 1)
3502             line = r.readLine();
3503         while ((index = line.indexOf("\\n")) != -1) {
3504             StringBuilder temp = new StringBuilder(line);
3505             temp.replace(index, index+2, "\n");
3506             line = temp.toString();
3507         }
3508         while ((index = line.indexOf("\\u")) != -1) {
3509             StringBuilder temp = new StringBuilder(line);
3510             String value = temp.substring(index+2, index+6);
3511             char aChar = (char)Integer.parseInt(value, 16);
3512             String unicodeChar = "" + aChar;
3513             temp.replace(index, index+6, unicodeChar);
3514             line = temp.toString();
3515         }
3516 
3517         return line;
3518     }
3519 
3520 
3521     @Test
namedGroupCaptureTest()3522     public static void namedGroupCaptureTest() {
3523         check(Pattern.compile("x+(?<gname>y+)z+"),
3524               "xxxyyyzzz",
3525               "gname",
3526               "yyy");
3527 
3528         check(Pattern.compile("x+(?<gname8>y+)z+"),
3529               "xxxyyyzzz",
3530               "gname8",
3531               "yyy");
3532 
3533         //backref
3534         Pattern pattern = Pattern.compile("(a*)bc\\1");
3535         check(pattern, "zzzaabcazzz", true);  // found "abca"
3536 
3537         check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3538               "zzzaabcaazzz", true);
3539 
3540         check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3541               "abcdefabc", true);
3542 
3543         check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3544               "abcdefghijkk", true);
3545 
3546         // Supplementary character tests
3547         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3548               toSupplementaries("zzzaabcazzz"), true);
3549 
3550         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3551               toSupplementaries("zzzaabcaazzz"), true);
3552 
3553         check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3554               toSupplementaries("abcdefabc"), true);
3555 
3556         check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3557                               "(?<gname>" +
3558                               toSupplementaries("k)") + "\\k<gname>"),
3559               toSupplementaries("abcdefghijkk"), true);
3560 
3561         check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3562               "xxxyyyzzzyyy",
3563               "gname",
3564               "yyy");
3565 
3566         //replaceFirst/All
3567         checkReplaceFirst("(?<gn>ab)(c*)",
3568                           "abccczzzabcczzzabccc",
3569                           "${gn}",
3570                           "abzzzabcczzzabccc");
3571 
3572         checkReplaceAll("(?<gn>ab)(c*)",
3573                         "abccczzzabcczzzabccc",
3574                         "${gn}",
3575                         "abzzzabzzzab");
3576 
3577 
3578         checkReplaceFirst("(?<gn>ab)(c*)",
3579                           "zzzabccczzzabcczzzabccczzz",
3580                           "${gn}",
3581                           "zzzabzzzabcczzzabccczzz");
3582 
3583         checkReplaceAll("(?<gn>ab)(c*)",
3584                         "zzzabccczzzabcczzzabccczzz",
3585                         "${gn}",
3586                         "zzzabzzzabzzzabzzz");
3587 
3588         checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3589                           "zzzabccczzzabcczzzabccczzz",
3590                           "${gn2}",
3591                           "zzzccczzzabcczzzabccczzz");
3592 
3593         checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3594                         "zzzabccczzzabcczzzabccczzz",
3595                         "${gn2}",
3596                         "zzzccczzzcczzzccczzz");
3597 
3598         //toSupplementaries("(ab)(c*)"));
3599         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3600                            ")(?<gn2>" + toSupplementaries("c") + "*)",
3601                           toSupplementaries("abccczzzabcczzzabccc"),
3602                           "${gn1}",
3603                           toSupplementaries("abzzzabcczzzabccc"));
3604 
3605 
3606         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3607                         ")(?<gn2>" + toSupplementaries("c") + "*)",
3608                         toSupplementaries("abccczzzabcczzzabccc"),
3609                         "${gn1}",
3610                         toSupplementaries("abzzzabzzzab"));
3611 
3612         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3613                            ")(?<gn2>" + toSupplementaries("c") + "*)",
3614                           toSupplementaries("abccczzzabcczzzabccc"),
3615                           "${gn2}",
3616                           toSupplementaries("ccczzzabcczzzabccc"));
3617 
3618 
3619         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3620                         ")(?<gn2>" + toSupplementaries("c") + "*)",
3621                         toSupplementaries("abccczzzabcczzzabccc"),
3622                         "${gn2}",
3623                         toSupplementaries("ccczzzcczzzccc"));
3624 
3625         checkReplaceFirst("(?<dog>Dog)AndCat",
3626                           "zzzDogAndCatzzzDogAndCatzzz",
3627                           "${dog}",
3628                           "zzzDogzzzDogAndCatzzz");
3629 
3630 
3631         checkReplaceAll("(?<dog>Dog)AndCat",
3632                           "zzzDogAndCatzzzDogAndCatzzz",
3633                           "${dog}",
3634                           "zzzDogzzzDogzzz");
3635 
3636         // backref in Matcher & String
3637         assertTrue("abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") &&
3638                    "abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"));
3639 
3640         // negative
3641         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3642         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
3643         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
3644         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3645         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
3646 
3647         Matcher iaeMatcher = Pattern.compile("(?<gname>abc)(def)").matcher("abcdef");
3648         iaeMatcher.find();
3649         assertThrows(IllegalArgumentException.class, () -> iaeMatcher.group("gnameX"));
3650         assertThrows(IllegalArgumentException.class, () -> iaeMatcher.start("gnameX"));
3651         assertThrows(IllegalArgumentException.class, () -> iaeMatcher.start("gnameX"));
3652 
3653         Matcher npeMatcher = Pattern.compile("(?<gname>abc)(def)").matcher("abcdef");
3654         npeMatcher.find();
3655         assertThrows(NullPointerException.class, () -> npeMatcher.group(null));
3656         assertThrows(NullPointerException.class, () -> npeMatcher.start(null));
3657         assertThrows(NullPointerException.class, () -> npeMatcher.end(null));
3658     }
3659 
3660     // This is for bug 6919132
3661     @Test
nonBmpClassComplementTest()3662     public static void nonBmpClassComplementTest() {
3663         Pattern p = Pattern.compile("\\P{Lu}");
3664         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3665 
3666         assertFalse(m.find() && m.start() == 1);
3667 
3668         // from a unicode category
3669         p = Pattern.compile("\\P{Lu}");
3670         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3671         assertFalse(m.find());
3672         assertTrue(m.hitEnd());
3673 
3674         // block
3675         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3676         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3677         assertFalse(m.find() && m.start() == 1);
3678 
3679         p = Pattern.compile("\\P{sc=GRANTHA}");
3680         m = p.matcher(new String(new int[] {0x11350}, 0, 1));
3681         assertFalse(m.find() && m.start() == 1);
3682     }
3683 
3684     // FIXME: Investigate if this test is worth fixing.
3685     // Android-changed: Disable the test because upstream and ICU has different Unicode versions.
3686     @Test(enabled = false)
unicodePropertiesTest()3687     public static void unicodePropertiesTest() {
3688         // different forms
3689         assertFalse(!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3690                     !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3691                     !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3692                     !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3693                     !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3694                     !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3695                     !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3696                     !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3697                     !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3698                     !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches());
3699 
3700         Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
3701         Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3702         Matcher lastSM  = common;
3703         Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3704 
3705         Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
3706         Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
3707         Matcher lastBM = latin;
3708         Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3709 
3710         for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3711             if (cp >= 0x30000 && (cp & 0x70) == 0){
3712                 continue;  // only pick couple code points, they are the same
3713             }
3714 
3715             // Unicode Script
3716             Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3717             Matcher m;
3718             String str = new String(Character.toChars(cp));
3719             if (script == lastScript) {
3720                  m = lastSM;
3721                  m.reset(str);
3722             } else {
3723                  m  = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3724             }
3725             assertTrue(m.matches());
3726 
3727             Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3728             other.reset(str);
3729             assertFalse(other.matches());
3730             lastSM = m;
3731             lastScript = script;
3732 
3733             // Unicode Block
3734             Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3735             if (block == null) {
3736                 //System.out.printf("Not a Block: cp=%x%n", cp);
3737                 continue;
3738             }
3739             if (block == lastBlock) {
3740                  m = lastBM;
3741                  m.reset(str);
3742             } else {
3743                  m  = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3744             }
3745             assertTrue(m.matches());
3746             other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3747             other.reset(str);
3748             assertFalse(other.matches());
3749             lastBM = m;
3750             lastBlock = block;
3751         }
3752     }
3753 
3754     // FIXME: Investigate if this test is worth fixed.
3755     // Android-changed: Disable the test because upstream and ICU has different Unicode versions.
3756     @Test(enabled = false)
unicodeHexNotationTest()3757     public static void unicodeHexNotationTest() {
3758 
3759         // negative
3760         checkExpectedFail("\\x{-23}");
3761         checkExpectedFail("\\x{110000}");
3762         checkExpectedFail("\\x{}");
3763         checkExpectedFail("\\x{AB[ef]");
3764 
3765         // codepoint
3766         check("^\\x{1033c}$",              "\uD800\uDF3C", true);
3767         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
3768         check("^\\x{D800}\\x{DF3c}+$",     "\uD800\uDF3C", false);
3769         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
3770 
3771         // in class
3772         check("^[\\x{D800}\\x{DF3c}]+$",   "\uD800\uDF3C", false);
3773         check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
3774         check("^[\\x{D800}\\x{DF3C}]+$",   "\uD800\uDF3C", false);
3775         check("^[\\x{DF3C}\\x{D800}]+$",   "\uD800\uDF3C", false);
3776         check("^[\\x{D800}\\x{DF3C}]+$",   "\uDF3C\uD800", true);
3777         check("^[\\x{DF3C}\\x{D800}]+$",   "\uDF3C\uD800", true);
3778 
3779         for (int cp = 0; cp <= 0x10FFFF; cp++) {
3780              String s = "A" + new String(Character.toChars(cp)) + "B";
3781              String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3782                                              : String.format("\\u%04x\\u%04x",
3783                                                (int) Character.toChars(cp)[0],
3784                                                (int) Character.toChars(cp)[1]);
3785              String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3786              assertTrue(Pattern.matches("A" + hexUTF16 + "B", s));
3787              assertTrue(Pattern.matches("A[" + hexUTF16 + "]B", s));
3788              assertTrue(Pattern.matches("A" + hexCodePoint + "B", s));
3789              assertTrue(Pattern.matches("A[" + hexCodePoint + "]B", s));
3790          }
3791     }
3792 
3793     // FIXME: Investigate if this test is worth fixed.
3794     // Android-changed: Disable the test because upstream and ICU has different Unicode versions,
3795     // and the different behavior with the UNICODE_CHARACTER_CLASS flag.
3796     @Test(enabled = false)
unicodeClassesTest()3797     public static void unicodeClassesTest() {
3798 
3799         Matcher lower  = Pattern.compile("\\p{Lower}").matcher("");
3800         Matcher upper  = Pattern.compile("\\p{Upper}").matcher("");
3801         Matcher ASCII  = Pattern.compile("\\p{ASCII}").matcher("");
3802         Matcher alpha  = Pattern.compile("\\p{Alpha}").matcher("");
3803         Matcher digit  = Pattern.compile("\\p{Digit}").matcher("");
3804         Matcher alnum  = Pattern.compile("\\p{Alnum}").matcher("");
3805         Matcher punct  = Pattern.compile("\\p{Punct}").matcher("");
3806         Matcher graph  = Pattern.compile("\\p{Graph}").matcher("");
3807         Matcher print  = Pattern.compile("\\p{Print}").matcher("");
3808         Matcher blank  = Pattern.compile("\\p{Blank}").matcher("");
3809         Matcher cntrl  = Pattern.compile("\\p{Cntrl}").matcher("");
3810         Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
3811         Matcher space  = Pattern.compile("\\p{Space}").matcher("");
3812         Matcher bound  = Pattern.compile("\\b").matcher("");
3813         Matcher word   = Pattern.compile("\\w++").matcher("");
3814         // UNICODE_CHARACTER_CLASS
3815         // Matcher lowerU  = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3816         // Matcher upperU  = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3817         // Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3818         // Matcher alphaU  = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3819         // Matcher digitU  = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3820         // Matcher alnumU  = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3821         // Matcher punctU  = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3822         // Matcher graphU  = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3823         // Matcher printU  = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3824         // Matcher blankU  = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3825         // Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3826         // Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3827         // Matcher spaceU  = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3828         // Matcher boundU  = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3829         // Matcher wordU   = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3830         Matcher lowerU  = Pattern.compile("\\p{Lower}", 0).matcher("");
3831         Matcher upperU  = Pattern.compile("\\p{Upper}", 0).matcher("");
3832         Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", 0).matcher("");
3833         Matcher alphaU  = Pattern.compile("\\p{Alpha}", 0).matcher("");
3834         Matcher digitU  = Pattern.compile("\\p{Digit}", 0).matcher("");
3835         Matcher alnumU  = Pattern.compile("\\p{Alnum}", 0).matcher("");
3836         Matcher punctU  = Pattern.compile("\\p{Punct}", 0).matcher("");
3837         Matcher graphU  = Pattern.compile("\\p{Graph}", 0).matcher("");
3838         Matcher printU  = Pattern.compile("\\p{Print}", 0).matcher("");
3839         Matcher blankU  = Pattern.compile("\\p{Blank}", 0).matcher("");
3840         Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", 0).matcher("");
3841         Matcher xdigitU = Pattern.compile("\\p{XDigit}", 0).matcher("");
3842         Matcher spaceU  = Pattern.compile("\\p{Space}", 0).matcher("");
3843         Matcher boundU  = Pattern.compile("\\b", 0).matcher("");
3844         Matcher wordU   = Pattern.compile("\\w", 0).matcher("");
3845         // embedded flag (?U)
3846         // Android-changed: UNICODE_CHARACTER_CLASS flag isn't supported.
3847         // Matcher lowerEU  = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3848         // Matcher graphEU  = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3849         // Matcher wordEU   = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3850         //
3851         // Matcher bwb    = Pattern.compile("\\b\\w\\b").matcher("");
3852         // Matcher bwbU   = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3853         // Matcher bwbEU  = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3854         Matcher lowerEU  = Pattern.compile("\\p{Lower}", 0).matcher("");
3855         Matcher graphEU  = Pattern.compile("\\p{Graph}", 0).matcher("");
3856         Matcher wordEU   = Pattern.compile("\\w", 0).matcher("");
3857 
3858         Matcher bwb    = Pattern.compile("\\b\\w\\b").matcher("");
3859         Matcher bwbU   = Pattern.compile("\\b\\w++\\b", 0).matcher("");
3860         Matcher bwbEU  = Pattern.compile("\\b\\w++\\b", 0).matcher("");
3861         // properties
3862         Matcher lowerP  = Pattern.compile("\\p{IsLowerCase}").matcher("");
3863         Matcher upperP  = Pattern.compile("\\p{IsUpperCase}").matcher("");
3864         Matcher titleP  = Pattern.compile("\\p{IsTitleCase}").matcher("");
3865         Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
3866         Matcher alphaP  = Pattern.compile("\\p{IsAlphabetic}").matcher("");
3867         Matcher ideogP  = Pattern.compile("\\p{IsIdeographic}").matcher("");
3868         Matcher cntrlP  = Pattern.compile("\\p{IsControl}").matcher("");
3869         Matcher spaceP  = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
3870         Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
3871         Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
3872         Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
3873         // javaMethod
3874         Matcher lowerJ  = Pattern.compile("\\p{javaLowerCase}").matcher("");
3875         Matcher upperJ  = Pattern.compile("\\p{javaUpperCase}").matcher("");
3876         Matcher alphaJ  = Pattern.compile("\\p{javaAlphabetic}").matcher("");
3877         Matcher ideogJ  = Pattern.compile("\\p{javaIdeographic}").matcher("");
3878         // GC/C
3879         Matcher gcC  = Pattern.compile("\\p{C}").matcher("");
3880 
3881         for (int cp = 1; cp < 0x30000; cp++) {
3882             String str = new String(Character.toChars(cp));
3883             int type = Character.getType(cp);
3884             if (// lower
3885                 POSIX_ASCII.isLower(cp)   != lower.reset(str).matches()  ||
3886                 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
3887                 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
3888                 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
3889                 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
3890                 // upper
3891                 POSIX_ASCII.isUpper(cp)   != upper.reset(str).matches()  ||
3892                 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
3893                 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
3894                 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
3895                 // alpha
3896                 POSIX_ASCII.isAlpha(cp)   != alpha.reset(str).matches()  ||
3897                 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
3898                 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
3899                 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
3900                 // digit
3901                 POSIX_ASCII.isDigit(cp)   != digit.reset(str).matches()  ||
3902                 Character.isDigit(cp)     != digitU.reset(str).matches() ||
3903                 // alnum
3904                 POSIX_ASCII.isAlnum(cp)   != alnum.reset(str).matches()  ||
3905                 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
3906                 // punct
3907                 POSIX_ASCII.isPunct(cp)   != punct.reset(str).matches()  ||
3908                 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
3909                 // graph
3910                 POSIX_ASCII.isGraph(cp)   != graph.reset(str).matches()  ||
3911                 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
3912                 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
3913                 // blank
3914                 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
3915                                           != blank.reset(str).matches()  ||
3916                 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
3917                 // print
3918                 POSIX_ASCII.isPrint(cp)   != print.reset(str).matches()  ||
3919                 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
3920                 // cntrl
3921                 POSIX_ASCII.isCntrl(cp)   != cntrl.reset(str).matches()  ||
3922                 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
3923                 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
3924                 // hexdigit
3925                 POSIX_ASCII.isHexDigit(cp)   != xdigit.reset(str).matches()  ||
3926                 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
3927                 // space
3928                 POSIX_ASCII.isSpace(cp)   != space.reset(str).matches()  ||
3929                 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
3930                 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
3931                 // word
3932                 POSIX_ASCII.isWord(cp)   != word.reset(str).matches()  ||
3933                 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
3934                 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
3935                 // bwordb
3936                 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
3937                 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
3938                 // properties
3939                 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
3940                 Character.isLetter(cp)    != letterP.reset(str).matches()||
3941                 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
3942                 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
3943                 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
3944                 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
3945                 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() ||
3946                 // gc_C
3947                 (Character.CONTROL == type || Character.FORMAT == type ||
3948                  Character.PRIVATE_USE == type || Character.SURROGATE == type ||
3949                  Character.UNASSIGNED == type)
3950                 != gcC.reset(str).matches()) {
3951                 fail();
3952             }
3953         }
3954 
3955         // bounds/word align
3956         twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
3957         assertTrue(bwbU.reset("\u0180sherman\u0400").matches());
3958         twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
3959         assertTrue(bwbU.reset("\u0180sh\u0345erman\u0400").matches());
3960         twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
3961         assertTrue(bwbU.reset("\u0724\u0739\u0724").matches());
3962         assertTrue(bwbEU.reset("\u0724\u0739\u0724").matches());
3963     }
3964 
3965     // FIXME: Investigate if this test is worth fixed.
3966     // Android-changed: Disable the test because upstream and ICU has different Unicode versions,
3967     // and the different behavior with the UNICODE_CHARACTER_CLASS flag.
3968     @Test(enabled = false)
unicodeCharacterNameTest()3969     public static void unicodeCharacterNameTest() {
3970 
3971         for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) {
3972             if (!Character.isValidCodePoint(cp) ||
3973                 Character.getType(cp) == Character.UNASSIGNED)
3974                 continue;
3975             String str = new String(Character.toChars(cp));
3976             // single
3977             String p = "\\N{" + Character.getName(cp) + "}";
3978             // Android-changed: Android fails at the compilation time.
3979             // assertTrue(Pattern.compile(p).matcher(str).matches());
3980             String p1 = p;
3981             assertThrows(PatternSyntaxException.class, () -> Pattern.compile(p1));
3982             // class[c]
3983             p = "[\\N{" + Character.getName(cp) + "}]";
3984             // Android-changed: Android fails at the compilation time.
3985             // assertTrue(Pattern.compile(p).matcher(str).matches());
3986             String p2 = p;
3987             assertThrows(PatternSyntaxException.class, () -> Pattern.compile(p2));
3988         }
3989 
3990         // range
3991         for (int i = 0; i < 10; i++) {
3992             int start = generator.nextInt(20);
3993             int end = start + generator.nextInt(200);
3994             String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]";
3995             String str;
3996             for (int cp = start; cp < end; cp++) {
3997                 str = new String(Character.toChars(cp));
3998 
3999                 // Android-changed: Android fails at the compilation time.
4000                 // assertTrue(Pattern.compile(p).matcher(str).matches());
4001                 assertThrows(PatternSyntaxException.class, () -> Pattern.compile(p));
4002             }
4003             str = new String(Character.toChars(end + 10));
4004             // Android-changed: Android fails at the compilation time.
4005             // assertFalse(Pattern.compile(p).matcher(str).matches());
4006             assertThrows(PatternSyntaxException.class, () -> Pattern.compile(p));
4007         }
4008 
4009         // slice
4010         for (int i = 0; i < 10; i++) {
4011             int n = generator.nextInt(256);
4012             int[] buf = new int[n];
4013             StringBuilder sb = new StringBuilder(1024);
4014             for (int j = 0; j < n; j++) {
4015                 int cp = generator.nextInt(1000);
4016                 if (!Character.isValidCodePoint(cp) ||
4017                     Character.getType(cp) == Character.UNASSIGNED)
4018                     cp = 0x4e00;    // just use 4e00
4019                 sb.append("\\N{").append(Character.getName(cp)).append("}");
4020                 buf[j] = cp;
4021             }
4022             String p = sb.toString();
4023             String str = new String(buf, 0, buf.length);
4024             assertTrue(Pattern.compile(p).matcher(str).matches());
4025         }
4026     }
4027 
4028     // FIXME: Investigate why this test. It may be an ICU4C bug treating WS characters.
4029     // Android-changed: Disable the test because upstream and ICU has different Unicode versions.
4030     @Test(enabled = false)
horizontalAndVerticalWSTest()4031     public static void horizontalAndVerticalWSTest() {
4032         String hws = new String (new char[] {
4033                                      0x09, 0x20, 0xa0, 0x1680, 0x180e,
4034                                      0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
4035                                      0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
4036                                      0x202f, 0x205f, 0x3000 });
4037         String vws = new String (new char[] {
4038                                      0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
4039         assertTrue(Pattern.compile("\\h+").matcher(hws).matches() &&
4040                    Pattern.compile("[\\h]+").matcher(hws).matches());
4041         assertTrue(!Pattern.compile("\\H").matcher(hws).find() &&
4042                    !Pattern.compile("[\\H]").matcher(hws).find());
4043         assertTrue(Pattern.compile("\\v+").matcher(vws).matches() &&
4044                    Pattern.compile("[\\v]+").matcher(vws).matches());
4045         assertTrue(!Pattern.compile("\\V").matcher(vws).find() &&
4046                    !Pattern.compile("[\\V]").matcher(vws).find());
4047         String prefix = "abcd";
4048         String suffix = "efgh";
4049         String ng = "A";
4050         for (int i = 0; i < hws.length(); i++) {
4051             String c = String.valueOf(hws.charAt(i));
4052             Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
4053             assertTrue(m.find() && c.equals(m.group()));
4054             m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
4055             assertTrue(m.find() && c.equals(m.group()));
4056 
4057             String matcherSubstring = hws.substring(0, i) + ng + hws.substring(i);
4058 
4059             m = Pattern.compile("\\H").matcher(matcherSubstring);
4060             assertTrue(m.find() && ng.equals(m.group()));
4061             m = Pattern.compile("[\\H]").matcher(matcherSubstring);
4062             assertTrue(m.find() && ng.equals(m.group()));
4063         }
4064         for (int i = 0; i < vws.length(); i++) {
4065             String c = String.valueOf(vws.charAt(i));
4066             Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
4067             assertTrue(m.find() && c.equals(m.group()));
4068             m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
4069             assertTrue(m.find() && c.equals(m.group()));
4070 
4071             String matcherSubstring = vws.substring(0, i) + ng + vws.substring(i);
4072             m = Pattern.compile("\\V").matcher(matcherSubstring);
4073             assertTrue(m.find() && ng.equals(m.group()));
4074             m = Pattern.compile("[\\V]").matcher(matcherSubstring);
4075             assertTrue(m.find() && ng.equals(m.group()));
4076         }
4077         // \v in range is interpreted as 0x0B. This is the undocumented behavior
4078         assertTrue(Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches());
4079     }
4080 
4081     @Test
linebreakTest()4082     public static void linebreakTest() {
4083         String linebreaks = new String (new char[] {
4084             0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
4085         String crnl = "\r\n";
4086 
4087         // Android-changed: Disable failing backtracking test.
4088         // assertTrue((Pattern.compile("\\R+").matcher(linebreaks).matches() &&
4089         //      Pattern.compile("\\R").matcher(crnl).matches() &&
4090         //      Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() &&
4091         //      Pattern.compile("\\Rabc").matcher("\rabc").matches() &&
4092         //      Pattern.compile("\\R\\R").matcher(crnl).matches() &&  // backtracking
4093         //      Pattern.compile("\\R\\n").matcher(crnl).matches()) || // backtracking
4094         //      Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()); // #8176029
4095         assertTrue(Pattern.compile("\\R+").matcher(linebreaks).matches());
4096         assertTrue(Pattern.compile("\\R").matcher(crnl).matches());
4097         assertTrue(Pattern.compile("\\Rabc").matcher(crnl + "abc").matches());
4098         assertTrue(Pattern.compile("\\Rabc").matcher("\rabc").matches());
4099         // assertTrue(Pattern.compile("\\R\\R").matcher(crnl).matches());  // backtracking
4100         // assertTrue(Pattern.compile("\\R\\n").matcher(crnl).matches()); // backtracking
4101         assertFalse(Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()); // #8176029
4102     }
4103 
4104     // #7189363
4105     @Test
branchTest()4106     public static void branchTest() {
4107         assertFalse(!Pattern.compile("(a)?bc|d").matcher("d").find() ||     // greedy
4108                     !Pattern.compile("(a)+bc|d").matcher("d").find() ||
4109                     !Pattern.compile("(a)*bc|d").matcher("d").find() ||
4110                     !Pattern.compile("(a)??bc|d").matcher("d").find() ||    // reluctant
4111                     !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
4112                     !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
4113                     !Pattern.compile("(a)?+bc|d").matcher("d").find() ||    // possessive
4114                     !Pattern.compile("(a)++bc|d").matcher("d").find() ||
4115                     !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
4116                     !Pattern.compile("(a)?bc|d").matcher("d").matches() ||  // greedy
4117                     !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
4118                     !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
4119                     !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
4120                     !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
4121                     !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
4122                     !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
4123                     !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
4124                     !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
4125                     !Pattern.compile("(a)?bc|de").matcher("de").find() ||   // others
4126                     !Pattern.compile("(a)??bc|de").matcher("de").find() ||
4127                     !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
4128                     !Pattern.compile("(a)??bc|de").matcher("de").matches());
4129     }
4130 
4131     // This test is for 8007395
4132     @Test
groupCurlyNotFoundSuppTest()4133     public static void groupCurlyNotFoundSuppTest() {
4134         String input = "test this as \ud83d\ude0d";
4135         for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
4136                                           "test(.)*(@[a-zA-Z.]+)",
4137                                           "test([^B])+(@[a-zA-Z.]+)",
4138                                           "test([^B])*(@[a-zA-Z.]+)",
4139                                           "test(\\P{IsControl})+(@[a-zA-Z.]+)",
4140                                           "test(\\P{IsControl})*(@[a-zA-Z.]+)",
4141                                         }) {
4142             Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
4143                                .matcher(input);
4144             assertFalse(m.find());
4145         }
4146     }
4147 
4148     // This test is for 8023647
4149     @Test
groupCurlyBackoffTest()4150     public static void groupCurlyBackoffTest() {
4151         assertFalse(!"abc1c".matches("(\\w)+1\\1") ||
4152                     "abc11".matches("(\\w)+1\\1"));
4153     }
4154 
4155     // This test is for 8012646
4156     @Test
patternAsPredicate()4157     public static void patternAsPredicate() {
4158         Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4159 
4160         assertFalse(p.test(""));
4161         assertTrue(p.test("word"));
4162         assertFalse(p.test("1234"));
4163         assertTrue(p.test("word1234"));
4164     }
4165 
4166     // This test is for 8184692
4167     @Test
patternAsMatchPredicate()4168     public static void patternAsMatchPredicate() {
4169         Predicate<String> p = Pattern.compile("[a-z]+").asMatchPredicate();
4170 
4171         assertFalse(p.test(""));
4172         assertTrue(p.test("word"));
4173         assertFalse(p.test("1234word"));
4174         assertFalse(p.test("1234"));
4175     }
4176 
4177 
4178     // This test is for 8035975
4179     @Test
invalidFlags()4180     public static void invalidFlags() {
4181         for (int flag = 1; flag != 0; flag <<= 1) {
4182             switch (flag) {
4183             case Pattern.CASE_INSENSITIVE:
4184             case Pattern.MULTILINE:
4185             case Pattern.DOTALL:
4186             case Pattern.UNICODE_CASE:
4187             case Pattern.CANON_EQ:
4188             case Pattern.UNIX_LINES:
4189             case Pattern.LITERAL:
4190             case Pattern.UNICODE_CHARACTER_CLASS:
4191             case Pattern.COMMENTS:
4192                 // valid flag, continue
4193                 break;
4194             default:
4195                 int finalFlag = flag;
4196                 assertThrows(IllegalArgumentException.class, () ->
4197                     Pattern.compile(".", finalFlag));
4198             }
4199         }
4200     }
4201 
4202     // This test is for 8158482
4203     @Test
embeddedFlags()4204     public static void embeddedFlags() {
4205             //Runs without exception.
4206             Pattern.compile("(?i).(?-i).");
4207             Pattern.compile("(?m).(?-m).");
4208             Pattern.compile("(?s).(?-s).");
4209             Pattern.compile("(?d).(?-d).");
4210             Pattern.compile("(?u).(?-u).");
4211             // Android-removed: 'c' isn't a documented flag.
4212             // Pattern.compile("(?c).(?-c).");
4213             Pattern.compile("(?x).(?-x).");
4214 
4215             // Android-removed: UNICODE_CHARACTER_CLASS and CANON_EQ flags aren't supported.
4216             // Pattern.compile("(?U).(?-U).");
4217             // Pattern.compile("(?imsducxU).(?-imsducxU).");
4218             Pattern.compile("(?imsdux).(?-imsdux).");
4219     }
4220 
4221     @Test
grapheme()4222     public static void grapheme() throws Exception {
4223         final int[] lineNumber = new int[1];
4224         // Android-changed: Use resources instead of "test.src" property.
4225         // Stream.concat(Files.lines(UCDFiles.GRAPHEME_BREAK_TEST),
4226         //         Files.lines(Paths.get(System.getProperty("test.src", "."), "GraphemeTestCases.txt")))
4227         try (BufferedReader reader = new BufferedReader(new InputStreamReader(
4228                 RegExTest.class.getResourceAsStream("GraphemeTestCases.txt")))) {
4229             reader.lines().forEach( ln -> {
4230                     lineNumber[0]++;
4231                     if (ln.length() == 0 || ln.startsWith("#")) {
4232                         return;
4233                     }
4234                     ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
4235                     // System.out.println(str);
4236                     String[] strs = ln.split("\u00f7|\u00d7");
4237                     StringBuilder src = new StringBuilder();
4238                     ArrayList<String> graphemes = new ArrayList<>();
4239                     StringBuilder buf = new StringBuilder();
4240                     int offBk = 0;
4241                     for (String str : strs) {
4242                         if (str.length() == 0)  // first empty str
4243                             continue;
4244                         int cp = Integer.parseInt(str, 16);
4245                         src.appendCodePoint(cp);
4246                         buf.appendCodePoint(cp);
4247                         offBk += (str.length() + 1);
4248                         if (ln.charAt(offBk) == '\u00f7') {    // DIV
4249                             graphemes.add(buf.toString());
4250                             buf = new StringBuilder();
4251                         }
4252                     }
4253                     Pattern p = Pattern.compile("\\X");
4254                     // (1) test \X directly
4255                     Matcher m = p.matcher(src.toString());
4256                     for (String g : graphemes) {
4257                         // System.out.printf("     grapheme:=[%s]%n", g);
4258                         String group = null;
4259                         if (!m.find() || !(group = m.group()).equals(g)) {
4260                                  fail("Failed pattern \\X [" + ln + "] : "
4261                                     + "expected: " + g + " - actual: " + group
4262                                     + "(line " + lineNumber[0] + ")");
4263                         }
4264                     }
4265 
4266                     // BEGIN Android-removed: ICU4C doesn't support \\b{g} yet.
4267                     /*
4268                     assertFalse(m.find());
4269                     // test \b{g} without \X via Pattern
4270                     Pattern pbg = Pattern.compile("\\b{g}");
4271                     m = pbg.matcher(src.toString());
4272                     m.find();
4273                     int prev = m.end();
4274                     for (String g : graphemes) {
4275                         String group = null;
4276                         if (!m.find() || !(group = src.substring(prev, m.end())).equals(g)) {
4277                                  fail("Failed pattern \\b{g} [" + ln + "] : "
4278                                     + "expected: " + g + " - actual: " + group
4279                                     + "(line " + lineNumber[0] + ")");
4280                         }
4281                         assertEquals("", m.group());
4282                         prev = m.end();
4283                     }
4284                     assertFalse(m.find());
4285                     // (2) test \b{g} + \X  via Scanner
4286                     Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4287                     for (String g : graphemes) {
4288                         String next = null;
4289                         if (!s.hasNext(p) || !(next = s.next(p)).equals(g)) {
4290                                  fail("Failed \\b{g} [" + ln + "] : "
4291                                     + "expected: " + g + " - actual: " + next
4292                                     + " (line " + lineNumber[0] + ")");
4293                         }
4294                     }
4295                     assertFalse(s.hasNext(p));
4296                     // test \b{g} without \X via Scanner
4297                     s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4298                     for (String g : graphemes) {
4299                         String next = null;
4300                         if (!s.hasNext() || !(next = s.next()).equals(g)) {
4301                                  fail("Failed \\b{g} [" + ln + "] : "
4302                                     + "expected: " + g + " - actual: " + next
4303                                     + " (line " + lineNumber[0] + ")");
4304                         }
4305                     }
4306                     assertFalse(s.hasNext());
4307                     */
4308                     // END Android-removed: ICU4C doesn't support \\b{g} yet.
4309                 });
4310         }
4311         // some sanity checks
4312         // Android-changed: ICU4C doesn't support \\b{g} yet.
4313         // assertTrue(Pattern.compile("\\X{10}").matcher("abcdefghij").matches() &&
4314         //           Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() &&
4315         //           Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches());
4316         assertTrue(Pattern.compile("\\X{10}").matcher("abcdefghij").matches());
4317         // make sure "\b{n}" still works
4318 
4319         // Android-removed: ICU4C can't parse \b{1}.
4320         // FIXME: \b{1} should be treated as exactly one \b, but ICU4C fails to parse.
4321         // assertTrue(Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches());
4322     }
4323 
4324     // hangup/timeout if go into exponential backtracking
4325     // FIXME: Investigate if this exponential backtracking causes any harm or worth fixing.
4326     // Android-changed: The test hangs up on Android.
4327     @Test(enabled = false)
expoBacktracking()4328     public static void expoBacktracking() {
4329 
4330         Object[][] patternMatchers = {
4331             // 6328855
4332             { "(.*\n*)*",
4333               "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)",
4334               false },
4335             // 6192895
4336             { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4337               "Hello World this is a test this is a test this is a test A",
4338               true },
4339             { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4340               "Hello World this is a test this is a test this is a test \u4e00 ",
4341               false },
4342             { " *([a-z0-9]+ *)+",
4343               "hello world this is a test this is a test this is a test A",
4344               false },
4345             // 4771934 [FIXED] #5013651?
4346             { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4347               "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com",
4348               true },
4349             // 4866249 [FIXED]
4350             { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>",
4351               "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">",
4352               true },
4353             { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4354               "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com",
4355               false },
4356             // 6345469
4357             { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
4358               "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; </p>",
4359               true }, // --> matched
4360             { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
4361               "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; p </p>",
4362               false },
4363             // 5026912
4364             { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$",
4365               "156580451111112225588087755221111111566969655555555",
4366               false},
4367             // 6988218
4368             { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')",
4369               "'%)) order by ANGEBOT.ID",
4370               false},    // find
4371             // 6693451
4372             { "^(\\s*foo\\s*)*$",
4373               "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo",
4374               true },
4375             { "^(\\s*foo\\s*)*$",
4376               "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo",
4377               false
4378             },
4379             // 7006761
4380             { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true},
4381             { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false},
4382             // 8140212
4383             { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)",
4384               "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()",
4385               false
4386             },
4387             { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true},
4388             { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false},
4389 
4390             { "(x+)*y",  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true },
4391             { "(x+)*y",  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4392 
4393             { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true},
4394             { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4395 
4396             { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false},
4397 
4398             /* not fixed
4399             //8132141   --->    second level exponential backtracking
4400             { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*",
4401               "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" },
4402             */
4403         };
4404 
4405         for (Object[] pm : patternMatchers) {
4406             String p = (String)pm[0];
4407             String s = (String)pm[1];
4408             boolean r = (Boolean)pm[2];
4409             assertEquals(r, Pattern.compile(p).matcher(s).matches());
4410         }
4411     }
4412 
4413     @Test
invalidGroupName()4414     public static void invalidGroupName() {
4415         // Invalid start of a group name
4416         for (String groupName : List.of("", ".", "0", "\u0040", "\u005b",
4417                 "\u0060", "\u007b", "\u0416")) {
4418             for (String pat : List.of("(?<" + groupName + ">)",
4419                     "\\k<" + groupName + ">")) {
4420                 var e = expectThrows(PatternSyntaxException.class, () -> Pattern.compile(pat));
4421                 // Android-changed: Android has a different error message.
4422                 // assertTrue(e.getMessage().startsWith(
4423                 //             "capturing group name does not start with a"
4424                 //             + " Latin letter"));
4425             }
4426         }
4427         // Invalid char in a group name
4428         for (String groupName : List.of("a.", "b\u0040", "c\u005b",
4429                 "d\u0060", "e\u007b", "f\u0416")) {
4430             for (String pat : List.of("(?<" + groupName + ">)",
4431                     "\\k<" + groupName + ">")) {
4432                 var e = expectThrows(PatternSyntaxException.class, () ->
4433                     Pattern.compile(pat));
4434 
4435                     // Android-changed: Android has a different error message.
4436                     // assertTrue(e.getMessage().startsWith(
4437                     //         "named capturing group is missing trailing '>'"));
4438             }
4439         }
4440     }
4441 
4442     @Test
illegalRepetitionRange()4443     public static void illegalRepetitionRange() {
4444         // huge integers > (2^31 - 1)
4445         String n = BigInteger.valueOf(1L << 32)
4446             .toString();
4447         String m = BigInteger.valueOf(1L << 31)
4448             .add(new BigInteger(80, generator))
4449             .toString();
4450         for (String rep : List.of("", "x", ".", ",", "-1", "2,1",
4451                 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) {
4452             String pat = ".{" + rep + "}";
4453             var e = expectThrows(PatternSyntaxException.class, () ->
4454                     Pattern.compile(pat));
4455             // Android-changed: Android produces a different error message.
4456             // assertTrue(e.getMessage().startsWith("Illegal repetition"));
4457         }
4458     }
4459 
4460     // Android-changed: CANON_EQ flag isn't supported.
4461     // @Test
surrogatePairWithCanonEq()4462     public static void surrogatePairWithCanonEq() {
4463         //Runs without exception
4464         Pattern.compile("\ud834\udd21", Pattern.CANON_EQ);
4465     }
4466 
s2x(String s)4467     public static String s2x(String s) {
4468         StringBuilder sb = new StringBuilder();
4469         for (char ch : s.toCharArray()) {
4470             sb.append(String.format("\\u%04x", (int)ch));
4471         }
4472         return sb.toString();
4473     }
4474 
4475     // This test is for 8235812, with cases excluded by 8258259
4476     // FIXME: Fix this test.
4477     // Android-changed: Disable lineBreakWithQuantifier test.
4478     // @Test
lineBreakWithQuantifier()4479     public static void lineBreakWithQuantifier() {
4480         // key:    pattern
4481         // value:  lengths of input that must match the pattern
4482         Map<String, List<Integer>> cases = Map.ofEntries(
4483             Map.entry("\\R?",      List.of(0, 1)),
4484             Map.entry("\\R*",      List.of(0, 1, 2, 3)),
4485             Map.entry("\\R+",      List.of(1, 2, 3)),
4486             Map.entry("\\R{0}",    List.of(0)),
4487             Map.entry("\\R{1}",    List.of(1)),
4488 //          Map.entry("\\R{2}",    List.of(2)),            // 8258259
4489 //          Map.entry("\\R{3}",    List.of(3)),            // 8258259
4490             Map.entry("\\R{0,}",   List.of(0, 1, 2, 3)),
4491             Map.entry("\\R{1,}",   List.of(1, 2, 3)),
4492 //          Map.entry("\\R{2,}",   List.of(2, 3)),         // 8258259
4493 //          Map.entry("\\R{3,}",   List.of(3)),            // 8258259
4494             Map.entry("\\R{0,0}",  List.of(0)),
4495             Map.entry("\\R{0,1}",  List.of(0, 1)),
4496             Map.entry("\\R{0,2}",  List.of(0, 1, 2)),
4497             Map.entry("\\R{0,3}",  List.of(0, 1, 2, 3)),
4498             Map.entry("\\R{1,1}",  List.of(1)),
4499             Map.entry("\\R{1,2}",  List.of(1, 2)),
4500             Map.entry("\\R{1,3}",  List.of(1, 2, 3)),
4501 //          Map.entry("\\R{2,2}",  List.of(2)),            // 8258259
4502 //          Map.entry("\\R{2,3}",  List.of(2, 3)),         // 8258259
4503 //          Map.entry("\\R{3,3}",  List.of(3)),            // 8258259
4504             Map.entry("\\R",       List.of(1)),
4505             Map.entry("\\R\\R",    List.of(2)),
4506             Map.entry("\\R\\R\\R", List.of(3))
4507         );
4508 
4509         // key:    length of input
4510         // value:  all possible inputs of given length
4511         Map<Integer, List<String>> inputs = new HashMap<>();
4512         String[] Rs = { "\r\n", "\r", "\n",
4513                         "\u000B", "\u000C", "\u0085", "\u2028", "\u2029" };
4514         StringBuilder sb = new StringBuilder();
4515         for (int len = 0; len <= 3; ++len) {
4516             int[] idx = new int[len + 1];
4517             do {
4518                 sb.setLength(0);
4519                 for (int j = 0; j < len; ++j)
4520                     sb.append(Rs[idx[j]]);
4521                 // Android-changed: Suppress ComputeIfAbsentAmbiguousReference ErrorProne error.
4522                 // inputs.computeIfAbsent(len, ArrayList::new).add(sb.toString());
4523                 inputs.computeIfAbsent(len, l -> new ArrayList<>()).add(sb.toString());
4524                 idx[0]++;
4525                 for (int j = 0; j < len; ++j) {
4526                     if (idx[j] < Rs.length)
4527                         break;
4528                     idx[j] = 0;
4529                     idx[j+1]++;
4530                 }
4531             } while (idx[len] == 0);
4532         }
4533 
4534         // exhaustive testing
4535         for (String patStr : cases.keySet()) {
4536             Pattern[] pats = patStr.endsWith("R")
4537                 ? new Pattern[] { Pattern.compile(patStr) }  // no quantifiers
4538                 : new Pattern[] { Pattern.compile(patStr),          // greedy
4539                                   Pattern.compile(patStr + "?") };  // reluctant
4540             Matcher m = pats[0].matcher("");
4541             for (Pattern p : pats) {
4542                 m.usePattern(p);
4543                 for (int len : cases.get(patStr)) {
4544                     for (String in : inputs.get(len)) {
4545                         assertTrue(m.reset(in).matches(), "Expected to match '"
4546                                 + s2x(in) + "' =~ /" + p + "/");
4547                     }
4548                 }
4549             }
4550         }
4551     }
4552 
4553     // This test is for 8214245
4554     // FIXME: Fix this test.
4555     // Android-changed: Disable caseInsensitivePMatch test.
4556     // @Test
caseInsensitivePMatch()4557     public static void caseInsensitivePMatch() {
4558         for (String input : List.of("abcd", "AbCd", "ABCD")) {
4559             for (String pattern : List.of("abcd", "aBcD", "[a-d]{4}",
4560                     "(?:a|b|c|d){4}", "\\p{Lower}{4}", "\\p{Ll}{4}",
4561                     "\\p{IsLl}{4}", "\\p{gc=Ll}{4}",
4562                     "\\p{general_category=Ll}{4}", "\\p{IsLowercase}{4}",
4563                     "\\p{javaLowerCase}{4}", "\\p{Upper}{4}", "\\p{Lu}{4}",
4564                     "\\p{IsLu}{4}", "\\p{gc=Lu}{4}", "\\p{general_category=Lu}{4}",
4565                     "\\p{IsUppercase}{4}", "\\p{javaUpperCase}{4}",
4566                     "\\p{Lt}{4}", "\\p{IsLt}{4}", "\\p{gc=Lt}{4}",
4567                     "\\p{general_category=Lt}{4}", "\\p{IsTitlecase}{4}",
4568                     "\\p{javaTitleCase}{4}", "[\\p{Lower}]{4}", "[\\p{Ll}]{4}",
4569                     "[\\p{IsLl}]{4}", "[\\p{gc=Ll}]{4}",
4570                     "[\\p{general_category=Ll}]{4}", "[\\p{IsLowercase}]{4}",
4571                     "[\\p{javaLowerCase}]{4}", "[\\p{Upper}]{4}", "[\\p{Lu}]{4}",
4572                     "[\\p{IsLu}]{4}", "[\\p{gc=Lu}]{4}",
4573                     "[\\p{general_category=Lu}]{4}", "[\\p{IsUppercase}]{4}",
4574                     "[\\p{javaUpperCase}]{4}", "[\\p{Lt}]{4}", "[\\p{IsLt}]{4}",
4575                     "[\\p{gc=Lt}]{4}", "[\\p{general_category=Lt}]{4}",
4576                     "[\\p{IsTitlecase}]{4}", "[\\p{javaTitleCase}]{4}"))
4577             {
4578                 assertTrue(Pattern.compile(pattern, Pattern.CASE_INSENSITIVE)
4579                             .matcher(input)
4580                             .matches(),"Expected to match: " + "'" + input +
4581                         "' =~ /" + pattern + "/");
4582             }
4583         }
4584 
4585         for (String input : List.of("\u01c7", "\u01c8", "\u01c9")) {
4586             for (String pattern : List.of("\u01c7", "\u01c8", "\u01c9",
4587                     "[\u01c7\u01c8]", "[\u01c7\u01c9]", "[\u01c8\u01c9]",
4588                     "[\u01c7-\u01c8]", "[\u01c8-\u01c9]", "[\u01c7-\u01c9]",
4589                     "\\p{Lower}", "\\p{Ll}", "\\p{IsLl}", "\\p{gc=Ll}",
4590                     "\\p{general_category=Ll}", "\\p{IsLowercase}",
4591                     "\\p{javaLowerCase}", "\\p{Upper}", "\\p{Lu}",
4592                     "\\p{IsLu}", "\\p{gc=Lu}", "\\p{general_category=Lu}",
4593                     "\\p{IsUppercase}", "\\p{javaUpperCase}",
4594                     "\\p{Lt}", "\\p{IsLt}", "\\p{gc=Lt}",
4595                     "\\p{general_category=Lt}", "\\p{IsTitlecase}",
4596                     "\\p{javaTitleCase}", "[\\p{Lower}]", "[\\p{Ll}]",
4597                     "[\\p{IsLl}]", "[\\p{gc=Ll}]",
4598                     "[\\p{general_category=Ll}]", "[\\p{IsLowercase}]",
4599                     "[\\p{javaLowerCase}]", "[\\p{Upper}]", "[\\p{Lu}]",
4600                     "[\\p{IsLu}]", "[\\p{gc=Lu}]",
4601                     "[\\p{general_category=Lu}]", "[\\p{IsUppercase}]",
4602                     "[\\p{javaUpperCase}]", "[\\p{Lt}]", "[\\p{IsLt}]",
4603                     "[\\p{gc=Lt}]", "[\\p{general_category=Lt}]",
4604                     "[\\p{IsTitlecase}]", "[\\p{javaTitleCase}]"))
4605             {
4606                 assertTrue(Pattern.compile(pattern, Pattern.CASE_INSENSITIVE
4607                                             | Pattern.UNICODE_CHARACTER_CLASS)
4608                             .matcher(input)
4609                             .matches(), "Expected to match: " +
4610                         "'" + input + "' =~ /" + pattern + "/");
4611             }
4612         }
4613     }
4614 
4615     // This test is for 8237599
4616     // FIXME: Fix this test.
4617     // Android-changed: Disable failing surrogatePairOverlapRegion() test.
4618     // @Test
surrogatePairOverlapRegion()4619     public static void surrogatePairOverlapRegion() {
4620         String input = "\ud801\udc37";
4621 
4622         Pattern p = Pattern.compile(".+");
4623         Matcher m = p.matcher(input);
4624         m.region(0, 1);
4625 
4626         boolean ok = m.find();
4627         if (!ok || !m.group(0).equals(input.substring(0, 1)))
4628         {
4629             String errMessage = "Input \"" + input + "\".substr(0, 1)" +
4630                     " expected to match pattern \"" + p + "\"";
4631             if (ok) {
4632                 fail(errMessage + System.lineSeparator() +
4633                         "group(0): \"" + m.group(0) + "\"");
4634             } else {
4635                 fail(errMessage);
4636             }
4637         } else if (!m.hitEnd()) {
4638             fail("Expected m.hitEnd() == true");
4639         }
4640 
4641         p = Pattern.compile(".*(.)");
4642         m = p.matcher(input);
4643         m.region(1, 2);
4644 
4645         ok = m.find();
4646         if (!ok || !m.group(0).equals(input.substring(1, 2))
4647                 || !m.group(1).equals(input.substring(1, 2)))
4648         {
4649             String errMessage = "Input \"" + input + "\".substr(1, 2)" +
4650                     " expected to match pattern \"" + p + "\"";
4651             if (ok) {
4652                 String msg1 = "group(0): \"" + m.group(0) + "\"";
4653                 String msg2 = "group(1): \"" + m.group(1) + "\"";
4654                 fail(errMessage + System.lineSeparator() + msg1 +
4655                         System.lineSeparator() + msg2);
4656             } else {
4657                 fail(errMessage);
4658             }
4659         }
4660     }
4661 
4662     //This test is for 8037397
4663     @Test
droppedClassesWithIntersection()4664     public static void droppedClassesWithIntersection() {
4665         String rx = "[A-Z&&[A-Z]0-9]";
4666         String ry = "[A-Z&&[A-F][G-Z]0-9]";
4667 
4668         Stream<Character> letterChars = IntStream.range('A', 'Z').mapToObj((i) -> (char) i);
4669         Stream<Character> digitChars = IntStream.range('0', '9').mapToObj((i) -> (char) i);
4670 
4671         boolean letterCharsMatch = letterChars.allMatch((ch) -> {
4672             String chString = ch.toString();
4673             return chString.matches(rx) && chString.matches(ry);
4674         });
4675 
4676         boolean digitCharsDontMatch = digitChars.noneMatch((ch) -> {
4677             String chString = ch.toString();
4678             return chString.matches(rx) && chString.matches(ry);
4679         });
4680 
4681 
4682         assertTrue(letterCharsMatch, "Compiling intersection pattern is " +
4683                 "dropping a character class in its matcher");
4684 
4685         assertTrue(digitCharsDontMatch, "Compiling intersection pattern is " +
4686                 "matching digits where it should not");
4687     }
4688 
4689     //This test is for 8269753
4690      @Test
errorMessageCaretIndentation()4691     public static void errorMessageCaretIndentation() {
4692         String pattern = "\t**";
4693         var e = expectThrows(PatternSyntaxException.class, () ->
4694                 Pattern.compile(pattern));
4695         // Android-changed: Android produces a different error message.
4696         // var sep = System.lineSeparator();
4697         // assertTrue(e.getMessage().contains(sep + "\t ^"));
4698     }
4699 }
4700