1 /* 2 * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 28 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 29 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 30 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 31 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 32 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 33 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 34 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 35 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 36 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 37 * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812 38 * 8216332 8214245 8237599 8241055 8247546 8258259 8037397 8269753 39 * 40 * @library /test/lib 41 * @library /lib/testlibrary/java/lang 42 * @build jdk.test.lib.RandomFactory 43 * @author Mike McCloskey 44 * @run testng RegExTest 45 * @key randomness 46 */ 47 48 package test.java.util.regex; 49 50 import java.io.*; 51 import java.math.BigInteger; 52 import java.nio.CharBuffer; 53 import java.nio.file.Files; 54 import java.nio.file.Path; 55 import java.nio.file.Paths; 56 import java.util.ArrayList; 57 import java.util.Arrays; 58 import java.util.HashMap; 59 import java.util.List; 60 import java.util.Map; 61 import java.util.Random; 62 import java.util.Scanner; 63 import java.util.function.Function; 64 import java.util.function.Predicate; 65 import java.util.regex.Matcher; 66 import java.util.regex.MatchResult; 67 import java.util.regex.Pattern; 68 import java.util.regex.PatternSyntaxException; 69 import java.util.stream.IntStream; 70 import java.util.stream.Stream; 71 72 import org.testng.annotations.Test; 73 import org.testng.Assert; 74 75 76 import jdk.test.lib.RandomFactory; 77 import org.testng.annotations.BeforeMethod; 78 import org.testng.annotations.Test; 79 80 import static org.testng.Assert.assertEquals; 81 import static org.testng.Assert.assertFalse; 82 import static org.testng.Assert.assertNotEquals; 83 import static org.testng.Assert.assertNotNull; 84 import static org.testng.Assert.assertTrue; 85 import static org.testng.Assert.fail; 86 87 import static org.testng.Assert.assertEquals; 88 import static org.testng.Assert.assertFalse; 89 import static org.testng.Assert.assertNotEquals; 90 import static org.testng.Assert.assertNotSame; 91 import static org.testng.Assert.assertThrows; 92 import static org.testng.Assert.assertTrue; 93 import static org.testng.Assert.fail; 94 import static org.testng.Assert.expectThrows; 95 96 /** 97 * This is a test class created to check the operation of 98 * the Pattern and Matcher classes. 99 */ 100 public class RegExTest { 101 102 private static final Random generator = RandomFactory.getRandom(); 103 104 // Utility functions 105 getRandomAlphaString(int length)106 private static String getRandomAlphaString(int length) { 107 108 StringBuilder buf = new StringBuilder(length); 109 for (int i=0; i<length; i++) { 110 char randChar = (char)(97 + generator.nextInt(26)); 111 buf.append(randChar); 112 } 113 return buf.toString(); 114 } 115 check(Matcher m, String expected)116 private static void check(Matcher m, String expected) { 117 m.find(); 118 assertEquals(m.group(), expected); 119 } 120 check(Matcher m, String result, boolean expected)121 private static void check(Matcher m, String result, boolean expected) { 122 m.find(); 123 assertEquals(m.group().equals(result), expected); 124 } 125 check(Pattern p, String s, boolean expected)126 private static void check(Pattern p, String s, boolean expected) { 127 assertEquals(p.matcher(s).find(), expected); 128 } 129 check(String p, String s, boolean expected)130 private static void check(String p, String s, boolean expected) { 131 Matcher matcher = Pattern.compile(p).matcher(s); 132 assertEquals(matcher.find(), expected); 133 } 134 check(String p, char c, boolean expected)135 private static void check(String p, char c, boolean expected) { 136 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 137 Pattern pattern = Pattern.compile(propertyPattern); 138 char[] ca = new char[1]; ca[0] = c; 139 Matcher matcher = pattern.matcher(new String(ca)); 140 assertTrue(matcher.find()); 141 } 142 check(String p, int codePoint, boolean expected)143 private static void check(String p, int codePoint, boolean expected) { 144 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 145 Pattern pattern = Pattern.compile(propertyPattern); 146 char[] ca = Character.toChars(codePoint); 147 Matcher matcher = pattern.matcher(new String(ca)); 148 assertTrue(matcher.find()); 149 } 150 check(String p, int flag, String input, String s, boolean expected)151 private static void check(String p, int flag, String input, String s, 152 boolean expected) 153 { 154 Pattern pattern = Pattern.compile(p, flag); 155 Matcher matcher = pattern.matcher(input); 156 if (expected) 157 check(matcher, s, expected); 158 else 159 check(pattern, input, expected); 160 } 161 check(Pattern p, String s, String g, String expected)162 private static void check(Pattern p, String s, String g, String expected) { 163 Matcher m = p.matcher(s); 164 m.find(); 165 assertFalse(!m.group(g).equals(expected) || 166 s.charAt(m.start(g)) != expected.charAt(0) || 167 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)); 168 } checkReplaceFirst(String p, String s, String r, String expected)169 private static void checkReplaceFirst(String p, String s, String r, String expected) 170 { 171 assertEquals(expected, Pattern.compile(p).matcher(s).replaceFirst(r)); 172 } 173 checkReplaceAll(String p, String s, String r, String expected)174 private static void checkReplaceAll(String p, String s, String r, String expected) 175 { 176 assertEquals(expected, Pattern.compile(p).matcher(s).replaceAll(r)); 177 } 178 checkExpectedFail(String p)179 private static void checkExpectedFail(String p) { 180 assertThrows(PatternSyntaxException.class, () -> 181 Pattern.compile(p)); 182 } 183 184 /** 185 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 186 * supplementary characters. This method does NOT fully take care 187 * of the regex syntax. 188 */ toSupplementaries(String s)189 public static String toSupplementaries(String s) { 190 int length = s.length(); 191 StringBuilder sb = new StringBuilder(length * 2); 192 193 for (int i = 0; i < length; ) { 194 char c = s.charAt(i++); 195 if (c == '\\') { 196 sb.append(c); 197 if (i < length) { 198 c = s.charAt(i++); 199 sb.append(c); 200 if (c == 'u') { 201 // assume no syntax error 202 sb.append(s.charAt(i++)); 203 sb.append(s.charAt(i++)); 204 sb.append(s.charAt(i++)); 205 sb.append(s.charAt(i++)); 206 } 207 } 208 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 209 sb.append('\ud800').append((char)('\udc00'+c)); 210 } else { 211 sb.append(c); 212 } 213 } 214 return sb.toString(); 215 } 216 217 // Regular expression tests 218 //Following three tests execute from a file. 219 @Test processTestCases()220 public static void processTestCases() throws IOException { 221 processFile("TestCases.txt"); 222 } 223 224 @Test processBMPTestCases()225 public static void processBMPTestCases() throws IOException { 226 processFile("BMPTestCases.txt"); 227 } 228 229 @Test processSupplementaryTestCases()230 public static void processSupplementaryTestCases() throws IOException { 231 processFile("SupplementaryTestCases.txt"); 232 } 233 234 235 @Test nullArgumentTest()236 public static void nullArgumentTest() { 237 238 assertThrows(NullPointerException.class, () -> Pattern.compile(null)); 239 assertThrows(NullPointerException.class, () -> Pattern.matches(null, null)); 240 assertThrows(NullPointerException.class, () -> Pattern.matches("xyz", null)); 241 assertThrows(NullPointerException.class, () -> Pattern.quote(null)); 242 assertThrows(NullPointerException.class, () -> Pattern.compile("xyz").split(null)); 243 assertThrows(NullPointerException.class, () -> Pattern.compile("xyz").matcher(null)); 244 245 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 246 m.matches(); 247 assertThrows(NullPointerException.class, () -> m.appendTail((StringBuffer) null)); 248 assertThrows(NullPointerException.class, () -> m.appendTail((StringBuilder)null)); 249 assertThrows(NullPointerException.class, () -> m.replaceAll((String) null)); 250 assertThrows(NullPointerException.class, () -> m.replaceAll((Function<MatchResult, String>)null)); 251 assertThrows(NullPointerException.class, () -> m.replaceFirst((String)null)); 252 assertThrows(NullPointerException.class, () -> m.replaceFirst((Function<MatchResult, String>) null)); 253 assertThrows(NullPointerException.class, () -> m.appendReplacement((StringBuffer)null, null)); 254 assertThrows(NullPointerException.class, () -> m.appendReplacement((StringBuilder)null, null)); 255 assertThrows(NullPointerException.class, () -> m.reset(null)); 256 assertThrows(NullPointerException.class, () -> Matcher.quoteReplacement(null)); 257 //check(() -> m.usePattern(null)); 258 259 } 260 261 // This is for bug6635133 262 // Test if surrogate pair in Unicode escapes can be handled correctly. 263 @Test surrogatesInClassTest()264 public static void surrogatesInClassTest() { 265 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 266 Matcher matcher = pattern.matcher("\ud834\udd22"); 267 268 assertTrue(matcher.find(), "Surrogate pair in Unicode escape"); 269 } 270 271 // This is for bug6990617 272 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 273 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 274 // char is an octal digit. 275 @Test removeQEQuotingTest()276 public static void removeQEQuotingTest() { 277 Pattern pattern = 278 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 279 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 280 281 assertTrue(matcher.find(), "Remove Q/E Quoting"); 282 } 283 284 // This is for bug 4988891 285 // Test toMatchResult to see that it is a copy of the Matcher 286 // that is not affected by subsequent operations on the original 287 @Test toMatchResultTest()288 public static void toMatchResultTest() { 289 Pattern pattern = Pattern.compile("squid"); 290 Matcher matcher = pattern.matcher( 291 "agiantsquidofdestinyasmallsquidoffate"); 292 matcher.find(); 293 294 int matcherStart1 = matcher.start(); 295 MatchResult mr = matcher.toMatchResult(); 296 assertNotSame(mr, matcher, "Matcher toMatchResult is identical object"); 297 298 int resultStart1 = mr.start(); 299 assertEquals(matcherStart1, resultStart1, "equal matchers don't have equal start indices"); 300 matcher.find(); 301 302 int matcherStart2 = matcher.start(); 303 int resultStart2 = mr.start(); 304 assertNotEquals(matcherStart2, resultStart2, "Matcher2 and Result2 should not be equal"); 305 assertEquals(resultStart1, resultStart2, "Second match result should have the same state"); 306 MatchResult mr2 = matcher.toMatchResult(); 307 assertNotSame(mr, mr2, "Second Matcher copy should not be identical to the first."); 308 assertEquals(mr2.start(), matcherStart2, "mr2 index should equal matcher index"); 309 } 310 311 // This is for bug 8074678 312 // Test the result of toMatchResult throws ISE if no match is availble 313 // Android-changed: Android throws Exception when calling toMatchResult(), not start(). 314 @Test(enabled = false) toMatchResultTest2()315 public static void toMatchResultTest2() { 316 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 317 matcher.find(); 318 MatchResult mr = matcher.toMatchResult(); 319 320 assertThrows(IllegalStateException.class, mr::start); 321 assertThrows(IllegalStateException.class, () -> mr.start(2)); 322 assertThrows(IllegalStateException.class, mr::end); 323 assertThrows(IllegalStateException.class, () -> mr.end(2)); 324 assertThrows(IllegalStateException.class, mr::group); 325 assertThrows(IllegalStateException.class, () -> mr.group(2)); 326 327 matcher = Pattern.compile("(match)").matcher("there is a match"); 328 matcher.find(); 329 MatchResult mr2 = matcher.toMatchResult(); 330 assertThrows(IndexOutOfBoundsException.class, () -> mr2.start(2)); 331 assertThrows(IndexOutOfBoundsException.class, () -> mr2.end(2)); 332 assertThrows(IndexOutOfBoundsException.class, () -> mr2.group(2)); 333 } 334 335 // This is for bug 5013885 336 // Must test a slice to see if it reports hitEnd correctly 337 // FIXME: Investigate why this test fails. 338 // Android-changed: The javadoc isn't clear about the behavior if no match is found. The 339 // behavior could depend on the underlying search engine. 340 @Test(enabled = false) hitEndTest()341 public static void hitEndTest() { 342 // Basic test of Slice node 343 Pattern p = Pattern.compile("^squidattack"); 344 Matcher m = p.matcher("squack"); 345 m.find(); 346 assertFalse(m.hitEnd(), "Matcher should not be at end of sequence"); 347 m.reset("squid"); 348 m.find(); 349 assertTrue(m.hitEnd(), "Matcher should be at the end of sequence"); 350 351 // Test Slice, SliceA and SliceU nodes 352 for (int i=0; i<3; i++) { 353 int flags = 0; 354 if (i==1) flags = Pattern.CASE_INSENSITIVE; 355 if (i==2) flags = Pattern.UNICODE_CASE; 356 p = Pattern.compile("^abc", flags); 357 m = p.matcher("ad"); 358 m.find(); 359 assertFalse(m.hitEnd(), "Slice node test"); 360 m.reset("ab"); 361 m.find(); 362 assertTrue(m.hitEnd(), "Slice node test"); 363 } 364 365 // Test Boyer-Moore node 366 p = Pattern.compile("catattack"); 367 m = p.matcher("attack"); 368 m.find(); 369 assertTrue(m.hitEnd(), "Boyer-Moore node test"); 370 371 p = Pattern.compile("catattack"); 372 m = p.matcher("attackattackattackcatatta"); 373 m.find(); 374 assertTrue(m.hitEnd(), "Boyer-More node test"); 375 376 // 8184706: Matching u+0d at EOL against \R should hit-end 377 p = Pattern.compile("...\\R"); 378 m = p.matcher("cat" + (char)0x0a); 379 m.find(); 380 assertFalse(m.hitEnd()); 381 382 m = p.matcher("cat" + (char)0x0d); 383 m.find(); 384 assertTrue(m.hitEnd()); 385 386 m = p.matcher("cat" + (char)0x0d + (char)0x0a); 387 m.find(); 388 assertFalse(m.hitEnd()); 389 } 390 391 // This is for bug 4997476 392 // It is weird code submitted by customer demonstrating a regression 393 @Test wordSearchTest()394 public static void wordSearchTest() { 395 String testString = "word1 word2 word3"; 396 Pattern p = Pattern.compile("\\b"); 397 Matcher m = p.matcher(testString); 398 int position = 0; 399 int start; 400 while (m.find(position)) { 401 start = m.start(); 402 if (start == testString.length()) 403 break; 404 if (m.find(start+1)) { 405 position = m.start(); 406 } else { 407 position = testString.length(); 408 } 409 if (testString.substring(start, position).equals(" ")) 410 continue; 411 assertTrue(testString.substring(start, position-1).startsWith("word")); 412 } 413 } 414 415 // This is for bug 4994840 416 @Test caretAtEndTest()417 public static void caretAtEndTest() { 418 // Problem only occurs with multiline patterns 419 // containing a beginning-of-line caret "^" followed 420 // by an expression that also matches the empty string. 421 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 422 Matcher matcher = pattern.matcher("\r"); 423 matcher.find(); 424 matcher.find(); 425 } 426 427 // This test is for 4979006 428 // Check to see if word boundary construct properly handles unicode 429 // non spacing marks 430 @Test unicodeWordBoundsTest()431 public static void unicodeWordBoundsTest() { 432 String spaces = " "; 433 String wordChar = "a"; 434 String nsm = "\u030a"; 435 436 // Android-changed: assert statement has no runtime effect on Android. 437 // assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 438 assertEquals(Character.getType('\u030a'), Character.NON_SPACING_MARK); 439 440 Pattern pattern = Pattern.compile("\\b"); 441 Matcher matcher = pattern.matcher(""); 442 // S=other B=word character N=non spacing mark .=word boundary 443 // SS.BB.SS 444 String input = spaces + wordChar + wordChar + spaces; 445 twoFindIndexes(input, matcher, 2, 4); 446 // SS.BBN.SS 447 input = spaces + wordChar +wordChar + nsm + spaces; 448 twoFindIndexes(input, matcher, 2, 5); 449 // SS.BN.SS 450 input = spaces + wordChar + nsm + spaces; 451 twoFindIndexes(input, matcher, 2, 4); 452 // SS.BNN.SS 453 input = spaces + wordChar + nsm + nsm + spaces; 454 twoFindIndexes(input, matcher, 2, 5); 455 // SSN.BB.SS 456 input = spaces + nsm + wordChar + wordChar + spaces; 457 twoFindIndexes(input, matcher, 3, 5); 458 // SS.BNB.SS 459 input = spaces + wordChar + nsm + wordChar + spaces; 460 twoFindIndexes(input, matcher, 2, 5); 461 // SSNNSS 462 input = spaces + nsm + nsm + spaces; 463 matcher.reset(input); 464 assertFalse(matcher.find()); 465 // SSN.BBN.SS 466 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 467 twoFindIndexes(input, matcher, 3, 6); 468 } 469 twoFindIndexes(String input, Matcher matcher, int a, int b)470 private static void twoFindIndexes(String input, Matcher matcher, int a, 471 int b) 472 { 473 matcher.reset(input); 474 matcher.find(); 475 assertEquals(matcher.start(), a); 476 matcher.find(); 477 assertEquals(matcher.start(), b); 478 } 479 480 // This test is for 6284152 check(String regex, String input, String[] expected)481 private static void check(String regex, String input, String[] expected) { 482 List<String> result = new ArrayList<>(); 483 Pattern p = Pattern.compile(regex); 484 Matcher m = p.matcher(input); 485 while (m.find()) { 486 result.add(m.group()); 487 } 488 assertEquals(Arrays.asList(expected), result); 489 } 490 491 @Test lookbehindTest()492 public static void lookbehindTest() { 493 //Positive 494 check("(?<=%.{0,5})foo\\d", 495 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 496 new String[]{"foo1", "foo2", "foo3"}); 497 498 //boundary at end of the lookbehind sub-regex should work consistently 499 //with the boundary just after the lookbehind sub-regex 500 // Android-changed: 501 // check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 502 // check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 503 check("(?<=.{0,100}\\b)foo", "abcd foo", new String[]{"foo"}); 504 check("(?<=.{0,100})\\bfoo", "abcd foo", new String[]{"foo"}); 505 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 506 check("(?<!abc \\b)foo", "abc foo", new String[0]); 507 508 //Negative 509 check("(?<!%.{0,5})foo\\d", 510 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 511 new String[] {"foo4", "foo5"}); 512 513 //Positive greedy 514 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 515 516 //Positive reluctant 517 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 518 519 //supplementary 520 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 521 new String[] {"fo\ud800\udc00o"}); 522 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 523 new String[] {"fo\ud800\udc00o"}); 524 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 525 new String[] {"fo\ud800\udc00o"}); 526 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 527 new String[] {"fo\ud800\udc00o"}); 528 } 529 530 // This test is for 4938995 531 // Check to see if weak region boundaries are transparent to 532 // lookahead and lookbehind constructs 533 @Test boundsTest()534 public static void boundsTest() { 535 String fullMessage = "catdogcat"; 536 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 537 Matcher matcher = pattern.matcher("catdogca"); 538 matcher.useTransparentBounds(true); 539 540 assertFalse(matcher.find()); 541 matcher.reset("atdogcat"); 542 543 assertFalse(matcher.find()); 544 matcher.reset(fullMessage); 545 546 assertTrue(matcher.find()); 547 matcher.reset(fullMessage); 548 matcher.region(0,9); 549 550 assertTrue(matcher.find()); 551 matcher.reset(fullMessage); 552 matcher.region(0,6); 553 554 assertTrue(matcher.find()); 555 matcher.reset(fullMessage); 556 matcher.region(3,6); 557 558 assertTrue(matcher.find()); 559 matcher.useTransparentBounds(false); 560 assertFalse(matcher.find()); 561 562 // Negative lookahead/lookbehind 563 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 564 matcher = pattern.matcher("dogcat"); 565 matcher.useTransparentBounds(true); 566 matcher.region(0,3); 567 568 assertFalse(matcher.find()); 569 matcher.reset("catdog"); 570 matcher.region(3,6); 571 572 assertFalse(matcher.find()); 573 matcher.useTransparentBounds(false); 574 matcher.reset("dogcat"); 575 matcher.region(0,3); 576 577 assertTrue(matcher.find()); 578 matcher.reset("catdog"); 579 matcher.region(3,6); 580 // FIXME: This looks like a bug. 581 // Android-removed: Android fails this case. 582 // assertTrue(matcher.find()); 583 584 } 585 586 // This test is for 4945394 587 @Test findFromTest()588 public static void findFromTest() { 589 String message = "This is 40 $0 message."; 590 Pattern pat = Pattern.compile("\\$0"); 591 Matcher match = pat.matcher(message); 592 assertTrue(match.find()); 593 assertFalse(match.find()); 594 assertFalse(match.find()); 595 } 596 597 // This test is for 4872664 and 4892980 598 @Test negatedCharClassTest()599 public static void negatedCharClassTest() { 600 Pattern pattern = Pattern.compile("[^>]"); 601 Matcher matcher = pattern.matcher("\u203A"); 602 assertTrue(matcher.matches()); 603 604 pattern = Pattern.compile("[^fr]"); 605 matcher = pattern.matcher("a"); 606 assertTrue(matcher.find()); 607 608 matcher.reset("\u203A"); 609 assertTrue(matcher.find()); 610 String s = "for"; 611 String[] result = s.split("[^fr]"); 612 assertEquals(result[0], "f"); 613 assertEquals(result[1], "r"); 614 s = "f\u203Ar"; 615 result = s.split("[^fr]"); 616 assertEquals(result[0], "f"); 617 assertEquals(result[1], "r"); 618 619 // Test adding to bits, subtracting a node, then adding to bits again 620 pattern = Pattern.compile("[^f\u203Ar]"); 621 matcher = pattern.matcher("a"); 622 assertTrue(matcher.find()); 623 matcher.reset("f"); 624 assertFalse(matcher.find()); 625 matcher.reset("\u203A"); 626 assertFalse(matcher.find()); 627 matcher.reset("r"); 628 assertFalse(matcher.find()); 629 matcher.reset("\u203B"); 630 assertTrue(matcher.find()); 631 632 // Test subtracting a node, adding to bits, subtracting again 633 pattern = Pattern.compile("[^\u203Ar\u203B]"); 634 matcher = pattern.matcher("a"); 635 assertTrue(matcher.find()); 636 matcher.reset("\u203A"); 637 assertFalse(matcher.find()); 638 matcher.reset("r"); 639 assertFalse(matcher.find()); 640 matcher.reset("\u203B"); 641 assertFalse(matcher.find()); 642 matcher.reset("\u203C"); 643 assertTrue(matcher.find()); 644 } 645 646 // This test is for 4628291 647 @Test toStringTest()648 public static void toStringTest() { 649 Pattern pattern = Pattern.compile("b+"); 650 assertEquals(pattern.toString(), "b+"); 651 Matcher matcher = pattern.matcher("aaabbbccc"); 652 String matcherString = matcher.toString(); // unspecified 653 matcher.find(); 654 matcher.toString(); // unspecified 655 matcher.region(0,3); 656 matcher.toString(); // unspecified 657 matcher.reset(); 658 matcher.toString(); // unspecified 659 } 660 661 // This test is for 4808962 662 @Test literalPatternTest()663 public static void literalPatternTest() { 664 int flags = Pattern.LITERAL; 665 666 Pattern pattern = Pattern.compile("abc\\t$^", flags); 667 check(pattern, "abc\\t$^", true); 668 669 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 670 check(pattern, "abc\\t$^", true); 671 672 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 673 check(pattern, "\\Qa^$bcabc\\E", true); 674 check(pattern, "a^$bcabc", false); 675 676 pattern = Pattern.compile("\\\\Q\\\\E"); 677 check(pattern, "\\Q\\E", true); 678 679 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 680 check(pattern, "abcefg\\Q\\Ehij", true); 681 682 pattern = Pattern.compile("\\\\\\Q\\\\E"); 683 check(pattern, "\\\\\\\\", true); 684 685 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 686 check(pattern, "\\Qa^$bcabc\\E", true); 687 check(pattern, "a^$bcabc", false); 688 689 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 690 check(pattern, "\\Qabc\\Edef", true); 691 check(pattern, "abcdef", false); 692 693 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 694 check(pattern, "abc\\Edef", true); 695 check(pattern, "abcdef", false); 696 697 pattern = Pattern.compile(Pattern.quote("\\E")); 698 check(pattern, "\\E", true); 699 700 pattern = Pattern.compile("((((abc.+?:)", flags); 701 check(pattern, "((((abc.+?:)", true); 702 703 flags |= Pattern.MULTILINE; 704 705 pattern = Pattern.compile("^cat$", flags); 706 check(pattern, "abc^cat$def", true); 707 check(pattern, "cat", false); 708 709 flags |= Pattern.CASE_INSENSITIVE; 710 711 pattern = Pattern.compile("abcdef", flags); 712 check(pattern, "ABCDEF", true); 713 check(pattern, "AbCdEf", true); 714 715 flags |= Pattern.DOTALL; 716 717 pattern = Pattern.compile("a...b", flags); 718 check(pattern, "A...b", true); 719 check(pattern, "Axxxb", false); 720 721 // Android-removed: CANON_EQ isn't supported on Android. 722 // flags |= Pattern.CANON_EQ; 723 724 //Note: Possible issue 725 // Pattern p = Pattern.compile("testa\u030a", flags); 726 // check(pattern, "testa\u030a", false); 727 // check(pattern, "test\u00e5", false); 728 729 // Supplementary character test 730 flags = Pattern.LITERAL; 731 732 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 733 check(pattern, toSupplementaries("abc\\t$^"), true); 734 735 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 736 check(pattern, toSupplementaries("abc\\t$^"), true); 737 738 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 739 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 740 check(pattern, toSupplementaries("a^$bcabc"), false); 741 742 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 743 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 744 check(pattern, toSupplementaries("a^$bcabc"), false); 745 746 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 747 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 748 check(pattern, toSupplementaries("abcdef"), false); 749 750 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 751 check(pattern, toSupplementaries("abc\\Edef"), true); 752 check(pattern, toSupplementaries("abcdef"), false); 753 754 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 755 check(pattern, toSupplementaries("((((abc.+?:)"), true); 756 757 flags |= Pattern.MULTILINE; 758 759 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 760 check(pattern, toSupplementaries("abc^cat$def"), true); 761 check(pattern, toSupplementaries("cat"), false); 762 763 flags |= Pattern.DOTALL; 764 765 // note: this is case-sensitive. 766 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 767 check(pattern, toSupplementaries("a...b"), true); 768 check(pattern, toSupplementaries("axxxb"), false); 769 770 // Android-removed: CANON_EQ isn't supported on Android. 771 // flags |= Pattern.CANON_EQ; 772 773 // String t = toSupplementaries("test"); 774 //Note: Possible issue 775 // p = Pattern.compile(t + "a\u030a", flags); 776 // check(pattern, t + "a\u030a", false); 777 // check(pattern, t + "\u00e5", false); 778 } 779 780 // This test is for 4803179 781 // This test is also for 4808962, replacement parts 782 @Test literalReplacementTest()783 public static void literalReplacementTest() { 784 int flags = Pattern.LITERAL; 785 786 Pattern pattern = Pattern.compile("abc", flags); 787 Matcher matcher = pattern.matcher("zzzabczzz"); 788 String replaceTest = "$0"; 789 String result = matcher.replaceAll(replaceTest); 790 assertEquals(result, "zzzabczzz"); 791 792 matcher.reset(); 793 String literalReplacement = Matcher.quoteReplacement(replaceTest); 794 result = matcher.replaceAll(literalReplacement); 795 assertEquals(result, "zzz$0zzz"); 796 797 matcher.reset(); 798 replaceTest = "\\t$\\$"; 799 literalReplacement = Matcher.quoteReplacement(replaceTest); 800 result = matcher.replaceAll(literalReplacement); 801 assertEquals(result, "zzz\\t$\\$zzz"); 802 803 // Supplementary character test 804 pattern = Pattern.compile(toSupplementaries("abc"), flags); 805 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 806 replaceTest = "$0"; 807 result = matcher.replaceAll(replaceTest); 808 assertEquals(result, toSupplementaries("zzzabczzz")); 809 810 matcher.reset(); 811 literalReplacement = Matcher.quoteReplacement(replaceTest); 812 result = matcher.replaceAll(literalReplacement); 813 assertEquals(result, toSupplementaries("zzz$0zzz")); 814 815 matcher.reset(); 816 replaceTest = "\\t$\\$"; 817 literalReplacement = Matcher.quoteReplacement(replaceTest); 818 result = matcher.replaceAll(literalReplacement); 819 assertEquals(result, toSupplementaries("zzz\\t$\\$zzz")); 820 821 // IAE should be thrown if backslash or '$' is the last character 822 // in replacement string 823 assertThrows(IllegalArgumentException.class, () -> "\uac00".replaceAll("\uac00", "$")); 824 assertThrows(IllegalArgumentException.class, () -> "\uac00".replaceAll("\uac00", "\\")); 825 } 826 827 // This test is for 4757029 828 @Test regionTest()829 public static void regionTest() { 830 Pattern pattern = Pattern.compile("abc"); 831 Matcher matcher = pattern.matcher("abcdefabc"); 832 833 matcher.region(0,9); 834 assertTrue(matcher.find()); 835 assertTrue(matcher.find()); 836 matcher.region(0,3); 837 assertTrue(matcher.find()); 838 matcher.region(3,6); 839 assertFalse(matcher.find()); 840 matcher.region(0,2); 841 assertFalse(matcher.find()); 842 843 expectRegionFail(matcher, 1, -1); 844 expectRegionFail(matcher, -1, -1); 845 expectRegionFail(matcher, -1, 1); 846 expectRegionFail(matcher, 5, 3); 847 expectRegionFail(matcher, 5, 12); 848 expectRegionFail(matcher, 12, 12); 849 850 pattern = Pattern.compile("^abc$"); 851 matcher = pattern.matcher("zzzabczzz"); 852 matcher.region(0,9); 853 assertFalse(matcher.find()); 854 matcher.region(3,6); 855 assertTrue(matcher.find()); 856 matcher.region(3,6); 857 matcher.useAnchoringBounds(false); 858 assertFalse(matcher.find()); 859 860 // Supplementary character test 861 pattern = Pattern.compile(toSupplementaries("abc")); 862 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 863 matcher.region(0,9*2); 864 assertTrue(matcher.find()); 865 assertTrue(matcher.find()); 866 matcher.region(0,3*2); 867 assertTrue(matcher.find()); 868 matcher.region(1,3*2); 869 assertFalse(matcher.find()); 870 matcher.region(3*2,6*2); 871 assertFalse(matcher.find()); 872 matcher.region(0,2*2); 873 assertFalse(matcher.find()); 874 matcher.region(0,2*2+1); 875 assertFalse(matcher.find()); 876 877 expectRegionFail(matcher, 2, -1); 878 expectRegionFail(matcher, -1, -1); 879 expectRegionFail(matcher, -1, 2); 880 expectRegionFail(matcher, 5*2, 3*2); 881 expectRegionFail(matcher, 5*2, 12*2); 882 expectRegionFail(matcher, 12*2, 12*2); 883 884 pattern = Pattern.compile(toSupplementaries("^abc$")); 885 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 886 matcher.region(0,9*2); 887 assertFalse(matcher.find()); 888 matcher.region(3*2,6*2); 889 assertTrue(matcher.find()); 890 matcher.region(3*2+1,6*2); 891 assertFalse(matcher.find()); 892 matcher.region(3*2,6*2-1); 893 assertFalse(matcher.find()); 894 matcher.region(3*2,6*2); 895 matcher.useAnchoringBounds(false); 896 assertFalse(matcher.find()); 897 898 // JDK-8230829 899 pattern = Pattern.compile("\\ud800\\udc61"); 900 matcher = pattern.matcher("\ud800\udc61"); 901 matcher.region(0, 1); 902 assertFalse(matcher.find(), "Matched a surrogate pair" + 903 " that crosses border of region"); 904 905 assertTrue(matcher.hitEnd(), "Expected to hit the end when" + 906 " matching a surrogate pair crossing region"); 907 } 908 expectRegionFail(Matcher matcher, int index1, int index2)909 private static void expectRegionFail(Matcher matcher, int index1, 910 int index2) 911 { 912 913 try { 914 matcher.region(index1, index2); 915 fail(); 916 } catch (IndexOutOfBoundsException | IllegalStateException ioobe) { 917 // Correct result 918 } catch (Exception e) { 919 fail(); 920 } 921 } 922 923 // This test is for 4803197 924 @Test escapedSegmentTest()925 public static void escapedSegmentTest() { 926 927 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 928 check(pattern, "dir1\\dir2", true); 929 930 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 931 check(pattern, "dir1\\dir2\\", true); 932 933 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 934 check(pattern, "dir1\\dir2\\", true); 935 936 // Supplementary character test 937 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 938 check(pattern, toSupplementaries("dir1\\dir2"), true); 939 940 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 941 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 942 943 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 944 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 945 } 946 947 // This test is for 4792284 948 @Test nonCaptureRepetitionTest()949 public static void nonCaptureRepetitionTest() { 950 String input = "abcdefgh;"; 951 952 String[] patterns = new String[] { 953 "(?:\\w{4})+;", 954 "(?:\\w{8})*;", 955 "(?:\\w{2}){2,4};", 956 "(?:\\w{4}){2,};", // only matches the 957 ".*?(?:\\w{5})+;", // specified minimum 958 ".*?(?:\\w{9})*;", // number of reps - OK 959 "(?:\\w{4})+?;", // lazy repetition - OK 960 "(?:\\w{4})++;", // possessive repetition - OK 961 "(?:\\w{2,}?)+;", // non-deterministic - OK 962 "(\\w{4})+;", // capturing group - OK 963 }; 964 965 for (String pattern : patterns) { 966 // Check find() 967 check(pattern, 0, input, input, true); 968 // Check matches() 969 Pattern p = Pattern.compile(pattern); 970 Matcher m = p.matcher(input); 971 972 assertTrue(m.matches()); 973 assertEquals(m.group(0), input); 974 } 975 } 976 977 // This test is for 6358731 978 @Test notCapturedGroupCurlyMatchTest()979 public static void notCapturedGroupCurlyMatchTest() { 980 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 981 Matcher matcher = pattern.matcher("abcd"); 982 983 boolean condition = !matcher.matches() || 984 matcher.group(1) != null || 985 !matcher.group(2).equals("abcd"); 986 987 assertFalse(condition); 988 } 989 990 // This test is for 4706545 991 // FIXME: The char class doesn't match Character.is*() behavior. Is it worth fixing? 992 // Android-changed: Disable the test for further invesitgation. 993 @Test(enabled = false) javaCharClassTest()994 public static void javaCharClassTest() { 995 for (int i=0; i<1000; i++) { 996 char c = (char)generator.nextInt(); 997 check("{javaLowerCase}", c, Character.isLowerCase(c)); 998 check("{javaUpperCase}", c, Character.isUpperCase(c)); 999 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1000 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1001 check("{javaDigit}", c, Character.isDigit(c)); 1002 check("{javaDefined}", c, Character.isDefined(c)); 1003 check("{javaLetter}", c, Character.isLetter(c)); 1004 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1005 check("{javaJavaIdentifierStart}", c, 1006 Character.isJavaIdentifierStart(c)); 1007 check("{javaJavaIdentifierPart}", c, 1008 Character.isJavaIdentifierPart(c)); 1009 check("{javaUnicodeIdentifierStart}", c, 1010 Character.isUnicodeIdentifierStart(c)); 1011 check("{javaUnicodeIdentifierPart}", c, 1012 Character.isUnicodeIdentifierPart(c)); 1013 check("{javaIdentifierIgnorable}", c, 1014 Character.isIdentifierIgnorable(c)); 1015 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1016 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1017 check("{javaISOControl}", c, Character.isISOControl(c)); 1018 check("{javaMirrored}", c, Character.isMirrored(c)); 1019 1020 } 1021 1022 // Supplementary character test 1023 for (int i=0; i<1000; i++) { 1024 int c = generator.nextInt(Character.MAX_CODE_POINT 1025 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1026 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1027 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1028 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1029 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1030 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1031 check("{javaDigit}", c, Character.isDigit(c)); 1032 check("{javaDefined}", c, Character.isDefined(c)); 1033 check("{javaLetter}", c, Character.isLetter(c)); 1034 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1035 check("{javaJavaIdentifierStart}", c, 1036 Character.isJavaIdentifierStart(c)); 1037 check("{javaJavaIdentifierPart}", c, 1038 Character.isJavaIdentifierPart(c)); 1039 check("{javaUnicodeIdentifierStart}", c, 1040 Character.isUnicodeIdentifierStart(c)); 1041 check("{javaUnicodeIdentifierPart}", c, 1042 Character.isUnicodeIdentifierPart(c)); 1043 check("{javaIdentifierIgnorable}", c, 1044 Character.isIdentifierIgnorable(c)); 1045 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1046 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1047 check("{javaISOControl}", c, Character.isISOControl(c)); 1048 check("{javaMirrored}", c, Character.isMirrored(c)); 1049 } 1050 } 1051 1052 // This test is for 4523620 1053 /* 1054 @Test 1055 public static void numOccurrencesTest() throws Exception { 1056 Pattern pattern = Pattern.compile("aaa"); 1057 1058 if (pattern.numOccurrences("aaaaaa", false) != 2) 1059 failCount++; 1060 if (pattern.numOccurrences("aaaaaa", true) != 4) 1061 failCount++; 1062 1063 pattern = Pattern.compile("^"); 1064 if (pattern.numOccurrences("aaaaaa", false) != 1) 1065 failCount++; 1066 if (pattern.numOccurrences("aaaaaa", true) != 1) 1067 failCount++; 1068 1069 report("Number of Occurrences"); 1070 } 1071 */ 1072 1073 // This test is for 4776374 1074 @Test caretBetweenTerminatorsTest()1075 public static void caretBetweenTerminatorsTest() { 1076 int flags1 = Pattern.DOTALL; 1077 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1078 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1079 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1080 1081 check("^....", flags1, "test\ntest", "test", true); 1082 check(".....^", flags1, "test\ntest", "test", false); 1083 check(".....^", flags1, "test\n", "test", false); 1084 check("....^", flags1, "test\r\n", "test", false); 1085 1086 check("^....", flags2, "test\ntest", "test", true); 1087 check("....^", flags2, "test\ntest", "test", false); 1088 check(".....^", flags2, "test\n", "test", false); 1089 check("....^", flags2, "test\r\n", "test", false); 1090 1091 check("^....", flags3, "test\ntest", "test", true); 1092 check(".....^", flags3, "test\ntest", "test\n", true); 1093 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1094 // Android-removed: DOTALL + UNIX_LINES is ambiguous for interpreting non-\\u000a linebreak. 1095 // check(".....^", flags3, "test\n", "test", false); 1096 // check(".....^", flags3, "test\r\n", "test", false); 1097 // check("......^", flags3, "test\r\ntest", "test\r\n", true); 1098 1099 check("^....", flags4, "test\ntest", "test", true); 1100 check(".....^", flags3, "test\ntest", "test\n", true); 1101 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1102 check(".....^", flags4, "test\n", "test\n", false); 1103 check(".....^", flags4, "test\r\n", "test\r", false); 1104 1105 // Supplementary character test 1106 String t = toSupplementaries("test"); 1107 check("^....", flags1, t+"\n"+t, t, true); 1108 check(".....^", flags1, t+"\n"+t, t, false); 1109 check(".....^", flags1, t+"\n", t, false); 1110 check("....^", flags1, t+"\r\n", t, false); 1111 1112 check("^....", flags2, t+"\n"+t, t, true); 1113 check("....^", flags2, t+"\n"+t, t, false); 1114 check(".....^", flags2, t+"\n", t, false); 1115 // Android-removed: DOTALL + UNIX_LINES is ambiguous for interpreting non-\\u000a linebreak. 1116 // check("....^", flags2, t+"\r\n", t, false); 1117 1118 check("^....", flags3, t+"\n"+t, t, true); 1119 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1120 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1121 // Android-removed: DOTALL + UNIX_LINES is ambiguous for interpreting non-\\u000a linebreak. 1122 // check(".....^", flags3, t+"\n", t, false); 1123 // check(".....^", flags3, t+"\r\n", t, false); 1124 // check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1125 // check("......^", flags3, t+"\r\n\n"+t, t+"\r\n", true); 1126 1127 check("^....", flags4, t+"\n"+t, t, true); 1128 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1129 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1130 check(".....^", flags4, t+"\n", t+"\n", false); 1131 check(".....^", flags4, t+"\r\n", t+"\r", false); 1132 } 1133 1134 // This test is for 4727935 1135 @Test dollarAtEndTest()1136 public static void dollarAtEndTest() { 1137 int flags1 = Pattern.DOTALL; 1138 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1139 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1140 1141 check("....$", flags1, "test\n", "test", true); 1142 check("....$", flags1, "test\r\n", "test", true); 1143 check(".....$", flags1, "test\n", "test\n", true); 1144 check(".....$", flags1, "test\u0085", "test\u0085", true); 1145 check("....$", flags1, "test\u0085", "test", true); 1146 1147 check("....$", flags2, "test\n", "test", true); 1148 check(".....$", flags2, "test\n", "test\n", true); 1149 check(".....$", flags2, "test\u0085", "test\u0085", true); 1150 check("....$", flags2, "test\u0085", "est\u0085", true); 1151 1152 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1153 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1154 check("....$blah", flags3, "test\nblah", "!!!!", false); 1155 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1156 1157 // Supplementary character test 1158 String t = toSupplementaries("test"); 1159 String b = toSupplementaries("blah"); 1160 check("....$", flags1, t+"\n", t, true); 1161 check("....$", flags1, t+"\r\n", t, true); 1162 check(".....$", flags1, t+"\n", t+"\n", true); 1163 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1164 check("....$", flags1, t+"\u0085", t, true); 1165 1166 check("....$", flags2, t+"\n", t, true); 1167 check(".....$", flags2, t+"\n", t+"\n", true); 1168 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1169 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1170 1171 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1172 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1173 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1174 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1175 } 1176 1177 // This test is for 4711773 1178 @Test multilineDollarTest()1179 public static void multilineDollarTest() { 1180 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1181 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1182 matcher.find(); 1183 assertEquals(matcher.start(), 9); 1184 matcher.find(); 1185 assertEquals(matcher.start(0), 20); 1186 1187 // Supplementary character test 1188 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1189 matcher.find(); 1190 assertEquals(matcher.start(0), 9*2); 1191 matcher.find(); 1192 assertEquals(matcher.start(0), 20*2); 1193 } 1194 1195 @Test reluctantRepetitionTest()1196 public static void reluctantRepetitionTest() { 1197 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1198 check(p, "1 word word word 2", true); 1199 check(p, "1 wor wo w 2", true); 1200 check(p, "1 word word 2", true); 1201 check(p, "1 word 2", true); 1202 check(p, "1 wo w w 2", true); 1203 check(p, "1 wo w 2", true); 1204 check(p, "1 wor w 2", true); 1205 1206 p = Pattern.compile("([a-z])+?c"); 1207 Matcher m = p.matcher("ababcdefdec"); 1208 check(m, "ababc"); 1209 1210 // Supplementary character test 1211 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1212 m = p.matcher(toSupplementaries("ababcdefdec")); 1213 check(m, toSupplementaries("ababc")); 1214 } 1215 serializedPattern(Pattern p)1216 public static Pattern serializedPattern(Pattern p) throws Exception { 1217 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1218 ObjectOutputStream oos = new ObjectOutputStream(baos); 1219 oos.writeObject(p); 1220 oos.close(); 1221 try (ObjectInputStream ois = new ObjectInputStream( 1222 new ByteArrayInputStream(baos.toByteArray()))) { 1223 return (Pattern)ois.readObject(); 1224 } 1225 } 1226 1227 @Test serializeTest()1228 public static void serializeTest() throws Exception { 1229 String patternStr = "(b)"; 1230 String matchStr = "b"; 1231 Pattern pattern = Pattern.compile(patternStr); 1232 Pattern serializedPattern = serializedPattern(pattern); 1233 Matcher matcher = serializedPattern.matcher(matchStr); 1234 assertTrue(matcher.matches()); 1235 assertEquals(matcher.groupCount(), 1); 1236 1237 pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE); 1238 serializedPattern = serializedPattern(pattern); 1239 assertTrue(serializedPattern.matcher("Ab").matches()); 1240 assertFalse(serializedPattern.matcher("AB").matches()); 1241 } 1242 1243 @Test gTest()1244 public static void gTest() { 1245 Pattern pattern = Pattern.compile("\\G\\w"); 1246 Matcher matcher = pattern.matcher("abc#x#x"); 1247 matcher.find(); 1248 matcher.find(); 1249 matcher.find(); 1250 assertFalse(matcher.find()); 1251 1252 pattern = Pattern.compile("\\GA*"); 1253 matcher = pattern.matcher("1A2AA3"); 1254 matcher.find(); 1255 assertFalse(matcher.find()); 1256 1257 pattern = Pattern.compile("\\GA*"); 1258 matcher = pattern.matcher("1A2AA3"); 1259 // FIXME: Looks like find(start) should reset to the start point, but sets to 0. 1260 // Android-removed: Android fails this use case. 1261 // assertTrue(matcher.find(1)); 1262 // matcher.find(); 1263 // assertFalse(matcher.find()); 1264 } 1265 1266 @Test zTest()1267 public static void zTest() { 1268 Pattern pattern = Pattern.compile("foo\\Z"); 1269 // Positives 1270 check(pattern, "foo\u0085", true); 1271 check(pattern, "foo\u2028", true); 1272 check(pattern, "foo\u2029", true); 1273 check(pattern, "foo\n", true); 1274 check(pattern, "foo\r", true); 1275 check(pattern, "foo\r\n", true); 1276 // Negatives 1277 check(pattern, "fooo", false); 1278 check(pattern, "foo\n\r", false); 1279 1280 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1281 // Positives 1282 check(pattern, "foo", true); 1283 check(pattern, "foo\n", true); 1284 // Negatives 1285 // FIXME: Investigate why this fails. 1286 // Android-changed: UNIX_LINES is supported by ICU, but this test failed. 1287 // check(pattern, "foo\r", false); 1288 // check(pattern, "foo\u0085", false); 1289 // check(pattern, "foo\u2028", false); 1290 // check(pattern, "foo\u2029", false); 1291 } 1292 1293 @Test replaceFirstTest()1294 public static void replaceFirstTest() { 1295 Pattern pattern = Pattern.compile("(ab)(c*)"); 1296 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1297 assertEquals(matcher.replaceFirst("test"), "testzzzabcczzzabccc"); 1298 1299 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1300 assertEquals(matcher.replaceFirst("test"), "zzztestzzzabcczzzabccczzz"); 1301 1302 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1303 String result = matcher.replaceFirst("$1"); 1304 assertEquals(result,"zzzabzzzabcczzzabccczzz"); 1305 1306 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1307 result = matcher.replaceFirst("$2"); 1308 assertEquals(result, "zzzccczzzabcczzzabccczzz"); 1309 1310 pattern = Pattern.compile("a*"); 1311 matcher = pattern.matcher("aaaaaaaaaa"); 1312 assertEquals(matcher.replaceFirst("test"), "test"); 1313 1314 pattern = Pattern.compile("a+"); 1315 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1316 assertEquals(matcher.replaceFirst("test"), "zzztest"); 1317 1318 // Supplementary character test 1319 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1320 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1321 result = matcher.replaceFirst(toSupplementaries("test")); 1322 assertEquals(result, toSupplementaries("testzzzabcczzzabccc")); 1323 1324 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1325 result = matcher.replaceFirst(toSupplementaries("test")); 1326 assertEquals(result, toSupplementaries("zzztestzzzabcczzzabccczzz")); 1327 1328 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1329 result = matcher.replaceFirst("$1"); 1330 assertEquals(result, toSupplementaries("zzzabzzzabcczzzabccczzz")); 1331 1332 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1333 result = matcher.replaceFirst("$2"); 1334 assertEquals(result, toSupplementaries("zzzccczzzabcczzzabccczzz")); 1335 1336 pattern = Pattern.compile(toSupplementaries("a*")); 1337 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1338 1339 result = matcher.replaceFirst(toSupplementaries("test")); 1340 assertEquals(result,toSupplementaries("test")); 1341 1342 pattern = Pattern.compile(toSupplementaries("a+")); 1343 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1344 result = matcher.replaceFirst(toSupplementaries("test")); 1345 assertEquals(result, toSupplementaries("zzztest")); 1346 } 1347 1348 @Test unixLinesTest()1349 public static void unixLinesTest() { 1350 Pattern pattern = Pattern.compile(".*"); 1351 Matcher matcher = pattern.matcher("aa\u2028blah"); 1352 matcher.find(); 1353 assertEquals(matcher.group(0), "aa"); 1354 1355 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1356 matcher = pattern.matcher("aa\u2028blah"); 1357 matcher.find(); 1358 assertEquals(matcher.group(0), "aa\u2028blah"); 1359 1360 pattern = Pattern.compile("[az]$", 1361 Pattern.MULTILINE | Pattern.UNIX_LINES); 1362 matcher = pattern.matcher("aa\u2028zz"); 1363 check(matcher, "a\u2028", false); 1364 1365 // Supplementary character test 1366 pattern = Pattern.compile(".*"); 1367 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1368 matcher.find(); 1369 assertEquals(matcher.group(0), toSupplementaries("aa")); 1370 1371 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1372 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1373 matcher.find(); 1374 assertEquals(matcher.group(0), toSupplementaries("aa\u2028blah")); 1375 1376 pattern = Pattern.compile(toSupplementaries("[az]$"), 1377 Pattern.MULTILINE | Pattern.UNIX_LINES); 1378 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1379 check(matcher, toSupplementaries("a\u2028"), false); 1380 } 1381 1382 @Test commentsTest()1383 public static void commentsTest() { 1384 int flags = Pattern.COMMENTS; 1385 1386 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1387 Matcher matcher = pattern.matcher("aa#aa"); 1388 assertTrue(matcher.matches()); 1389 1390 pattern = Pattern.compile("aa # blah", flags); 1391 matcher = pattern.matcher("aa"); 1392 assertTrue(matcher.matches()); 1393 1394 pattern = Pattern.compile("aa blah", flags); 1395 matcher = pattern.matcher("aablah"); 1396 assertTrue(matcher.matches()); 1397 1398 pattern = Pattern.compile("aa # blah blech ", flags); 1399 matcher = pattern.matcher("aa"); 1400 assertTrue(matcher.matches()); 1401 1402 pattern = Pattern.compile("aa # blah\n ", flags); 1403 matcher = pattern.matcher("aa"); 1404 assertTrue(matcher.matches()); 1405 1406 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1407 matcher = pattern.matcher("aabc"); 1408 assertTrue(matcher.matches()); 1409 1410 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1411 matcher = pattern.matcher("aabc"); 1412 assertTrue(matcher.matches()); 1413 1414 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1415 matcher = pattern.matcher("aabc#blech"); 1416 assertTrue(matcher.matches()); 1417 1418 // Supplementary character test 1419 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1420 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1421 assertTrue(matcher.matches()); 1422 1423 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1424 matcher = pattern.matcher(toSupplementaries("aa")); 1425 assertTrue(matcher.matches()); 1426 1427 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1428 matcher = pattern.matcher(toSupplementaries("aablah")); 1429 assertTrue(matcher.matches()); 1430 1431 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1432 matcher = pattern.matcher(toSupplementaries("aa")); 1433 assertTrue(matcher.matches()); 1434 1435 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1436 matcher = pattern.matcher(toSupplementaries("aa")); 1437 assertTrue(matcher.matches()); 1438 1439 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1440 matcher = pattern.matcher(toSupplementaries("aabc")); 1441 assertTrue(matcher.matches()); 1442 1443 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1444 matcher = pattern.matcher(toSupplementaries("aabc")); 1445 assertTrue(matcher.matches()); 1446 1447 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1448 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1449 assertTrue(matcher.matches()); 1450 } 1451 1452 @Test caseFoldingTest()1453 public static void caseFoldingTest() { // bug 4504687 1454 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1455 Pattern pattern = Pattern.compile("aa", flags); 1456 Matcher matcher = pattern.matcher("ab"); 1457 assertFalse(matcher.matches()); 1458 1459 pattern = Pattern.compile("aA", flags); 1460 matcher = pattern.matcher("ab"); 1461 assertFalse(matcher.matches()); 1462 1463 pattern = Pattern.compile("aa", flags); 1464 matcher = pattern.matcher("aB"); 1465 assertFalse(matcher.matches()); 1466 1467 matcher = pattern.matcher("Ab"); 1468 assertFalse(matcher.matches()); 1469 1470 // ASCII "a" 1471 // Latin-1 Supplement "a" + grave 1472 // Cyrillic "a" 1473 String[] patterns = new String[] { 1474 //single 1475 "a", "\u00e0", "\u0430", 1476 //slice 1477 "ab", "\u00e0\u00e1", "\u0430\u0431", 1478 //class single 1479 "[a]", "[\u00e0]", "[\u0430]", 1480 //class range 1481 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1482 //back reference 1483 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1484 }; 1485 1486 String[] texts = new String[] { 1487 "A", "\u00c0", "\u0410", 1488 "AB", "\u00c0\u00c1", "\u0410\u0411", 1489 "A", "\u00c0", "\u0410", 1490 "B", "\u00c2", "\u0411", 1491 "aA", "\u00e0\u00c0", "\u0430\u0410" 1492 }; 1493 1494 boolean[] expected = new boolean[] { 1495 true, false, false, 1496 true, false, false, 1497 true, false, false, 1498 true, false, false, 1499 true, false, false 1500 }; 1501 1502 // Android-removed: CASE_INSENSITIVE has the same effect as UNICODE_CASE on Android. 1503 /* 1504 flags = Pattern.CASE_INSENSITIVE; 1505 for (int i = 0; i < patterns.length; i++) { 1506 pattern = Pattern.compile(patterns[i], flags); 1507 matcher = pattern.matcher(texts[i]); 1508 assertEquals(matcher.matches(), expected[i], "<1> Failed at " + i); 1509 } 1510 */ 1511 1512 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1513 for (int i = 0; i < patterns.length; i++) { 1514 pattern = Pattern.compile(patterns[i], flags); 1515 matcher = pattern.matcher(texts[i]); 1516 assertTrue(matcher.matches(), "<2> Failed at " + i); 1517 } 1518 // flag unicode_case alone should do nothing 1519 flags = Pattern.UNICODE_CASE; 1520 for (int i = 0; i < patterns.length; i++) { 1521 pattern = Pattern.compile(patterns[i], flags); 1522 matcher = pattern.matcher(texts[i]); 1523 assertFalse(matcher.matches(), "<3> Failed at " + i); 1524 } 1525 1526 // Special cases: i, I, u+0131 and u+0130 1527 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1528 pattern = Pattern.compile("[h-j]+", flags); 1529 // Android-changed: no simple case folding for \u0130 and \u0131 according to Unicode 14.0 1530 // https://www.unicode.org/Public/14.0.0/ucd/CaseFolding.txt 1531 // pattern = Pattern.compile("[h-j]+", flags); 1532 // assertTrue(pattern.matcher("\u0131\u0130").matches()); 1533 } 1534 1535 @Test appendTest()1536 public static void appendTest() { 1537 Pattern pattern = Pattern.compile("(ab)(cd)"); 1538 Matcher matcher = pattern.matcher("abcd"); 1539 String result = matcher.replaceAll("$2$1"); 1540 assertEquals(result, "cdab"); 1541 1542 String s1 = "Swap all: first = 123, second = 456"; 1543 String s2 = "Swap one: first = 123, second = 456"; 1544 String r = "$3$2$1"; 1545 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1546 matcher = pattern.matcher(s1); 1547 1548 result = matcher.replaceAll(r); 1549 assertEquals(result, "Swap all: 123 = first, 456 = second"); 1550 1551 matcher = pattern.matcher(s2); 1552 1553 if (matcher.find()) { 1554 StringBuffer sb = new StringBuffer(); 1555 matcher.appendReplacement(sb, r); 1556 matcher.appendTail(sb); 1557 result = sb.toString(); 1558 assertEquals(result, "Swap one: 123 = first, second = 456"); 1559 } 1560 1561 // Supplementary character test 1562 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1563 matcher = pattern.matcher(toSupplementaries("abcd")); 1564 result = matcher.replaceAll("$2$1"); 1565 assertEquals(result, toSupplementaries("cdab")); 1566 1567 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1568 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1569 r = toSupplementaries("$3$2$1"); 1570 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1571 matcher = pattern.matcher(s1); 1572 1573 result = matcher.replaceAll(r); 1574 assertEquals(result, toSupplementaries("Swap all: 123 = first, 456 = second")); 1575 1576 matcher = pattern.matcher(s2); 1577 1578 if (matcher.find()) { 1579 StringBuffer sb = new StringBuffer(); 1580 matcher.appendReplacement(sb, r); 1581 matcher.appendTail(sb); 1582 result = sb.toString(); 1583 assertEquals(result, toSupplementaries("Swap one: 123 = first, second = 456")); 1584 } 1585 } 1586 1587 @Test splitTest()1588 public static void splitTest() { 1589 Pattern pattern = Pattern.compile(":"); 1590 String[] result = pattern.split("foo:and:boo", 2); 1591 assertEquals(result[0], "foo"); 1592 assertEquals(result[1], "and:boo"); 1593 // Supplementary character test 1594 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1595 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1596 assertEquals(result[0], toSupplementaries("foo")); 1597 assertEquals(result[1], toSupplementaries("andXboo")); 1598 1599 CharBuffer cb = CharBuffer.allocate(100); 1600 cb.put("foo:and:boo"); 1601 cb.flip(); 1602 result = pattern.split(cb); 1603 assertEquals(result[0], "foo"); 1604 assertEquals(result[1], "and"); 1605 assertEquals(result[2], "boo"); 1606 1607 // Supplementary character test 1608 CharBuffer cbs = CharBuffer.allocate(100); 1609 cbs.put(toSupplementaries("fooXandXboo")); 1610 cbs.flip(); 1611 result = patternX.split(cbs); 1612 assertEquals(result[0], toSupplementaries("foo")); 1613 assertEquals(result[1], toSupplementaries("and")); 1614 assertEquals(result[2], toSupplementaries("boo")); 1615 1616 String source = "0123456789"; 1617 for (int limit=-2; limit<3; limit++) { 1618 for (int x=0; x<10; x++) { 1619 result = source.split(Integer.toString(x), limit); 1620 int expectedLength = limit < 1 ? 2 : limit; 1621 1622 if ((limit == 0) && (x == 9)) { 1623 // expected dropping of "" 1624 assertEquals(result.length, 1); 1625 assertEquals(result[0], "012345678"); 1626 } else { 1627 assertEquals(result.length, expectedLength); 1628 1629 if (!result[0].equals(source.substring(0,x))) { 1630 assertEquals(limit, 1); 1631 assertEquals(result[0], source.substring(0,10)); 1632 } 1633 if (expectedLength > 1) { // Check segment 2 1634 assertEquals(result[1], source.substring(x+1,10)); 1635 } 1636 } 1637 } 1638 } 1639 // Check the case for no match found 1640 for (int limit=-2; limit<3; limit++) { 1641 result = source.split("e", limit); 1642 assertEquals(result.length, 1); 1643 assertEquals(result[0], source); 1644 } 1645 // Check the case for limit == 0, source = ""; 1646 // split() now returns 0-length for empty source "" see #6559590 1647 source = ""; 1648 result = source.split("e", 0); 1649 assertEquals(result.length, 1); 1650 assertEquals(result[0], source); 1651 1652 // Check both split() and splitAsStraem(), especially for zero-lenth 1653 // input and zero-lenth match cases 1654 String[][] input = new String[][] { 1655 { " ", "Abc Efg Hij" }, // normal non-zero-match 1656 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1657 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1658 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1659 { "(?=\\p{Lu})", "AbcEfg" }, 1660 { "(?=\\p{Lu})", "Abc" }, 1661 { " ", "" }, // zero-length input 1662 { ".*", "" }, 1663 1664 // some tests from PatternStreamTest.java 1665 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1666 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1667 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1668 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1669 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1670 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1671 { "\u56da", "" }, 1672 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1673 { "o", "boo:and:foo" }, 1674 { "o", "booooo:and:fooooo" }, 1675 { "o", "fooooo:" }, 1676 }; 1677 1678 String[][] expected = new String[][] { 1679 { "Abc", "Efg", "Hij" }, 1680 { "", "Abc", "Efg", "Hij" }, 1681 { "Abc", "", "Efg", "Hij" }, 1682 { "Abc", "Efg", "Hij" }, 1683 { "Abc", "Efg" }, 1684 { "Abc" }, 1685 { "" }, 1686 { "" }, 1687 1688 { "awgqwefg1fefw", "vssv1vvv1" }, 1689 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1690 { "awgqwefg", "fefw4vssv", "vvv" }, 1691 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1692 { "1", "23", "456", "7890" }, 1693 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1694 { "" }, 1695 { "This", "is", "testing", "", "with", "different", "separators" }, 1696 { "b", "", ":and:f" }, 1697 { "b", "", "", "", "", ":and:f" }, 1698 { "f", "", "", "", "", ":" }, 1699 }; 1700 for (int i = 0; i < input.length; i++) { 1701 pattern = Pattern.compile(input[i][0]); 1702 assertTrue(Arrays.equals(pattern.split(input[i][1]), expected[i])); 1703 1704 assertFalse(input[i][1].length() > 0 && // splitAsStream() return empty resulting 1705 // array for zero-length input for now 1706 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1707 expected[i])); 1708 } 1709 } 1710 1711 @Test negationTest()1712 public static void negationTest() { 1713 Pattern pattern = Pattern.compile("[\\[@^]+"); 1714 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1715 assertTrue(matcher.find()); 1716 assertEquals(matcher.group(0), "@@@@[[[[^^^^"); 1717 1718 pattern = Pattern.compile("[@\\[^]+"); 1719 matcher = pattern.matcher("@@@@[[[[^^^^"); 1720 assertTrue(matcher.find()); 1721 assertEquals(matcher.group(0), "@@@@[[[[^^^^"); 1722 1723 pattern = Pattern.compile("[@\\[^@]+"); 1724 matcher = pattern.matcher("@@@@[[[[^^^^"); 1725 assertTrue(matcher.find()); 1726 assertEquals(matcher.group(0), "@@@@[[[[^^^^"); 1727 1728 pattern = Pattern.compile("\\)"); 1729 matcher = pattern.matcher("xxx)xxx"); 1730 assertTrue(matcher.find()); 1731 } 1732 1733 @Test ampersandTest()1734 public static void ampersandTest() { 1735 Pattern pattern = Pattern.compile("[&@]+"); 1736 check(pattern, "@@@@&&&&", true); 1737 1738 pattern = Pattern.compile("[@&]+"); 1739 check(pattern, "@@@@&&&&", true); 1740 1741 pattern = Pattern.compile("[@\\&]+"); 1742 check(pattern, "@@@@&&&&", true); 1743 } 1744 1745 @Test octalTest()1746 public static void octalTest() { 1747 Pattern pattern = Pattern.compile("\\u0007"); 1748 Matcher matcher = pattern.matcher("\u0007"); 1749 assertTrue(matcher.matches()); 1750 pattern = Pattern.compile("\\07"); 1751 matcher = pattern.matcher("\u0007"); 1752 assertTrue(matcher.matches()); 1753 pattern = Pattern.compile("\\007"); 1754 matcher = pattern.matcher("\u0007"); 1755 assertTrue(matcher.matches()); 1756 pattern = Pattern.compile("\\0007"); 1757 matcher = pattern.matcher("\u0007"); 1758 assertTrue(matcher.matches()); 1759 pattern = Pattern.compile("\\040"); 1760 matcher = pattern.matcher("\u0020"); 1761 assertTrue(matcher.matches()); 1762 pattern = Pattern.compile("\\0403"); 1763 matcher = pattern.matcher("\u00203"); 1764 assertTrue(matcher.matches()); 1765 pattern = Pattern.compile("\\0103"); 1766 matcher = pattern.matcher("\u0043"); 1767 assertTrue(matcher.matches()); 1768 } 1769 1770 @Test longPatternTest()1771 public static void longPatternTest() { 1772 try { 1773 Pattern.compile( 1774 "a 32-character-long pattern xxxx"); 1775 Pattern.compile("a 33-character-long pattern xxxxx"); 1776 Pattern.compile("a thirty four character long regex"); 1777 StringBuilder patternToBe = new StringBuilder(101); 1778 for (int i=0; i<100; i++) 1779 patternToBe.append((char)(97 + i%26)); 1780 Pattern.compile(patternToBe.toString()); 1781 } catch (PatternSyntaxException e) { 1782 fail(); 1783 } 1784 1785 // Supplementary character test 1786 try { 1787 Pattern.compile( 1788 toSupplementaries("a 32-character-long pattern xxxx")); 1789 Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 1790 Pattern.compile(toSupplementaries("a thirty four character long regex")); 1791 StringBuilder patternToBe = new StringBuilder(101*2); 1792 for (int i=0; i<100; i++) 1793 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 1794 + 97 + i%26)); 1795 Pattern.compile(patternToBe.toString()); 1796 } catch (PatternSyntaxException e) { 1797 fail(); 1798 } 1799 } 1800 1801 @Test group0Test()1802 public static void group0Test() { 1803 Pattern pattern = Pattern.compile("(tes)ting"); 1804 Matcher matcher = pattern.matcher("testing"); 1805 check(matcher, "testing"); 1806 1807 matcher.reset("testing"); 1808 assertTrue(matcher.lookingAt()); 1809 assertEquals(matcher.group(0), "testing"); 1810 1811 matcher.reset("testing"); 1812 assertTrue(matcher.matches()); 1813 assertEquals(matcher.group(0), "testing"); 1814 1815 pattern = Pattern.compile("(tes)ting"); 1816 matcher = pattern.matcher("testing"); 1817 assertTrue(matcher.lookingAt()); 1818 assertEquals(matcher.group(0), "testing"); 1819 1820 pattern = Pattern.compile("^(tes)ting"); 1821 matcher = pattern.matcher("testing"); 1822 assertTrue(matcher.matches()); 1823 assertEquals(matcher.group(0), "testing"); 1824 1825 // Supplementary character test 1826 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 1827 matcher = pattern.matcher(toSupplementaries("testing")); 1828 check(matcher, toSupplementaries("testing")); 1829 1830 matcher.reset(toSupplementaries("testing")); 1831 assertTrue(matcher.lookingAt()); 1832 assertEquals(matcher.group(0), toSupplementaries("testing")); 1833 1834 matcher.reset(toSupplementaries("testing")); 1835 assertTrue(matcher.matches()); 1836 assertEquals(matcher.group(0), toSupplementaries("testing")); 1837 1838 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 1839 matcher = pattern.matcher(toSupplementaries("testing")); 1840 assertTrue(matcher.lookingAt()); 1841 assertEquals(matcher.group(0), toSupplementaries("testing")); 1842 1843 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 1844 matcher = pattern.matcher(toSupplementaries("testing")); 1845 1846 assertTrue(matcher.matches()); 1847 assertEquals(matcher.group(0), toSupplementaries("testing")); 1848 } 1849 1850 @Test findIntTest()1851 public static void findIntTest() { 1852 Pattern p = Pattern.compile("blah"); 1853 Matcher m = p.matcher("zzzzblahzzzzzblah"); 1854 boolean result = m.find(2); 1855 1856 assertTrue(result); 1857 1858 final Pattern p2 = Pattern.compile("$"); 1859 final Matcher m2 = p2.matcher("1234567890"); 1860 result = m2.find(10); 1861 assertTrue(result); 1862 assertThrows(IndexOutOfBoundsException.class, () -> m2.find(11)); 1863 1864 // Supplementary character test 1865 p = Pattern.compile(toSupplementaries("blah")); 1866 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 1867 result = m.find(2); 1868 assertTrue(result); 1869 } 1870 1871 @Test emptyPatternTest()1872 public static void emptyPatternTest() { 1873 Pattern p = Pattern.compile(""); 1874 final Matcher m = p.matcher("foo"); 1875 1876 // Should find empty pattern at beginning of input 1877 boolean result = m.find(); 1878 assertTrue(result); 1879 assertEquals(m.start(), 0); 1880 1881 // Should not match entire input if input is not empty 1882 m.reset(); 1883 result = m.matches(); 1884 assertFalse(result); 1885 1886 assertThrows(IllegalStateException.class, () -> m.start(0)); 1887 1888 // Should match entire input if input is empty 1889 m.reset(""); 1890 result = m.matches(); 1891 assertTrue(result); 1892 1893 result = Pattern.matches("", ""); 1894 assertTrue(result); 1895 1896 result = Pattern.matches("", "foo"); 1897 assertFalse(result); 1898 } 1899 1900 @Test charClassTest()1901 public static void charClassTest() { 1902 Pattern pattern = Pattern.compile("blah[ab]]blech"); 1903 check(pattern, "blahb]blech", true); 1904 1905 pattern = Pattern.compile("[abc[def]]"); 1906 check(pattern, "b", true); 1907 1908 // Supplementary character tests 1909 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 1910 check(pattern, toSupplementaries("blahb]blech"), true); 1911 1912 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 1913 check(pattern, toSupplementaries("b"), true); 1914 1915 // u00ff when UNICODE_CASE 1916 pattern = Pattern.compile("[ab\u00ffcd]", 1917 Pattern.CASE_INSENSITIVE| 1918 Pattern.UNICODE_CASE); 1919 check(pattern, "ab\u00ffcd", true); 1920 check(pattern, "Ab\u0178Cd", true); 1921 1922 // u00b5 when UNICODE_CASE 1923 pattern = Pattern.compile("[ab\u00b5cd]", 1924 Pattern.CASE_INSENSITIVE| 1925 Pattern.UNICODE_CASE); 1926 check(pattern, "ab\u00b5cd", true); 1927 check(pattern, "Ab\u039cCd", true); 1928 1929 /* Special cases 1930 (1)LatinSmallLetterLongS u+017f 1931 (2)LatinSmallLetterDotlessI u+0131 1932 (3)LatineCapitalLetterIWithDotAbove u+0130 1933 (4)KelvinSign u+212a 1934 (5)AngstromSign u+212b 1935 */ 1936 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1937 // Android-changed: Modified the input and output to match Unicode 14.0 1938 // See https://www.unicode.org/Public/14.0.0/ucd/CaseFolding.txt 1939 // pattern = Pattern.compile("[sik\u00c5]+", flags); 1940 // assertTrue(pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()); 1941 pattern = Pattern.compile("[sk\u00e5]+", flags); 1942 assertTrue(pattern.matcher("\u017f\u212a\u212b").matches()); 1943 1944 } 1945 1946 @Test caretTest()1947 public static void caretTest() { 1948 Pattern pattern = Pattern.compile("\\w*"); 1949 Matcher matcher = pattern.matcher("a#bc#def##g"); 1950 check(matcher, "a"); 1951 check(matcher, ""); 1952 check(matcher, "bc"); 1953 check(matcher, ""); 1954 check(matcher, "def"); 1955 check(matcher, ""); 1956 check(matcher, ""); 1957 check(matcher, "g"); 1958 check(matcher, ""); 1959 assertFalse(matcher.find()); 1960 1961 pattern = Pattern.compile("^\\w*"); 1962 matcher = pattern.matcher("a#bc#def##g"); 1963 check(matcher, "a"); 1964 assertFalse(matcher.find()); 1965 1966 pattern = Pattern.compile("\\w"); 1967 matcher = pattern.matcher("abc##x"); 1968 check(matcher, "a"); 1969 check(matcher, "b"); 1970 check(matcher, "c"); 1971 check(matcher, "x"); 1972 assertFalse(matcher.find()); 1973 1974 pattern = Pattern.compile("^\\w"); 1975 matcher = pattern.matcher("abc##x"); 1976 check(matcher, "a"); 1977 assertFalse(matcher.find()); 1978 1979 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 1980 matcher = pattern.matcher("abcdef-ghi\njklmno"); 1981 check(matcher, "abc"); 1982 assertFalse(matcher.find()); 1983 1984 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 1985 matcher = pattern.matcher("abcdef-ghi\njklmno"); 1986 check(matcher, "abc"); 1987 check(matcher, "jkl"); 1988 assertFalse(matcher.find()); 1989 1990 pattern = Pattern.compile("^", Pattern.MULTILINE); 1991 matcher = pattern.matcher("this is some text"); 1992 String result = matcher.replaceAll("X"); 1993 assertEquals(result, "Xthis is some text"); 1994 1995 pattern = Pattern.compile("^"); 1996 matcher = pattern.matcher("this is some text"); 1997 result = matcher.replaceAll("X"); 1998 assertEquals(result, "Xthis is some text"); 1999 2000 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2001 matcher = pattern.matcher("this is some text\n"); 2002 result = matcher.replaceAll("X"); 2003 // Android-changed: Inserting 'X' after the new line \n on Android seems correct. 2004 // assertEquals(result, "Xthis is some text\n"); 2005 assertEquals(result, "Xthis is some text\nX"); 2006 } 2007 2008 @Test groupCaptureTest()2009 public static void groupCaptureTest() { 2010 // Independent group 2011 assertThrows(IndexOutOfBoundsException.class, () -> { 2012 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2013 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2014 matcher.find(); 2015 matcher.group(1); 2016 }); 2017 2018 // Pure group 2019 assertThrows(IndexOutOfBoundsException.class, () -> { 2020 Pattern pattern = Pattern.compile("x+(?:y+)z+"); 2021 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2022 matcher.find(); 2023 String blah = matcher.group(1); 2024 }); 2025 2026 // Supplementary character tests 2027 // Independent group 2028 assertThrows(IndexOutOfBoundsException.class, () -> { 2029 Pattern pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2030 Matcher matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2031 matcher.find(); 2032 String blah = matcher.group(1); 2033 }); 2034 2035 // Pure group 2036 assertThrows(IndexOutOfBoundsException.class, () -> { 2037 Pattern pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2038 Matcher matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2039 matcher.find(); 2040 String blah = matcher.group(1); 2041 }); 2042 } 2043 2044 @Test backRefTest()2045 public static void backRefTest() { 2046 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2047 check(pattern, "zzzaabcazzz", true); 2048 2049 pattern = Pattern.compile("(a*)bc\\1"); 2050 check(pattern, "zzzaabcaazzz", true); 2051 2052 pattern = Pattern.compile("(abc)(def)\\1"); 2053 check(pattern, "abcdefabc", true); 2054 2055 // Android-changed: Android throws Exception at the compilation for non-existent group. 2056 // The doc says "\1 through \9 are always interpreted as back references". 2057 // pattern = Pattern.compile("(abc)(def)\\3"); 2058 pattern = Pattern.compile("(abc)(def)\\2"); 2059 check(pattern, "abcdefabc", false); 2060 2061 // Android-removed: ICU4C checks the existence of the groups. ICU4C behavior sounds good, 2062 // but doesn't match the upstream javadoc. 2063 /* 2064 for (int i = 1; i < 10; i++) { 2065 // Make sure backref 1-9 are always accepted 2066 pattern = Pattern.compile("abcdef\\" + i); 2067 // and fail to match if the target group does not exit 2068 check(pattern, "abcdef", false); 2069 } 2070 */ 2071 2072 // Android-changed: Android doesn't match the upstream javadoc, but this isn't important. 2073 // The doc says "the parser will drop digits until the number is smaller or equal to the 2074 // existing number of groups or it is one digit." 2075 // pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2076 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\1\\Q1\\E"); 2077 check(pattern, "abcdefghija", false); 2078 check(pattern, "abcdefghija1", true); 2079 2080 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2081 check(pattern, "abcdefghijkk", true); 2082 2083 pattern = Pattern.compile("(a)bcdefghij\\11"); 2084 check(pattern, "abcdefghija1", true); 2085 2086 // Supplementary character tests 2087 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2088 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2089 2090 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2091 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2092 2093 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2094 check(pattern, toSupplementaries("abcdefabc"), true); 2095 2096 // Android-changed: Android doesn't allow non-existent capture group. 2097 // pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2098 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\2")); 2099 check(pattern, toSupplementaries("abcdefabc"), false); 2100 check(pattern, toSupplementaries("abcdefdef"), true); 2101 2102 // Android-changed: Android doesn't allow non-existent capture group. 2103 // pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2104 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\1\\Q1\\E")); 2105 check(pattern, toSupplementaries("abcdefghija"), false); 2106 check(pattern, toSupplementaries("abcdefghija1"), true); 2107 2108 // Android-changed: Android doesn't allow non-existent capture group. 2109 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2110 check(pattern, toSupplementaries("abcdefghijkk"), true); 2111 } 2112 2113 /** 2114 * Unicode Technical Report #18, section 2.6 End of Line 2115 * There is no empty line to be matched in the sequence \u000D\u000A 2116 * but there is an empty line in the sequence \u000A\u000D. 2117 */ 2118 @Test anchorTest()2119 public static void anchorTest() { 2120 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2121 Matcher m = p.matcher("blah1\r\nblah2"); 2122 m.find(); 2123 m.find(); 2124 assertEquals(m.group(), "blah2"); 2125 2126 m.reset("blah1\n\rblah2"); 2127 m.find(); 2128 m.find(); 2129 m.find(); 2130 assertEquals(m.group(), "blah2"); 2131 2132 // Test behavior of $ with \r\n at end of input 2133 p = Pattern.compile(".+$"); 2134 m = p.matcher("blah1\r\n"); 2135 assertTrue(m.find()); 2136 assertEquals(m.group(), "blah1"); 2137 assertFalse(m.find()); 2138 2139 // Test behavior of $ with \r\n at end of input in multiline 2140 p = Pattern.compile(".+$", Pattern.MULTILINE); 2141 m = p.matcher("blah1\r\n"); 2142 assertTrue(m.find()); 2143 assertFalse(m.find()); 2144 2145 // Test for $ recognition of \u0085 for bug 4527731 2146 p = Pattern.compile(".+$", Pattern.MULTILINE); 2147 m = p.matcher("blah1\u0085"); 2148 assertTrue(m.find()); 2149 2150 // Supplementary character test 2151 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2152 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2153 m.find(); 2154 m.find(); 2155 assertEquals(m.group(), toSupplementaries("blah2")); 2156 2157 m.reset(toSupplementaries("blah1\n\rblah2")); 2158 m.find(); 2159 m.find(); 2160 m.find(); 2161 2162 assertEquals(m.group(), toSupplementaries("blah2")); 2163 2164 // Test behavior of $ with \r\n at end of input 2165 p = Pattern.compile(".+$"); 2166 m = p.matcher(toSupplementaries("blah1\r\n")); 2167 assertTrue(m.find()); 2168 assertEquals(m.group(), toSupplementaries("blah1")); 2169 assertFalse(m.find()); 2170 2171 // Test behavior of $ with \r\n at end of input in multiline 2172 p = Pattern.compile(".+$", Pattern.MULTILINE); 2173 m = p.matcher(toSupplementaries("blah1\r\n")); 2174 assertTrue(m.find()); 2175 assertFalse(m.find()); 2176 2177 // Test for $ recognition of \u0085 for bug 4527731 2178 p = Pattern.compile(".+$", Pattern.MULTILINE); 2179 m = p.matcher(toSupplementaries("blah1\u0085")); 2180 assertTrue(m.find()); 2181 } 2182 2183 /** 2184 * A basic sanity test of Matcher.lookingAt(). 2185 */ 2186 @Test lookingAtTest()2187 public static void lookingAtTest() { 2188 Pattern p = Pattern.compile("(ab)(c*)"); 2189 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2190 2191 assertTrue(m.lookingAt()); 2192 2193 assertEquals(m.group(), m.group(0)); 2194 2195 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2196 assertFalse(m.lookingAt()); 2197 2198 // Supplementary character test 2199 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2200 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2201 2202 assertTrue(m.lookingAt()); 2203 2204 assertEquals(m.group(), m.group(0)); 2205 2206 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2207 assertFalse(m.lookingAt()); 2208 } 2209 2210 /** 2211 * A basic sanity test of Matcher.matches(). 2212 */ 2213 @Test matchesTest()2214 public static void matchesTest() { 2215 // matches() 2216 Pattern p = Pattern.compile("ulb(c*)"); 2217 Matcher m = p.matcher("ulbcccccc"); 2218 assertTrue(m.matches()); 2219 2220 // find() but not matches() 2221 m.reset("zzzulbcccccc"); 2222 assertFalse(m.matches()); 2223 2224 // lookingAt() but not matches() 2225 m.reset("ulbccccccdef"); 2226 assertFalse(m.matches()); 2227 2228 // matches() 2229 p = Pattern.compile("a|ad"); 2230 m = p.matcher("ad"); 2231 assertTrue(m.matches()); 2232 2233 // Supplementary character test 2234 // matches() 2235 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2236 m = p.matcher(toSupplementaries("ulbcccccc")); 2237 assertTrue(m.matches()); 2238 2239 // find() but not matches() 2240 m.reset(toSupplementaries("zzzulbcccccc")); 2241 assertFalse(m.matches()); 2242 2243 // lookingAt() but not matches() 2244 m.reset(toSupplementaries("ulbccccccdef")); 2245 assertFalse(m.matches()); 2246 2247 // matches() 2248 p = Pattern.compile(toSupplementaries("a|ad")); 2249 m = p.matcher(toSupplementaries("ad")); 2250 assertTrue(m.matches()); 2251 } 2252 2253 /** 2254 * A basic sanity test of Pattern.matches(). 2255 */ 2256 @Test patternMatchesTest()2257 public static void patternMatchesTest() { 2258 // matches() 2259 assertTrue(Pattern.matches(toSupplementaries("ulb(c*)"), 2260 toSupplementaries("ulbcccccc"))); 2261 2262 // find() but not matches() 2263 assertFalse(Pattern.matches(toSupplementaries("ulb(c*)"), 2264 toSupplementaries("zzzulbcccccc"))); 2265 2266 // lookingAt() but not matches() 2267 assertFalse(Pattern.matches(toSupplementaries("ulb(c*)"), 2268 toSupplementaries("ulbccccccdef"))); 2269 2270 // Supplementary character test 2271 // matches() 2272 assertTrue(Pattern.matches(toSupplementaries("ulb(c*)"), 2273 toSupplementaries("ulbcccccc"))); 2274 2275 // find() but not matches() 2276 assertFalse(Pattern.matches(toSupplementaries("ulb(c*)"), 2277 toSupplementaries("zzzulbcccccc"))); 2278 2279 // lookingAt() but not matches() 2280 assertFalse(Pattern.matches(toSupplementaries("ulb(c*)"), 2281 toSupplementaries("ulbccccccdef"))); 2282 } 2283 2284 /** 2285 * Canonical equivalence testing. Tests the ability of the engine 2286 * to match sequences that are not explicitly specified in the 2287 * pattern when they are considered equivalent by the Unicode Standard. 2288 */ 2289 // Android-changed: Disable the test because CANON_EQ isn't supported on Android. 2290 @Test(enabled = false) ceTest()2291 public static void ceTest() { 2292 // Decomposed char outside char classes 2293 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2294 Matcher m = p.matcher("test\u00e5"); 2295 assertTrue(m.matches()); 2296 2297 m.reset("testa\u030a"); 2298 assertTrue(m.matches()); 2299 2300 // Composed char outside char classes 2301 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2302 m = p.matcher("test\u00e5"); 2303 assertTrue(m.matches()); 2304 2305 m.reset("testa\u030a"); 2306 assertTrue(m.find()); 2307 2308 // Decomposed char inside a char class 2309 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2310 m = p.matcher("test\u00e5"); 2311 assertTrue(m.find()); 2312 2313 m.reset("testa\u030a"); 2314 assertTrue(m.find()); 2315 2316 // Composed char inside a char class 2317 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2318 m = p.matcher("test\u00e5"); 2319 assertTrue(m.find()); 2320 2321 m.reset("testa\u0300"); 2322 assertTrue(m.find()); 2323 2324 m.reset("testa\u030a"); 2325 assertTrue(m.find()); 2326 2327 // Marks that cannot legally change order and be equivalent 2328 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2329 check(p, "testa\u0308\u0300", true); 2330 check(p, "testa\u0300\u0308", false); 2331 2332 // Marks that can legally change order and be equivalent 2333 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2334 check(p, "testa\u0308\u0323", true); 2335 check(p, "testa\u0323\u0308", true); 2336 2337 // Test all equivalences of the sequence a\u0308\u0323\u0300 2338 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2339 check(p, "testa\u0308\u0323\u0300", true); 2340 check(p, "testa\u0323\u0308\u0300", true); 2341 check(p, "testa\u0308\u0300\u0323", true); 2342 check(p, "test\u00e4\u0323\u0300", true); 2343 check(p, "test\u00e4\u0300\u0323", true); 2344 2345 Object[][] data = new Object[][] { 2346 2347 // JDK-4867170 2348 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2349 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2350 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2351 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2352 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2353 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2354 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2355 2356 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2357 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2358 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2359 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2360 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2361 2362 // backtracking, force to match "\u1f80", instead of \u1f82" 2363 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2364 2365 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2366 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2367 2368 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2369 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2370 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2371 2372 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2373 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2374 { "\u1f80", "ab\u1f80cd", "f", true }, 2375 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2376 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2377 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2378 { "\u1f82", "\u1f80\u0300", "m", true }, 2379 2380 // JDK-7080302 # compile failed 2381 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2382 2383 // JDK-6728861, same cause as above one 2384 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2385 2386 // JDK-6995635 2387 { "(\u00e9)", "e\u0301", "m", true }, 2388 2389 // JDK-6736245 2390 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2391 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2392 { "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD 2393 2394 // 4916384. 2395 // Decomposed hangul (jamos) works inside clazz 2396 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2397 { "[\u1100\u1161]", "\uac00", "m", true}, 2398 2399 { "[\uac00]", "\u1100\u1161", "m", true}, 2400 { "[\uac00]", "\uac00", "m", true}, 2401 2402 // Decomposed hangul (jamos) 2403 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2404 { "\u1100\u1161", "\uac00", "m", true}, 2405 2406 // Composed hangul 2407 { "\uac00", "\u1100\u1161", "m", true }, 2408 { "\uac00", "\uac00", "m", true }, 2409 2410 /* Need a NFDSlice to nfd the source to solve this issue 2411 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2412 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2413 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2414 2415 // Decomposed supplementary outside char classes 2416 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2417 // Composed supplementary outside char classes 2418 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2419 */ 2420 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2421 //{ "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, //problem 2422 2423 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2424 //{ "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, //problem 2425 }; 2426 2427 for (Object[] d : data) { 2428 String pn = (String)d[0]; 2429 String tt = (String)d[1]; 2430 boolean isFind = "f".equals((d[2])); 2431 boolean expected = (boolean)d[3]; 2432 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2433 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2434 if (ret != expected) { 2435 fail("pn: " + pn + "\ntt: " + tt + "\nexpected: " + expected + "\nret: " + ret); 2436 } 2437 } 2438 } 2439 2440 /** 2441 * A basic sanity test of Matcher.replaceAll(). 2442 */ 2443 @Test globalSubstitute()2444 public static void globalSubstitute() { 2445 // Global substitution with a literal 2446 Pattern p = Pattern.compile("(ab)(c*)"); 2447 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2448 assertEquals(m.replaceAll("test"), "testzzztestzzztest"); 2449 2450 m.reset("zzzabccczzzabcczzzabccczzz"); 2451 assertEquals(m.replaceAll("test"), "zzztestzzztestzzztestzzz"); 2452 2453 // Global substitution with groups 2454 m.reset("zzzabccczzzabcczzzabccczzz"); 2455 String result = m.replaceAll("$1"); 2456 assertEquals(result, "zzzabzzzabzzzabzzz"); 2457 2458 // Supplementary character test 2459 // Global substitution with a literal 2460 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2461 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2462 assertEquals(m.replaceAll(toSupplementaries("test")), 2463 toSupplementaries("testzzztestzzztest")); 2464 2465 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2466 assertEquals(m.replaceAll(toSupplementaries("test")), 2467 toSupplementaries("zzztestzzztestzzztestzzz")); 2468 2469 // Global substitution with groups 2470 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2471 result = m.replaceAll("$1"); 2472 assertEquals(result,toSupplementaries("zzzabzzzabzzzabzzz")); 2473 } 2474 2475 /** 2476 * Tests the usage of Matcher.appendReplacement() with literal 2477 * and group substitutions. 2478 */ 2479 @Test stringBufferSubstituteLiteral()2480 public static void stringBufferSubstituteLiteral() { 2481 // SB substitution with literal 2482 final String blah = "zzzblahzzz"; 2483 final Pattern p = Pattern.compile("blah"); 2484 final Matcher m = p.matcher(blah); 2485 final StringBuffer result = new StringBuffer(); 2486 2487 assertThrows(IllegalStateException.class, () -> m.appendReplacement(result, "blech")); 2488 2489 m.find(); 2490 m.appendReplacement(result, "blech"); 2491 assertEquals(result.toString(), "zzzblech"); 2492 2493 m.appendTail(result); 2494 assertEquals(result.toString(), "zzzblechzzz"); 2495 2496 } 2497 2498 @Test stringBufferSubtituteWithGroups()2499 public static void stringBufferSubtituteWithGroups() { 2500 // SB substitution with groups 2501 final String blah = "zzzabcdzzz"; 2502 final Pattern p = Pattern.compile("(ab)(cd)*"); 2503 final Matcher m = p.matcher(blah); 2504 final StringBuffer result = new StringBuffer(); 2505 assertThrows(IllegalStateException.class, () -> m.appendReplacement(result, "$1")); 2506 m.find(); 2507 m.appendReplacement(result, "$1"); 2508 assertEquals(result.toString(), "zzzab"); 2509 2510 m.appendTail(result); 2511 assertEquals(result.toString(), "zzzabzzz"); 2512 } 2513 2514 @Test stringBufferThreeSubstitution()2515 public static void stringBufferThreeSubstitution() { 2516 // SB substitution with 3 groups 2517 final String blah = "zzzabcdcdefzzz"; 2518 final Pattern p = Pattern.compile("(ab)(cd)*(ef)"); 2519 final Matcher m = p.matcher(blah); 2520 final StringBuffer result = new StringBuffer(); 2521 assertThrows(IllegalStateException.class, () -> m.appendReplacement(result, "$1w$2w$3")); 2522 m.find(); 2523 m.appendReplacement(result, "$1w$2w$3"); 2524 assertEquals(result.toString(), "zzzabwcdwef"); 2525 2526 m.appendTail(result); 2527 assertEquals(result.toString(), "zzzabwcdwefzzz"); 2528 2529 } 2530 2531 @Test stringBufferSubstituteGroupsThreeMatches()2532 public static void stringBufferSubstituteGroupsThreeMatches() { 2533 // SB substitution with groups and three matches 2534 // skipping middle match 2535 final String blah = "zzzabcdzzzabcddzzzabcdzzz"; 2536 final Pattern p = Pattern.compile("(ab)(cd*)"); 2537 final Matcher m = p.matcher(blah); 2538 final StringBuffer result = new StringBuffer(); 2539 assertThrows(IllegalStateException.class, () -> m.appendReplacement(result, "$1")); 2540 2541 m.find(); 2542 m.appendReplacement(result, "$1"); 2543 assertEquals(result.toString(), "zzzab"); 2544 2545 m.find(); 2546 m.find(); 2547 m.appendReplacement(result, "$2"); 2548 assertEquals(result.toString(), "zzzabzzzabcddzzzcd"); 2549 2550 m.appendTail(result); 2551 assertEquals(result.toString(), "zzzabzzzabcddzzzcdzzz"); 2552 2553 2554 } 2555 2556 @Test stringBufferEscapedDollar()2557 public static void stringBufferEscapedDollar() { 2558 // Check to make sure escaped $ is ignored 2559 String blah = "zzzabcdcdefzzz"; 2560 Pattern p = Pattern.compile("(ab)(cd)*(ef)"); 2561 Matcher m = p.matcher(blah); 2562 StringBuffer result = new StringBuffer(); 2563 m.find(); 2564 m.appendReplacement(result, "$1w\\$2w$3"); 2565 assertEquals(result.toString(), "zzzabw$2wef"); 2566 2567 m.appendTail(result); 2568 assertEquals(result.toString(), "zzzabw$2wefzzz"); 2569 } 2570 2571 @Test stringBufferNonExistentGroup()2572 public static void stringBufferNonExistentGroup() { 2573 // Check to make sure a reference to nonexistent group causes error 2574 final String blah = "zzzabcdcdefzzz"; 2575 final Pattern p = Pattern.compile("(ab)(cd)*(ef)"); 2576 final Matcher m = p.matcher(blah); 2577 final StringBuffer result = new StringBuffer(); 2578 m.find(); 2579 assertThrows(IndexOutOfBoundsException.class, 2580 () -> m.appendReplacement(result, "$1w$5w$3")); 2581 } 2582 2583 @Test stringBufferCheckDoubleDigitGroupReferences()2584 public static void stringBufferCheckDoubleDigitGroupReferences() { 2585 2586 // Check double digit group references 2587 String blah = "zzz123456789101112zzz"; 2588 Pattern p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2589 Matcher m = p.matcher(blah); 2590 StringBuffer result = new StringBuffer(); 2591 m.find(); 2592 m.appendReplacement(result, "$1w$11w$3"); 2593 assertEquals(result.toString(), "zzz1w11w3"); 2594 2595 } 2596 2597 @Test stringBufferBackoff()2598 public static void stringBufferBackoff() { 2599 // Check to make sure it backs off $15 to $1 if only three groups 2600 String blah = "zzzabcdcdefzzz"; 2601 Pattern p = Pattern.compile("(ab)(cd)*(ef)"); 2602 Matcher m = p.matcher(blah); 2603 StringBuffer result = new StringBuffer(); 2604 m.find(); 2605 m.appendReplacement(result, "$1w$15w$3"); 2606 assertEquals(result.toString(), "zzzabwab5wef"); 2607 } 2608 2609 @Test stringBufferSupplementaryCharacter()2610 public static void stringBufferSupplementaryCharacter(){ 2611 // Supplementary character test 2612 // SB substitution with literal 2613 final String blah = toSupplementaries("zzzblahzzz"); 2614 final Pattern p = Pattern.compile(toSupplementaries("blah")); 2615 final Matcher m = p.matcher(blah); 2616 final StringBuffer result = new StringBuffer(); 2617 assertThrows(IllegalStateException.class, 2618 () -> m.appendReplacement(result, toSupplementaries("blech"))); 2619 m.find(); 2620 m.appendReplacement(result, toSupplementaries("blech")); 2621 assertEquals(result.toString(), toSupplementaries("zzzblech")); 2622 2623 m.appendTail(result); 2624 assertEquals(result.toString(), toSupplementaries("zzzblechzzz")); 2625 } 2626 2627 @Test stringBufferSubstitutionWithGroups()2628 public static void stringBufferSubstitutionWithGroups() { 2629 // SB substitution with groups 2630 final String blah = toSupplementaries("zzzabcdzzz"); 2631 final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2632 final Matcher m = p.matcher(blah); 2633 final StringBuffer result = new StringBuffer(); 2634 assertThrows(IllegalStateException.class, 2635 () -> m.appendReplacement(result, "$1")); 2636 m.find(); 2637 m.appendReplacement(result, "$1"); 2638 assertEquals(result.toString(), toSupplementaries("zzzab")); 2639 2640 m.appendTail(result); 2641 assertEquals(result.toString(), toSupplementaries("zzzabzzz")); 2642 } 2643 2644 @Test stringBufferSubstituteWithThreeGroups()2645 public static void stringBufferSubstituteWithThreeGroups() { 2646 // SB substitution with 3 groups 2647 final String blah = toSupplementaries("zzzabcdcdefzzz"); 2648 final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2649 final Matcher m = p.matcher(blah); 2650 final StringBuffer result = new StringBuffer(); 2651 assertThrows(IllegalStateException.class, 2652 () -> m.appendReplacement(result, toSupplementaries("$1w$2w$3"))); 2653 2654 m.find(); 2655 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2656 assertEquals(result.toString(), toSupplementaries("zzzabwcdwef")); 2657 2658 m.appendTail(result); 2659 assertEquals(result.toString(), toSupplementaries("zzzabwcdwefzzz")); 2660 } 2661 2662 @Test stringBufferWithGroupsAndThreeMatches()2663 public static void stringBufferWithGroupsAndThreeMatches() { 2664 // SB substitution with groups and three matches 2665 // skipping middle match 2666 final String blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 2667 final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 2668 final Matcher m = p.matcher(blah); 2669 final StringBuffer result = new StringBuffer(); 2670 assertThrows(IllegalStateException.class, () -> 2671 m.appendReplacement(result, "$1")); 2672 2673 m.find(); 2674 m.appendReplacement(result, "$1"); 2675 assertEquals(result.toString(), toSupplementaries("zzzab")); 2676 2677 m.find(); 2678 m.find(); 2679 m.appendReplacement(result, "$2"); 2680 assertEquals(result.toString(), toSupplementaries("zzzabzzzabcddzzzcd")); 2681 2682 m.appendTail(result); 2683 assertEquals(result.toString(), toSupplementaries("zzzabzzzabcddzzzcdzzz")); 2684 } 2685 2686 @Test stringBufferEnsureDollarIgnored()2687 public static void stringBufferEnsureDollarIgnored() { 2688 // Check to make sure escaped $ is ignored 2689 String blah = toSupplementaries("zzzabcdcdefzzz"); 2690 Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2691 Matcher m = p.matcher(blah); 2692 StringBuffer result = new StringBuffer(); 2693 m.find(); 2694 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 2695 assertEquals(result.toString(), toSupplementaries("zzzabw$2wef")); 2696 2697 m.appendTail(result); 2698 assertEquals(result.toString(), toSupplementaries("zzzabw$2wefzzz")); 2699 } 2700 2701 @Test stringBufferCheckNonexistentGroupReference()2702 public static void stringBufferCheckNonexistentGroupReference() { 2703 // Check to make sure a reference to nonexistent group causes error 2704 final String blah = toSupplementaries("zzzabcdcdefzzz"); 2705 final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2706 final Matcher m = p.matcher(blah); 2707 final StringBuffer result = new StringBuffer(); 2708 m.find(); 2709 assertThrows(IndexOutOfBoundsException.class, () -> 2710 m.appendReplacement(result, toSupplementaries("$1w$5w$3"))); 2711 } 2712 2713 @Test stringBufferCheckSupplementalDoubleDigitGroupReferences()2714 public static void stringBufferCheckSupplementalDoubleDigitGroupReferences() { 2715 // Check double digit group references 2716 String blah = toSupplementaries("zzz123456789101112zzz"); 2717 Pattern p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2718 Matcher m = p.matcher(blah); 2719 StringBuffer result = new StringBuffer(); 2720 m.find(); 2721 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 2722 assertEquals(result.toString(), toSupplementaries("zzz1w11w3")); 2723 } 2724 2725 @Test stringBufferBackoffSupplemental()2726 public static void stringBufferBackoffSupplemental() { 2727 // Check to make sure it backs off $15 to $1 if only three groups 2728 String blah = toSupplementaries("zzzabcdcdefzzz"); 2729 Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2730 Matcher m = p.matcher(blah); 2731 StringBuffer result = new StringBuffer(); 2732 m.find(); 2733 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 2734 assertEquals(result.toString(), toSupplementaries("zzzabwab5wef")); 2735 } 2736 2737 // Android-changed: Disable stringBufferCheckAppendException() test due to app compat behavior. 2738 // @Test stringBufferCheckAppendException()2739 public static void stringBufferCheckAppendException() { 2740 // Check nothing has been appended into the output buffer if 2741 // the replacement string triggers IllegalArgumentException. 2742 Pattern p = Pattern.compile("(abc)"); 2743 Matcher m = p.matcher("abcd"); 2744 StringBuffer result = new StringBuffer(); 2745 m.find(); 2746 expectThrows(IllegalArgumentException.class, 2747 () -> m.appendReplacement(result, ("xyz$g"))); 2748 assertEquals(result.length(), 0); 2749 2750 } 2751 /** 2752 * Tests the usage of Matcher.appendReplacement() with literal 2753 * and group substitutions. 2754 */ 2755 @Test stringBuilderSubstitutionWithLiteral()2756 public static void stringBuilderSubstitutionWithLiteral() { 2757 // SB substitution with literal 2758 final String blah = "zzzblahzzz"; 2759 final Pattern p = Pattern.compile("blah"); 2760 final Matcher m = p.matcher(blah); 2761 final StringBuilder result = new StringBuilder(); 2762 assertThrows(IllegalStateException.class, () -> 2763 m.appendReplacement(result, "blech")); 2764 2765 m.find(); 2766 m.appendReplacement(result, "blech"); 2767 assertEquals(result.toString(), "zzzblech"); 2768 2769 m.appendTail(result); 2770 assertEquals(result.toString(), "zzzblechzzz"); 2771 } 2772 2773 @Test stringBuilderSubstitutionWithGroups()2774 public static void stringBuilderSubstitutionWithGroups() { 2775 // SB substitution with groups 2776 final String blah = "zzzabcdzzz"; 2777 final Pattern p = Pattern.compile("(ab)(cd)*"); 2778 final Matcher m = p.matcher(blah); 2779 final StringBuilder result = new StringBuilder(); 2780 assertThrows(IllegalStateException.class, () -> 2781 m.appendReplacement(result, "$1")); 2782 m.find(); 2783 m.appendReplacement(result, "$1"); 2784 assertEquals(result.toString(), "zzzab"); 2785 2786 m.appendTail(result); 2787 assertEquals(result.toString(), "zzzabzzz"); 2788 } 2789 2790 @Test stringBuilderSubstitutionWithThreeGroups()2791 public static void stringBuilderSubstitutionWithThreeGroups() { 2792 // SB substitution with 3 groups 2793 final String blah = "zzzabcdcdefzzz"; 2794 final Pattern p = Pattern.compile("(ab)(cd)*(ef)"); 2795 final Matcher m = p.matcher(blah); 2796 final StringBuilder result = new StringBuilder(); 2797 assertThrows(IllegalStateException.class, () -> 2798 m.appendReplacement(result, "$1w$2w$3")); 2799 2800 m.find(); 2801 m.appendReplacement(result, "$1w$2w$3"); 2802 assertEquals(result.toString(), "zzzabwcdwef"); 2803 2804 m.appendTail(result); 2805 assertEquals(result.toString(), "zzzabwcdwefzzz"); 2806 } 2807 2808 @Test stringBuilderSubstitutionThreeMatch()2809 public static void stringBuilderSubstitutionThreeMatch() { 2810 // SB substitution with groups and three matches 2811 // skipping middle match 2812 final String blah = "zzzabcdzzzabcddzzzabcdzzz"; 2813 final Pattern p = Pattern.compile("(ab)(cd*)"); 2814 final Matcher m = p.matcher(blah); 2815 final StringBuilder result = new StringBuilder(); 2816 assertThrows(IllegalStateException.class, () -> 2817 m.appendReplacement(result, "$1")); 2818 m.find(); 2819 m.appendReplacement(result, "$1"); 2820 assertEquals(result.toString(), "zzzab"); 2821 2822 m.find(); 2823 m.find(); 2824 m.appendReplacement(result, "$2"); 2825 assertEquals(result.toString(), "zzzabzzzabcddzzzcd"); 2826 2827 m.appendTail(result); 2828 assertEquals(result.toString(), "zzzabzzzabcddzzzcdzzz"); 2829 } 2830 2831 @Test stringBuilderSubtituteCheckEscapedDollar()2832 public static void stringBuilderSubtituteCheckEscapedDollar() { 2833 // Check to make sure escaped $ is ignored 2834 final String blah = "zzzabcdcdefzzz"; 2835 final Pattern p = Pattern.compile("(ab)(cd)*(ef)"); 2836 final Matcher m = p.matcher(blah); 2837 final StringBuilder result = new StringBuilder(); 2838 m.find(); 2839 m.appendReplacement(result, "$1w\\$2w$3"); 2840 assertEquals(result.toString(), "zzzabw$2wef"); 2841 2842 m.appendTail(result); 2843 assertEquals(result.toString(), "zzzabw$2wefzzz"); 2844 } 2845 2846 @Test stringBuilderNonexistentGroupError()2847 public static void stringBuilderNonexistentGroupError() { 2848 // Check to make sure a reference to nonexistent group causes error 2849 final String blah = "zzzabcdcdefzzz"; 2850 final Pattern p = Pattern.compile("(ab)(cd)*(ef)"); 2851 final Matcher m = p.matcher(blah); 2852 final StringBuilder result = new StringBuilder(); 2853 m.find(); 2854 assertThrows(IndexOutOfBoundsException.class, () -> 2855 m.appendReplacement(result, "$1w$5w$3")); 2856 } 2857 2858 @Test stringBuilderDoubleDigitGroupReferences()2859 public static void stringBuilderDoubleDigitGroupReferences() { 2860 // Check double digit group references 2861 final String blah = "zzz123456789101112zzz"; 2862 final Pattern p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2863 final Matcher m = p.matcher(blah); 2864 final StringBuilder result = new StringBuilder(); 2865 m.find(); 2866 m.appendReplacement(result, "$1w$11w$3"); 2867 assertEquals(result.toString(), "zzz1w11w3"); 2868 } 2869 2870 @Test stringBuilderCheckBackoff()2871 public static void stringBuilderCheckBackoff() { 2872 // Check to make sure it backs off $15 to $1 if only three groups 2873 final String blah = "zzzabcdcdefzzz"; 2874 final Pattern p = Pattern.compile("(ab)(cd)*(ef)"); 2875 final Matcher m = p.matcher(blah); 2876 final StringBuilder result = new StringBuilder(); 2877 m.find(); 2878 m.appendReplacement(result, "$1w$15w$3"); 2879 assertEquals(result.toString(), "zzzabwab5wef"); 2880 } 2881 2882 @Test stringBuilderSupplementalLiteralSubstitution()2883 public static void stringBuilderSupplementalLiteralSubstitution() { 2884 // Supplementary character test 2885 // SB substitution with literal 2886 final String blah = toSupplementaries("zzzblahzzz"); 2887 final Pattern p = Pattern.compile(toSupplementaries("blah")); 2888 final Matcher m = p.matcher(blah); 2889 final StringBuilder result = new StringBuilder(); 2890 assertThrows(IllegalStateException.class, 2891 () -> m.appendReplacement(result, toSupplementaries("blech"))); 2892 m.find(); 2893 m.appendReplacement(result, toSupplementaries("blech")); 2894 assertEquals(result.toString(), toSupplementaries("zzzblech")); 2895 m.appendTail(result); 2896 assertEquals(result.toString(), toSupplementaries("zzzblechzzz")); 2897 } 2898 2899 @Test stringBuilderSupplementalSubstitutionWithGroups()2900 public static void stringBuilderSupplementalSubstitutionWithGroups() { 2901 // SB substitution with groups 2902 final String blah = toSupplementaries("zzzabcdzzz"); 2903 final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2904 final Matcher m = p.matcher(blah); 2905 final StringBuilder result = new StringBuilder(); 2906 assertThrows(IllegalStateException.class, 2907 () -> m.appendReplacement(result, "$1")); 2908 m.find(); 2909 m.appendReplacement(result, "$1"); 2910 assertEquals(result.toString(), toSupplementaries("zzzab")); 2911 2912 m.appendTail(result); 2913 assertEquals(result.toString(), toSupplementaries("zzzabzzz")); 2914 } 2915 2916 @Test stringBuilderSupplementalSubstitutionThreeGroups()2917 public static void stringBuilderSupplementalSubstitutionThreeGroups() { 2918 // SB substitution with 3 groups 2919 final String blah = toSupplementaries("zzzabcdcdefzzz"); 2920 final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2921 final Matcher m = p.matcher(blah); 2922 final StringBuilder result = new StringBuilder(); 2923 assertThrows(IllegalStateException.class, () -> 2924 m.appendReplacement(result, toSupplementaries("$1w$2w$3"))); 2925 m.find(); 2926 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2927 assertEquals(result.toString(), toSupplementaries("zzzabwcdwef")); 2928 2929 m.appendTail(result); 2930 assertEquals(result.toString(), toSupplementaries("zzzabwcdwefzzz")); 2931 } 2932 2933 @Test stringBuilderSubstitutionSupplementalSkipMiddleThreeMatch()2934 public static void stringBuilderSubstitutionSupplementalSkipMiddleThreeMatch() { 2935 // SB substitution with groups and three matches 2936 // skipping middle match 2937 final String blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 2938 final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 2939 final Matcher m = p.matcher(blah); 2940 final StringBuilder result = new StringBuilder(); 2941 assertThrows(IllegalStateException.class, () -> 2942 m.appendReplacement(result, "$1")); 2943 m.find(); 2944 m.appendReplacement(result, "$1"); 2945 assertEquals(result.toString(), toSupplementaries("zzzab")); 2946 2947 m.find(); 2948 m.find(); 2949 m.appendReplacement(result, "$2"); 2950 assertEquals(result.toString(), toSupplementaries("zzzabzzzabcddzzzcd")); 2951 2952 m.appendTail(result); 2953 assertEquals(result.toString(), toSupplementaries("zzzabzzzabcddzzzcdzzz")); 2954 } 2955 2956 @Test stringBuilderSupplementalEscapedDollar()2957 public static void stringBuilderSupplementalEscapedDollar() { 2958 // Check to make sure escaped $ is ignored 2959 final String blah = toSupplementaries("zzzabcdcdefzzz"); 2960 final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2961 final Matcher m = p.matcher(blah); 2962 final StringBuilder result = new StringBuilder(); 2963 m.find(); 2964 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 2965 assertEquals(result.toString(), toSupplementaries("zzzabw$2wef")); 2966 2967 m.appendTail(result); 2968 assertEquals(result.toString(), toSupplementaries("zzzabw$2wefzzz")); 2969 } 2970 2971 @Test stringBuilderSupplementalNonExistentGroupError()2972 public static void stringBuilderSupplementalNonExistentGroupError() { 2973 // Check to make sure a reference to nonexistent group causes error 2974 final String blah = toSupplementaries("zzzabcdcdefzzz"); 2975 final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2976 final Matcher m = p.matcher(blah); 2977 final StringBuilder result = new StringBuilder(); 2978 m.find(); 2979 assertThrows(IndexOutOfBoundsException.class, () -> 2980 m.appendReplacement(result, toSupplementaries("$1w$5w$3"))); 2981 } 2982 2983 @Test stringBuilderSupplementalCheckDoubleDigitGroupReferences()2984 public static void stringBuilderSupplementalCheckDoubleDigitGroupReferences() { 2985 // Check double digit group references 2986 final String blah = toSupplementaries("zzz123456789101112zzz"); 2987 final Pattern p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2988 final Matcher m = p.matcher(blah); 2989 final StringBuilder result = new StringBuilder(); 2990 m.find(); 2991 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 2992 assertEquals(result.toString(), toSupplementaries("zzz1w11w3")); 2993 } 2994 2995 @Test stringBuilderSupplementalCheckBackoff()2996 public static void stringBuilderSupplementalCheckBackoff() { 2997 // Check to make sure it backs off $15 to $1 if only three groups 2998 final String blah = toSupplementaries("zzzabcdcdefzzz"); 2999 final Pattern p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3000 final Matcher m = p.matcher(blah); 3001 final StringBuilder result = new StringBuilder(); 3002 m.find(); 3003 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3004 assertEquals(result.toString(), toSupplementaries("zzzabwab5wef")); 3005 } 3006 3007 @Test stringBuilderCheckIllegalArgumentException()3008 public static void stringBuilderCheckIllegalArgumentException() { 3009 // Check nothing has been appended into the output buffer if 3010 // the replacement string triggers IllegalArgumentException. 3011 final Pattern p = Pattern.compile("(abc)"); 3012 final Matcher m = p.matcher("abcd"); 3013 final StringBuilder result = new StringBuilder(); 3014 m.find(); 3015 // Android-removed: Our implementation is more lenient than upstream. 3016 // assertThrows(IllegalArgumentException.class, () -> 3017 // m.appendReplacement(result, ("xyz$g"))); 3018 // assertEquals(result.length(), 0); 3019 } 3020 3021 /* 3022 * 5 groups of characters are created to make a substitution string. 3023 * A base string will be created including random lead chars, the 3024 * substitution string, and random trailing chars. 3025 * A pattern containing the 5 groups is searched for and replaced with: 3026 * random group + random string + random group. 3027 * The results are checked for correctness. 3028 */ 3029 @Test substitutionBasher()3030 public static void substitutionBasher() { 3031 for (int runs = 0; runs<1000; runs++) { 3032 // Create a base string to work in 3033 int leadingChars = generator.nextInt(10); 3034 StringBuilder baseBuffer = new StringBuilder(100); 3035 String leadingString = getRandomAlphaString(leadingChars); 3036 baseBuffer.append(leadingString); 3037 3038 // Create 5 groups of random number of random chars 3039 // Create the string to substitute 3040 // Create the pattern string to search for 3041 StringBuilder bufferToSub = new StringBuilder(25); 3042 StringBuilder bufferToPat = new StringBuilder(50); 3043 String[] groups = new String[5]; 3044 for(int i=0; i<5; i++) { 3045 int aGroupSize = generator.nextInt(5)+1; 3046 groups[i] = getRandomAlphaString(aGroupSize); 3047 bufferToSub.append(groups[i]); 3048 bufferToPat.append('('); 3049 bufferToPat.append(groups[i]); 3050 bufferToPat.append(')'); 3051 } 3052 String stringToSub = bufferToSub.toString(); 3053 String pattern = bufferToPat.toString(); 3054 3055 // Place sub string into working string at random index 3056 baseBuffer.append(stringToSub); 3057 3058 // Append random chars to end 3059 int trailingChars = generator.nextInt(10); 3060 String trailingString = getRandomAlphaString(trailingChars); 3061 baseBuffer.append(trailingString); 3062 String baseString = baseBuffer.toString(); 3063 3064 // Create test pattern and matcher 3065 Pattern p = Pattern.compile(pattern); 3066 Matcher m = p.matcher(baseString); 3067 3068 // Reject candidate if pattern happens to start early 3069 m.find(); 3070 if (m.start() < leadingChars) 3071 continue; 3072 3073 // Reject candidate if more than one match 3074 if (m.find()) 3075 continue; 3076 3077 // Construct a replacement string with : 3078 // random group + random string + random group 3079 StringBuilder bufferToRep = new StringBuilder(); 3080 int groupIndex1 = generator.nextInt(5); 3081 bufferToRep.append("$").append(groupIndex1 + 1); 3082 String randomMidString = getRandomAlphaString(5); 3083 bufferToRep.append(randomMidString); 3084 int groupIndex2 = generator.nextInt(5); 3085 bufferToRep.append("$").append(groupIndex2 + 1); 3086 String replacement = bufferToRep.toString(); 3087 3088 // Do the replacement 3089 String result = m.replaceAll(replacement); 3090 3091 // Construct expected result 3092 String expectedResult = leadingString + 3093 groups[groupIndex1] + 3094 randomMidString + 3095 groups[groupIndex2] + 3096 trailingString; 3097 3098 // Check results 3099 assertEquals(result, expectedResult); 3100 } 3101 } 3102 3103 /* 3104 * 5 groups of characters are created to make a substitution string. 3105 * A base string will be created including random lead chars, the 3106 * substitution string, and random trailing chars. 3107 * A pattern containing the 5 groups is searched for and replaced with: 3108 * random group + random string + random group. 3109 * The results are checked for correctness. 3110 */ 3111 @Test substitutionBasher2()3112 public static void substitutionBasher2() { 3113 for (int runs = 0; runs<1000; runs++) { 3114 // Create a base string to work in 3115 int leadingChars = generator.nextInt(10); 3116 StringBuilder baseBuffer = new StringBuilder(100); 3117 String leadingString = getRandomAlphaString(leadingChars); 3118 baseBuffer.append(leadingString); 3119 3120 // Create 5 groups of random number of random chars 3121 // Create the string to substitute 3122 // Create the pattern string to search for 3123 StringBuilder bufferToSub = new StringBuilder(25); 3124 StringBuilder bufferToPat = new StringBuilder(50); 3125 String[] groups = new String[5]; 3126 for(int i=0; i<5; i++) { 3127 int aGroupSize = generator.nextInt(5)+1; 3128 groups[i] = getRandomAlphaString(aGroupSize); 3129 bufferToSub.append(groups[i]); 3130 bufferToPat.append('('); 3131 bufferToPat.append(groups[i]); 3132 bufferToPat.append(')'); 3133 } 3134 String stringToSub = bufferToSub.toString(); 3135 String pattern = bufferToPat.toString(); 3136 3137 // Place sub string into working string at random index 3138 baseBuffer.append(stringToSub); 3139 3140 // Append random chars to end 3141 int trailingChars = generator.nextInt(10); 3142 String trailingString = getRandomAlphaString(trailingChars); 3143 baseBuffer.append(trailingString); 3144 String baseString = baseBuffer.toString(); 3145 3146 // Create test pattern and matcher 3147 Pattern p = Pattern.compile(pattern); 3148 Matcher m = p.matcher(baseString); 3149 3150 // Reject candidate if pattern happens to start early 3151 m.find(); 3152 if (m.start() < leadingChars) 3153 continue; 3154 3155 // Reject candidate if more than one match 3156 if (m.find()) 3157 continue; 3158 3159 // Construct a replacement string with : 3160 // random group + random string + random group 3161 StringBuilder bufferToRep = new StringBuilder(); 3162 int groupIndex1 = generator.nextInt(5); 3163 bufferToRep.append("$").append(groupIndex1 + 1); 3164 String randomMidString = getRandomAlphaString(5); 3165 bufferToRep.append(randomMidString); 3166 int groupIndex2 = generator.nextInt(5); 3167 bufferToRep.append("$").append(groupIndex2 + 1); 3168 String replacement = bufferToRep.toString(); 3169 3170 // Do the replacement 3171 String result = m.replaceAll(replacement); 3172 3173 // Construct expected result 3174 String expectedResult = leadingString + 3175 groups[groupIndex1] + 3176 randomMidString + 3177 groups[groupIndex2] + 3178 trailingString; 3179 3180 // Check results 3181 assertEquals(result, expectedResult); 3182 } 3183 } 3184 3185 /** 3186 * Checks the handling of some escape sequences that the Pattern 3187 * class should process instead of the java compiler. These are 3188 * not in the file because the escapes should be be processed 3189 * by the Pattern class when the regex is compiled. 3190 */ 3191 @Test escapes()3192 public static void escapes() { 3193 Pattern p = Pattern.compile("\\043"); 3194 Matcher m = p.matcher("#"); 3195 assertTrue(m.find()); 3196 3197 p = Pattern.compile("\\x23"); 3198 m = p.matcher("#"); 3199 assertTrue(m.find()); 3200 3201 p = Pattern.compile("\\u0023"); 3202 m = p.matcher("#"); 3203 assertTrue(m.find()); 3204 } 3205 3206 /** 3207 * Checks the handling of blank input situations. These 3208 * tests are incompatible with my test file format. 3209 */ 3210 @Test blankInput()3211 public static void blankInput() { 3212 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3213 Matcher m = p.matcher(""); 3214 assertFalse(m.find()); 3215 3216 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3217 m = p.matcher(""); 3218 assertTrue(m.find()); 3219 3220 p = Pattern.compile("abc"); 3221 m = p.matcher(""); 3222 assertFalse(m.find()); 3223 3224 p = Pattern.compile("a*"); 3225 m = p.matcher(""); 3226 assertTrue(m.find()); 3227 } 3228 3229 /** 3230 * Tests the Boyer-Moore pattern matching of a character sequence 3231 * on randomly generated patterns. 3232 */ 3233 @Test bm()3234 public static void bm() { 3235 doBnM('a'); 3236 3237 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3238 } 3239 doBnM(int baseCharacter)3240 private static void doBnM(int baseCharacter) { 3241 for (int i=0; i<100; i++) { 3242 // Create a short pattern to search for 3243 int patternLength = generator.nextInt(7) + 4; 3244 StringBuilder patternBuffer = new StringBuilder(patternLength); 3245 String pattern; 3246 retry: for (;;) { 3247 for (int x=0; x<patternLength; x++) { 3248 int ch = baseCharacter + generator.nextInt(26); 3249 if (Character.isSupplementaryCodePoint(ch)) { 3250 patternBuffer.append(Character.toChars(ch)); 3251 } else { 3252 patternBuffer.append((char)ch); 3253 } 3254 } 3255 pattern = patternBuffer.toString(); 3256 3257 // Avoid patterns that start and end with the same substring 3258 // See JDK-6854417 3259 for (int x=1; x < pattern.length(); x++) { 3260 if (pattern.startsWith(pattern.substring(x))) 3261 continue retry; 3262 } 3263 break; 3264 } 3265 Pattern p = Pattern.compile(pattern); 3266 3267 // Create a buffer with random ASCII chars that does 3268 // not match the sample 3269 String toSearch; 3270 StringBuffer s; 3271 Matcher m = p.matcher(""); 3272 do { 3273 s = new StringBuffer(100); 3274 for (int x=0; x<100; x++) { 3275 int ch = baseCharacter + generator.nextInt(26); 3276 if (Character.isSupplementaryCodePoint(ch)) { 3277 s.append(Character.toChars(ch)); 3278 } else { 3279 s.append((char)ch); 3280 } 3281 } 3282 toSearch = s.toString(); 3283 m.reset(toSearch); 3284 } while (m.find()); 3285 3286 // Insert the pattern at a random spot 3287 int insertIndex = generator.nextInt(99); 3288 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3289 insertIndex++; 3290 s.insert(insertIndex, pattern); 3291 toSearch = s.toString(); 3292 3293 // Make sure that the pattern is found 3294 m.reset(toSearch); 3295 assertTrue(m.find()); 3296 3297 // Make sure that the match text is the pattern 3298 assertEquals(m.group(), pattern); 3299 3300 // Make sure match occured at insertion point 3301 assertEquals(m.start(), insertIndex); 3302 } 3303 } 3304 3305 /** 3306 * Tests the matching of slices on randomly generated patterns. 3307 * The Boyer-Moore optimization is not done on these patterns 3308 * because it uses unicode case folding. 3309 */ 3310 @Test slice()3311 public static void slice() { 3312 doSlice(Character.MAX_VALUE); 3313 3314 doSlice(Character.MAX_CODE_POINT); 3315 } 3316 doSlice(int maxCharacter)3317 private static void doSlice(int maxCharacter) { 3318 for (int i=0; i<100; i++) { 3319 // Create a short pattern to search for 3320 int patternLength = generator.nextInt(7) + 4; 3321 StringBuilder patternBuffer = new StringBuilder(patternLength); 3322 for (int x=0; x<patternLength; x++) { 3323 int randomChar = 0; 3324 while (!Character.isLetterOrDigit(randomChar)) 3325 randomChar = generator.nextInt(maxCharacter); 3326 if (Character.isSupplementaryCodePoint(randomChar)) { 3327 patternBuffer.append(Character.toChars(randomChar)); 3328 } else { 3329 patternBuffer.append((char) randomChar); 3330 } 3331 } 3332 String pattern = patternBuffer.toString(); 3333 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3334 3335 // Create a buffer with random chars that does not match the sample 3336 String toSearch = null; 3337 StringBuffer s = null; 3338 Matcher m = p.matcher(""); 3339 do { 3340 s = new StringBuffer(100); 3341 for (int x=0; x<100; x++) { 3342 int randomChar = 0; 3343 while (!Character.isLetterOrDigit(randomChar)) 3344 randomChar = generator.nextInt(maxCharacter); 3345 if (Character.isSupplementaryCodePoint(randomChar)) { 3346 s.append(Character.toChars(randomChar)); 3347 } else { 3348 s.append((char) randomChar); 3349 } 3350 } 3351 toSearch = s.toString(); 3352 m.reset(toSearch); 3353 } while (m.find()); 3354 3355 // Insert the pattern at a random spot 3356 int insertIndex = generator.nextInt(99); 3357 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3358 insertIndex++; 3359 s.insert(insertIndex, pattern); 3360 toSearch = s.toString(); 3361 3362 // Make sure that the pattern is found 3363 m.reset(toSearch); 3364 assertTrue(m.find()); 3365 3366 // Make sure that the match text is the pattern 3367 assertEquals(m.group(), pattern); 3368 3369 // Make sure match occured at insertion point 3370 assertEquals(m.start(), insertIndex); 3371 } 3372 } 3373 3374 // Testing examples from a file 3375 3376 /** 3377 * Goes through the file "TestCases.txt" and creates many patterns 3378 * described in the file, matching the patterns against input lines in 3379 * the file, and comparing the results against the correct results 3380 * also found in the file. The file format is described in comments 3381 * at the head of the file. 3382 */ processFile(String fileName)3383 public static void processFile(String fileName) throws IOException { 3384 // Android-changed: Use resources instead of "test.src" property. 3385 // File testCases = new File(System.getProperty("test.src", "."), 3386 // fileName); 3387 // FileInputStream in = new FileInputStream(testCases); 3388 InputStream in = RegExTest.class.getResourceAsStream(fileName); 3389 assertNotNull(in); 3390 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3391 3392 // Process next test case. 3393 String aLine; 3394 while((aLine = r.readLine()) != null) { 3395 // Read a line for pattern 3396 String patternString = grabLine(r); 3397 Pattern p = null; 3398 try { 3399 p = compileTestPattern(patternString); 3400 } catch (PatternSyntaxException e) { 3401 String dataString = grabLine(r); 3402 String expectedResult = grabLine(r); 3403 if (expectedResult.startsWith("error")) 3404 continue; 3405 String line1 = "----------------------------------------"; 3406 String line2 = "Pattern = " + patternString; 3407 String line3 = "Data = " + dataString; 3408 fail(line1 + System.lineSeparator() + line2 + System.lineSeparator() + line3 + System.lineSeparator()); 3409 continue; 3410 } 3411 3412 // Read a line for input string 3413 String dataString = grabLine(r); 3414 Matcher m = p.matcher(dataString); 3415 StringBuilder result = new StringBuilder(); 3416 3417 // Check for IllegalStateExceptions before a match 3418 preMatchInvariants(m); 3419 3420 boolean found = m.find(); 3421 3422 if (found) 3423 postTrueMatchInvariants(m); 3424 else 3425 postFalseMatchInvariants(m); 3426 3427 if (found) { 3428 result.append("true "); 3429 result.append(m.group(0)).append(" "); 3430 } else { 3431 result.append("false "); 3432 } 3433 3434 result.append(m.groupCount()); 3435 3436 if (found) { 3437 for (int i=1; i<m.groupCount()+1; i++) 3438 if (m.group(i) != null) 3439 result.append(" ").append(m.group(i)); 3440 } 3441 3442 // Read a line for the expected result 3443 String expectedResult = grabLine(r); 3444 3445 assertEquals(result.toString(), expectedResult, 3446 "Pattern = " + patternString + 3447 System.lineSeparator() + 3448 "Data = " + dataString + 3449 System.lineSeparator() + 3450 "Expected = " + expectedResult + 3451 System.lineSeparator() + 3452 "Actual = " + result.toString()); 3453 } 3454 } 3455 preMatchInvariants(Matcher m)3456 private static void preMatchInvariants(Matcher m) { 3457 assertThrows(IllegalStateException.class, m::start); 3458 assertThrows(IllegalStateException.class, m::end); 3459 assertThrows(IllegalStateException.class, m::group); 3460 } 3461 postFalseMatchInvariants(Matcher m)3462 private static void postFalseMatchInvariants(Matcher m) { 3463 assertThrows(IllegalStateException.class, m::group); 3464 assertThrows(IllegalStateException.class, m::start); 3465 assertThrows(IllegalStateException.class, m::end); 3466 } 3467 postTrueMatchInvariants(Matcher m)3468 private static void postTrueMatchInvariants(Matcher m) { 3469 assertEquals(m.start(), m.start(0)); 3470 assertEquals(m.start(), m.start(0)); 3471 assertEquals(m.group(), m.group(0)); 3472 assertThrows(IndexOutOfBoundsException.class, () -> m.group(50)); 3473 } 3474 compileTestPattern(String patternString)3475 private static Pattern compileTestPattern(String patternString) { 3476 if (!patternString.startsWith("'")) { 3477 return Pattern.compile(patternString); 3478 } 3479 int break1 = patternString.lastIndexOf("'"); 3480 String flagString = patternString.substring(break1+1); 3481 patternString = patternString.substring(1, break1); 3482 3483 if (flagString.equals("i")) 3484 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3485 3486 if (flagString.equals("m")) 3487 return Pattern.compile(patternString, Pattern.MULTILINE); 3488 3489 return Pattern.compile(patternString); 3490 } 3491 3492 /** 3493 * Reads a line from the input file. Keeps reading lines until a non 3494 * empty non comment line is read. If the line contains a \n then 3495 * these two characters are replaced by a newline char. If a \\uxxxx 3496 * sequence is read then the sequence is replaced by the unicode char. 3497 */ grabLine(BufferedReader r)3498 public static String grabLine(BufferedReader r) throws IOException { 3499 int index = 0; 3500 String line = r.readLine(); 3501 while (line.startsWith("//") || line.length() < 1) 3502 line = r.readLine(); 3503 while ((index = line.indexOf("\\n")) != -1) { 3504 StringBuilder temp = new StringBuilder(line); 3505 temp.replace(index, index+2, "\n"); 3506 line = temp.toString(); 3507 } 3508 while ((index = line.indexOf("\\u")) != -1) { 3509 StringBuilder temp = new StringBuilder(line); 3510 String value = temp.substring(index+2, index+6); 3511 char aChar = (char)Integer.parseInt(value, 16); 3512 String unicodeChar = "" + aChar; 3513 temp.replace(index, index+6, unicodeChar); 3514 line = temp.toString(); 3515 } 3516 3517 return line; 3518 } 3519 3520 3521 @Test namedGroupCaptureTest()3522 public static void namedGroupCaptureTest() { 3523 check(Pattern.compile("x+(?<gname>y+)z+"), 3524 "xxxyyyzzz", 3525 "gname", 3526 "yyy"); 3527 3528 check(Pattern.compile("x+(?<gname8>y+)z+"), 3529 "xxxyyyzzz", 3530 "gname8", 3531 "yyy"); 3532 3533 //backref 3534 Pattern pattern = Pattern.compile("(a*)bc\\1"); 3535 check(pattern, "zzzaabcazzz", true); // found "abca" 3536 3537 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 3538 "zzzaabcaazzz", true); 3539 3540 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 3541 "abcdefabc", true); 3542 3543 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 3544 "abcdefghijkk", true); 3545 3546 // Supplementary character tests 3547 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3548 toSupplementaries("zzzaabcazzz"), true); 3549 3550 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3551 toSupplementaries("zzzaabcaazzz"), true); 3552 3553 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 3554 toSupplementaries("abcdefabc"), true); 3555 3556 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 3557 "(?<gname>" + 3558 toSupplementaries("k)") + "\\k<gname>"), 3559 toSupplementaries("abcdefghijkk"), true); 3560 3561 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 3562 "xxxyyyzzzyyy", 3563 "gname", 3564 "yyy"); 3565 3566 //replaceFirst/All 3567 checkReplaceFirst("(?<gn>ab)(c*)", 3568 "abccczzzabcczzzabccc", 3569 "${gn}", 3570 "abzzzabcczzzabccc"); 3571 3572 checkReplaceAll("(?<gn>ab)(c*)", 3573 "abccczzzabcczzzabccc", 3574 "${gn}", 3575 "abzzzabzzzab"); 3576 3577 3578 checkReplaceFirst("(?<gn>ab)(c*)", 3579 "zzzabccczzzabcczzzabccczzz", 3580 "${gn}", 3581 "zzzabzzzabcczzzabccczzz"); 3582 3583 checkReplaceAll("(?<gn>ab)(c*)", 3584 "zzzabccczzzabcczzzabccczzz", 3585 "${gn}", 3586 "zzzabzzzabzzzabzzz"); 3587 3588 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 3589 "zzzabccczzzabcczzzabccczzz", 3590 "${gn2}", 3591 "zzzccczzzabcczzzabccczzz"); 3592 3593 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 3594 "zzzabccczzzabcczzzabccczzz", 3595 "${gn2}", 3596 "zzzccczzzcczzzccczzz"); 3597 3598 //toSupplementaries("(ab)(c*)")); 3599 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 3600 ")(?<gn2>" + toSupplementaries("c") + "*)", 3601 toSupplementaries("abccczzzabcczzzabccc"), 3602 "${gn1}", 3603 toSupplementaries("abzzzabcczzzabccc")); 3604 3605 3606 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 3607 ")(?<gn2>" + toSupplementaries("c") + "*)", 3608 toSupplementaries("abccczzzabcczzzabccc"), 3609 "${gn1}", 3610 toSupplementaries("abzzzabzzzab")); 3611 3612 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 3613 ")(?<gn2>" + toSupplementaries("c") + "*)", 3614 toSupplementaries("abccczzzabcczzzabccc"), 3615 "${gn2}", 3616 toSupplementaries("ccczzzabcczzzabccc")); 3617 3618 3619 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 3620 ")(?<gn2>" + toSupplementaries("c") + "*)", 3621 toSupplementaries("abccczzzabcczzzabccc"), 3622 "${gn2}", 3623 toSupplementaries("ccczzzcczzzccc")); 3624 3625 checkReplaceFirst("(?<dog>Dog)AndCat", 3626 "zzzDogAndCatzzzDogAndCatzzz", 3627 "${dog}", 3628 "zzzDogzzzDogAndCatzzz"); 3629 3630 3631 checkReplaceAll("(?<dog>Dog)AndCat", 3632 "zzzDogAndCatzzzDogAndCatzzz", 3633 "${dog}", 3634 "zzzDogzzzDogzzz"); 3635 3636 // backref in Matcher & String 3637 assertTrue("abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") && 3638 "abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")); 3639 3640 // negative 3641 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 3642 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 3643 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 3644 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 3645 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 3646 3647 Matcher iaeMatcher = Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"); 3648 iaeMatcher.find(); 3649 assertThrows(IllegalArgumentException.class, () -> iaeMatcher.group("gnameX")); 3650 assertThrows(IllegalArgumentException.class, () -> iaeMatcher.start("gnameX")); 3651 assertThrows(IllegalArgumentException.class, () -> iaeMatcher.start("gnameX")); 3652 3653 Matcher npeMatcher = Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"); 3654 npeMatcher.find(); 3655 assertThrows(NullPointerException.class, () -> npeMatcher.group(null)); 3656 assertThrows(NullPointerException.class, () -> npeMatcher.start(null)); 3657 assertThrows(NullPointerException.class, () -> npeMatcher.end(null)); 3658 } 3659 3660 // This is for bug 6919132 3661 @Test nonBmpClassComplementTest()3662 public static void nonBmpClassComplementTest() { 3663 Pattern p = Pattern.compile("\\P{Lu}"); 3664 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3665 3666 assertFalse(m.find() && m.start() == 1); 3667 3668 // from a unicode category 3669 p = Pattern.compile("\\P{Lu}"); 3670 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3671 assertFalse(m.find()); 3672 assertTrue(m.hitEnd()); 3673 3674 // block 3675 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 3676 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3677 assertFalse(m.find() && m.start() == 1); 3678 3679 p = Pattern.compile("\\P{sc=GRANTHA}"); 3680 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 3681 assertFalse(m.find() && m.start() == 1); 3682 } 3683 3684 // FIXME: Investigate if this test is worth fixing. 3685 // Android-changed: Disable the test because upstream and ICU has different Unicode versions. 3686 @Test(enabled = false) unicodePropertiesTest()3687 public static void unicodePropertiesTest() { 3688 // different forms 3689 assertFalse(!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 3690 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 3691 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 3692 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 3693 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 3694 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 3695 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 3696 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 3697 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 3698 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()); 3699 3700 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 3701 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 3702 Matcher lastSM = common; 3703 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 3704 3705 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 3706 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 3707 Matcher lastBM = latin; 3708 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 3709 3710 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 3711 if (cp >= 0x30000 && (cp & 0x70) == 0){ 3712 continue; // only pick couple code points, they are the same 3713 } 3714 3715 // Unicode Script 3716 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 3717 Matcher m; 3718 String str = new String(Character.toChars(cp)); 3719 if (script == lastScript) { 3720 m = lastSM; 3721 m.reset(str); 3722 } else { 3723 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 3724 } 3725 assertTrue(m.matches()); 3726 3727 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 3728 other.reset(str); 3729 assertFalse(other.matches()); 3730 lastSM = m; 3731 lastScript = script; 3732 3733 // Unicode Block 3734 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 3735 if (block == null) { 3736 //System.out.printf("Not a Block: cp=%x%n", cp); 3737 continue; 3738 } 3739 if (block == lastBlock) { 3740 m = lastBM; 3741 m.reset(str); 3742 } else { 3743 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 3744 } 3745 assertTrue(m.matches()); 3746 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 3747 other.reset(str); 3748 assertFalse(other.matches()); 3749 lastBM = m; 3750 lastBlock = block; 3751 } 3752 } 3753 3754 // FIXME: Investigate if this test is worth fixed. 3755 // Android-changed: Disable the test because upstream and ICU has different Unicode versions. 3756 @Test(enabled = false) unicodeHexNotationTest()3757 public static void unicodeHexNotationTest() { 3758 3759 // negative 3760 checkExpectedFail("\\x{-23}"); 3761 checkExpectedFail("\\x{110000}"); 3762 checkExpectedFail("\\x{}"); 3763 checkExpectedFail("\\x{AB[ef]"); 3764 3765 // codepoint 3766 check("^\\x{1033c}$", "\uD800\uDF3C", true); 3767 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 3768 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 3769 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 3770 3771 // in class 3772 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 3773 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 3774 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 3775 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 3776 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 3777 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 3778 3779 for (int cp = 0; cp <= 0x10FFFF; cp++) { 3780 String s = "A" + new String(Character.toChars(cp)) + "B"; 3781 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp) 3782 : String.format("\\u%04x\\u%04x", 3783 (int) Character.toChars(cp)[0], 3784 (int) Character.toChars(cp)[1]); 3785 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 3786 assertTrue(Pattern.matches("A" + hexUTF16 + "B", s)); 3787 assertTrue(Pattern.matches("A[" + hexUTF16 + "]B", s)); 3788 assertTrue(Pattern.matches("A" + hexCodePoint + "B", s)); 3789 assertTrue(Pattern.matches("A[" + hexCodePoint + "]B", s)); 3790 } 3791 } 3792 3793 // FIXME: Investigate if this test is worth fixed. 3794 // Android-changed: Disable the test because upstream and ICU has different Unicode versions, 3795 // and the different behavior with the UNICODE_CHARACTER_CLASS flag. 3796 @Test(enabled = false) unicodeClassesTest()3797 public static void unicodeClassesTest() { 3798 3799 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 3800 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 3801 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 3802 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 3803 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 3804 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 3805 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 3806 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 3807 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 3808 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 3809 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 3810 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 3811 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 3812 Matcher bound = Pattern.compile("\\b").matcher(""); 3813 Matcher word = Pattern.compile("\\w++").matcher(""); 3814 // UNICODE_CHARACTER_CLASS 3815 // Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3816 // Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3817 // Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3818 // Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3819 // Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3820 // Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3821 // Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3822 // Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3823 // Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3824 // Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3825 // Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3826 // Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3827 // Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3828 // Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3829 // Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3830 Matcher lowerU = Pattern.compile("\\p{Lower}", 0).matcher(""); 3831 Matcher upperU = Pattern.compile("\\p{Upper}", 0).matcher(""); 3832 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", 0).matcher(""); 3833 Matcher alphaU = Pattern.compile("\\p{Alpha}", 0).matcher(""); 3834 Matcher digitU = Pattern.compile("\\p{Digit}", 0).matcher(""); 3835 Matcher alnumU = Pattern.compile("\\p{Alnum}", 0).matcher(""); 3836 Matcher punctU = Pattern.compile("\\p{Punct}", 0).matcher(""); 3837 Matcher graphU = Pattern.compile("\\p{Graph}", 0).matcher(""); 3838 Matcher printU = Pattern.compile("\\p{Print}", 0).matcher(""); 3839 Matcher blankU = Pattern.compile("\\p{Blank}", 0).matcher(""); 3840 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", 0).matcher(""); 3841 Matcher xdigitU = Pattern.compile("\\p{XDigit}", 0).matcher(""); 3842 Matcher spaceU = Pattern.compile("\\p{Space}", 0).matcher(""); 3843 Matcher boundU = Pattern.compile("\\b", 0).matcher(""); 3844 Matcher wordU = Pattern.compile("\\w", 0).matcher(""); 3845 // embedded flag (?U) 3846 // Android-changed: UNICODE_CHARACTER_CLASS flag isn't supported. 3847 // Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3848 // Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3849 // Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3850 // 3851 // Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 3852 // Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3853 // Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3854 Matcher lowerEU = Pattern.compile("\\p{Lower}", 0).matcher(""); 3855 Matcher graphEU = Pattern.compile("\\p{Graph}", 0).matcher(""); 3856 Matcher wordEU = Pattern.compile("\\w", 0).matcher(""); 3857 3858 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 3859 Matcher bwbU = Pattern.compile("\\b\\w++\\b", 0).matcher(""); 3860 Matcher bwbEU = Pattern.compile("\\b\\w++\\b", 0).matcher(""); 3861 // properties 3862 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 3863 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 3864 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 3865 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 3866 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher(""); 3867 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 3868 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 3869 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 3870 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 3871 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 3872 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 3873 // javaMethod 3874 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 3875 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 3876 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 3877 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 3878 // GC/C 3879 Matcher gcC = Pattern.compile("\\p{C}").matcher(""); 3880 3881 for (int cp = 1; cp < 0x30000; cp++) { 3882 String str = new String(Character.toChars(cp)); 3883 int type = Character.getType(cp); 3884 if (// lower 3885 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 3886 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 3887 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 3888 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 3889 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 3890 // upper 3891 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 3892 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 3893 Character.isUpperCase(cp) != upperP.reset(str).matches() || 3894 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 3895 // alpha 3896 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 3897 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 3898 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 3899 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 3900 // digit 3901 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 3902 Character.isDigit(cp) != digitU.reset(str).matches() || 3903 // alnum 3904 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 3905 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 3906 // punct 3907 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 3908 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 3909 // graph 3910 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 3911 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 3912 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 3913 // blank 3914 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 3915 != blank.reset(str).matches() || 3916 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 3917 // print 3918 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 3919 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 3920 // cntrl 3921 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 3922 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 3923 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 3924 // hexdigit 3925 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 3926 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 3927 // space 3928 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 3929 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 3930 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 3931 // word 3932 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 3933 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 3934 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 3935 // bwordb 3936 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 3937 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 3938 // properties 3939 Character.isTitleCase(cp) != titleP.reset(str).matches() || 3940 Character.isLetter(cp) != letterP.reset(str).matches()|| 3941 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 3942 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 3943 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 3944 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 3945 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || 3946 // gc_C 3947 (Character.CONTROL == type || Character.FORMAT == type || 3948 Character.PRIVATE_USE == type || Character.SURROGATE == type || 3949 Character.UNASSIGNED == type) 3950 != gcC.reset(str).matches()) { 3951 fail(); 3952 } 3953 } 3954 3955 // bounds/word align 3956 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 3957 assertTrue(bwbU.reset("\u0180sherman\u0400").matches()); 3958 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 3959 assertTrue(bwbU.reset("\u0180sh\u0345erman\u0400").matches()); 3960 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 3961 assertTrue(bwbU.reset("\u0724\u0739\u0724").matches()); 3962 assertTrue(bwbEU.reset("\u0724\u0739\u0724").matches()); 3963 } 3964 3965 // FIXME: Investigate if this test is worth fixed. 3966 // Android-changed: Disable the test because upstream and ICU has different Unicode versions, 3967 // and the different behavior with the UNICODE_CHARACTER_CLASS flag. 3968 @Test(enabled = false) unicodeCharacterNameTest()3969 public static void unicodeCharacterNameTest() { 3970 3971 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 3972 if (!Character.isValidCodePoint(cp) || 3973 Character.getType(cp) == Character.UNASSIGNED) 3974 continue; 3975 String str = new String(Character.toChars(cp)); 3976 // single 3977 String p = "\\N{" + Character.getName(cp) + "}"; 3978 // Android-changed: Android fails at the compilation time. 3979 // assertTrue(Pattern.compile(p).matcher(str).matches()); 3980 String p1 = p; 3981 assertThrows(PatternSyntaxException.class, () -> Pattern.compile(p1)); 3982 // class[c] 3983 p = "[\\N{" + Character.getName(cp) + "}]"; 3984 // Android-changed: Android fails at the compilation time. 3985 // assertTrue(Pattern.compile(p).matcher(str).matches()); 3986 String p2 = p; 3987 assertThrows(PatternSyntaxException.class, () -> Pattern.compile(p2)); 3988 } 3989 3990 // range 3991 for (int i = 0; i < 10; i++) { 3992 int start = generator.nextInt(20); 3993 int end = start + generator.nextInt(200); 3994 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 3995 String str; 3996 for (int cp = start; cp < end; cp++) { 3997 str = new String(Character.toChars(cp)); 3998 3999 // Android-changed: Android fails at the compilation time. 4000 // assertTrue(Pattern.compile(p).matcher(str).matches()); 4001 assertThrows(PatternSyntaxException.class, () -> Pattern.compile(p)); 4002 } 4003 str = new String(Character.toChars(end + 10)); 4004 // Android-changed: Android fails at the compilation time. 4005 // assertFalse(Pattern.compile(p).matcher(str).matches()); 4006 assertThrows(PatternSyntaxException.class, () -> Pattern.compile(p)); 4007 } 4008 4009 // slice 4010 for (int i = 0; i < 10; i++) { 4011 int n = generator.nextInt(256); 4012 int[] buf = new int[n]; 4013 StringBuilder sb = new StringBuilder(1024); 4014 for (int j = 0; j < n; j++) { 4015 int cp = generator.nextInt(1000); 4016 if (!Character.isValidCodePoint(cp) || 4017 Character.getType(cp) == Character.UNASSIGNED) 4018 cp = 0x4e00; // just use 4e00 4019 sb.append("\\N{").append(Character.getName(cp)).append("}"); 4020 buf[j] = cp; 4021 } 4022 String p = sb.toString(); 4023 String str = new String(buf, 0, buf.length); 4024 assertTrue(Pattern.compile(p).matcher(str).matches()); 4025 } 4026 } 4027 4028 // FIXME: Investigate why this test. It may be an ICU4C bug treating WS characters. 4029 // Android-changed: Disable the test because upstream and ICU has different Unicode versions. 4030 @Test(enabled = false) horizontalAndVerticalWSTest()4031 public static void horizontalAndVerticalWSTest() { 4032 String hws = new String (new char[] { 4033 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4034 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4035 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4036 0x202f, 0x205f, 0x3000 }); 4037 String vws = new String (new char[] { 4038 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4039 assertTrue(Pattern.compile("\\h+").matcher(hws).matches() && 4040 Pattern.compile("[\\h]+").matcher(hws).matches()); 4041 assertTrue(!Pattern.compile("\\H").matcher(hws).find() && 4042 !Pattern.compile("[\\H]").matcher(hws).find()); 4043 assertTrue(Pattern.compile("\\v+").matcher(vws).matches() && 4044 Pattern.compile("[\\v]+").matcher(vws).matches()); 4045 assertTrue(!Pattern.compile("\\V").matcher(vws).find() && 4046 !Pattern.compile("[\\V]").matcher(vws).find()); 4047 String prefix = "abcd"; 4048 String suffix = "efgh"; 4049 String ng = "A"; 4050 for (int i = 0; i < hws.length(); i++) { 4051 String c = String.valueOf(hws.charAt(i)); 4052 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4053 assertTrue(m.find() && c.equals(m.group())); 4054 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4055 assertTrue(m.find() && c.equals(m.group())); 4056 4057 String matcherSubstring = hws.substring(0, i) + ng + hws.substring(i); 4058 4059 m = Pattern.compile("\\H").matcher(matcherSubstring); 4060 assertTrue(m.find() && ng.equals(m.group())); 4061 m = Pattern.compile("[\\H]").matcher(matcherSubstring); 4062 assertTrue(m.find() && ng.equals(m.group())); 4063 } 4064 for (int i = 0; i < vws.length(); i++) { 4065 String c = String.valueOf(vws.charAt(i)); 4066 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4067 assertTrue(m.find() && c.equals(m.group())); 4068 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4069 assertTrue(m.find() && c.equals(m.group())); 4070 4071 String matcherSubstring = vws.substring(0, i) + ng + vws.substring(i); 4072 m = Pattern.compile("\\V").matcher(matcherSubstring); 4073 assertTrue(m.find() && ng.equals(m.group())); 4074 m = Pattern.compile("[\\V]").matcher(matcherSubstring); 4075 assertTrue(m.find() && ng.equals(m.group())); 4076 } 4077 // \v in range is interpreted as 0x0B. This is the undocumented behavior 4078 assertTrue(Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()); 4079 } 4080 4081 @Test linebreakTest()4082 public static void linebreakTest() { 4083 String linebreaks = new String (new char[] { 4084 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4085 String crnl = "\r\n"; 4086 4087 // Android-changed: Disable failing backtracking test. 4088 // assertTrue((Pattern.compile("\\R+").matcher(linebreaks).matches() && 4089 // Pattern.compile("\\R").matcher(crnl).matches() && 4090 // Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() && 4091 // Pattern.compile("\\Rabc").matcher("\rabc").matches() && 4092 // Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking 4093 // Pattern.compile("\\R\\n").matcher(crnl).matches()) || // backtracking 4094 // Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()); // #8176029 4095 assertTrue(Pattern.compile("\\R+").matcher(linebreaks).matches()); 4096 assertTrue(Pattern.compile("\\R").matcher(crnl).matches()); 4097 assertTrue(Pattern.compile("\\Rabc").matcher(crnl + "abc").matches()); 4098 assertTrue(Pattern.compile("\\Rabc").matcher("\rabc").matches()); 4099 // assertTrue(Pattern.compile("\\R\\R").matcher(crnl).matches()); // backtracking 4100 // assertTrue(Pattern.compile("\\R\\n").matcher(crnl).matches()); // backtracking 4101 assertFalse(Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()); // #8176029 4102 } 4103 4104 // #7189363 4105 @Test branchTest()4106 public static void branchTest() { 4107 assertFalse(!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4108 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4109 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4110 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4111 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4112 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4113 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4114 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4115 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4116 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4117 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4118 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4119 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4120 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4121 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4122 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4123 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4124 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4125 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4126 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4127 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4128 !Pattern.compile("(a)??bc|de").matcher("de").matches()); 4129 } 4130 4131 // This test is for 8007395 4132 @Test groupCurlyNotFoundSuppTest()4133 public static void groupCurlyNotFoundSuppTest() { 4134 String input = "test this as \ud83d\ude0d"; 4135 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4136 "test(.)*(@[a-zA-Z.]+)", 4137 "test([^B])+(@[a-zA-Z.]+)", 4138 "test([^B])*(@[a-zA-Z.]+)", 4139 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4140 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4141 }) { 4142 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4143 .matcher(input); 4144 assertFalse(m.find()); 4145 } 4146 } 4147 4148 // This test is for 8023647 4149 @Test groupCurlyBackoffTest()4150 public static void groupCurlyBackoffTest() { 4151 assertFalse(!"abc1c".matches("(\\w)+1\\1") || 4152 "abc11".matches("(\\w)+1\\1")); 4153 } 4154 4155 // This test is for 8012646 4156 @Test patternAsPredicate()4157 public static void patternAsPredicate() { 4158 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4159 4160 assertFalse(p.test("")); 4161 assertTrue(p.test("word")); 4162 assertFalse(p.test("1234")); 4163 assertTrue(p.test("word1234")); 4164 } 4165 4166 // This test is for 8184692 4167 @Test patternAsMatchPredicate()4168 public static void patternAsMatchPredicate() { 4169 Predicate<String> p = Pattern.compile("[a-z]+").asMatchPredicate(); 4170 4171 assertFalse(p.test("")); 4172 assertTrue(p.test("word")); 4173 assertFalse(p.test("1234word")); 4174 assertFalse(p.test("1234")); 4175 } 4176 4177 4178 // This test is for 8035975 4179 @Test invalidFlags()4180 public static void invalidFlags() { 4181 for (int flag = 1; flag != 0; flag <<= 1) { 4182 switch (flag) { 4183 case Pattern.CASE_INSENSITIVE: 4184 case Pattern.MULTILINE: 4185 case Pattern.DOTALL: 4186 case Pattern.UNICODE_CASE: 4187 case Pattern.CANON_EQ: 4188 case Pattern.UNIX_LINES: 4189 case Pattern.LITERAL: 4190 case Pattern.UNICODE_CHARACTER_CLASS: 4191 case Pattern.COMMENTS: 4192 // valid flag, continue 4193 break; 4194 default: 4195 int finalFlag = flag; 4196 assertThrows(IllegalArgumentException.class, () -> 4197 Pattern.compile(".", finalFlag)); 4198 } 4199 } 4200 } 4201 4202 // This test is for 8158482 4203 @Test embeddedFlags()4204 public static void embeddedFlags() { 4205 //Runs without exception. 4206 Pattern.compile("(?i).(?-i)."); 4207 Pattern.compile("(?m).(?-m)."); 4208 Pattern.compile("(?s).(?-s)."); 4209 Pattern.compile("(?d).(?-d)."); 4210 Pattern.compile("(?u).(?-u)."); 4211 // Android-removed: 'c' isn't a documented flag. 4212 // Pattern.compile("(?c).(?-c)."); 4213 Pattern.compile("(?x).(?-x)."); 4214 4215 // Android-removed: UNICODE_CHARACTER_CLASS and CANON_EQ flags aren't supported. 4216 // Pattern.compile("(?U).(?-U)."); 4217 // Pattern.compile("(?imsducxU).(?-imsducxU)."); 4218 Pattern.compile("(?imsdux).(?-imsdux)."); 4219 } 4220 4221 @Test grapheme()4222 public static void grapheme() throws Exception { 4223 final int[] lineNumber = new int[1]; 4224 // Android-changed: Use resources instead of "test.src" property. 4225 // Stream.concat(Files.lines(UCDFiles.GRAPHEME_BREAK_TEST), 4226 // Files.lines(Paths.get(System.getProperty("test.src", "."), "GraphemeTestCases.txt"))) 4227 try (BufferedReader reader = new BufferedReader(new InputStreamReader( 4228 RegExTest.class.getResourceAsStream("GraphemeTestCases.txt")))) { 4229 reader.lines().forEach( ln -> { 4230 lineNumber[0]++; 4231 if (ln.length() == 0 || ln.startsWith("#")) { 4232 return; 4233 } 4234 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4235 // System.out.println(str); 4236 String[] strs = ln.split("\u00f7|\u00d7"); 4237 StringBuilder src = new StringBuilder(); 4238 ArrayList<String> graphemes = new ArrayList<>(); 4239 StringBuilder buf = new StringBuilder(); 4240 int offBk = 0; 4241 for (String str : strs) { 4242 if (str.length() == 0) // first empty str 4243 continue; 4244 int cp = Integer.parseInt(str, 16); 4245 src.appendCodePoint(cp); 4246 buf.appendCodePoint(cp); 4247 offBk += (str.length() + 1); 4248 if (ln.charAt(offBk) == '\u00f7') { // DIV 4249 graphemes.add(buf.toString()); 4250 buf = new StringBuilder(); 4251 } 4252 } 4253 Pattern p = Pattern.compile("\\X"); 4254 // (1) test \X directly 4255 Matcher m = p.matcher(src.toString()); 4256 for (String g : graphemes) { 4257 // System.out.printf(" grapheme:=[%s]%n", g); 4258 String group = null; 4259 if (!m.find() || !(group = m.group()).equals(g)) { 4260 fail("Failed pattern \\X [" + ln + "] : " 4261 + "expected: " + g + " - actual: " + group 4262 + "(line " + lineNumber[0] + ")"); 4263 } 4264 } 4265 4266 // BEGIN Android-removed: ICU4C doesn't support \\b{g} yet. 4267 /* 4268 assertFalse(m.find()); 4269 // test \b{g} without \X via Pattern 4270 Pattern pbg = Pattern.compile("\\b{g}"); 4271 m = pbg.matcher(src.toString()); 4272 m.find(); 4273 int prev = m.end(); 4274 for (String g : graphemes) { 4275 String group = null; 4276 if (!m.find() || !(group = src.substring(prev, m.end())).equals(g)) { 4277 fail("Failed pattern \\b{g} [" + ln + "] : " 4278 + "expected: " + g + " - actual: " + group 4279 + "(line " + lineNumber[0] + ")"); 4280 } 4281 assertEquals("", m.group()); 4282 prev = m.end(); 4283 } 4284 assertFalse(m.find()); 4285 // (2) test \b{g} + \X via Scanner 4286 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4287 for (String g : graphemes) { 4288 String next = null; 4289 if (!s.hasNext(p) || !(next = s.next(p)).equals(g)) { 4290 fail("Failed \\b{g} [" + ln + "] : " 4291 + "expected: " + g + " - actual: " + next 4292 + " (line " + lineNumber[0] + ")"); 4293 } 4294 } 4295 assertFalse(s.hasNext(p)); 4296 // test \b{g} without \X via Scanner 4297 s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4298 for (String g : graphemes) { 4299 String next = null; 4300 if (!s.hasNext() || !(next = s.next()).equals(g)) { 4301 fail("Failed \\b{g} [" + ln + "] : " 4302 + "expected: " + g + " - actual: " + next 4303 + " (line " + lineNumber[0] + ")"); 4304 } 4305 } 4306 assertFalse(s.hasNext()); 4307 */ 4308 // END Android-removed: ICU4C doesn't support \\b{g} yet. 4309 }); 4310 } 4311 // some sanity checks 4312 // Android-changed: ICU4C doesn't support \\b{g} yet. 4313 // assertTrue(Pattern.compile("\\X{10}").matcher("abcdefghij").matches() && 4314 // Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() && 4315 // Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()); 4316 assertTrue(Pattern.compile("\\X{10}").matcher("abcdefghij").matches()); 4317 // make sure "\b{n}" still works 4318 4319 // Android-removed: ICU4C can't parse \b{1}. 4320 // FIXME: \b{1} should be treated as exactly one \b, but ICU4C fails to parse. 4321 // assertTrue(Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()); 4322 } 4323 4324 // hangup/timeout if go into exponential backtracking 4325 // FIXME: Investigate if this exponential backtracking causes any harm or worth fixing. 4326 // Android-changed: The test hangs up on Android. 4327 @Test(enabled = false) expoBacktracking()4328 public static void expoBacktracking() { 4329 4330 Object[][] patternMatchers = { 4331 // 6328855 4332 { "(.*\n*)*", 4333 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)", 4334 false }, 4335 // 6192895 4336 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4337 "Hello World this is a test this is a test this is a test A", 4338 true }, 4339 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4340 "Hello World this is a test this is a test this is a test \u4e00 ", 4341 false }, 4342 { " *([a-z0-9]+ *)+", 4343 "hello world this is a test this is a test this is a test A", 4344 false }, 4345 // 4771934 [FIXED] #5013651? 4346 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4347 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com", 4348 true }, 4349 // 4866249 [FIXED] 4350 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>", 4351 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">", 4352 true }, 4353 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4354 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com", 4355 false }, 4356 // 6345469 4357 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4358 " < br/> < / p> <p> <html> <adfasfdasdf> </p>", 4359 true }, // --> matched 4360 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4361 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>", 4362 false }, 4363 // 5026912 4364 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$", 4365 "156580451111112225588087755221111111566969655555555", 4366 false}, 4367 // 6988218 4368 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')", 4369 "'%)) order by ANGEBOT.ID", 4370 false}, // find 4371 // 6693451 4372 { "^(\\s*foo\\s*)*$", 4373 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo", 4374 true }, 4375 { "^(\\s*foo\\s*)*$", 4376 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo", 4377 false 4378 }, 4379 // 7006761 4380 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4381 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4382 // 8140212 4383 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4384 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4385 false 4386 }, 4387 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4388 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4389 4390 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4391 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4392 4393 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4394 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4395 4396 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4397 4398 /* not fixed 4399 //8132141 ---> second level exponential backtracking 4400 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4401 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4402 */ 4403 }; 4404 4405 for (Object[] pm : patternMatchers) { 4406 String p = (String)pm[0]; 4407 String s = (String)pm[1]; 4408 boolean r = (Boolean)pm[2]; 4409 assertEquals(r, Pattern.compile(p).matcher(s).matches()); 4410 } 4411 } 4412 4413 @Test invalidGroupName()4414 public static void invalidGroupName() { 4415 // Invalid start of a group name 4416 for (String groupName : List.of("", ".", "0", "\u0040", "\u005b", 4417 "\u0060", "\u007b", "\u0416")) { 4418 for (String pat : List.of("(?<" + groupName + ">)", 4419 "\\k<" + groupName + ">")) { 4420 var e = expectThrows(PatternSyntaxException.class, () -> Pattern.compile(pat)); 4421 // Android-changed: Android has a different error message. 4422 // assertTrue(e.getMessage().startsWith( 4423 // "capturing group name does not start with a" 4424 // + " Latin letter")); 4425 } 4426 } 4427 // Invalid char in a group name 4428 for (String groupName : List.of("a.", "b\u0040", "c\u005b", 4429 "d\u0060", "e\u007b", "f\u0416")) { 4430 for (String pat : List.of("(?<" + groupName + ">)", 4431 "\\k<" + groupName + ">")) { 4432 var e = expectThrows(PatternSyntaxException.class, () -> 4433 Pattern.compile(pat)); 4434 4435 // Android-changed: Android has a different error message. 4436 // assertTrue(e.getMessage().startsWith( 4437 // "named capturing group is missing trailing '>'")); 4438 } 4439 } 4440 } 4441 4442 @Test illegalRepetitionRange()4443 public static void illegalRepetitionRange() { 4444 // huge integers > (2^31 - 1) 4445 String n = BigInteger.valueOf(1L << 32) 4446 .toString(); 4447 String m = BigInteger.valueOf(1L << 31) 4448 .add(new BigInteger(80, generator)) 4449 .toString(); 4450 for (String rep : List.of("", "x", ".", ",", "-1", "2,1", 4451 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) { 4452 String pat = ".{" + rep + "}"; 4453 var e = expectThrows(PatternSyntaxException.class, () -> 4454 Pattern.compile(pat)); 4455 // Android-changed: Android produces a different error message. 4456 // assertTrue(e.getMessage().startsWith("Illegal repetition")); 4457 } 4458 } 4459 4460 // Android-changed: CANON_EQ flag isn't supported. 4461 // @Test surrogatePairWithCanonEq()4462 public static void surrogatePairWithCanonEq() { 4463 //Runs without exception 4464 Pattern.compile("\ud834\udd21", Pattern.CANON_EQ); 4465 } 4466 s2x(String s)4467 public static String s2x(String s) { 4468 StringBuilder sb = new StringBuilder(); 4469 for (char ch : s.toCharArray()) { 4470 sb.append(String.format("\\u%04x", (int)ch)); 4471 } 4472 return sb.toString(); 4473 } 4474 4475 // This test is for 8235812, with cases excluded by 8258259 4476 // FIXME: Fix this test. 4477 // Android-changed: Disable lineBreakWithQuantifier test. 4478 // @Test lineBreakWithQuantifier()4479 public static void lineBreakWithQuantifier() { 4480 // key: pattern 4481 // value: lengths of input that must match the pattern 4482 Map<String, List<Integer>> cases = Map.ofEntries( 4483 Map.entry("\\R?", List.of(0, 1)), 4484 Map.entry("\\R*", List.of(0, 1, 2, 3)), 4485 Map.entry("\\R+", List.of(1, 2, 3)), 4486 Map.entry("\\R{0}", List.of(0)), 4487 Map.entry("\\R{1}", List.of(1)), 4488 // Map.entry("\\R{2}", List.of(2)), // 8258259 4489 // Map.entry("\\R{3}", List.of(3)), // 8258259 4490 Map.entry("\\R{0,}", List.of(0, 1, 2, 3)), 4491 Map.entry("\\R{1,}", List.of(1, 2, 3)), 4492 // Map.entry("\\R{2,}", List.of(2, 3)), // 8258259 4493 // Map.entry("\\R{3,}", List.of(3)), // 8258259 4494 Map.entry("\\R{0,0}", List.of(0)), 4495 Map.entry("\\R{0,1}", List.of(0, 1)), 4496 Map.entry("\\R{0,2}", List.of(0, 1, 2)), 4497 Map.entry("\\R{0,3}", List.of(0, 1, 2, 3)), 4498 Map.entry("\\R{1,1}", List.of(1)), 4499 Map.entry("\\R{1,2}", List.of(1, 2)), 4500 Map.entry("\\R{1,3}", List.of(1, 2, 3)), 4501 // Map.entry("\\R{2,2}", List.of(2)), // 8258259 4502 // Map.entry("\\R{2,3}", List.of(2, 3)), // 8258259 4503 // Map.entry("\\R{3,3}", List.of(3)), // 8258259 4504 Map.entry("\\R", List.of(1)), 4505 Map.entry("\\R\\R", List.of(2)), 4506 Map.entry("\\R\\R\\R", List.of(3)) 4507 ); 4508 4509 // key: length of input 4510 // value: all possible inputs of given length 4511 Map<Integer, List<String>> inputs = new HashMap<>(); 4512 String[] Rs = { "\r\n", "\r", "\n", 4513 "\u000B", "\u000C", "\u0085", "\u2028", "\u2029" }; 4514 StringBuilder sb = new StringBuilder(); 4515 for (int len = 0; len <= 3; ++len) { 4516 int[] idx = new int[len + 1]; 4517 do { 4518 sb.setLength(0); 4519 for (int j = 0; j < len; ++j) 4520 sb.append(Rs[idx[j]]); 4521 // Android-changed: Suppress ComputeIfAbsentAmbiguousReference ErrorProne error. 4522 // inputs.computeIfAbsent(len, ArrayList::new).add(sb.toString()); 4523 inputs.computeIfAbsent(len, l -> new ArrayList<>()).add(sb.toString()); 4524 idx[0]++; 4525 for (int j = 0; j < len; ++j) { 4526 if (idx[j] < Rs.length) 4527 break; 4528 idx[j] = 0; 4529 idx[j+1]++; 4530 } 4531 } while (idx[len] == 0); 4532 } 4533 4534 // exhaustive testing 4535 for (String patStr : cases.keySet()) { 4536 Pattern[] pats = patStr.endsWith("R") 4537 ? new Pattern[] { Pattern.compile(patStr) } // no quantifiers 4538 : new Pattern[] { Pattern.compile(patStr), // greedy 4539 Pattern.compile(patStr + "?") }; // reluctant 4540 Matcher m = pats[0].matcher(""); 4541 for (Pattern p : pats) { 4542 m.usePattern(p); 4543 for (int len : cases.get(patStr)) { 4544 for (String in : inputs.get(len)) { 4545 assertTrue(m.reset(in).matches(), "Expected to match '" 4546 + s2x(in) + "' =~ /" + p + "/"); 4547 } 4548 } 4549 } 4550 } 4551 } 4552 4553 // This test is for 8214245 4554 // FIXME: Fix this test. 4555 // Android-changed: Disable caseInsensitivePMatch test. 4556 // @Test caseInsensitivePMatch()4557 public static void caseInsensitivePMatch() { 4558 for (String input : List.of("abcd", "AbCd", "ABCD")) { 4559 for (String pattern : List.of("abcd", "aBcD", "[a-d]{4}", 4560 "(?:a|b|c|d){4}", "\\p{Lower}{4}", "\\p{Ll}{4}", 4561 "\\p{IsLl}{4}", "\\p{gc=Ll}{4}", 4562 "\\p{general_category=Ll}{4}", "\\p{IsLowercase}{4}", 4563 "\\p{javaLowerCase}{4}", "\\p{Upper}{4}", "\\p{Lu}{4}", 4564 "\\p{IsLu}{4}", "\\p{gc=Lu}{4}", "\\p{general_category=Lu}{4}", 4565 "\\p{IsUppercase}{4}", "\\p{javaUpperCase}{4}", 4566 "\\p{Lt}{4}", "\\p{IsLt}{4}", "\\p{gc=Lt}{4}", 4567 "\\p{general_category=Lt}{4}", "\\p{IsTitlecase}{4}", 4568 "\\p{javaTitleCase}{4}", "[\\p{Lower}]{4}", "[\\p{Ll}]{4}", 4569 "[\\p{IsLl}]{4}", "[\\p{gc=Ll}]{4}", 4570 "[\\p{general_category=Ll}]{4}", "[\\p{IsLowercase}]{4}", 4571 "[\\p{javaLowerCase}]{4}", "[\\p{Upper}]{4}", "[\\p{Lu}]{4}", 4572 "[\\p{IsLu}]{4}", "[\\p{gc=Lu}]{4}", 4573 "[\\p{general_category=Lu}]{4}", "[\\p{IsUppercase}]{4}", 4574 "[\\p{javaUpperCase}]{4}", "[\\p{Lt}]{4}", "[\\p{IsLt}]{4}", 4575 "[\\p{gc=Lt}]{4}", "[\\p{general_category=Lt}]{4}", 4576 "[\\p{IsTitlecase}]{4}", "[\\p{javaTitleCase}]{4}")) 4577 { 4578 assertTrue(Pattern.compile(pattern, Pattern.CASE_INSENSITIVE) 4579 .matcher(input) 4580 .matches(),"Expected to match: " + "'" + input + 4581 "' =~ /" + pattern + "/"); 4582 } 4583 } 4584 4585 for (String input : List.of("\u01c7", "\u01c8", "\u01c9")) { 4586 for (String pattern : List.of("\u01c7", "\u01c8", "\u01c9", 4587 "[\u01c7\u01c8]", "[\u01c7\u01c9]", "[\u01c8\u01c9]", 4588 "[\u01c7-\u01c8]", "[\u01c8-\u01c9]", "[\u01c7-\u01c9]", 4589 "\\p{Lower}", "\\p{Ll}", "\\p{IsLl}", "\\p{gc=Ll}", 4590 "\\p{general_category=Ll}", "\\p{IsLowercase}", 4591 "\\p{javaLowerCase}", "\\p{Upper}", "\\p{Lu}", 4592 "\\p{IsLu}", "\\p{gc=Lu}", "\\p{general_category=Lu}", 4593 "\\p{IsUppercase}", "\\p{javaUpperCase}", 4594 "\\p{Lt}", "\\p{IsLt}", "\\p{gc=Lt}", 4595 "\\p{general_category=Lt}", "\\p{IsTitlecase}", 4596 "\\p{javaTitleCase}", "[\\p{Lower}]", "[\\p{Ll}]", 4597 "[\\p{IsLl}]", "[\\p{gc=Ll}]", 4598 "[\\p{general_category=Ll}]", "[\\p{IsLowercase}]", 4599 "[\\p{javaLowerCase}]", "[\\p{Upper}]", "[\\p{Lu}]", 4600 "[\\p{IsLu}]", "[\\p{gc=Lu}]", 4601 "[\\p{general_category=Lu}]", "[\\p{IsUppercase}]", 4602 "[\\p{javaUpperCase}]", "[\\p{Lt}]", "[\\p{IsLt}]", 4603 "[\\p{gc=Lt}]", "[\\p{general_category=Lt}]", 4604 "[\\p{IsTitlecase}]", "[\\p{javaTitleCase}]")) 4605 { 4606 assertTrue(Pattern.compile(pattern, Pattern.CASE_INSENSITIVE 4607 | Pattern.UNICODE_CHARACTER_CLASS) 4608 .matcher(input) 4609 .matches(), "Expected to match: " + 4610 "'" + input + "' =~ /" + pattern + "/"); 4611 } 4612 } 4613 } 4614 4615 // This test is for 8237599 4616 // FIXME: Fix this test. 4617 // Android-changed: Disable failing surrogatePairOverlapRegion() test. 4618 // @Test surrogatePairOverlapRegion()4619 public static void surrogatePairOverlapRegion() { 4620 String input = "\ud801\udc37"; 4621 4622 Pattern p = Pattern.compile(".+"); 4623 Matcher m = p.matcher(input); 4624 m.region(0, 1); 4625 4626 boolean ok = m.find(); 4627 if (!ok || !m.group(0).equals(input.substring(0, 1))) 4628 { 4629 String errMessage = "Input \"" + input + "\".substr(0, 1)" + 4630 " expected to match pattern \"" + p + "\""; 4631 if (ok) { 4632 fail(errMessage + System.lineSeparator() + 4633 "group(0): \"" + m.group(0) + "\""); 4634 } else { 4635 fail(errMessage); 4636 } 4637 } else if (!m.hitEnd()) { 4638 fail("Expected m.hitEnd() == true"); 4639 } 4640 4641 p = Pattern.compile(".*(.)"); 4642 m = p.matcher(input); 4643 m.region(1, 2); 4644 4645 ok = m.find(); 4646 if (!ok || !m.group(0).equals(input.substring(1, 2)) 4647 || !m.group(1).equals(input.substring(1, 2))) 4648 { 4649 String errMessage = "Input \"" + input + "\".substr(1, 2)" + 4650 " expected to match pattern \"" + p + "\""; 4651 if (ok) { 4652 String msg1 = "group(0): \"" + m.group(0) + "\""; 4653 String msg2 = "group(1): \"" + m.group(1) + "\""; 4654 fail(errMessage + System.lineSeparator() + msg1 + 4655 System.lineSeparator() + msg2); 4656 } else { 4657 fail(errMessage); 4658 } 4659 } 4660 } 4661 4662 //This test is for 8037397 4663 @Test droppedClassesWithIntersection()4664 public static void droppedClassesWithIntersection() { 4665 String rx = "[A-Z&&[A-Z]0-9]"; 4666 String ry = "[A-Z&&[A-F][G-Z]0-9]"; 4667 4668 Stream<Character> letterChars = IntStream.range('A', 'Z').mapToObj((i) -> (char) i); 4669 Stream<Character> digitChars = IntStream.range('0', '9').mapToObj((i) -> (char) i); 4670 4671 boolean letterCharsMatch = letterChars.allMatch((ch) -> { 4672 String chString = ch.toString(); 4673 return chString.matches(rx) && chString.matches(ry); 4674 }); 4675 4676 boolean digitCharsDontMatch = digitChars.noneMatch((ch) -> { 4677 String chString = ch.toString(); 4678 return chString.matches(rx) && chString.matches(ry); 4679 }); 4680 4681 4682 assertTrue(letterCharsMatch, "Compiling intersection pattern is " + 4683 "dropping a character class in its matcher"); 4684 4685 assertTrue(digitCharsDontMatch, "Compiling intersection pattern is " + 4686 "matching digits where it should not"); 4687 } 4688 4689 //This test is for 8269753 4690 @Test errorMessageCaretIndentation()4691 public static void errorMessageCaretIndentation() { 4692 String pattern = "\t**"; 4693 var e = expectThrows(PatternSyntaxException.class, () -> 4694 Pattern.compile(pattern)); 4695 // Android-changed: Android produces a different error message. 4696 // var sep = System.lineSeparator(); 4697 // assertTrue(e.getMessage().contains(sep + "\t ^")); 4698 } 4699 } 4700