/*
 *******************************************************************************
 * Copyright (C) 1996-2014, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.dev.test.rbbi;

import java.text.StringCharacterIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.FilteredBreakIteratorBuilder;
import com.ibm.icu.util.ULocale;

/**
 * Tests for {@link BreakIterator} (character, word, line, sentence and title
 * instances) and for {@link FilteredBreakIteratorBuilder}.
 *
 * <p>Most tests build a list of expected text fragments, concatenate them, and
 * hand the result to {@link #generalIteratorTest}, which checks that the
 * iterator splits the text into exactly those fragments when driven through
 * every navigation method.
 *
 * <p>NOTE(review): this file was restored from a copy in which whitespace had
 * been collapsed; runs of several spaces inside string literals may have been
 * shortened to one. Confirm the literals against the upstream source.
 */
public class BreakIteratorTest extends TestFmwk
{
    // Shared iterators, created in init() with the default-locale factory methods.
    private BreakIterator characterBreak;
    private BreakIterator wordBreak;
    private BreakIterator lineBreak;
    private BreakIterator sentenceBreak;
    private BreakIterator titleBreak;

    /**
     * Command-line entry point: hands the arguments to the test framework.
     *
     * @param args passed through unchanged to {@code run}
     * @throws Exception whatever {@code run} throws
     */
    public static void main(String[] args) throws Exception {
        new BreakIteratorTest().run(args);
    }

    /** Creates the test object; the iterators are created later, in {@link #init()}. */
    public BreakIteratorTest()
    {

    }

    /**
     * Creates the shared break iterators used by the data-driven tests.
     * Presumably called by the test framework before any test method runs
     * (TODO confirm against TestFmwk).
     */
    protected void init(){
        characterBreak = BreakIterator.getCharacterInstance();
        wordBreak = BreakIterator.getWordInstance();
        lineBreak = BreakIterator.getLineInstance();
        //logln("Creating sentence iterator...");
        sentenceBreak = BreakIterator.getSentenceInstance();
        //logln("Finished creating sentence iterator...");
        titleBreak = BreakIterator.getTitleInstance();
    }

    //=========================================================================
    // general test subroutines
    //=========================================================================

    /**
     * Runs the full battery of navigation checks on one iterator.
     *
     * <p>The expected fragments are concatenated to form the test text. The
     * text is then walked forward and backward, the two walks are compared with
     * each other and (if they agree) with the expected fragments, and finally
     * {@code following()}, {@code preceding()}, {@code isBoundary()} and
     * {@code next(n)} are checked against the expected boundary offsets.
     *
     * @param bi             the iterator under test; its text is replaced
     * @param expectedResult the fragments the iterator is expected to produce, in order
     */
    private void generalIteratorTest(BreakIterator bi, List<String> expectedResult) {
        StringBuilder buffer = new StringBuilder();
        for (String fragment : expectedResult) {
            buffer.append(fragment);
        }
        String text = buffer.toString();

        bi.setText(text);

        List<String> nextResults = _testFirstAndNext(bi, text);
        List<String> previousResults = _testLastAndPrevious(bi, text);

        logln("comparing forward and backward...");
        int errs = getErrorCount();
        compareFragmentLists("forward iteration", "backward iteration", nextResults,
                        previousResults);
        // Only compare against the expectation when forward and backward agree;
        // otherwise the same discrepancy would be reported twice.
        if (getErrorCount() == errs) {
            logln("comparing expected and actual...");
            compareFragmentLists("expected result", "actual result", expectedResult,
                            nextResults);
        }

        // Layout: [DONE, 0, end of fragment 0, ..., end of last fragment, DONE].
        // The DONE sentinels at both ends are what preceding(0) and
        // following(text.length()) are expected to return.
        int[] boundaries = new int[expectedResult.size() + 3];
        boundaries[0] = BreakIterator.DONE;
        boundaries[1] = 0;
        for (int i = 0; i < expectedResult.size(); i++) {
            boundaries[i + 2] = boundaries[i + 1] + expectedResult.get(i).length();
        }
        boundaries[boundaries.length - 1] = BreakIterator.DONE;

        _testFollowing(bi, text, boundaries);
        _testPreceding(bi, text, boundaries);
        _testIsBoundary(bi, text, boundaries);

        doMultipleSelectionTest(bi, text);
    }

    /**
     * Walks the text with {@code first()} and {@code next()} and collects the
     * fragments between consecutive boundaries.
     *
     * @param bi   the iterator, already set to {@code text}
     * @param text the text being iterated
     * @return the fragments in text order
     */
    private List<String> _testFirstAndNext(BreakIterator bi, String text) {
        int p = bi.first();
        int lastP = p;
        List<String> result = new ArrayList<String>();

        if (p != 0)
            errln("first() returned " + p + " instead of 0");
        while (p != BreakIterator.DONE) {
            p = bi.next();
            if (p != BreakIterator.DONE) {
                if (p <= lastP)
                    errln("next() failed to move forward: next() on position "
                                    + lastP + " yielded " + p);

                result.add(text.substring(lastP, p));
            }
            else {
                if (lastP != text.length())
                    errln("next() returned DONE prematurely: offset was "
                                    + lastP + " instead of " + text.length());
            }
            lastP = p;
        }
        return result;
    }

    /**
     * Walks the text with {@code last()} and {@code previous()} and collects the
     * fragments between consecutive boundaries.
     *
     * @param bi   the iterator, already set to {@code text}
     * @param text the text being iterated
     * @return the fragments in text order (each one is inserted at the front)
     */
    private List<String> _testLastAndPrevious(BreakIterator bi, String text) {
        int p = bi.last();
        int lastP = p;
        List<String> result = new ArrayList<String>();

        if (p != text.length())
            errln("last() returned " + p + " instead of " + text.length());
        while (p != BreakIterator.DONE) {
            p = bi.previous();
            if (p != BreakIterator.DONE) {
                if (p >= lastP)
                    errln("previous() failed to move backward: previous() on position "
                                    + lastP + " yielded " + p);

                result.add(0, text.substring(p, lastP));
            }
            else {
                if (lastP != 0)
                    errln("previous() returned DONE prematurely: offset was "
                                    + lastP + " instead of 0");
            }
            lastP = p;
        }
        return result;
    }

    /**
     * Compares two fragment lists pairwise and reports one error for every
     * region in which they disagree.
     *
     * <p>On a mismatch the list whose running end offset is smaller is advanced
     * until both end offsets coincide again (or one list is exhausted); the
     * fragments of both lists that fall in that region are logged, and the
     * comparison resumes after it. Trailing fragments left in only one list
     * are not reported.
     *
     * @param f1Name label for the first list in log output
     * @param f2Name label for the second list in log output
     * @param f1     first fragment list
     * @param f2     second fragment list
     */
    private void compareFragmentLists(String f1Name, String f2Name, List<String> f1, List<String> f2) {
        int p1 = 0;
        int p2 = 0;
        // t1/t2: end offset of the fragment at p1/p2. They are equal at the top
        // of every iteration.
        int t1 = 0;
        int t2 = 0;

        while (p1 < f1.size() && p2 < f2.size()) {
            String s1 = f1.get(p1);
            String s2 = f2.get(p2);
            t1 += s1.length();
            t2 += s2.length();

            if (s1.equals(s2)) {
                debugLogln(" >" + s1 + "<");
                ++p1;
                ++p2;
            }
            else {
                // s1 and s2 are already counted in t1/t2, so resynchronisation
                // starts at the NEXT fragment of each list.
                int resyncP1 = p1 + 1;
                int resyncP2 = p2 + 1;
                while (t1 != t2) {
                    if (t1 < t2) {
                        if (resyncP1 >= f1.size()) {
                            break;
                        }
                        t1 += f1.get(resyncP1).length();
                        ++resyncP1;
                    }
                    else {
                        if (resyncP2 >= f2.size()) {
                            break;
                        }
                        t2 += f2.get(resyncP2).length();
                        ++resyncP2;
                    }
                }

                logln("*** " + f1Name + " has:");
                for (; p1 < resyncP1; ++p1) {
                    debugLogln(" *** >" + f1.get(p1) + "<");
                }
                logln("***** " + f2Name + " has:");
                for (; p2 < resyncP2; ++p2) {
                    debugLogln(" ***** >" + f2.get(p2) + "<");
                }
                errln("Discrepancy between " + f1Name + " and " + f2Name);
            }
        }
    }

    /**
     * Checks {@code following(i)} for every offset of the text.
     *
     * @param bi         the iterator, already set to {@code text}
     * @param text       the text being iterated
     * @param boundaries expected boundaries in the layout built by
     *                   {@link #generalIteratorTest}
     */
    private void _testFollowing(BreakIterator bi, String text, int[] boundaries) {
        logln("testFollowing():");
        int p = 2;
        for (int i = 0; i <= text.length(); i++) {
            // following(i) must be strictly greater than i, so step past a
            // boundary as soon as i reaches it.
            if (i == boundaries[p])
                ++p;

            int b = bi.following(i);
            logln("bi.following(" + i + ") -> " + b);
            if (b != boundaries[p])
                errln("Wrong result from following() for " + i + ": expected " + boundaries[p]
                                + ", got " + b);
        }
    }

    /**
     * Checks {@code preceding(i)} for every offset of the text.
     *
     * @param bi         the iterator, already set to {@code text}
     * @param text       the text being iterated
     * @param boundaries expected boundaries in the layout built by
     *                   {@link #generalIteratorTest}
     */
    private void _testPreceding(BreakIterator bi, String text, int[] boundaries) {
        logln("testPreceding():");
        int p = 0;
        for (int i = 0; i <= text.length(); i++) {
            int b = bi.preceding(i);
            logln("bi.preceding(" + i + ") -> " + b);
            if (b != boundaries[p])
                errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p]
                                + ", got " + b);

            // preceding(i) must be strictly less than i, so a boundary only
            // becomes the expected answer for offsets after it.
            if (i == boundaries[p + 1])
                ++p;
        }
    }

    /**
     * Checks {@code isBoundary(i)} for every offset of the text.
     *
     * @param bi         the iterator, already set to {@code text}
     * @param text       the text being iterated
     * @param boundaries expected boundaries in the layout built by
     *                   {@link #generalIteratorTest}
     */
    private void _testIsBoundary(BreakIterator bi, String text, int[] boundaries) {
        logln("testIsBoundary():");
        int p = 1;
        boolean isB;
        for (int i = 0; i <= text.length(); i++) {
            isB = bi.isBoundary(i);
            logln("bi.isBoundary(" + i + ") -> " + isB);

            if (i == boundaries[p]) {
                if (!isB)
                    errln("Wrong result from isBoundary() for " + i + ": expected true, got false");
                ++p;
            }
            else {
                if (isB)
                    errln("Wrong result from isBoundary() for " + i + ": expected false, got true");
            }
        }
    }

    /**
     * Checks that {@code next(n)} on a clone lands on the same offset as
     * {@code n} single steps on the original, forward from the start (positive
     * {@code n}, compared with {@code next()}) and backward from the end
     * (negative {@code n}, compared with {@code previous()}).
     *
     * @param iterator the iterator under test, already set to {@code testText}
     * @param testText the text being iterated (not read here)
     */
    private void doMultipleSelectionTest(BreakIterator iterator, String testText)
    {
        logln("Multiple selection test...");
        BreakIterator testIterator = (BreakIterator)iterator.clone();
        int offset = iterator.first();
        int testOffset;
        int count = 0;

        do {
            testIterator.first();
            testOffset = testIterator.next(count);
            logln("next(" + count + ") -> " + testOffset);
            if (offset != testOffset)
                errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);

            if (offset != BreakIterator.DONE) {
                count++;
                offset = iterator.next();
            }
        } while (offset != BreakIterator.DONE);

        // now do it backwards...
        offset = iterator.last();
        count = 0;

        do {
            testIterator.last();
            testOffset = testIterator.next(count);
            logln("next(" + count + ") -> " + testOffset);
            if (offset != testOffset)
                errln("next(n) and previous() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and previous() had " + offset);

            if (offset != BreakIterator.DONE) {
                count--;
                offset = iterator.previous();
            }
        } while (offset != BreakIterator.DONE);
    }


    /**
     * Checks two invariants over every ordered pair of characters drawn from
     * {@code testChars}: no break between CR and LF, and no break before a
     * non-spacing or enclosing mark unless it follows a line terminator.
     * Gives up after 75 reported errors.
     *
     * @param tb        the iterator under test; its text is replaced repeatedly
     * @param testChars the characters to combine
     */
    private void doOtherInvariantTest(BreakIterator tb, String testChars)
    {
        StringBuilder work = new StringBuilder("a\r\na");
        int errorCount = 0;

        // a break should never occur between CR and LF
        for (int i = 0; i < testChars.length(); i++) {
            work.setCharAt(0, testChars.charAt(i));
            for (int j = 0; j < testChars.length(); j++) {
                work.setCharAt(3, testChars.charAt(j));
                tb.setText(work.toString());
                for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
                    if (k == 2) {
                        errln("Break between CR and LF in string U+" + Integer.toHexString(
                                        (int)(work.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(
                                        (int)(work.charAt(3))));
                        errorCount++;
                        if (errorCount >= 75)
                            return;
                    }
            }
        }

        // a break should never occur before a non-spacing mark, unless it's preceded
        // by a line terminator
        work.setLength(0);
        work.append("aaaa");
        for (int i = 0; i < testChars.length(); i++) {
            char c = testChars.charAt(i);
            if (c == '\n' || c == '\r' || c == '\u2029' || c == '\u2028' || c == '\u0003')
                continue;
            work.setCharAt(1, c);
            for (int j = 0; j < testChars.length(); j++) {
                c = testChars.charAt(j);
                if (Character.getType(c) != Character.NON_SPACING_MARK && Character.getType(c)
                                != Character.ENCLOSING_MARK)
                    continue;
                work.setCharAt(2, c);
                tb.setText(work.toString());
                for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
                    if (k == 2) {
                        errln("Break between U+" + Integer.toHexString((int)(work.charAt(1)))
                                        + " and U+" + Integer.toHexString((int)(work.charAt(2))));
                        errorCount++;
                        if (errorCount >= 75)
                            return;
                    }
            }
        }
    }

    /**
     * Logs a string with every character outside printable ASCII (space up to
     * but not including DEL) replaced by a backslash-u escape padded to four
     * hex digits.
     *
     * @param s the string to log
     */
    public void debugLogln(String s) {
        final String zeros = "0000";
        String temp;
        StringBuilder out = new StringBuilder();
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c >= ' ' && c < '\u007f')
                out.append(c);
            else {
                out.append("\\u");
                temp = Integer.toHexString((int)c);
                // A char has at most four hex digits, so the pad length is never negative.
                out.append(zeros.substring(0, 4 - temp.length()));
                out.append(temp);
            }
        }
        logln(out.toString());
    }

    //=========================================================================
    // tests
    //=========================================================================


    /**
     * Word breaking: a combining mark inside a word must not split it.
     *
     * @bug 4097779
     */
    public void TestBug4097779() {
        List<String> wordSelectionData = new ArrayList<String>(2);

        wordSelectionData.add("aa\u0300a");
        wordSelectionData.add(" ");

        generalIteratorTest(wordBreak, wordSelectionData);
    }

    /**
     * Word breaking of Korean text, as precomposed syllables and as conjoining jamo.
     *
     * @bug 4098467
     */
    public void TestBug4098467Words() {
        List<String> wordSelectionData = new ArrayList<String>();

        // What follows is a string of Korean characters (I found it in the Yellow Pages
        // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
        // it correctly), first as precomposed syllables, and then as conjoining jamo.
        // Both sequences should be semantically identical and break the same way.
        // precomposed syllables...
        wordSelectionData.add("\uc0c1\ud56d");
        wordSelectionData.add(" ");
        wordSelectionData.add("\ud55c\uc778");
        wordSelectionData.add(" ");
        wordSelectionData.add("\uc5f0\ud569");
        wordSelectionData.add(" ");
        wordSelectionData.add("\uc7a5\ub85c\uad50\ud68c");
        wordSelectionData.add(" ");
        // conjoining jamo...
        wordSelectionData.add("\u1109\u1161\u11bc\u1112\u1161\u11bc");
        wordSelectionData.add(" ");
        wordSelectionData.add("\u1112\u1161\u11ab\u110b\u1175\u11ab");
        wordSelectionData.add(" ");
        wordSelectionData.add("\u110b\u1167\u11ab\u1112\u1161\u11b8");
        wordSelectionData.add(" ");
        wordSelectionData.add("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c");
        wordSelectionData.add(" ");

        generalIteratorTest(wordBreak, wordSelectionData);
    }


    /**
     * Sentence breaking at the boundary between CJK and other letters.
     *
     * @bug 4111338
     */
    public void TestBug4111338() {
        List<String> sentenceSelectionData = new ArrayList<String>();

        // test for bug #4111338: Don't break sentences at the boundary between CJK
        // and other letters
        sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165:\"JAVA\u821c"
                + "\u8165\u7fc8\u51ce\u306d,\u2494\u56d8\u4ec0\u60b1\u8560\u51ba"
                + "\u611d\u57b6\u2510\u5d46\".\u2029");
        sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"
                + "\u97e4JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"
                + "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
        sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e4"
                + "\u6470\u8790JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8"
                + "\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
        sentenceSelectionData.add("He said, \"I can go there.\"\u2029");

        generalIteratorTest(sentenceBreak, sentenceSelectionData);
    }


    /**
     * Sentence breaking after sentences that end with digits.
     *
     * @bug 4143071
     */
    public void TestBug4143071() {
        List<String> sentenceSelectionData = new ArrayList<String>(3);

        // Make sure sentences that end with digits work right
        sentenceSelectionData.add("Today is the 27th of May, 1998. ");
        sentenceSelectionData.add("Tomorrow will be 28 May 1998. ");
        sentenceSelectionData.add("The day after will be the 30th.\u2029");

        generalIteratorTest(sentenceBreak, sentenceSelectionData);
    }

    /**
     * Sentence breaking after sentences that end with a capital letter.
     *
     * @bug 4152416
     */
    public void TestBug4152416() {
        List<String> sentenceSelectionData = new ArrayList<String>(2);

        // Make sure sentences ending with a capital letter are treated correctly
        sentenceSelectionData.add("The type of all primitive "
                + "<code>boolean</code> values accessed in the target VM. ");
        sentenceSelectionData.add("Calls to xxx will return an "
                + "implementor of this interface.\u2029");

        generalIteratorTest(sentenceBreak, sentenceSelectionData);
    }

    /**
     * Sentence breaking around markup-heavy punctuation.
     *
     * @bug 4152117
     */
    public void TestBug4152117() {
        List<String> sentenceSelectionData = new ArrayList<String>(3);

        // Make sure sentence breaking is handling punctuation correctly
        // [COULD NOT REPRODUCE THIS BUG, BUT TEST IS HERE TO MAKE SURE
        // IT DOESN'T CROP UP]
        sentenceSelectionData.add("Constructs a randomly generated "
                + "BigInteger, uniformly distributed over the range <tt>0</tt> "
                + "to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive. ");
        sentenceSelectionData.add("The uniformity of the distribution "
                + "assumes that a fair source of random bits is provided in "
                + "<tt>rnd</tt>. ");
        sentenceSelectionData.add("Note that this constructor always "
                + "constructs a non-negative BigInteger.\u2029");

        generalIteratorTest(sentenceBreak, sentenceSelectionData);
    }

    /** Line breaking around hyphens, spaces, tabs and the various line terminators. */
    public void TestLineBreak() {
        List<String> lineSelectionData = new ArrayList<String>();

        lineSelectionData.add("Multi-");
        lineSelectionData.add("Level ");
        lineSelectionData.add("example ");
        lineSelectionData.add("of ");
        lineSelectionData.add("a ");
        lineSelectionData.add("semi-");
        lineSelectionData.add("idiotic ");
        lineSelectionData.add("non-");
        lineSelectionData.add("sensical ");
        lineSelectionData.add("(non-");
        lineSelectionData.add("important) ");
        lineSelectionData.add("sentence. ");

        lineSelectionData.add("Hi ");
        lineSelectionData.add("Hello ");
        lineSelectionData.add("How\n");
        lineSelectionData.add("are\r");
        lineSelectionData.add("you\u2028");
        lineSelectionData.add("fine.\t");
        lineSelectionData.add("good. ");

        lineSelectionData.add("Now\r");
        lineSelectionData.add("is\n");
        lineSelectionData.add("the\r\n");
        lineSelectionData.add("time\n");
        lineSelectionData.add("\r");
        lineSelectionData.add("for\r");
        lineSelectionData.add("\r");
        lineSelectionData.add("all");

        generalIteratorTest(lineBreak, lineSelectionData);
    }

    /**
     * Line breaking of CJK characters followed by runs of punctuation.
     *
     * @bug 4068133
     */
    public void TestBug4068133() {
        List<String> lineSelectionData = new ArrayList<String>(9);

        lineSelectionData.add("\u96f6");
        lineSelectionData.add("\u4e00\u3002");
        lineSelectionData.add("\u4e8c\u3001");
        lineSelectionData.add("\u4e09\u3002\u3001");
        lineSelectionData.add("\u56db\u3001\u3002\u3001");
        lineSelectionData.add("\u4e94,");
        lineSelectionData.add("\u516d.");
        lineSelectionData.add("\u4e03.\u3001,\u3002");
        lineSelectionData.add("\u516b");

        generalIteratorTest(lineBreak, lineSelectionData);
    }

    /**
     * Line breaking must not split at a no-break space.
     *
     * @bug 4086052
     */
    public void TestBug4086052() {
        List<String> lineSelectionData = new ArrayList<String>(1);

        lineSelectionData.add("foo\u00a0bar ");
//        lineSelectionData.addElement("foo\ufeffbar");

        generalIteratorTest(lineBreak, lineSelectionData);
    }

    /**
     * Line breaking around commas and parentheses.
     *
     * @bug 4097920
     */
    public void TestBug4097920() {
        List<String> lineSelectionData = new ArrayList<String>(3);

        lineSelectionData.add("dog,cat,mouse ");
        lineSelectionData.add("(one)");
        lineSelectionData.add("(two)\n");
        generalIteratorTest(lineBreak, lineSelectionData);
    }



    /**
     * Line breaking after fullwidth sentence punctuation.
     *
     * @bug 4117554
     */
    public void TestBug4117554Lines() {
        List<String> lineSelectionData = new ArrayList<String>(3);

        // Fullwidth .!? should be treated as postJwrd
        lineSelectionData.add("\u4e01\uff0e");
        lineSelectionData.add("\u4e02\uff01");
        lineSelectionData.add("\u4e03\uff1f");

        generalIteratorTest(lineBreak, lineSelectionData);
    }

    /** Line breaking must keep mixed letter/digit sequences together. */
    public void TestLettersAndDigits() {
        // a character sequence such as "X11" or "30F3" or "native2ascii" should
        // be kept together as a single word
        List<String> lineSelectionData = new ArrayList<String>(3);

        lineSelectionData.add("X11 ");
        lineSelectionData.add("30F3 ");
        lineSelectionData.add("native2ascii");

        generalIteratorTest(lineBreak, lineSelectionData);
    }


    // Base letter + combining mark pairs used by TestCharacterBreak.
    private static final String graveS = "S\u0300";
    private static final String acuteBelowI = "i\u0317";
    private static final String acuteE = "e\u0301";
    private static final String circumflexA = "a\u0302";
    private static final String tildeE = "e\u0303";

    /** Character breaking: combining sequences and CR LF stay together. */
    public void TestCharacterBreak() {
        List<String> characterSelectionData = new ArrayList<String>();

        characterSelectionData.add(graveS);
        characterSelectionData.add(acuteBelowI);
        characterSelectionData.add("m");
        characterSelectionData.add("p");
        characterSelectionData.add("l");
        characterSelectionData.add(acuteE);
        characterSelectionData.add(" ");
        characterSelectionData.add("s");
        characterSelectionData.add(circumflexA);
        characterSelectionData.add("m");
        characterSelectionData.add("p");
        characterSelectionData.add("l");
        characterSelectionData.add(tildeE);
        characterSelectionData.add(".");
        characterSelectionData.add("w");
        characterSelectionData.add(circumflexA);
        characterSelectionData.add("w");
        characterSelectionData.add("a");
        characterSelectionData.add("f");
        characterSelectionData.add("q");
        characterSelectionData.add("\n");
        characterSelectionData.add("\r");
        characterSelectionData.add("\r\n");
        characterSelectionData.add("\n");

        generalIteratorTest(characterBreak, characterSelectionData);
    }

    /**
     * Character breaking of Korean text, as precomposed syllables and as conjoining jamo.
     *
     * @bug 4098467
     */
    public void TestBug4098467Characters() {
        List<String> characterSelectionData = new ArrayList<String>();

        // What follows is a string of Korean characters (I found it in the Yellow Pages
        // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
        // it correctly), first as precomposed syllables, and then as conjoining jamo.
        // Both sequences should be semantically identical and break the same way.
        // precomposed syllables...
        characterSelectionData.add("\uc0c1");
        characterSelectionData.add("\ud56d");
        characterSelectionData.add(" ");
        characterSelectionData.add("\ud55c");
        characterSelectionData.add("\uc778");
        characterSelectionData.add(" ");
        characterSelectionData.add("\uc5f0");
        characterSelectionData.add("\ud569");
        characterSelectionData.add(" ");
        characterSelectionData.add("\uc7a5");
        characterSelectionData.add("\ub85c");
        characterSelectionData.add("\uad50");
        characterSelectionData.add("\ud68c");
        characterSelectionData.add(" ");
        // conjoining jamo...
        characterSelectionData.add("\u1109\u1161\u11bc");
        characterSelectionData.add("\u1112\u1161\u11bc");
        characterSelectionData.add(" ");
        characterSelectionData.add("\u1112\u1161\u11ab");
        characterSelectionData.add("\u110b\u1175\u11ab");
        characterSelectionData.add(" ");
        characterSelectionData.add("\u110b\u1167\u11ab");
        characterSelectionData.add("\u1112\u1161\u11b8");
        characterSelectionData.add(" ");
        characterSelectionData.add("\u110c\u1161\u11bc");
        characterSelectionData.add("\u1105\u1169");
        characterSelectionData.add("\u1100\u116d");
        characterSelectionData.add("\u1112\u116c");

        generalIteratorTest(characterBreak, characterSelectionData);
    }

    /** Title breaking on a simple English sentence. */
    public void TestTitleBreak()
    {
        List<String> titleData = new ArrayList<String>();
        titleData.add(" ");
        titleData.add("This ");
        titleData.add("is ");
        titleData.add("a ");
        titleData.add("simple ");
        titleData.add("sample ");
        titleData.add("sentence. ");
        titleData.add("This ");

        generalIteratorTest(titleBreak, titleData);
    }



    /**
     * {@code isBoundary()} on a sub-range iterator: offsets before the begin
     * index must throw IllegalArgumentException, offsets at or after it must not.
     *
     * @bug 4153072
     */
    public void TestBug4153072() {
        BreakIterator iter = BreakIterator.getWordInstance();
        String str = "...Hello, World!...";
        int begin = 3;
        int end = str.length() - 3;
        // not used boolean gotException = false;


        iter.setText(new StringCharacterIterator(str, begin, end, begin));
        for (int index = -1; index < begin + 1; ++index) {
            try {
                iter.isBoundary(index);
                if (index < begin)
                    errln("Didn't get exception with offset = " + index +
                                    " and begin index = " + begin);
            }
            catch (IllegalArgumentException e) {
                if (index >= begin)
                    errln("Got exception with offset = " + index +
                                    " and begin index = " + begin);
            }
        }
    }


    /** Line breaking: a fullwidth comma sticks to the preceding character. */
    public void TestBug4146175Lines() {
        List<String> lineSelectionData = new ArrayList<String>(2);

        // the fullwidth comma should stick to the preceding Japanese character
        lineSelectionData.add("\u7d42\uff0c");
        lineSelectionData.add("\u308f");

        generalIteratorTest(lineBreak, lineSelectionData);
    }

    // Character sample combined pairwise by the invariant tests.
    private static final String cannedTestChars
        = "\u0000\u0001\u0002\u0003\u0004 !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"
        + "\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ad\u00ae\u00af\u00b0\u00b2\u00b3"
        + "\u00b4\u00b9\u00bb\u00bc\u00bd\u02b0\u02b1\u02b2\u02b3\u02b4\u0300\u0301\u0302\u0303"
        + "\u0304\u05d0\u05d1\u05d2\u05d3\u05d4\u0903\u093e\u093f\u0940\u0949\u0f3a\u0f3b\u2000"
        + "\u2001\u2002\u200c\u200d\u200e\u200f\u2010\u2011\u2012\u2028\u2029\u202a\u203e\u203f"
        + "\u2040\u20dd\u20de\u20df\u20e0\u2160\u2161\u2162\u2163\u2164";

    /** Runs the CR/LF and combining-mark invariants against a sentence iterator. */
    public void TestSentenceInvariants()
    {
        BreakIterator e = BreakIterator.getSentenceInstance();
        doOtherInvariantTest(e, cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff");
    }

    /** Runs the general navigation checks on an empty text. */
    public void TestEmptyString()
    {
        String text = "";
        List<String> x = new ArrayList<String>(1);
        x.add(text);

        generalIteratorTest(lineBreak, x);
    }

    /** Checks that both available-locale queries return a non-empty array. */
    public void TestGetAvailableLocales()
    {
        Locale[] locList = BreakIterator.getAvailableLocales();

        if (locList.length == 0)
            errln("getAvailableLocales() returned an empty list!");
        // I have no idea how to test this function...

        com.ibm.icu.util.ULocale[] ulocList = BreakIterator.getAvailableULocales();
        if (ulocList.length == 0) {
            errln("getAvailableULocales() returned an empty list!");
        } else {
            logln("getAvailableULocales() returned " + ulocList.length + " locales");
        }
    }


    /**
     * Word breaking at the end of the text: breaks before and after a trailing period.
     *
     * @bug 4068137
     */
    public void TestEndBehavior()
    {
        String testString = "boo.";
        BreakIterator wb = BreakIterator.getWordInstance();
        wb.setText(testString);

        if (wb.first() != 0)
            errln("Didn't get break at beginning of string.");
        if (wb.next() != 3)
            errln("Didn't get break before period in \"boo.\"");
        if (wb.current() != 4 && wb.next() != 4)
            errln("Didn't get break at end of string.");
    }

    // The Following two tests are ported from ICU4C 1.8.1 [Richard/GCL]
    /**
     * Port From:   ICU4C v1.8.1 : textbounds : IntlTestTextBoundary
     * Source File: $ICU4CRoot/source/test/intltest/ittxtbd.cpp
     **/
    /**
     * test methods preceding, following and isBoundary
     **/
    public void TestPreceding() {
        String words3 = "aaa bbb ccc";
        BreakIterator e = BreakIterator.getWordInstance(Locale.getDefault());
        e.setText( words3 );
        e.first();
        int p1 = e.next();
        int p2 = e.next();
        int p3 = e.next();
        int p4 = e.next();

        int f = e.following(p2+1);
        int p = e.preceding(p2+1);
        if (f!=p3)
            errln("IntlTestTextBoundary::TestPreceding: f!=p3");
        if (p!=p2)
            errln("IntlTestTextBoundary::TestPreceding: p!=p2");

        if (p1+1!=p2)
            errln("IntlTestTextBoundary::TestPreceding: p1+1!=p2");

        if (p3+1!=p4)
            errln("IntlTestTextBoundary::TestPreceding: p3+1!=p4");

        if (!e.isBoundary(p2) || e.isBoundary(p2+1) || !e.isBoundary(p3))
        {
            errln("IntlTestTextBoundary::TestPreceding: isBoundary err");
        }
    }


    /**
     * Line breaking around apostrophes and contractions.
     * Bug 4450804
     */
    public void TestLineBreakContractions() {
        List<String> expected = new ArrayList<String>(7);
        expected.add("These ");
        expected.add("are ");
        expected.add("'foobles'. ");
        expected.add("Don't ");
        expected.add("you ");
        expected.add("like ");
        expected.add("them?");
        generalIteratorTest(lineBreak, expected);
    }

    /**
     * Creates every break-iterator kind for every available ULocale and
     * reports any null result or exception.
     * Ticket#5615
     */
    public void TestT5615() {
        com.ibm.icu.util.ULocale[] ulocales = BreakIterator.getAvailableULocales();
        int type = 0;
        com.ibm.icu.util.ULocale loc = null;
        try {
            for (int i = 0; i < ulocales.length; i++) {
                loc = ulocales[i];
                for (type = 0; type < 5 /* 5 = BreakIterator.KIND_COUNT */; ++type) {
                    BreakIterator brk = BreakIterator.getBreakInstance(loc, type);
                    if (brk == null) {
                        errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc);
                    }
                }
            }
        } catch (Exception e) {
            errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc + " / exception: " + e.getMessage());
        }
    }

    /*
     * Test case for Ticket#10721. BreakIterator factory method should throw NPE
     * when specified locale is null.
     */
    public void TestNullLocale() {
        Locale loc = null;
        ULocale uloc = null;

        @SuppressWarnings("unused")
        BreakIterator brk;

        // Character
        try {
            brk = BreakIterator.getCharacterInstance(loc);
            errln("getCharacterInstance((Locale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }
        try {
            brk = BreakIterator.getCharacterInstance(uloc);
            errln("getCharacterInstance((ULocale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }

        // Line
        try {
            brk = BreakIterator.getLineInstance(loc);
            errln("getLineInstance((Locale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }
        try {
            brk = BreakIterator.getLineInstance(uloc);
            errln("getLineInstance((ULocale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }

        // Sentence
        try {
            brk = BreakIterator.getSentenceInstance(loc);
            errln("getSentenceInstance((Locale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }
        try {
            brk = BreakIterator.getSentenceInstance(uloc);
            errln("getSentenceInstance((ULocale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }

        // Title
        try {
            brk = BreakIterator.getTitleInstance(loc);
            errln("getTitleInstance((Locale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }
        try {
            brk = BreakIterator.getTitleInstance(uloc);
            errln("getTitleInstance((ULocale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }

        // Word
        try {
            brk = BreakIterator.getWordInstance(loc);
            errln("getWordInstance((Locale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }
        try {
            brk = BreakIterator.getWordInstance(uloc);
            errln("getWordInstance((ULocale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }
    }

    /**
     * Test FilteredBreakIteratorBuilder newly introduced.
     *
     * <p>Each braced section builds a filtered sentence iterator from a
     * differently configured builder and checks the offsets returned by
     * successive {@code next()} calls on the same sample text.
     */
    public void TestFilteredBreakIteratorBuilder() {
        FilteredBreakIteratorBuilder builder;
        BreakIterator baseBI;
        BreakIterator filteredBI;

        String text = "In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."; // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
        String ABBR_MR = "Mr.";
        String ABBR_CAPT = "Capt.";

        {
            logln("Constructing empty builder\n");
            builder = FilteredBreakIteratorBuilder.createInstance();

            logln("Constructing base BI\n");
            baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);

            logln("Building new BI\n");
            filteredBI = builder.build(baseBI);

            logln("Testing:");
            filteredBI.setText(text);
            assertEquals("1st next", 20, filteredBI.next());
            assertEquals("1st next", 84, filteredBI.next());
            assertEquals("1st next", 90, filteredBI.next());
            assertEquals("1st next", 181, filteredBI.next());
            assertEquals("1st next", 278, filteredBI.next());
            filteredBI.first();
        }

        {
            logln("Constructing empty builder\n");
            builder = FilteredBreakIteratorBuilder.createInstance();

            logln("Adding Mr. as an exception\n");

            assertEquals("2.1 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR));
            assertEquals("2.2 suppressBreakAfter", false, builder.suppressBreakAfter(ABBR_MR));
            assertEquals("2.3 unsuppressBreakAfter", true, builder.unsuppressBreakAfter(ABBR_MR));
            assertEquals("2.4 unsuppressBreakAfter", false, builder.unsuppressBreakAfter(ABBR_MR));
            assertEquals("2.5 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR));

            logln("Constructing base BI\n");
            baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);

            logln("Building new BI\n");
            filteredBI = builder.build(baseBI);

            logln("Testing:");
            filteredBI.setText(text);
            assertEquals("2nd next", 84, filteredBI.next());
            assertEquals("2nd next", 90, filteredBI.next());
            assertEquals("2nd next", 278, filteredBI.next());
            filteredBI.first();
        }


        {
            logln("Constructing empty builder\n");
            builder = FilteredBreakIteratorBuilder.createInstance();

            logln("Adding Mr. and Capt as an exception\n");
            assertEquals("3.1 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR));
            assertEquals("3.2 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_CAPT));

            logln("Constructing base BI\n");
            baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);

            logln("Building new BI\n");
            filteredBI = builder.build(baseBI);

            logln("Testing:");
            filteredBI.setText(text);
            assertEquals("3rd next", 84, filteredBI.next());
            assertEquals("3rd next", 278, filteredBI.next());
            filteredBI.first();
        }

        {
            logln("Constructing English builder\n");
            builder = FilteredBreakIteratorBuilder.createInstance(ULocale.ENGLISH);

            logln("Constructing base BI\n");
            baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);

            logln("unsuppressing 'Capt'");
            assertEquals("4.1 unsuppressBreakAfter", true, builder.unsuppressBreakAfter(ABBR_CAPT));

            logln("Building new BI\n");
            filteredBI = builder.build(baseBI);

            if(filteredBI != null) {
                logln("Testing:");
                filteredBI.setText(text);
                assertEquals("4th next", 84, filteredBI.next());
                assertEquals("4th next", 90, filteredBI.next());
                assertEquals("4th next", 278, filteredBI.next());
                filteredBI.first();
            }
        }

        {
            logln("Constructing English builder\n");
            builder = FilteredBreakIteratorBuilder.createInstance(ULocale.ENGLISH);

            logln("Constructing base BI\n");
            baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);

            logln("Building new BI\n");
            filteredBI = builder.build(baseBI);

            if(filteredBI != null) {
                logln("Testing:");
                filteredBI.setText(text);

                assertEquals("5th next", 84, filteredBI.next());
                assertEquals("5th next", 278, filteredBI.next());
                filteredBI.first();
            }
        }

        {
            logln("Constructing French builder");
            builder = FilteredBreakIteratorBuilder.createInstance(ULocale.FRENCH);

            logln("Constructing base BI\n");
            baseBI = BreakIterator.getSentenceInstance(Locale.FRENCH);

            logln("Building new BI\n");
            filteredBI = builder.build(baseBI);

            if(filteredBI != null) {
                logln("Testing:");
                filteredBI.setText(text);
                assertEquals("6th next", 20, filteredBI.next());
                assertEquals("6th next", 84, filteredBI.next());
                filteredBI.first();
            }
        }
    }
}