1 /* 2 ******************************************************************************* 3 * Copyright (C) 1996-2013, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 package com.ibm.icu.dev.test.rbbi; 8 9 //Regression testing of RuleBasedBreakIterator 10 // 11 // TODO: These tests should be mostly retired. 12 // Much of the test data that was originally here was removed when the RBBI rules 13 // were updated to match the Unicode boundary TRs, and the data was found to be invalid. 14 // Much of the remaining data has been moved into the rbbitst.txt test data file, 15 // which is common between ICU4C and ICU4J. The remaining test data should also be moved, 16 // or simply retired if it is no longer interesting. 17 import java.text.CharacterIterator; 18 import java.util.ArrayList; 19 import java.util.List; 20 21 import com.ibm.icu.dev.test.TestFmwk; 22 import com.ibm.icu.text.BreakIterator; 23 import com.ibm.icu.text.RuleBasedBreakIterator; 24 import com.ibm.icu.util.ULocale; 25 26 public class RBBITest extends TestFmwk { 27 main(String[] args)28 public static void main(String[] args) throws Exception { 29 new RBBITest().run(args); 30 } 31 RBBITest()32 public RBBITest() { 33 } 34 35 private static final String halfNA = "\u0928\u094d\u200d"; /* 36 * halfform NA = devanigiri NA + virama(supresses 37 * inherent vowel)+ zero width joiner 38 */ 39 40 // tests default rules based character iteration. 41 // Builds a new iterator from the source rules in the default (prebuilt) iterator. 42 // TestDefaultRuleBasedCharacterIteration()43 public void TestDefaultRuleBasedCharacterIteration() { 44 RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(); 45 logln("Testing the RBBI for character iteration by using default rules"); 46 47 // fetch the rules used to create the above RuleBasedBreakIterator 48 String defaultRules = rbbi.toString(); 49 50 RuleBasedBreakIterator charIterDefault = null; 51 try { 52 charIterDefault = new RuleBasedBreakIterator(defaultRules); 53 } catch (IllegalArgumentException iae) { 54 errln("ERROR: failed construction in TestDefaultRuleBasedCharacterIteration()" + iae.toString()); 55 } 56 57 List<String> chardata = new ArrayList<String>(); 58 chardata.add("H"); 59 chardata.add("e"); 60 chardata.add("l"); 61 chardata.add("l"); 62 chardata.add("o"); 63 chardata.add("e\u0301"); // acuteE 64 chardata.add("&"); 65 chardata.add("e\u0303"); // tildaE 66 // devanagiri characters for Hindi support 67 chardata.add("\u0906"); // devanagiri AA 68 // chardata.add("\u093e\u0901"); //devanagiri vowelsign AA+ chandrabindhu 69 chardata.add("\u0916\u0947"); // devanagiri KHA+vowelsign E 70 chardata.add("\u0938\u0941\u0902"); // devanagiri SA+vowelsign U + anusvara(bindu) 71 chardata.add("\u0926"); // devanagiri consonant DA 72 chardata.add("\u0930"); // devanagiri consonant RA 73 // chardata.add("\u0939\u094c"); //devanagiri HA+vowel sign AI 74 chardata.add("\u0964"); // devanagiri danda 75 // end hindi characters 76 chardata.add("A\u0302"); // circumflexA 77 chardata.add("i\u0301"); // acuteBelowI 78 // conjoining jamo... 79 chardata.add("\u1109\u1161\u11bc"); 80 chardata.add("\u1112\u1161\u11bc"); 81 chardata.add("\n"); 82 chardata.add("\r\n"); // keep CRLF sequences together 83 chardata.add("S\u0300"); // graveS 84 chardata.add("i\u0301"); // acuteBelowI 85 chardata.add("!"); 86 87 // What follows is a string of Korean characters (I found it in the Yellow Pages 88 // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed 89 // it correctly), first as precomposed syllables, and then as conjoining jamo. 90 // Both sequences should be semantically identical and break the same way. 91 // precomposed syllables... 92 chardata.add("\uc0c1"); 93 chardata.add("\ud56d"); 94 chardata.add(" "); 95 chardata.add("\ud55c"); 96 chardata.add("\uc778"); 97 chardata.add(" "); 98 chardata.add("\uc5f0"); 99 chardata.add("\ud569"); 100 chardata.add(" "); 101 chardata.add("\uc7a5"); 102 chardata.add("\ub85c"); 103 chardata.add("\uad50"); 104 chardata.add("\ud68c"); 105 chardata.add(" "); 106 // conjoining jamo... 107 chardata.add("\u1109\u1161\u11bc"); 108 chardata.add("\u1112\u1161\u11bc"); 109 chardata.add(" "); 110 chardata.add("\u1112\u1161\u11ab"); 111 chardata.add("\u110b\u1175\u11ab"); 112 chardata.add(" "); 113 chardata.add("\u110b\u1167\u11ab"); 114 chardata.add("\u1112\u1161\u11b8"); 115 chardata.add(" "); 116 chardata.add("\u110c\u1161\u11bc"); 117 chardata.add("\u1105\u1169"); 118 chardata.add("\u1100\u116d"); 119 chardata.add("\u1112\u116c"); 120 121 generalIteratorTest(charIterDefault, chardata); 122 123 } 124 TestDefaultRuleBasedWordIteration()125 public void TestDefaultRuleBasedWordIteration() { 126 logln("Testing the RBBI for word iteration using default rules"); 127 RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) BreakIterator.getWordInstance(); 128 // fetch the rules used to create the above RuleBasedBreakIterator 129 String defaultRules = rbbi.toString(); 130 131 RuleBasedBreakIterator wordIterDefault = null; 132 try { 133 wordIterDefault = new RuleBasedBreakIterator(defaultRules); 134 } catch (IllegalArgumentException iae) { 135 errln("ERROR: failed construction in TestDefaultRuleBasedWordIteration() -- custom rules" + iae.toString()); 136 } 137 138 List<String> worddata = new ArrayList<String>(); 139 worddata.add("Write"); 140 worddata.add(" "); 141 worddata.add("wordrules"); 142 worddata.add("."); 143 worddata.add(" "); 144 // worddata.add("alpha-beta-gamma"); 145 worddata.add(" "); 146 worddata.add("\u092f\u0939"); 147 worddata.add(" "); 148 worddata.add("\u0939\u093f" + halfNA + "\u0926\u0940"); 149 worddata.add(" "); 150 worddata.add("\u0939\u0948"); 151 // worddata.add("\u0964"); //danda followed by a space 152 worddata.add(" "); 153 worddata.add("\u0905\u093e\u092a"); 154 worddata.add(" "); 155 worddata.add("\u0938\u093f\u0916\u094b\u0917\u0947"); 156 worddata.add("?"); 157 worddata.add(" "); 158 worddata.add("\r"); 159 worddata.add("It's"); 160 worddata.add(" "); 161 // worddata.add("$30.10"); 162 worddata.add(" "); 163 worddata.add(" "); 164 worddata.add("Badges"); 165 worddata.add("?"); 166 worddata.add(" "); 167 worddata.add("BADGES"); 168 worddata.add("!"); 169 worddata.add("1000,233,456.000"); 170 worddata.add(" "); 171 172 generalIteratorTest(wordIterDefault, worddata); 173 } 174 175 // private static final String kParagraphSeparator = "\u2029"; 176 private static final String kLineSeparator = "\u2028"; 177 TestDefaultRuleBasedSentenceIteration()178 public void TestDefaultRuleBasedSentenceIteration() { 179 logln("Testing the RBBI for sentence iteration using default rules"); 180 RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) BreakIterator.getSentenceInstance(); 181 182 // fetch the rules used to create the above RuleBasedBreakIterator 183 String defaultRules = rbbi.toString(); 184 RuleBasedBreakIterator sentIterDefault = null; 185 try { 186 sentIterDefault = new RuleBasedBreakIterator(defaultRules); 187 } catch (IllegalArgumentException iae) { 188 errln("ERROR: failed construction in TestDefaultRuleBasedSentenceIteration()" + iae.toString()); 189 } 190 191 List<String> sentdata = new ArrayList<String>(); 192 sentdata.add("(This is it.) "); 193 sentdata.add("Testing the sentence iterator. "); 194 sentdata.add("\"This isn\'t it.\" "); 195 sentdata.add("Hi! "); 196 sentdata.add("This is a simple sample sentence. "); 197 sentdata.add("(This is it.) "); 198 sentdata.add("This is a simple sample sentence. "); 199 sentdata.add("\"This isn\'t it.\" "); 200 sentdata.add("Hi! "); 201 sentdata.add("This is a simple sample sentence. "); 202 sentdata.add("It does not have to make any sense as you can see. "); 203 sentdata.add("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. "); 204 sentdata.add("Che la dritta via aveo smarrita. "); 205 206 generalIteratorTest(sentIterDefault, sentdata); 207 } 208 TestDefaultRuleBasedLineIteration()209 public void TestDefaultRuleBasedLineIteration() { 210 logln("Testing the RBBI for line iteration using default rules"); 211 RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) RuleBasedBreakIterator.getLineInstance(); 212 // fetch the rules used to create the above RuleBasedBreakIterator 213 String defaultRules = rbbi.toString(); 214 RuleBasedBreakIterator lineIterDefault = null; 215 try { 216 lineIterDefault = new RuleBasedBreakIterator(defaultRules); 217 } catch (IllegalArgumentException iae) { 218 errln("ERROR: failed construction in TestDefaultRuleBasedLineIteration()" + iae.toString()); 219 } 220 221 List<String> linedata = new ArrayList<String>(); 222 linedata.add("Multi-"); 223 linedata.add("Level "); 224 linedata.add("example "); 225 linedata.add("of "); 226 linedata.add("a "); 227 linedata.add("semi-"); 228 linedata.add("idiotic "); 229 linedata.add("non-"); 230 linedata.add("sensical "); 231 linedata.add("(non-"); 232 linedata.add("important) "); 233 linedata.add("sentence. "); 234 235 linedata.add("Hi "); 236 linedata.add("Hello "); 237 linedata.add("How\n"); 238 linedata.add("are\r"); 239 linedata.add("you" + kLineSeparator); 240 linedata.add("fine.\t"); 241 linedata.add("good. "); 242 243 linedata.add("Now\r"); 244 linedata.add("is\n"); 245 linedata.add("the\r\n"); 246 linedata.add("time\n"); 247 linedata.add("\r"); 248 linedata.add("for\r"); 249 linedata.add("\r"); 250 linedata.add("all"); 251 252 generalIteratorTest(lineIterDefault, linedata); 253 254 } 255 256 // ========================================================================= 257 // general test subroutines 258 // ========================================================================= 259 generalIteratorTest(RuleBasedBreakIterator rbbi, List<String> expectedResult)260 private void generalIteratorTest(RuleBasedBreakIterator rbbi, List<String> expectedResult) { 261 StringBuffer buffer = new StringBuffer(); 262 String text; 263 for (int i = 0; i < expectedResult.size(); i++) { 264 text = expectedResult.get(i); 265 buffer.append(text); 266 } 267 text = buffer.toString(); 268 if (rbbi == null) { 269 errln("null iterator, test skipped."); 270 return; 271 } 272 273 rbbi.setText(text); 274 275 List<String> nextResults = _testFirstAndNext(rbbi, text); 276 List<String> previousResults = _testLastAndPrevious(rbbi, text); 277 278 logln("comparing forward and backward..."); 279 int errs = getErrorCount(); 280 compareFragmentLists("forward iteration", "backward iteration", nextResults, previousResults); 281 if (getErrorCount() == errs) { 282 logln("comparing expected and actual..."); 283 compareFragmentLists("expected result", "actual result", expectedResult, nextResults); 284 } 285 286 int[] boundaries = new int[expectedResult.size() + 3]; 287 boundaries[0] = RuleBasedBreakIterator.DONE; 288 boundaries[1] = 0; 289 for (int i = 0; i < expectedResult.size(); i++) { 290 boundaries[i + 2] = boundaries[i + 1] + (expectedResult.get(i).length()); 291 } 292 293 boundaries[boundaries.length - 1] = RuleBasedBreakIterator.DONE; 294 295 _testFollowing(rbbi, text, boundaries); 296 _testPreceding(rbbi, text, boundaries); 297 _testIsBoundary(rbbi, text, boundaries); 298 299 doMultipleSelectionTest(rbbi, text); 300 } 301 _testFirstAndNext(RuleBasedBreakIterator rbbi, String text)302 private List<String> _testFirstAndNext(RuleBasedBreakIterator rbbi, String text) { 303 int p = rbbi.first(); 304 int lastP = p; 305 List<String> result = new ArrayList<String>(); 306 307 if (p != 0) { 308 errln("first() returned " + p + " instead of 0"); 309 } 310 311 while (p != RuleBasedBreakIterator.DONE) { 312 p = rbbi.next(); 313 if (p != RuleBasedBreakIterator.DONE) { 314 if (p <= lastP) { 315 errln("next() failed to move forward: next() on position " 316 + lastP + " yielded " + p); 317 } 318 result.add(text.substring(lastP, p)); 319 } 320 else { 321 if (lastP != text.length()) { 322 errln("next() returned DONE prematurely: offset was " 323 + lastP + " instead of " + text.length()); 324 } 325 } 326 lastP = p; 327 } 328 return result; 329 } 330 _testLastAndPrevious(RuleBasedBreakIterator rbbi, String text)331 private List<String> _testLastAndPrevious(RuleBasedBreakIterator rbbi, String text) { 332 int p = rbbi.last(); 333 int lastP = p; 334 List<String> result = new ArrayList<String>(); 335 336 if (p != text.length()) { 337 errln("last() returned " + p + " instead of " + text.length()); 338 } 339 340 while (p != RuleBasedBreakIterator.DONE) { 341 p = rbbi.previous(); 342 if (p != RuleBasedBreakIterator.DONE) { 343 if (p >= lastP) { 344 errln("previous() failed to move backward: previous() on position " 345 + lastP + " yielded " + p); 346 } 347 348 result.add(0, text.substring(p, lastP)); 349 } 350 else { 351 if (lastP != 0) { 352 errln("previous() returned DONE prematurely: offset was " 353 + lastP + " instead of 0"); 354 } 355 } 356 lastP = p; 357 } 358 return result; 359 } 360 compareFragmentLists(String f1Name, String f2Name, List<String> f1, List<String> f2)361 private void compareFragmentLists(String f1Name, String f2Name, List<String> f1, List<String> f2) { 362 int p1 = 0; 363 int p2 = 0; 364 String s1; 365 String s2; 366 int t1 = 0; 367 int t2 = 0; 368 369 while (p1 < f1.size() && p2 < f2.size()) { 370 s1 = f1.get(p1); 371 s2 = f2.get(p2); 372 t1 += s1.length(); 373 t2 += s2.length(); 374 375 if (s1.equals(s2)) { 376 debugLogln(" >" + s1 + "<"); 377 ++p1; 378 ++p2; 379 } 380 else { 381 int tempT1 = t1; 382 int tempT2 = t2; 383 int tempP1 = p1; 384 int tempP2 = p2; 385 386 while (tempT1 != tempT2 && tempP1 < f1.size() && tempP2 < f2.size()) { 387 while (tempT1 < tempT2 && tempP1 < f1.size()) { 388 tempT1 += (f1.get(tempP1)).length(); 389 ++tempP1; 390 } 391 while (tempT2 < tempT1 && tempP2 < f2.size()) { 392 tempT2 += (f2.get(tempP2)).length(); 393 ++tempP2; 394 } 395 } 396 logln("*** " + f1Name + " has:"); 397 while (p1 <= tempP1 && p1 < f1.size()) { 398 s1 = f1.get(p1); 399 t1 += s1.length(); 400 debugLogln(" *** >" + s1 + "<"); 401 ++p1; 402 } 403 logln("***** " + f2Name + " has:"); 404 while (p2 <= tempP2 && p2 < f2.size()) { 405 s2 = f2.get(p2); 406 t2 += s2.length(); 407 debugLogln(" ***** >" + s2 + "<"); 408 ++p2; 409 } 410 errln("Discrepancy between " + f1Name + " and " + f2Name); 411 } 412 } 413 } 414 _testFollowing(RuleBasedBreakIterator rbbi, String text, int[] boundaries)415 private void _testFollowing(RuleBasedBreakIterator rbbi, String text, int[] boundaries) { 416 logln("testFollowing():"); 417 int p = 2; 418 for(int i = 0; i <= text.length(); i++) { 419 if (i == boundaries[p]) 420 ++p; 421 int b = rbbi.following(i); 422 logln("rbbi.following(" + i + ") -> " + b); 423 if (b != boundaries[p]) 424 errln("Wrong result from following() for " + i + ": expected " + boundaries[p] 425 + ", got " + b); 426 } 427 } 428 _testPreceding(RuleBasedBreakIterator rbbi, String text, int[] boundaries)429 private void _testPreceding(RuleBasedBreakIterator rbbi, String text, int[] boundaries) { 430 logln("testPreceding():"); 431 int p = 0; 432 for(int i = 0; i <= text.length(); i++) { 433 int b = rbbi.preceding(i); 434 logln("rbbi.preceding(" + i + ") -> " + b); 435 if (b != boundaries[p]) 436 errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p] 437 + ", got " + b); 438 if (i == boundaries[p + 1]) 439 ++p; 440 } 441 } 442 _testIsBoundary(RuleBasedBreakIterator rbbi, String text, int[] boundaries)443 private void _testIsBoundary(RuleBasedBreakIterator rbbi, String text, int[] boundaries) { 444 logln("testIsBoundary():"); 445 int p = 1; 446 boolean isB; 447 for(int i = 0; i <= text.length(); i++) { 448 isB = rbbi.isBoundary(i); 449 logln("rbbi.isBoundary(" + i + ") -> " + isB); 450 if(i == boundaries[p]) { 451 if (!isB) 452 errln("Wrong result from isBoundary() for " + i + ": expected true, got false"); 453 ++p; 454 } 455 else { 456 if(isB) 457 errln("Wrong result from isBoundary() for " + i + ": expected false, got true"); 458 } 459 } 460 } doMultipleSelectionTest(RuleBasedBreakIterator iterator, String testText)461 private void doMultipleSelectionTest(RuleBasedBreakIterator iterator, String testText) 462 { 463 logln("Multiple selection test..."); 464 RuleBasedBreakIterator testIterator = (RuleBasedBreakIterator)iterator.clone(); 465 int offset = iterator.first(); 466 int testOffset; 467 int count = 0; 468 469 do { 470 testOffset = testIterator.first(); 471 testOffset = testIterator.next(count); 472 logln("next(" + count + ") -> " + testOffset); 473 if (offset != testOffset) 474 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset); 475 476 if (offset != RuleBasedBreakIterator.DONE) { 477 count++; 478 offset = iterator.next(); 479 } 480 } while (offset != RuleBasedBreakIterator.DONE); 481 482 // now do it backwards... 483 offset = iterator.last(); 484 count = 0; 485 486 do { 487 testOffset = testIterator.last(); 488 testOffset = testIterator.next(count); 489 logln("next(" + count + ") -> " + testOffset); 490 if (offset != testOffset) 491 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset); 492 493 if (offset != RuleBasedBreakIterator.DONE) { 494 count--; 495 offset = iterator.previous(); 496 } 497 } while (offset != RuleBasedBreakIterator.DONE); 498 } 499 debugLogln(String s)500 private void debugLogln(String s) { 501 final String zeros = "0000"; 502 String temp; 503 StringBuffer out = new StringBuffer(); 504 for (int i = 0; i < s.length(); i++) { 505 char c = s.charAt(i); 506 if (c >= ' ' && c < '\u007f') 507 out.append(c); 508 else { 509 out.append("\\u"); 510 temp = Integer.toHexString((int)c); 511 out.append(zeros.substring(0, 4 - temp.length())); 512 out.append(temp); 513 } 514 } 515 logln(out.toString()); 516 } 517 TestThaiDictionaryBreakIterator()518 public void TestThaiDictionaryBreakIterator() { 519 int position; 520 int index; 521 int result[] = { 1, 2, 5, 10, 11, 12, 11, 10, 5, 2, 1, 0 }; 522 char ctext[] = { 523 0x0041, 0x0020, 524 0x0E01, 0x0E32, 0x0E23, 0x0E17, 0x0E14, 0x0E25, 0x0E2D, 0x0E07, 525 0x0020, 0x0041 526 }; 527 String text = new String(ctext); 528 529 ULocale locale = ULocale.createCanonical("th"); 530 BreakIterator b = BreakIterator.getWordInstance(locale); 531 532 b.setText(text); 533 534 index = 0; 535 // Test forward iteration 536 while ((position = b.next())!= BreakIterator.DONE) { 537 if (position != result[index++]) { 538 errln("Error with ThaiDictionaryBreakIterator forward iteration test at " + position + ".\nShould have been " + result[index-1]); 539 } 540 } 541 542 // Test backward iteration 543 while ((position = b.previous())!= BreakIterator.DONE) { 544 if (position != result[index++]) { 545 errln("Error with ThaiDictionaryBreakIterator backward iteration test at " + position + ".\nShould have been " + result[index-1]); 546 } 547 } 548 549 //Test invalid sequence and spaces 550 char text2[] = { 551 0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020, 0x0E1B, 552 0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19, 553 0x0E16, 0x0E49, 0x0E33 554 }; 555 int expectedWordResult[] = { 556 2, 3, 6, 10, 11, 15, 17, 20, 22 557 }; 558 int expectedLineResult[] = { 559 3, 6, 11, 15, 17, 20, 22 560 }; 561 BreakIterator brk = BreakIterator.getWordInstance(new ULocale("th")); 562 brk.setText(new String(text2)); 563 position = index = 0; 564 while ((position = brk.next()) != BreakIterator.DONE && position < text2.length) { 565 if (position != expectedWordResult[index++]) { 566 errln("Incorrect break given by thai word break iterator. Expected: " + expectedWordResult[index-1] + " Got: " + position); 567 } 568 } 569 570 brk = BreakIterator.getLineInstance(new ULocale("th")); 571 brk.setText(new String(text2)); 572 position = index = 0; 573 while ((position = brk.next()) != BreakIterator.DONE && position < text2.length) { 574 if (position != expectedLineResult[index++]) { 575 errln("Incorrect break given by thai line break iterator. Expected: " + expectedLineResult[index-1] + " Got: " + position); 576 } 577 } 578 // Improve code coverage 579 if (brk.preceding(expectedLineResult[1]) != expectedLineResult[0]) { 580 errln("Incorrect preceding position."); 581 } 582 if (brk.following(expectedLineResult[1]) != expectedLineResult[2]) { 583 errln("Incorrect following position."); 584 } 585 int []fillInArray = new int[2]; 586 if (((RuleBasedBreakIterator)brk).getRuleStatusVec(fillInArray) != 1 || fillInArray[0] != 0) { 587 errln("Error: Since getRuleStatusVec is not supported in DictionaryBasedBreakIterator, it should return 1 and fillInArray[0] == 0."); 588 } 589 } 590 591 592 // TODO: Move these test cases to rbbitst.txt if they aren't there already, then remove this test. It is redundant. TestTailoredBreaks()593 public void TestTailoredBreaks() { 594 class TBItem { 595 private int type; 596 private ULocale locale; 597 private String text; 598 private int[] expectOffsets; 599 TBItem(int typ, ULocale loc, String txt, int[] eOffs) { 600 type = typ; 601 locale = loc; 602 text = txt; 603 expectOffsets = eOffs; 604 } 605 private static final int maxOffsetCount = 128; 606 private boolean offsetsMatchExpected(int[] foundOffsets, int foundOffsetsLength) { 607 if ( foundOffsetsLength != expectOffsets.length ) { 608 return false; 609 } 610 for (int i = 0; i < foundOffsetsLength; i++) { 611 if ( foundOffsets[i] != expectOffsets[i] ) { 612 return false; 613 } 614 } 615 return true; 616 } 617 private String formatOffsets(int[] offsets, int length) { 618 StringBuffer buildString = new StringBuffer(4*maxOffsetCount); 619 for (int i = 0; i < length; i++) { 620 buildString.append(" " + offsets[i]); 621 } 622 return buildString.toString(); 623 } 624 public void doTest() { 625 BreakIterator brkIter; 626 switch( type ) { 627 case BreakIterator.KIND_CHARACTER: brkIter = BreakIterator.getCharacterInstance(locale); break; 628 case BreakIterator.KIND_WORD: brkIter = BreakIterator.getWordInstance(locale); break; 629 case BreakIterator.KIND_LINE: brkIter = BreakIterator.getLineInstance(locale); break; 630 case BreakIterator.KIND_SENTENCE: brkIter = BreakIterator.getSentenceInstance(locale); break; 631 default: errln("Unsupported break iterator type " + type); return; 632 } 633 brkIter.setText(text); 634 int[] foundOffsets = new int[maxOffsetCount]; 635 int offset, foundOffsetsCount = 0; 636 // do forwards iteration test 637 while ( foundOffsetsCount < maxOffsetCount && (offset = brkIter.next()) != BreakIterator.DONE ) { 638 foundOffsets[foundOffsetsCount++] = offset; 639 } 640 if ( !offsetsMatchExpected(foundOffsets, foundOffsetsCount) ) { 641 // log error for forwards test 642 String textToDisplay = (text.length() <= 16)? text: text.substring(0,16); 643 errln("For type " + type + " " + locale + ", text \"" + textToDisplay + "...\"" + 644 "; expect " + expectOffsets.length + " offsets:" + formatOffsets(expectOffsets, expectOffsets.length) + 645 "; found " + foundOffsetsCount + " offsets fwd:" + formatOffsets(foundOffsets, foundOffsetsCount) ); 646 } else { 647 // do backwards iteration test 648 --foundOffsetsCount; // back off one from the end offset 649 while ( foundOffsetsCount > 0 ) { 650 offset = brkIter.previous(); 651 if ( offset != foundOffsets[--foundOffsetsCount] ) { 652 // log error for backwards test 653 String textToDisplay = (text.length() <= 16)? text: text.substring(0,16); 654 errln("For type " + type + " " + locale + ", text \"" + textToDisplay + "...\"" + 655 "; expect " + expectOffsets.length + " offsets:" + formatOffsets(expectOffsets, expectOffsets.length) + 656 "; found rev offset " + offset + " where expect " + foundOffsets[foundOffsetsCount] ); 657 break; 658 } 659 } 660 } 661 } 662 } 663 // KIND_SENTENCE "el" 664 final String elSentText = "\u0391\u03B2, \u03B3\u03B4; \u0395 \u03B6\u03B7\u037E \u0398 \u03B9\u03BA. " + 665 "\u039B\u03BC \u03BD\u03BE! \u039F\u03C0, \u03A1\u03C2? \u03A3"; 666 final int[] elSentTOffsets = { 8, 14, 20, 27, 35, 36 }; 667 final int[] elSentROffsets = { 20, 27, 35, 36 }; 668 // KIND_CHARACTER "th" 669 final String thCharText = "\u0E01\u0E23\u0E30\u0E17\u0E48\u0E2D\u0E21\u0E23\u0E08\u0E19\u0E32 " + 670 "(\u0E2A\u0E38\u0E0A\u0E32\u0E15\u0E34-\u0E08\u0E38\u0E11\u0E32\u0E21\u0E32\u0E28) " + 671 "\u0E40\u0E14\u0E47\u0E01\u0E21\u0E35\u0E1B\u0E31\u0E0D\u0E2B\u0E32 "; 672 final int[] thCharTOffsets = { 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 673 12, 13, 15, 16, 17, 19, 20, 22, 23, 24, 25, 26, 27, 28, 674 29, 30, 32, 33, 35, 37, 38, 39, 40, 41 }; 675 //starting in Unicode 6.1, root behavior should be the same as Thai above 676 //final int[] thCharROffsets = { 1, 3, 5, 6, 7, 8, 9, 11, 677 // 12, 13, 15, 17, 19, 20, 22, 24, 26, 27, 28, 678 // 29, 32, 33, 35, 37, 38, 40, 41 }; 679 680 final TBItem[] tests = { 681 new TBItem( BreakIterator.KIND_SENTENCE, new ULocale("el"), elSentText, elSentTOffsets ), 682 new TBItem( BreakIterator.KIND_SENTENCE, ULocale.ROOT, elSentText, elSentROffsets ), 683 new TBItem( BreakIterator.KIND_CHARACTER, new ULocale("th"), thCharText, thCharTOffsets ), 684 new TBItem( BreakIterator.KIND_CHARACTER, ULocale.ROOT, thCharText, thCharTOffsets ), 685 }; 686 for (int iTest = 0; iTest < tests.length; iTest++) { 687 tests[iTest].doTest(); 688 } 689 } 690 691 /* Tests the method public Object clone() */ TestClone()692 public void TestClone() { 693 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(""); 694 try { 695 rbbi.setText((CharacterIterator) null); 696 if (((RuleBasedBreakIterator) rbbi.clone()).getText() != null) 697 errln("RuleBasedBreakIterator.clone() was suppose to return " 698 + "the same object because fText is set to null."); 699 } catch (Exception e) { 700 errln("RuleBasedBreakIterator.clone() was not suppose to return " + "an exception."); 701 } 702 } 703 704 /* 705 * Tests the method public boolean equals(Object that) 706 */ TestEquals()707 public void TestEquals() { 708 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(""); 709 RuleBasedBreakIterator rbbi1 = new RuleBasedBreakIterator(""); 710 711 // TODO: Tests when "if (fRData != other.fRData && (fRData == null || other.fRData == null))" is true 712 713 // Tests when "if (fText == null || other.fText == null)" is true 714 rbbi.setText((CharacterIterator) null); 715 if (rbbi.equals(rbbi1)) { 716 errln("RuleBasedBreakIterator.equals(Object) was not suppose to return " 717 + "true when the other object has a null fText."); 718 } 719 720 // Tests when "if (fText == null && other.fText == null)" is true 721 rbbi1.setText((CharacterIterator) null); 722 if (!rbbi.equals(rbbi1)) { 723 errln("RuleBasedBreakIterator.equals(Object) was not suppose to return " 724 + "false when both objects has a null fText."); 725 } 726 727 // Tests when an exception occurs 728 if (rbbi.equals(0)) { 729 errln("RuleBasedBreakIterator.equals(Object) was suppose to return " + "false when comparing to integer 0."); 730 } 731 if (rbbi.equals(0.0)) { 732 errln("RuleBasedBreakIterator.equals(Object) was suppose to return " + "false when comparing to float 0.0."); 733 } 734 if (rbbi.equals("0")) { 735 errln("RuleBasedBreakIterator.equals(Object) was suppose to return " 736 + "false when comparing to string '0'."); 737 } 738 } 739 740 /* 741 * Tests the method public void dump() 742 */ TestDump()743 public void TestDump() { 744 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(""); 745 try { 746 rbbi.dump(); 747 errln("RuleBasedBreakIterator.dump() was suppose to return " 748 + "an exception for a blank RuleBasedBreakIterator object."); 749 } catch (Exception e) { 750 } 751 } 752 753 /* 754 * Tests the method public int first() 755 */ TestFirst()756 public void TestFirst() { 757 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(""); 758 // Tests when "if (fText == null)" is true 759 rbbi.setText((CharacterIterator) null); 760 if (rbbi.first() != BreakIterator.DONE) { 761 errln("RuleBasedBreakIterator.first() was suppose to return " 762 + "BreakIterator.DONE when the object has a null fText."); 763 } 764 } 765 766 /* 767 * Tests the method public int last() 768 */ TestLast()769 public void TestLast() { 770 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(""); 771 // Tests when "if (fText == null)" is true 772 rbbi.setText((CharacterIterator) null); 773 if (rbbi.last() != BreakIterator.DONE) { 774 errln("RuleBasedBreakIterator.last() was suppose to return " 775 + "BreakIterator.DONE when the object has a null fText."); 776 } 777 } 778 779 /* 780 * Tests the method public int following(int offset) 781 */ TestFollowing()782 public void TestFollowing() { 783 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(""); 784 // Tests when "else if (offset < fText.getBeginIndex())" is true 785 rbbi.setText("dummy"); 786 if (rbbi.following(-1) != 0) { 787 errln("RuleBasedBreakIterator.following(-1) was suppose to return " 788 + "0 when the object has a fText of dummy."); 789 } 790 } 791 792 /* 793 * Tests the method public int preceding(int offset) 794 */ TestPreceding()795 public void TestPreceding() { 796 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(""); 797 // Tests when "if (fText == null || offset > fText.getEndIndex())" is true 798 rbbi.setText((CharacterIterator)null); 799 if (rbbi.preceding(-1) != BreakIterator.DONE) { 800 errln("RuleBasedBreakIterator.preceding(-1) was suppose to return " 801 + "0 when the object has a fText of null."); 802 } 803 804 // Tests when "else if (offset < fText.getBeginIndex())" is true 805 rbbi.setText("dummy"); 806 if (rbbi.preceding(-1) != 0) { 807 errln("RuleBasedBreakIterator.preceding(-1) was suppose to return " 808 + "0 when the object has a fText of dummy."); 809 } 810 } 811 812 /* Tests the method public int current() */ TestCurrent()813 public void TestCurrent(){ 814 RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(""); 815 // Tests when "(fText != null) ? fText.getIndex() : BreakIterator.DONE" is true and false 816 rbbi.setText((CharacterIterator)null); 817 if(rbbi.current() != BreakIterator.DONE){ 818 errln("RuleBasedBreakIterator.current() was suppose to return " 819 + "BreakIterator.DONE when the object has a fText of null."); 820 } 821 rbbi.setText("dummy"); 822 if(rbbi.current() != 0){ 823 errln("RuleBasedBreakIterator.current() was suppose to return " 824 + "0 when the object has a fText of dummy."); 825 } 826 } 827 } 828