1 /*
2  *******************************************************************************
3  * Copyright (C) 1996-2013, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  *******************************************************************************
6  */
7 package com.ibm.icu.dev.test.rbbi;
8 
9 //Regression testing of RuleBasedBreakIterator
10 //
11 //  TODO:  These tests should be mostly retired.
12 //          Much of the test data that was originally here was removed when the RBBI rules
13 //            were updated to match the Unicode boundary TRs, and the data was found to be invalid.
14 //          Much of the remaining data has been moved into the rbbitst.txt test data file,
15 //            which is common between ICU4C and ICU4J.  The remaining test data should also be moved,
16 //            or simply retired if it is no longer interesting.
17 import java.text.CharacterIterator;
18 import java.util.ArrayList;
19 import java.util.List;
20 
21 import com.ibm.icu.dev.test.TestFmwk;
22 import com.ibm.icu.text.BreakIterator;
23 import com.ibm.icu.text.RuleBasedBreakIterator;
24 import com.ibm.icu.util.ULocale;
25 
26 public class RBBITest extends TestFmwk {
27 
main(String[] args)28     public static void main(String[] args) throws Exception {
29         new RBBITest().run(args);
30     }
31 
/** Creates the test object; no state of its own is initialised here. */
public RBBITest() {
    super();
}
34 
private static final String halfNA = "\u0928\u094d\u200d"; /*
                                                            * half-form NA = Devanagari NA + virama (suppresses the
                                                            * inherent vowel) + zero width joiner
                                                            */
39 
40     // tests default rules based character iteration.
41     // Builds a new iterator from the source rules in the default (prebuilt) iterator.
42     //
TestDefaultRuleBasedCharacterIteration()43     public void TestDefaultRuleBasedCharacterIteration() {
44         RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance();
45         logln("Testing the RBBI for character iteration by using default rules");
46 
47         // fetch the rules used to create the above RuleBasedBreakIterator
48         String defaultRules = rbbi.toString();
49 
50         RuleBasedBreakIterator charIterDefault = null;
51         try {
52             charIterDefault = new RuleBasedBreakIterator(defaultRules);
53         } catch (IllegalArgumentException iae) {
54             errln("ERROR: failed construction in TestDefaultRuleBasedCharacterIteration()" + iae.toString());
55         }
56 
57         List<String> chardata = new ArrayList<String>();
58         chardata.add("H");
59         chardata.add("e");
60         chardata.add("l");
61         chardata.add("l");
62         chardata.add("o");
63         chardata.add("e\u0301"); // acuteE
64         chardata.add("&");
65         chardata.add("e\u0303"); // tildaE
66         // devanagiri characters for Hindi support
67         chardata.add("\u0906"); // devanagiri AA
68         // chardata.add("\u093e\u0901"); //devanagiri vowelsign AA+ chandrabindhu
69         chardata.add("\u0916\u0947"); // devanagiri KHA+vowelsign E
70         chardata.add("\u0938\u0941\u0902"); // devanagiri SA+vowelsign U + anusvara(bindu)
71         chardata.add("\u0926"); // devanagiri consonant DA
72         chardata.add("\u0930"); // devanagiri consonant RA
73         // chardata.add("\u0939\u094c"); //devanagiri HA+vowel sign AI
74         chardata.add("\u0964"); // devanagiri danda
75         // end hindi characters
76         chardata.add("A\u0302"); // circumflexA
77         chardata.add("i\u0301"); // acuteBelowI
78         // conjoining jamo...
79         chardata.add("\u1109\u1161\u11bc");
80         chardata.add("\u1112\u1161\u11bc");
81         chardata.add("\n");
82         chardata.add("\r\n"); // keep CRLF sequences together
83         chardata.add("S\u0300"); // graveS
84         chardata.add("i\u0301"); // acuteBelowI
85         chardata.add("!");
86 
87         // What follows is a string of Korean characters (I found it in the Yellow Pages
88         // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
89         // it correctly), first as precomposed syllables, and then as conjoining jamo.
90         // Both sequences should be semantically identical and break the same way.
91         // precomposed syllables...
92         chardata.add("\uc0c1");
93         chardata.add("\ud56d");
94         chardata.add(" ");
95         chardata.add("\ud55c");
96         chardata.add("\uc778");
97         chardata.add(" ");
98         chardata.add("\uc5f0");
99         chardata.add("\ud569");
100         chardata.add(" ");
101         chardata.add("\uc7a5");
102         chardata.add("\ub85c");
103         chardata.add("\uad50");
104         chardata.add("\ud68c");
105         chardata.add(" ");
106         // conjoining jamo...
107         chardata.add("\u1109\u1161\u11bc");
108         chardata.add("\u1112\u1161\u11bc");
109         chardata.add(" ");
110         chardata.add("\u1112\u1161\u11ab");
111         chardata.add("\u110b\u1175\u11ab");
112         chardata.add(" ");
113         chardata.add("\u110b\u1167\u11ab");
114         chardata.add("\u1112\u1161\u11b8");
115         chardata.add(" ");
116         chardata.add("\u110c\u1161\u11bc");
117         chardata.add("\u1105\u1169");
118         chardata.add("\u1100\u116d");
119         chardata.add("\u1112\u116c");
120 
121         generalIteratorTest(charIterDefault, chardata);
122 
123     }
124 
TestDefaultRuleBasedWordIteration()125     public void TestDefaultRuleBasedWordIteration() {
126         logln("Testing the RBBI for word iteration using default rules");
127         RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) BreakIterator.getWordInstance();
128         // fetch the rules used to create the above RuleBasedBreakIterator
129         String defaultRules = rbbi.toString();
130 
131         RuleBasedBreakIterator wordIterDefault = null;
132         try {
133             wordIterDefault = new RuleBasedBreakIterator(defaultRules);
134         } catch (IllegalArgumentException iae) {
135             errln("ERROR: failed construction in TestDefaultRuleBasedWordIteration() -- custom rules" + iae.toString());
136         }
137 
138         List<String> worddata = new ArrayList<String>();
139         worddata.add("Write");
140         worddata.add(" ");
141         worddata.add("wordrules");
142         worddata.add(".");
143         worddata.add(" ");
144         // worddata.add("alpha-beta-gamma");
145         worddata.add(" ");
146         worddata.add("\u092f\u0939");
147         worddata.add(" ");
148         worddata.add("\u0939\u093f" + halfNA + "\u0926\u0940");
149         worddata.add(" ");
150         worddata.add("\u0939\u0948");
151         // worddata.add("\u0964"); //danda followed by a space
152         worddata.add(" ");
153         worddata.add("\u0905\u093e\u092a");
154         worddata.add(" ");
155         worddata.add("\u0938\u093f\u0916\u094b\u0917\u0947");
156         worddata.add("?");
157         worddata.add(" ");
158         worddata.add("\r");
159         worddata.add("It's");
160         worddata.add(" ");
161         // worddata.add("$30.10");
162         worddata.add(" ");
163         worddata.add(" ");
164         worddata.add("Badges");
165         worddata.add("?");
166         worddata.add(" ");
167         worddata.add("BADGES");
168         worddata.add("!");
169         worddata.add("1000,233,456.000");
170         worddata.add(" ");
171 
172         generalIteratorTest(wordIterDefault, worddata);
173     }
174 
//    private static final String kParagraphSeparator = "\u2029";
// U+2028 LINE SEPARATOR; appended to the "you" fragment of the line-iteration test data.
private static final String kLineSeparator      = "\u2028";
177 
TestDefaultRuleBasedSentenceIteration()178     public void TestDefaultRuleBasedSentenceIteration() {
179         logln("Testing the RBBI for sentence iteration using default rules");
180         RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) BreakIterator.getSentenceInstance();
181 
182         // fetch the rules used to create the above RuleBasedBreakIterator
183         String defaultRules = rbbi.toString();
184         RuleBasedBreakIterator sentIterDefault = null;
185         try {
186             sentIterDefault = new RuleBasedBreakIterator(defaultRules);
187         } catch (IllegalArgumentException iae) {
188             errln("ERROR: failed construction in TestDefaultRuleBasedSentenceIteration()" + iae.toString());
189         }
190 
191         List<String> sentdata = new ArrayList<String>();
192         sentdata.add("(This is it.) ");
193         sentdata.add("Testing the sentence iterator. ");
194         sentdata.add("\"This isn\'t it.\" ");
195         sentdata.add("Hi! ");
196         sentdata.add("This is a simple sample sentence. ");
197         sentdata.add("(This is it.) ");
198         sentdata.add("This is a simple sample sentence. ");
199         sentdata.add("\"This isn\'t it.\" ");
200         sentdata.add("Hi! ");
201         sentdata.add("This is a simple sample sentence. ");
202         sentdata.add("It does not have to make any sense as you can see. ");
203         sentdata.add("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. ");
204         sentdata.add("Che la dritta via aveo smarrita. ");
205 
206         generalIteratorTest(sentIterDefault, sentdata);
207     }
208 
TestDefaultRuleBasedLineIteration()209     public void TestDefaultRuleBasedLineIteration() {
210         logln("Testing the RBBI for line iteration using default rules");
211         RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) RuleBasedBreakIterator.getLineInstance();
212         // fetch the rules used to create the above RuleBasedBreakIterator
213         String defaultRules = rbbi.toString();
214         RuleBasedBreakIterator lineIterDefault = null;
215         try {
216             lineIterDefault = new RuleBasedBreakIterator(defaultRules);
217         } catch (IllegalArgumentException iae) {
218             errln("ERROR: failed construction in TestDefaultRuleBasedLineIteration()" + iae.toString());
219         }
220 
221         List<String> linedata = new ArrayList<String>();
222         linedata.add("Multi-");
223         linedata.add("Level ");
224         linedata.add("example ");
225         linedata.add("of ");
226         linedata.add("a ");
227         linedata.add("semi-");
228         linedata.add("idiotic ");
229         linedata.add("non-");
230         linedata.add("sensical ");
231         linedata.add("(non-");
232         linedata.add("important) ");
233         linedata.add("sentence. ");
234 
235         linedata.add("Hi  ");
236         linedata.add("Hello ");
237         linedata.add("How\n");
238         linedata.add("are\r");
239         linedata.add("you" + kLineSeparator);
240         linedata.add("fine.\t");
241         linedata.add("good.  ");
242 
243         linedata.add("Now\r");
244         linedata.add("is\n");
245         linedata.add("the\r\n");
246         linedata.add("time\n");
247         linedata.add("\r");
248         linedata.add("for\r");
249         linedata.add("\r");
250         linedata.add("all");
251 
252         generalIteratorTest(lineIterDefault, linedata);
253 
254     }
255 
256     // =========================================================================
257     // general test subroutines
258     // =========================================================================
259 
generalIteratorTest(RuleBasedBreakIterator rbbi, List<String> expectedResult)260     private void generalIteratorTest(RuleBasedBreakIterator rbbi, List<String> expectedResult) {
261         StringBuffer buffer = new StringBuffer();
262         String text;
263         for (int i = 0; i < expectedResult.size(); i++) {
264             text = expectedResult.get(i);
265             buffer.append(text);
266         }
267         text = buffer.toString();
268         if (rbbi == null) {
269             errln("null iterator, test skipped.");
270             return;
271         }
272 
273         rbbi.setText(text);
274 
275         List<String> nextResults = _testFirstAndNext(rbbi, text);
276         List<String> previousResults = _testLastAndPrevious(rbbi, text);
277 
278         logln("comparing forward and backward...");
279         int errs = getErrorCount();
280         compareFragmentLists("forward iteration", "backward iteration", nextResults, previousResults);
281         if (getErrorCount() == errs) {
282             logln("comparing expected and actual...");
283             compareFragmentLists("expected result", "actual result", expectedResult, nextResults);
284         }
285 
286         int[] boundaries = new int[expectedResult.size() + 3];
287         boundaries[0] = RuleBasedBreakIterator.DONE;
288         boundaries[1] = 0;
289         for (int i = 0; i < expectedResult.size(); i++) {
290             boundaries[i + 2] = boundaries[i + 1] + (expectedResult.get(i).length());
291         }
292 
293         boundaries[boundaries.length - 1] = RuleBasedBreakIterator.DONE;
294 
295         _testFollowing(rbbi, text, boundaries);
296         _testPreceding(rbbi, text, boundaries);
297         _testIsBoundary(rbbi, text, boundaries);
298 
299         doMultipleSelectionTest(rbbi, text);
300     }
301 
_testFirstAndNext(RuleBasedBreakIterator rbbi, String text)302      private List<String> _testFirstAndNext(RuleBasedBreakIterator rbbi, String text) {
303          int p = rbbi.first();
304          int lastP = p;
305          List<String> result = new ArrayList<String>();
306 
307          if (p != 0) {
308              errln("first() returned " + p + " instead of 0");
309          }
310 
311          while (p != RuleBasedBreakIterator.DONE) {
312              p = rbbi.next();
313              if (p != RuleBasedBreakIterator.DONE) {
314                  if (p <= lastP) {
315                      errln("next() failed to move forward: next() on position "
316                                      + lastP + " yielded " + p);
317                  }
318                  result.add(text.substring(lastP, p));
319              }
320              else {
321                  if (lastP != text.length()) {
322                      errln("next() returned DONE prematurely: offset was "
323                                      + lastP + " instead of " + text.length());
324                  }
325              }
326              lastP = p;
327          }
328          return result;
329      }
330 
_testLastAndPrevious(RuleBasedBreakIterator rbbi, String text)331      private List<String> _testLastAndPrevious(RuleBasedBreakIterator rbbi, String text) {
332          int p = rbbi.last();
333          int lastP = p;
334          List<String> result = new ArrayList<String>();
335 
336          if (p != text.length()) {
337              errln("last() returned " + p + " instead of " + text.length());
338          }
339 
340          while (p != RuleBasedBreakIterator.DONE) {
341              p = rbbi.previous();
342              if (p != RuleBasedBreakIterator.DONE) {
343                  if (p >= lastP) {
344                      errln("previous() failed to move backward: previous() on position "
345                                      + lastP + " yielded " + p);
346                  }
347 
348                  result.add(0, text.substring(p, lastP));
349              }
350              else {
351                  if (lastP != 0) {
352                      errln("previous() returned DONE prematurely: offset was "
353                                      + lastP + " instead of 0");
354                  }
355              }
356              lastP = p;
357          }
358          return result;
359      }
360 
compareFragmentLists(String f1Name, String f2Name, List<String> f1, List<String> f2)361      private void compareFragmentLists(String f1Name, String f2Name, List<String> f1, List<String> f2) {
362          int p1 = 0;
363          int p2 = 0;
364          String s1;
365          String s2;
366          int t1 = 0;
367          int t2 = 0;
368 
369          while (p1 < f1.size() && p2 < f2.size()) {
370              s1 = f1.get(p1);
371              s2 = f2.get(p2);
372              t1 += s1.length();
373              t2 += s2.length();
374 
375              if (s1.equals(s2)) {
376                  debugLogln("   >" + s1 + "<");
377                  ++p1;
378                  ++p2;
379              }
380              else {
381                  int tempT1 = t1;
382                  int tempT2 = t2;
383                  int tempP1 = p1;
384                  int tempP2 = p2;
385 
386                  while (tempT1 != tempT2 && tempP1 < f1.size() && tempP2 < f2.size()) {
387                      while (tempT1 < tempT2 && tempP1 < f1.size()) {
388                          tempT1 += (f1.get(tempP1)).length();
389                          ++tempP1;
390                      }
391                      while (tempT2 < tempT1 && tempP2 < f2.size()) {
392                          tempT2 += (f2.get(tempP2)).length();
393                          ++tempP2;
394                      }
395                  }
396                  logln("*** " + f1Name + " has:");
397                  while (p1 <= tempP1 && p1 < f1.size()) {
398                      s1 = f1.get(p1);
399                      t1 += s1.length();
400                      debugLogln(" *** >" + s1 + "<");
401                      ++p1;
402                  }
403                  logln("***** " + f2Name + " has:");
404                  while (p2 <= tempP2 && p2 < f2.size()) {
405                      s2 = f2.get(p2);
406                      t2 += s2.length();
407                      debugLogln(" ***** >" + s2 + "<");
408                      ++p2;
409                  }
410                  errln("Discrepancy between " + f1Name + " and " + f2Name);
411              }
412          }
413      }
414 
_testFollowing(RuleBasedBreakIterator rbbi, String text, int[] boundaries)415     private void _testFollowing(RuleBasedBreakIterator rbbi, String text, int[] boundaries) {
416        logln("testFollowing():");
417        int p = 2;
418        for(int i = 0; i <= text.length(); i++) {
419            if (i == boundaries[p])
420                ++p;
421            int b = rbbi.following(i);
422            logln("rbbi.following(" + i + ") -> " + b);
423            if (b != boundaries[p])
424                errln("Wrong result from following() for " + i + ": expected " + boundaries[p]
425                                + ", got " + b);
426        }
427    }
428 
_testPreceding(RuleBasedBreakIterator rbbi, String text, int[] boundaries)429    private void _testPreceding(RuleBasedBreakIterator rbbi, String text, int[] boundaries) {
430        logln("testPreceding():");
431        int p = 0;
432        for(int i = 0; i <= text.length(); i++) {
433            int b = rbbi.preceding(i);
434            logln("rbbi.preceding(" + i + ") -> " + b);
435            if (b != boundaries[p])
436                errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p]
437                               + ", got " + b);
438            if (i == boundaries[p + 1])
439                ++p;
440        }
441    }
442 
_testIsBoundary(RuleBasedBreakIterator rbbi, String text, int[] boundaries)443    private void _testIsBoundary(RuleBasedBreakIterator rbbi, String text, int[] boundaries) {
444        logln("testIsBoundary():");
445        int p = 1;
446        boolean isB;
447        for(int i = 0; i <= text.length(); i++) {
448            isB = rbbi.isBoundary(i);
449            logln("rbbi.isBoundary(" + i + ") -> " + isB);
450            if(i == boundaries[p]) {
451                if (!isB)
452                    errln("Wrong result from isBoundary() for " + i + ": expected true, got false");
453                ++p;
454            }
455            else {
456                if(isB)
457                    errln("Wrong result from isBoundary() for " + i + ": expected false, got true");
458            }
459        }
460    }
doMultipleSelectionTest(RuleBasedBreakIterator iterator, String testText)461    private void doMultipleSelectionTest(RuleBasedBreakIterator iterator, String testText)
462    {
463        logln("Multiple selection test...");
464        RuleBasedBreakIterator testIterator = (RuleBasedBreakIterator)iterator.clone();
465        int offset = iterator.first();
466        int testOffset;
467        int count = 0;
468 
469        do {
470            testOffset = testIterator.first();
471            testOffset = testIterator.next(count);
472            logln("next(" + count + ") -> " + testOffset);
473            if (offset != testOffset)
474                errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
475 
476            if (offset != RuleBasedBreakIterator.DONE) {
477                count++;
478                offset = iterator.next();
479            }
480        } while (offset != RuleBasedBreakIterator.DONE);
481 
482        // now do it backwards...
483        offset = iterator.last();
484        count = 0;
485 
486        do {
487            testOffset = testIterator.last();
488            testOffset = testIterator.next(count);
489            logln("next(" + count + ") -> " + testOffset);
490            if (offset != testOffset)
491                errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
492 
493            if (offset != RuleBasedBreakIterator.DONE) {
494                count--;
495                offset = iterator.previous();
496            }
497        } while (offset != RuleBasedBreakIterator.DONE);
498    }
499 
debugLogln(String s)500    private void debugLogln(String s) {
501         final String zeros = "0000";
502         String temp;
503         StringBuffer out = new StringBuffer();
504         for (int i = 0; i < s.length(); i++) {
505             char c = s.charAt(i);
506             if (c >= ' ' && c < '\u007f')
507                 out.append(c);
508             else {
509                 out.append("\\u");
510                 temp = Integer.toHexString((int)c);
511                 out.append(zeros.substring(0, 4 - temp.length()));
512                 out.append(temp);
513             }
514         }
515          logln(out.toString());
516     }
517 
TestThaiDictionaryBreakIterator()518    public void TestThaiDictionaryBreakIterator() {
519        int position;
520        int index;
521        int result[] = { 1, 2, 5, 10, 11, 12, 11, 10, 5, 2, 1, 0 };
522        char ctext[] = {
523                0x0041, 0x0020,
524                0x0E01, 0x0E32, 0x0E23, 0x0E17, 0x0E14, 0x0E25, 0x0E2D, 0x0E07,
525                0x0020, 0x0041
526                };
527        String text = new String(ctext);
528 
529        ULocale locale = ULocale.createCanonical("th");
530        BreakIterator b = BreakIterator.getWordInstance(locale);
531 
532        b.setText(text);
533 
534        index = 0;
535        // Test forward iteration
536        while ((position = b.next())!= BreakIterator.DONE) {
537            if (position != result[index++]) {
538                errln("Error with ThaiDictionaryBreakIterator forward iteration test at " + position + ".\nShould have been " + result[index-1]);
539            }
540        }
541 
542        // Test backward iteration
543        while ((position = b.previous())!= BreakIterator.DONE) {
544            if (position != result[index++]) {
545                errln("Error with ThaiDictionaryBreakIterator backward iteration test at " + position + ".\nShould have been " + result[index-1]);
546            }
547        }
548 
549        //Test invalid sequence and spaces
550        char text2[] = {
551                0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020, 0x0E1B,
552                0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19,
553                0x0E16, 0x0E49, 0x0E33
554        };
555        int expectedWordResult[] = {
556                2, 3, 6, 10, 11, 15, 17, 20, 22
557        };
558        int expectedLineResult[] = {
559                3, 6, 11, 15, 17, 20, 22
560        };
561        BreakIterator brk = BreakIterator.getWordInstance(new ULocale("th"));
562        brk.setText(new String(text2));
563        position = index = 0;
564        while ((position = brk.next()) != BreakIterator.DONE && position < text2.length) {
565            if (position != expectedWordResult[index++]) {
566                errln("Incorrect break given by thai word break iterator. Expected: " + expectedWordResult[index-1] + " Got: " + position);
567            }
568        }
569 
570        brk = BreakIterator.getLineInstance(new ULocale("th"));
571        brk.setText(new String(text2));
572        position = index = 0;
573        while ((position = brk.next()) != BreakIterator.DONE && position < text2.length) {
574            if (position != expectedLineResult[index++]) {
575                errln("Incorrect break given by thai line break iterator. Expected: " + expectedLineResult[index-1] + " Got: " + position);
576            }
577        }
578        // Improve code coverage
579        if (brk.preceding(expectedLineResult[1]) != expectedLineResult[0]) {
580            errln("Incorrect preceding position.");
581        }
582        if (brk.following(expectedLineResult[1]) != expectedLineResult[2]) {
583            errln("Incorrect following position.");
584        }
585        int []fillInArray = new int[2];
586        if (((RuleBasedBreakIterator)brk).getRuleStatusVec(fillInArray) != 1 || fillInArray[0] != 0) {
587            errln("Error: Since getRuleStatusVec is not supported in DictionaryBasedBreakIterator, it should return 1 and fillInArray[0] == 0.");
588        }
589    }
590 
591 
592    // TODO: Move these test cases to rbbitst.txt if they aren't there already, then remove this test. It is redundant.
TestTailoredBreaks()593     public void TestTailoredBreaks() {
594         class TBItem {
595             private int     type;
596             private ULocale locale;
597             private String  text;
598             private int[]   expectOffsets;
599             TBItem(int typ, ULocale loc, String txt, int[] eOffs) {
600                 type          = typ;
601                 locale        = loc;
602                 text          = txt;
603                 expectOffsets = eOffs;
604             }
605             private static final int maxOffsetCount = 128;
606             private boolean offsetsMatchExpected(int[] foundOffsets, int foundOffsetsLength) {
607                 if ( foundOffsetsLength != expectOffsets.length ) {
608                     return false;
609                 }
610                 for (int i = 0; i < foundOffsetsLength; i++) {
611                     if ( foundOffsets[i] != expectOffsets[i] ) {
612                         return false;
613                     }
614                 }
615                 return true;
616             }
617             private String formatOffsets(int[] offsets, int length) {
618                 StringBuffer buildString = new StringBuffer(4*maxOffsetCount);
619                 for (int i = 0; i < length; i++) {
620                     buildString.append(" " + offsets[i]);
621                 }
622                 return buildString.toString();
623             }
624             public void doTest() {
625                 BreakIterator brkIter;
626                 switch( type ) {
627                     case BreakIterator.KIND_CHARACTER: brkIter = BreakIterator.getCharacterInstance(locale); break;
628                     case BreakIterator.KIND_WORD:      brkIter = BreakIterator.getWordInstance(locale); break;
629                     case BreakIterator.KIND_LINE:      brkIter = BreakIterator.getLineInstance(locale); break;
630                     case BreakIterator.KIND_SENTENCE:  brkIter = BreakIterator.getSentenceInstance(locale); break;
631                     default: errln("Unsupported break iterator type " + type); return;
632                 }
633                 brkIter.setText(text);
634                 int[] foundOffsets = new int[maxOffsetCount];
635                 int offset, foundOffsetsCount = 0;
636                 // do forwards iteration test
637                 while ( foundOffsetsCount < maxOffsetCount && (offset = brkIter.next()) != BreakIterator.DONE ) {
638                     foundOffsets[foundOffsetsCount++] = offset;
639                 }
640                 if ( !offsetsMatchExpected(foundOffsets, foundOffsetsCount) ) {
641                     // log error for forwards test
642                     String textToDisplay = (text.length() <= 16)? text: text.substring(0,16);
643                     errln("For type " + type + " " + locale + ", text \"" + textToDisplay + "...\"" +
644                             "; expect " + expectOffsets.length + " offsets:" + formatOffsets(expectOffsets, expectOffsets.length) +
645                             "; found " + foundOffsetsCount + " offsets fwd:" + formatOffsets(foundOffsets, foundOffsetsCount) );
646                 } else {
647                     // do backwards iteration test
648                     --foundOffsetsCount; // back off one from the end offset
649                     while ( foundOffsetsCount > 0 ) {
650                         offset = brkIter.previous();
651                         if ( offset != foundOffsets[--foundOffsetsCount] ) {
652                             // log error for backwards test
653                             String textToDisplay = (text.length() <= 16)? text: text.substring(0,16);
654                             errln("For type " + type + " " + locale + ", text \"" + textToDisplay + "...\"" +
655                                     "; expect " + expectOffsets.length + " offsets:" + formatOffsets(expectOffsets, expectOffsets.length) +
656                                     "; found rev offset " + offset + " where expect " + foundOffsets[foundOffsetsCount] );
657                             break;
658                         }
659                     }
660                 }
661             }
662         }
663         // KIND_SENTENCE "el"
664         final String elSentText     = "\u0391\u03B2, \u03B3\u03B4; \u0395 \u03B6\u03B7\u037E \u0398 \u03B9\u03BA. " +
665                                       "\u039B\u03BC \u03BD\u03BE! \u039F\u03C0, \u03A1\u03C2? \u03A3";
666         final int[]  elSentTOffsets = { 8, 14, 20, 27, 35, 36 };
667         final int[]  elSentROffsets = {        20, 27, 35, 36 };
668         // KIND_CHARACTER "th"
669         final String thCharText     = "\u0E01\u0E23\u0E30\u0E17\u0E48\u0E2D\u0E21\u0E23\u0E08\u0E19\u0E32 " +
670                                       "(\u0E2A\u0E38\u0E0A\u0E32\u0E15\u0E34-\u0E08\u0E38\u0E11\u0E32\u0E21\u0E32\u0E28) " +
671                                       "\u0E40\u0E14\u0E47\u0E01\u0E21\u0E35\u0E1B\u0E31\u0E0D\u0E2B\u0E32 ";
672         final int[]  thCharTOffsets = { 1, 2, 3, 5, 6, 7, 8, 9, 10, 11,
673                                         12, 13, 15, 16, 17, 19, 20, 22, 23, 24, 25, 26, 27, 28,
674                                         29, 30, 32, 33, 35, 37, 38, 39, 40, 41 };
675         //starting in Unicode 6.1, root behavior should be the same as Thai above
676         //final int[]  thCharROffsets = { 1,    3, 5, 6, 7, 8, 9,     11,
677         //                                12, 13, 15,     17, 19, 20, 22,     24,     26, 27, 28,
678         //                                29,     32, 33, 35, 37, 38,     40, 41 };
679 
680         final TBItem[] tests = {
681             new TBItem( BreakIterator.KIND_SENTENCE,  new ULocale("el"),          elSentText,   elSentTOffsets   ),
682             new TBItem( BreakIterator.KIND_SENTENCE,  ULocale.ROOT,               elSentText,   elSentROffsets   ),
683             new TBItem( BreakIterator.KIND_CHARACTER, new ULocale("th"),          thCharText,   thCharTOffsets   ),
684             new TBItem( BreakIterator.KIND_CHARACTER, ULocale.ROOT,               thCharText,   thCharTOffsets   ),
685         };
686         for (int iTest = 0; iTest < tests.length; iTest++) {
687             tests[iTest].doTest();
688         }
689     }
690 
691     /* Tests the method public Object clone() */
TestClone()692     public void TestClone() {
693         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
694         try {
695             rbbi.setText((CharacterIterator) null);
696             if (((RuleBasedBreakIterator) rbbi.clone()).getText() != null)
697                 errln("RuleBasedBreakIterator.clone() was suppose to return "
698                         + "the same object because fText is set to null.");
699         } catch (Exception e) {
700             errln("RuleBasedBreakIterator.clone() was not suppose to return " + "an exception.");
701         }
702     }
703 
704     /*
705      * Tests the method public boolean equals(Object that)
706      */
TestEquals()707     public void TestEquals() {
708         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
709         RuleBasedBreakIterator rbbi1 = new RuleBasedBreakIterator("");
710 
711         // TODO: Tests when "if (fRData != other.fRData && (fRData == null || other.fRData == null))" is true
712 
713         // Tests when "if (fText == null || other.fText == null)" is true
714         rbbi.setText((CharacterIterator) null);
715         if (rbbi.equals(rbbi1)) {
716             errln("RuleBasedBreakIterator.equals(Object) was not suppose to return "
717                     + "true when the other object has a null fText.");
718         }
719 
720         // Tests when "if (fText == null && other.fText == null)" is true
721         rbbi1.setText((CharacterIterator) null);
722         if (!rbbi.equals(rbbi1)) {
723             errln("RuleBasedBreakIterator.equals(Object) was not suppose to return "
724                     + "false when both objects has a null fText.");
725         }
726 
727         // Tests when an exception occurs
728         if (rbbi.equals(0)) {
729             errln("RuleBasedBreakIterator.equals(Object) was suppose to return " + "false when comparing to integer 0.");
730         }
731         if (rbbi.equals(0.0)) {
732             errln("RuleBasedBreakIterator.equals(Object) was suppose to return " + "false when comparing to float 0.0.");
733         }
734         if (rbbi.equals("0")) {
735             errln("RuleBasedBreakIterator.equals(Object) was suppose to return "
736                     + "false when comparing to string '0'.");
737         }
738     }
739 
740     /*
741      * Tests the method public void dump()
742      */
TestDump()743     public void TestDump() {
744         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
745         try {
746             rbbi.dump();
747             errln("RuleBasedBreakIterator.dump() was suppose to return "
748                     + "an exception for a blank RuleBasedBreakIterator object.");
749         } catch (Exception e) {
750         }
751     }
752 
753     /*
754      * Tests the method public int first()
755      */
TestFirst()756     public void TestFirst() {
757         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
758         // Tests when "if (fText == null)" is true
759         rbbi.setText((CharacterIterator) null);
760         if (rbbi.first() != BreakIterator.DONE) {
761             errln("RuleBasedBreakIterator.first() was suppose to return "
762                     + "BreakIterator.DONE when the object has a null fText.");
763         }
764     }
765 
766     /*
767      * Tests the method public int last()
768      */
TestLast()769     public void TestLast() {
770         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
771         // Tests when "if (fText == null)" is true
772         rbbi.setText((CharacterIterator) null);
773         if (rbbi.last() != BreakIterator.DONE) {
774             errln("RuleBasedBreakIterator.last() was suppose to return "
775                     + "BreakIterator.DONE when the object has a null fText.");
776         }
777     }
778 
779     /*
780      * Tests the method public int following(int offset)
781      */
TestFollowing()782     public void TestFollowing() {
783         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
784         // Tests when "else if (offset < fText.getBeginIndex())" is true
785         rbbi.setText("dummy");
786         if (rbbi.following(-1) != 0) {
787             errln("RuleBasedBreakIterator.following(-1) was suppose to return "
788                     + "0 when the object has a fText of dummy.");
789         }
790     }
791 
792     /*
793      * Tests the method public int preceding(int offset)
794      */
TestPreceding()795     public void TestPreceding() {
796         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
797         // Tests when "if (fText == null || offset > fText.getEndIndex())" is true
798         rbbi.setText((CharacterIterator)null);
799         if (rbbi.preceding(-1) != BreakIterator.DONE) {
800             errln("RuleBasedBreakIterator.preceding(-1) was suppose to return "
801                     + "0 when the object has a fText of null.");
802         }
803 
804         // Tests when "else if (offset < fText.getBeginIndex())" is true
805         rbbi.setText("dummy");
806         if (rbbi.preceding(-1) != 0) {
807             errln("RuleBasedBreakIterator.preceding(-1) was suppose to return "
808                     + "0 when the object has a fText of dummy.");
809         }
810     }
811 
812     /* Tests the method public int current() */
TestCurrent()813     public void TestCurrent(){
814         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
815         // Tests when "(fText != null) ? fText.getIndex() : BreakIterator.DONE" is true and false
816         rbbi.setText((CharacterIterator)null);
817         if(rbbi.current() != BreakIterator.DONE){
818             errln("RuleBasedBreakIterator.current() was suppose to return "
819                     + "BreakIterator.DONE when the object has a fText of null.");
820         }
821         rbbi.setText("dummy");
822         if(rbbi.current() != 0){
823             errln("RuleBasedBreakIterator.current() was suppose to return "
824                     + "0 when the object has a fText of dummy.");
825         }
826     }
827 }
828