1 /********************************************************************
2  * Copyright (c) 1999-2014, International Business Machines
3  * Corporation and others. All Rights Reserved.
4  ********************************************************************
5  *   Date        Name        Description
6  *   12/14/99    Madhu        Creation.
7  *   01/12/2000  Madhu        updated for changed API
8  ********************************************************************/
9 
10 #include "unicode/utypes.h"
11 
12 #if !UCONFIG_NO_BREAK_ITERATION
13 
14 #include "unicode/uchar.h"
15 #include "intltest.h"
16 #include "unicode/rbbi.h"
17 #include "unicode/schriter.h"
18 #include "rbbiapts.h"
19 #include "rbbidata.h"
20 #include "cstring.h"
21 #include "ubrkimpl.h"
22 #include "unicode/locid.h"
23 #include "unicode/ustring.h"
24 #include "unicode/utext.h"
25 #include "cmemory.h"
26 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
27 #include "unicode/filteredbrk.h"
28 #include <stdio.h> // for sprintf
29 #endif
30 /**
31  * API Test the RuleBasedBreakIterator class
32  */
33 
34 
// Reports a data error, tagged with the invoking file and line, whenever
// `status` holds an ICU failure code.  Does nothing for success codes.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", \
                  __FILE__, __LINE__, u_errorName(status)); \
    } \
}
37 
// Reports a test failure, quoting the text of `expr` together with the
// invoking file and line, whenever `expr` compares equal to FALSE.
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", \
              __FILE__, __LINE__, #expr); \
    } \
}
40 
TestCloneEquals()41 void RBBIAPITest::TestCloneEquals()
42 {
43 
44     UErrorCode status=U_ZERO_ERROR;
45     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
46     RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
47     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
48     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
49     if(U_FAILURE(status)){
50         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
51         return;
52     }
53 
54 
55     UnicodeString testString="Testing word break iterators's clone() and equals()";
56     bi1->setText(testString);
57     bi2->setText(testString);
58     biequal->setText(testString);
59 
60     bi3->setText("hello");
61 
62     logln((UnicodeString)"Testing equals()");
63 
64     logln((UnicodeString)"Testing == and !=");
65     UBool b = (*bi1 != *biequal);
66     b |= *bi1 == *bi2;
67     b |= *bi1 == *bi3;
68     if (b) {
69         errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
70     }
71 
72     if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
73         errln((UnicodeString)"ERROR:2 RBBI's == and != operator  failed.");
74 
75 
76     // Quick test of RulesBasedBreakIterator assignment -
77     // Check that
78     //    two different iterators are !=
79     //    they are == after assignment
80     //    source and dest iterator produce the same next() after assignment.
81     //    deleting one doesn't disable the other.
82     logln("Testing assignment");
83     RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
84     if(U_FAILURE(status)){
85         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
86         return;
87     }
88 
89     RuleBasedBreakIterator biDefault, biDefault2;
90     if(U_FAILURE(status)){
91         errln((UnicodeString)"FAIL : in construction of default iterator");
92         return;
93     }
94     if (biDefault == *bix) {
95         errln((UnicodeString)"ERROR: iterators should not compare ==");
96         return;
97     }
98     if (biDefault != biDefault2) {
99         errln((UnicodeString)"ERROR: iterators should compare ==");
100         return;
101     }
102 
103 
104     UnicodeString   HelloString("Hello Kitty");
105     bix->setText(HelloString);
106     if (*bix == *bi2) {
107         errln(UnicodeString("ERROR: strings should not be equal before assignment."));
108     }
109     *bix = *bi2;
110     if (*bix != *bi2) {
111         errln(UnicodeString("ERROR: strings should be equal before assignment."));
112     }
113 
114     int bixnext = bix->next();
115     int bi2next = bi2->next();
116     if (! (bixnext == bi2next && bixnext == 7)) {
117         errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
118     }
119     delete bix;
120     if (bi2->next() != 8) {
121         errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
122     }
123 
124 
125 
126     logln((UnicodeString)"Testing clone()");
127     RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
128     RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
129 
130     if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
131       *bi1clone == *bi3 || *bi1clone == *bi2)
132         errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
133 
134     if(*bi2clone == *bi1 || *bi2clone == *biequal ||
135        *bi2clone == *bi3 || *bi2clone != *bi2)
136         errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
137 
138     if(bi1->getText() != bi1clone->getText()   ||
139        bi2clone->getText() != bi2->getText()   ||
140        *bi2clone == *bi1clone )
141         errln((UnicodeString)"ERROR: RBBI's clone() method failed");
142 
143     delete bi1clone;
144     delete bi2clone;
145     delete bi1;
146     delete bi3;
147     delete bi2;
148     delete biequal;
149 }
150 
TestBoilerPlate()151 void RBBIAPITest::TestBoilerPlate()
152 {
153     UErrorCode status = U_ZERO_ERROR;
154     BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
155     BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
156     if (U_FAILURE(status)) {
157         errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
158         return;
159     }
160     if(*a!=*b){
161         errln("Failed: boilerplate method operator!= does not return correct results");
162     }
163     // Japanese word break iterators are identical to root with
164     // a dictionary-based break iterator
165     BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
166     BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
167     if(c && d){
168         if(*c!=*d){
169             errln("Failed: boilerplate method operator== does not return correct results");
170         }
171     }else{
172         errln("creation of break iterator failed");
173     }
174     delete a;
175     delete b;
176     delete c;
177     delete d;
178 }
179 
TestgetRules()180 void RBBIAPITest::TestgetRules()
181 {
182     UErrorCode status=U_ZERO_ERROR;
183 
184     RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
185     RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
186     if(U_FAILURE(status)){
187         errcheckln(status, "FAIL: in construction - %s", u_errorName(status));
188         delete bi1;
189         delete bi2;
190         return;
191     }
192 
193 
194 
195     logln((UnicodeString)"Testing toString()");
196 
197     bi1->setText((UnicodeString)"Hello there");
198 
199     RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
200 
201     UnicodeString temp=bi1->getRules();
202     UnicodeString temp2=bi2->getRules();
203     UnicodeString temp3=bi3->getRules();
204     if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
205         errln((UnicodeString)"ERROR: error in getRules() method");
206 
207     delete bi1;
208     delete bi2;
209     delete bi3;
210 }
TestHashCode()211 void RBBIAPITest::TestHashCode()
212 {
213     UErrorCode status=U_ZERO_ERROR;
214     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
215     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
216     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
217     if(U_FAILURE(status)){
218         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
219         delete bi1;
220         delete bi2;
221         delete bi3;
222         return;
223     }
224 
225 
226     logln((UnicodeString)"Testing hashCode()");
227 
228     bi1->setText((UnicodeString)"Hash code");
229     bi2->setText((UnicodeString)"Hash code");
230     bi3->setText((UnicodeString)"Hash code");
231 
232     RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
233     RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
234 
235     if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
236         bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
237         errln((UnicodeString)"ERROR: identical objects have different hashcodes");
238 
239     if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
240         bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
241         errln((UnicodeString)"ERROR: different objects have same hashcodes");
242 
243     delete bi1clone;
244     delete bi2clone;
245     delete bi1;
246     delete bi2;
247     delete bi3;
248 
249 }
TestGetSetAdoptText()250 void RBBIAPITest::TestGetSetAdoptText()
251 {
252     logln((UnicodeString)"Testing getText setText ");
253     IcuTestErrorCode status(*this, "TestGetSetAdoptText");
254     UnicodeString str1="first string.";
255     UnicodeString str2="Second string.";
256     LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
257     LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
258     if(status.isFailure()){
259         errcheckln(status, "Fail : in construction - %s", status.errorName());
260             return;
261     }
262 
263 
264     CharacterIterator* text1= new StringCharacterIterator(str1);
265     CharacterIterator* text1Clone = text1->clone();
266     CharacterIterator* text2= new StringCharacterIterator(str2);
267     CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"
268 
269     wordIter1->setText(str1);
270     CharacterIterator *tci = &wordIter1->getText();
271     UnicodeString      tstr;
272     tci->getText(tstr);
273     TEST_ASSERT(tstr == str1);
274     if(wordIter1->current() != 0)
275         errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
276 
277     wordIter1->next(2);
278 
279     wordIter1->setText(str2);
280     if(wordIter1->current() != 0)
281         errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
282 
283 
284     charIter1->adoptText(text1Clone);
285     TEST_ASSERT(wordIter1->getText() != charIter1->getText());
286     tci = &wordIter1->getText();
287     tci->getText(tstr);
288     TEST_ASSERT(tstr == str2);
289     tci = &charIter1->getText();
290     tci->getText(tstr);
291     TEST_ASSERT(tstr == str1);
292 
293 
294     LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
295     rb->adoptText(text1);
296     if(rb->getText() != *text1)
297         errln((UnicodeString)"ERROR:1 error in adoptText ");
298     rb->adoptText(text2);
299     if(rb->getText() != *text2)
300         errln((UnicodeString)"ERROR:2 error in adoptText ");
301 
302     // Adopt where iterator range is less than the entire orignal source string.
303     //   (With the change of the break engine to working with UText internally,
304     //    CharacterIterators starting at positions other than zero are not supported)
305     rb->adoptText(text3);
306     TEST_ASSERT(rb->preceding(2) == 0);
307     TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
308     //if(rb->preceding(2) != 3) {
309     //    errln((UnicodeString)"ERROR:3 error in adoptText ");
310     //}
311     //if(rb->following(11) != BreakIterator::DONE) {
312     //    errln((UnicodeString)"ERROR:4 error in adoptText ");
313     //}
314 
315     // UText API
316     //
317     //   Quick test to see if UText is working at all.
318     //
319     const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
320     const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
321     //                012345678901
322 
323     status.reset();
324     LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
325     wordIter1->setText(ut.getAlias(), status);
326     TEST_ASSERT_SUCCESS(status);
327 
328     int32_t pos;
329     pos = wordIter1->first();
330     TEST_ASSERT(pos==0);
331     pos = wordIter1->next();
332     TEST_ASSERT(pos==5);
333     pos = wordIter1->next();
334     TEST_ASSERT(pos==6);
335     pos = wordIter1->next();
336     TEST_ASSERT(pos==11);
337     pos = wordIter1->next();
338     TEST_ASSERT(pos==UBRK_DONE);
339 
340     status.reset();
341     LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
342     TEST_ASSERT_SUCCESS(status);
343     wordIter1->setText(ut2.getAlias(), status);
344     TEST_ASSERT_SUCCESS(status);
345 
346     pos = wordIter1->first();
347     TEST_ASSERT(pos==0);
348     pos = wordIter1->next();
349     TEST_ASSERT(pos==3);
350     pos = wordIter1->next();
351     TEST_ASSERT(pos==4);
352 
353     pos = wordIter1->last();
354     TEST_ASSERT(pos==6);
355     pos = wordIter1->previous();
356     TEST_ASSERT(pos==4);
357     pos = wordIter1->previous();
358     TEST_ASSERT(pos==3);
359     pos = wordIter1->previous();
360     TEST_ASSERT(pos==0);
361     pos = wordIter1->previous();
362     TEST_ASSERT(pos==UBRK_DONE);
363 
364     status.reset();
365     UnicodeString sEmpty;
366     LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
367     wordIter1->getUText(gut2.getAlias(), status);
368     TEST_ASSERT_SUCCESS(status);
369     status.reset();
370 }
371 
372 
TestIteration()373 void RBBIAPITest::TestIteration()
374 {
375     // This test just verifies that the API is present.
376     // Testing for correct operation of the break rules happens elsewhere.
377 
378     UErrorCode status=U_ZERO_ERROR;
379     RuleBasedBreakIterator* bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
380     if (U_FAILURE(status) || bi == NULL)  {
381         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
382     }
383     delete bi;
384 
385     status=U_ZERO_ERROR;
386     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
387     if (U_FAILURE(status) || bi == NULL)  {
388         errcheckln(status, "Failure creating Word break iterator.  Status = %s", u_errorName(status));
389     }
390     delete bi;
391 
392     status=U_ZERO_ERROR;
393     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
394     if (U_FAILURE(status) || bi == NULL)  {
395         errcheckln(status, "Failure creating Line break iterator.  Status = %s", u_errorName(status));
396     }
397     delete bi;
398 
399     status=U_ZERO_ERROR;
400     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
401     if (U_FAILURE(status) || bi == NULL)  {
402         errcheckln(status, "Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
403     }
404     delete bi;
405 
406     status=U_ZERO_ERROR;
407     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
408     if (U_FAILURE(status) || bi == NULL)  {
409         errcheckln(status, "Failure creating Title break iterator.  Status = %s", u_errorName(status));
410     }
411     delete bi;
412 
413     status=U_ZERO_ERROR;
414     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
415     if (U_FAILURE(status) || bi == NULL)  {
416         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
417         return;   // Skip the rest of these tests.
418     }
419 
420 
421     UnicodeString testString="0123456789";
422     bi->setText(testString);
423 
424     int32_t i;
425     i = bi->first();
426     if (i != 0) {
427         errln("Incorrect value from bi->first().  Expected 0, got %d.", i);
428     }
429 
430     i = bi->last();
431     if (i != 10) {
432         errln("Incorrect value from bi->last().  Expected 10, got %d", i);
433     }
434 
435     //
436     // Previous
437     //
438     bi->last();
439     i = bi->previous();
440     if (i != 9) {
441         errln("Incorrect value from bi->last() at line %d.  Expected 9, got %d", __LINE__, i);
442     }
443 
444 
445     bi->first();
446     i = bi->previous();
447     if (i != BreakIterator::DONE) {
448         errln("Incorrect value from bi->previous() at line %d.  Expected DONE, got %d", __LINE__, i);
449     }
450 
451     //
452     // next()
453     //
454     bi->first();
455     i = bi->next();
456     if (i != 1) {
457         errln("Incorrect value from bi->next() at line %d.  Expected 1, got %d", __LINE__, i);
458     }
459 
460     bi->last();
461     i = bi->next();
462     if (i != BreakIterator::DONE) {
463         errln("Incorrect value from bi->next() at line %d.  Expected DONE, got %d", __LINE__, i);
464     }
465 
466 
467     //
468     //  current()
469     //
470     bi->first();
471     i = bi->current();
472     if (i != 0) {
473         errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
474     }
475 
476     bi->next();
477     i = bi->current();
478     if (i != 1) {
479         errln("Incorrect value from bi->previous() at line %d.  Expected 1, got %d", __LINE__, i);
480     }
481 
482     bi->last();
483     bi->next();
484     i = bi->current();
485     if (i != 10) {
486         errln("Incorrect value from bi->previous() at line %d.  Expected 10, got %d", __LINE__, i);
487     }
488 
489     bi->first();
490     bi->previous();
491     i = bi->current();
492     if (i != 0) {
493         errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
494     }
495 
496 
497     //
498     // Following()
499     //
500     i = bi->following(4);
501     if (i != 5) {
502         errln("Incorrect value from bi->following() at line %d.  Expected 5, got %d", __LINE__, i);
503     }
504 
505     i = bi->following(9);
506     if (i != 10) {
507         errln("Incorrect value from bi->following() at line %d.  Expected 10, got %d", __LINE__, i);
508     }
509 
510     i = bi->following(10);
511     if (i != BreakIterator::DONE) {
512         errln("Incorrect value from bi->following() at line %d.  Expected DONE, got %d", __LINE__, i);
513     }
514 
515 
516     //
517     // Preceding
518     //
519     i = bi->preceding(4);
520     if (i != 3) {
521         errln("Incorrect value from bi->preceding() at line %d.  Expected 3, got %d", __LINE__, i);
522     }
523 
524     i = bi->preceding(10);
525     if (i != 9) {
526         errln("Incorrect value from bi->preceding() at line %d.  Expected 9, got %d", __LINE__, i);
527     }
528 
529     i = bi->preceding(1);
530     if (i != 0) {
531         errln("Incorrect value from bi->preceding() at line %d.  Expected 0, got %d", __LINE__, i);
532     }
533 
534     i = bi->preceding(0);
535     if (i != BreakIterator::DONE) {
536         errln("Incorrect value from bi->preceding() at line %d.  Expected DONE, got %d", __LINE__, i);
537     }
538 
539 
540     //
541     // isBoundary()
542     //
543     bi->first();
544     if (bi->isBoundary(3) != TRUE) {
545         errln("Incorrect value from bi->isBoudary() at line %d.  Expected TRUE, got FALSE", __LINE__, i);
546     }
547     i = bi->current();
548     if (i != 3) {
549         errln("Incorrect value from bi->current() at line %d.  Expected 3, got %d", __LINE__, i);
550     }
551 
552 
553     if (bi->isBoundary(11) != FALSE) {
554         errln("Incorrect value from bi->isBoudary() at line %d.  Expected FALSE, got TRUE", __LINE__, i);
555     }
556     i = bi->current();
557     if (i != 10) {
558         errln("Incorrect value from bi->current() at line %d.  Expected 10, got %d", __LINE__, i);
559     }
560 
561     //
562     // next(n)
563     //
564     bi->first();
565     i = bi->next(4);
566     if (i != 4) {
567         errln("Incorrect value from bi->next() at line %d.  Expected 4, got %d", __LINE__, i);
568     }
569 
570     i = bi->next(6);
571     if (i != 10) {
572         errln("Incorrect value from bi->next() at line %d.  Expected 10, got %d", __LINE__, i);
573     }
574 
575     bi->first();
576     i = bi->next(11);
577     if (i != BreakIterator::DONE) {
578         errln("Incorrect value from bi->next() at line %d.  Expected BreakIterator::DONE, got %d", __LINE__, i);
579     }
580 
581     delete bi;
582 
583 }
584 
585 
586 
587 
588 
589 
TestBuilder()590 void RBBIAPITest::TestBuilder() {
591      UnicodeString rulesString1 = "$Letters = [:L:];\n"
592                                   "$Numbers = [:N:];\n"
593                                   "$Letters+;\n"
594                                   "$Numbers+;\n"
595                                   "[^$Letters $Numbers];\n"
596                                   "!.*;\n";
597      UnicodeString testString1  = "abc123..abc";
598                                 // 01234567890
599      int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
600      UErrorCode status=U_ZERO_ERROR;
601      UParseError    parseError;
602 
603      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
604      if(U_FAILURE(status)) {
605          dataerrln("Fail : in construction - %s", u_errorName(status));
606      } else {
607          bi->setText(testString1);
608          doBoundaryTest(*bi, testString1, bounds1);
609      }
610      delete bi;
611 }
612 
613 
614 //
615 //  TestQuoteGrouping
616 //       Single quotes within rules imply a grouping, so that a modifier
617 //       following the quoted text (* or +) applies to all of the quoted chars.
618 //
TestQuoteGrouping()619 void RBBIAPITest::TestQuoteGrouping() {
620      UnicodeString rulesString1 = "#Here comes the rule...\n"
621                                   "'$@!'*;\n"   //  (\$\@\!)*
622                                   ".;\n";
623 
624      UnicodeString testString1  = "$@!$@!X$@!!X";
625                                 // 0123456789012
626      int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
627      UErrorCode status=U_ZERO_ERROR;
628      UParseError    parseError;
629 
630      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
631      if(U_FAILURE(status)) {
632          dataerrln("Fail : in construction - %s", u_errorName(status));
633      } else {
634          bi->setText(testString1);
635          doBoundaryTest(*bi, testString1, bounds1);
636      }
637      delete bi;
638 }
639 
640 //
641 //  TestRuleStatus
642 //      Test word break rule status constants.
643 //
TestRuleStatus()644 void RBBIAPITest::TestRuleStatus() {
645      UChar str[30];
646      //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
647      // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
648      u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
649               // 012345678901234567  8      9    0
650               //                     Katakana
651                 str, 30);
652      UnicodeString testString1(str);
653      int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
654      int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
655                           UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
656                           UBRK_WORD_IDEO,     UBRK_WORD_NONE};
657 
658      int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
659                           UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
660                           UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
661 
662      UErrorCode status=U_ZERO_ERROR;
663 
664      BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
665      if(U_FAILURE(status)) {
666          errcheckln(status, "Fail : in construction - %s", u_errorName(status));
667      } else {
668          bi->setText(testString1);
669          // First test that the breaks are in the right spots.
670          doBoundaryTest(*bi, testString1, bounds1);
671 
672          // Then go back and check tag values
673          int32_t i = 0;
674          int32_t pos, tag;
675          for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
676              if (pos != bounds1[i]) {
677                  errln("FAIL: unexpected word break at postion %d", pos);
678                  break;
679              }
680              tag = bi->getRuleStatus();
681              if (tag < tag_lo[i] || tag >= tag_hi[i]) {
682                  errln("FAIL: incorrect tag value %d at position %d", tag, pos);
683                  break;
684              }
685 
686              // Check that we get the same tag values from getRuleStatusVec()
687              int32_t vec[10];
688              int t = bi->getRuleStatusVec(vec, 10, status);
689              TEST_ASSERT_SUCCESS(status);
690              TEST_ASSERT(t==1);
691              TEST_ASSERT(vec[0] == tag);
692          }
693      }
694      delete bi;
695 
696      // Now test line break status.  This test mostly is to confirm that the status constants
697      //                              are correctly declared in the header.
698      testString1 =   "test line. \n";
699      // break type    s    s     h
700 
701      bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
702      if(U_FAILURE(status)) {
703          errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status));
704      } else {
705          int32_t i = 0;
706          int32_t pos, tag;
707          UBool   success;
708 
709          bi->setText(testString1);
710          pos = bi->current();
711          tag = bi->getRuleStatus();
712          for (i=0; i<3; i++) {
713              switch (i) {
714              case 0:
715                  success = pos==0  && tag==UBRK_LINE_SOFT; break;
716              case 1:
717                  success = pos==5  && tag==UBRK_LINE_SOFT; break;
718              case 2:
719                  success = pos==12 && tag==UBRK_LINE_HARD; break;
720              default:
721                  success = FALSE; break;
722              }
723              if (success == FALSE) {
724                  errln("Fail: incorrect word break status or position.  i=%d, pos=%d, tag=%d",
725                      i, pos, tag);
726                  break;
727              }
728              pos = bi->next();
729              tag = bi->getRuleStatus();
730          }
731          if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
732              UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
733              (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
734              errln("UBRK_LINE_* constants from header are inconsistent.");
735          }
736      }
737      delete bi;
738 
739 }
740 
741 
742 //
743 //  TestRuleStatusVec
744 //      Test the vector form of  break rule status.
745 //
TestRuleStatusVec()746 void RBBIAPITest::TestRuleStatusVec() {
747     UnicodeString rulesString(   "[A-N]{100}; \n"
748                                  "[a-w]{200}; \n"
749                                  "[\\p{L}]{300}; \n"
750                                  "[\\p{N}]{400}; \n"
751                                  "[0-5]{500}; \n"
752                                   "!.*;\n", -1, US_INV);
753      UnicodeString testString1  = "Aapz5?";
754      int32_t  statusVals[10];
755      int32_t  numStatuses;
756      int32_t  pos;
757 
758      UErrorCode status=U_ZERO_ERROR;
759      UParseError    parseError;
760 
761      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
762      if (U_FAILURE(status)) {
763          dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
764      } else {
765          bi->setText(testString1);
766 
767          // A
768          pos = bi->next();
769          TEST_ASSERT(pos==1);
770          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
771          TEST_ASSERT_SUCCESS(status);
772          TEST_ASSERT(numStatuses == 2);
773          TEST_ASSERT(statusVals[0] == 100);
774          TEST_ASSERT(statusVals[1] == 300);
775 
776          // a
777          pos = bi->next();
778          TEST_ASSERT(pos==2);
779          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
780          TEST_ASSERT_SUCCESS(status);
781          TEST_ASSERT(numStatuses == 2);
782          TEST_ASSERT(statusVals[0] == 200);
783          TEST_ASSERT(statusVals[1] == 300);
784 
785          // p
786          pos = bi->next();
787          TEST_ASSERT(pos==3);
788          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
789          TEST_ASSERT_SUCCESS(status);
790          TEST_ASSERT(numStatuses == 2);
791          TEST_ASSERT(statusVals[0] == 200);
792          TEST_ASSERT(statusVals[1] == 300);
793 
794          // z
795          pos = bi->next();
796          TEST_ASSERT(pos==4);
797          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
798          TEST_ASSERT_SUCCESS(status);
799          TEST_ASSERT(numStatuses == 1);
800          TEST_ASSERT(statusVals[0] == 300);
801 
802          // 5
803          pos = bi->next();
804          TEST_ASSERT(pos==5);
805          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
806          TEST_ASSERT_SUCCESS(status);
807          TEST_ASSERT(numStatuses == 2);
808          TEST_ASSERT(statusVals[0] == 400);
809          TEST_ASSERT(statusVals[1] == 500);
810 
811          // ?
812          pos = bi->next();
813          TEST_ASSERT(pos==6);
814          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
815          TEST_ASSERT_SUCCESS(status);
816          TEST_ASSERT(numStatuses == 1);
817          TEST_ASSERT(statusVals[0] == 0);
818 
819          //
820          //  Check buffer overflow error handling.   Char == A
821          //
822          bi->first();
823          pos = bi->next();
824          TEST_ASSERT(pos==1);
825          memset(statusVals, -1, sizeof(statusVals));
826          numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
827          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
828          TEST_ASSERT(numStatuses == 2);
829          TEST_ASSERT(statusVals[0] == -1);
830 
831          status = U_ZERO_ERROR;
832          memset(statusVals, -1, sizeof(statusVals));
833          numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
834          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
835          TEST_ASSERT(numStatuses == 2);
836          TEST_ASSERT(statusVals[0] == 100);
837          TEST_ASSERT(statusVals[1] == -1);
838 
839          status = U_ZERO_ERROR;
840          memset(statusVals, -1, sizeof(statusVals));
841          numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
842          TEST_ASSERT_SUCCESS(status);
843          TEST_ASSERT(numStatuses == 2);
844          TEST_ASSERT(statusVals[0] == 100);
845          TEST_ASSERT(statusVals[1] == 300);
846          TEST_ASSERT(statusVals[2] == -1);
847      }
848      delete bi;
849 
850 }
851 
852 //
853 //   Bug 2190 Regression test.   Builder crash on rule consisting of only a
854 //                               $variable reference
TestBug2190()855 void RBBIAPITest::TestBug2190() {
856      UnicodeString rulesString1 = "$aaa = abcd;\n"
857                                   "$bbb = $aaa;\n"
858                                   "$bbb;\n";
859      UnicodeString testString1  = "abcdabcd";
860                                 // 01234567890
861      int32_t bounds1[] = {0, 4, 8};
862      UErrorCode status=U_ZERO_ERROR;
863      UParseError    parseError;
864 
865      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
866      if(U_FAILURE(status)) {
867          dataerrln("Fail : in construction - %s", u_errorName(status));
868      } else {
869          bi->setText(testString1);
870          doBoundaryTest(*bi, testString1, bounds1);
871      }
872      delete bi;
873 }
874 
875 
TestRegistration()876 void RBBIAPITest::TestRegistration() {
877 #if !UCONFIG_NO_SERVICE
878     UErrorCode status = U_ZERO_ERROR;
879     BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
880     // ok to not delete these if we exit because of error?
881     BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
882     BreakIterator* root_word = BreakIterator::createWordInstance("", status);
883     BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
884 
885     if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
886         dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
887 
888         delete ja_word;
889         delete ja_char;
890         delete root_word;
891         delete root_char;
892 
893         return;
894     }
895 
896     URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
897     {
898 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
899         if (ja_word && *ja_word == *root_word) {
900             errln("japan not different from root");
901         }
902 #endif
903     }
904 
905     {
906         BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
907         UBool fail = TRUE;
908         if(result){
909             fail = *result != *ja_word;
910         }
911         delete result;
912         if (fail) {
913             errln("bad result for xx_XX/word");
914         }
915     }
916 
917     {
918         BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
919         UBool fail = TRUE;
920         if(result){
921             fail = *result != *ja_char;
922         }
923         delete result;
924         if (fail) {
925             errln("bad result for ja_JP/char");
926         }
927     }
928 
929     {
930         BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
931         UBool fail = TRUE;
932         if(result){
933             fail = *result != *root_char;
934         }
935         delete result;
936         if (fail) {
937             errln("bad result for xx_XX/char");
938         }
939     }
940 
941     {
942         StringEnumeration* avail = BreakIterator::getAvailableLocales();
943         UBool found = FALSE;
944         const UnicodeString* p;
945         while ((p = avail->snext(status))) {
946             if (p->compare("xx") == 0) {
947                 found = TRUE;
948                 break;
949             }
950         }
951         delete avail;
952         if (!found) {
953             errln("did not find test locale");
954         }
955     }
956 
957     {
958         UBool unreg = BreakIterator::unregister(key, status);
959         if (!unreg) {
960             errln("unable to unregister");
961         }
962     }
963 
964     {
965         BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
966         BreakIterator* root = BreakIterator::createWordInstance("", status);
967         UBool fail = TRUE;
968         if(root){
969           fail = *root != *result;
970         }
971         delete root;
972         delete result;
973         if (fail) {
974             errln("did not get root break");
975         }
976     }
977 
978     {
979         StringEnumeration* avail = BreakIterator::getAvailableLocales();
980         UBool found = FALSE;
981         const UnicodeString* p;
982         while ((p = avail->snext(status))) {
983             if (p->compare("xx") == 0) {
984                 found = TRUE;
985                 break;
986             }
987         }
988         delete avail;
989         if (found) {
990             errln("found test locale");
991         }
992     }
993 
994     {
995         int32_t count;
996         UBool   foundLocale = FALSE;
997         const Locale *avail = BreakIterator::getAvailableLocales(count);
998         for (int i=0; i<count; i++) {
999             if (avail[i] == Locale::getEnglish()) {
1000                 foundLocale = TRUE;
1001                 break;
1002             }
1003         }
1004         if (foundLocale == FALSE) {
1005             errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1006         }
1007     }
1008 
1009 
1010     // ja_word was adopted by factory
1011     delete ja_char;
1012     delete root_word;
1013     delete root_char;
1014 #endif
1015 }
1016 
RoundtripRule(const char * dataFile)1017 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1018     UErrorCode status = U_ZERO_ERROR;
1019     UParseError parseError;
1020     parseError.line = 0;
1021     parseError.offset = 0;
1022     LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1023     uint32_t length;
1024     const UChar *builtSource;
1025     const uint8_t *rbbiRules;
1026     const uint8_t *builtRules;
1027 
1028     if (U_FAILURE(status)) {
1029         errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status));
1030         return;
1031     }
1032 
1033     builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1034     builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1035     RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1036     if (U_FAILURE(status)) {
1037         errln("createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
1038                 u_errorName(status), parseError.line, parseError.offset);
1039         return;
1040     };
1041     rbbiRules = brkItr->getBinaryRules(length);
1042     logln("Comparing \"%s\" len=%d", dataFile, length);
1043     if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1044         errln("Built rules and rebuilt rules are different %s", dataFile);
1045         return;
1046     }
1047     delete brkItr;
1048 }
1049 
TestRoundtripRules()1050 void RBBIAPITest::TestRoundtripRules() {
1051     RoundtripRule("word");
1052     RoundtripRule("title");
1053     RoundtripRule("sent");
1054     RoundtripRule("line");
1055     RoundtripRule("char");
1056     if (!quick) {
1057         RoundtripRule("word_POSIX");
1058     }
1059 }
1060 
1061 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
1062 // (these are protected so we access them via a local class RBBIWithProtectedFunctions).
1063 // This is just a sanity check, not a thorough test (e.g. we don't check that the
1064 // first delete actually frees rulesCopy).
TestCreateFromRBBIData()1065 void RBBIAPITest::TestCreateFromRBBIData() {
1066     // Get some handy RBBIData
1067     const char *brkName = "word"; // or "sent", "line", "char", etc.
1068     UErrorCode status = U_ZERO_ERROR;
1069     LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status));
1070     if ( U_SUCCESS(status) ) {
1071         const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias());
1072         uint32_t length = builtRules->fLength;
1073         RBBIWithProtectedFunctions * brkItr;
1074 
1075         // Try the memory-adopting constructor, need to copy the data first
1076         RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length);
1077         if ( rulesCopy ) {
1078             uprv_memcpy( rulesCopy, builtRules, length );
1079 
1080             brkItr = new RBBIWithProtectedFunctions(rulesCopy, status);
1081             if ( U_SUCCESS(status) ) {
1082                 delete brkItr; // this should free rulesCopy
1083             } else {
1084                 errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) );
1085                 status = U_ZERO_ERROR;// reset for the next test
1086                 uprv_free( rulesCopy );
1087             }
1088         }
1089 
1090         // Now try the non-adopting constructor
1091         brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status);
1092         if ( U_SUCCESS(status) ) {
1093             delete brkItr; // this should NOT attempt to free builtRules
1094             if (builtRules->fLength != length) { // sanity check
1095                 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" );
1096             }
1097         } else {
1098             errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) );
1099         }
1100     }
1101 
1102     // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...)
1103     //
1104     status = U_ZERO_ERROR;
1105     RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
1106     if (rb == NULL || U_FAILURE(status)) {
1107         dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status));
1108     } else {
1109         uint32_t length;
1110         const uint8_t *rules = rb->getBinaryRules(length);
1111         RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status);
1112         TEST_ASSERT_SUCCESS(status);
1113         TEST_ASSERT(*rb == *rb2);
1114         UnicodeString words = "one two three ";
1115         rb2->setText(words);
1116         int wordCounter = 0;
1117         while (rb2->next() != UBRK_DONE) {
1118             wordCounter++;
1119         }
1120         TEST_ASSERT(wordCounter == 6);
1121 
1122         status = U_ZERO_ERROR;
1123         RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status);
1124         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1125 
1126         delete rb;
1127         delete rb2;
1128         delete rb3;
1129     }
1130 }
1131 
1132 
TestRefreshInputText()1133 void RBBIAPITest::TestRefreshInputText() {
1134     /*
1135      *  RefreshInput changes out the input of a Break Iterator without
1136      *    changing anything else in the iterator's state.  Used with Java JNI,
1137      *    when Java moves the underlying string storage.   This test
1138      *    runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1139      *    The right set of boundaries should still be found.
1140      */
1141     UChar testStr[]  = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0};  /* = " A B C D"  */
1142     UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,  0};
1143     UErrorCode status = U_ZERO_ERROR;
1144     UText ut1 = UTEXT_INITIALIZER;
1145     UText ut2 = UTEXT_INITIALIZER;
1146     RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1147     TEST_ASSERT_SUCCESS(status);
1148 
1149     utext_openUChars(&ut1, testStr, -1, &status);
1150     TEST_ASSERT_SUCCESS(status);
1151 
1152     if (U_SUCCESS(status)) {
1153         bi->setText(&ut1, status);
1154         TEST_ASSERT_SUCCESS(status);
1155 
1156         /* Line boundaries will occur before each letter in the original string */
1157         TEST_ASSERT(1 == bi->next());
1158         TEST_ASSERT(3 == bi->next());
1159 
1160         /* Move the string, kill the original string.  */
1161         u_strcpy(movedStr, testStr);
1162         u_memset(testStr, 0x20, u_strlen(testStr));
1163         utext_openUChars(&ut2, movedStr, -1, &status);
1164         TEST_ASSERT_SUCCESS(status);
1165         RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1166         TEST_ASSERT_SUCCESS(status);
1167         TEST_ASSERT(bi == returnedBI);
1168 
1169         /* Find the following matches, now working in the moved string. */
1170         TEST_ASSERT(5 == bi->next());
1171         TEST_ASSERT(7 == bi->next());
1172         TEST_ASSERT(8 == bi->next());
1173         TEST_ASSERT(UBRK_DONE == bi->next());
1174 
1175         utext_close(&ut1);
1176         utext_close(&ut2);
1177     }
1178     delete bi;
1179 
1180 }
1181 
1182 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
prtbrks(BreakIterator * brk,const UnicodeString & ustr,IntlTest & it)1183 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1184   static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1185   it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1186 
1187   int32_t *pos = new int32_t[ustr.length()];
1188   int32_t posCount = 0;
1189 
1190   // calculate breaks up front, so we can print out
1191   // sans any debugging
1192   for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1193     pos[posCount++] = n;
1194     if(posCount>=ustr.length()) {
1195       it.errln("brk count exceeds string length!");
1196       return;
1197     }
1198   }
1199   UnicodeString out;
1200   out.append((UChar)CHSTR);
1201   int32_t prev = 0;
1202   for(int32_t i=0;i<posCount;i++) {
1203     int32_t n=pos[i];
1204     out.append(ustr.tempSubString(prev,n-prev));
1205     out.append((UChar)PILCROW);
1206     prev=n;
1207   }
1208   out.append(ustr.tempSubString(prev,ustr.length()-prev));
1209   out.append((UChar)CHEND);
1210   it.logln(out);
1211 
1212   out.remove();
1213   for(int32_t i=0;i<posCount;i++) {
1214     char tmp[100];
1215     sprintf(tmp,"%d ",pos[i]);
1216     out.append(UnicodeString(tmp));
1217   }
1218   it.logln(out);
1219   delete [] pos;
1220 }
1221 #endif
1222 
TestFilteredBreakIteratorBuilder()1223 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1224 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1225   UErrorCode status = U_ZERO_ERROR;
1226   LocalPointer<FilteredBreakIteratorBuilder> builder;
1227   LocalPointer<BreakIterator> baseBI;
1228   LocalPointer<BreakIterator> filteredBI;
1229   LocalPointer<BreakIterator> frenchBI;
1230 
1231   const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1232   const UnicodeString ABBR_MR("Mr.");
1233   const UnicodeString ABBR_CAPT("Capt.");
1234 
1235   {
1236     logln("Constructing empty builder\n");
1237     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1238     TEST_ASSERT_SUCCESS(status);
1239 
1240     logln("Constructing base BI\n");
1241     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1242     TEST_ASSERT_SUCCESS(status);
1243 
1244 	logln("Building new BI\n");
1245     filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1246     TEST_ASSERT_SUCCESS(status);
1247 
1248 	if (U_SUCCESS(status)) {
1249         logln("Testing:");
1250         filteredBI->setText(text);
1251         TEST_ASSERT(20 == filteredBI->next()); // Mr.
1252         TEST_ASSERT(84 == filteredBI->next()); // recovered.
1253         TEST_ASSERT(90 == filteredBI->next()); // Capt.
1254         TEST_ASSERT(181 == filteredBI->next()); // Mr.
1255         TEST_ASSERT(278 == filteredBI->next()); // charge.
1256         filteredBI->first();
1257         prtbrks(filteredBI.getAlias(), text, *this);
1258     }
1259   }
1260 
1261   {
1262     logln("Constructing empty builder\n");
1263     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1264     TEST_ASSERT_SUCCESS(status);
1265 
1266     if (U_SUCCESS(status)) {
1267         logln("Adding Mr. as an exception\n");
1268         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1269         TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
1270         TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
1271         TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1272         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1273         TEST_ASSERT_SUCCESS(status);
1274 
1275         logln("Constructing base BI\n");
1276         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1277         TEST_ASSERT_SUCCESS(status);
1278 
1279         logln("Building new BI\n");
1280         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1281         TEST_ASSERT_SUCCESS(status);
1282 
1283         logln("Testing:");
1284         filteredBI->setText(text);
1285         TEST_ASSERT(84 == filteredBI->next());
1286         TEST_ASSERT(90 == filteredBI->next());// Capt.
1287         TEST_ASSERT(278 == filteredBI->next());
1288         filteredBI->first();
1289         prtbrks(filteredBI.getAlias(), text, *this);
1290     }
1291   }
1292 
1293 
1294   {
1295     logln("Constructing empty builder\n");
1296     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1297     TEST_ASSERT_SUCCESS(status);
1298 
1299     if (U_SUCCESS(status)) {
1300         logln("Adding Mr. and Capt as an exception\n");
1301         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1302         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
1303         TEST_ASSERT_SUCCESS(status);
1304 
1305         logln("Constructing base BI\n");
1306         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1307         TEST_ASSERT_SUCCESS(status);
1308 
1309         logln("Building new BI\n");
1310         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1311         TEST_ASSERT_SUCCESS(status);
1312 
1313         logln("Testing:");
1314         filteredBI->setText(text);
1315         TEST_ASSERT(84 == filteredBI->next());
1316         TEST_ASSERT(278 == filteredBI->next());
1317         filteredBI->first();
1318         prtbrks(filteredBI.getAlias(), text, *this);
1319     }
1320   }
1321 
1322 
1323   {
1324     logln("Constructing English builder\n");
1325     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1326     TEST_ASSERT_SUCCESS(status);
1327 
1328     logln("Constructing base BI\n");
1329     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1330     TEST_ASSERT_SUCCESS(status);
1331 
1332     if (U_SUCCESS(status)) {
1333         logln("unsuppressing 'Capt'");
1334         TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
1335 
1336         logln("Building new BI\n");
1337         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1338         TEST_ASSERT_SUCCESS(status);
1339 
1340         if(filteredBI.isValid()) {
1341           logln("Testing:");
1342           filteredBI->setText(text);
1343           TEST_ASSERT(84 == filteredBI->next());
1344           TEST_ASSERT(90 == filteredBI->next());
1345           TEST_ASSERT(278 == filteredBI->next());
1346           filteredBI->first();
1347           prtbrks(filteredBI.getAlias(), text, *this);
1348         }
1349     }
1350   }
1351 
1352 
1353   {
1354     logln("Constructing English builder\n");
1355     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1356     TEST_ASSERT_SUCCESS(status);
1357 
1358     logln("Constructing base BI\n");
1359     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1360     TEST_ASSERT_SUCCESS(status);
1361 
1362     if (U_SUCCESS(status)) {
1363         logln("Building new BI\n");
1364         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1365         TEST_ASSERT_SUCCESS(status);
1366 
1367         if(filteredBI.isValid()) {
1368           logln("Testing:");
1369           filteredBI->setText(text);
1370           TEST_ASSERT(84 == filteredBI->next());
1371           TEST_ASSERT(278 == filteredBI->next());
1372           filteredBI->first();
1373           prtbrks(filteredBI.getAlias(), text, *this);
1374         }
1375     }
1376   }
1377 
1378   // reenable once french is in
1379   {
1380     logln("Constructing French builder");
1381     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
1382     TEST_ASSERT_SUCCESS(status);
1383 
1384     logln("Constructing base BI\n");
1385     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
1386     TEST_ASSERT_SUCCESS(status);
1387 
1388     if (U_SUCCESS(status)) {
1389         logln("Building new BI\n");
1390         frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
1391         TEST_ASSERT_SUCCESS(status);
1392     }
1393 
1394     if(frenchBI.isValid()) {
1395       logln("Testing:");
1396       UnicodeString frText("C'est MM. Duval.");
1397       frenchBI->setText(frText);
1398       TEST_ASSERT(16 == frenchBI->next());
1399       TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
1400       frenchBI->first();
1401       prtbrks(frenchBI.getAlias(), frText, *this);
1402       logln("Testing against English:");
1403       filteredBI->setText(frText);
1404       TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
1405       TEST_ASSERT(16 == filteredBI->next());
1406       TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
1407       filteredBI->first();
1408       prtbrks(filteredBI.getAlias(), frText, *this);
1409 
1410       // Verify ==
1411       TEST_ASSERT_TRUE(*frenchBI   == *frenchBI);
1412       TEST_ASSERT_TRUE(*filteredBI != *frenchBI);
1413       TEST_ASSERT_TRUE(*frenchBI   != *filteredBI);
1414     } else {
1415       dataerrln("French BI: not valid.");
1416 	}
1417   }
1418 
1419 #else
1420   logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1421 #endif
1422 }
1423 
1424 //---------------------------------------------
1425 // runIndexedTest
1426 //---------------------------------------------
1427 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1428 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1429 {
1430     if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1431     switch (index) {
1432      //   case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
1433 #if !UCONFIG_NO_FILE_IO
1434         case  0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
1435         case  1: name = "TestgetRules"; if (exec) TestgetRules(); break;
1436         case  2: name = "TestHashCode"; if (exec) TestHashCode(); break;
1437         case  3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
1438         case  4: name = "TestIteration"; if (exec) TestIteration(); break;
1439 #else
1440         case  0: case  1: case  2: case  3: case  4: name = "skip"; break;
1441 #endif
1442         case  5: name = "TestBuilder"; if (exec) TestBuilder(); break;
1443         case  6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
1444         case  7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
1445         case  8: name = "TestBug2190"; if (exec) TestBug2190(); break;
1446 #if !UCONFIG_NO_FILE_IO
1447         case  9: name = "TestRegistration"; if (exec) TestRegistration(); break;
1448         case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
1449         case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
1450         case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
1451         case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break;
1452 #else
1453         case  9: case 10: case 11: case 12: case 13: name = "skip"; break;
1454 #endif
1455         case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break;
1456 
1457 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
1458     case 15: name = "TestFilteredBreakIteratorBuilder"; if(exec) TestFilteredBreakIteratorBuilder(); break;
1459 #else
1460     case 15: name="skip"; break;
1461 #endif
1462         default: name = ""; break; // needed to end loop
1463     }
1464 }
1465 
1466 //---------------------------------------------
1467 //Internal subroutines
1468 //---------------------------------------------
1469 
doBoundaryTest(BreakIterator & bi,UnicodeString & text,int32_t * boundaries)1470 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1471      logln((UnicodeString)"testIsBoundary():");
1472         int32_t p = 0;
1473         UBool isB;
1474         for (int32_t i = 0; i < text.length(); i++) {
1475             isB = bi.isBoundary(i);
1476             logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1477 
1478             if (i == boundaries[p]) {
1479                 if (!isB)
1480                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1481                 p++;
1482             }
1483             else {
1484                 if (isB)
1485                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1486             }
1487         }
1488 }
doTest(UnicodeString & testString,int32_t start,int32_t gotoffset,int32_t expectedOffset,const char * expectedString)1489 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1490     UnicodeString selected;
1491     UnicodeString expected=CharsToUnicodeString(expectedString);
1492 
1493     if(gotoffset != expectedOffset)
1494          errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1495     if(start <= gotoffset){
1496         testString.extractBetween(start, gotoffset, selected);
1497     }
1498     else{
1499         testString.extractBetween(gotoffset, start, selected);
1500     }
1501     if(selected.compare(expected) != 0)
1502          errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1503     else
1504         logln(prettify("****selected \"" + selected + "\""));
1505 }
1506 
1507 //---------------------------------------------
1508 //RBBIWithProtectedFunctions class functions
1509 //---------------------------------------------
1510 
RBBIWithProtectedFunctions(RBBIDataHeader * data,UErrorCode & status)1511 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status)
1512     : RuleBasedBreakIterator(data, status)
1513 {
1514 }
1515 
RBBIWithProtectedFunctions(const RBBIDataHeader * data,enum EDontAdopt,UErrorCode & status)1516 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
1517     : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)
1518 {
1519 }
1520 
1521 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1522