1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * Copyright (c) 1999-2016, International Business Machines
5  * Corporation and others. All Rights Reserved.
6  ********************************************************************
7  *   Date        Name        Description
8  *   12/14/99    Madhu        Creation.
9  *   01/12/2000  Madhu        updated for changed API
10  ********************************************************************/
11 
12 #include "unicode/utypes.h"
13 
14 #if !UCONFIG_NO_BREAK_ITERATION
15 
16 #include "unicode/uchar.h"
17 #include "intltest.h"
18 #include "unicode/rbbi.h"
19 #include "unicode/schriter.h"
20 #include "rbbiapts.h"
21 #include "rbbidata.h"
22 #include "cstring.h"
23 #include "ubrkimpl.h"
24 #include "unicode/locid.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utext.h"
27 #include "cmemory.h"
28 #if !UCONFIG_NO_BREAK_ITERATION
29 #include "unicode/filteredbrk.h"
30 #include <stdio.h> // for sprintf
31 #endif
32 /**
33  * API Test the RuleBasedBreakIterator class
34  */
35 
36 
// Reports a data error through dataerrln(), naming the file, the line and the
// ICU error name, when `status` holds a failure code. Does nothing on success.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); \
    } \
}

// Reports a test failure through errln(), quoting the source text of `expr`,
// when `expr` evaluates to FALSE.
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr); \
    } \
}
42 
TestCloneEquals()43 void RBBIAPITest::TestCloneEquals()
44 {
45 
46     UErrorCode status=U_ZERO_ERROR;
47     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
48     RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
49     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
50     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
51     if(U_FAILURE(status)){
52         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
53         return;
54     }
55 
56 
57     UnicodeString testString="Testing word break iterators's clone() and equals()";
58     bi1->setText(testString);
59     bi2->setText(testString);
60     biequal->setText(testString);
61 
62     bi3->setText("hello");
63 
64     logln((UnicodeString)"Testing equals()");
65 
66     logln((UnicodeString)"Testing == and !=");
67     UBool b = (*bi1 != *biequal);
68     b |= *bi1 == *bi2;
69     b |= *bi1 == *bi3;
70     if (b) {
71         errln("%s:%d ERROR:1 RBBI's == and != operator failed.", __FILE__, __LINE__);
72     }
73 
74     if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
75         errln("%s:%d ERROR:2 RBBI's == and != operator  failed.", __FILE__, __LINE__);
76 
77 
78     // Quick test of RulesBasedBreakIterator assignment -
79     // Check that
80     //    two different iterators are !=
81     //    they are == after assignment
82     //    source and dest iterator produce the same next() after assignment.
83     //    deleting one doesn't disable the other.
84     logln("Testing assignment");
85     RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
86     if(U_FAILURE(status)){
87         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
88         return;
89     }
90 
91     RuleBasedBreakIterator biDefault, biDefault2;
92     if(U_FAILURE(status)){
93         errln("%s:%d FAIL : in construction of default iterator", __FILE__, __LINE__);
94         return;
95     }
96     if (biDefault == *bix) {
97         errln("%s:%d ERROR: iterators should not compare ==", __FILE__, __LINE__);
98         return;
99     }
100     if (biDefault != biDefault2) {
101         errln("%s:%d ERROR: iterators should compare ==", __FILE__, __LINE__);
102         return;
103     }
104 
105 
106     UnicodeString   HelloString("Hello Kitty");
107     bix->setText(HelloString);
108     if (*bix == *bi2) {
109         errln("%s:%d ERROR: strings should not be equal before assignment.", __FILE__, __LINE__);
110     }
111     *bix = *bi2;
112     if (*bix != *bi2) {
113         errln("%s:%d ERROR: strings should be equal before assignment.", __FILE__, __LINE__);
114     }
115 
116     int bixnext = bix->next();
117     int bi2next = bi2->next();
118     if (! (bixnext == bi2next && bixnext == 7)) {
119         errln("%s:%d ERROR: iterators behaved differently after assignment.", __FILE__, __LINE__);
120     }
121     delete bix;
122     if (bi2->next() != 8) {
123         errln("%s:%d ERROR: iterator.next() failed after deleting copy.", __FILE__, __LINE__);
124     }
125 
126 
127 
128     logln((UnicodeString)"Testing clone()");
129     RuleBasedBreakIterator* bi1clone = dynamic_cast<RuleBasedBreakIterator *>(bi1->clone());
130     RuleBasedBreakIterator* bi2clone = dynamic_cast<RuleBasedBreakIterator *>(bi2->clone());
131 
132     if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
133       *bi1clone == *bi3 || *bi1clone == *bi2)
134         errln("%s:%d ERROR:1 RBBI's clone() method failed", __FILE__, __LINE__);
135 
136     if(*bi2clone == *bi1 || *bi2clone == *biequal ||
137        *bi2clone == *bi3 || *bi2clone != *bi2)
138         errln("%s:%d ERROR:2 RBBI's clone() method failed", __FILE__, __LINE__);
139 
140     if(bi1->getText() != bi1clone->getText()   ||
141        bi2clone->getText() != bi2->getText()   ||
142        *bi2clone == *bi1clone )
143         errln("%s:%d ERROR: RBBI's clone() method failed", __FILE__, __LINE__);
144 
145     delete bi1clone;
146     delete bi2clone;
147     delete bi1;
148     delete bi3;
149     delete bi2;
150     delete biequal;
151 }
152 
TestBoilerPlate()153 void RBBIAPITest::TestBoilerPlate()
154 {
155     UErrorCode status = U_ZERO_ERROR;
156     BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
157     BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
158     if (U_FAILURE(status)) {
159         errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
160         return;
161     }
162     if(*a!=*b){
163         errln("Failed: boilerplate method operator!= does not return correct results");
164     }
165     // Japanese word break iterators are identical to root with
166     // a dictionary-based break iterator
167     BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
168     BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
169     if(c && d){
170         if(*c!=*d){
171             errln("Failed: boilerplate method operator== does not return correct results");
172         }
173     }else{
174         errln("creation of break iterator failed");
175     }
176     delete a;
177     delete b;
178     delete c;
179     delete d;
180 }
181 
TestgetRules()182 void RBBIAPITest::TestgetRules()
183 {
184     UErrorCode status=U_ZERO_ERROR;
185 
186     LocalPointer<RuleBasedBreakIterator> bi1(
187             (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status), status);
188     LocalPointer<RuleBasedBreakIterator> bi2(
189             (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status), status);
190     if(U_FAILURE(status)){
191         errcheckln(status, "%s:%d, FAIL: in construction - %s", __FILE__, __LINE__, u_errorName(status));
192         return;
193     }
194 
195     logln((UnicodeString)"Testing getRules()");
196 
197     UnicodeString text(u"Hello there");
198     bi1->setText(text);
199 
200     LocalPointer <RuleBasedBreakIterator> bi3((RuleBasedBreakIterator*)bi1->clone());
201 
202     UnicodeString temp=bi1->getRules();
203     UnicodeString temp2=bi2->getRules();
204     UnicodeString temp3=bi3->getRules();
205     if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
206         errln("%s:%d ERROR: error in getRules() method", __FILE__, __LINE__);
207 
208     RuleBasedBreakIterator bi4;   // Default RuleBasedBreakIterator constructor gives empty shell with empty rules.
209     if (!bi4.getRules().isEmpty()) {
210         errln("%s:%d Empty string expected.", __FILE__, __LINE__);
211     }
212 }
213 
TestHashCode()214 void RBBIAPITest::TestHashCode()
215 {
216     UErrorCode status=U_ZERO_ERROR;
217     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
218     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
219     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
220     if(U_FAILURE(status)){
221         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
222         delete bi1;
223         delete bi2;
224         delete bi3;
225         return;
226     }
227 
228 
229     logln((UnicodeString)"Testing hashCode()");
230 
231     bi1->setText((UnicodeString)"Hash code");
232     bi2->setText((UnicodeString)"Hash code");
233     bi3->setText((UnicodeString)"Hash code");
234 
235     RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
236     RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
237 
238     if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
239         bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
240         errln((UnicodeString)"ERROR: identical objects have different hashcodes");
241 
242     if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
243         bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
244         errln((UnicodeString)"ERROR: different objects have same hashcodes");
245 
246     delete bi1clone;
247     delete bi2clone;
248     delete bi1;
249     delete bi2;
250     delete bi3;
251 
252 }
TestGetSetAdoptText()253 void RBBIAPITest::TestGetSetAdoptText()
254 {
255     logln((UnicodeString)"Testing getText setText ");
256     IcuTestErrorCode status(*this, "TestGetSetAdoptText");
257     UnicodeString str1="first string.";
258     UnicodeString str2="Second string.";
259     LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
260     LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
261     if(status.isFailure()){
262         errcheckln(status, "Fail : in construction - %s", status.errorName());
263             return;
264     }
265 
266 
267     CharacterIterator* text1= new StringCharacterIterator(str1);
268     CharacterIterator* text1Clone = text1->clone();
269     CharacterIterator* text2= new StringCharacterIterator(str2);
270     CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"
271 
272     wordIter1->setText(str1);
273     CharacterIterator *tci = &wordIter1->getText();
274     UnicodeString      tstr;
275     tci->getText(tstr);
276     TEST_ASSERT(tstr == str1);
277     if(wordIter1->current() != 0)
278         errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
279 
280     wordIter1->next(2);
281 
282     wordIter1->setText(str2);
283     if(wordIter1->current() != 0)
284         errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
285 
286 
287     charIter1->adoptText(text1Clone);
288     TEST_ASSERT(wordIter1->getText() != charIter1->getText());
289     tci = &wordIter1->getText();
290     tci->getText(tstr);
291     TEST_ASSERT(tstr == str2);
292     tci = &charIter1->getText();
293     tci->getText(tstr);
294     TEST_ASSERT(tstr == str1);
295 
296 
297     LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
298     rb->adoptText(text1);
299     if(rb->getText() != *text1)
300         errln((UnicodeString)"ERROR:1 error in adoptText ");
301     rb->adoptText(text2);
302     if(rb->getText() != *text2)
303         errln((UnicodeString)"ERROR:2 error in adoptText ");
304 
305     // Adopt where iterator range is less than the entire orignal source string.
306     //   (With the change of the break engine to working with UText internally,
307     //    CharacterIterators starting at positions other than zero are not supported)
308     rb->adoptText(text3);
309     TEST_ASSERT(rb->preceding(2) == 0);
310     TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
311     //if(rb->preceding(2) != 3) {
312     //    errln((UnicodeString)"ERROR:3 error in adoptText ");
313     //}
314     //if(rb->following(11) != BreakIterator::DONE) {
315     //    errln((UnicodeString)"ERROR:4 error in adoptText ");
316     //}
317 
318     // UText API
319     //
320     //   Quick test to see if UText is working at all.
321     //
322     const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
323     const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
324     //                012345678901
325 
326     status.reset();
327     LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
328     wordIter1->setText(ut.getAlias(), status);
329     TEST_ASSERT_SUCCESS(status);
330 
331     int32_t pos;
332     pos = wordIter1->first();
333     TEST_ASSERT(pos==0);
334     pos = wordIter1->next();
335     TEST_ASSERT(pos==5);
336     pos = wordIter1->next();
337     TEST_ASSERT(pos==6);
338     pos = wordIter1->next();
339     TEST_ASSERT(pos==11);
340     pos = wordIter1->next();
341     TEST_ASSERT(pos==UBRK_DONE);
342 
343     status.reset();
344     LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
345     TEST_ASSERT_SUCCESS(status);
346     wordIter1->setText(ut2.getAlias(), status);
347     TEST_ASSERT_SUCCESS(status);
348 
349     pos = wordIter1->first();
350     TEST_ASSERT(pos==0);
351     pos = wordIter1->next();
352     TEST_ASSERT(pos==3);
353     pos = wordIter1->next();
354     TEST_ASSERT(pos==4);
355 
356     pos = wordIter1->last();
357     TEST_ASSERT(pos==6);
358     pos = wordIter1->previous();
359     TEST_ASSERT(pos==4);
360     pos = wordIter1->previous();
361     TEST_ASSERT(pos==3);
362     pos = wordIter1->previous();
363     TEST_ASSERT(pos==0);
364     pos = wordIter1->previous();
365     TEST_ASSERT(pos==UBRK_DONE);
366 
367     status.reset();
368     UnicodeString sEmpty;
369     LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
370     wordIter1->getUText(gut2.getAlias(), status);
371     TEST_ASSERT_SUCCESS(status);
372     status.reset();
373 }
374 
375 
TestIteration()376 void RBBIAPITest::TestIteration()
377 {
378     // This test just verifies that the API is present.
379     // Testing for correct operation of the break rules happens elsewhere.
380 
381     UErrorCode status=U_ZERO_ERROR;
382     RuleBasedBreakIterator* bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
383     if (U_FAILURE(status) || bi == NULL)  {
384         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
385     }
386     delete bi;
387 
388     status=U_ZERO_ERROR;
389     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
390     if (U_FAILURE(status) || bi == NULL)  {
391         errcheckln(status, "Failure creating Word break iterator.  Status = %s", u_errorName(status));
392     }
393     delete bi;
394 
395     status=U_ZERO_ERROR;
396     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
397     if (U_FAILURE(status) || bi == NULL)  {
398         errcheckln(status, "Failure creating Line break iterator.  Status = %s", u_errorName(status));
399     }
400     delete bi;
401 
402     status=U_ZERO_ERROR;
403     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
404     if (U_FAILURE(status) || bi == NULL)  {
405         errcheckln(status, "Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
406     }
407     delete bi;
408 
409     status=U_ZERO_ERROR;
410     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
411     if (U_FAILURE(status) || bi == NULL)  {
412         errcheckln(status, "Failure creating Title break iterator.  Status = %s", u_errorName(status));
413     }
414     delete bi;
415 
416     status=U_ZERO_ERROR;
417     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
418     if (U_FAILURE(status) || bi == NULL)  {
419         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
420         return;   // Skip the rest of these tests.
421     }
422 
423 
424     UnicodeString testString="0123456789";
425     bi->setText(testString);
426 
427     int32_t i;
428     i = bi->first();
429     if (i != 0) {
430         errln("%s:%d Incorrect value from bi->first().  Expected 0, got %d.", __FILE__, __LINE__, i);
431     }
432 
433     i = bi->last();
434     if (i != 10) {
435         errln("%s:%d Incorrect value from bi->last().  Expected 10, got %d", __FILE__, __LINE__, i);
436     }
437 
438     //
439     // Previous
440     //
441     bi->last();
442     i = bi->previous();
443     if (i != 9) {
444         errln("%s:%d Incorrect value from bi->last().  Expected 9, got %d", __FILE__, __LINE__, i);
445     }
446 
447 
448     bi->first();
449     i = bi->previous();
450     if (i != BreakIterator::DONE) {
451         errln("%s:%d Incorrect value from bi->previous().  Expected DONE, got %d", __FILE__, __LINE__, i);
452     }
453 
454     //
455     // next()
456     //
457     bi->first();
458     i = bi->next();
459     if (i != 1) {
460         errln("%s:%d Incorrect value from bi->next().  Expected 1, got %d", __FILE__, __LINE__, i);
461     }
462 
463     bi->last();
464     i = bi->next();
465     if (i != BreakIterator::DONE) {
466         errln("%s:%d Incorrect value from bi->next().  Expected DONE, got %d", __FILE__, __LINE__, i);
467     }
468 
469 
470     //
471     //  current()
472     //
473     bi->first();
474     i = bi->current();
475     if (i != 0) {
476         errln("%s:%d Incorrect value from bi->current().  Expected 0, got %d", __FILE__, __LINE__, i);
477     }
478 
479     bi->next();
480     i = bi->current();
481     if (i != 1) {
482         errln("%s:%d Incorrect value from bi->current().  Expected 1, got %d", __FILE__, __LINE__, i);
483     }
484 
485     bi->last();
486     bi->next();
487     i = bi->current();
488     if (i != 10) {
489         errln("%s:%d Incorrect value from bi->current().  Expected 10, got %d", __FILE__, __LINE__, i);
490     }
491 
492     bi->first();
493     bi->previous();
494     i = bi->current();
495     if (i != 0) {
496         errln("%s:%d Incorrect value from bi->current().  Expected 0, got %d", __FILE__, __LINE__, i);
497     }
498 
499 
500     //
501     // Following()
502     //
503     i = bi->following(4);
504     if (i != 5) {
505         errln("%s:%d Incorrect value from bi->following().  Expected 5, got %d", __FILE__, __LINE__, i);
506     }
507 
508     i = bi->following(9);
509     if (i != 10) {
510         errln("%s:%d Incorrect value from bi->following().  Expected 10, got %d", __FILE__, __LINE__, i);
511     }
512 
513     i = bi->following(10);
514     if (i != BreakIterator::DONE) {
515         errln("%s:%d Incorrect value from bi->following().  Expected DONE, got %d", __FILE__, __LINE__, i);
516     }
517 
518 
519     //
520     // Preceding
521     //
522     i = bi->preceding(4);
523     if (i != 3) {
524         errln("%s:%d Incorrect value from bi->preceding().  Expected 3, got %d", __FILE__, __LINE__, i);
525     }
526 
527     i = bi->preceding(10);
528     if (i != 9) {
529         errln("%s:%d Incorrect value from bi->preceding().  Expected 9, got %d", __FILE__, __LINE__, i);
530     }
531 
532     i = bi->preceding(1);
533     if (i != 0) {
534         errln("%s:%d Incorrect value from bi->preceding().  Expected 0, got %d", __FILE__, __LINE__, i);
535     }
536 
537     i = bi->preceding(0);
538     if (i != BreakIterator::DONE) {
539         errln("%s:%d Incorrect value from bi->preceding().  Expected DONE, got %d", __FILE__, __LINE__, i);
540     }
541 
542 
543     //
544     // isBoundary()
545     //
546     bi->first();
547     if (bi->isBoundary(3) != TRUE) {
548         errln("%s:%d Incorrect value from bi->isBoudary().  Expected TRUE, got FALSE", __FILE__, __LINE__, i);
549     }
550     i = bi->current();
551     if (i != 3) {
552         errln("%s:%d Incorrect value from bi->current().  Expected 3, got %d", __FILE__, __LINE__, i);
553     }
554 
555 
556     if (bi->isBoundary(11) != FALSE) {
557         errln("%s:%d Incorrect value from bi->isBoudary().  Expected FALSE, got TRUE", __FILE__, __LINE__, i);
558     }
559     i = bi->current();
560     if (i != 10) {
561         errln("%s:%d Incorrect value from bi->current().  Expected 10, got %d", __FILE__, __LINE__, i);
562     }
563 
564     //
565     // next(n)
566     //
567     bi->first();
568     i = bi->next(4);
569     if (i != 4) {
570         errln("%s:%d Incorrect value from bi->next().  Expected 4, got %d", __FILE__, __LINE__, i);
571     }
572 
573     i = bi->next(6);
574     if (i != 10) {
575         errln("%s:%d Incorrect value from bi->next().  Expected 10, got %d", __FILE__, __LINE__, i);
576     }
577 
578     bi->first();
579     i = bi->next(11);
580     if (i != BreakIterator::DONE) {
581         errln("%s:%d Incorrect value from bi->next().  Expected BreakIterator::DONE, got %d", __FILE__, __LINE__, i);
582     }
583 
584     delete bi;
585 
586 }
587 
588 
589 
590 
591 
592 
TestBuilder()593 void RBBIAPITest::TestBuilder() {
594      UnicodeString rulesString1 = "$Letters = [:L:];\n"
595                                   "$Numbers = [:N:];\n"
596                                   "$Letters+;\n"
597                                   "$Numbers+;\n"
598                                   "[^$Letters $Numbers];\n"
599                                   "!.*;\n";
600      UnicodeString testString1  = "abc123..abc";
601                                 // 01234567890
602      int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
603      UErrorCode status=U_ZERO_ERROR;
604      UParseError    parseError;
605 
606      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
607      if(U_FAILURE(status)) {
608          dataerrln("Fail : in construction - %s", u_errorName(status));
609      } else {
610          bi->setText(testString1);
611          doBoundaryTest(*bi, testString1, bounds1);
612      }
613      delete bi;
614 }
615 
616 
617 //
618 //  TestQuoteGrouping
619 //       Single quotes within rules imply a grouping, so that a modifier
620 //       following the quoted text (* or +) applies to all of the quoted chars.
621 //
TestQuoteGrouping()622 void RBBIAPITest::TestQuoteGrouping() {
623      UnicodeString rulesString1 = "#Here comes the rule...\n"
624                                   "'$@!'*;\n"   //  (\$\@\!)*
625                                   ".;\n";
626 
627      UnicodeString testString1  = "$@!$@!X$@!!X";
628                                 // 0123456789012
629      int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
630      UErrorCode status=U_ZERO_ERROR;
631      UParseError    parseError;
632 
633      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
634      if(U_FAILURE(status)) {
635          dataerrln("Fail : in construction - %s", u_errorName(status));
636      } else {
637          bi->setText(testString1);
638          doBoundaryTest(*bi, testString1, bounds1);
639      }
640      delete bi;
641 }
642 
643 //
644 //  TestRuleStatus
645 //      Test word break rule status constants.
646 //
TestRuleStatus()647 void RBBIAPITest::TestRuleStatus() {
648      UChar str[30];
649      //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
650      // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
651      u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
652               // 012345678901234567  8      9    0
653               //                     Katakana
654                 str, 30);
655      UnicodeString testString1(str);
656      int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
657      int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
658                           UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
659                           UBRK_WORD_IDEO,     UBRK_WORD_NONE};
660 
661      int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
662                           UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
663                           UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
664 
665      UErrorCode status=U_ZERO_ERROR;
666 
667      BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
668      if(U_FAILURE(status)) {
669          errcheckln(status, "%s:%d Fail in construction - %s", __FILE__, __LINE__, u_errorName(status));
670      } else {
671          bi->setText(testString1);
672          // First test that the breaks are in the right spots.
673          doBoundaryTest(*bi, testString1, bounds1);
674 
675          // Then go back and check tag values
676          int32_t i = 0;
677          int32_t pos, tag;
678          for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
679              if (pos != bounds1[i]) {
680                  errln("%s:%d FAIL: unexpected word break at postion %d", __FILE__, __LINE__, pos);
681                  break;
682              }
683              tag = bi->getRuleStatus();
684              if (tag < tag_lo[i] || tag >= tag_hi[i]) {
685                  errln("%s:%d FAIL: incorrect tag value %d at position %d", __FILE__, __LINE__, tag, pos);
686                  break;
687              }
688 
689              // Check that we get the same tag values from getRuleStatusVec()
690              int32_t vec[10];
691              int t = bi->getRuleStatusVec(vec, 10, status);
692              TEST_ASSERT_SUCCESS(status);
693              TEST_ASSERT(t==1);
694              TEST_ASSERT(vec[0] == tag);
695          }
696      }
697      delete bi;
698 
699      // Now test line break status.  This test mostly is to confirm that the status constants
700      //                              are correctly declared in the header.
701      testString1 =   "test line. \n";
702      // break type    s    s     h
703 
704      bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
705      if(U_FAILURE(status)) {
706          errcheckln(status, "%s:%d failed to create line break iterator. - %s", __FILE__, __LINE__, u_errorName(status));
707      } else {
708          int32_t i = 0;
709          int32_t pos, tag;
710          UBool   success;
711 
712          bi->setText(testString1);
713          pos = bi->current();
714          tag = bi->getRuleStatus();
715          for (i=0; i<3; i++) {
716              switch (i) {
717              case 0:
718                  success = pos==0  && tag==UBRK_LINE_SOFT; break;
719              case 1:
720                  success = pos==5  && tag==UBRK_LINE_SOFT; break;
721              case 2:
722                  success = pos==12 && tag==UBRK_LINE_HARD; break;
723              default:
724                  success = FALSE; break;
725              }
726              if (success == FALSE) {
727                  errln("%s:%d: incorrect line break status or position.  i=%d, pos=%d, tag=%d",
728                      __FILE__, __LINE__, i, pos, tag);
729                  break;
730              }
731              pos = bi->next();
732              tag = bi->getRuleStatus();
733          }
734          if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
735              UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
736              (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
737              errln("%s:%d UBRK_LINE_* constants from header are inconsistent.", __FILE__, __LINE__);
738          }
739      }
740      delete bi;
741 
742 }
743 
744 
745 //
746 //  TestRuleStatusVec
747 //      Test the vector form of  break rule status.
748 //
TestRuleStatusVec()749 void RBBIAPITest::TestRuleStatusVec() {
750     UnicodeString rulesString(   "[A-N]{100}; \n"
751                                  "[a-w]{200}; \n"
752                                  "[\\p{L}]{300}; \n"
753                                  "[\\p{N}]{400}; \n"
754                                  "[0-5]{500}; \n"
755                                   "!.*;\n", -1, US_INV);
756      UnicodeString testString1  = "Aapz5?";
757      int32_t  statusVals[10];
758      int32_t  numStatuses;
759      int32_t  pos;
760 
761      UErrorCode status=U_ZERO_ERROR;
762      UParseError    parseError;
763 
764      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
765      if (U_FAILURE(status)) {
766          dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
767      } else {
768          bi->setText(testString1);
769 
770          // A
771          pos = bi->next();
772          TEST_ASSERT(pos==1);
773          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
774          TEST_ASSERT_SUCCESS(status);
775          TEST_ASSERT(numStatuses == 2);
776          TEST_ASSERT(statusVals[0] == 100);
777          TEST_ASSERT(statusVals[1] == 300);
778 
779          // a
780          pos = bi->next();
781          TEST_ASSERT(pos==2);
782          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
783          TEST_ASSERT_SUCCESS(status);
784          TEST_ASSERT(numStatuses == 2);
785          TEST_ASSERT(statusVals[0] == 200);
786          TEST_ASSERT(statusVals[1] == 300);
787 
788          // p
789          pos = bi->next();
790          TEST_ASSERT(pos==3);
791          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
792          TEST_ASSERT_SUCCESS(status);
793          TEST_ASSERT(numStatuses == 2);
794          TEST_ASSERT(statusVals[0] == 200);
795          TEST_ASSERT(statusVals[1] == 300);
796 
797          // z
798          pos = bi->next();
799          TEST_ASSERT(pos==4);
800          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
801          TEST_ASSERT_SUCCESS(status);
802          TEST_ASSERT(numStatuses == 1);
803          TEST_ASSERT(statusVals[0] == 300);
804 
805          // 5
806          pos = bi->next();
807          TEST_ASSERT(pos==5);
808          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
809          TEST_ASSERT_SUCCESS(status);
810          TEST_ASSERT(numStatuses == 2);
811          TEST_ASSERT(statusVals[0] == 400);
812          TEST_ASSERT(statusVals[1] == 500);
813 
814          // ?
815          pos = bi->next();
816          TEST_ASSERT(pos==6);
817          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
818          TEST_ASSERT_SUCCESS(status);
819          TEST_ASSERT(numStatuses == 1);
820          TEST_ASSERT(statusVals[0] == 0);
821 
822          //
823          //  Check buffer overflow error handling.   Char == A
824          //
825          bi->first();
826          pos = bi->next();
827          TEST_ASSERT(pos==1);
828          memset(statusVals, -1, sizeof(statusVals));
829          numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
830          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
831          TEST_ASSERT(numStatuses == 2);
832          TEST_ASSERT(statusVals[0] == -1);
833 
834          status = U_ZERO_ERROR;
835          memset(statusVals, -1, sizeof(statusVals));
836          numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
837          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
838          TEST_ASSERT(numStatuses == 2);
839          TEST_ASSERT(statusVals[0] == 100);
840          TEST_ASSERT(statusVals[1] == -1);
841 
842          status = U_ZERO_ERROR;
843          memset(statusVals, -1, sizeof(statusVals));
844          numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
845          TEST_ASSERT_SUCCESS(status);
846          TEST_ASSERT(numStatuses == 2);
847          TEST_ASSERT(statusVals[0] == 100);
848          TEST_ASSERT(statusVals[1] == 300);
849          TEST_ASSERT(statusVals[2] == -1);
850      }
851      delete bi;
852 
853 }
854 
855 //
856 //   Bug 2190 Regression test.   Builder crash on rule consisting of only a
857 //                               $variable reference
TestBug2190()858 void RBBIAPITest::TestBug2190() {
859      UnicodeString rulesString1 = "$aaa = abcd;\n"
860                                   "$bbb = $aaa;\n"
861                                   "$bbb;\n";
862      UnicodeString testString1  = "abcdabcd";
863                                 // 01234567890
864      int32_t bounds1[] = {0, 4, 8};
865      UErrorCode status=U_ZERO_ERROR;
866      UParseError    parseError;
867 
868      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
869      if(U_FAILURE(status)) {
870          dataerrln("Fail : in construction - %s", u_errorName(status));
871      } else {
872          bi->setText(testString1);
873          doBoundaryTest(*bi, testString1, bounds1);
874      }
875      delete bi;
876 }
877 
878 
TestRegistration()879 void RBBIAPITest::TestRegistration() {
880 #if !UCONFIG_NO_SERVICE
881     UErrorCode status = U_ZERO_ERROR;
882     BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
883     // ok to not delete these if we exit because of error?
884     BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
885     BreakIterator* root_word = BreakIterator::createWordInstance("", status);
886     BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
887 
888     if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
889         dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
890 
891         delete ja_word;
892         delete ja_char;
893         delete root_word;
894         delete root_char;
895 
896         return;
897     }
898 
899     URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
900     {
901 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
902         if (ja_word && *ja_word == *root_word) {
903             errln("japan not different from root");
904         }
905 #endif
906     }
907 
908     {
909         BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
910         UBool fail = TRUE;
911         if(result){
912             fail = *result != *ja_word;
913         }
914         delete result;
915         if (fail) {
916             errln("bad result for xx_XX/word");
917         }
918     }
919 
920     {
921         BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
922         UBool fail = TRUE;
923         if(result){
924             fail = *result != *ja_char;
925         }
926         delete result;
927         if (fail) {
928             errln("bad result for ja_JP/char");
929         }
930     }
931 
932     {
933         BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
934         UBool fail = TRUE;
935         if(result){
936             fail = *result != *root_char;
937         }
938         delete result;
939         if (fail) {
940             errln("bad result for xx_XX/char");
941         }
942     }
943 
944     {
945         StringEnumeration* avail = BreakIterator::getAvailableLocales();
946         UBool found = FALSE;
947         const UnicodeString* p;
948         while ((p = avail->snext(status))) {
949             if (p->compare("xx") == 0) {
950                 found = TRUE;
951                 break;
952             }
953         }
954         delete avail;
955         if (!found) {
956             errln("did not find test locale");
957         }
958     }
959 
960     {
961         UBool unreg = BreakIterator::unregister(key, status);
962         if (!unreg) {
963             errln("unable to unregister");
964         }
965     }
966 
967     {
968         BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
969         BreakIterator* root = BreakIterator::createWordInstance("", status);
970         UBool fail = TRUE;
971         if(root){
972           fail = *root != *result;
973         }
974         delete root;
975         delete result;
976         if (fail) {
977             errln("did not get root break");
978         }
979     }
980 
981     {
982         StringEnumeration* avail = BreakIterator::getAvailableLocales();
983         UBool found = FALSE;
984         const UnicodeString* p;
985         while ((p = avail->snext(status))) {
986             if (p->compare("xx") == 0) {
987                 found = TRUE;
988                 break;
989             }
990         }
991         delete avail;
992         if (found) {
993             errln("found test locale");
994         }
995     }
996 
997     {
998         int32_t count;
999         UBool   foundLocale = FALSE;
1000         const Locale *avail = BreakIterator::getAvailableLocales(count);
1001         for (int i=0; i<count; i++) {
1002             if (avail[i] == Locale::getEnglish()) {
1003                 foundLocale = TRUE;
1004                 break;
1005             }
1006         }
1007         if (foundLocale == FALSE) {
1008             errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1009         }
1010     }
1011 
1012 
1013     // ja_word was adopted by factory
1014     delete ja_char;
1015     delete root_word;
1016     delete root_char;
1017 #endif
1018 }
1019 
RoundtripRule(const char * dataFile)1020 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1021     UErrorCode status = U_ZERO_ERROR;
1022     UParseError parseError;
1023     parseError.line = 0;
1024     parseError.offset = 0;
1025     LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1026     uint32_t length;
1027     const UChar *builtSource;
1028     const uint8_t *rbbiRules;
1029     const uint8_t *builtRules;
1030 
1031     if (U_FAILURE(status)) {
1032         errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status));
1033         return;
1034     }
1035 
1036     builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1037     builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1038     LocalPointer<RuleBasedBreakIterator> brkItr (new RuleBasedBreakIterator(builtSource, parseError, status));
1039     if (U_FAILURE(status)) {
1040         errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
1041                 __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset);
1042         errln(UnicodeString(builtSource));
1043         return;
1044     };
1045     rbbiRules = brkItr->getBinaryRules(length);
1046     logln("Comparing \"%s\" len=%d", dataFile, length);
1047     if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1048         errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile);
1049         return;
1050     }
1051 }
1052 
TestRoundtripRules()1053 void RBBIAPITest::TestRoundtripRules() {
1054     RoundtripRule("word");
1055     RoundtripRule("title");
1056     RoundtripRule("sent");
1057     RoundtripRule("line");
1058     RoundtripRule("char");
1059     if (!quick) {
1060         RoundtripRule("word_POSIX");
1061     }
1062 }
1063 
1064 
1065 // Check getBinaryRules() and construction of a break iterator from those rules.
1066 
TestGetBinaryRules()1067 void RBBIAPITest::TestGetBinaryRules() {
1068     UErrorCode status=U_ZERO_ERROR;
1069     LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
1070     if (U_FAILURE(status)) {
1071         dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status));
1072         return;
1073     }
1074     RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
1075     if (rbbi == NULL) {
1076         dataerrln("FAIL: RuleBasedBreakIterator is NULL");
1077         return;
1078     }
1079 
1080     // Check that the new line break iterator is nominally functional.
1081     UnicodeString helloWorld("Hello, World!");
1082     rbbi->setText(helloWorld);
1083     int n = 0;
1084     while (bi->next() != UBRK_DONE) {
1085         ++n;
1086     }
1087     TEST_ASSERT(n == 2);
1088 
1089     // Extract the binary rules as a uint8_t blob.
1090     uint32_t ruleLength;
1091     const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
1092     TEST_ASSERT(ruleLength > 0);
1093     TEST_ASSERT(binRules != NULL);
1094 
1095     // Clone the binary rules, and create a break iterator from that.
1096     // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1097     uint8_t *clonedRules = new uint8_t[ruleLength];
1098     memcpy(clonedRules, binRules, ruleLength);
1099     RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
1100     TEST_ASSERT_SUCCESS(status);
1101 
1102     // Check that the cloned line break iterator is nominally alive.
1103     clonedBI.setText(helloWorld);
1104     n = 0;
1105     while (clonedBI.next() != UBRK_DONE) {
1106         ++n;
1107     }
1108     TEST_ASSERT(n == 2);
1109 
1110     delete[] clonedRules;
1111 }
1112 
1113 
TestRefreshInputText()1114 void RBBIAPITest::TestRefreshInputText() {
1115     /*
1116      *  RefreshInput changes out the input of a Break Iterator without
1117      *    changing anything else in the iterator's state.  Used with Java JNI,
1118      *    when Java moves the underlying string storage.   This test
1119      *    runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1120      *    The right set of boundaries should still be found.
1121      */
1122     UChar testStr[]  = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0};  /* = " A B C D"  */
1123     UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,  0};
1124     UErrorCode status = U_ZERO_ERROR;
1125     UText ut1 = UTEXT_INITIALIZER;
1126     UText ut2 = UTEXT_INITIALIZER;
1127     RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1128     TEST_ASSERT_SUCCESS(status);
1129 
1130     utext_openUChars(&ut1, testStr, -1, &status);
1131     TEST_ASSERT_SUCCESS(status);
1132 
1133     if (U_SUCCESS(status)) {
1134         bi->setText(&ut1, status);
1135         TEST_ASSERT_SUCCESS(status);
1136 
1137         /* Line boundaries will occur before each letter in the original string */
1138         TEST_ASSERT(1 == bi->next());
1139         TEST_ASSERT(3 == bi->next());
1140 
1141         /* Move the string, kill the original string.  */
1142         u_strcpy(movedStr, testStr);
1143         u_memset(testStr, 0x20, u_strlen(testStr));
1144         utext_openUChars(&ut2, movedStr, -1, &status);
1145         TEST_ASSERT_SUCCESS(status);
1146         RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1147         TEST_ASSERT_SUCCESS(status);
1148         TEST_ASSERT(bi == returnedBI);
1149 
1150         /* Find the following matches, now working in the moved string. */
1151         TEST_ASSERT(5 == bi->next());
1152         TEST_ASSERT(7 == bi->next());
1153         TEST_ASSERT(8 == bi->next());
1154         TEST_ASSERT(UBRK_DONE == bi->next());
1155 
1156         utext_close(&ut1);
1157         utext_close(&ut2);
1158     }
1159     delete bi;
1160 
1161 }
1162 
1163 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
prtbrks(BreakIterator * brk,const UnicodeString & ustr,IntlTest & it)1164 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1165   static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1166   it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1167 
1168   int32_t *pos = new int32_t[ustr.length()];
1169   int32_t posCount = 0;
1170 
1171   // calculate breaks up front, so we can print out
1172   // sans any debugging
1173   for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1174     pos[posCount++] = n;
1175     if(posCount>=ustr.length()) {
1176       it.errln("brk count exceeds string length!");
1177       return;
1178     }
1179   }
1180   UnicodeString out;
1181   out.append((UChar)CHSTR);
1182   int32_t prev = 0;
1183   for(int32_t i=0;i<posCount;i++) {
1184     int32_t n=pos[i];
1185     out.append(ustr.tempSubString(prev,n-prev));
1186     out.append((UChar)PILCROW);
1187     prev=n;
1188   }
1189   out.append(ustr.tempSubString(prev,ustr.length()-prev));
1190   out.append((UChar)CHEND);
1191   it.logln(out);
1192 
1193   out.remove();
1194   for(int32_t i=0;i<posCount;i++) {
1195     char tmp[100];
1196     sprintf(tmp,"%d ",pos[i]);
1197     out.append(UnicodeString(tmp));
1198   }
1199   it.logln(out);
1200   delete [] pos;
1201 }
1202 #endif
1203 
TestFilteredBreakIteratorBuilder()1204 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1205 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1206   UErrorCode status = U_ZERO_ERROR;
1207   LocalPointer<FilteredBreakIteratorBuilder> builder;
1208   LocalPointer<BreakIterator> baseBI;
1209   LocalPointer<BreakIterator> filteredBI;
1210   LocalPointer<BreakIterator> frenchBI;
1211 
1212   const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1213   const UnicodeString ABBR_MR("Mr.");
1214   const UnicodeString ABBR_CAPT("Capt.");
1215 
1216   {
1217     logln("Constructing empty builder\n");
1218     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1219     TEST_ASSERT_SUCCESS(status);
1220 
1221     logln("Constructing base BI\n");
1222     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1223     TEST_ASSERT_SUCCESS(status);
1224 
1225 	logln("Building new BI\n");
1226     filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1227     TEST_ASSERT_SUCCESS(status);
1228 
1229 	if (U_SUCCESS(status)) {
1230         logln("Testing:");
1231         filteredBI->setText(text);
1232         TEST_ASSERT(20 == filteredBI->next()); // Mr.
1233         TEST_ASSERT(84 == filteredBI->next()); // recovered.
1234         TEST_ASSERT(90 == filteredBI->next()); // Capt.
1235         TEST_ASSERT(181 == filteredBI->next()); // Mr.
1236         TEST_ASSERT(278 == filteredBI->next()); // charge.
1237         filteredBI->first();
1238         prtbrks(filteredBI.getAlias(), text, *this);
1239     }
1240   }
1241 
1242   {
1243     logln("Constructing empty builder\n");
1244     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1245     TEST_ASSERT_SUCCESS(status);
1246 
1247     if (U_SUCCESS(status)) {
1248         logln("Adding Mr. as an exception\n");
1249         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1250         TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
1251         TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
1252         TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1253         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1254         TEST_ASSERT_SUCCESS(status);
1255 
1256         logln("Constructing base BI\n");
1257         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1258         TEST_ASSERT_SUCCESS(status);
1259 
1260         logln("Building new BI\n");
1261         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1262         TEST_ASSERT_SUCCESS(status);
1263 
1264         logln("Testing:");
1265         filteredBI->setText(text);
1266         TEST_ASSERT(84 == filteredBI->next());
1267         TEST_ASSERT(90 == filteredBI->next());// Capt.
1268         TEST_ASSERT(278 == filteredBI->next());
1269         filteredBI->first();
1270         prtbrks(filteredBI.getAlias(), text, *this);
1271     }
1272   }
1273 
1274 
1275   {
1276     logln("Constructing empty builder\n");
1277     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1278     TEST_ASSERT_SUCCESS(status);
1279 
1280     if (U_SUCCESS(status)) {
1281         logln("Adding Mr. and Capt as an exception\n");
1282         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1283         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
1284         TEST_ASSERT_SUCCESS(status);
1285 
1286         logln("Constructing base BI\n");
1287         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1288         TEST_ASSERT_SUCCESS(status);
1289 
1290         logln("Building new BI\n");
1291         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1292         TEST_ASSERT_SUCCESS(status);
1293 
1294         logln("Testing:");
1295         filteredBI->setText(text);
1296         TEST_ASSERT(84 == filteredBI->next());
1297         TEST_ASSERT(278 == filteredBI->next());
1298         filteredBI->first();
1299         prtbrks(filteredBI.getAlias(), text, *this);
1300     }
1301   }
1302 
1303 
1304   {
1305     logln("Constructing English builder\n");
1306     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1307     TEST_ASSERT_SUCCESS(status);
1308 
1309     logln("Constructing base BI\n");
1310     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1311     TEST_ASSERT_SUCCESS(status);
1312 
1313     if (U_SUCCESS(status)) {
1314         logln("unsuppressing 'Capt'");
1315         TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
1316 
1317         logln("Building new BI\n");
1318         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1319         TEST_ASSERT_SUCCESS(status);
1320 
1321         if(filteredBI.isValid()) {
1322           logln("Testing:");
1323           filteredBI->setText(text);
1324           TEST_ASSERT(84 == filteredBI->next());
1325           TEST_ASSERT(90 == filteredBI->next());
1326           TEST_ASSERT(278 == filteredBI->next());
1327           filteredBI->first();
1328           prtbrks(filteredBI.getAlias(), text, *this);
1329         }
1330     }
1331   }
1332 
1333 
1334   {
1335     logln("Constructing English builder\n");
1336     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1337     TEST_ASSERT_SUCCESS(status);
1338 
1339     logln("Constructing base BI\n");
1340     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1341     TEST_ASSERT_SUCCESS(status);
1342 
1343     if (U_SUCCESS(status)) {
1344         logln("Building new BI\n");
1345         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1346         TEST_ASSERT_SUCCESS(status);
1347 
1348         if(filteredBI.isValid()) {
1349           logln("Testing:");
1350           filteredBI->setText(text);
1351           TEST_ASSERT(84 == filteredBI->next());
1352           TEST_ASSERT(278 == filteredBI->next());
1353           filteredBI->first();
1354           prtbrks(filteredBI.getAlias(), text, *this);
1355         }
1356     }
1357   }
1358 
1359   // reenable once french is in
1360   {
1361     logln("Constructing French builder");
1362     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
1363     TEST_ASSERT_SUCCESS(status);
1364 
1365     logln("Constructing base BI\n");
1366     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
1367     TEST_ASSERT_SUCCESS(status);
1368 
1369     if (U_SUCCESS(status)) {
1370         logln("Building new BI\n");
1371         frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
1372         TEST_ASSERT_SUCCESS(status);
1373     }
1374 
1375     if(frenchBI.isValid()) {
1376       logln("Testing:");
1377       UnicodeString frText("C'est MM. Duval.");
1378       frenchBI->setText(frText);
1379       TEST_ASSERT(16 == frenchBI->next());
1380       TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
1381       frenchBI->first();
1382       prtbrks(frenchBI.getAlias(), frText, *this);
1383       logln("Testing against English:");
1384       filteredBI->setText(frText);
1385       TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
1386       TEST_ASSERT(16 == filteredBI->next());
1387       TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
1388       filteredBI->first();
1389       prtbrks(filteredBI.getAlias(), frText, *this);
1390 
1391       // Verify ==
1392       TEST_ASSERT_TRUE(*frenchBI   == *frenchBI);
1393       TEST_ASSERT_TRUE(*filteredBI != *frenchBI);
1394       TEST_ASSERT_TRUE(*frenchBI   != *filteredBI);
1395     } else {
1396       dataerrln("French BI: not valid.");
1397 	}
1398   }
1399 
1400 #else
1401   logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1402 #endif
1403 }
1404 
1405 //---------------------------------------------
1406 // runIndexedTest
1407 //---------------------------------------------
1408 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1409 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1410 {
1411     if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1412     TESTCASE_AUTO_BEGIN;
1413 #if !UCONFIG_NO_FILE_IO
1414     TESTCASE_AUTO(TestCloneEquals);
1415     TESTCASE_AUTO(TestgetRules);
1416     TESTCASE_AUTO(TestHashCode);
1417     TESTCASE_AUTO(TestGetSetAdoptText);
1418     TESTCASE_AUTO(TestIteration);
1419 #endif
1420     TESTCASE_AUTO(TestBuilder);
1421     TESTCASE_AUTO(TestQuoteGrouping);
1422     TESTCASE_AUTO(TestRuleStatusVec);
1423     TESTCASE_AUTO(TestBug2190);
1424 #if !UCONFIG_NO_FILE_IO
1425     TESTCASE_AUTO(TestRegistration);
1426     TESTCASE_AUTO(TestBoilerPlate);
1427     TESTCASE_AUTO(TestRuleStatus);
1428     TESTCASE_AUTO(TestRoundtripRules);
1429     TESTCASE_AUTO(TestGetBinaryRules);
1430 #endif
1431     TESTCASE_AUTO(TestRefreshInputText);
1432 #if !UCONFIG_NO_BREAK_ITERATION
1433     TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
1434 #endif
1435     TESTCASE_AUTO_END;
1436 }
1437 
1438 
1439 //---------------------------------------------
1440 //Internal subroutines
1441 //---------------------------------------------
1442 
doBoundaryTest(BreakIterator & bi,UnicodeString & text,int32_t * boundaries)1443 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1444      logln((UnicodeString)"testIsBoundary():");
1445         int32_t p = 0;
1446         UBool isB;
1447         for (int32_t i = 0; i < text.length(); i++) {
1448             isB = bi.isBoundary(i);
1449             logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1450 
1451             if (i == boundaries[p]) {
1452                 if (!isB)
1453                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1454                 p++;
1455             }
1456             else {
1457                 if (isB)
1458                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1459             }
1460         }
1461 }
doTest(UnicodeString & testString,int32_t start,int32_t gotoffset,int32_t expectedOffset,const char * expectedString)1462 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1463     UnicodeString selected;
1464     UnicodeString expected=CharsToUnicodeString(expectedString);
1465 
1466     if(gotoffset != expectedOffset)
1467          errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1468     if(start <= gotoffset){
1469         testString.extractBetween(start, gotoffset, selected);
1470     }
1471     else{
1472         testString.extractBetween(gotoffset, start, selected);
1473     }
1474     if(selected.compare(expected) != 0)
1475          errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1476     else
1477         logln(prettify("****selected \"" + selected + "\""));
1478 }
1479 
1480 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1481