1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *****************************************************************************
5 * Copyright (C) 2001-2016, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 ****************************************************************************/
8 
9 #include "unicode/utypes.h"
10 
11 #if !UCONFIG_NO_COLLATION
12 
13 #include "srchtest.h"
14 #if !UCONFIG_NO_BREAK_ITERATION
15 #include "../cintltst/usrchdat.c"
16 #endif
17 #include "unicode/stsearch.h"
18 #include "unicode/ustring.h"
19 #include "unicode/schriter.h"
20 #include "cmemory.h"
21 #include <string.h>
22 #include <stdio.h>
23 
24 // private definitions -----------------------------------------------------
25 
// Expands to one `case` label of the switch in runIndexedTest(): it always
// publishes the test's name and, when execution was requested, logs a banner
// and then either runs the test method or -- if the shared fixtures are
// flagged as broken -- reports a data error instead.
// The expansion site must provide `name`, `exec` and `areBroken`.
#define CASE(id,test)                                                          \
    case id:                                                                   \
        name = #test;                                                          \
        if (exec) {                                                            \
            logln(#test "---");                                                \
            logln((UnicodeString)"");                                          \
            if (!areBroken) {                                                  \
                test();                                                        \
            } else {                                                           \
                dataerrln(__FILE__ " cannot test - failed to create collator.");  \
            }                                                                  \
        }                                                                      \
        break;
39 
40 // public constructors and destructors --------------------------------------
41 
StringSearchTest()42 StringSearchTest::StringSearchTest()
43 #if !UCONFIG_NO_BREAK_ITERATION
44 :
45     m_en_wordbreaker_(NULL), m_en_characterbreaker_(NULL)
46 #endif
47 {
48 #if !UCONFIG_NO_BREAK_ITERATION
49     UErrorCode    status = U_ZERO_ERROR;
50 
51     m_en_us_ = (RuleBasedCollator *)Collator::createInstance("en_US", status);
52     m_fr_fr_ = (RuleBasedCollator *)Collator::createInstance("fr_FR", status);
53     m_de_    = (RuleBasedCollator *)Collator::createInstance("de_DE", status);
54     m_es_    = (RuleBasedCollator *)Collator::createInstance("es_ES", status);
55     if(U_FAILURE(status)) {
56       delete m_en_us_;
57       delete m_fr_fr_;
58       delete m_de_;
59       delete m_es_;
60       m_en_us_ = 0;
61       m_fr_fr_ = 0;
62       m_de_ = 0;
63       m_es_ = 0;
64       errln("Collator creation failed with %s", u_errorName(status));
65       return;
66     }
67 
68 
69     UnicodeString rules;
70     rules.setTo(((RuleBasedCollator *)m_de_)->getRules());
71     UChar extrarules[128];
72     u_unescape(EXTRACOLLATIONRULE, extrarules, 128);
73     rules.append(extrarules, u_strlen(extrarules));
74     delete m_de_;
75 
76     m_de_ = new RuleBasedCollator(rules, status);
77 
78     rules.setTo(((RuleBasedCollator *)m_es_)->getRules());
79     rules.append(extrarules, u_strlen(extrarules));
80 
81     delete m_es_;
82 
83     m_es_ = new RuleBasedCollator(rules, status);
84 
85 #if !UCONFIG_NO_BREAK_ITERATION
86     m_en_wordbreaker_      = BreakIterator::createWordInstance(
87                                                     Locale::getEnglish(), status);
88     m_en_characterbreaker_ = BreakIterator::createCharacterInstance(
89                                                     Locale::getEnglish(), status);
90 #endif
91 #endif
92 }
93 
~StringSearchTest()94 StringSearchTest::~StringSearchTest()
95 {
96 #if !UCONFIG_NO_BREAK_ITERATION
97     delete m_en_us_;
98     delete m_fr_fr_;
99     delete m_de_;
100     delete m_es_;
101 #if !UCONFIG_NO_BREAK_ITERATION
102     delete m_en_wordbreaker_;
103     delete m_en_characterbreaker_;
104 #endif
105 #endif
106 }
107 
108 // public methods ----------------------------------------------------------
109 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)110 void StringSearchTest::runIndexedTest(int32_t index, UBool exec,
111                                       const char* &name, char* )
112 {
113 #if !UCONFIG_NO_BREAK_ITERATION
114     UBool areBroken = FALSE;
115     if (m_en_us_ == NULL && m_fr_fr_ == NULL && m_de_ == NULL &&
116         m_es_ == NULL && m_en_wordbreaker_ == NULL &&
117         m_en_characterbreaker_ == NULL && exec) {
118         areBroken = TRUE;
119     }
120 
121     switch (index) {
122 #if !UCONFIG_NO_FILE_IO
123         CASE(0, TestOpenClose)
124 #endif
125         CASE(1, TestInitialization)
126         CASE(2, TestBasic)
127         CASE(3, TestNormExact)
128         CASE(4, TestStrength)
129 #if UCONFIG_NO_BREAK_ITERATION
130     case 5:
131         name = "TestBreakIterator";
132         break;
133 #else
134         CASE(5, TestBreakIterator)
135 #endif
136         CASE(6, TestVariable)
137         CASE(7, TestOverlap)
138         CASE(8, TestCollator)
139         CASE(9, TestPattern)
140         CASE(10, TestText)
141         CASE(11, TestCompositeBoundaries)
142         CASE(12, TestGetSetOffset)
143         CASE(13, TestGetSetAttribute)
144         CASE(14, TestGetMatch)
145         CASE(15, TestSetMatch)
146         CASE(16, TestReset)
147         CASE(17, TestSupplementary)
148         CASE(18, TestContraction)
149         CASE(19, TestIgnorable)
150         CASE(20, TestCanonical)
151         CASE(21, TestNormCanonical)
152         CASE(22, TestStrengthCanonical)
153 #if UCONFIG_NO_BREAK_ITERATION
154     case 23:
155         name = "TestBreakIteratorCanonical";
156         break;
157 #else
158         CASE(23, TestBreakIteratorCanonical)
159 #endif
160         CASE(24, TestVariableCanonical)
161         CASE(25, TestOverlapCanonical)
162         CASE(26, TestCollatorCanonical)
163         CASE(27, TestPatternCanonical)
164         CASE(28, TestTextCanonical)
165         CASE(29, TestCompositeBoundariesCanonical)
166         CASE(30, TestGetSetOffsetCanonical)
167         CASE(31, TestSupplementaryCanonical)
168         CASE(32, TestContractionCanonical)
169         CASE(33, TestUClassID)
170         CASE(34, TestSubclass)
171         CASE(35, TestCoverage)
172         CASE(36, TestDiacriticMatch)
173         default: name = ""; break;
174     }
175 #else
176     name="";
177 #endif
178 }
179 
180 #if !UCONFIG_NO_BREAK_ITERATION
181 // private methods ------------------------------------------------------
182 
getCollator(const char * collator)183 RuleBasedCollator * StringSearchTest::getCollator(const char *collator)
184 {
185     if (collator == NULL) {
186         return m_en_us_;
187     }
188     if (strcmp(collator, "fr") == 0) {
189         return m_fr_fr_;
190     }
191     else if (strcmp(collator, "de") == 0) {
192         return m_de_;
193     }
194     else if (strcmp(collator, "es") == 0) {
195         return m_es_;
196     }
197     else {
198         return m_en_us_;
199     }
200 }
201 
getBreakIterator(const char * breaker)202 BreakIterator * StringSearchTest::getBreakIterator(const char *breaker)
203 {
204 #if UCONFIG_NO_BREAK_ITERATION
205     return NULL;
206 #else
207     if (breaker == NULL) {
208         return NULL;
209     }
210     if (strcmp(breaker, "wordbreaker") == 0) {
211         return m_en_wordbreaker_;
212     }
213     else {
214         return m_en_characterbreaker_;
215     }
216 #endif
217 }
218 
toCharString(const UnicodeString & text)219 char * StringSearchTest::toCharString(const UnicodeString &text)
220 {
221     static char   result[1024];
222            int    index  = 0;
223            int    count  = 0;
224            int    length = text.length();
225 
226     for (; count < length; count ++) {
227         UChar ch = text[count];
228         if (ch >= 0x20 && ch <= 0x7e) {
229             result[index ++] = (char)ch;
230         }
231         else {
232             sprintf(result+index, "\\u%04x", ch);
233             index += 6; /* \uxxxx */
234         }
235     }
236     result[index] = 0;
237 
238     return result;
239 }
240 
getECollationStrength(const UCollationStrength & strength) const241 Collator::ECollationStrength StringSearchTest::getECollationStrength(
242                                     const UCollationStrength &strength) const
243 {
244   switch (strength)
245   {
246   case UCOL_PRIMARY :
247     return Collator::PRIMARY;
248   case UCOL_SECONDARY :
249     return Collator::SECONDARY;
250   case UCOL_TERTIARY :
251     return Collator::TERTIARY;
252   default :
253     return Collator::IDENTICAL;
254   }
255 }
256 
assertEqualWithStringSearch(StringSearch * strsrch,const SearchData * search)257 UBool StringSearchTest::assertEqualWithStringSearch(StringSearch *strsrch,
258                                                     const SearchData *search)
259 {
260     int32_t         count       = 0;
261     UErrorCode      status      = U_ZERO_ERROR;
262     int32_t         matchindex  = search->offset[count];
263     UnicodeString   matchtext;
264     int32_t         matchlength;
265 
266     strsrch->setAttribute(USEARCH_ELEMENT_COMPARISON, search->elemCompare, status);
267     if (U_FAILURE(status)) {
268         errln("Error setting USEARCH_ELEMENT_COMPARISON attribute %s", u_errorName(status));
269         return FALSE;
270     }
271 
272     if (strsrch->getMatchedStart() != USEARCH_DONE ||
273         strsrch->getMatchedLength() != 0) {
274         errln("Error with the initialization of match start and length");
275     }
276 
277     // start of next matches
278     while (U_SUCCESS(status) && matchindex >= 0) {
279         matchlength = search->size[count];
280         strsrch->next(status);
281         if (matchindex != strsrch->getMatchedStart() ||
282             matchlength != strsrch->getMatchedLength()) {
283             char *str = toCharString(strsrch->getText());
284             errln("Text: %s", str);
285             str = toCharString(strsrch->getPattern());
286             errln("Pattern: %s", str);
287             errln("Error next match found at %d (len:%d); expected %d (len:%d)",
288                     strsrch->getMatchedStart(), strsrch->getMatchedLength(),
289                     matchindex, matchlength);
290             return FALSE;
291         }
292         count ++;
293 
294         strsrch->getMatchedText(matchtext);
295 
296         if (U_FAILURE(status) ||
297             strsrch->getText().compareBetween(matchindex,
298                                               matchindex + matchlength,
299                                               matchtext, 0,
300                                               matchtext.length())) {
301             errln("Error getting next matched text");
302         }
303 
304         matchindex = search->offset[count];
305     }
306     strsrch->next(status);
307     if (strsrch->getMatchedStart() != USEARCH_DONE ||
308         strsrch->getMatchedLength() != 0) {
309         char *str = toCharString(strsrch->getText());
310             errln("Text: %s", str);
311             str = toCharString(strsrch->getPattern());
312             errln("Pattern: %s", str);
313             errln("Error next match found at %d (len:%d); expected <NO MATCH>",
314                     strsrch->getMatchedStart(), strsrch->getMatchedLength());
315             return FALSE;
316     }
317 
318     // start of previous matches
319     count = count == 0 ? 0 : count - 1;
320     matchindex = search->offset[count];
321     while (U_SUCCESS(status) && matchindex >= 0) {
322         matchlength = search->size[count];
323         strsrch->previous(status);
324         if (matchindex != strsrch->getMatchedStart() ||
325             matchlength != strsrch->getMatchedLength()) {
326             char *str = toCharString(strsrch->getText());
327             errln("Text: %s", str);
328             str = toCharString(strsrch->getPattern());
329             errln("Pattern: %s", str);
330             errln("Error previous match found at %d (len:%d); expected %d (len:%d)",
331                     strsrch->getMatchedStart(), strsrch->getMatchedLength(),
332                     matchindex, matchlength);
333             return FALSE;
334         }
335 
336         strsrch->getMatchedText(matchtext);
337 
338         if (U_FAILURE(status) ||
339             strsrch->getText().compareBetween(matchindex,
340                                               matchindex + matchlength,
341                                               matchtext, 0,
342                                               matchtext.length())) {
343             errln("Error getting previous matched text");
344         }
345 
346         matchindex = count > 0 ? search->offset[count - 1] : -1;
347         count --;
348     }
349     strsrch->previous(status);
350     if (strsrch->getMatchedStart() != USEARCH_DONE ||
351         strsrch->getMatchedLength() != 0) {
352         char *str = toCharString(strsrch->getText());
353         errln("Text: %s", str);
354         str = toCharString(strsrch->getPattern());
355         errln("Pattern: %s", str);
356         errln("Error previous match found at %d (len:%d); expected <NO MATCH>",
357                 strsrch->getMatchedStart(), strsrch->getMatchedLength());
358         return FALSE;
359     }
360 
361     int32_t nextStart;
362     UBool isOverlap = (strsrch->getAttribute(USEARCH_OVERLAP) == USEARCH_ON);
363 
364     // start of following matches
365     count = 0;
366     matchindex = search->offset[count];
367     nextStart = 0;
368 
369     while (TRUE) {
370         strsrch->following(nextStart, status);
371 
372         if (matchindex < 0) {
373             if (strsrch->getMatchedStart() != USEARCH_DONE ||
374                     strsrch->getMatchedLength() != 0) {
375                 char *str = toCharString(strsrch->getText());
376                 errln("Text: %s", str);
377                 str = toCharString(strsrch->getPattern());
378                 errln("Pattern: %s", str);
379                 errln("Error following match starting at %d (overlap:%d) found at %d (len:%d); expected <NO MATCH>",
380                         nextStart, isOverlap,
381                         strsrch->getMatchedStart(), strsrch->getMatchedLength());
382                 return FALSE;
383             }
384             // no more matches
385             break;
386         }
387 
388         matchlength = search->size[count];
389         if (strsrch->getMatchedStart() != matchindex
390                 || strsrch->getMatchedLength() != matchlength
391                 || U_FAILURE(status)) {
392             char *str = toCharString(strsrch->getText());
393             errln("Text: %s\n", str);
394             str = toCharString(strsrch->getPattern());
395             errln("Pattern: %s\n", str);
396             errln("Error following match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
397                         nextStart, isOverlap,
398                         strsrch->getMatchedStart(), strsrch->getMatchedLength(),
399                         matchindex, matchlength);
400             return FALSE;
401         }
402 
403         if (isOverlap || strsrch->getMatchedLength() == 0) {
404             nextStart = strsrch->getMatchedStart() + 1;
405         } else {
406             nextStart = strsrch->getMatchedStart() + strsrch->getMatchedLength();
407         }
408 
409         count++;
410         matchindex = search->offset[count];
411     }
412 
413     // start preceding matches
414     count = -1; // last non-negative offset index, could be -1 if no match
415     while (search->offset[count + 1] >= 0) {
416         count++;
417     }
418     nextStart = strsrch->getText().length();
419 
420     while (TRUE) {
421         strsrch->preceding(nextStart, status);
422 
423         if (count < 0) {
424             if (strsrch->getMatchedStart() != USEARCH_DONE || strsrch->getMatchedLength() != 0) {
425                 char *str = toCharString(strsrch->getText());
426                 errln("Text: %s\n", str);
427                 str = toCharString(strsrch->getPattern());
428                 errln("Pattern: %s\n", str);
429                 errln("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected <NO MATCH>\n",
430                             nextStart, isOverlap,
431                             strsrch->getMatchedStart(),
432                             strsrch->getMatchedLength());
433                 return FALSE;
434             }
435             // no more matches
436             break;
437         }
438 
439         matchindex = search->offset[count];
440         matchlength = search->size[count];
441         if (strsrch->getMatchedStart() != matchindex
442                 || strsrch->getMatchedLength() != matchlength
443                 || U_FAILURE(status)) {
444             char *str = toCharString(strsrch->getText());
445             errln("Text: %s\n", str);
446             str = toCharString(strsrch->getPattern());
447             errln("Pattern: %s\n", str);
448             errln("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
449                         nextStart, isOverlap,
450                         strsrch->getMatchedStart(), strsrch->getMatchedLength(),
451                         matchindex, matchlength);
452             return FALSE;
453         }
454 
455         nextStart = matchindex;
456         count--;
457     }
458 
459     strsrch->setAttribute(USEARCH_ELEMENT_COMPARISON, USEARCH_STANDARD_ELEMENT_COMPARISON, status);
460     return TRUE;
461 }
462 
assertEqual(const SearchData * search)463 UBool StringSearchTest::assertEqual(const SearchData *search)
464 {
465     UErrorCode     status   = U_ZERO_ERROR;
466 
467     Collator      *collator = getCollator(search->collator);
468     BreakIterator *breaker  = getBreakIterator(search->breaker);
469     StringSearch  *strsrch, *strsrch2;
470     UChar          temp[128];
471 
472 #if UCONFIG_NO_BREAK_ITERATION
473     if(search->breaker) {
474       return TRUE; /* skip test */
475     }
476 #endif
477     u_unescape(search->text, temp, 128);
478     UnicodeString text;
479     text.setTo(temp);
480     u_unescape(search->pattern, temp, 128);
481     UnicodeString  pattern;
482     pattern.setTo(temp);
483 
484 #if !UCONFIG_NO_BREAK_ITERATION
485     if (breaker != NULL) {
486         breaker->setText(text);
487     }
488 #endif
489     collator->setStrength(getECollationStrength(search->strength));
490     strsrch = new StringSearch(pattern, text, (RuleBasedCollator *)collator,
491                                breaker, status);
492     if (U_FAILURE(status)) {
493         errln("Error opening string search %s", u_errorName(status));
494         return FALSE;
495     }
496 
497     if (!assertEqualWithStringSearch(strsrch, search)) {
498         collator->setStrength(getECollationStrength(UCOL_TERTIARY));
499         delete strsrch;
500         return FALSE;
501     }
502 
503 
504     strsrch2 = strsrch->clone();
505     if( strsrch2 == strsrch || *strsrch2 != *strsrch ||
506         !assertEqualWithStringSearch(strsrch2, search)
507     ) {
508         infoln("failure with StringSearch.clone()");
509         collator->setStrength(getECollationStrength(UCOL_TERTIARY));
510         delete strsrch;
511         delete strsrch2;
512         return FALSE;
513     }
514     delete strsrch2;
515 
516     collator->setStrength(getECollationStrength(UCOL_TERTIARY));
517     delete strsrch;
518     return TRUE;
519 }
520 
assertCanonicalEqual(const SearchData * search)521 UBool StringSearchTest::assertCanonicalEqual(const SearchData *search)
522 {
523     UErrorCode     status   = U_ZERO_ERROR;
524     Collator      *collator = getCollator(search->collator);
525     BreakIterator *breaker  = getBreakIterator(search->breaker);
526     StringSearch  *strsrch;
527     UChar          temp[128];
528     UBool          result = TRUE;
529 
530 #if UCONFIG_NO_BREAK_ITERATION
531     if(search->breaker) {
532       return TRUE; /* skip test */
533     }
534 #endif
535 
536     u_unescape(search->text, temp, 128);
537     UnicodeString text;
538     text.setTo(temp);
539     u_unescape(search->pattern, temp, 128);
540     UnicodeString  pattern;
541     pattern.setTo(temp);
542 
543 #if !UCONFIG_NO_BREAK_ITERATION
544     if (breaker != NULL) {
545         breaker->setText(text);
546     }
547 #endif
548     collator->setStrength(getECollationStrength(search->strength));
549     collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
550     strsrch = new StringSearch(pattern, text, (RuleBasedCollator *)collator,
551                                breaker, status);
552     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
553     if (U_FAILURE(status)) {
554         errln("Error opening string search %s", u_errorName(status));
555         result = FALSE;
556         goto bail;
557     }
558 
559     if (!assertEqualWithStringSearch(strsrch, search)) {
560         result = FALSE;
561         goto bail;
562     }
563 
564 bail:
565     collator->setStrength(getECollationStrength(UCOL_TERTIARY));
566     collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
567     delete strsrch;
568 
569     return result;
570 }
571 
assertEqualWithAttribute(const SearchData * search,USearchAttributeValue canonical,USearchAttributeValue overlap)572 UBool StringSearchTest::assertEqualWithAttribute(const SearchData *search,
573                                             USearchAttributeValue canonical,
574                                             USearchAttributeValue overlap)
575 {
576     UErrorCode     status   = U_ZERO_ERROR;
577     Collator      *collator = getCollator(search->collator);
578     BreakIterator *breaker  = getBreakIterator(search->breaker);
579     StringSearch  *strsrch;
580     UChar          temp[128];
581 
582 
583 #if UCONFIG_NO_BREAK_ITERATION
584     if(search->breaker) {
585       return TRUE; /* skip test */
586     }
587 #endif
588 
589     u_unescape(search->text, temp, 128);
590     UnicodeString text;
591     text.setTo(temp);
592     u_unescape(search->pattern, temp, 128);
593     UnicodeString  pattern;
594     pattern.setTo(temp);
595 
596 #if !UCONFIG_NO_BREAK_ITERATION
597     if (breaker != NULL) {
598         breaker->setText(text);
599     }
600 #endif
601     collator->setStrength(getECollationStrength(search->strength));
602     strsrch = new StringSearch(pattern, text, (RuleBasedCollator *)collator,
603                                breaker, status);
604     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, canonical, status);
605     strsrch->setAttribute(USEARCH_OVERLAP, overlap, status);
606 
607     if (U_FAILURE(status)) {
608         errln("Error opening string search %s", u_errorName(status));
609         return FALSE;
610     }
611 
612     if (!assertEqualWithStringSearch(strsrch, search)) {
613         collator->setStrength(getECollationStrength(UCOL_TERTIARY));
614         delete strsrch;
615         return FALSE;
616     }
617     collator->setStrength(getECollationStrength(UCOL_TERTIARY));
618     delete strsrch;
619     return TRUE;
620 }
621 
TestOpenClose()622 void StringSearchTest::TestOpenClose()
623 {
624     UErrorCode               status    = U_ZERO_ERROR;
625     StringSearch            *result;
626     BreakIterator           *breakiter = m_en_wordbreaker_;
627     UnicodeString            pattern;
628     UnicodeString            text;
629     UnicodeString            temp("a");
630     StringCharacterIterator  chariter(text);
631 
632     /* testing null arguments */
633     result = new StringSearch(pattern, text, NULL, NULL, status);
634     if (U_SUCCESS(status)) {
635         errln("Error: NULL arguments should produce an error");
636     }
637     delete result;
638 
639     chariter.setText(text);
640     status = U_ZERO_ERROR;
641     result = new StringSearch(pattern, chariter, NULL, NULL, status);
642     if (U_SUCCESS(status)) {
643         errln("Error: NULL arguments should produce an error");
644     }
645     delete result;
646 
647     // No-op: text.append(0, 0x1); -- what was intended here?
648     status = U_ZERO_ERROR;
649     result = new StringSearch(pattern, text, NULL, NULL, status);
650     if (U_SUCCESS(status)) {
651         errln("Error: Empty pattern should produce an error");
652     }
653     delete result;
654 
655     chariter.setText(text);
656     status = U_ZERO_ERROR;
657     result = new StringSearch(pattern, chariter, NULL, NULL, status);
658     if (U_SUCCESS(status)) {
659         errln("Error: Empty pattern should produce an error");
660     }
661     delete result;
662 
663     text.remove();
664     pattern.append(temp);
665     status = U_ZERO_ERROR;
666     result = new StringSearch(pattern, text, NULL, NULL, status);
667     if (U_SUCCESS(status)) {
668         errln("Error: Empty text should produce an error");
669     }
670     delete result;
671 
672     chariter.setText(text);
673     status = U_ZERO_ERROR;
674     result = new StringSearch(pattern, chariter, NULL, NULL, status);
675     if (U_SUCCESS(status)) {
676         errln("Error: Empty text should produce an error");
677     }
678     delete result;
679 
680     text.append(temp);
681     status = U_ZERO_ERROR;
682     result = new StringSearch(pattern, text, NULL, NULL, status);
683     if (U_SUCCESS(status)) {
684         errln("Error: NULL arguments should produce an error");
685     }
686     delete result;
687 
688     chariter.setText(text);
689     status = U_ZERO_ERROR;
690     result = new StringSearch(pattern, chariter, NULL, NULL, status);
691     if (U_SUCCESS(status)) {
692         errln("Error: NULL arguments should produce an error");
693     }
694     delete result;
695 
696     status = U_ZERO_ERROR;
697     result = new StringSearch(pattern, text, m_en_us_, NULL, status);
698     if (U_FAILURE(status)) {
699         errln("Error: NULL break iterator is valid for opening search");
700     }
701     delete result;
702 
703     status = U_ZERO_ERROR;
704     result = new StringSearch(pattern, chariter, m_en_us_, NULL, status);
705     if (U_FAILURE(status)) {
706         errln("Error: NULL break iterator is valid for opening search");
707     }
708     delete result;
709 
710     status = U_ZERO_ERROR;
711     result = new StringSearch(pattern, text, Locale::getEnglish(), NULL, status);
712     if (U_FAILURE(status) || result == NULL) {
713         errln("Error: NULL break iterator is valid for opening search");
714     }
715     delete result;
716 
717     status = U_ZERO_ERROR;
718     result = new StringSearch(pattern, chariter, Locale::getEnglish(), NULL, status);
719     if (U_FAILURE(status)) {
720         errln("Error: NULL break iterator is valid for opening search");
721     }
722     delete result;
723 
724     status = U_ZERO_ERROR;
725     result = new StringSearch(pattern, text, m_en_us_, breakiter, status);
726     if (U_FAILURE(status)) {
727         errln("Error: Break iterator is valid for opening search");
728     }
729     delete result;
730 
731     status = U_ZERO_ERROR;
732     result = new StringSearch(pattern, chariter, m_en_us_, NULL, status);
733     if (U_FAILURE(status)) {
734         errln("Error: Break iterator is valid for opening search");
735     }
736     delete result;
737 }
738 
TestInitialization()739 void StringSearchTest::TestInitialization()
740 {
741     UErrorCode     status = U_ZERO_ERROR;
742     UnicodeString  pattern;
743     UnicodeString  text;
744     UnicodeString  temp("a");
745     StringSearch  *result;
746     int count;
747 
748     /* simple test on the pattern ce construction */
749     pattern.append(temp);
750     pattern.append(temp);
751     text.append(temp);
752     text.append(temp);
753     text.append(temp);
754     result = new StringSearch(pattern, text, m_en_us_, NULL, status);
755     if (U_FAILURE(status)) {
756         errln("Error opening search %s", u_errorName(status));
757     }
758     StringSearch *copy = new StringSearch(*result);
759     if (*(copy->getCollator()) != *(result->getCollator()) ||
760         copy->getBreakIterator() != result->getBreakIterator() ||
761         copy->getMatchedLength() != result->getMatchedLength() ||
762         copy->getMatchedStart() != result->getMatchedStart() ||
763         copy->getOffset() != result->getOffset() ||
764         copy->getPattern() != result->getPattern() ||
765         copy->getText() != result->getText() ||
766         *(copy) != *(result))
767     {
768         errln("Error copying StringSearch");
769     }
770     delete copy;
771 
772     copy = result->safeClone();
773     if (*(copy->getCollator()) != *(result->getCollator()) ||
774         copy->getBreakIterator() != result->getBreakIterator() ||
775         copy->getMatchedLength() != result->getMatchedLength() ||
776         copy->getMatchedStart() != result->getMatchedStart() ||
777         copy->getOffset() != result->getOffset() ||
778         copy->getPattern() != result->getPattern() ||
779         copy->getText() != result->getText() ||
780         *(copy) != *(result)) {
781         errln("Error copying StringSearch");
782     }
783     delete result;
784 
785     /* testing if an extremely large pattern will fail the initialization */
786     for (count = 0; count < 512; count ++) {
787         pattern.append(temp);
788     }
789     result = new StringSearch(pattern, text, m_en_us_, NULL, status);
790     if (*result != *result) {
791         errln("Error: string search object expected to match itself");
792     }
793     if (*result == *copy) {
794         errln("Error: string search objects are not expected to match");
795     }
796     *copy  = *result;
797     if (*(copy->getCollator()) != *(result->getCollator()) ||
798         copy->getBreakIterator() != result->getBreakIterator() ||
799         copy->getMatchedLength() != result->getMatchedLength() ||
800         copy->getMatchedStart() != result->getMatchedStart() ||
801         copy->getOffset() != result->getOffset() ||
802         copy->getPattern() != result->getPattern() ||
803         copy->getText() != result->getText() ||
804         *(copy) != *(result)) {
805         errln("Error copying StringSearch");
806     }
807     if (U_FAILURE(status)) {
808         errln("Error opening search %s", u_errorName(status));
809     }
810     delete result;
811     delete copy;
812 }
813 
TestBasic()814 void StringSearchTest::TestBasic()
815 {
816     int count = 0;
817     while (BASIC[count].text != NULL) {
818         //printf("count %d", count);
819         if (!assertEqual(&BASIC[count])) {
820             infoln("Error at test number %d", count);
821         }
822         count ++;
823     }
824 }
825 
TestNormExact()826 void StringSearchTest::TestNormExact()
827 {
828     int count = 0;
829     UErrorCode status = U_ZERO_ERROR;
830     m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
831     if (U_FAILURE(status)) {
832         errln("Error setting collation normalization %s",
833               u_errorName(status));
834     }
835     while (BASIC[count].text != NULL) {
836         if (!assertEqual(&BASIC[count])) {
837             infoln("Error at test number %d", count);
838         }
839         count ++;
840     }
841     count = 0;
842     while (NORMEXACT[count].text != NULL) {
843         if (!assertEqual(&NORMEXACT[count])) {
844             infoln("Error at test number %d", count);
845         }
846         count ++;
847     }
848     m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
849     count = 0;
850     while (NONNORMEXACT[count].text != NULL) {
851         if (!assertEqual(&NONNORMEXACT[count])) {
852             infoln("Error at test number %d", count);
853         }
854         count ++;
855     }
856 }
857 
TestStrength()858 void StringSearchTest::TestStrength()
859 {
860     int count = 0;
861     while (STRENGTH[count].text != NULL) {
862         if (!assertEqual(&STRENGTH[count])) {
863             infoln("Error at test number %d", count);
864         }
865         count ++;
866     }
867 }
868 
869 #if !UCONFIG_NO_BREAK_ITERATION
870 
TestBreakIterator()871 void StringSearchTest::TestBreakIterator()
872 {
873     UChar temp[128];
874     u_unescape(BREAKITERATOREXACT[0].text, temp, 128);
875     UnicodeString text;
876     text.setTo(temp, u_strlen(temp));
877     u_unescape(BREAKITERATOREXACT[0].pattern, temp, 128);
878     UnicodeString pattern;
879     pattern.setTo(temp, u_strlen(temp));
880 
881     UErrorCode status = U_ZERO_ERROR;
882     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
883                                              status);
884     if (U_FAILURE(status)) {
885         errln("Error opening string search %s", u_errorName(status));
886     }
887 
888     strsrch->setBreakIterator(NULL, status);
889     if (U_FAILURE(status) || strsrch->getBreakIterator() != NULL) {
890         errln("Error usearch_getBreakIterator returned wrong object");
891     }
892 
893     strsrch->setBreakIterator(m_en_characterbreaker_, status);
894     if (U_FAILURE(status) ||
895         strsrch->getBreakIterator() != m_en_characterbreaker_) {
896         errln("Error usearch_getBreakIterator returned wrong object");
897     }
898 
899     strsrch->setBreakIterator(m_en_wordbreaker_, status);
900     if (U_FAILURE(status) ||
901         strsrch->getBreakIterator() != m_en_wordbreaker_) {
902         errln("Error usearch_getBreakIterator returned wrong object");
903     }
904 
905     delete strsrch;
906 
907     int count = 0;
908     while (count < 4) {
909         // special purposes for tests numbers 0-3
910         const SearchData        *search   = &(BREAKITERATOREXACT[count]);
911               RuleBasedCollator *collator = getCollator(search->collator);
912               BreakIterator     *breaker  = getBreakIterator(search->breaker);
913               StringSearch      *strsrch;
914 
915         u_unescape(search->text, temp, 128);
916         text.setTo(temp, u_strlen(temp));
917         u_unescape(search->pattern, temp, 128);
918         pattern.setTo(temp, u_strlen(temp));
919         if (breaker != NULL) {
920             breaker->setText(text);
921         }
922         collator->setStrength(getECollationStrength(search->strength));
923 
924         strsrch = new StringSearch(pattern, text, collator, breaker, status);
925         if (U_FAILURE(status) ||
926             strsrch->getBreakIterator() != breaker) {
927             errln("Error setting break iterator");
928             if (strsrch != NULL) {
929                 delete strsrch;
930             }
931         }
932         if (!assertEqualWithStringSearch(strsrch, search)) {
933             collator->setStrength(getECollationStrength(UCOL_TERTIARY));
934             delete strsrch;
935         }
936         search   = &(BREAKITERATOREXACT[count + 1]);
937         breaker  = getBreakIterator(search->breaker);
938         if (breaker != NULL) {
939             breaker->setText(text);
940         }
941         strsrch->setBreakIterator(breaker, status);
942         if (U_FAILURE(status) ||
943             strsrch->getBreakIterator() != breaker) {
944             errln("Error setting break iterator");
945             delete strsrch;
946         }
947         strsrch->reset();
948         if (!assertEqualWithStringSearch(strsrch, search)) {
949              infoln("Error at test number %d", count);
950         }
951         delete strsrch;
952         count += 2;
953     }
954     count = 0;
955     while (BREAKITERATOREXACT[count].text != NULL) {
956          if (!assertEqual(&BREAKITERATOREXACT[count])) {
957              infoln("Error at test number %d", count);
958          }
959          count ++;
960     }
961 }
962 
963 #endif
964 
TestVariable()965 void StringSearchTest::TestVariable()
966 {
967     int count = 0;
968     UErrorCode status = U_ZERO_ERROR;
969     m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
970     if (U_FAILURE(status)) {
971         errln("Error setting collation alternate attribute %s",
972               u_errorName(status));
973     }
974     while (VARIABLE[count].text != NULL) {
975         logln("variable %d", count);
976         if (!assertEqual(&VARIABLE[count])) {
977             infoln("Error at test number %d", count);
978         }
979         count ++;
980     }
981     m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE,
982                            status);
983 }
984 
TestOverlap()985 void StringSearchTest::TestOverlap()
986 {
987     int count = 0;
988     while (OVERLAP[count].text != NULL) {
989         if (!assertEqualWithAttribute(&OVERLAP[count], USEARCH_OFF,
990                                       USEARCH_ON)) {
991             errln("Error at overlap test number %d", count);
992         }
993         count ++;
994     }
995     count = 0;
996     while (NONOVERLAP[count].text != NULL) {
997         if (!assertEqual(&NONOVERLAP[count])) {
998             errln("Error at non overlap test number %d", count);
999         }
1000         count ++;
1001     }
1002 
1003     count = 0;
1004     while (count < 1) {
1005         const SearchData *search = &(OVERLAP[count]);
1006               UChar       temp[128];
1007         u_unescape(search->text, temp, 128);
1008         UnicodeString text;
1009         text.setTo(temp, u_strlen(temp));
1010         u_unescape(search->pattern, temp, 128);
1011         UnicodeString pattern;
1012         pattern.setTo(temp, u_strlen(temp));
1013 
1014         RuleBasedCollator *collator = getCollator(search->collator);
1015         UErrorCode         status   = U_ZERO_ERROR;
1016         StringSearch      *strsrch  = new StringSearch(pattern, text,
1017                                                        collator, NULL,
1018                                                        status);
1019 
1020         strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status);
1021         if (U_FAILURE(status) ||
1022             strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_ON) {
1023             errln("Error setting overlap option");
1024         }
1025         if (!assertEqualWithStringSearch(strsrch, search)) {
1026             delete strsrch;
1027             return;
1028         }
1029 
1030         search = &(NONOVERLAP[count]);
1031         strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_OFF, status);
1032         if (U_FAILURE(status) ||
1033             strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) {
1034             errln("Error setting overlap option");
1035         }
1036         strsrch->reset();
1037         if (!assertEqualWithStringSearch(strsrch, search)) {
1038             delete strsrch;
1039             errln("Error at test number %d", count);
1040          }
1041 
1042         count ++;
1043         delete strsrch;
1044     }
1045 }
1046 
TestCollator()1047 void StringSearchTest::TestCollator()
1048 {
1049     // test collator that thinks "o" and "p" are the same thing
1050     UChar         temp[128];
1051     u_unescape(COLLATOR[0].text, temp, 128);
1052     UnicodeString text;
1053     text.setTo(temp, u_strlen(temp));
1054     u_unescape(COLLATOR[0].pattern, temp, 128);
1055     UnicodeString pattern;
1056     pattern.setTo(temp, u_strlen(temp));
1057 
1058     UErrorCode    status = U_ZERO_ERROR;
1059     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
1060                                              status);
1061     if (U_FAILURE(status)) {
1062         errln("Error opening string search %s", u_errorName(status));
1063         delete strsrch;
1064         return;
1065     }
1066     if (!assertEqualWithStringSearch(strsrch, &COLLATOR[0])) {
1067         delete strsrch;
1068         return;
1069     }
1070 
1071     u_unescape(TESTCOLLATORRULE, temp, 128);
1072     UnicodeString rules;
1073     rules.setTo(temp, u_strlen(temp));
1074     RuleBasedCollator *tailored = new RuleBasedCollator(rules, status);
1075     tailored->setStrength(getECollationStrength(COLLATOR[1].strength));
1076 
1077     if (U_FAILURE(status)) {
1078         errln("Error opening rule based collator %s", u_errorName(status));
1079         delete strsrch;
1080         delete tailored;
1081         return;
1082     }
1083 
1084     strsrch->setCollator(tailored, status);
1085     if (U_FAILURE(status) || (*strsrch->getCollator()) != (*tailored)) {
1086         errln("Error setting rule based collator");
1087         delete strsrch;
1088         delete tailored;
1089     }
1090     strsrch->reset();
1091     if (!assertEqualWithStringSearch(strsrch, &COLLATOR[1])) {
1092         delete strsrch;
1093         delete tailored;
1094         return;
1095     }
1096 
1097     strsrch->setCollator(m_en_us_, status);
1098     strsrch->reset();
1099     if (U_FAILURE(status) || (*strsrch->getCollator()) != (*m_en_us_)) {
1100         errln("Error setting rule based collator");
1101         delete strsrch;
1102         delete tailored;
1103     }
1104     if (!assertEqualWithStringSearch(strsrch, &COLLATOR[0])) {
1105        errln("Error searching collator test");
1106     }
1107     delete strsrch;
1108     delete tailored;
1109 }
1110 
TestPattern()1111 void StringSearchTest::TestPattern()
1112 {
1113 
1114     UChar temp[512];
1115     int templength;
1116     u_unescape(PATTERN[0].text, temp, 512);
1117     UnicodeString text;
1118     text.setTo(temp, u_strlen(temp));
1119     u_unescape(PATTERN[0].pattern, temp, 512);
1120     UnicodeString pattern;
1121     pattern.setTo(temp, u_strlen(temp));
1122 
1123     m_en_us_->setStrength(getECollationStrength(PATTERN[0].strength));
1124     UErrorCode    status = U_ZERO_ERROR;
1125     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
1126                                              status);
1127 
1128     if (U_FAILURE(status)) {
1129         errln("Error opening string search %s", u_errorName(status));
1130         m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1131         if (strsrch != NULL) {
1132             delete strsrch;
1133         }
1134         return;
1135     }
1136     if (strsrch->getPattern() != pattern) {
1137         errln("Error setting pattern");
1138     }
1139     if (!assertEqualWithStringSearch(strsrch, &PATTERN[0])) {
1140         m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1141         if (strsrch != NULL) {
1142             delete strsrch;
1143         }
1144         return;
1145     }
1146 
1147     u_unescape(PATTERN[1].pattern, temp, 512);
1148     pattern.setTo(temp, u_strlen(temp));
1149     strsrch->setPattern(pattern, status);
1150     if (pattern != strsrch->getPattern()) {
1151         errln("Error setting pattern");
1152         m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1153         if (strsrch != NULL) {
1154             delete strsrch;
1155         }
1156         return;
1157     }
1158     strsrch->reset();
1159     if (U_FAILURE(status)) {
1160         errln("Error setting pattern %s", u_errorName(status));
1161     }
1162     if (!assertEqualWithStringSearch(strsrch, &PATTERN[1])) {
1163         m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1164         if (strsrch != NULL) {
1165             delete strsrch;
1166         }
1167         return;
1168     }
1169 
1170     u_unescape(PATTERN[0].pattern, temp, 512);
1171     pattern.setTo(temp, u_strlen(temp));
1172     strsrch->setPattern(pattern, status);
1173     if (pattern != strsrch->getPattern()) {
1174         errln("Error setting pattern");
1175         m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1176         if (strsrch != NULL) {
1177             delete strsrch;
1178         }
1179         return;
1180     }
1181     strsrch->reset();
1182     if (U_FAILURE(status)) {
1183         errln("Error setting pattern %s", u_errorName(status));
1184     }
1185     if (!assertEqualWithStringSearch(strsrch, &PATTERN[0])) {
1186         m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1187         if (strsrch != NULL) {
1188             delete strsrch;
1189         }
1190         return;
1191     }
1192     /* enormous pattern size to see if this crashes */
1193     for (templength = 0; templength != 512; templength ++) {
1194         temp[templength] = 0x61;
1195     }
1196     temp[511] = 0;
1197     pattern.setTo(temp, 511);
1198     strsrch->setPattern(pattern, status);
1199     if (U_FAILURE(status)) {
1200         errln("Error setting pattern with size 512, %s", u_errorName(status));
1201     }
1202     m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
1203     if (strsrch != NULL) {
1204         delete strsrch;
1205     }
1206 }
1207 
TestText()1208 void StringSearchTest::TestText()
1209 {
1210     UChar temp[128];
1211     u_unescape(TEXT[0].text, temp, 128);
1212     UnicodeString text;
1213     text.setTo(temp, u_strlen(temp));
1214     u_unescape(TEXT[0].pattern, temp, 128);
1215     UnicodeString pattern;
1216     pattern.setTo(temp, u_strlen(temp));
1217 
1218     UErrorCode status = U_ZERO_ERROR;
1219     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
1220                                              status);
1221     if (U_FAILURE(status)) {
1222         errln("Error opening string search %s", u_errorName(status));
1223         return;
1224     }
1225     if (text != strsrch->getText()) {
1226         errln("Error setting text");
1227     }
1228     if (!assertEqualWithStringSearch(strsrch, &TEXT[0])) {
1229         delete strsrch;
1230         return;
1231     }
1232 
1233     u_unescape(TEXT[1].text, temp, 128);
1234     text.setTo(temp, u_strlen(temp));
1235     strsrch->setText(text, status);
1236     if (text != strsrch->getText()) {
1237         errln("Error setting text");
1238         delete strsrch;
1239         return;
1240     }
1241     if (U_FAILURE(status)) {
1242         errln("Error setting text %s", u_errorName(status));
1243     }
1244     if (!assertEqualWithStringSearch(strsrch, &TEXT[1])) {
1245         delete strsrch;
1246         return;
1247     }
1248 
1249     u_unescape(TEXT[0].text, temp, 128);
1250     text.setTo(temp, u_strlen(temp));
1251     StringCharacterIterator chariter(text);
1252     strsrch->setText(chariter, status);
1253     if (text != strsrch->getText()) {
1254         errln("Error setting text");
1255         delete strsrch;
1256         return;
1257     }
1258     if (U_FAILURE(status)) {
1259         errln("Error setting pattern %s", u_errorName(status));
1260     }
1261     if (!assertEqualWithStringSearch(strsrch, &TEXT[0])) {
1262         errln("Error searching within set text");
1263     }
1264     delete strsrch;
1265 }
1266 
TestCompositeBoundaries()1267 void StringSearchTest::TestCompositeBoundaries()
1268 {
1269     int count = 0;
1270     while (COMPOSITEBOUNDARIES[count].text != NULL) {
1271         logln("composite %d", count);
1272         if (!assertEqual(&COMPOSITEBOUNDARIES[count])) {
1273             errln("Error at test number %d", count);
1274         }
1275         count ++;
1276     }
1277 }
1278 
TestGetSetOffset()1279 void StringSearchTest::TestGetSetOffset()
1280 {
1281     UErrorCode     status  = U_ZERO_ERROR;
1282     UnicodeString  pattern("1234567890123456");
1283     UnicodeString  text("12345678901234567890123456789012");
1284     StringSearch  *strsrch = new StringSearch(pattern, text, m_en_us_,
1285                                               NULL, status);
1286     /* testing out of bounds error */
1287     strsrch->setOffset(-1, status);
1288     if (U_SUCCESS(status)) {
1289         errln("Error expecting set offset error");
1290     }
1291     strsrch->setOffset(128, status);
1292     if (U_SUCCESS(status)) {
1293         errln("Error expecting set offset error");
1294     }
1295     int index   = 0;
1296     while (BASIC[index].text != NULL) {
1297         UErrorCode  status      = U_ZERO_ERROR;
1298         SearchData  search      = BASIC[index ++];
1299         UChar       temp[128];
1300 
1301         u_unescape(search.text, temp, 128);
1302         text.setTo(temp, u_strlen(temp));
1303         u_unescape(search.pattern, temp, 128);
1304         pattern.setTo(temp, u_strlen(temp));
1305         strsrch->setText(text, status);
1306         strsrch->setPattern(pattern, status);
1307         strsrch->getCollator()->setStrength(getECollationStrength(
1308                                                           search.strength));
1309         strsrch->reset();
1310 
1311         int count = 0;
1312         int32_t matchindex  = search.offset[count];
1313         while (U_SUCCESS(status) && matchindex >= 0) {
1314             int32_t matchlength = search.size[count];
1315             strsrch->next(status);
1316             if (matchindex != strsrch->getMatchedStart() ||
1317                 matchlength != strsrch->getMatchedLength()) {
1318                 char *str = toCharString(strsrch->getText());
1319                 errln("Text: %s", str);
1320                 str = toCharString(strsrch->getPattern());
1321                 errln("Pattern: %s", str);
1322                 errln("Error match found at %d %d",
1323                         strsrch->getMatchedStart(),
1324                         strsrch->getMatchedLength());
1325                 return;
1326             }
1327             matchindex = search.offset[count + 1] == -1 ? -1 :
1328                          search.offset[count + 2];
1329             if (search.offset[count + 1] != -1) {
1330                 strsrch->setOffset(search.offset[count + 1] + 1, status);
1331                 if (strsrch->getOffset() != search.offset[count + 1] + 1) {
1332                     errln("Error setting offset\n");
1333                     return;
1334                 }
1335             }
1336 
1337             count += 2;
1338         }
1339         strsrch->next(status);
1340         if (strsrch->getMatchedStart() != USEARCH_DONE) {
1341             char *str = toCharString(strsrch->getText());
1342             errln("Text: %s", str);
1343             str = toCharString(strsrch->getPattern());
1344             errln("Pattern: %s", str);
1345             errln("Error match found at %d %d",
1346                         strsrch->getMatchedStart(),
1347                         strsrch->getMatchedLength());
1348             return;
1349         }
1350     }
1351     strsrch->getCollator()->setStrength(getECollationStrength(
1352                                                              UCOL_TERTIARY));
1353     delete strsrch;
1354 }
1355 
TestGetSetAttribute()1356 void StringSearchTest::TestGetSetAttribute()
1357 {
1358     UErrorCode     status    = U_ZERO_ERROR;
1359     UnicodeString  pattern("pattern");
1360     UnicodeString  text("text");
1361     StringSearch  *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
1362                                               status);
1363     if (U_FAILURE(status)) {
1364         errln("Error opening search %s", u_errorName(status));
1365         return;
1366     }
1367 
1368     strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_DEFAULT, status);
1369     if (U_FAILURE(status) ||
1370         strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) {
1371         errln("Error setting overlap to the default");
1372     }
1373     strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status);
1374     if (U_FAILURE(status) ||
1375         strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_ON) {
1376         errln("Error setting overlap true");
1377     }
1378     strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_OFF, status);
1379     if (U_FAILURE(status) ||
1380         strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) {
1381         errln("Error setting overlap false");
1382     }
1383     strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ATTRIBUTE_VALUE_COUNT,
1384                           status);
1385     if (U_SUCCESS(status)) {
1386         errln("Error setting overlap to illegal value");
1387     }
1388     status = U_ZERO_ERROR;
1389     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_DEFAULT, status);
1390     if (U_FAILURE(status) ||
1391         strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF) {
1392         errln("Error setting canonical match to the default");
1393     }
1394     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1395     if (U_FAILURE(status) ||
1396         strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_ON) {
1397         errln("Error setting canonical match true");
1398     }
1399     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_OFF, status);
1400     if (U_FAILURE(status) ||
1401         strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF) {
1402         errln("Error setting canonical match false");
1403     }
1404     strsrch->setAttribute(USEARCH_CANONICAL_MATCH,
1405                           USEARCH_ATTRIBUTE_VALUE_COUNT, status);
1406     if (U_SUCCESS(status)) {
1407         errln("Error setting canonical match to illegal value");
1408     }
1409     status = U_ZERO_ERROR;
1410     strsrch->setAttribute(USEARCH_ATTRIBUTE_COUNT, USEARCH_DEFAULT, status);
1411     if (U_SUCCESS(status)) {
1412         errln("Error setting illegal attribute success");
1413     }
1414 
1415     delete strsrch;
1416 }
1417 
TestGetMatch()1418 void StringSearchTest::TestGetMatch()
1419 {
1420     UChar      temp[128];
1421     SearchData search = MATCH[0];
1422     u_unescape(search.text, temp, 128);
1423     UnicodeString text;
1424     text.setTo(temp, u_strlen(temp));
1425     u_unescape(search.pattern, temp, 128);
1426     UnicodeString pattern;
1427     pattern.setTo(temp, u_strlen(temp));
1428 
1429     UErrorCode    status  = U_ZERO_ERROR;
1430     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
1431                                              status);
1432     if (U_FAILURE(status)) {
1433         errln("Error opening string search %s", u_errorName(status));
1434         if (strsrch != NULL) {
1435             delete strsrch;
1436         }
1437         return;
1438     }
1439 
1440     int           count      = 0;
1441     int32_t   matchindex = search.offset[count];
1442     UnicodeString matchtext;
1443     while (U_SUCCESS(status) && matchindex >= 0) {
1444         int32_t matchlength = search.size[count];
1445         strsrch->next(status);
1446         if (matchindex != strsrch->getMatchedStart() ||
1447             matchlength != strsrch->getMatchedLength()) {
1448             char *str = toCharString(strsrch->getText());
1449             errln("Text: %s", str);
1450             str = toCharString(strsrch->getPattern());
1451             errln("Pattern: %s", str);
1452             errln("Error match found at %d %d", strsrch->getMatchedStart(),
1453                   strsrch->getMatchedLength());
1454             return;
1455         }
1456         count ++;
1457 
1458         status = U_ZERO_ERROR;
1459         strsrch->getMatchedText(matchtext);
1460         if (matchtext.length() != matchlength || U_FAILURE(status)){
1461             errln("Error getting match text");
1462         }
1463         matchindex = search.offset[count];
1464     }
1465     status = U_ZERO_ERROR;
1466     strsrch->next(status);
1467     if (strsrch->getMatchedStart()  != USEARCH_DONE ||
1468         strsrch->getMatchedLength() != 0) {
1469         errln("Error end of match not found");
1470     }
1471     status = U_ZERO_ERROR;
1472     strsrch->getMatchedText(matchtext);
1473     if (matchtext.length() != 0) {
1474         errln("Error getting null matches");
1475     }
1476     delete strsrch;
1477 }
1478 
TestSetMatch()1479 void StringSearchTest::TestSetMatch()
1480 {
1481     int count = 0;
1482     while (MATCH[count].text != NULL) {
1483         SearchData     search = MATCH[count];
1484         UChar          temp[128];
1485         UErrorCode status = U_ZERO_ERROR;
1486         u_unescape(search.text, temp, 128);
1487         UnicodeString text;
1488         text.setTo(temp, u_strlen(temp));
1489         u_unescape(search.pattern, temp, 128);
1490         UnicodeString pattern;
1491         pattern.setTo(temp, u_strlen(temp));
1492 
1493         StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_,
1494                                                  NULL, status);
1495         if (U_FAILURE(status)) {
1496             errln("Error opening string search %s", u_errorName(status));
1497             if (strsrch != NULL) {
1498                 delete strsrch;
1499             }
1500             return;
1501         }
1502 
1503         int size = 0;
1504         while (search.offset[size] != -1) {
1505             size ++;
1506         }
1507 
1508         if (strsrch->first(status) != search.offset[0] || U_FAILURE(status)) {
1509             errln("Error getting first match");
1510         }
1511         if (strsrch->last(status) != search.offset[size -1] ||
1512             U_FAILURE(status)) {
1513             errln("Error getting last match");
1514         }
1515 
1516         int index = 0;
1517         while (index < size) {
1518             if (index + 2 < size) {
1519                 if (strsrch->following(search.offset[index + 2] - 1, status)
1520                          != search.offset[index + 2] || U_FAILURE(status)) {
1521                     errln("Error getting following match at index %d",
1522                           search.offset[index + 2] - 1);
1523                 }
1524             }
1525             if (index + 1 < size) {
1526                 if (strsrch->preceding(search.offset[index + 1] +
1527                                                 search.size[index + 1] + 1,
1528                                        status) != search.offset[index + 1] ||
1529                     U_FAILURE(status)) {
1530                     errln("Error getting preceeding match at index %d",
1531                           search.offset[index + 1] + 1);
1532                 }
1533             }
1534             index += 2;
1535         }
1536         status = U_ZERO_ERROR;
1537         if (strsrch->following(text.length(), status) != USEARCH_DONE) {
1538             errln("Error expecting out of bounds match");
1539         }
1540         if (strsrch->preceding(0, status) != USEARCH_DONE) {
1541             errln("Error expecting out of bounds match");
1542         }
1543         count ++;
1544         delete strsrch;
1545     }
1546 }
1547 
TestReset()1548 void StringSearchTest::TestReset()
1549 {
1550     UErrorCode     status  = U_ZERO_ERROR;
1551     UnicodeString  text("fish fish");
1552     UnicodeString  pattern("s");
1553     StringSearch  *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
1554                                               status);
1555     if (U_FAILURE(status)) {
1556         errln("Error opening string search %s", u_errorName(status));
1557         if (strsrch != NULL) {
1558             delete strsrch;
1559         }
1560         return;
1561     }
1562     strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status);
1563     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1564     strsrch->setOffset(9, status);
1565     if (U_FAILURE(status)) {
1566         errln("Error setting attributes and offsets");
1567     }
1568     else {
1569         strsrch->reset();
1570         if (strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF ||
1571             strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF ||
1572             strsrch->getOffset() != 0 || strsrch->getMatchedLength() != 0 ||
1573             strsrch->getMatchedStart() != USEARCH_DONE) {
1574             errln("Error resetting string search");
1575         }
1576         strsrch->previous(status);
1577         if (strsrch->getMatchedStart() != 7 ||
1578             strsrch->getMatchedLength() != 1) {
1579             errln("Error resetting string search\n");
1580         }
1581     }
1582     delete strsrch;
1583 }
1584 
TestSupplementary()1585 void StringSearchTest::TestSupplementary()
1586 {
1587     int count = 0;
1588     while (SUPPLEMENTARY[count].text != NULL) {
1589         if (!assertEqual(&SUPPLEMENTARY[count])) {
1590             errln("Error at test number %d", count);
1591         }
1592         count ++;
1593     }
1594 }
1595 
TestContraction()1596 void StringSearchTest::TestContraction()
1597 {
1598     UChar      temp[128];
1599     UErrorCode status = U_ZERO_ERROR;
1600 
1601     u_unescape(CONTRACTIONRULE, temp, 128);
1602     UnicodeString rules;
1603     rules.setTo(temp, u_strlen(temp));
1604     RuleBasedCollator *collator = new RuleBasedCollator(rules,
1605         getECollationStrength(UCOL_TERTIARY), UCOL_ON, status);
1606     if (U_FAILURE(status)) {
1607         errln("Error opening collator %s", u_errorName(status));
1608     }
1609     UnicodeString text("text");
1610     UnicodeString pattern("pattern");
1611     StringSearch *strsrch = new StringSearch(pattern, text, collator, NULL,
1612                                              status);
1613     if (U_FAILURE(status)) {
1614         errln("Error opening string search %s", u_errorName(status));
1615     }
1616 
1617     int count = 0;
1618     while (CONTRACTION[count].text != NULL) {
1619         u_unescape(CONTRACTION[count].text, temp, 128);
1620         text.setTo(temp, u_strlen(temp));
1621         u_unescape(CONTRACTION[count].pattern, temp, 128);
1622         pattern.setTo(temp, u_strlen(temp));
1623         strsrch->setText(text, status);
1624         strsrch->setPattern(pattern, status);
1625         if (!assertEqualWithStringSearch(strsrch, &CONTRACTION[count])) {
1626             errln("Error at test number %d", count);
1627         }
1628         count ++;
1629     }
1630     delete strsrch;
1631     delete collator;
1632 }
1633 
TestIgnorable()1634 void StringSearchTest::TestIgnorable()
1635 {
1636     UChar temp[128];
1637     u_unescape(IGNORABLERULE, temp, 128);
1638     UnicodeString rules;
1639     rules.setTo(temp, u_strlen(temp));
1640     UErrorCode status = U_ZERO_ERROR;
1641     int        count  = 0;
1642     RuleBasedCollator *collator = new RuleBasedCollator(rules,
1643                             getECollationStrength(IGNORABLE[count].strength),
1644                             UCOL_ON, status);
1645     if (U_FAILURE(status)) {
1646         errln("Error opening collator %s", u_errorName(status));
1647         return;
1648     }
1649     UnicodeString pattern("pattern");
1650     UnicodeString text("text");
1651     StringSearch *strsrch = new StringSearch(pattern, text, collator, NULL,
1652                                              status);
1653     if (U_FAILURE(status)) {
1654         errln("Error opening string search %s", u_errorName(status));
1655         delete collator;
1656         return;
1657     }
1658 
1659     while (IGNORABLE[count].text != NULL) {
1660         u_unescape(IGNORABLE[count].text, temp, 128);
1661         text.setTo(temp, u_strlen(temp));
1662         u_unescape(IGNORABLE[count].pattern, temp, 128);
1663         pattern.setTo(temp, u_strlen(temp));
1664         strsrch->setText(text, status);
1665         strsrch->setPattern(pattern, status);
1666         if (!assertEqualWithStringSearch(strsrch, &IGNORABLE[count])) {
1667             errln("Error at test number %d", count);
1668         }
1669         count ++;
1670     }
1671     delete strsrch;
1672     delete collator;
1673 }
1674 
TestDiacriticMatch()1675 void StringSearchTest::TestDiacriticMatch()
1676 {
1677 	UChar temp[128];
1678     UErrorCode status = U_ZERO_ERROR;
1679     int        count  = 0;
1680     RuleBasedCollator* coll = NULL;
1681     StringSearch *strsrch = NULL;
1682 
1683     UnicodeString pattern("pattern");
1684     UnicodeString text("text");
1685 
1686     const SearchData *search;
1687 
1688     search = &(DIACRITICMATCH[count]);
1689     while (search->text != NULL) {
1690    		coll = getCollator(search->collator);
1691     	coll->setStrength(getECollationStrength(search->strength));
1692     	strsrch = new StringSearch(pattern, text, coll, getBreakIterator(search->breaker), status);
1693     	if (U_FAILURE(status)) {
1694 	        errln("Error opening string search %s", u_errorName(status));
1695 	        return;
1696 	    }
1697         u_unescape(search->text, temp, 128);
1698         text.setTo(temp, u_strlen(temp));
1699         u_unescape(search->pattern, temp, 128);
1700         pattern.setTo(temp, u_strlen(temp));
1701         strsrch->setText(text, status);
1702         strsrch->setPattern(pattern, status);
1703         if (!assertEqualWithStringSearch(strsrch, search)) {
1704             errln("Error at test number %d", count);
1705         }
1706         search = &(DIACRITICMATCH[++count]);
1707         delete strsrch;
1708     }
1709 
1710 }
1711 
TestCanonical()1712 void StringSearchTest::TestCanonical()
1713 {
1714     int count = 0;
1715     while (BASICCANONICAL[count].text != NULL) {
1716         if (!assertCanonicalEqual(&BASICCANONICAL[count])) {
1717             errln("Error at test number %d", count);
1718         }
1719         count ++;
1720     }
1721 }
1722 
TestNormCanonical()1723 void StringSearchTest::TestNormCanonical()
1724 {
1725     UErrorCode status = U_ZERO_ERROR;
1726     m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
1727     int count = 0;
1728     while (NORMCANONICAL[count].text != NULL) {
1729         if (!assertCanonicalEqual(&NORMCANONICAL[count])) {
1730             errln("Error at test number %d", count);
1731         }
1732         count ++;
1733     }
1734     m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
1735 }
1736 
TestStrengthCanonical()1737 void StringSearchTest::TestStrengthCanonical()
1738 {
1739     int count = 0;
1740     while (STRENGTHCANONICAL[count].text != NULL) {
1741         if (!assertCanonicalEqual(&STRENGTHCANONICAL[count])) {
1742             errln("Error at test number %d", count);
1743         }
1744         count ++;
1745     }
1746 }
1747 
1748 #if !UCONFIG_NO_BREAK_ITERATION
1749 
TestBreakIteratorCanonical()1750 void StringSearchTest::TestBreakIteratorCanonical()
1751 {
1752     UErrorCode status = U_ZERO_ERROR;
1753     int        count  = 0;
1754 
1755     while (count < 4) {
1756         // special purposes for tests numbers 0-3
1757               UChar           temp[128];
1758         const SearchData     *search   = &(BREAKITERATORCANONICAL[count]);
1759 
1760         u_unescape(search->text, temp, 128);
1761         UnicodeString text;
1762         text.setTo(temp, u_strlen(temp));
1763         u_unescape(search->pattern, temp, 128);
1764         UnicodeString pattern;
1765         pattern.setTo(temp, u_strlen(temp));
1766         RuleBasedCollator *collator = getCollator(search->collator);
1767         collator->setStrength(getECollationStrength(search->strength));
1768 
1769         BreakIterator *breaker = getBreakIterator(search->breaker);
1770         StringSearch  *strsrch = new StringSearch(pattern, text, collator,
1771                                                   breaker, status);
1772         if (U_FAILURE(status)) {
1773             errln("Error creating string search data");
1774             return;
1775         }
1776         strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1777         if (U_FAILURE(status) ||
1778             strsrch->getBreakIterator() != breaker) {
1779             errln("Error setting break iterator");
1780             delete strsrch;
1781             return;
1782         }
1783         if (!assertEqualWithStringSearch(strsrch, search)) {
1784             collator->setStrength(getECollationStrength(UCOL_TERTIARY));
1785             delete strsrch;
1786             return;
1787         }
1788         search  = &(BREAKITERATOREXACT[count + 1]);
1789         breaker = getBreakIterator(search->breaker);
1790         if (breaker == NULL) {
1791             errln("Error creating BreakIterator");
1792             return;
1793         }
1794         breaker->setText(strsrch->getText());
1795         strsrch->setBreakIterator(breaker, status);
1796         if (U_FAILURE(status) || strsrch->getBreakIterator() != breaker) {
1797             errln("Error setting break iterator");
1798             delete strsrch;
1799             return;
1800         }
1801         strsrch->reset();
1802         strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1803         if (!assertEqualWithStringSearch(strsrch, search)) {
1804              errln("Error at test number %d", count);
1805              return;
1806         }
1807         delete strsrch;
1808         count += 2;
1809     }
1810     count = 0;
1811     while (BREAKITERATORCANONICAL[count].text != NULL) {
1812          if (!assertEqual(&BREAKITERATORCANONICAL[count])) {
1813              errln("Error at test number %d", count);
1814              return;
1815          }
1816          count ++;
1817     }
1818 }
1819 
1820 #endif
1821 
TestVariableCanonical()1822 void StringSearchTest::TestVariableCanonical()
1823 {
1824     int count = 0;
1825     UErrorCode status = U_ZERO_ERROR;
1826     m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
1827     if (U_FAILURE(status)) {
1828         errln("Error setting collation alternate attribute %s",
1829               u_errorName(status));
1830     }
1831     while (VARIABLE[count].text != NULL) {
1832         logln("variable %d", count);
1833         if (!assertCanonicalEqual(&VARIABLE[count])) {
1834             errln("Error at test number %d", count);
1835         }
1836         count ++;
1837     }
1838     m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE,
1839                            status);
1840 }
1841 
TestOverlapCanonical()1842 void StringSearchTest::TestOverlapCanonical()
1843 {
1844     int count = 0;
1845     while (OVERLAPCANONICAL[count].text != NULL) {
1846         if (!assertEqualWithAttribute(&OVERLAPCANONICAL[count], USEARCH_ON,
1847                                       USEARCH_ON)) {
1848             errln("Error at overlap test number %d", count);
1849         }
1850         count ++;
1851     }
1852     count = 0;
1853     while (NONOVERLAP[count].text != NULL) {
1854         if (!assertCanonicalEqual(&NONOVERLAPCANONICAL[count])) {
1855             errln("Error at non overlap test number %d", count);
1856         }
1857         count ++;
1858     }
1859 
1860     count = 0;
1861     while (count < 1) {
1862               UChar       temp[128];
1863         const SearchData *search = &(OVERLAPCANONICAL[count]);
1864               UErrorCode  status = U_ZERO_ERROR;
1865 
1866         u_unescape(search->text, temp, 128);
1867         UnicodeString text;
1868         text.setTo(temp, u_strlen(temp));
1869         u_unescape(search->pattern, temp, 128);
1870         UnicodeString pattern;
1871         pattern.setTo(temp, u_strlen(temp));
1872         RuleBasedCollator *collator = getCollator(search->collator);
1873         StringSearch *strsrch = new StringSearch(pattern, text, collator,
1874                                                  NULL, status);
1875         strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1876         strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status);
1877         if (U_FAILURE(status) ||
1878             strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_ON) {
1879             errln("Error setting overlap option");
1880         }
1881         if (!assertEqualWithStringSearch(strsrch, search)) {
1882             delete strsrch;
1883             return;
1884         }
1885         search = &(NONOVERLAPCANONICAL[count]);
1886         strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_OFF, status);
1887         if (U_FAILURE(status) ||
1888             strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) {
1889             errln("Error setting overlap option");
1890         }
1891         strsrch->reset();
1892         if (!assertEqualWithStringSearch(strsrch, search)) {
1893             delete strsrch;
1894             errln("Error at test number %d", count);
1895          }
1896 
1897         count ++;
1898         delete strsrch;
1899     }
1900 }
1901 
TestCollatorCanonical()1902 void StringSearchTest::TestCollatorCanonical()
1903 {
1904     /* test collator that thinks "o" and "p" are the same thing */
1905     UChar temp[128];
1906     u_unescape(COLLATORCANONICAL[0].text, temp, 128);
1907     UnicodeString text;
1908     text.setTo(temp, u_strlen(temp));
1909     u_unescape(COLLATORCANONICAL[0].pattern, temp, 128);
1910     UnicodeString pattern;
1911     pattern.setTo(temp, u_strlen(temp));
1912 
1913     UErrorCode    status  = U_ZERO_ERROR;
1914     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_,
1915                                              NULL, status);
1916     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1917     if (U_FAILURE(status)) {
1918         errln("Error opening string search %s", u_errorName(status));
1919     }
1920     if (!assertEqualWithStringSearch(strsrch, &COLLATORCANONICAL[0])) {
1921         delete strsrch;
1922         return;
1923     }
1924 
1925     u_unescape(TESTCOLLATORRULE, temp, 128);
1926     UnicodeString rules;
1927     rules.setTo(temp, u_strlen(temp));
1928     RuleBasedCollator *tailored = new RuleBasedCollator(rules,
1929         getECollationStrength(COLLATORCANONICAL[1].strength),
1930         UCOL_ON, status);
1931 
1932     if (U_FAILURE(status)) {
1933         errln("Error opening rule based collator %s", u_errorName(status));
1934     }
1935 
1936     strsrch->setCollator(tailored, status);
1937     if (U_FAILURE(status) || *(strsrch->getCollator()) != *tailored) {
1938         errln("Error setting rule based collator");
1939     }
1940     strsrch->reset();
1941     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1942     if (!assertEqualWithStringSearch(strsrch, &COLLATORCANONICAL[1])) {
1943         delete strsrch;
1944         if (tailored != NULL) {
1945             delete tailored;
1946         }
1947 
1948         return;
1949     }
1950 
1951     strsrch->setCollator(m_en_us_, status);
1952     strsrch->reset();
1953     if (U_FAILURE(status) || *(strsrch->getCollator()) != *m_en_us_) {
1954         errln("Error setting rule based collator");
1955     }
1956     if (!assertEqualWithStringSearch(strsrch, &COLLATORCANONICAL[0])) {
1957     }
1958     delete strsrch;
1959     if (tailored != NULL) {
1960         delete tailored;
1961     }
1962 }
1963 
TestPatternCanonical()1964 void StringSearchTest::TestPatternCanonical()
1965 {
1966 
1967     UChar temp[128];
1968 
1969     u_unescape(PATTERNCANONICAL[0].text, temp, 128);
1970     UnicodeString text;
1971     text.setTo(temp, u_strlen(temp));
1972     u_unescape(PATTERNCANONICAL[0].pattern, temp, 128);
1973     UnicodeString pattern;
1974     pattern.setTo(temp, u_strlen(temp));
1975 
1976     m_en_us_->setStrength(
1977                       getECollationStrength(PATTERNCANONICAL[0].strength));
1978 
1979     UErrorCode    status  = U_ZERO_ERROR;
1980     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
1981                                              status);
1982     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
1983     if (U_FAILURE(status)) {
1984         errln("Error opening string search %s", u_errorName(status));
1985         goto ENDTESTPATTERN;
1986     }
1987     if (pattern != strsrch->getPattern()) {
1988         errln("Error setting pattern");
1989     }
1990     if (!assertEqualWithStringSearch(strsrch, &PATTERNCANONICAL[0])) {
1991         goto ENDTESTPATTERN;
1992     }
1993 
1994     u_unescape(PATTERNCANONICAL[1].pattern, temp, 128);
1995     pattern.setTo(temp, u_strlen(temp));
1996     strsrch->setPattern(pattern, status);
1997     if (pattern != strsrch->getPattern()) {
1998         errln("Error setting pattern");
1999         goto ENDTESTPATTERN;
2000     }
2001     strsrch->reset();
2002     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
2003     if (U_FAILURE(status)) {
2004         errln("Error setting pattern %s", u_errorName(status));
2005     }
2006     if (!assertEqualWithStringSearch(strsrch, &PATTERNCANONICAL[1])) {
2007         goto ENDTESTPATTERN;
2008     }
2009 
2010     u_unescape(PATTERNCANONICAL[0].pattern, temp, 128);
2011     pattern.setTo(temp, u_strlen(temp));
2012     strsrch->setPattern(pattern, status);
2013     if (pattern != strsrch->getPattern()) {
2014         errln("Error setting pattern");
2015         goto ENDTESTPATTERN;
2016     }
2017     strsrch->reset();
2018     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
2019     if (U_FAILURE(status)) {
2020         errln("Error setting pattern %s", u_errorName(status));
2021     }
2022     if (!assertEqualWithStringSearch(strsrch, &PATTERNCANONICAL[0])) {
2023         goto ENDTESTPATTERN;
2024     }
2025 ENDTESTPATTERN:
2026     m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY));
2027     if (strsrch != NULL) {
2028         delete strsrch;
2029     }
2030 }
2031 
TestTextCanonical()2032 void StringSearchTest::TestTextCanonical()
2033 {
2034     UChar temp[128];
2035     u_unescape(TEXTCANONICAL[0].text, temp, 128);
2036     UnicodeString text;
2037     text.setTo(temp, u_strlen(temp));
2038     u_unescape(TEXTCANONICAL[0].pattern, temp, 128);
2039     UnicodeString pattern;
2040     pattern.setTo(temp, u_strlen(temp));
2041 
2042     UErrorCode    status  = U_ZERO_ERROR;
2043     StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
2044                                              status);
2045     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
2046 
2047     if (U_FAILURE(status)) {
2048         errln("Error opening string search %s", u_errorName(status));
2049         goto ENDTESTPATTERN;
2050     }
2051     if (text != strsrch->getText()) {
2052         errln("Error setting text");
2053     }
2054     if (!assertEqualWithStringSearch(strsrch, &TEXTCANONICAL[0])) {
2055         goto ENDTESTPATTERN;
2056     }
2057 
2058     u_unescape(TEXTCANONICAL[1].text, temp, 128);
2059     text.setTo(temp, u_strlen(temp));
2060     strsrch->setText(text, status);
2061     if (text != strsrch->getText()) {
2062         errln("Error setting text");
2063         goto ENDTESTPATTERN;
2064     }
2065     if (U_FAILURE(status)) {
2066         errln("Error setting text %s", u_errorName(status));
2067     }
2068     if (!assertEqualWithStringSearch(strsrch, &TEXTCANONICAL[1])) {
2069         goto ENDTESTPATTERN;
2070     }
2071 
2072     u_unescape(TEXTCANONICAL[0].text, temp, 128);
2073     text.setTo(temp, u_strlen(temp));
2074     strsrch->setText(text, status);
2075     if (text != strsrch->getText()) {
2076         errln("Error setting text");
2077         goto ENDTESTPATTERN;
2078     }
2079     if (U_FAILURE(status)) {
2080         errln("Error setting pattern %s", u_errorName(status));
2081     }
2082     if (!assertEqualWithStringSearch(strsrch, &TEXTCANONICAL[0])) {
2083         goto ENDTESTPATTERN;
2084     }
2085 ENDTESTPATTERN:
2086     if (strsrch != NULL) {
2087         delete strsrch;
2088     }
2089 }
2090 
TestCompositeBoundariesCanonical()2091 void StringSearchTest::TestCompositeBoundariesCanonical()
2092 {
2093     int count = 0;
2094     while (COMPOSITEBOUNDARIESCANONICAL[count].text != NULL) {
2095         logln("composite %d", count);
2096         if (!assertCanonicalEqual(&COMPOSITEBOUNDARIESCANONICAL[count])) {
2097             errln("Error at test number %d", count);
2098         }
2099         count ++;
2100     }
2101 }
2102 
TestGetSetOffsetCanonical()2103 void StringSearchTest::TestGetSetOffsetCanonical()
2104 {
2105 
2106     UErrorCode     status  = U_ZERO_ERROR;
2107     UnicodeString  text("text");
2108     UnicodeString  pattern("pattern");
2109     StringSearch  *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
2110                                               status);
2111     Collator *collator = strsrch->getCollator();
2112 
2113     collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
2114 
2115     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
2116     /* testing out of bounds error */
2117     strsrch->setOffset(-1, status);
2118     if (U_SUCCESS(status)) {
2119         errln("Error expecting set offset error");
2120     }
2121     strsrch->setOffset(128, status);
2122     if (U_SUCCESS(status)) {
2123         errln("Error expecting set offset error");
2124     }
2125     int   index   = 0;
2126     UChar temp[128];
2127     while (BASICCANONICAL[index].text != NULL) {
2128         SearchData  search      = BASICCANONICAL[index ++];
2129         if (BASICCANONICAL[index].text == NULL) {
2130             /* skip the last one */
2131             break;
2132         }
2133 
2134         u_unescape(search.text, temp, 128);
2135         text.setTo(temp, u_strlen(temp));
2136         u_unescape(search.pattern, temp, 128);
2137         pattern.setTo(temp, u_strlen(temp));
2138 
2139         UErrorCode  status      = U_ZERO_ERROR;
2140         strsrch->setText(text, status);
2141 
2142         strsrch->setPattern(pattern, status);
2143 
2144         int         count       = 0;
2145         int32_t matchindex  = search.offset[count];
2146         while (U_SUCCESS(status) && matchindex >= 0) {
2147             int32_t matchlength = search.size[count];
2148             strsrch->next(status);
2149             if (matchindex != strsrch->getMatchedStart() ||
2150                 matchlength != strsrch->getMatchedLength()) {
2151                 char *str = toCharString(strsrch->getText());
2152                 errln("Text: %s", str);
2153                 str = toCharString(strsrch->getPattern());
2154                 errln("Pattern: %s", str);
2155                 errln("Error match found at %d %d",
2156                       strsrch->getMatchedStart(),
2157                       strsrch->getMatchedLength());
2158                 goto bail;
2159             }
2160             matchindex = search.offset[count + 1] == -1 ? -1 :
2161                          search.offset[count + 2];
2162             if (search.offset[count + 1] != -1) {
2163                 strsrch->setOffset(search.offset[count + 1] + 1, status);
2164                 if (strsrch->getOffset() != search.offset[count + 1] + 1) {
2165                     errln("Error setting offset");
2166                     goto bail;
2167                 }
2168             }
2169 
2170             count += 2;
2171         }
2172         strsrch->next(status);
2173         if (strsrch->getMatchedStart() != USEARCH_DONE) {
2174             char *str = toCharString(strsrch->getText());
2175             errln("Text: %s", str);
2176             str = toCharString(strsrch->getPattern());
2177             errln("Pattern: %s", str);
2178             errln("Error match found at %d %d", strsrch->getMatchedStart(),
2179                    strsrch->getMatchedLength());
2180             goto bail;
2181         }
2182     }
2183 
2184 bail:
2185     collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
2186     delete strsrch;
2187 }
2188 
TestSupplementaryCanonical()2189 void StringSearchTest::TestSupplementaryCanonical()
2190 {
2191     int count = 0;
2192     while (SUPPLEMENTARYCANONICAL[count].text != NULL) {
2193         if (!assertCanonicalEqual(&SUPPLEMENTARYCANONICAL[count])) {
2194             errln("Error at test number %d", count);
2195         }
2196         count ++;
2197     }
2198 }
2199 
TestContractionCanonical()2200 void StringSearchTest::TestContractionCanonical()
2201 {
2202     UChar          temp[128];
2203 
2204     u_unescape(CONTRACTIONRULE, temp, 128);
2205     UnicodeString rules;
2206     rules.setTo(temp, u_strlen(temp));
2207 
2208     UErrorCode         status   = U_ZERO_ERROR;
2209     RuleBasedCollator *collator = new RuleBasedCollator(rules,
2210         getECollationStrength(UCOL_TERTIARY), UCOL_ON, status);
2211     if (U_FAILURE(status)) {
2212         errln("Error opening collator %s", u_errorName(status));
2213     }
2214     UnicodeString text("text");
2215     UnicodeString pattern("pattern");
2216     StringSearch *strsrch = new StringSearch(pattern, text, collator, NULL,
2217                                              status);
2218     strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
2219     if (U_FAILURE(status)) {
2220         errln("Error opening string search %s", u_errorName(status));
2221     }
2222 
2223     int count = 0;
2224     while (CONTRACTIONCANONICAL[count].text != NULL) {
2225         u_unescape(CONTRACTIONCANONICAL[count].text, temp, 128);
2226         text.setTo(temp, u_strlen(temp));
2227         u_unescape(CONTRACTIONCANONICAL[count].pattern, temp, 128);
2228         pattern.setTo(temp, u_strlen(temp));
2229         strsrch->setText(text, status);
2230         strsrch->setPattern(pattern, status);
2231         if (!assertEqualWithStringSearch(strsrch,
2232                                              &CONTRACTIONCANONICAL[count])) {
2233             errln("Error at test number %d", count);
2234         }
2235         count ++;
2236     }
2237     delete strsrch;
2238     delete collator;
2239 }
2240 
TestUClassID()2241 void StringSearchTest::TestUClassID()
2242 {
2243     char id = *((char *)StringSearch::getStaticClassID());
2244     if (id != 0) {
2245         errln("Static class id for StringSearch should be 0");
2246     }
2247     UErrorCode     status    = U_ZERO_ERROR;
2248     UnicodeString  text("text");
2249     UnicodeString  pattern("pattern");
2250     StringSearch  *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
2251                                               status);
2252     id = *((char *)strsrch->getDynamicClassID());
2253     if (id != 0) {
2254         errln("Dynamic class id for StringSearch should be 0");
2255     }
2256     delete strsrch;
2257 }
2258 
2259 class TestSearch : public SearchIterator
2260 {
2261 public:
2262     TestSearch(const TestSearch &obj);
2263     TestSearch(const UnicodeString &text,
2264                BreakIterator *breakiter,
2265                const UnicodeString &pattern);
2266     ~TestSearch();
2267 
2268     void        setOffset(int32_t position, UErrorCode &status);
2269     int32_t     getOffset() const;
2270     SearchIterator* safeClone() const;
2271 
2272 
2273     /**
2274      * ICU "poor man's RTTI", returns a UClassID for the actual class.
2275      *
2276      * @draft ICU 2.2
2277      */
getDynamicClassID() const2278     virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
2279 
2280     /**
2281      * ICU "poor man's RTTI", returns a UClassID for this class.
2282      *
2283      * @draft ICU 2.2
2284      */
getStaticClassID()2285     static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
2286 
2287     UBool operator!=(const TestSearch &that) const;
2288 
2289     UnicodeString m_pattern_;
2290 
2291 protected:
2292     int32_t      handleNext(int32_t position, UErrorCode &status);
2293     int32_t      handlePrev(int32_t position, UErrorCode &status);
2294     TestSearch & operator=(const TestSearch &that);
2295 
2296 private:
2297 
2298     /**
2299      * The address of this static class variable serves as this class's ID
2300      * for ICU "poor man's RTTI".
2301      */
2302     static const char fgClassID;
2303     uint32_t m_offset_;
2304 };
2305 
2306 const char TestSearch::fgClassID=0;
2307 
/** Copy constructor: copies the base part, the pattern and the offset. */
TestSearch::TestSearch(const TestSearch &obj)
    : SearchIterator(obj),
      m_pattern_(obj.m_pattern_),
      m_offset_(obj.m_offset_)
{
}
2313 
TestSearch(const UnicodeString & text,BreakIterator * breakiter,const UnicodeString & pattern)2314 TestSearch::TestSearch(const UnicodeString &text,
2315                        BreakIterator *breakiter,
2316                        const UnicodeString &pattern) : SearchIterator()
2317 {
2318     m_breakiterator_ = breakiter;
2319     m_pattern_ = pattern;
2320     m_text_ = text;
2321     m_offset_ = 0;
2322     m_pattern_ = pattern;
2323 }
2324 
~TestSearch()2325 TestSearch::~TestSearch()
2326 {
2327 }
2328 
2329 
setOffset(int32_t position,UErrorCode & status)2330 void TestSearch::setOffset(int32_t position, UErrorCode &status)
2331 {
2332     if (position >= 0 && position <= m_text_.length()) {
2333         m_offset_ = position;
2334     }
2335     else {
2336         status = U_INDEX_OUTOFBOUNDS_ERROR;
2337     }
2338 }
2339 
getOffset() const2340 int32_t TestSearch::getOffset() const
2341 {
2342     return m_offset_;
2343 }
2344 
safeClone() const2345 SearchIterator * TestSearch::safeClone() const
2346 {
2347     return new TestSearch(m_text_, m_breakiterator_, m_pattern_);
2348 }
2349 
operator !=(const TestSearch & that) const2350 UBool TestSearch::operator!=(const TestSearch &that) const
2351 {
2352     if (SearchIterator::operator !=(that)) {
2353         return FALSE;
2354     }
2355     return m_offset_ != that.m_offset_ || m_pattern_ != that.m_pattern_;
2356 }
2357 
handleNext(int32_t start,UErrorCode & status)2358 int32_t TestSearch::handleNext(int32_t start, UErrorCode &status)
2359 {
2360   if(U_SUCCESS(status)) {
2361     int match = m_text_.indexOf(m_pattern_, start);
2362     if (match < 0) {
2363         m_offset_ = m_text_.length();
2364         setMatchStart(m_offset_);
2365         setMatchLength(0);
2366         return USEARCH_DONE;
2367     }
2368     setMatchStart(match);
2369     m_offset_ = match;
2370     setMatchLength(m_pattern_.length());
2371     return match;
2372   } else {
2373     return USEARCH_DONE;
2374   }
2375 }
2376 
handlePrev(int32_t start,UErrorCode & status)2377 int32_t TestSearch::handlePrev(int32_t start, UErrorCode &status)
2378 {
2379   if(U_SUCCESS(status)) {
2380     int match = m_text_.lastIndexOf(m_pattern_, 0, start);
2381     if (match < 0) {
2382         m_offset_ = 0;
2383         setMatchStart(m_offset_);
2384         setMatchLength(0);
2385         return USEARCH_DONE;
2386     }
2387     setMatchStart(match);
2388     m_offset_ = match;
2389     setMatchLength(m_pattern_.length());
2390     return match;
2391   } else {
2392     return USEARCH_DONE;
2393   }
2394 }
2395 
/** Assignment: assigns the base part, then copies pattern and offset. */
TestSearch & TestSearch::operator=(const TestSearch &that)
{
    SearchIterator::operator=(that);
    m_pattern_ = that.m_pattern_;
    m_offset_  = that.m_offset_;
    return *this;
}
2403 
TestSubclass()2404 void StringSearchTest::TestSubclass()
2405 {
2406     UnicodeString text("abc abcd abc");
2407     UnicodeString pattern("abc");
2408     TestSearch search(text, NULL, pattern);
2409     TestSearch search2(search);
2410     int expected[] = {0, 4, 9};
2411     UErrorCode status = U_ZERO_ERROR;
2412     int i;
2413     StringCharacterIterator chariter(text);
2414 
2415     search.setText(text, status);
2416     if (search.getText() != search2.getText()) {
2417         errln("Error setting text");
2418     }
2419 
2420     search.setText(chariter, status);
2421     if (search.getText() != search2.getText()) {
2422         errln("Error setting text");
2423     }
2424 
2425     search.reset();
2426     // comparing constructors
2427 
2428     for (i = 0; i < UPRV_LENGTHOF(expected); i ++) {
2429         if (search.next(status) != expected[i]) {
2430             errln("Error getting next match");
2431         }
2432         if (search.getMatchedLength() != search.m_pattern_.length()) {
2433             errln("Error getting next match length");
2434         }
2435     }
2436     if (search.next(status) != USEARCH_DONE) {
2437         errln("Error should have reached the end of the iteration");
2438     }
2439     for (i = UPRV_LENGTHOF(expected) - 1; i >= 0; i --) {
2440         if (search.previous(status) != expected[i]) {
2441             errln("Error getting previous match");
2442         }
2443         if (search.getMatchedLength() != search.m_pattern_.length()) {
2444             errln("Error getting previous match length");
2445         }
2446     }
2447     if (search.previous(status) != USEARCH_DONE) {
2448         errln("Error should have reached the start of the iteration");
2449     }
2450 }
2451 
2452 class StubSearchIterator:public SearchIterator{
2453 public:
StubSearchIterator()2454     StubSearchIterator(){}
setOffset(int32_t,UErrorCode &)2455     virtual void setOffset(int32_t , UErrorCode &) {}
getOffset(void) const2456     virtual int32_t getOffset(void) const {return 0;}
safeClone(void) const2457     virtual SearchIterator* safeClone(void) const {return NULL;}
handleNext(int32_t,UErrorCode &)2458     virtual int32_t handleNext(int32_t , UErrorCode &){return 0;}
handlePrev(int32_t,UErrorCode &)2459     virtual int32_t handlePrev(int32_t , UErrorCode &) {return 0;}
getDynamicClassID() const2460     virtual UClassID getDynamicClassID() const {
2461         static char classID = 0;
2462         return (UClassID)&classID;
2463     }
2464 };
2465 
TestCoverage()2466 void StringSearchTest::TestCoverage(){
2467     StubSearchIterator stub1, stub2;
2468     UErrorCode status = U_ZERO_ERROR;
2469 
2470     if (stub1 != stub2){
2471         errln("new StubSearchIterator should be equal");
2472     }
2473 
2474     stub2.setText(UnicodeString("ABC"), status);
2475     if (U_FAILURE(status)) {
2476         errln("Error: SearchIterator::SetText");
2477     }
2478 
2479     stub1 = stub2;
2480     if (stub1 != stub2){
2481         errln("SearchIterator::operator =  assigned object should be equal");
2482     }
2483 }
2484 
2485 #endif /* !UCONFIG_NO_BREAK_ITERATION */
2486 
2487 #endif /* #if !UCONFIG_NO_COLLATION */
2488