1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2014, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 
7 #include "unicode/utypes.h"
8 
9 #if !UCONFIG_NO_COLLATION
10 
11 #include "unicode/coll.h"
12 #include "unicode/localpointer.h"
13 #include "unicode/tblcoll.h"
14 #include "unicode/unistr.h"
15 #include "unicode/sortkey.h"
16 #include "regcoll.h"
17 #include "sfwdchit.h"
18 #include "testutil.h"
19 #include "cmemory.h"
20 
// Element count of a true array (not a pointer), cast to int32_t so it can be
// passed straight to the int32_t length parameters used by the tests below.
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
22 
CollationRegressionTest()23 CollationRegressionTest::CollationRegressionTest()
24 {
25     UErrorCode status = U_ZERO_ERROR;
26 
27     en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
28     if(U_FAILURE(status)) {
29       delete en_us;
30       en_us = 0;
31       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
32       return;
33     }
34 }
35 
~CollationRegressionTest()36 CollationRegressionTest::~CollationRegressionTest()
37 {
38     delete en_us;
39 }
40 
41 
42     // @bug 4048446
43 //
44 // CollationElementIterator.reset() doesn't work
45 //
Test4048446()46 void CollationRegressionTest::Test4048446(/* char* par */)
47 {
48     const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
49     const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
50     CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
51     CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
52     UErrorCode status = U_ZERO_ERROR;
53 
54     if (i1 == NULL|| i2 == NULL)
55     {
56         errln("Could not create CollationElementIterator's");
57         delete i1;
58         delete i2;
59         return;
60     }
61 
62     while (i1->next(status) != CollationElementIterator::NULLORDER)
63     {
64         if (U_FAILURE(status))
65         {
66             errln("error calling next()");
67 
68             delete i1;
69             delete i2;
70             return;
71         }
72     }
73 
74     i1->reset();
75 
76     assertEqual(*i1, *i2);
77 
78     delete i1;
79     delete i2;
80 }
81 
82 // @bug 4051866
83 //
84 // Collator -> rules -> Collator round-trip broken for expanding characters
85 //
Test4051866()86 void CollationRegressionTest::Test4051866(/* char* par */)
87 {
88     UnicodeString rules;
89     UErrorCode status = U_ZERO_ERROR;
90 
91     rules += "&n < o ";
92     rules += "& oe ,o";
93     rules += (UChar)0x3080;
94     rules += "& oe ,";
95     rules += (UChar)0x1530;
96     rules += " ,O";
97     rules += "& OE ,O";
98     rules += (UChar)0x3080;
99     rules += "& OE ,";
100     rules += (UChar)0x1520;
101     rules += "< p ,P";
102 
103     // Build a collator containing expanding characters
104     LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status), status);
105     if (U_FAILURE(status)) {
106         errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status));
107         return;
108     }
109 
110     // Build another using the rules from  the first
111     LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), status), status);
112     if (U_FAILURE(status)) {
113         errln("RuleBasedCollator(rule string from other RBC) failed - %s", u_errorName(status));
114         return;
115     }
116 
117     // Make sure they're the same
118     if (!(c1->getRules() == c2->getRules()))
119     {
120         errln("Rules are not equal");
121     }
122 }
123 
124 // @bug 4053636
125 //
126 // Collator thinks "black-bird" == "black"
127 //
Test4053636()128 void CollationRegressionTest::Test4053636(/* char* par */)
129 {
130     if (en_us->equals("black_bird", "black"))
131     {
132         errln("black-bird == black");
133     }
134 }
135 
136 // @bug 4054238
137 //
138 // CollationElementIterator will not work correctly if the associated
139 // Collator object's mode is changed
140 //
Test4054238()141 void CollationRegressionTest::Test4054238(/* char* par */)
142 {
143     const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
144     const UnicodeString test3(chars3);
145     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
146 
147     // NOTE: The Java code uses en_us to create the CollationElementIterators
148     // but I'm pretty sure that's wrong, so I've changed this to use c.
149     UErrorCode status = U_ZERO_ERROR;
150     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
151     CollationElementIterator *i1 = c->createCollationElementIterator(test3);
152     delete i1;
153     delete c;
154 }
155 
156 // @bug 4054734
157 //
158 // Collator::IDENTICAL documented but not implemented
159 //
Test4054734()160 void CollationRegressionTest::Test4054734(/* char* par */)
161 {
162     /*
163         Here's the original Java:
164 
165         String[] decomp = {
166             "\u0001",   "<",    "\u0002",
167             "\u0001",   "=",    "\u0001",
168             "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
169             "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
170         };
171 
172         String[] nodecomp = {
173             "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
174         };
175     */
176 
177     static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
178     {
179         {0x0001, 0},      {0x3c, 0}, {0x0002, 0},
180         {0x0001, 0},      {0x3d, 0}, {0x0001, 0},
181         {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
182         {0x00c0, 0},      {0x3d, 0}, {0x41, 0x0300, 0}
183     };
184 
185 
186     UErrorCode status = U_ZERO_ERROR;
187     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
188 
189     c->setStrength(Collator::IDENTICAL);
190 
191     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
192     compareArray(*c, decomp, ARRAY_LENGTH(decomp));
193 
194     delete c;
195 }
196 
197 // @bug 4054736
198 //
199 // Full Decomposition mode not implemented
200 //
Test4054736()201 void CollationRegressionTest::Test4054736(/* char* par */)
202 {
203     UErrorCode status = U_ZERO_ERROR;
204     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
205 
206     c->setStrength(Collator::SECONDARY);
207     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
208 
209     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
210     {
211         {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC}  // Alef-Lamed vs. Alef, Lamed
212     };
213 
214     compareArray(*c, tests, ARRAY_LENGTH(tests));
215 
216     delete c;
217 }
218 
219 // @bug 4058613
220 //
221 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
222 //
Test4058613()223 void CollationRegressionTest::Test4058613(/* char* par */)
224 {
225     // Creating a default collator doesn't work when Korean is the default
226     // locale
227 
228     Locale oldDefault = Locale::getDefault();
229     UErrorCode status = U_ZERO_ERROR;
230 
231     Locale::setDefault(Locale::getKorean(), status);
232 
233     if (U_FAILURE(status))
234     {
235         errln("Could not set default locale to Locale::KOREAN");
236         return;
237     }
238 
239     Collator *c = NULL;
240 
241     c = Collator::createInstance("en_US", status);
242 
243     if (c == NULL || U_FAILURE(status))
244     {
245         errln("Could not create a Korean collator");
246         Locale::setDefault(oldDefault, status);
247         delete c;
248         return;
249     }
250 
251     // Since the fix to this bug was to turn off decomposition for Korean collators,
252     // ensure that's what we got
253     if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
254     {
255       errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
256     }
257 
258     delete c;
259 
260     Locale::setDefault(oldDefault, status);
261 }
262 
263 // @bug 4059820
264 //
265 // RuleBasedCollator.getRules does not return the exact pattern as input
266 // for expanding character sequences
267 //
Test4059820()268 void CollationRegressionTest::Test4059820(/* char* par */)
269 {
270     UErrorCode status = U_ZERO_ERROR;
271 
272     RuleBasedCollator *c = NULL;
273     UnicodeString rules = "&9 < a < b , c/a < d < z";
274 
275     c = new RuleBasedCollator(rules, status);
276 
277     if (c == NULL || U_FAILURE(status))
278     {
279         errln("Failure building a collator.");
280         delete c;
281         return;
282     }
283 
284     if ( c->getRules().indexOf("c/a") == -1)
285     {
286         errln("returned rules do not contain 'c/a'");
287     }
288 
289     delete c;
290 }
291 
292 // @bug 4060154
293 //
294 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
295 //
Test4060154()296 void CollationRegressionTest::Test4060154(/* char* par */)
297 {
298     UErrorCode status = U_ZERO_ERROR;
299     UnicodeString rules;
300 
301     rules += "&f < g, G < h, H < i, I < j, J";
302     rules +=  " & H < ";
303     rules += (UChar)0x0131;
304     rules += ", ";
305     rules += (UChar)0x0130;
306     rules += ", i, I";
307 
308     RuleBasedCollator *c = NULL;
309 
310     c = new RuleBasedCollator(rules, status);
311 
312     if (c == NULL || U_FAILURE(status))
313     {
314         errln("failure building collator.");
315         delete c;
316         return;
317     }
318 
319     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
320 
321  /*
322     String[] tertiary = {
323         "A",        "<",    "B",
324         "H",        "<",    "\u0131",
325         "H",        "<",    "I",
326         "\u0131",   "<",    "\u0130",
327         "\u0130",   "<",    "i",
328         "\u0130",   ">",    "H",
329     };
330 */
331 
332     static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
333     {
334         {0x41, 0},    {0x3c, 0}, {0x42, 0},
335         {0x48, 0},    {0x3c, 0}, {0x0131, 0},
336         {0x48, 0},    {0x3c, 0}, {0x49, 0},
337         {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
338         {0x0130, 0}, {0x3c, 0}, {0x69, 0},
339         {0x0130, 0}, {0x3e, 0}, {0x48, 0}
340     };
341 
342     c->setStrength(Collator::TERTIARY);
343     compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));
344 
345     /*
346     String[] secondary = {
347         "H",        "<",    "I",
348         "\u0131",   "=",    "\u0130",
349     };
350 */
351     static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
352     {
353         {0x48, 0},    {0x3c, 0}, {0x49, 0},
354         {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
355     };
356 
357     c->setStrength(Collator::PRIMARY);
358     compareArray(*c, secondary, ARRAY_LENGTH(secondary));
359 
360     delete c;
361 }
362 
363 // @bug 4062418
364 //
365 // Secondary/Tertiary comparison incorrect in French Secondary
366 //
Test4062418()367 void CollationRegressionTest::Test4062418(/* char* par */)
368 {
369     UErrorCode status = U_ZERO_ERROR;
370 
371     RuleBasedCollator *c = NULL;
372 
373     c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status);
374 
375     if (c == NULL || U_FAILURE(status))
376     {
377         errln("Failed to create collator for Locale::getCanadaFrench()");
378         delete c;
379         return;
380     }
381 
382     c->setStrength(Collator::SECONDARY);
383 
384 /*
385     String[] tests = {
386             "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
387     };
388 */
389     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
390     {
391         {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
392     };
393 
394     compareArray(*c, tests, ARRAY_LENGTH(tests));
395 
396     delete c;
397 }
398 
399 // @bug 4065540
400 //
401 // Collator::compare() method broken if either string contains spaces
402 //
Test4065540()403 void CollationRegressionTest::Test4065540(/* char* par */)
404 {
405     if (en_us->compare("abcd e", "abcd f") == 0)
406     {
407         errln("'abcd e' == 'abcd f'");
408     }
409 }
410 
411 // @bug 4066189
412 //
413 // Unicode characters need to be recursively decomposed to get the
414 // correct result. For example,
415 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
416 //
Test4066189()417 void CollationRegressionTest::Test4066189(/* char* par */)
418 {
419     static const UChar chars1[] = {0x1EB1, 0};
420     static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0};
421     const UnicodeString test1(chars1);
422     const UnicodeString test2(chars2);
423     UErrorCode status = U_ZERO_ERROR;
424 
425     // NOTE: The java code used en_us to create the
426     // CollationElementIterator's. I'm pretty sure that
427     // was wrong, so I've change the code to use c1 and c2
428     RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone();
429     c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
430     CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
431 
432     RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone();
433     c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
434     CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
435 
436     assertEqual(*i1, *i2);
437 
438     delete i2;
439     delete c2;
440     delete i1;
441     delete c1;
442 }
443 
444 // @bug 4066696
445 //
446 // French secondary collation checking at the end of compare iteration fails
447 //
Test4066696()448 void CollationRegressionTest::Test4066696(/* char* par */)
449 {
450     UErrorCode status = U_ZERO_ERROR;
451     RuleBasedCollator *c = NULL;
452 
453     c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status);
454 
455     if (c == NULL || U_FAILURE(status))
456     {
457         errln("Failure creating collator for Locale::getCanadaFrench()");
458         delete c;
459         return;
460     }
461 
462     c->setStrength(Collator::SECONDARY);
463 
464 /*
465     String[] tests = {
466         "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
467     };
468 
469   should be:
470 
471     String[] tests = {
472         "\u00e0",   ">",     "\u01fa",       // a-grave <  A-ring-acute
473     };
474 
475 */
476 
477     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
478     {
479         {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
480     };
481 
482     compareArray(*c, tests, ARRAY_LENGTH(tests));
483 
484     delete c;
485 }
486 
487 // @bug 4076676
488 //
489 // Bad canonicalization of same-class combining characters
490 //
Test4076676()491 void CollationRegressionTest::Test4076676(/* char* par */)
492 {
493     // These combining characters are all in the same class, so they should not
494     // be reordered, and they should compare as unequal.
495     static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
496     static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
497 
498     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
499     c->setStrength(Collator::TERTIARY);
500 
501     if (c->compare(s1,s2) == 0)
502     {
503         errln("Same-class combining chars were reordered");
504     }
505 
506     delete c;
507 }
508 
509 // @bug 4079231
510 //
511 // RuleBasedCollator::operator==(NULL) throws NullPointerException
512 //
Test4079231()513 void CollationRegressionTest::Test4079231(/* char* par */)
514 {
515     // I don't think there's any way to write this test
516     // in C++. The following is equivalent to the Java,
517     // but doesn't compile 'cause NULL can't be converted
518     // to Collator&
519     //
520     // if (en_us->operator==(NULL))
521     // {
522     //     errln("en_us->operator==(NULL) returned TRUE");
523     // }
524 
525  /*
526    try {
527         if (en_us->equals(null)) {
528             errln("en_us->equals(null) returned true");
529         }
530     }
531     catch (Exception e) {
532         errln("en_us->equals(null) threw " + e.toString());
533     }
534 */
535 }
536 
537 // @bug 4078588
538 //
539 // RuleBasedCollator breaks on "< a < bb" rule
540 //
Test4078588()541 void CollationRegressionTest::Test4078588(/* char *par */)
542 {
543     UErrorCode status = U_ZERO_ERROR;
544     RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status);
545 
546     if (rbc == NULL || U_FAILURE(status))
547     {
548         errln("Failed to create RuleBasedCollator.");
549         delete rbc;
550         return;
551     }
552 
553     Collator::EComparisonResult result = rbc->compare("a","bb");
554 
555     if (result != Collator::LESS)
556     {
557         errln((UnicodeString)"Compare(a,bb) returned " + (int)result
558             + (UnicodeString)"; expected -1");
559     }
560 
561     delete rbc;
562 }
563 
564 // @bug 4081866
565 //
566 // Combining characters in different classes not reordered properly.
567 //
Test4081866()568 void CollationRegressionTest::Test4081866(/* char* par */)
569 {
570     // These combining characters are all in different classes,
571     // so they should be reordered and the strings should compare as equal.
572     static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
573     static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
574 
575     UErrorCode status = U_ZERO_ERROR;
576     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
577     c->setStrength(Collator::TERTIARY);
578 
579     // Now that the default collators are set to NO_DECOMPOSITION
580     // (as a result of fixing bug 4114077), we must set it explicitly
581     // when we're testing reordering behavior.  -- lwerner, 5/5/98
582     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
583 
584     if (c->compare(s1,s2) != 0)
585     {
586         errln("Combining chars were not reordered");
587     }
588 
589     delete c;
590 }
591 
592 // @bug 4087241
593 //
594 // string comparison errors in Scandinavian collators
595 //
Test4087241()596 void CollationRegressionTest::Test4087241(/* char* par */)
597 {
598     UErrorCode status = U_ZERO_ERROR;
599     Locale da_DK("da", "DK");
600     RuleBasedCollator *c = NULL;
601 
602     c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);
603 
604     if (c == NULL || U_FAILURE(status))
605     {
606         errln("Failed to create collator for da_DK locale");
607         delete c;
608         return;
609     }
610 
611     c->setStrength(Collator::SECONDARY);
612 
613     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
614     {
615         {0x7a, 0},          {0x3c, 0}, {0x00E6, 0},            // z        < ae
616         {0x61, 0x0308, 0},  {0x3c, 0}, {0x61, 0x030A, 0},      // a-umlaut < a-ring
617         {0x59, 0},          {0x3c, 0}, {0x75, 0x0308, 0},      // Y        < u-umlaut
618     };
619 
620     compareArray(*c, tests, ARRAY_LENGTH(tests));
621 
622     delete c;
623 }
624 
625 // @bug 4087243
626 //
627 // CollationKey takes ignorable strings into account when it shouldn't
628 //
Test4087243()629 void CollationRegressionTest::Test4087243(/* char* par */)
630 {
631     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
632     c->setStrength(Collator::TERTIARY);
633 
634     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
635     {
636         {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0}    // 1 2 3  =  1 2 3 ctrl-A
637     };
638 
639     compareArray(*c, tests, ARRAY_LENGTH(tests));
640 
641     delete c;
642 }
643 
644 // @bug 4092260
645 //
646 // Mu/micro conflict
647 // Micro symbol and greek lowercase letter Mu should sort identically
648 //
Test4092260()649 void CollationRegressionTest::Test4092260(/* char* par */)
650 {
651     UErrorCode status = U_ZERO_ERROR;
652     Locale el("el", "");
653     Collator *c = NULL;
654 
655     c = Collator::createInstance(el, status);
656 
657     if (c == NULL || U_FAILURE(status))
658     {
659         errln("Failed to create collator for el locale.");
660         delete c;
661         return;
662     }
663 
664     // These now have tertiary differences in UCA
665     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
666 
667     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
668     {
669         {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
670     };
671 
672     compareArray(*c, tests, ARRAY_LENGTH(tests));
673 
674     delete c;
675 }
676 
677 // @bug 4095316
678 //
Test4095316()679 void CollationRegressionTest::Test4095316(/* char* par */)
680 {
681     UErrorCode status = U_ZERO_ERROR;
682     Locale el_GR("el", "GR");
683     Collator *c = Collator::createInstance(el_GR, status);
684 
685     if (c == NULL || U_FAILURE(status))
686     {
687         errln("Failed to create collator for el_GR locale");
688         delete c;
689         return;
690     }
691     // These now have tertiary differences in UCA
692     //c->setStrength(Collator::TERTIARY);
693     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
694 
695     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
696     {
697         {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
698     };
699 
700     compareArray(*c, tests, ARRAY_LENGTH(tests));
701 
702     delete c;
703 }
704 
705 // @bug 4101940
706 //
Test4101940()707 void CollationRegressionTest::Test4101940(/* char* par */)
708 {
709     UErrorCode status = U_ZERO_ERROR;
710     RuleBasedCollator *c = NULL;
711     UnicodeString rules = "&9 < a < b";
712     UnicodeString nothing = "";
713 
714     c = new RuleBasedCollator(rules, status);
715 
716     if (c == NULL || U_FAILURE(status))
717     {
718         errln("Failed to create RuleBasedCollator");
719         delete c;
720         return;
721     }
722 
723     CollationElementIterator *i = c->createCollationElementIterator(nothing);
724     i->reset();
725 
726     if (i->next(status) != CollationElementIterator::NULLORDER)
727     {
728         errln("next did not return NULLORDER");
729     }
730 
731     delete i;
732     delete c;
733 }
734 
735 // @bug 4103436
736 //
737 // Collator::compare not handling spaces properly
738 //
Test4103436()739 void CollationRegressionTest::Test4103436(/* char* par */)
740 {
741     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
742     c->setStrength(Collator::TERTIARY);
743 
744     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
745     {
746         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
747         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
748     };
749 
750     compareArray(*c, tests, ARRAY_LENGTH(tests));
751 
752     delete c;
753 }
754 
755 // @bug 4114076
756 //
757 // Collation not Unicode conformant with Hangul syllables
758 //
Test4114076()759 void CollationRegressionTest::Test4114076(/* char* par */)
760 {
761     UErrorCode status = U_ZERO_ERROR;
762     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
763     c->setStrength(Collator::TERTIARY);
764 
765     //
766     // With Canonical decomposition, Hangul syllables should get decomposed
767     // into Jamo, but Jamo characters should not be decomposed into
768     // conjoining Jamo
769     //
770     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
771     {
772         {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
773     };
774 
775     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
776     compareArray(*c, test1, ARRAY_LENGTH(test1));
777 
778     // From UTR #15:
779     // *In earlier versions of Unicode, jamo characters like ksf
780     //  had compatibility mappings to kf + sf. These mappings were
781     //  removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
782     // That is, the following test is obsolete as of 2.1.9
783 
784 //obsolete-    // With Full decomposition, it should go all the way down to
785 //obsolete-    // conjoining Jamo characters.
786 //obsolete-    //
787 //obsolete-    static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
788 //obsolete-    {
789 //obsolete-        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
790 //obsolete-    };
791 //obsolete-
792 //obsolete-    c->setDecomposition(Normalizer::DECOMP_COMPAT);
793 //obsolete-    compareArray(*c, test2, ARRAY_LENGTH(test2));
794 
795     delete c;
796 }
797 
798 
799 // @bug 4124632
800 //
801 // Collator::getCollationKey was hanging on certain character sequences
802 //
Test4124632()803 void CollationRegressionTest::Test4124632(/* char* par */)
804 {
805     UErrorCode status = U_ZERO_ERROR;
806     Collator *coll = NULL;
807 
808     coll = Collator::createInstance(Locale::getJapan(), status);
809 
810     if (coll == NULL || U_FAILURE(status))
811     {
812         errln("Failed to create collator for Locale::JAPAN");
813         delete coll;
814         return;
815     }
816 
817     static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
818     CollationKey key;
819 
820     coll->getCollationKey(test, key, status);
821 
822     if (key.isBogus() || U_FAILURE(status))
823     {
824         errln("CollationKey creation failed.");
825     }
826 
827     delete coll;
828 }
829 
830 // @bug 4132736
831 //
832 // sort order of french words with multiple accents has errors
833 //
Test4132736()834 void CollationRegressionTest::Test4132736(/* char* par */)
835 {
836     UErrorCode status = U_ZERO_ERROR;
837 
838     Collator *c = NULL;
839 
840     c = Collator::createInstance(Locale::getCanadaFrench(), status);
841     c->setStrength(Collator::TERTIARY);
842 
843     if (c == NULL || U_FAILURE(status))
844     {
845         errln("Failed to create a collator for Locale::getCanadaFrench()");
846         delete c;
847         return;
848     }
849 
850     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
851     {
852         {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
853         {0x65, 0x0300, 0x0301, 0},       {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
854     };
855 
856     compareArray(*c, test1, ARRAY_LENGTH(test1));
857 
858     delete c;
859 }
860 
861 // @bug 4133509
862 //
863 // The sorting using java.text.CollationKey is not in the exact order
864 //
Test4133509()865 void CollationRegressionTest::Test4133509(/* char* par */)
866 {
867     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
868     {
869         {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
870         {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0},      {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
871         {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0},                  {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
872     };
873 
874     compareArray(*en_us, test1, ARRAY_LENGTH(test1));
875 }
876 
877 // @bug 4114077
878 //
879 // Collation with decomposition off doesn't work for Europe
880 //
Test4114077()881 void CollationRegressionTest::Test4114077(/* char* par */)
882 {
883     // Ensure that we get the same results with decomposition off
884     // as we do with it on....
885 
886     UErrorCode status = U_ZERO_ERROR;
887     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
888     c->setStrength(Collator::TERTIARY);
889 
890     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
891     {
892         {0x00C0, 0},                     {0x3d, 0}, {0x41, 0x0300, 0},            // Should be equivalent
893         {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
894         {0x0204, 0},                     {0x3d, 0}, {0x45, 0x030F, 0},
895         {0x01fa, 0},                     {0x3d, 0}, {0x41, 0x030a, 0x0301, 0},    // a-ring-acute -> a-ring, acute
896                                                 //   -> a, ring, acute
897         {0x41, 0x0300, 0x0316, 0},         {0x3c, 0}, {0x41, 0x0316, 0x0300, 0}        // No reordering --> unequal
898     };
899 
900     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
901     compareArray(*c, test1, ARRAY_LENGTH(test1));
902 
903     static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
904     {
905         {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0}      // Reordering --> equal
906     };
907 
908     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
909     compareArray(*c, test2, ARRAY_LENGTH(test2));
910 
911     delete c;
912 }
913 
914 // @bug 4141640
915 //
916 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
917 //
Test4141640()918 void CollationRegressionTest::Test4141640(/* char* par */)
919 {
920     //
921     // Rather than just creating a Swedish collator, we might as well
922     // try to instantiate one for every locale available on the system
923     // in order to prevent this sort of bug from cropping up in the future
924     //
925     UErrorCode status = U_ZERO_ERROR;
926     int32_t i, localeCount;
927     const Locale *locales = Locale::getAvailableLocales(localeCount);
928 
929     for (i = 0; i < localeCount; i += 1)
930     {
931         Collator *c = NULL;
932 
933         status = U_ZERO_ERROR;
934         c = Collator::createInstance(locales[i], status);
935 
936         if (c == NULL || U_FAILURE(status))
937         {
938             UnicodeString msg, localeName;
939 
940             msg += "Could not create collator for locale ";
941             msg += locales[i].getName();
942 
943             errln(msg);
944         }
945 
946         delete c;
947     }
948 }
949 
950 // @bug 4139572
951 //
952 // getCollationKey throws exception for spanish text
953 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
954 //
Test4139572()955 void CollationRegressionTest::Test4139572(/* char* par */)
956 {
957     //
958     // Code pasted straight from the bug report
959     // (and then translated to C++ ;-)
960     //
961     // create spanish locale and collator
962     UErrorCode status = U_ZERO_ERROR;
963     Locale l("es", "es");
964     Collator *col = NULL;
965 
966     col = Collator::createInstance(l, status);
967 
968     if (col == NULL || U_FAILURE(status))
969     {
970         errln("Failed to create a collator for es_es locale.");
971         delete col;
972         return;
973     }
974 
975     CollationKey key;
976 
977     // this spanish phrase kills it!
978     col->getCollationKey("Nombre De Objeto", key, status);
979 
980     if (key.isBogus() || U_FAILURE(status))
981     {
982         errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
983     }
984 
985     delete col;
986 }
987 /* HSYS : RuleBasedCollator::compare() performance enhancements
988           compare() does not create CollationElementIterator() anymore.*/
989 
// Helper for Test4146160: a RuleBasedCollator subclass whose
// createCollationElementIterator() overloads count how often they are called.
class My4146160Collator : public RuleBasedCollator
{
public:
    // Builds the collator from the rules of rbc; errors are reported via status.
    My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status);
    ~My4146160Collator();

    // Increments count, then forwards to the RuleBasedCollator implementation.
    CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const;

    // Increments count, then forwards to the RuleBasedCollator implementation.
    CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const;

    // Number of createCollationElementIterator() calls; static, so shared by
    // all instances.
    static int32_t count;
};

// Definition of the static call counter, initially zero.
int32_t My4146160Collator::count = 0;
1004 
// Constructs the counting collator from the rule string of rbc; the base-class
// constructor reports any failure through status.
My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status)
  : RuleBasedCollator(rbc.getRules(), status)
{
}
1009 
// Nothing to release beyond what the base class handles.
My4146160Collator::~My4146160Collator()
{
}
1013 
createCollationElementIterator(const UnicodeString & text) const1014 CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const
1015 {
1016     count += 1;
1017     return RuleBasedCollator::createCollationElementIterator(text);
1018 }
1019 
createCollationElementIterator(const CharacterIterator & text) const1020 CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const
1021 {
1022     count += 1;
1023     return RuleBasedCollator::createCollationElementIterator(text);
1024 }
1025 
1026 // @bug 4146160
1027 //
1028 // RuleBasedCollator doesn't use createCollationElementIterator internally
1029 //
Test4146160()1030 void CollationRegressionTest::Test4146160(/* char* par */)
1031 {
1032 #if 0
1033     //
1034     // Use a custom collator class whose createCollationElementIterator
1035     // methods increment a count....
1036     //
1037     UErrorCode status = U_ZERO_ERROR;
1038     CollationKey key;
1039 
1040     My4146160Collator::count = 0;
1041     My4146160Collator *mc = NULL;
1042 
1043     mc = new My4146160Collator(*en_us, status);
1044 
1045     if (mc == NULL || U_FAILURE(status))
1046     {
1047         errln("Failed to create a My4146160Collator.");
1048         delete mc;
1049         return;
1050     }
1051 
1052     mc->getCollationKey("1", key, status);
1053 
1054     if (key.isBogus() || U_FAILURE(status))
1055     {
1056         errln("Failure to get a CollationKey from a My4146160Collator.");
1057         delete mc;
1058         return;
1059     }
1060 
1061     if (My4146160Collator::count < 1)
1062     {
1063         errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
1064     }
1065 
1066     My4146160Collator::count = 0;
1067     mc->compare("1", "2");
1068 
1069     if (My4146160Collator::count < 1)
1070     {
1071         errln("My4146160Collator::createtCollationElementIterator not called for compare");
1072     }
1073 
1074     delete mc;
1075 #endif
1076 }
1077 
Test4179216()1078 void CollationRegressionTest::Test4179216() {
1079     // you can position a CollationElementIterator in the middle of
1080     // a contracting character sequence, yielding a bogus collation
1081     // element
1082     IcuTestErrorCode errorCode(*this, "Test4179216");
1083     RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode);
1084     UnicodeString testText = "church church catcatcher runcrunchynchy";
1085     CollationElementIterator *iter = coll.createCollationElementIterator(testText);
1086 
1087     // test that the "ch" combination works properly
1088     iter->setOffset(4, errorCode);
1089     int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1090 
1091     iter->reset();
1092     int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1093 
1094     iter->setOffset(5, errorCode);
1095     int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1096 
1097     // Compares and prints only 16-bit primary weights.
1098     if (elt4 != elt0 || elt5 != elt0) {
1099         errln("The collation elements at positions 0 (0x%04x), "
1100                 "4 (0x%04x), and 5 (0x%04x) don't match.",
1101                 elt0, elt4, elt5);
1102     }
1103 
1104     // test that the "cat" combination works properly
1105     iter->setOffset(14, errorCode);
1106     int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1107 
1108     iter->setOffset(15, errorCode);
1109     int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1110 
1111     iter->setOffset(16, errorCode);
1112     int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1113 
1114     iter->setOffset(17, errorCode);
1115     int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1116 
1117     iter->setOffset(18, errorCode);
1118     int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1119 
1120     iter->setOffset(19, errorCode);
1121     int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1122 
1123     // Compares and prints only 16-bit primary weights.
1124     if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
1125             || elt14 != elt18 || elt14 != elt19) {
1126         errln("\"cat\" elements don't match: elt14 = 0x%04x, "
1127                 "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, "
1128                 "elt18 = 0x%04x, elt19 = 0x%04x",
1129                 elt14, elt15, elt16, elt17, elt18, elt19);
1130     }
1131 
1132     // now generate a complete list of the collation elements,
1133     // first using next() and then using setOffset(), and
1134     // make sure both interfaces return the same set of elements
1135     iter->reset();
1136 
1137     int32_t elt = iter->next(errorCode);
1138     int32_t count = 0;
1139     while (elt != CollationElementIterator::NULLORDER) {
1140         ++count;
1141         elt = iter->next(errorCode);
1142     }
1143 
1144     LocalArray<UnicodeString> nextElements(new UnicodeString[count]);
1145     LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]);
1146     int32_t lastPos = 0;
1147 
1148     iter->reset();
1149     elt = iter->next(errorCode);
1150     count = 0;
1151     while (elt != CollationElementIterator::NULLORDER) {
1152         nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
1153         lastPos = iter->getOffset();
1154         elt = iter->next(errorCode);
1155     }
1156     int32_t nextElementsLength = count;
1157     count = 0;
1158     for (int32_t i = 0; i < testText.length(); ) {
1159         iter->setOffset(i, errorCode);
1160         lastPos = iter->getOffset();
1161         elt = iter->next(errorCode);
1162         setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
1163         i = iter->getOffset();
1164     }
1165     for (int32_t i = 0; i < nextElementsLength; i++) {
1166         if (nextElements[i] == setOffsetElements[i]) {
1167             logln(nextElements[i]);
1168         } else {
1169             errln(UnicodeString("Error: next() yielded ") + nextElements[i] +
1170                 ", but setOffset() yielded " + setOffsetElements[i]);
1171         }
1172     }
1173     delete iter;
1174 }
1175 
1176 // Ticket 7189
1177 //
1178 // nextSortKeyPart incorrect for EO_S1 collation
calcKeyIncremental(UCollator * coll,const UChar * text,int32_t len,uint8_t * keyBuf,int32_t,UErrorCode & status)1179 static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) {
1180     UCharIterator uiter;
1181     uint32_t state[2] = { 0, 0 };
1182     int32_t keyLen;
1183     int32_t count = 8;
1184 
1185     uiter_setString(&uiter, text, len);
1186     keyLen = 0;
1187     while (TRUE) {
1188         int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status);
1189         if (U_FAILURE(status)) {
1190             return -1;
1191         }
1192         if (keyPartLen == 0) {
1193             break;
1194         }
1195         keyLen += keyPartLen;
1196     }
1197     return keyLen;
1198 }
1199 
TestT7189()1200 void CollationRegressionTest::TestT7189() {
1201     UErrorCode status = U_ZERO_ERROR;
1202     UCollator *coll;
1203     uint32_t i;
1204 
1205     static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1206     // "Achter De Hoven"
1207         { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1208         // "ABC"
1209         { 0x41, 0x42, 0x43, 0x00 },
1210         // "HELLO world!"
1211         { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1212     };
1213 
1214     static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1215     // "Achter de Hoven"
1216         { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1217         // "abc"
1218         { 0x61, 0x62, 0x63, 0x00 },
1219         // "hello world!"
1220         { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1221     };
1222 
1223     // Open the collator
1224     coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status);
1225     if (U_FAILURE(status)) {
1226         errln("Failed to create a collator for short string EO_S1");
1227         return;
1228     }
1229 
1230     for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) {
1231         uint8_t key1[100], key2[100];
1232         int32_t len1, len2;
1233 
1234         len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status);
1235         if (U_FAILURE(status)) {
1236             errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]);
1237             break;
1238         }
1239         len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status);
1240         if (U_FAILURE(status)) {
1241             errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]);
1242             break;
1243         }
1244 
1245         if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) {
1246             errln(UnicodeString("Failed: Identical key\n") + "    text1: " + text1[i] + "\n" + "    text2: " + text2[i] + "\n" + "    key  : " + TestUtility::hex(key1, len1));
1247         } else {
1248             logln(UnicodeString("Keys produced -\n") + "    text1: " + text1[i] + "\n" + "    key1 : " + TestUtility::hex(key1, len1) + "\n" + "    text2: " + text2[i] + "\n" + "    key2 : "
1249                     + TestUtility::hex(key2, len2));
1250         }
1251     }
1252     ucol_close(coll);
1253 }
1254 
TestCaseFirstCompression()1255 void CollationRegressionTest::TestCaseFirstCompression() {
1256     RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone();
1257     UErrorCode status = U_ZERO_ERROR;
1258 
1259     // default
1260     caseFirstCompressionSub(col, "default");
1261 
1262     // Upper first
1263     col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
1264     if (U_FAILURE(status)) {
1265         errln("Failed to set UCOL_UPPER_FIRST");
1266         return;
1267     }
1268     caseFirstCompressionSub(col, "upper first");
1269 
1270     // Lower first
1271     col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
1272     if (U_FAILURE(status)) {
1273         errln("Failed to set UCOL_LOWER_FIRST");
1274         return;
1275     }
1276     caseFirstCompressionSub(col, "lower first");
1277 
1278     delete col;
1279 }
1280 
caseFirstCompressionSub(Collator * col,UnicodeString opt)1281 void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) {
1282     const int32_t maxLength = 50;
1283 
1284     UChar str1[maxLength];
1285     UChar str2[maxLength];
1286 
1287     CollationKey key1, key2;
1288 
1289     for (int32_t len = 1; len <= maxLength; len++) {
1290         int32_t i = 0;
1291         for (; i < len - 1; i++) {
1292             str1[i] = str2[i] = (UChar)0x61; // 'a'
1293         }
1294         str1[i] = (UChar)0x41; // 'A'
1295         str2[i] = (UChar)0x61; // 'a'
1296 
1297         UErrorCode status = U_ZERO_ERROR;
1298         col->getCollationKey(str1, len, key1, status);
1299         col->getCollationKey(str2, len, key2, status);
1300 
1301         UCollationResult cmpKey = key1.compareTo(key2, status);
1302         UCollationResult cmpCol = col->compare(str1, len, str2, len, status);
1303 
1304         if (U_FAILURE(status)) {
1305             errln("Error in caseFirstCompressionSub");
1306         } else if (cmpKey != cmpCol) {
1307             errln((UnicodeString)"Inconsistent comparison(" + opt
1308                 + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len)
1309                 + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol);
1310         }
1311     }
1312 }
1313 
TestTrailingComment()1314 void CollationRegressionTest::TestTrailingComment() {
1315     // ICU ticket #8070:
1316     // Check that the rule parser handles a comment without terminating end-of-line.
1317     IcuTestErrorCode errorCode(*this, "TestTrailingComment");
1318     RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"), errorCode);
1319     UnicodeString a((UChar)0x61), b((UChar)0x62), c((UChar)0x63);
1320     assertTrue("c<b", coll.compare(c, b) < 0);
1321     assertTrue("b<a", coll.compare(b, a) < 0);
1322 }
1323 
TestBeforeWithTooStrongAfter()1324 void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
1325     // ICU ticket #9959:
1326     // Forbid rules with a before-reset followed by a stronger relation.
1327     IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter");
1328     RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorCode);
1329     if(errorCode.isSuccess()) {
1330         errln("should forbid before-2-reset followed by primary relation");
1331     } else {
1332         errorCode.reset();
1333     }
1334     RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), errorCode);
1335     if(errorCode.isSuccess()) {
1336         errln("should forbid before-3-reset followed by primary or secondary relation");
1337     } else {
1338         errorCode.reset();
1339     }
1340 }
1341 
compareArray(Collator & c,const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],int32_t testCount)1342 void CollationRegressionTest::compareArray(Collator &c,
1343                                            const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
1344                                            int32_t testCount)
1345 {
1346     int32_t i;
1347     Collator::EComparisonResult expectedResult = Collator::EQUAL;
1348 
1349     for (i = 0; i < testCount; i += 3)
1350     {
1351         UnicodeString source(tests[i]);
1352         UnicodeString comparison(tests[i + 1]);
1353         UnicodeString target(tests[i + 2]);
1354 
1355         if (comparison == "<")
1356         {
1357             expectedResult = Collator::LESS;
1358         }
1359         else if (comparison == ">")
1360         {
1361             expectedResult = Collator::GREATER;
1362         }
1363         else if (comparison == "=")
1364         {
1365             expectedResult = Collator::EQUAL;
1366         }
1367         else
1368         {
1369             UnicodeString bogus1("Bogus comparison string \"");
1370             UnicodeString bogus2("\"");
1371             errln(bogus1 + comparison + bogus2);
1372         }
1373 
1374         Collator::EComparisonResult compareResult = c.compare(source, target);
1375 
1376         CollationKey sourceKey, targetKey;
1377         UErrorCode status = U_ZERO_ERROR;
1378 
1379         c.getCollationKey(source, sourceKey, status);
1380 
1381         if (U_FAILURE(status))
1382         {
1383             errln("Couldn't get collationKey for source");
1384             continue;
1385         }
1386 
1387         c.getCollationKey(target, targetKey, status);
1388 
1389         if (U_FAILURE(status))
1390         {
1391             errln("Couldn't get collationKey for target");
1392             continue;
1393         }
1394 
1395         Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
1396 
1397         reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
1398 
1399     }
1400 }
1401 
assertEqual(CollationElementIterator & i1,CollationElementIterator & i2)1402 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
1403 {
1404     int32_t c1, c2, count = 0;
1405     UErrorCode status = U_ZERO_ERROR;
1406 
1407     do
1408     {
1409         c1 = i1.next(status);
1410         c2 = i2.next(status);
1411 
1412         if (c1 != c2)
1413         {
1414             UnicodeString msg, msg1("    ");
1415 
1416             msg += msg1 + count;
1417             msg += ": strength(0x";
1418             appendHex(c1, 8, msg);
1419             msg += ") != strength(0x";
1420             appendHex(c2, 8, msg);
1421             msg += ")";
1422 
1423             errln(msg);
1424             break;
1425         }
1426 
1427         count += 1;
1428     }
1429     while (c1 != CollationElementIterator::NULLORDER);
1430 }
1431 
// Test-suite entry point: registers every regression test of this class.
//
// index  number of the requested test case
// exec   when true, a header line is logged
// name   output; set to "" when the en_us collator is unavailable
//
// NOTE(review): the TESTCASE_AUTO* macros are defined outside this file;
// presumably they map index to the listed test in declaration order, set name
// and run the test when exec is true -- confirm against the macro definitions.
void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
{
    if (exec)
    {
        logln("Collation Regression Tests: ");
    }

    // Every test relies on the en_us collator created in the constructor;
    // without it, report a data error and expose no tests.
    if(en_us == NULL) {
        dataerrln("Class collator not instantiated");
        name = "";
        return;
    }
    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO(Test4048446);
    TESTCASE_AUTO(Test4051866);
    TESTCASE_AUTO(Test4053636);
    TESTCASE_AUTO(Test4054238);
    TESTCASE_AUTO(Test4054734);
    TESTCASE_AUTO(Test4054736);
    TESTCASE_AUTO(Test4058613);
    TESTCASE_AUTO(Test4059820);
    TESTCASE_AUTO(Test4060154);
    TESTCASE_AUTO(Test4062418);
    TESTCASE_AUTO(Test4065540);
    TESTCASE_AUTO(Test4066189);
    TESTCASE_AUTO(Test4066696);
    TESTCASE_AUTO(Test4076676);
    TESTCASE_AUTO(Test4078588);
    TESTCASE_AUTO(Test4079231);
    TESTCASE_AUTO(Test4081866);
    TESTCASE_AUTO(Test4087241);
    TESTCASE_AUTO(Test4087243);
    TESTCASE_AUTO(Test4092260);
    TESTCASE_AUTO(Test4095316);
    TESTCASE_AUTO(Test4101940);
    TESTCASE_AUTO(Test4103436);
    TESTCASE_AUTO(Test4114076);
    TESTCASE_AUTO(Test4114077);
    TESTCASE_AUTO(Test4124632);
    TESTCASE_AUTO(Test4132736);
    TESTCASE_AUTO(Test4133509);
    TESTCASE_AUTO(Test4139572);
    TESTCASE_AUTO(Test4141640);
    TESTCASE_AUTO(Test4146160);
    TESTCASE_AUTO(Test4179216);
    TESTCASE_AUTO(TestT7189);
    TESTCASE_AUTO(TestCaseFirstCompression);
    TESTCASE_AUTO(TestTrailingComment);
    TESTCASE_AUTO(TestBeforeWithTooStrongAfter);
    TESTCASE_AUTO_END;
}
1483 
1484 #endif /* #if !UCONFIG_NO_COLLATION */
1485