1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 //===============================================================================
9 //
10 // File apicoll.cpp
11 //
12 //
13 //
14 // Created by: Helena Shih
15 //
16 // Modification History:
17 //
18 //  Date         Name          Description
19 //  2/5/97      aliu        Added streamIn and streamOut methods.  Added
20 //                          constructor which reads RuleBasedCollator object from
21 //                          a binary file.  Added writeToFile method which streams
22 //                          RuleBasedCollator out to a binary file.  The streamIn
23 //                          and streamOut methods use istream and ostream objects
24 //                          in binary mode.
25 //  6/30/97     helena      Added tests for CollationElementIterator::setText, getOffset
26 //                          setOffset and DecompositionIterator::getOffset, setOffset.
27 //                          DecompositionIterator is made public so add class scope
28 //                          testing.
29 //  02/10/98    damiba      Added test for compare(UnicodeString&, UnicodeString&, int32_t)
30 //===============================================================================
31 
32 #include "unicode/utypes.h"
33 
34 #if !UCONFIG_NO_COLLATION
35 
36 #include "unicode/localpointer.h"
37 #include "unicode/coll.h"
38 #include "unicode/tblcoll.h"
39 #include "unicode/coleitr.h"
40 #include "unicode/sortkey.h"
41 #include "apicoll.h"
42 #include "unicode/chariter.h"
43 #include "unicode/schriter.h"
44 #include "unicode/strenum.h"
45 #include "unicode/ustring.h"
46 #include "unicode/ucol.h"
47 
48 #include "sfwdchit.h"
49 #include "cmemory.h"
50 #include <stdlib.h>
51 
52 void
doAssert(UBool condition,const char * message)53 CollationAPITest::doAssert(UBool condition, const char *message)
54 {
55     if (!condition) {
56         errln(UnicodeString("ERROR : ") + message);
57     }
58 }
59 
60 // Collator Class Properties
61 // ctor, dtor, createInstance, compare, getStrength/setStrength
62 // getDecomposition/setDecomposition, getDisplayName
63 void
TestProperty()64 CollationAPITest::TestProperty(/* char* par */)
65 {
66     UErrorCode success = U_ZERO_ERROR;
67     Collator *col = 0;
68     /*
69      * Expected version of the English collator.
70      * Currently, the major/minor version numbers change when the builder code
71      * changes,
72      * number 2 is from the tailoring data version and
73      * number 3 is the UCA version.
74      * This changes with every UCA version change, and the expected value
75      * needs to be adjusted.
76      * Same in cintltst/capitst.c.
77      */
78     UVersionInfo currVersionArray = {0x31, 0xC0, 0x05, 0x2A};  // from ICU 4.4/UCA 5.2
79     UVersionInfo versionArray;
80 
81     logln("The property tests begin : ");
82     logln("Test ctors : ");
83     col = Collator::createInstance(Locale::getEnglish(), success);
84     if (U_FAILURE(success)){
85         errcheckln(success, "English Collator creation failed. - %s", u_errorName(success));
86         return;
87     }
88 
89     col->getVersion(versionArray);
90     // Check for a version greater than some value rather than equality
91     // so that we need not update the expected version each time.
92     if (uprv_memcmp(versionArray, currVersionArray, 4)<0) {
93       errln("Testing Collator::getVersion() - unexpected result: %02x.%02x.%02x.%02x",
94             versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
95     } else {
96       logln("Collator::getVersion() result: %02x.%02x.%02x.%02x",
97             versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
98     }
99 
100     doAssert((col->compare("ab", "abc") == Collator::LESS), "ab < abc comparison failed");
101     doAssert((col->compare("ab", "AB") == Collator::LESS), "ab < AB comparison failed");
102     doAssert((col->compare("blackbird", "black-bird") == Collator::GREATER), "black-bird > blackbird comparison failed");
103     doAssert((col->compare("black bird", "black-bird") == Collator::LESS), "black bird > black-bird comparison failed");
104     doAssert((col->compare("Hello", "hello") == Collator::GREATER), "Hello > hello comparison failed");
105     doAssert((col->compare("","",success) == UCOL_EQUAL), "Comparison between empty strings failed");
106 
107     doAssert((col->compareUTF8("\x61\x62\xc3\xa4", "\x61\x62\xc3\x9f", success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UTF-8 comparison failed");
108     success = U_ZERO_ERROR;
109     {
110         UnicodeString abau=UNICODE_STRING_SIMPLE("\\x61\\x62\\xe4").unescape();
111         UnicodeString abss=UNICODE_STRING_SIMPLE("\\x61\\x62\\xdf").unescape();
112         UCharIterator abauIter, abssIter;
113         uiter_setReplaceable(&abauIter, &abau);
114         uiter_setReplaceable(&abssIter, &abss);
115         doAssert((col->compare(abauIter, abssIter, success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UCharIterator comparison failed");
116         success = U_ZERO_ERROR;
117     }
118 
119     /*start of update [Bertrand A. D. 02/10/98]*/
120     doAssert((col->compare("ab", "abc", 2) == Collator::EQUAL), "ab = abc with length 2 comparison failed");
121     doAssert((col->compare("ab", "AB", 2) == Collator::LESS), "ab < AB  with length 2 comparison failed");
122     doAssert((col->compare("ab", "Aa", 1) == Collator::LESS), "ab < Aa  with length 1 comparison failed");
123     doAssert((col->compare("ab", "Aa", 2) == Collator::GREATER), "ab > Aa  with length 2 comparison failed");
124     doAssert((col->compare("black-bird", "blackbird", 5) == Collator::EQUAL), "black-bird = blackbird with length of 5 comparison failed");
125     doAssert((col->compare("black bird", "black-bird", 10) == Collator::LESS), "black bird < black-bird with length 10 comparison failed");
126     doAssert((col->compare("Hello", "hello", 5) == Collator::GREATER), "Hello > hello with length 5 comparison failed");
127     /*end of update [Bertrand A. D. 02/10/98]*/
128 
129 
130     logln("Test ctors ends.");
131     logln("testing Collator::getStrength() method ...");
132     doAssert((col->getStrength() == Collator::TERTIARY), "collation object has the wrong strength");
133     doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
134 
135 
136     logln("testing Collator::setStrength() method ...");
137     col->setStrength(Collator::SECONDARY);
138     doAssert((col->getStrength() != Collator::TERTIARY), "collation object's strength is secondary difference");
139     doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
140     doAssert((col->getStrength() == Collator::SECONDARY), "collation object has the wrong strength");
141 
142     UnicodeString name;
143 
144     logln("Get display name for the US English collation in German : ");
145     logln(Collator::getDisplayName(Locale::getUS(), Locale::getGerman(), name));
146     doAssert((name == UnicodeString("Englisch (Vereinigte Staaten)")), "getDisplayName failed");
147 
148     logln("Get display name for the US English collation in English : ");
149     logln(Collator::getDisplayName(Locale::getUS(), Locale::getEnglish(), name));
150     doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed");
151 #if 0
152     // weiv : this test is bogus if we're running on any machine that has different default locale than English.
153     // Therefore, it is banned!
154     logln("Get display name for the US English in default locale language : ");
155     logln(Collator::getDisplayName(Locale::US, name));
156     doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed if this is an English machine");
157 #endif
158     delete col; col = 0;
159     RuleBasedCollator *rcol = (RuleBasedCollator *)Collator::createInstance("da_DK",
160                                                                             success);
161     if (U_FAILURE(success)) {
162         errcheckln(success, "Collator::createInstance(\"da_DK\") failed - %s", u_errorName(success));
163         return;
164     }
165     const UnicodeString &daRules = rcol->getRules();
166     if(daRules.isEmpty()) {
167         dataerrln("missing da_DK tailoring rule string");
168     } else {
169         doAssert(daRules.indexOf("aa") >= 0, "da_DK rules do not contain 'aa'");
170     }
171     delete rcol;
172 
173     col = Collator::createInstance(Locale::getFrench(), success);
174     if (U_FAILURE(success))
175     {
176         errln("Creating French collation failed.");
177         return;
178     }
179 
180     col->setStrength(Collator::PRIMARY);
181     logln("testing Collator::getStrength() method again ...");
182     doAssert((col->getStrength() != Collator::TERTIARY), "collation object has the wrong strength");
183     doAssert((col->getStrength() == Collator::PRIMARY), "collation object's strength is not primary difference");
184 
185     logln("testing French Collator::setStrength() method ...");
186     col->setStrength(Collator::TERTIARY);
187     doAssert((col->getStrength() == Collator::TERTIARY), "collation object's strength is not tertiary difference");
188     doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
189     doAssert((col->getStrength() != Collator::SECONDARY), "collation object's strength is secondary difference");
190     delete col;
191 
192     logln("Create junk collation: ");
193     Locale abcd("ab", "CD", "");
194     success = U_ZERO_ERROR;
195     Collator *junk = 0;
196     junk = Collator::createInstance(abcd, success);
197 
198     if (U_FAILURE(success))
199     {
200         errln("Junk collation creation failed, should at least return default.");
201         return;
202     }
203 
204     doAssert(((RuleBasedCollator *)junk)->getRules().isEmpty(),
205                "The root collation should be returned for an unsupported language.");
206     Collator *frCol = Collator::createInstance(Locale::getCanadaFrench(), success);
207     if (U_FAILURE(success))
208     {
209         errln("Creating fr_CA collator failed.");
210         delete junk;
211         return;
212     }
213 
214     // If the default locale isn't French, the French and non-French collators
215     // should be different
216     if (frCol->getLocale(ULOC_ACTUAL_LOCALE, success) != Locale::getCanadaFrench()) {
217         doAssert((*frCol != *junk), "The junk is the same as the fr_CA collator.");
218     }
219     Collator *aFrCol = frCol->clone();
220     doAssert((*frCol == *aFrCol), "The cloning of a fr_CA collator failed.");
221     logln("Collator property test ended.");
222 
223     delete frCol;
224     delete aFrCol;
225     delete junk;
226 }
227 
TestKeywordValues()228 void CollationAPITest::TestKeywordValues() {
229     IcuTestErrorCode errorCode(*this, "TestKeywordValues");
230     LocalPointer<Collator> col(Collator::createInstance(Locale::getEnglish(), errorCode));
231     if (errorCode.errIfFailureAndReset("English Collator creation failed")) {
232         return;
233     }
234 
235     LocalPointer<StringEnumeration> kwEnum(
236         col->getKeywordValuesForLocale("collation", Locale::getEnglish(), TRUE, errorCode));
237     if (errorCode.errIfFailureAndReset("Get Keyword Values for English Collator failed")) {
238         return;
239     }
240     assertTrue("expect at least one collation tailoring for English", kwEnum->count(errorCode) > 0);
241     const char *kw;
242     UBool hasStandard = FALSE;
243     while ((kw = kwEnum->next(NULL, errorCode)) != NULL) {
244         if (strcmp(kw, "standard") == 0) {
245             hasStandard = TRUE;
246         }
247     }
248     assertTrue("expect at least the 'standard' collation tailoring for English", hasStandard);
249 }
250 
251 void
TestRuleBasedColl()252 CollationAPITest::TestRuleBasedColl()
253 {
254     RuleBasedCollator *col1, *col2, *col3, *col4;
255     UErrorCode status = U_ZERO_ERROR;
256 
257     UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
258     UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
259 
260     col1 = new RuleBasedCollator(ruleset1, status);
261     if (U_FAILURE(status)) {
262         errcheckln(status, "RuleBased Collator creation failed. - %s", u_errorName(status));
263         return;
264     }
265     else {
266         logln("PASS: RuleBased Collator creation passed\n");
267     }
268 
269     status = U_ZERO_ERROR;
270     col2 = new RuleBasedCollator(ruleset2, status);
271     if (U_FAILURE(status)) {
272         errln("RuleBased Collator creation failed.\n");
273         return;
274     }
275     else {
276         logln("PASS: RuleBased Collator creation passed\n");
277     }
278 
279     status = U_ZERO_ERROR;
280     Locale locale("aa", "AA");
281     col3 = (RuleBasedCollator *)Collator::createInstance(locale, status);
282     if (U_FAILURE(status)) {
283         errln("Fallback Collator creation failed.: %s\n");
284         return;
285     }
286     else {
287         logln("PASS: Fallback Collator creation passed\n");
288     }
289     delete col3;
290 
291     status = U_ZERO_ERROR;
292     col3 = (RuleBasedCollator *)Collator::createInstance(status);
293     if (U_FAILURE(status)) {
294         errln("Default Collator creation failed.: %s\n");
295         return;
296     }
297     else {
298         logln("PASS: Default Collator creation passed\n");
299     }
300 
301     UnicodeString rule1 = col1->getRules();
302     UnicodeString rule2 = col2->getRules();
303     UnicodeString rule3 = col3->getRules();
304 
305     doAssert(rule1 != rule2, "Default collator getRules failed");
306     doAssert(rule2 != rule3, "Default collator getRules failed");
307     doAssert(rule1 != rule3, "Default collator getRules failed");
308 
309     col4 = new RuleBasedCollator(rule2, status);
310     if (U_FAILURE(status)) {
311         errln("RuleBased Collator creation failed.\n");
312         return;
313     }
314 
315     UnicodeString rule4 = col4->getRules();
316     doAssert(rule2 == rule4, "Default collator getRules failed");
317     int32_t length4 = 0;
318     uint8_t *clonedrule4 = col4->cloneRuleData(length4, status);
319     if (U_FAILURE(status)) {
320         errln("Cloned rule data failed.\n");
321         return;
322     }
323 
324  //   free(clonedrule4);     BAD API!!!!
325     uprv_free(clonedrule4);
326 
327 
328     delete col1;
329     delete col2;
330     delete col3;
331     delete col4;
332 }
333 
334 void
TestRules()335 CollationAPITest::TestRules()
336 {
337     RuleBasedCollator *coll;
338     UErrorCode status = U_ZERO_ERROR;
339     UnicodeString rules;
340 
341     coll = (RuleBasedCollator *)Collator::createInstance(Locale::getEnglish(), status);
342     if (U_FAILURE(status)) {
343         errcheckln(status, "English Collator creation failed. - %s", u_errorName(status));
344         return;
345     }
346     else {
347         logln("PASS: RuleBased Collator creation passed\n");
348     }
349 
350     coll->getRules(UCOL_TAILORING_ONLY, rules);
351     if (rules.length() != 0x00) {
352       errln("English tailored rules failed - length is 0x%x expected 0x%x", rules.length(), 0x00);
353     }
354 
355     coll->getRules(UCOL_FULL_RULES, rules);
356     if (rules.length() < 0) {
357         errln("English full rules failed");
358     }
359     delete coll;
360 }
361 
362 void
TestDecomposition()363 CollationAPITest::TestDecomposition() {
364   UErrorCode status = U_ZERO_ERROR;
365   Collator *en_US = Collator::createInstance("en_US", status),
366     *el_GR = Collator::createInstance("el_GR", status),
367     *vi_VN = Collator::createInstance("vi_VN", status);
368 
369   if (U_FAILURE(status)) {
370     errcheckln(status, "ERROR: collation creation failed. - %s", u_errorName(status));
371     return;
372   }
373 
374   /* there is no reason to have canonical decomposition in en_US OR default locale */
375   if (vi_VN->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
376   {
377     errln("ERROR: vi_VN collation did not have canonical decomposition for normalization!\n");
378   }
379 
380   if (el_GR->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
381   {
382     errln("ERROR: el_GR collation did not have canonical decomposition for normalization!\n");
383   }
384 
385   if (en_US->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
386   {
387     errln("ERROR: en_US collation had canonical decomposition for normalization!\n");
388   }
389 
390   delete en_US;
391   delete el_GR;
392   delete vi_VN;
393 }
394 
395 void
TestSafeClone()396 CollationAPITest::TestSafeClone() {
397     static const int CLONETEST_COLLATOR_COUNT = 3;
398     Collator *someCollators [CLONETEST_COLLATOR_COUNT];
399     Collator *col;
400     UErrorCode err = U_ZERO_ERROR;
401     int index;
402 
403     UnicodeString test1("abCda");
404     UnicodeString test2("abcda");
405 
406     /* one default collator & two complex ones */
407     someCollators[0] = Collator::createInstance("en_US", err);
408     someCollators[1] = Collator::createInstance("ko", err);
409     someCollators[2] = Collator::createInstance("ja_JP", err);
410     if(U_FAILURE(err)) {
411       errcheckln(err, "Couldn't instantiate collators. Error: %s", u_errorName(err));
412       delete someCollators[0];
413       delete someCollators[1];
414       delete someCollators[2];
415       return;
416     }
417 
418     /* change orig & clone & make sure they are independent */
419 
420     for (index = 0; index < CLONETEST_COLLATOR_COUNT; index++)
421     {
422         col = someCollators[index]->safeClone();
423         if (col == 0) {
424             errln("SafeClone of collator should not return null\n");
425             break;
426         }
427         col->setStrength(Collator::TERTIARY);
428         someCollators[index]->setStrength(Collator::PRIMARY);
429         col->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
430         someCollators[index]->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
431 
432         doAssert(col->greater(test1, test2), "Result should be \"abCda\" >>> \"abcda\" ");
433         doAssert(someCollators[index]->equals(test1, test2), "Result should be \"abcda\" == \"abCda\"");
434         delete col;
435         delete someCollators[index];
436     }
437 }
438 
439 void
TestHashCode()440 CollationAPITest::TestHashCode(/* char* par */)
441 {
442     logln("hashCode tests begin.");
443     UErrorCode success = U_ZERO_ERROR;
444     Collator *col1 = 0;
445     col1 = Collator::createInstance(Locale::getEnglish(), success);
446     if (U_FAILURE(success))
447     {
448         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
449         return;
450     }
451 
452     Collator *col2 = 0;
453     Locale dk("da", "DK", "");
454     col2 = Collator::createInstance(dk, success);
455     if (U_FAILURE(success))
456     {
457         errln("Danish collation creation failed.");
458         return;
459     }
460 
461     Collator *col3 = 0;
462     col3 = Collator::createInstance(Locale::getEnglish(), success);
463     if (U_FAILURE(success))
464     {
465         errln("2nd default collation creation failed.");
466         return;
467     }
468 
469     logln("Collator::hashCode() testing ...");
470 
471     doAssert(col1->hashCode() != col2->hashCode(), "Hash test1 result incorrect" );
472     doAssert(!(col1->hashCode() == col2->hashCode()), "Hash test2 result incorrect" );
473     doAssert(col1->hashCode() == col3->hashCode(), "Hash result not equal" );
474 
475     logln("hashCode tests end.");
476     delete col1;
477     delete col2;
478 
479     UnicodeString test1("Abcda");
480     UnicodeString test2("abcda");
481 
482     CollationKey sortk1, sortk2, sortk3;
483     UErrorCode status = U_ZERO_ERROR;
484 
485     col3->getCollationKey(test1, sortk1, status);
486     col3->getCollationKey(test2, sortk2, status);
487     col3->getCollationKey(test2, sortk3, status);
488 
489     doAssert(sortk1.hashCode() != sortk2.hashCode(), "Hash test1 result incorrect");
490     doAssert(sortk2.hashCode() == sortk3.hashCode(), "Hash result not equal" );
491 
492     delete col3;
493 }
494 
495 //----------------------------------------------------------------------------
496 // CollationKey -- Tests the CollationKey methods
497 //
498 void
TestCollationKey()499 CollationAPITest::TestCollationKey(/* char* par */)
500 {
501     logln("testing CollationKey begins...");
502     Collator *col = 0;
503     UErrorCode success=U_ZERO_ERROR;
504     col = Collator::createInstance(Locale::getEnglish(), success);
505     if (U_FAILURE(success))
506     {
507         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
508         return;
509     }
510     col->setStrength(Collator::TERTIARY);
511 
512     CollationKey sortk1, sortk2;
513     UnicodeString test1("Abcda"), test2("abcda");
514     UErrorCode key1Status = U_ZERO_ERROR, key2Status = U_ZERO_ERROR;
515 
516     logln("Testing weird arguments");
517     // No string vs. empty string vs. completely-ignorable string:
518     // See ICU ticket #10495.
519     CollationKey sortkNone;
520     int32_t length;
521     sortkNone.getByteArray(length);
522     doAssert(!sortkNone.isBogus() && length == 0,
523              "Default-constructed collation key should be empty");
524     CollationKey sortkEmpty;
525     col->getCollationKey(NULL, 0, sortkEmpty, key1Status);
526     // key gets reset here
527     const uint8_t* byteArrayEmpty = sortkEmpty.getByteArray(length);
528     doAssert(sortkEmpty.isBogus() == FALSE && length == 3 &&
529              byteArrayEmpty[0] == 1 && byteArrayEmpty[1] == 1 && byteArrayEmpty[2] == 0,
530              "Empty string should return a collation key with empty levels");
531     doAssert(sortkNone.compareTo(sortkEmpty) == Collator::LESS,
532              "Expected no collation key < collation key for empty string");
533     doAssert(sortkEmpty.compareTo(sortkNone) == Collator::GREATER,
534              "Expected collation key for empty string > no collation key");
535 
536     CollationKey sortkIgnorable;
537     // Most control codes and CGJ are completely ignorable.
538     // A string with only completely ignorables must compare equal to an empty string.
539     col->getCollationKey(UnicodeString((UChar)1).append((UChar)0x34f), sortkIgnorable, key1Status);
540     sortkIgnorable.getByteArray(length);
541     doAssert(!sortkIgnorable.isBogus() && length == 3,
542              "Completely ignorable string should return a collation key with empty levels");
543     doAssert(sortkIgnorable.compareTo(sortkEmpty) == Collator::EQUAL,
544              "Completely ignorable string should compare equal to empty string");
545 
546     // bogus key returned here
547     key1Status = U_ILLEGAL_ARGUMENT_ERROR;
548     col->getCollationKey(NULL, 0, sortk1, key1Status);
549     doAssert(sortk1.isBogus() && (sortk1.getByteArray(length), length) == 0,
550         "Error code should return bogus collation key");
551 
552     key1Status = U_ZERO_ERROR;
553     logln("Use tertiary comparison level testing ....");
554 
555     col->getCollationKey(test1, sortk1, key1Status);
556     if (U_FAILURE(key1Status)) {
557         errln("getCollationKey(Abcda) failed - %s", u_errorName(key1Status));
558         return;
559     }
560     doAssert((sortk1.compareTo(col->getCollationKey(test2, sortk2, key2Status)))
561                  == Collator::GREATER,
562                 "Result should be \"Abcda\" >>> \"abcda\"");
563 
564     CollationKey sortk3(sortk2), sortkNew;
565 
566     sortkNew = sortk1;
567     doAssert((sortk1 != sortk2), "The sort keys should be different");
568     doAssert((sortk1.hashCode() != sortk2.hashCode()), "sort key hashCode() failed");
569     doAssert((sortk2 == sortk3), "The sort keys should be the same");
570     doAssert((sortk1 == sortkNew), "The sort keys assignment failed");
571     doAssert((sortk1.hashCode() == sortkNew.hashCode()), "sort key hashCode() failed");
572     doAssert((sortkNew != sortk3), "The sort keys should be different");
573     doAssert(sortk1.compareTo(sortk3) == Collator::GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
574     doAssert(sortk2.compareTo(sortk3) == Collator::EQUAL, "Result should be \"abcda\" == \"abcda\"");
575     doAssert(sortkEmpty.compareTo(sortk1) == Collator::LESS, "Result should be (empty key) <<< \"Abcda\"");
576     doAssert(sortk1.compareTo(sortkEmpty) == Collator::GREATER, "Result should be \"Abcda\" >>> (empty key)");
577     doAssert(sortkEmpty.compareTo(sortkEmpty) == Collator::EQUAL, "Result should be (empty key) == (empty key)");
578     doAssert(sortk1.compareTo(sortk3, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
579     doAssert(sortk2.compareTo(sortk3, success) == UCOL_EQUAL, "Result should be \"abcda\" == \"abcda\"");
580     doAssert(sortkEmpty.compareTo(sortk1, success) == UCOL_LESS, "Result should be (empty key) <<< \"Abcda\"");
581     doAssert(sortk1.compareTo(sortkEmpty, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> (empty key)");
582     doAssert(sortkEmpty.compareTo(sortkEmpty, success) == UCOL_EQUAL, "Result should be (empty key) == (empty key)");
583 
584     int32_t    cnt1, cnt2, cnt3, cnt4;
585 
586     const uint8_t* byteArray1 = sortk1.getByteArray(cnt1);
587     const uint8_t* byteArray2 = sortk2.getByteArray(cnt2);
588 
589     const uint8_t* byteArray3 = 0;
590     byteArray3 = sortk1.getByteArray(cnt3);
591 
592     const uint8_t* byteArray4 = 0;
593     byteArray4 = sortk2.getByteArray(cnt4);
594 
595     CollationKey sortk4(byteArray1, cnt1), sortk5(byteArray2, cnt2);
596     CollationKey sortk6(byteArray3, cnt3), sortk7(byteArray4, cnt4);
597 
598     doAssert(sortk1.compareTo(sortk4) == Collator::EQUAL, "CollationKey::toByteArray(sortk1) Failed.");
599     doAssert(sortk2.compareTo(sortk5) == Collator::EQUAL, "CollationKey::toByteArray(sortk2) Failed.");
600     doAssert(sortk4.compareTo(sortk5) == Collator::GREATER, "sortk4 >>> sortk5 Failed");
601     doAssert(sortk1.compareTo(sortk6) == Collator::EQUAL, "CollationKey::getByteArray(sortk1) Failed.");
602     doAssert(sortk2.compareTo(sortk7) == Collator::EQUAL, "CollationKey::getByteArray(sortk2) Failed.");
603     doAssert(sortk6.compareTo(sortk7) == Collator::GREATER, "sortk6 >>> sortk7 Failed");
604 
605     logln("Equality tests : ");
606     doAssert(sortk1 == sortk4, "sortk1 == sortk4 Failed.");
607     doAssert(sortk2 == sortk5, "sortk2 == sortk5 Failed.");
608     doAssert(sortk1 != sortk5, "sortk1 != sortk5 Failed.");
609     doAssert(sortk1 == sortk6, "sortk1 == sortk6 Failed.");
610     doAssert(sortk2 == sortk7, "sortk2 == sortk7 Failed.");
611     doAssert(sortk1 != sortk7, "sortk1 != sortk7 Failed.");
612 
613     byteArray1 = 0;
614     byteArray2 = 0;
615 
616     sortk3 = sortk1;
617     doAssert(sortk1 == sortk3, "sortk1 = sortk3 assignment Failed.");
618     doAssert(sortk2 != sortk3, "sortk2 != sortk3 Failed.");
619     logln("testing sortkey ends...");
620 
621     col->setStrength(Collator::SECONDARY);
622     doAssert(col->getCollationKey(test1, sortk1, key1Status).compareTo(
623                                   col->getCollationKey(test2, sortk2, key2Status))
624                                   == Collator::EQUAL,
625                                   "Result should be \"Abcda\" == \"abcda\"");
626     delete col;
627 }
628 
629 //----------------------------------------------------------------------------
// Tests the CollationElementIterator class.
631 // ctor, RuleBasedCollator::createCollationElementIterator(), operator==, operator!=
632 //
633 void
TestElemIter()634 CollationAPITest::TestElemIter(/* char* par */)
635 {
636     logln("testing sortkey begins...");
637     Collator *col = 0;
638     UErrorCode success = U_ZERO_ERROR;
639     col = Collator::createInstance(Locale::getEnglish(), success);
640     if (U_FAILURE(success))
641     {
642         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
643         return;
644     }
645 
646     UnicodeString testString1("XFILE What subset of all possible test cases has the highest probability of detecting the most errors?");
647     UnicodeString testString2("Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?");
648     logln("Constructors and comparison testing....");
649     CollationElementIterator *iterator1 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
650 
651     CharacterIterator *chariter=new StringCharacterIterator(testString1);
652     CollationElementIterator *coliter=((RuleBasedCollator*)col)->createCollationElementIterator(*chariter);
653 
654     // copy ctor
655     CollationElementIterator *iterator2 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
656     CollationElementIterator *iterator3 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString2);
657 
658     int32_t offset = iterator1->getOffset();
659     if (offset != 0) {
660         errln("Error in getOffset for collation element iterator\n");
661         return;
662     }
663     iterator1->setOffset(6, success);
664     if (U_FAILURE(success)) {
665         errln("Error in setOffset for collation element iterator\n");
666         return;
667     }
668     iterator1->setOffset(0, success);
669     int32_t order1, order2, order3;
670     doAssert((*iterator1 == *iterator2), "The two iterators should be the same");
671     doAssert((*iterator1 != *iterator3), "The two iterators should be different");
672 
673     doAssert((*coliter == *iterator1), "The two iterators should be the same");
674     doAssert((*coliter == *iterator2), "The two iterators should be the same");
675     doAssert((*coliter != *iterator3), "The two iterators should be different");
676 
677     order1 = iterator1->next(success);
678     if (U_FAILURE(success))
679     {
680         errln("Somehow ran out of memory stepping through the iterator.");
681         return;
682     }
683 
684     doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
685     order2 = iterator2->getOffset();
686     doAssert((order1 != order2), "The order result should not be the same");
687     order2 = iterator2->next(success);
688     if (U_FAILURE(success))
689     {
690         errln("Somehow ran out of memory stepping through the iterator.");
691         return;
692     }
693 
694     doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
695     doAssert((order1 == order2), "The order result should be the same");
696     order3 = iterator3->next(success);
697     if (U_FAILURE(success))
698     {
699         errln("Somehow ran out of memory stepping through the iterator.");
700         return;
701     }
702 
703     doAssert((CollationElementIterator::primaryOrder(order1) ==
704         CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
705     doAssert((CollationElementIterator::secondaryOrder(order1) ==
706         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
707     doAssert((CollationElementIterator::tertiaryOrder(order1) ==
708         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
709 
710     order1 = iterator1->next(success); order3 = iterator3->next(success);
711     if (U_FAILURE(success))
712     {
713         errln("Somehow ran out of memory stepping through the iterator.");
714         return;
715     }
716 
717     doAssert((CollationElementIterator::primaryOrder(order1) ==
718         CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
719     doAssert((CollationElementIterator::tertiaryOrder(order1) !=
720         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
721 
722     order1 = iterator1->next(success);
723     order3 = iterator3->next(success);
724     /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
725     /*
726     doAssert((CollationElementIterator::secondaryOrder(order1) !=
727         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
728     */
729     doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
730 
731     iterator1->reset(); iterator2->reset(); iterator3->reset();
732     order1 = iterator1->next(success);
733     if (U_FAILURE(success))
734     {
735         errln("Somehow ran out of memory stepping through the iterator.");
736         return;
737     }
738 
739     doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
740 
741     order2 = iterator2->next(success);
742     if (U_FAILURE(success))
743     {
744         errln("Somehow ran out of memory stepping through the iterator.");
745         return;
746     }
747 
748     doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
749     doAssert((order1 == order2), "The order result should be the same");
750 
751     order3 = iterator3->next(success);
752     if (U_FAILURE(success))
753     {
754         errln("Somehow ran out of memory stepping through the iterator.");
755         return;
756     }
757 
758     doAssert((CollationElementIterator::primaryOrder(order1) ==
759         CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
760     doAssert((CollationElementIterator::secondaryOrder(order1) ==
761         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
762     doAssert((CollationElementIterator::tertiaryOrder(order1) ==
763         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
764 
765     order1 = iterator1->next(success); order2 = iterator2->next(success); order3 = iterator3->next(success);
766     if (U_FAILURE(success))
767     {
768         errln("Somehow ran out of memory stepping through the iterator.");
769         return;
770     }
771 
772     doAssert((CollationElementIterator::primaryOrder(order1) ==
773         CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
774     doAssert((CollationElementIterator::tertiaryOrder(order1) !=
775         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
776 
777     order1 = iterator1->next(success); order3 = iterator3->next(success);
778     if (U_FAILURE(success))
779     {
780         errln("Somehow ran out of memory stepping through the iterator.");
781         return;
782     }
783 
784     /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
785     /*
786     doAssert((CollationElementIterator::secondaryOrder(order1) !=
787         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
788     */
789     doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
790     doAssert((*iterator2 != *iterator3), "The iterators should be different");
791 
792 
793     //test error values
794     success=U_UNSUPPORTED_ERROR;
795     Collator *colerror=NULL;
796     colerror=Collator::createInstance(Locale::getEnglish(), success);
797     if (colerror != 0 || success == U_ZERO_ERROR){
798         errln("Error: createInstance(UErrorCode != U_ZERO_ERROR) should just return and not create an instance\n");
799     }
800     int32_t position=coliter->previous(success);
801     if(position != CollationElementIterator::NULLORDER){
802         errln((UnicodeString)"Expected NULLORDER got" + position);
803     }
804     coliter->reset();
805     coliter->setText(*chariter, success);
806     if(!U_FAILURE(success)){
807         errln("Expeceted error");
808     }
809     iterator1->setText((UnicodeString)"hello there", success);
810     if(!U_FAILURE(success)){
811         errln("Expeceted error");
812     }
813 
814     delete chariter;
815     delete coliter;
816     delete iterator1;
817     delete iterator2;
818     delete iterator3;
819     delete col;
820 
821 
822 
823     logln("testing CollationElementIterator ends...");
824 }
825 
826 // Test RuleBasedCollator ctor, dtor, operator==, operator!=, clone, copy, and getRules
827 void
TestOperators()828 CollationAPITest::TestOperators(/* char* par */)
829 {
830     UErrorCode success = U_ZERO_ERROR;
831     UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
832     UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
833     RuleBasedCollator *col1 = new RuleBasedCollator(ruleset1, success);
834     if (U_FAILURE(success)) {
835         errcheckln(success, "RuleBasedCollator creation failed. - %s", u_errorName(success));
836         return;
837     }
838     success = U_ZERO_ERROR;
839     RuleBasedCollator *col2 = new RuleBasedCollator(ruleset2, success);
840     if (U_FAILURE(success)) {
841         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set.");
842         return;
843     }
844     logln("The operator tests begin : ");
845     logln("testing operator==, operator!=, clone  methods ...");
846     doAssert((*col1 != *col2), "The two different table collations compared equal");
847     *col1 = *col2;
848     doAssert((*col1 == *col2), "Collator objects not equal after assignment (operator=)");
849 
850     success = U_ZERO_ERROR;
851     Collator *col3 = Collator::createInstance(Locale::getEnglish(), success);
852     if (U_FAILURE(success)) {
853         errln("Default collation creation failed.");
854         return;
855     }
856     doAssert((*col1 != *col3), "The two different table collations compared equal");
857     Collator* col4 = col1->clone();
858     Collator* col5 = col3->clone();
859     doAssert((*col1 == *col4), "Cloned collation objects not equal");
860     doAssert((*col3 != *col4), "Two different table collations compared equal");
861     doAssert((*col3 == *col5), "Cloned collation objects not equal");
862     doAssert((*col4 != *col5), "Two cloned collations compared equal");
863 
864     const UnicodeString& defRules = ((RuleBasedCollator*)col3)->getRules();
865     RuleBasedCollator* col6 = new RuleBasedCollator(defRules, success);
866     if (U_FAILURE(success)) {
867         errln("Creating default collation with rules failed.");
868         return;
869     }
870     doAssert((((RuleBasedCollator*)col3)->getRules() == col6->getRules()), "Default collator getRules failed");
871 
872     success = U_ZERO_ERROR;
873     RuleBasedCollator *col7 = new RuleBasedCollator(ruleset2, Collator::TERTIARY, success);
874     if (U_FAILURE(success)) {
875         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength.");
876         return;
877     }
878     success = U_ZERO_ERROR;
879     RuleBasedCollator *col8 = new RuleBasedCollator(ruleset2, UCOL_OFF, success);
880     if (U_FAILURE(success)) {
881         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with Normalizer::NO_OP.");
882         return;
883     }
884     success = U_ZERO_ERROR;
885     RuleBasedCollator *col9 = new RuleBasedCollator(ruleset2, Collator::PRIMARY, UCOL_ON, success);
886     if (U_FAILURE(success)) {
887         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength and Normalizer::NO_OP.");
888         return;
889     }
890   //  doAssert((*col7 == *col8), "The two equal table collations compared different");
891     doAssert((*col7 != *col9), "The two different table collations compared equal");
892     doAssert((*col8 != *col9), "The two different table collations compared equal");
893 
894     logln("operator tests ended.");
895     delete col1;
896     delete col2;
897     delete col3;
898     delete col4;
899     delete col5;
900     delete col6;
901     delete col7;
902     delete col8;
903     delete col9;
904 }
905 
906 // test clone and copy
907 void
TestDuplicate()908 CollationAPITest::TestDuplicate(/* char* par */)
909 {
910     UErrorCode status = U_ZERO_ERROR;
911     Collator *col1 = Collator::createInstance(Locale::getEnglish(), status);
912     if (U_FAILURE(status)) {
913         logln("Default collator creation failed.");
914         return;
915     }
916     Collator *col2 = col1->clone();
917     doAssert((*col1 == *col2), "Cloned object is not equal to the orginal");
918     UnicodeString ruleset("&9 < a, A < b, B < c, C < d, D, e, E");
919     RuleBasedCollator *col3 = new RuleBasedCollator(ruleset, status);
920     if (U_FAILURE(status)) {
921         logln("Collation tailoring failed.");
922         return;
923     }
924     doAssert((*col1 != *col3), "Cloned object is equal to some dummy");
925     *col3 = *((RuleBasedCollator*)col1);
926     doAssert((*col1 == *col3), "Copied object is not equal to the orginal");
927 
928     UCollationResult res;
929     UnicodeString first((UChar)0x0061);
930     UnicodeString second((UChar)0x0062);
931     UnicodeString copiedEnglishRules(((RuleBasedCollator*)col1)->getRules());
932 
933     delete col1;
934 
935     // Try using the cloned collators after deleting the original data
936     res = col2->compare(first, second, status);
937     if(res != UCOL_LESS) {
938         errln("a should be less then b after tailoring");
939     }
940     if (((RuleBasedCollator*)col2)->getRules() != copiedEnglishRules) {
941         errln(UnicodeString("English rule difference. ")
942             + copiedEnglishRules + UnicodeString("\ngetRules=") + ((RuleBasedCollator*)col2)->getRules());
943     }
944     res = col3->compare(first, second, status);
945     if(res != UCOL_LESS) {
946         errln("a should be less then b after tailoring");
947     }
948     if (col3->getRules() != copiedEnglishRules) {
949         errln(UnicodeString("English rule difference. ")
950             + copiedEnglishRules + UnicodeString("\ngetRules=") + col3->getRules());
951     }
952 
953     delete col2;
954     delete col3;
955 }
956 
957 void
TestCompare()958 CollationAPITest::TestCompare(/* char* par */)
959 {
960     logln("The compare tests begin : ");
961     Collator *col = 0;
962     UErrorCode success = U_ZERO_ERROR;
963     col = Collator::createInstance(Locale::getEnglish(), success);
964     if (U_FAILURE(success)) {
965         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
966         return;
967     }
968     UnicodeString test1("Abcda"), test2("abcda");
969     logln("Use tertiary comparison level testing ....");
970 
971     doAssert((!col->equals(test1, test2) ), "Result should be \"Abcda\" != \"abcda\"");
972     doAssert((col->greater(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
973     doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
974 
975     col->setStrength(Collator::SECONDARY);
976     logln("Use secondary comparison level testing ....");
977 
978     doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
979     doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
980     doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
981 
982     col->setStrength(Collator::PRIMARY);
983     logln("Use primary comparison level testing ....");
984 
985     doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
986     doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
987     doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
988 
989     // Test different APIs
990     const UChar* t1 = test1.getBuffer();
991     int32_t t1Len = test1.length();
992     const UChar* t2 = test2.getBuffer();
993     int32_t t2Len = test2.length();
994 
995     doAssert((col->compare(test1, test2) == Collator::EQUAL), "Problem");
996     doAssert((col->compare(test1, test2, success) == UCOL_EQUAL), "Problem");
997     doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::EQUAL), "Problem");
998     doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_EQUAL), "Problem");
999     doAssert((col->compare(test1, test2, t1Len) == Collator::EQUAL), "Problem");
1000     doAssert((col->compare(test1, test2, t1Len, success) == UCOL_EQUAL), "Problem");
1001 
1002     col->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, success);
1003     doAssert((col->compare(test1, test2) == Collator::GREATER), "Problem");
1004     doAssert((col->compare(test1, test2, success) == UCOL_GREATER), "Problem");
1005     doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::GREATER), "Problem");
1006     doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_GREATER), "Problem");
1007     doAssert((col->compare(test1, test2, t1Len) == Collator::GREATER), "Problem");
1008     doAssert((col->compare(test1, test2, t1Len, success) == UCOL_GREATER), "Problem");
1009 
1010 
1011 
1012     logln("The compare tests end.");
1013     delete col;
1014 }
1015 
1016 void
TestGetAll()1017 CollationAPITest::TestGetAll(/* char* par */)
1018 {
1019     int32_t count1, count2;
1020     UErrorCode status = U_ZERO_ERROR;
1021 
1022     logln("Trying Collator::getAvailableLocales(int&)");
1023 
1024     const Locale* list = Collator::getAvailableLocales(count1);
1025     for (int32_t i = 0; i < count1; ++i) {
1026         UnicodeString dispName;
1027         logln(UnicodeString("Locale name: ")
1028             + UnicodeString(list[i].getName())
1029             + UnicodeString(" , the display name is : ")
1030             + UnicodeString(list[i].getDisplayName(dispName)));
1031     }
1032 
1033     if (count1 == 0 || list == NULL) {
1034         dataerrln("getAvailableLocales(int&) returned an empty list");
1035     }
1036 
1037     logln("Trying Collator::getAvailableLocales()");
1038     StringEnumeration* localeEnum = Collator::getAvailableLocales();
1039     const UnicodeString* locStr;
1040     const char *locCStr;
1041     count2 = 0;
1042 
1043     if (localeEnum == NULL) {
1044         dataerrln("getAvailableLocales() returned NULL");
1045         return;
1046     }
1047 
1048     while ((locStr = localeEnum->snext(status)) != NULL)
1049     {
1050         logln(UnicodeString("Locale name is: ") + *locStr);
1051         count2++;
1052     }
1053     if (count1 != count2) {
1054         errln("getAvailableLocales(int&) returned %d and getAvailableLocales() returned %d", count1, count2);
1055     }
1056 
1057     logln("Trying Collator::getAvailableLocales() clone");
1058     count1 = 0;
1059     StringEnumeration* localeEnum2 = localeEnum->clone();
1060     localeEnum2->reset(status);
1061     while ((locCStr = localeEnum2->next(NULL, status)) != NULL)
1062     {
1063         logln(UnicodeString("Locale name is: ") + UnicodeString(locCStr));
1064         count1++;
1065     }
1066     if (count1 != count2) {
1067         errln("getAvailableLocales(3rd time) returned %d and getAvailableLocales(2nd time) returned %d", count1, count2);
1068     }
1069     if (localeEnum->count(status) != count1) {
1070         errln("localeEnum->count() returned %d and getAvailableLocales() returned %d", localeEnum->count(status), count1);
1071     }
1072     delete localeEnum;
1073     delete localeEnum2;
1074 }
1075 
TestSortKey()1076 void CollationAPITest::TestSortKey()
1077 {
1078     UErrorCode status = U_ZERO_ERROR;
1079     /*
1080     this is supposed to open default date format, but later on it treats
1081     it like it is "en_US"
1082     - very bad if you try to run the tests on machine where default
1083       locale is NOT "en_US"
1084     */
1085     Collator *col = Collator::createInstance(Locale::getEnglish(), status);
1086     if (U_FAILURE(status)) {
1087         errcheckln(status, "ERROR: Default collation creation failed.: %s\n", u_errorName(status));
1088         return;
1089     }
1090 
1091     if (col->getStrength() != Collator::TERTIARY)
1092     {
1093         errln("ERROR: default collation did not have UCOL_DEFAULT_STRENGTH !\n");
1094     }
1095 
1096     /* Need to use identical strength */
1097     col->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, status);
1098 
1099     UChar test1[6] = {0x41, 0x62, 0x63, 0x64, 0x61, 0},
1100           test2[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0},
1101           test3[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0};
1102 
1103     uint8_t sortkey1[64];
1104     uint8_t sortkey2[64];
1105     uint8_t sortkey3[64];
1106 
1107     logln("Use tertiary comparison level testing ....\n");
1108 
1109     CollationKey key1;
1110     col->getCollationKey(test1, u_strlen(test1), key1, status);
1111 
1112     CollationKey key2;
1113     col->getCollationKey(test2, u_strlen(test2), key2, status);
1114 
1115     CollationKey key3;
1116     col->getCollationKey(test3, u_strlen(test3), key3, status);
1117 
1118     doAssert(key1.compareTo(key2) == Collator::GREATER,
1119         "Result should be \"Abcda\" > \"abcda\"");
1120     doAssert(key2.compareTo(key1) == Collator::LESS,
1121         "Result should be \"abcda\" < \"Abcda\"");
1122     doAssert(key2.compareTo(key3) == Collator::EQUAL,
1123         "Result should be \"abcda\" ==  \"abcda\"");
1124 
1125     // Clone the key2 sortkey for later.
1126     int32_t keylength = 0;
1127     const uint8_t *key2primary_alias = key2.getByteArray(keylength);
1128     LocalArray<uint8_t> key2primary(new uint8_t[keylength]);
1129     memcpy(key2primary.getAlias(), key2primary_alias, keylength);
1130 
1131     col->getSortKey(test1, sortkey1, 64);
1132     col->getSortKey(test2, sortkey2, 64);
1133     col->getSortKey(test3, sortkey3, 64);
1134 
1135     const uint8_t *tempkey = key1.getByteArray(keylength);
1136     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1137         "Test1 string should have the same collation key and sort key");
1138     tempkey = key2.getByteArray(keylength);
1139     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1140         "Test2 string should have the same collation key and sort key");
1141     tempkey = key3.getByteArray(keylength);
1142     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1143         "Test3 string should have the same collation key and sort key");
1144 
1145     col->getSortKey(test1, 5, sortkey1, 64);
1146     col->getSortKey(test2, 5, sortkey2, 64);
1147     col->getSortKey(test3, 5, sortkey3, 64);
1148 
1149     tempkey = key1.getByteArray(keylength);
1150     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1151         "Test1 string should have the same collation key and sort key");
1152     tempkey = key2.getByteArray(keylength);
1153     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1154         "Test2 string should have the same collation key and sort key");
1155     tempkey = key3.getByteArray(keylength);
1156     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1157         "Test3 string should have the same collation key and sort key");
1158 
1159     UnicodeString strtest1(test1);
1160     col->getSortKey(strtest1, sortkey1, 64);
1161     UnicodeString strtest2(test2);
1162     col->getSortKey(strtest2, sortkey2, 64);
1163     UnicodeString strtest3(test3);
1164     col->getSortKey(strtest3, sortkey3, 64);
1165 
1166     tempkey = key1.getByteArray(keylength);
1167     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1168         "Test1 string should have the same collation key and sort key");
1169     tempkey = key2.getByteArray(keylength);
1170     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1171         "Test2 string should have the same collation key and sort key");
1172     tempkey = key3.getByteArray(keylength);
1173     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1174         "Test3 string should have the same collation key and sort key");
1175 
1176     logln("Use secondary comparision level testing ...\n");
1177     col->setStrength(Collator::SECONDARY);
1178 
1179     col->getCollationKey(test1, u_strlen(test1), key1, status);
1180     col->getCollationKey(test2, u_strlen(test2), key2, status);
1181     col->getCollationKey(test3, u_strlen(test3), key3, status);
1182 
1183     doAssert(key1.compareTo(key2) == Collator::EQUAL,
1184         "Result should be \"Abcda\" == \"abcda\"");
1185     doAssert(key2.compareTo(key3) == Collator::EQUAL,
1186         "Result should be \"abcda\" ==  \"abcda\"");
1187 
1188     tempkey = key2.getByteArray(keylength);
1189     doAssert(memcmp(tempkey, key2primary.getAlias(), keylength - 1) == 0,
1190              "Binary format for 'abcda' sortkey different for secondary strength!");
1191 
1192     col->getSortKey(test1, sortkey1, 64);
1193     col->getSortKey(test2, sortkey2, 64);
1194     col->getSortKey(test3, sortkey3, 64);
1195 
1196     tempkey = key1.getByteArray(keylength);
1197     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1198         "Test1 string should have the same collation key and sort key");
1199     tempkey = key2.getByteArray(keylength);
1200     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1201         "Test2 string should have the same collation key and sort key");
1202     tempkey = key3.getByteArray(keylength);
1203     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1204         "Test3 string should have the same collation key and sort key");
1205 
1206     col->getSortKey(test1, 5, sortkey1, 64);
1207     col->getSortKey(test2, 5, sortkey2, 64);
1208     col->getSortKey(test3, 5, sortkey3, 64);
1209 
1210     tempkey = key1.getByteArray(keylength);
1211     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1212         "Test1 string should have the same collation key and sort key");
1213     tempkey = key2.getByteArray(keylength);
1214     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1215         "Test2 string should have the same collation key and sort key");
1216     tempkey = key3.getByteArray(keylength);
1217     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1218         "Test3 string should have the same collation key and sort key");
1219 
1220     col->getSortKey(strtest1, sortkey1, 64);
1221     col->getSortKey(strtest2, sortkey2, 64);
1222     col->getSortKey(strtest3, sortkey3, 64);
1223 
1224     tempkey = key1.getByteArray(keylength);
1225     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1226         "Test1 string should have the same collation key and sort key");
1227     tempkey = key2.getByteArray(keylength);
1228     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1229         "Test2 string should have the same collation key and sort key");
1230     tempkey = key3.getByteArray(keylength);
1231     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1232         "Test3 string should have the same collation key and sort key");
1233 
1234     logln("testing sortkey ends...");
1235     delete col;
1236 }
1237 
TestSortKeyOverflow()1238 void CollationAPITest::TestSortKeyOverflow() {
1239     IcuTestErrorCode errorCode(*this, "TestSortKeyOverflow()");
1240     LocalPointer<Collator> col(Collator::createInstance(Locale::getEnglish(), errorCode));
1241     if (errorCode.errDataIfFailureAndReset("Collator::createInstance(English) failed")) {
1242         return;
1243     }
1244     col->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode);
1245     UChar i_and_phi[] = { 0x438, 0x3c6 };  // Cyrillic small i & Greek small phi.
1246     // The sort key should be 6 bytes:
1247     // 2 bytes for the Cyrillic i, 1 byte for the primary-compression terminator,
1248     // 2 bytes for the Greek phi, and 1 byte for the NUL terminator.
1249     uint8_t sortKey[12];
1250     int32_t length = col->getSortKey(i_and_phi, 2, sortKey, UPRV_LENGTHOF(sortKey));
1251     uint8_t sortKey2[12];
1252     for (int32_t capacity = 0; capacity < length; ++capacity) {
1253         uprv_memset(sortKey2, 2, UPRV_LENGTHOF(sortKey2));
1254         int32_t length2 = col->getSortKey(i_and_phi, 2, sortKey2, capacity);
1255         if (length2 != length || 0 != uprv_memcmp(sortKey, sortKey2, capacity)) {
1256             errln("getSortKey(i_and_phi, capacity=%d) failed to write proper prefix", capacity);
1257         } else if (sortKey2[capacity] != 2 || sortKey2[capacity + 1] != 2) {
1258             errln("getSortKey(i_and_phi, capacity=%d) wrote beyond capacity", capacity);
1259         }
1260     }
1261 
1262     // Now try to break getCollationKey().
1263     // Internally, it always starts with a large stack buffer.
1264     // Since we cannot control the initial capacity, we throw an increasing number
1265     // of characters at it, with the problematic part at the end.
1266     const int32_t longCapacity = 2000;
1267     // Each 'a' in the prefix should result in one primary sort key byte.
1268     // For i_and_phi we expect 6 bytes, then the NUL terminator.
1269     const int32_t maxPrefixLength = longCapacity - 6 - 1;
1270     LocalArray<uint8_t> longSortKey(new uint8_t[longCapacity]);
1271     UnicodeString s(FALSE, i_and_phi, 2);
1272     for (int32_t prefixLength = 0; prefixLength < maxPrefixLength; ++prefixLength) {
1273         length = col->getSortKey(s, longSortKey.getAlias(), longCapacity);
1274         CollationKey collKey;
1275         col->getCollationKey(s, collKey, errorCode);
1276         int32_t collKeyLength;
1277         const uint8_t *collSortKey = collKey.getByteArray(collKeyLength);
1278         if (collKeyLength != length || 0 != uprv_memcmp(longSortKey.getAlias(), collSortKey, length)) {
1279             errln("getCollationKey(prefix[%d]+i_and_phi) failed to write proper sort key", prefixLength);
1280         }
1281 
1282         // Insert an 'a' to match ++prefixLength.
1283         s.insert(prefixLength, (UChar)0x61);
1284     }
1285 }
1286 
TestMaxExpansion()1287 void CollationAPITest::TestMaxExpansion()
1288 {
1289     UErrorCode          status = U_ZERO_ERROR;
1290     UChar               ch     = 0;
1291     UChar32             unassigned = 0xEFFFD;
1292     uint32_t            sorder = 0;
1293     uint32_t            temporder = 0;
1294 
1295     UnicodeString rule("&a < ab < c/aba < d < z < ch");
1296     RuleBasedCollator coll(rule, status);
1297     if(U_FAILURE(status)) {
1298       errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
1299       return;
1300     }
1301     UnicodeString str(ch);
1302     CollationElementIterator *iter =
1303                                   coll.createCollationElementIterator(str);
1304 
1305     while (ch < 0xFFFF && U_SUCCESS(status)) {
1306         int      count = 1;
1307         uint32_t order;
1308         int32_t  size = 0;
1309 
1310         ch ++;
1311 
1312         str.setCharAt(0, ch);
1313         iter->setText(str, status);
1314         order = iter->previous(status);
1315 
1316         /* thai management */
1317         if (order == 0)
1318             order = iter->previous(status);
1319 
1320         while (U_SUCCESS(status) && iter->previous(status) != CollationElementIterator::NULLORDER) {
1321             count ++;
1322         }
1323 
1324         size = coll.getMaxExpansion(order);
1325         if (U_FAILURE(status) || size < count) {
1326             errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1327                   ch, size, count);
1328         }
1329     }
1330 
1331     /* testing for exact max expansion */
1332     int32_t size;
1333     ch = 0;
1334     while (ch < 0x61) {
1335         uint32_t order;
1336         str.setCharAt(0, ch);
1337         iter->setText(str, status);
1338         order = iter->previous(status);
1339         size  = coll.getMaxExpansion(order);
1340         if (U_FAILURE(status) || size != 1) {
1341             errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1342                   ch, size, 1);
1343         }
1344         ch ++;
1345     }
1346 
1347     ch = 0x63;
1348     str.setTo(ch);
1349     iter->setText(str, status);
1350     temporder = iter->previous(status);
1351     size = coll.getMaxExpansion(temporder);
1352     if (U_FAILURE(status) || size != 3) {
1353         errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1354               ch, temporder, size, 3);
1355     }
1356 
1357     ch = 0x64;
1358     str.setTo(ch);
1359     iter->setText(str, status);
1360     temporder = iter->previous(status);
1361     size = coll.getMaxExpansion(temporder);
1362     if (U_FAILURE(status) || size != 1) {
1363         errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1364               ch, temporder, size, 1);
1365     }
1366 
1367     str.setTo(unassigned);
1368     iter->setText(str, status);
1369     sorder = iter->previous(status);
1370     size = coll.getMaxExpansion(sorder);
1371     if (U_FAILURE(status) || size != 2) {
1372         errln("Failure at supplementary codepoints, maximum expansion count %d < %d",
1373               size, 2);
1374     }
1375 
1376     /* testing jamo */
1377     ch = 0x1165;
1378     str.setTo(ch);
1379     iter->setText(str, status);
1380     temporder = iter->previous(status);
1381     size = coll.getMaxExpansion(temporder);
1382     if (U_FAILURE(status) || size > 3) {
1383         errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1384               ch, size, 3);
1385     }
1386 
1387     delete iter;
1388 
1389     /* testing special jamo &a<\u1160 */
1390     rule = CharsToUnicodeString("\\u0026\\u0071\\u003c\\u1165\\u002f\\u0071\\u0071\\u0071\\u0071");
1391 
1392     RuleBasedCollator jamocoll(rule, status);
1393     iter = jamocoll.createCollationElementIterator(str);
1394     temporder = iter->previous(status);
1395     size = iter->getMaxExpansion(temporder);
1396     if (U_FAILURE(status) || size != 6) {
1397         errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1398               ch, size, 5);
1399     }
1400 
1401     delete iter;
1402 }
1403 
TestDisplayName()1404 void CollationAPITest::TestDisplayName()
1405 {
1406     UErrorCode error = U_ZERO_ERROR;
1407     Collator *coll = Collator::createInstance("en_US", error);
1408     if (U_FAILURE(error)) {
1409         errcheckln(error, "Failure creating english collator - %s", u_errorName(error));
1410         return;
1411     }
1412     UnicodeString name;
1413     UnicodeString result;
1414     coll->getDisplayName(Locale::getCanadaFrench(), result);
1415     Locale::getCanadaFrench().getDisplayName(name);
1416     if (result.compare(name)) {
1417         errln("Failure getting the correct name for locale en_US");
1418     }
1419 
1420     coll->getDisplayName(Locale::getSimplifiedChinese(), result);
1421     Locale::getSimplifiedChinese().getDisplayName(name);
1422     if (result.compare(name)) {
1423         errln("Failure getting the correct name for locale zh_SG");
1424     }
1425     delete coll;
1426 }
1427 
TestAttribute()1428 void CollationAPITest::TestAttribute()
1429 {
1430     UErrorCode error = U_ZERO_ERROR;
1431     Collator *coll = Collator::createInstance(error);
1432 
1433     if (U_FAILURE(error)) {
1434         errcheckln(error, "Creation of default collator failed - %s", u_errorName(error));
1435         return;
1436     }
1437 
1438     coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_OFF, error);
1439     if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_OFF ||
1440         U_FAILURE(error)) {
1441         errln("Setting and retrieving of the french collation failed");
1442     }
1443 
1444     coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_ON, error);
1445     if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_ON ||
1446         U_FAILURE(error)) {
1447         errln("Setting and retrieving of the french collation failed");
1448     }
1449 
1450     coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, error);
1451     if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_SHIFTED ||
1452         U_FAILURE(error)) {
1453         errln("Setting and retrieving of the alternate handling failed");
1454     }
1455 
1456     coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, error);
1457     if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_NON_IGNORABLE ||
1458         U_FAILURE(error)) {
1459         errln("Setting and retrieving of the alternate handling failed");
1460     }
1461 
1462     coll->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, error);
1463     if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_LOWER_FIRST ||
1464         U_FAILURE(error)) {
1465         errln("Setting and retrieving of the case first attribute failed");
1466     }
1467 
1468     coll->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, error);
1469     if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_UPPER_FIRST ||
1470         U_FAILURE(error)) {
1471         errln("Setting and retrieving of the case first attribute failed");
1472     }
1473 
1474     coll->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, error);
1475     if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_ON ||
1476         U_FAILURE(error)) {
1477         errln("Setting and retrieving of the case level attribute failed");
1478     }
1479 
1480     coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, error);
1481     if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_OFF ||
1482         U_FAILURE(error)) {
1483         errln("Setting and retrieving of the case level attribute failed");
1484     }
1485 
1486     coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, error);
1487     if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_ON ||
1488         U_FAILURE(error)) {
1489         errln("Setting and retrieving of the normalization on/off attribute failed");
1490     }
1491 
1492     coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, error);
1493     if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_OFF ||
1494         U_FAILURE(error)) {
1495         errln("Setting and retrieving of the normalization on/off attribute failed");
1496     }
1497 
1498     coll->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, error);
1499     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_PRIMARY ||
1500         U_FAILURE(error)) {
1501         errln("Setting and retrieving of the collation strength failed");
1502     }
1503 
1504     coll->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, error);
1505     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_SECONDARY ||
1506         U_FAILURE(error)) {
1507         errln("Setting and retrieving of the collation strength failed");
1508     }
1509 
1510     coll->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, error);
1511     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_TERTIARY ||
1512         U_FAILURE(error)) {
1513         errln("Setting and retrieving of the collation strength failed");
1514     }
1515 
1516     coll->setAttribute(UCOL_STRENGTH, UCOL_QUATERNARY, error);
1517     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_QUATERNARY ||
1518         U_FAILURE(error)) {
1519         errln("Setting and retrieving of the collation strength failed");
1520     }
1521 
1522     coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, error);
1523     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_IDENTICAL ||
1524         U_FAILURE(error)) {
1525         errln("Setting and retrieving of the collation strength failed");
1526     }
1527 
1528     delete coll;
1529 }
1530 
TestVariableTopSetting()1531 void CollationAPITest::TestVariableTopSetting() {
1532   UErrorCode status = U_ZERO_ERROR;
1533 
1534   UChar vt[256] = { 0 };
1535 
1536   // Use the root collator, not the default collator.
1537   // This test fails with en_US_POSIX which tailors the dollar sign after 'A'.
1538   Collator *coll = Collator::createInstance(Locale::getRoot(), status);
1539   if(U_FAILURE(status)) {
1540     delete coll;
1541     errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
1542     return;
1543   }
1544 
1545   uint32_t oldVarTop = coll->getVariableTop(status);
1546 
1547   // ICU 53+: The character must be in a supported reordering group,
1548   // and the variable top is pinned to the end of that group.
1549   vt[0] = 0x0041;
1550 
1551   (void)coll->setVariableTop(vt, 1, status);
1552   if(status != U_ILLEGAL_ARGUMENT_ERROR) {
1553     errln("setVariableTop(letter) did not detect illegal argument - %s", u_errorName(status));
1554   }
1555 
1556   status = U_ZERO_ERROR;
1557   vt[0] = 0x24;  // dollar sign (currency symbol)
1558   uint32_t newVarTop = coll->setVariableTop(vt, 1, status);
1559   if(U_FAILURE(status)) {
1560     errln("setVariableTop(dollar sign) failed: %s", u_errorName(status));
1561     return;
1562   }
1563   if(newVarTop != coll->getVariableTop(status)) {
1564     errln("setVariableTop(dollar sign) != following getVariableTop()");
1565   }
1566 
1567   UnicodeString dollar((UChar)0x24);
1568   UnicodeString euro((UChar)0x20AC);
1569   uint32_t newVarTop2 = coll->setVariableTop(euro, status);
1570   assertEquals("setVariableTop(Euro sign) == following getVariableTop()",
1571                (int64_t)newVarTop2, (int64_t)coll->getVariableTop(status));
1572   assertEquals("setVariableTop(Euro sign) == setVariableTop(dollar sign) (should pin to top of currency group)",
1573                (int64_t)newVarTop2, (int64_t)newVarTop);
1574 
1575   coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
1576   assertEquals("empty==dollar", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), dollar));
1577   assertEquals("empty==euro", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), euro));
1578   assertEquals("dollar<zero", (int32_t)UCOL_LESS, (int32_t)coll->compare(dollar, UnicodeString((UChar)0x30)));
1579 
1580   coll->setVariableTop(oldVarTop, status);
1581 
1582   uint32_t newerVarTop = coll->setVariableTop(UnicodeString(vt, 1), status);
1583 
1584   if(newVarTop != newerVarTop) {
1585     errln("Didn't set vartop properly from UnicodeString!\n");
1586   }
1587 
1588   delete coll;
1589 
1590 }
1591 
TestMaxVariable()1592 void CollationAPITest::TestMaxVariable() {
1593   UErrorCode errorCode = U_ZERO_ERROR;
1594   LocalPointer<Collator> coll(Collator::createInstance(Locale::getRoot(), errorCode));
1595   if(U_FAILURE(errorCode)) {
1596     errcheckln(errorCode, "Collator creation failed with error %s", u_errorName(errorCode));
1597     return;
1598   }
1599 
1600   (void)coll->setMaxVariable(UCOL_REORDER_CODE_OTHERS, errorCode);
1601   if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
1602     errln("setMaxVariable(others) did not detect illegal argument - %s", u_errorName(errorCode));
1603   }
1604 
1605   errorCode = U_ZERO_ERROR;
1606   (void)coll->setMaxVariable(UCOL_REORDER_CODE_CURRENCY, errorCode);
1607 
1608   if(UCOL_REORDER_CODE_CURRENCY != coll->getMaxVariable()) {
1609     errln("setMaxVariable(currency) != following getMaxVariable()");
1610   }
1611 
1612   coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, errorCode);
1613   assertEquals("empty==dollar", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), UnicodeString((UChar)0x24)));
1614   assertEquals("empty==euro", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), UnicodeString((UChar)0x20AC)));
1615   assertEquals("dollar<zero", (int32_t)UCOL_LESS, (int32_t)coll->compare(UnicodeString((UChar)0x24), UnicodeString((UChar)0x30)));
1616 }
1617 
TestGetLocale()1618 void CollationAPITest::TestGetLocale() {
1619   UErrorCode status = U_ZERO_ERROR;
1620   const char *rules = "&a<x<y<z";
1621   UChar rlz[256] = {0};
1622 
1623   Collator *coll = Collator::createInstance("root", status);
1624   if(U_FAILURE(status)) {
1625     dataerrln("Failed to open collator for \"root\" with %s", u_errorName(status));
1626     return;
1627   }
1628   Locale locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1629   if(locale != Locale::getRoot()) {
1630     errln("Collator::createInstance(\"root\").getLocale(actual) != Locale::getRoot(); "
1631           "getLocale().getName() = \"%s\"",
1632           locale.getName());
1633   }
1634   delete coll;
1635 
1636   coll = Collator::createInstance("", status);
1637   if(U_FAILURE(status)) {
1638     dataerrln("Failed to open collator for \"\" with %s", u_errorName(status));
1639     return;
1640   }
1641   locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1642   if(locale != Locale::getRoot()) {
1643     errln("Collator::createInstance(\"\").getLocale(actual) != Locale::getRoot(); "
1644           "getLocale().getName() = \"%s\"",
1645           locale.getName());
1646   }
1647   delete coll;
1648 
1649   int32_t i = 0;
1650 
1651   static const struct {
1652     const char* requestedLocale;
1653     const char* validLocale;
1654     const char* actualLocale;
1655   } testStruct[] = {
1656     // Note: Locale::getRoot().getName() == "" not "root".
1657     { "de_DE", "de", "" },
1658     { "sr_RS", "sr_Cyrl_RS", "sr" },
1659     { "en_US_CALIFORNIA", "en_US", "" },
1660     { "fr_FR_NONEXISTANT", "fr", "" },
1661     // pinyin is the default, therefore suppressed.
1662     { "zh_CN", "zh_Hans_CN", "zh" },
1663     // zh_Hant has default=stroke but the data is in zh.
1664     { "zh_TW", "zh_Hant_TW", "zh@collation=stroke" },
1665     { "zh_TW@collation=pinyin", "zh_Hant_TW@collation=pinyin", "zh" },
1666     { "zh_CN@collation=stroke", "zh_Hans_CN@collation=stroke", "zh@collation=stroke" },
1667     // yue/yue_Hant aliased to zh_Hant, yue_Hans aliased to zh_Hans.
1668     { "yue", "zh_Hant", "zh@collation=stroke" },
1669     { "yue_HK", "zh_Hant", "zh@collation=stroke" },
1670     { "yue_Hant", "zh_Hant", "zh@collation=stroke" },
1671     { "yue_Hant_HK", "zh_Hant", "zh@collation=stroke" },
1672     { "yue@collation=pinyin", "zh_Hant@collation=pinyin", "zh" },
1673     { "yue_HK@collation=pinyin", "zh_Hant@collation=pinyin", "zh" },
1674     { "yue_CN", "zh_Hans", "zh" },
1675     { "yue_Hans", "zh_Hans", "zh" },
1676     { "yue_Hans_CN", "zh_Hans", "zh" },
1677     { "yue_Hans@collation=stroke", "zh_Hans@collation=stroke", "zh@collation=stroke" },
1678     { "yue_CN@collation=stroke", "zh_Hans@collation=stroke", "zh@collation=stroke" }
1679   };
1680 
1681   u_unescape(rules, rlz, 256);
1682 
1683   /* test opening collators for different locales */
1684   for(i = 0; i<UPRV_LENGTHOF(testStruct); i++) {
1685     status = U_ZERO_ERROR;
1686     coll = Collator::createInstance(testStruct[i].requestedLocale, status);
1687     if(U_FAILURE(status)) {
1688       errln("Failed to open collator for %s with %s", testStruct[i].requestedLocale, u_errorName(status));
1689       delete coll;
1690       continue;
1691     }
1692     // The requested locale may be the same as the valid locale,
1693     // or may not be supported at all. See ticket #10477.
1694     locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1695     if(U_SUCCESS(status) &&
1696         locale != testStruct[i].requestedLocale && locale != testStruct[i].validLocale) {
1697       errln("[Coll %s]: Error in requested locale, expected %s or %s, got %s",
1698             testStruct[i].requestedLocale,
1699             testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName());
1700     }
1701     status = U_ZERO_ERROR;
1702     locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1703     if(locale != testStruct[i].validLocale) {
1704       errln("[Coll %s]: Error in valid locale, expected %s, got %s",
1705             testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName());
1706     }
1707     locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1708     if(locale != testStruct[i].actualLocale) {
1709       errln("[Coll %s]: Error in actual locale, expected %s, got %s",
1710             testStruct[i].requestedLocale, testStruct[i].actualLocale, locale.getName());
1711     }
1712     // If we open a collator for the actual locale, we should get an equivalent one again.
1713     LocalPointer<Collator> coll2(Collator::createInstance(locale, status));
1714     if(U_FAILURE(status)) {
1715       errln("Failed to open collator for actual locale \"%s\" with %s",
1716             locale.getName(), u_errorName(status));
1717     } else {
1718       Locale actual2 = coll2->getLocale(ULOC_ACTUAL_LOCALE, status);
1719       if(actual2 != locale) {
1720         errln("[Coll actual \"%s\"]: Error in actual locale, got different one: \"%s\"",
1721               locale.getName(), actual2.getName());
1722       }
1723       if(*coll2 != *coll) {
1724         errln("[Coll actual \"%s\"]: Got different collator than before", locale.getName());
1725       }
1726     }
1727     delete coll;
1728   }
1729 
1730   /* completely non-existent locale for collator should get a root collator */
1731   {
1732     LocalPointer<Collator> coll(Collator::createInstance("blahaha", status));
1733     if(U_FAILURE(status)) {
1734       errln("Failed to open collator with %s", u_errorName(status));
1735       return;
1736     }
1737     Locale valid = coll->getLocale(ULOC_VALID_LOCALE, status);
1738     const char *name = valid.getName();
1739     if(*name != 0 && strcmp(name, "root") != 0) {
1740       errln("Valid locale for nonexisting-locale collator is \"%s\" not root", name);
1741     }
1742     Locale actual = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1743     name = actual.getName();
1744     if(*name != 0 && strcmp(name, "root") != 0) {
1745       errln("Actual locale for nonexisting-locale collator is \"%s\" not root", name);
1746     }
1747   }
1748 
1749 
1750 
1751   /* collator instantiated from rules should have all three locales NULL */
1752   coll = new RuleBasedCollator(rlz, status);
1753   locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1754   if(U_SUCCESS(status) && !locale.isBogus()) {
1755     errln("For collator instantiated from rules, requested locale %s is not bogus", locale.getName());
1756   }
1757   status = U_ZERO_ERROR;
1758   locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1759   if(!locale.isBogus()) {
1760     errln("For collator instantiated from rules, valid locale %s is not bogus", locale.getName());
1761   }
1762   locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1763   if(!locale.isBogus()) {
1764     errln("For collator instantiated from rules, actual locale %s is not bogus", locale.getName());
1765   }
1766   delete coll;
1767 }
1768 
1769 struct teststruct {
1770     const char *original;
1771     uint8_t key[256];
1772 };
1773 
1774 
1775 
1776 U_CDECL_BEGIN
1777 static int U_CALLCONV
compare_teststruct(const void * string1,const void * string2)1778 compare_teststruct(const void *string1, const void *string2) {
1779   return(strcmp((const char *)((struct teststruct *)string1)->key, (const char *)((struct teststruct *)string2)->key));
1780 }
1781 U_CDECL_END
1782 
1783 
TestBounds(void)1784 void CollationAPITest::TestBounds(void) {
1785     UErrorCode status = U_ZERO_ERROR;
1786 
1787     Collator *coll = Collator::createInstance(Locale("sh"), status);
1788     if(U_FAILURE(status)) {
1789       delete coll;
1790       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
1791       return;
1792     }
1793 
1794     uint8_t sortkey[512], lower[512], upper[512];
1795     UChar buffer[512];
1796 
1797     static const char * const test[] = {
1798         "John Smith",
1799         "JOHN SMITH",
1800         "john SMITH",
1801         "j\\u00F6hn sm\\u00EFth",
1802         "J\\u00F6hn Sm\\u00EFth",
1803         "J\\u00D6HN SM\\u00CFTH",
1804         "john smithsonian",
1805         "John Smithsonian"
1806     };
1807 
1808     struct teststruct tests[] = {
1809         {"\\u010CAKI MIHALJ", {0}},
1810         {"\\u010CAKI MIHALJ", {0}},
1811         {"\\u010CAKI PIRO\\u0160KA", {0}},
1812         {"\\u010CABAI ANDRIJA", {0}},
1813         {"\\u010CABAI LAJO\\u0160", {0}},
1814         {"\\u010CABAI MARIJA", {0}},
1815         {"\\u010CABAI STEVAN", {0}},
1816         {"\\u010CABAI STEVAN", {0}},
1817         {"\\u010CABARKAPA BRANKO", {0}},
1818         {"\\u010CABARKAPA MILENKO", {0}},
1819         {"\\u010CABARKAPA MIROSLAV", {0}},
1820         {"\\u010CABARKAPA SIMO", {0}},
1821         {"\\u010CABARKAPA STANKO", {0}},
1822         {"\\u010CABARKAPA TAMARA", {0}},
1823         {"\\u010CABARKAPA TOMA\\u0160", {0}},
1824         {"\\u010CABDARI\\u0106 NIKOLA", {0}},
1825         {"\\u010CABDARI\\u0106 ZORICA", {0}},
1826         {"\\u010CABI NANDOR", {0}},
1827         {"\\u010CABOVI\\u0106 MILAN", {0}},
1828         {"\\u010CABRADI AGNEZIJA", {0}},
1829         {"\\u010CABRADI IVAN", {0}},
1830         {"\\u010CABRADI JELENA", {0}},
1831         {"\\u010CABRADI LJUBICA", {0}},
1832         {"\\u010CABRADI STEVAN", {0}},
1833         {"\\u010CABRDA MARTIN", {0}},
1834         {"\\u010CABRILO BOGDAN", {0}},
1835         {"\\u010CABRILO BRANISLAV", {0}},
1836         {"\\u010CABRILO LAZAR", {0}},
1837         {"\\u010CABRILO LJUBICA", {0}},
1838         {"\\u010CABRILO SPASOJA", {0}},
1839         {"\\u010CADE\\u0160 ZDENKA", {0}},
1840         {"\\u010CADESKI BLAGOJE", {0}},
1841         {"\\u010CADOVSKI VLADIMIR", {0}},
1842         {"\\u010CAGLJEVI\\u0106 TOMA", {0}},
1843         {"\\u010CAGOROVI\\u0106 VLADIMIR", {0}},
1844         {"\\u010CAJA VANKA", {0}},
1845         {"\\u010CAJI\\u0106 BOGOLJUB", {0}},
1846         {"\\u010CAJI\\u0106 BORISLAV", {0}},
1847         {"\\u010CAJI\\u0106 RADOSLAV", {0}},
1848         {"\\u010CAK\\u0160IRAN MILADIN", {0}},
1849         {"\\u010CAKAN EUGEN", {0}},
1850         {"\\u010CAKAN EVGENIJE", {0}},
1851         {"\\u010CAKAN IVAN", {0}},
1852         {"\\u010CAKAN JULIJAN", {0}},
1853         {"\\u010CAKAN MIHAJLO", {0}},
1854         {"\\u010CAKAN STEVAN", {0}},
1855         {"\\u010CAKAN VLADIMIR", {0}},
1856         {"\\u010CAKAN VLADIMIR", {0}},
1857         {"\\u010CAKAN VLADIMIR", {0}},
1858         {"\\u010CAKARA ANA", {0}},
1859         {"\\u010CAKAREVI\\u0106 MOMIR", {0}},
1860         {"\\u010CAKAREVI\\u0106 NEDELJKO", {0}},
1861         {"\\u010CAKI \\u0160ANDOR", {0}},
1862         {"\\u010CAKI AMALIJA", {0}},
1863         {"\\u010CAKI ANDRA\\u0160", {0}},
1864         {"\\u010CAKI LADISLAV", {0}},
1865         {"\\u010CAKI LAJO\\u0160", {0}},
1866         {"\\u010CAKI LASLO", {0}}
1867     };
1868 
1869 
1870 
1871     int32_t i = 0, j = 0, k = 0, buffSize = 0, skSize = 0, lowerSize = 0, upperSize = 0;
1872     int32_t arraySize = UPRV_LENGTHOF(tests);
1873 
1874     (void)lowerSize;  // Suppress unused variable warnings.
1875     (void)upperSize;
1876 
1877     for(i = 0; i<arraySize; i++) {
1878         buffSize = u_unescape(tests[i].original, buffer, 512);
1879         skSize = coll->getSortKey(buffer, buffSize, tests[i].key, 512);
1880     }
1881 
1882     qsort(tests, arraySize, sizeof(struct teststruct), compare_teststruct);
1883 
1884     for(i = 0; i < arraySize-1; i++) {
1885         for(j = i+1; j < arraySize; j++) {
1886             lowerSize = coll->getBound(tests[i].key, -1, UCOL_BOUND_LOWER, 1, lower, 512, status);
1887             upperSize = coll->getBound(tests[j].key, -1, UCOL_BOUND_UPPER, 1, upper, 512, status);
1888             for(k = i; k <= j; k++) {
1889                 if(strcmp((const char *)lower, (const char *)tests[k].key) > 0) {
1890                     errln("Problem with lower! j = %i (%s vs %s)", k, tests[k].original, tests[i].original);
1891                 }
1892                 if(strcmp((const char *)upper, (const char *)tests[k].key) <= 0) {
1893                     errln("Problem with upper! j = %i (%s vs %s)", k, tests[k].original, tests[j].original);
1894                 }
1895             }
1896         }
1897     }
1898 
1899 
1900     for(i = 0; i<UPRV_LENGTHOF(test); i++) {
1901         buffSize = u_unescape(test[i], buffer, 512);
1902         skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1903         lowerSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_LOWER, 1, lower, 512, &status);
1904         upperSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_UPPER_LONG, 1, upper, 512, &status);
1905         for(j = i+1; j<UPRV_LENGTHOF(test); j++) {
1906             buffSize = u_unescape(test[j], buffer, 512);
1907             skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1908             if(strcmp((const char *)lower, (const char *)sortkey) > 0) {
1909                 errln("Problem with lower! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
1910             }
1911             if(strcmp((const char *)upper, (const char *)sortkey) <= 0) {
1912                 errln("Problem with upper! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
1913             }
1914         }
1915     }
1916     delete coll;
1917 }
1918 
1919 
TestGetTailoredSet()1920 void CollationAPITest::TestGetTailoredSet()
1921 {
1922   struct {
1923     const char *rules;
1924     const char *tests[20];
1925     int32_t testsize;
1926   } setTest[] = {
1927     { "&a < \\u212b", { "\\u212b", "A\\u030a", "\\u00c5" }, 3},
1928     { "& S < \\u0161 <<< \\u0160", { "\\u0161", "s\\u030C", "\\u0160", "S\\u030C" }, 4}
1929   };
1930 
1931   int32_t i = 0, j = 0;
1932   UErrorCode status = U_ZERO_ERROR;
1933 
1934   UnicodeString buff;
1935   UnicodeSet *set = NULL;
1936 
1937   for(i = 0; i < UPRV_LENGTHOF(setTest); i++) {
1938     buff = UnicodeString(setTest[i].rules, -1, US_INV).unescape();
1939     RuleBasedCollator coll(buff, status);
1940     if(U_SUCCESS(status)) {
1941       set = coll.getTailoredSet(status);
1942       if(set->size() < setTest[i].testsize) {
1943         errln("Tailored set size smaller (%d) than expected (%d)", set->size(), setTest[i].testsize);
1944       }
1945       for(j = 0; j < setTest[i].testsize; j++) {
1946         buff = UnicodeString(setTest[i].tests[j], -1, US_INV).unescape();
1947         if(!set->contains(buff)) {
1948           errln("Tailored set doesn't contain %s... It should", setTest[i].tests[j]);
1949         }
1950       }
1951       delete set;
1952     } else {
1953       errcheckln(status, "Couldn't open collator with rules %s - %s", setTest[i].rules, u_errorName(status));
1954     }
1955   }
1956 }
1957 
TestUClassID()1958 void CollationAPITest::TestUClassID()
1959 {
1960     char id = *((char *)RuleBasedCollator::getStaticClassID());
1961     if (id != 0) {
1962         errln("Static class id for RuleBasedCollator should be 0");
1963     }
1964     UErrorCode status = U_ZERO_ERROR;
1965     RuleBasedCollator *coll
1966         = (RuleBasedCollator *)Collator::createInstance(status);
1967     if(U_FAILURE(status)) {
1968       delete coll;
1969       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
1970       return;
1971     }
1972     id = *((char *)coll->getDynamicClassID());
1973     if (id != 0) {
1974         errln("Dynamic class id for RuleBasedCollator should be 0");
1975     }
1976     id = *((char *)CollationKey::getStaticClassID());
1977     if (id != 0) {
1978         errln("Static class id for CollationKey should be 0");
1979     }
1980     CollationKey *key = new CollationKey();
1981     id = *((char *)key->getDynamicClassID());
1982     if (id != 0) {
1983         errln("Dynamic class id for CollationKey should be 0");
1984     }
1985     id = *((char *)CollationElementIterator::getStaticClassID());
1986     if (id != 0) {
1987         errln("Static class id for CollationElementIterator should be 0");
1988     }
1989     UnicodeString str("testing");
1990     CollationElementIterator *iter = coll->createCollationElementIterator(str);
1991     id = *((char *)iter->getDynamicClassID());
1992     if (id != 0) {
1993         errln("Dynamic class id for CollationElementIterator should be 0");
1994     }
1995     delete key;
1996     delete iter;
1997     delete coll;
1998 }
1999 
2000 class TestCollator  : public Collator
2001 {
2002 public:
2003     virtual Collator* clone(void) const;
2004 
2005     using Collator::compare;
2006 
2007     virtual UCollationResult compare(const UnicodeString& source,
2008                                       const UnicodeString& target,
2009                                       UErrorCode& status) const;
2010     virtual UCollationResult compare(const UnicodeString& source,
2011                                       const UnicodeString& target,
2012                                       int32_t length,
2013                                       UErrorCode& status) const;
2014     virtual UCollationResult compare(const UChar* source,
2015                                       int32_t sourceLength,
2016                                       const UChar* target,
2017                                       int32_t targetLength,
2018                                       UErrorCode& status) const;
2019     virtual CollationKey& getCollationKey(const UnicodeString&  source,
2020                                           CollationKey& key,
2021                                           UErrorCode& status) const;
2022     virtual CollationKey& getCollationKey(const UChar*source,
2023                                           int32_t sourceLength,
2024                                           CollationKey& key,
2025                                           UErrorCode& status) const;
2026     virtual int32_t hashCode(void) const;
2027     virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
2028     virtual ECollationStrength getStrength(void) const;
2029     virtual void setStrength(ECollationStrength newStrength);
2030     virtual UClassID getDynamicClassID(void) const;
2031     virtual void getVersion(UVersionInfo info) const;
2032     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
2033                               UErrorCode &status);
2034     virtual UColAttributeValue getAttribute(UColAttribute attr,
2035                                             UErrorCode &status) const;
2036     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len,
2037                                     UErrorCode &status);
2038     virtual uint32_t setVariableTop(const UnicodeString &varTop,
2039                                     UErrorCode &status);
2040     virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
2041     virtual uint32_t getVariableTop(UErrorCode &status) const;
2042     virtual int32_t getSortKey(const UnicodeString& source,
2043                             uint8_t* result,
2044                             int32_t resultLength) const;
2045     virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
2046                              uint8_t*result, int32_t resultLength) const;
2047     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
2048     virtual UBool operator==(const Collator& other) const;
2049     // Collator::operator!= calls !Collator::operator== which works for all subclasses.
2050     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
TestCollator()2051     TestCollator() : Collator() {};
TestCollator(UCollationStrength collationStrength,UNormalizationMode decompositionMode)2052     TestCollator(UCollationStrength collationStrength,
2053            UNormalizationMode decompositionMode) : Collator(collationStrength, decompositionMode) {};
2054 };
2055 
operator ==(const Collator & other) const2056 inline UBool TestCollator::operator==(const Collator& other) const {
2057     // TestCollator has no fields, so we test for identity.
2058     return this == &other;
2059 
2060     // Normally, subclasses should do something like the following:
2061     //    if (this == &other) { return TRUE; }
2062     //    if (!Collator::operator==(other)) { return FALSE; }  // not the same class
2063     //
2064     //    const TestCollator &o = (const TestCollator&)other;
2065     //    (compare this vs. o's subclass fields)
2066 }
2067 
clone() const2068 Collator* TestCollator::clone() const
2069 {
2070     return new TestCollator();
2071 }
2072 
compare(const UnicodeString & source,const UnicodeString & target,UErrorCode & status) const2073 UCollationResult TestCollator::compare(const UnicodeString& source,
2074                                         const UnicodeString& target,
2075                                         UErrorCode& status) const
2076 {
2077   if(U_SUCCESS(status)) {
2078     return UCollationResult(source.compare(target));
2079   } else {
2080     return UCOL_EQUAL;
2081   }
2082 }
2083 
compare(const UnicodeString & source,const UnicodeString & target,int32_t length,UErrorCode & status) const2084 UCollationResult TestCollator::compare(const UnicodeString& source,
2085                                         const UnicodeString& target,
2086                                         int32_t length,
2087                                         UErrorCode& status) const
2088 {
2089   if(U_SUCCESS(status)) {
2090     return UCollationResult(source.compare(0, length, target));
2091   } else {
2092     return UCOL_EQUAL;
2093   }
2094 }
2095 
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength,UErrorCode & status) const2096 UCollationResult TestCollator::compare(const UChar* source,
2097                                         int32_t sourceLength,
2098                                         const UChar* target,
2099                                         int32_t targetLength,
2100                                         UErrorCode& status) const
2101 {
2102     UnicodeString s(source, sourceLength);
2103     UnicodeString t(target, targetLength);
2104     return compare(s, t, status);
2105 }
2106 
getCollationKey(const UnicodeString & source,CollationKey & key,UErrorCode & status) const2107 CollationKey& TestCollator::getCollationKey(const UnicodeString& source,
2108                                             CollationKey& key,
2109                                             UErrorCode& status) const
2110 {
2111     char temp[100];
2112     int length = 100;
2113     length = source.extract(temp, length, NULL, status);
2114     temp[length] = 0;
2115     CollationKey tempkey((uint8_t*)temp, length);
2116     key = tempkey;
2117     return key;
2118 }
2119 
getCollationKey(const UChar * source,int32_t sourceLength,CollationKey & key,UErrorCode & status) const2120 CollationKey& TestCollator::getCollationKey(const UChar*source,
2121                                           int32_t sourceLength,
2122                                           CollationKey& key,
2123                                           UErrorCode& status) const
2124 {
2125     //s tack allocation used since collationkey does not keep the unicodestring
2126     UnicodeString str(source, sourceLength);
2127     return getCollationKey(str, key, status);
2128 }
2129 
getSortKey(const UnicodeString & source,uint8_t * result,int32_t resultLength) const2130 int32_t TestCollator::getSortKey(const UnicodeString& source, uint8_t* result,
2131                                  int32_t resultLength) const
2132 {
2133     UErrorCode status = U_ZERO_ERROR;
2134     int32_t length = source.extract((char *)result, resultLength, NULL,
2135                                     status);
2136     result[length] = 0;
2137     return length;
2138 }
2139 
getSortKey(const UChar * source,int32_t sourceLength,uint8_t * result,int32_t resultLength) const2140 int32_t TestCollator::getSortKey(const UChar*source, int32_t sourceLength,
2141                                  uint8_t*result, int32_t resultLength) const
2142 {
2143     UnicodeString str(source, sourceLength);
2144     return getSortKey(str, result, resultLength);
2145 }
2146 
hashCode() const2147 int32_t TestCollator::hashCode() const
2148 {
2149     return 0;
2150 }
2151 
getLocale(ULocDataLocaleType type,UErrorCode & status) const2152 Locale TestCollator::getLocale(ULocDataLocaleType type, UErrorCode& status) const
2153 {
2154     // api not used, this is to make the compiler happy
2155     if (U_FAILURE(status)) {
2156         (void)type;
2157     }
2158     return NULL;
2159 }
2160 
getStrength() const2161 Collator::ECollationStrength TestCollator::getStrength() const
2162 {
2163     return TERTIARY;
2164 }
2165 
setStrength(Collator::ECollationStrength newStrength)2166 void TestCollator::setStrength(Collator::ECollationStrength newStrength)
2167 {
2168     // api not used, this is to make the compiler happy
2169     (void)newStrength;
2170 }
2171 
getDynamicClassID(void) const2172 UClassID TestCollator::getDynamicClassID(void) const
2173 {
2174     return 0;
2175 }
2176 
getVersion(UVersionInfo info) const2177 void TestCollator::getVersion(UVersionInfo info) const
2178 {
2179     // api not used, this is to make the compiler happy
2180     memset(info, 0, U_MAX_VERSION_LENGTH);
2181 }
2182 
setAttribute(UColAttribute,UColAttributeValue,UErrorCode &)2183 void TestCollator::setAttribute(UColAttribute /*attr*/, UColAttributeValue /*value*/,
2184                                 UErrorCode & /*status*/)
2185 {
2186 }
2187 
getAttribute(UColAttribute attr,UErrorCode & status) const2188 UColAttributeValue TestCollator::getAttribute(UColAttribute attr,
2189                                               UErrorCode &status) const
2190 {
2191     // api not used, this is to make the compiler happy
2192     if (U_FAILURE(status) || attr == UCOL_ATTRIBUTE_COUNT) {
2193         return UCOL_OFF;
2194     }
2195     return UCOL_DEFAULT;
2196 }
2197 
setVariableTop(const UChar * varTop,int32_t len,UErrorCode & status)2198 uint32_t TestCollator::setVariableTop(const UChar *varTop, int32_t len,
2199                                   UErrorCode &status)
2200 {
2201     // api not used, this is to make the compiler happy
2202     if (U_SUCCESS(status) && (varTop == 0 || len < -1)) {
2203         status = U_ILLEGAL_ARGUMENT_ERROR;
2204     }
2205     return 0;
2206 }
2207 
setVariableTop(const UnicodeString & varTop,UErrorCode & status)2208 uint32_t TestCollator::setVariableTop(const UnicodeString &varTop,
2209                                   UErrorCode &status)
2210 {
2211     // api not used, this is to make the compiler happy
2212     if (U_SUCCESS(status) && varTop.length() == 0) {
2213         status = U_ILLEGAL_ARGUMENT_ERROR;
2214     }
2215     return 0;
2216 }
2217 
setVariableTop(uint32_t varTop,UErrorCode & status)2218 void TestCollator::setVariableTop(uint32_t varTop, UErrorCode &status)
2219 {
2220     // api not used, this is to make the compiler happy
2221     if (U_SUCCESS(status) && varTop == 0) {
2222         status = U_ILLEGAL_ARGUMENT_ERROR;
2223     }
2224 }
2225 
getVariableTop(UErrorCode & status) const2226 uint32_t TestCollator::getVariableTop(UErrorCode &status) const
2227 {
2228 
2229     // api not used, this is to make the compiler happy
2230     if (U_SUCCESS(status)) {
2231         return 0;
2232     }
2233     return (uint32_t)(0xFFFFFFFFu);
2234 }
2235 
getTailoredSet(UErrorCode & status) const2236 UnicodeSet * TestCollator::getTailoredSet(UErrorCode &status) const
2237 {
2238     return Collator::getTailoredSet(status);
2239 }
2240 
setLocales(const Locale & requestedLocale,const Locale & validLocale,const Locale & actualLocale)2241 void TestCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale)
2242 {
2243     Collator::setLocales(requestedLocale, validLocale, actualLocale);
2244 }
2245 
2246 
TestSubclass()2247 void CollationAPITest::TestSubclass()
2248 {
2249     TestCollator col1;
2250     TestCollator col2;
2251     doAssert(col1 != col2, "2 instances of TestCollator should be different");
2252     if (col1.hashCode() != col2.hashCode()) {
2253         errln("Every TestCollator has the same hashcode");
2254     }
2255     UnicodeString abc("abc", 3);
2256     UnicodeString bcd("bcd", 3);
2257     if (col1.compare(abc, bcd) != abc.compare(bcd)) {
2258         errln("TestCollator compare should be the same as the default "
2259               "string comparison");
2260     }
2261     CollationKey key;
2262     UErrorCode status = U_ZERO_ERROR;
2263     col1.getCollationKey(abc, key, status);
2264     int32_t length = 0;
2265     const char* bytes = (const char *)key.getByteArray(length);
2266     UnicodeString keyarray(bytes, length, NULL, status);
2267     if (abc != keyarray) {
2268         errln("TestCollator collationkey API is returning wrong values");
2269     }
2270 
2271     UnicodeSet expectedset(0, 0x10FFFF);
2272     UnicodeSet *defaultset = col1.getTailoredSet(status);
2273     if (!defaultset->containsAll(expectedset)
2274         || !expectedset.containsAll(*defaultset)) {
2275         errln("Error: expected default tailoring to be 0 to 0x10ffff");
2276     }
2277     delete defaultset;
2278 
2279     // use base class implementation
2280     Locale loc1 = Locale::getGermany();
2281     Locale loc2 = Locale::getFrance();
2282     col1.setLocales(loc1, loc2, loc2); // default implementation has no effect
2283 
2284     UnicodeString displayName;
2285     col1.getDisplayName(loc1, loc2, displayName); // de_DE collator in fr_FR locale
2286 
2287     TestCollator col3(UCOL_TERTIARY, UNORM_NONE);
2288     UnicodeString a("a");
2289     UnicodeString b("b");
2290     Collator::EComparisonResult result = Collator::EComparisonResult(a.compare(b));
2291     if(col1.compare(a, b) != result) {
2292       errln("Collator doesn't give default result");
2293     }
2294     if(col1.compare(a, b, 1) != result) {
2295       errln("Collator doesn't give default result");
2296     }
2297     if(col1.compare(a.getBuffer(), a.length(), b.getBuffer(), b.length()) != result) {
2298       errln("Collator doesn't give default result");
2299     }
2300 }
2301 
TestNULLCharTailoring()2302 void CollationAPITest::TestNULLCharTailoring()
2303 {
2304     UErrorCode status = U_ZERO_ERROR;
2305     UChar buf[256] = {0};
2306     int32_t len = u_unescape("&a < '\\u0000'", buf, 256);
2307     UnicodeString first((UChar)0x0061);
2308     UnicodeString second((UChar)0);
2309     RuleBasedCollator *coll = new RuleBasedCollator(UnicodeString(buf, len), status);
2310     if(U_FAILURE(status)) {
2311         delete coll;
2312         errcheckln(status, "Failed to open collator - %s", u_errorName(status));
2313         return;
2314     }
2315     UCollationResult res = coll->compare(first, second, status);
2316     if(res != UCOL_LESS) {
2317         errln("a should be less then NULL after tailoring");
2318     }
2319     delete coll;
2320 }
2321 
TestClone()2322 void CollationAPITest::TestClone() {
2323     logln("\ninit c0");
2324     UErrorCode status = U_ZERO_ERROR;
2325     RuleBasedCollator* c0 = (RuleBasedCollator*)Collator::createInstance(status);
2326 
2327     if (U_FAILURE(status)) {
2328         errcheckln(status, "Collator::CreateInstance(status) failed with %s", u_errorName(status));
2329         return;
2330     }
2331 
2332     c0->setStrength(Collator::TERTIARY);
2333     dump("c0", c0, status);
2334 
2335     logln("\ninit c1");
2336     RuleBasedCollator* c1 = (RuleBasedCollator*)Collator::createInstance(status);
2337     c1->setStrength(Collator::TERTIARY);
2338     UColAttributeValue val = c1->getAttribute(UCOL_CASE_FIRST, status);
2339     if(val == UCOL_LOWER_FIRST){
2340         c1->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2341     }else{
2342         c1->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2343     }
2344     dump("c0", c0, status);
2345     dump("c1", c1, status);
2346 
2347     logln("\ninit c2");
2348     RuleBasedCollator* c2 = (RuleBasedCollator*)c1->clone();
2349     val = c2->getAttribute(UCOL_CASE_FIRST, status);
2350     if(val == UCOL_LOWER_FIRST){
2351         c2->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2352     }else{
2353         c2->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2354     }
2355     if(U_FAILURE(status)){
2356         errln("set and get attributes of collator failed. %s\n", u_errorName(status));
2357         return;
2358     }
2359     dump("c0", c0, status);
2360     dump("c1", c1, status);
2361     dump("c2", c2, status);
2362     if(*c1 == *c2){
2363         errln("The cloned objects refer to same data");
2364     }
2365     delete c0;
2366     delete c1;
2367     delete c2;
2368 }
2369 
TestCloneBinary()2370 void CollationAPITest::TestCloneBinary() {
2371     IcuTestErrorCode errorCode(*this, "TestCloneBinary");
2372     LocalPointer<Collator> root(Collator::createInstance(Locale::getRoot(), errorCode));
2373     LocalPointer<Collator> coll(Collator::createInstance("de@collation=phonebook", errorCode));
2374     if(errorCode.errDataIfFailureAndReset("Collator::createInstance(de@collation=phonebook)")) {
2375         return;
2376     }
2377     RuleBasedCollator *rbRoot = dynamic_cast<RuleBasedCollator *>(root.getAlias());
2378     RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(coll.getAlias());
2379     if(rbRoot == NULL || rbc == NULL) {
2380         infoln("root or de@collation=phonebook is not a RuleBasedCollator");
2381         return;
2382     }
2383     rbc->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode);
2384     UnicodeString uUmlaut((UChar)0xfc);
2385     UnicodeString ue = UNICODE_STRING_SIMPLE("ue");
2386     assertEquals("rbc/primary: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc->compare(uUmlaut, ue, errorCode));
2387     uint8_t bin[25000];
2388     int32_t binLength = rbc->cloneBinary(bin, UPRV_LENGTHOF(bin), errorCode);
2389     if(errorCode.errDataIfFailureAndReset("rbc->cloneBinary()")) {
2390         return;
2391     }
2392     logln("rbc->cloneBinary() -> %d bytes", (int)binLength);
2393 
2394     RuleBasedCollator rbc2(bin, binLength, rbRoot, errorCode);
2395     if(errorCode.errDataIfFailureAndReset("RuleBasedCollator(rbc binary)")) {
2396         return;
2397     }
2398     assertEquals("rbc2.strength==primary", (int32_t)UCOL_PRIMARY, rbc2.getAttribute(UCOL_STRENGTH, errorCode));
2399     assertEquals("rbc2: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc2.compare(uUmlaut, ue, errorCode));
2400     assertTrue("rbc==rbc2", *rbc == rbc2);
2401     uint8_t bin2[25000];
2402     int32_t bin2Length = rbc2.cloneBinary(bin2, UPRV_LENGTHOF(bin2), errorCode);
2403     assertEquals("len(rbc binary)==len(rbc2 binary)", binLength, bin2Length);
2404     assertTrue("rbc binary==rbc2 binary", binLength == bin2Length && memcmp(bin, bin2, binLength) == 0);
2405 
2406     RuleBasedCollator rbc3(bin, -1, rbRoot, errorCode);
2407     if(errorCode.errDataIfFailureAndReset("RuleBasedCollator(rbc binary, length<0)")) {
2408         return;
2409     }
2410     assertEquals("rbc3.strength==primary", (int32_t)UCOL_PRIMARY, rbc3.getAttribute(UCOL_STRENGTH, errorCode));
2411     assertEquals("rbc3: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc3.compare(uUmlaut, ue, errorCode));
2412     assertTrue("rbc==rbc3", *rbc == rbc3);
2413 }
2414 
TestIterNumeric()2415 void CollationAPITest::TestIterNumeric() {
2416     // Regression test for ticket #9915.
2417     // The collation code sometimes masked the continuation marker away
2418     // but later tested the result for isContinuation().
2419     // This test case failed because the third bytes of the computed numeric-collation primaries
2420     // were permutated with the script reordering table.
2421     // It should have been possible to reproduce this with the root collator
2422     // and characters with appropriate 3-byte primary weights.
2423     // The effectiveness of this test depends completely on the collation elements
2424     // and on the implementation code.
2425     IcuTestErrorCode errorCode(*this, "TestIterNumeric");
2426     RuleBasedCollator coll(UnicodeString("[reorder Hang Hani]"), errorCode);
2427     if(errorCode.errDataIfFailureAndReset("RuleBasedCollator constructor")) {
2428         return;
2429     }
2430     coll.setAttribute(UCOL_NUMERIC_COLLATION, UCOL_ON, errorCode);
2431     UCharIterator iter40, iter72;
2432     uiter_setUTF8(&iter40, "\x34\x30", 2);
2433     uiter_setUTF8(&iter72, "\x37\x32", 2);
2434     UCollationResult result = coll.compare(iter40, iter72, errorCode);
2435     assertEquals("40<72", (int32_t)UCOL_LESS, (int32_t)result);
2436 }
2437 
TestBadKeywords()2438 void CollationAPITest::TestBadKeywords() {
2439     // Test locale IDs with errors.
2440     // Valid locale IDs are tested via data-driven tests.
2441     UErrorCode errorCode = U_ZERO_ERROR;
2442     Locale bogusLocale(Locale::getRoot());
2443     bogusLocale.setToBogus();
2444     LocalPointer<Collator> coll(Collator::createInstance(bogusLocale, errorCode));
2445     if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2446         errln("Collator::createInstance(bogus locale) did not fail as expected - %s",
2447               u_errorName(errorCode));
2448     }
2449 
2450     // Unknown value.
2451     const char *localeID = "it-u-ks-xyz";
2452     errorCode = U_ZERO_ERROR;
2453     coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2454     if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2455         dataerrln("Collator::createInstance(%s) did not fail as expected - %s",
2456               localeID, u_errorName(errorCode));
2457     }
2458 
2459     // Unsupported attributes.
2460     localeID = "it@colHiraganaQuaternary=true";
2461     errorCode = U_ZERO_ERROR;
2462     coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2463     if(errorCode != U_UNSUPPORTED_ERROR) {
2464         if (errorCode == U_FILE_ACCESS_ERROR) {
2465             dataerrln("Collator::createInstance(it@colHiraganaQuaternary=true) : %s", u_errorName(errorCode));
2466         } else {
2467             errln("Collator::createInstance(%s) did not fail as expected - %s",
2468                   localeID, u_errorName(errorCode));
2469         }
2470     }
2471 
2472     localeID = "it-u-vt-u24";
2473     errorCode = U_ZERO_ERROR;
2474     coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2475     if(errorCode != U_UNSUPPORTED_ERROR) {
2476         if (errorCode == U_ILLEGAL_ARGUMENT_ERROR || errorCode == U_FILE_ACCESS_ERROR) {
2477             dataerrln("Collator::createInstance(it-u-vt-u24) : %s", u_errorName(errorCode));
2478         } else {
2479            errln("Collator::createInstance(%s) did not fail as expected - %s",
2480                   localeID, u_errorName(errorCode));
2481         }
2482     }
2483 }
2484 
TestGapTooSmall()2485 void CollationAPITest::TestGapTooSmall() {
2486     IcuTestErrorCode errorCode(*this, "TestGapTooSmall");
2487     // Try to tailor >20k characters into a too-small primary gap between symbols
2488     // that have 3-byte primary weights.
2489     // In FractionalUCA.txt:
2490     // 263A; [0C BA D0, 05, 05]  # Zyyy So  [084A.0020.0002]  * WHITE SMILING FACE
2491     // 263B; [0C BA D7, 05, 05]  # Zyyy So  [084B.0020.0002]  * BLACK SMILING FACE
2492     {
2493         RuleBasedCollator(u"&☺<*\u4E00-\u9FFF", errorCode);
2494         if(errorCode.isSuccess()) {
2495             errln("no exception for primary-gap overflow");
2496         } else if(errorCode.get() == U_BUFFER_OVERFLOW_ERROR) {
2497             // This is the expected error.
2498             // assertTrue("exception message mentions 'gap'", e.getMessage().contains("gap"));
2499         } else {
2500             errln("unexpected error for primary-gap overflow: %s", errorCode.errorName());
2501         }
2502         errorCode.reset();
2503     }
2504 
2505     // CLDR 32/ICU 60 FractionalUCA.txt makes room at the end of the symbols range
2506     // for several 2-byte primaries, or a large number of 3-byters.
2507     // The reset point is primary-before what should be
2508     // the special currency-first-primary contraction,
2509     // which is hopefully fairly stable, but not guaranteed stable.
2510     // In FractionalUCA.txt:
2511     // FDD1 20AC; [0D 70 02, 05, 05]  # CURRENCY first primary
2512     {
2513         RuleBasedCollator coll(u"&[before 1]\uFDD1€<*\u4E00-\u9FFF", errorCode);
2514         assertTrue("tailored Han before currency", coll.compare(u"\u4E00", u"$", errorCode) < 0);
2515         errorCode.errIfFailureAndReset(
2516             "unexpected exception for tailoring many characters at the end of symbols");
2517     }
2518 }
2519 
dump(UnicodeString msg,RuleBasedCollator * c,UErrorCode & status)2520  void CollationAPITest::dump(UnicodeString msg, RuleBasedCollator* c, UErrorCode& status) {
2521     const char* bigone = "One";
2522     const char* littleone = "one";
2523 
2524     logln(msg + " " + c->compare(bigone, littleone) +
2525                         " s: " + c->getStrength() +
2526                         " u: " + c->getAttribute(UCOL_CASE_FIRST, status));
2527 }
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)2528 void CollationAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */)
2529 {
2530     if (exec) logln("TestSuite CollationAPITest: ");
2531     TESTCASE_AUTO_BEGIN;
2532     TESTCASE_AUTO(TestProperty);
2533     TESTCASE_AUTO(TestKeywordValues);
2534     TESTCASE_AUTO(TestOperators);
2535     TESTCASE_AUTO(TestDuplicate);
2536     TESTCASE_AUTO(TestCompare);
2537     TESTCASE_AUTO(TestHashCode);
2538     TESTCASE_AUTO(TestCollationKey);
2539     TESTCASE_AUTO(TestElemIter);
2540     TESTCASE_AUTO(TestGetAll);
2541     TESTCASE_AUTO(TestRuleBasedColl);
2542     TESTCASE_AUTO(TestDecomposition);
2543     TESTCASE_AUTO(TestSafeClone);
2544     TESTCASE_AUTO(TestSortKey);
2545     TESTCASE_AUTO(TestSortKeyOverflow);
2546     TESTCASE_AUTO(TestMaxExpansion);
2547     TESTCASE_AUTO(TestDisplayName);
2548     TESTCASE_AUTO(TestAttribute);
2549     TESTCASE_AUTO(TestVariableTopSetting);
2550     TESTCASE_AUTO(TestMaxVariable);
2551     TESTCASE_AUTO(TestRules);
2552     TESTCASE_AUTO(TestGetLocale);
2553     TESTCASE_AUTO(TestBounds);
2554     TESTCASE_AUTO(TestGetTailoredSet);
2555     TESTCASE_AUTO(TestUClassID);
2556     TESTCASE_AUTO(TestSubclass);
2557     TESTCASE_AUTO(TestNULLCharTailoring);
2558     TESTCASE_AUTO(TestClone);
2559     TESTCASE_AUTO(TestCloneBinary);
2560     TESTCASE_AUTO(TestIterNumeric);
2561     TESTCASE_AUTO(TestBadKeywords);
2562     TESTCASE_AUTO(TestGapTooSmall);
2563     TESTCASE_AUTO_END;
2564 }
2565 
2566 #endif /* #if !UCONFIG_NO_COLLATION */
2567