1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2015, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 //===============================================================================
7 //
8 // File apicoll.cpp
9 //
10 //
11 //
12 // Created by: Helena Shih
13 //
14 // Modification History:
15 //
16 //  Date         Name          Description
17 //  2/5/97      aliu        Added streamIn and streamOut methods.  Added
18 //                          constructor which reads RuleBasedCollator object from
19 //                          a binary file.  Added writeToFile method which streams
20 //                          RuleBasedCollator out to a binary file.  The streamIn
21 //                          and streamOut methods use istream and ostream objects
22 //                          in binary mode.
23 //  6/30/97     helena      Added tests for CollationElementIterator::setText, getOffset
24 //                          setOffset and DecompositionIterator::getOffset, setOffset.
25 //                          DecompositionIterator is made public so add class scope
26 //                          testing.
27 //  02/10/98    damiba      Added test for compare(UnicodeString&, UnicodeString&, int32_t)
28 //===============================================================================
29 
30 #include "unicode/utypes.h"
31 
32 #if !UCONFIG_NO_COLLATION
33 
34 #include "unicode/localpointer.h"
35 #include "unicode/coll.h"
36 #include "unicode/tblcoll.h"
37 #include "unicode/coleitr.h"
38 #include "unicode/sortkey.h"
39 #include "apicoll.h"
40 #include "unicode/chariter.h"
41 #include "unicode/schriter.h"
42 #include "unicode/ustring.h"
43 #include "unicode/ucol.h"
44 
45 #include "sfwdchit.h"
46 #include "cmemory.h"
47 #include <stdlib.h>
48 
49 void
doAssert(UBool condition,const char * message)50 CollationAPITest::doAssert(UBool condition, const char *message)
51 {
52     if (!condition) {
53         errln(UnicodeString("ERROR : ") + message);
54     }
55 }
56 
57 // Collator Class Properties
58 // ctor, dtor, createInstance, compare, getStrength/setStrength
59 // getDecomposition/setDecomposition, getDisplayName
60 void
TestProperty()61 CollationAPITest::TestProperty(/* char* par */)
62 {
63     UErrorCode success = U_ZERO_ERROR;
64     Collator *col = 0;
65     /*
66      * Expected version of the English collator.
67      * Currently, the major/minor version numbers change when the builder code
68      * changes,
69      * number 2 is from the tailoring data version and
70      * number 3 is the UCA version.
71      * This changes with every UCA version change, and the expected value
72      * needs to be adjusted.
73      * Same in cintltst/capitst.c.
74      */
75     UVersionInfo currVersionArray = {0x31, 0xC0, 0x05, 0x2A};  // from ICU 4.4/UCA 5.2
76     UVersionInfo versionArray;
77 
78     logln("The property tests begin : ");
79     logln("Test ctors : ");
80     col = Collator::createInstance(Locale::getEnglish(), success);
81     if (U_FAILURE(success)){
82         errcheckln(success, "Default Collator creation failed. - %s", u_errorName(success));
83         return;
84     }
85 
86     StringEnumeration* kwEnum = col->getKeywordValuesForLocale("", Locale::getEnglish(),true,success);
87     if (U_FAILURE(success)){
88         errcheckln(success, "Get Keyword Values for Locale failed. - %s", u_errorName(success));
89         return;
90     }
91     delete kwEnum;
92 
93     col->getVersion(versionArray);
94     // Check for a version greater than some value rather than equality
95     // so that we need not update the expected version each time.
96     if (uprv_memcmp(versionArray, currVersionArray, 4)<0) {
97       errln("Testing Collator::getVersion() - unexpected result: %02x.%02x.%02x.%02x",
98             versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
99     } else {
100       logln("Collator::getVersion() result: %02x.%02x.%02x.%02x",
101             versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
102     }
103 
104     doAssert((col->compare("ab", "abc") == Collator::LESS), "ab < abc comparison failed");
105     doAssert((col->compare("ab", "AB") == Collator::LESS), "ab < AB comparison failed");
106     doAssert((col->compare("blackbird", "black-bird") == Collator::GREATER), "black-bird > blackbird comparison failed");
107     doAssert((col->compare("black bird", "black-bird") == Collator::LESS), "black bird > black-bird comparison failed");
108     doAssert((col->compare("Hello", "hello") == Collator::GREATER), "Hello > hello comparison failed");
109     doAssert((col->compare("","",success) == UCOL_EQUAL), "Comparison between empty strings failed");
110 
111     doAssert((col->compareUTF8("\x61\x62\xc3\xa4", "\x61\x62\xc3\x9f", success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UTF-8 comparison failed");
112     success = U_ZERO_ERROR;
113     {
114         UnicodeString abau=UNICODE_STRING_SIMPLE("\\x61\\x62\\xe4").unescape();
115         UnicodeString abss=UNICODE_STRING_SIMPLE("\\x61\\x62\\xdf").unescape();
116         UCharIterator abauIter, abssIter;
117         uiter_setReplaceable(&abauIter, &abau);
118         uiter_setReplaceable(&abssIter, &abss);
119         doAssert((col->compare(abauIter, abssIter, success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UCharIterator comparison failed");
120         success = U_ZERO_ERROR;
121     }
122 
123     /*start of update [Bertrand A. D. 02/10/98]*/
124     doAssert((col->compare("ab", "abc", 2) == Collator::EQUAL), "ab = abc with length 2 comparison failed");
125     doAssert((col->compare("ab", "AB", 2) == Collator::LESS), "ab < AB  with length 2 comparison failed");
126     doAssert((col->compare("ab", "Aa", 1) == Collator::LESS), "ab < Aa  with length 1 comparison failed");
127     doAssert((col->compare("ab", "Aa", 2) == Collator::GREATER), "ab > Aa  with length 2 comparison failed");
128     doAssert((col->compare("black-bird", "blackbird", 5) == Collator::EQUAL), "black-bird = blackbird with length of 5 comparison failed");
129     doAssert((col->compare("black bird", "black-bird", 10) == Collator::LESS), "black bird < black-bird with length 10 comparison failed");
130     doAssert((col->compare("Hello", "hello", 5) == Collator::GREATER), "Hello > hello with length 5 comparison failed");
131     /*end of update [Bertrand A. D. 02/10/98]*/
132 
133 
134     logln("Test ctors ends.");
135     logln("testing Collator::getStrength() method ...");
136     doAssert((col->getStrength() == Collator::TERTIARY), "collation object has the wrong strength");
137     doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
138 
139 
140     logln("testing Collator::setStrength() method ...");
141     col->setStrength(Collator::SECONDARY);
142     doAssert((col->getStrength() != Collator::TERTIARY), "collation object's strength is secondary difference");
143     doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
144     doAssert((col->getStrength() == Collator::SECONDARY), "collation object has the wrong strength");
145 
146     UnicodeString name;
147 
148     logln("Get display name for the US English collation in German : ");
149     logln(Collator::getDisplayName(Locale::getUS(), Locale::getGerman(), name));
150     doAssert((name == UnicodeString("Englisch (Vereinigte Staaten)")), "getDisplayName failed");
151 
152     logln("Get display name for the US English collation in English : ");
153     logln(Collator::getDisplayName(Locale::getUS(), Locale::getEnglish(), name));
154     doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed");
155 #if 0
156     // weiv : this test is bogus if we're running on any machine that has different default locale than English.
157     // Therefore, it is banned!
158     logln("Get display name for the US English in default locale language : ");
159     logln(Collator::getDisplayName(Locale::US, name));
160     doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed if this is an English machine");
161 #endif
162     delete col; col = 0;
163     RuleBasedCollator *rcol = (RuleBasedCollator *)Collator::createInstance("da_DK",
164                                                                             success);
165     if (U_FAILURE(success)) {
166         errcheckln(success, "Collator::createInstance(\"da_DK\") failed - %s", u_errorName(success));
167         return;
168     }
169     const UnicodeString &daRules = rcol->getRules();
170     if(daRules.isEmpty()) {
171         dataerrln("missing da_DK tailoring rule string");
172     } else {
173         doAssert(daRules.indexOf("aa") >= 0, "da_DK rules do not contain 'aa'");
174     }
175     delete rcol;
176 
177     col = Collator::createInstance(Locale::getFrench(), success);
178     if (U_FAILURE(success))
179     {
180         errln("Creating French collation failed.");
181         return;
182     }
183 
184     col->setStrength(Collator::PRIMARY);
185     logln("testing Collator::getStrength() method again ...");
186     doAssert((col->getStrength() != Collator::TERTIARY), "collation object has the wrong strength");
187     doAssert((col->getStrength() == Collator::PRIMARY), "collation object's strength is not primary difference");
188 
189     logln("testing French Collator::setStrength() method ...");
190     col->setStrength(Collator::TERTIARY);
191     doAssert((col->getStrength() == Collator::TERTIARY), "collation object's strength is not tertiary difference");
192     doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
193     doAssert((col->getStrength() != Collator::SECONDARY), "collation object's strength is secondary difference");
194     delete col;
195 
196     logln("Create junk collation: ");
197     Locale abcd("ab", "CD", "");
198     success = U_ZERO_ERROR;
199     Collator *junk = 0;
200     junk = Collator::createInstance(abcd, success);
201 
202     if (U_FAILURE(success))
203     {
204         errln("Junk collation creation failed, should at least return default.");
205         return;
206     }
207 
208     doAssert(((RuleBasedCollator *)junk)->getRules().isEmpty(),
209                "The root collation should be returned for an unsupported language.");
210     Collator *frCol = Collator::createInstance(Locale::getCanadaFrench(), success);
211     if (U_FAILURE(success))
212     {
213         errln("Creating fr_CA collator failed.");
214         delete junk;
215         return;
216     }
217 
218     // If the default locale isn't French, the French and non-French collators
219     // should be different
220     if (frCol->getLocale(ULOC_ACTUAL_LOCALE, success) != Locale::getCanadaFrench()) {
221         doAssert((*frCol != *junk), "The junk is the same as the fr_CA collator.");
222     }
223     Collator *aFrCol = frCol->clone();
224     doAssert((*frCol == *aFrCol), "The cloning of a fr_CA collator failed.");
225     logln("Collator property test ended.");
226 
227     delete frCol;
228     delete aFrCol;
229     delete junk;
230 }
231 
232 void
TestRuleBasedColl()233 CollationAPITest::TestRuleBasedColl()
234 {
235     RuleBasedCollator *col1, *col2, *col3, *col4;
236     UErrorCode status = U_ZERO_ERROR;
237 
238     UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
239     UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
240 
241     col1 = new RuleBasedCollator(ruleset1, status);
242     if (U_FAILURE(status)) {
243         errcheckln(status, "RuleBased Collator creation failed. - %s", u_errorName(status));
244         return;
245     }
246     else {
247         logln("PASS: RuleBased Collator creation passed\n");
248     }
249 
250     status = U_ZERO_ERROR;
251     col2 = new RuleBasedCollator(ruleset2, status);
252     if (U_FAILURE(status)) {
253         errln("RuleBased Collator creation failed.\n");
254         return;
255     }
256     else {
257         logln("PASS: RuleBased Collator creation passed\n");
258     }
259 
260     status = U_ZERO_ERROR;
261     Locale locale("aa", "AA");
262     col3 = (RuleBasedCollator *)Collator::createInstance(locale, status);
263     if (U_FAILURE(status)) {
264         errln("Fallback Collator creation failed.: %s\n");
265         return;
266     }
267     else {
268         logln("PASS: Fallback Collator creation passed\n");
269     }
270     delete col3;
271 
272     status = U_ZERO_ERROR;
273     col3 = (RuleBasedCollator *)Collator::createInstance(status);
274     if (U_FAILURE(status)) {
275         errln("Default Collator creation failed.: %s\n");
276         return;
277     }
278     else {
279         logln("PASS: Default Collator creation passed\n");
280     }
281 
282     UnicodeString rule1 = col1->getRules();
283     UnicodeString rule2 = col2->getRules();
284     UnicodeString rule3 = col3->getRules();
285 
286     doAssert(rule1 != rule2, "Default collator getRules failed");
287     doAssert(rule2 != rule3, "Default collator getRules failed");
288     doAssert(rule1 != rule3, "Default collator getRules failed");
289 
290     col4 = new RuleBasedCollator(rule2, status);
291     if (U_FAILURE(status)) {
292         errln("RuleBased Collator creation failed.\n");
293         return;
294     }
295 
296     UnicodeString rule4 = col4->getRules();
297     doAssert(rule2 == rule4, "Default collator getRules failed");
298     int32_t length4 = 0;
299     uint8_t *clonedrule4 = col4->cloneRuleData(length4, status);
300     if (U_FAILURE(status)) {
301         errln("Cloned rule data failed.\n");
302         return;
303     }
304 
305  //   free(clonedrule4);     BAD API!!!!
306     uprv_free(clonedrule4);
307 
308 
309     delete col1;
310     delete col2;
311     delete col3;
312     delete col4;
313 }
314 
315 void
TestRules()316 CollationAPITest::TestRules()
317 {
318     RuleBasedCollator *coll;
319     UErrorCode status = U_ZERO_ERROR;
320     UnicodeString rules;
321 
322     coll = (RuleBasedCollator *)Collator::createInstance(Locale::getEnglish(), status);
323     if (U_FAILURE(status)) {
324         errcheckln(status, "English Collator creation failed. - %s", u_errorName(status));
325         return;
326     }
327     else {
328         logln("PASS: RuleBased Collator creation passed\n");
329     }
330 
331     coll->getRules(UCOL_TAILORING_ONLY, rules);
332     if (rules.length() != 0x00) {
333       errln("English tailored rules failed - length is 0x%x expected 0x%x", rules.length(), 0x00);
334     }
335 
336     coll->getRules(UCOL_FULL_RULES, rules);
337     if (rules.length() < 0) {
338         errln("English full rules failed");
339     }
340     delete coll;
341 }
342 
343 void
TestDecomposition()344 CollationAPITest::TestDecomposition() {
345   UErrorCode status = U_ZERO_ERROR;
346   Collator *en_US = Collator::createInstance("en_US", status),
347     *el_GR = Collator::createInstance("el_GR", status),
348     *vi_VN = Collator::createInstance("vi_VN", status);
349 
350   if (U_FAILURE(status)) {
351     errcheckln(status, "ERROR: collation creation failed. - %s", u_errorName(status));
352     return;
353   }
354 
355   /* there is no reason to have canonical decomposition in en_US OR default locale */
356   if (vi_VN->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
357   {
358     errln("ERROR: vi_VN collation did not have canonical decomposition for normalization!\n");
359   }
360 
361   if (el_GR->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
362   {
363     errln("ERROR: el_GR collation did not have canonical decomposition for normalization!\n");
364   }
365 
366   if (en_US->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
367   {
368     errln("ERROR: en_US collation had canonical decomposition for normalization!\n");
369   }
370 
371   delete en_US;
372   delete el_GR;
373   delete vi_VN;
374 }
375 
376 void
TestSafeClone()377 CollationAPITest::TestSafeClone() {
378     static const int CLONETEST_COLLATOR_COUNT = 3;
379     Collator *someCollators [CLONETEST_COLLATOR_COUNT];
380     Collator *col;
381     UErrorCode err = U_ZERO_ERROR;
382     int index;
383 
384     UnicodeString test1("abCda");
385     UnicodeString test2("abcda");
386 
387     /* one default collator & two complex ones */
388     someCollators[0] = Collator::createInstance("en_US", err);
389     someCollators[1] = Collator::createInstance("ko", err);
390     someCollators[2] = Collator::createInstance("ja_JP", err);
391     if(U_FAILURE(err)) {
392       errcheckln(err, "Couldn't instantiate collators. Error: %s", u_errorName(err));
393       delete someCollators[0];
394       delete someCollators[1];
395       delete someCollators[2];
396       return;
397     }
398 
399     /* change orig & clone & make sure they are independent */
400 
401     for (index = 0; index < CLONETEST_COLLATOR_COUNT; index++)
402     {
403         col = someCollators[index]->safeClone();
404         if (col == 0) {
405             errln("SafeClone of collator should not return null\n");
406             break;
407         }
408         col->setStrength(Collator::TERTIARY);
409         someCollators[index]->setStrength(Collator::PRIMARY);
410         col->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
411         someCollators[index]->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
412 
413         doAssert(col->greater(test1, test2), "Result should be \"abCda\" >>> \"abcda\" ");
414         doAssert(someCollators[index]->equals(test1, test2), "Result should be \"abcda\" == \"abCda\"");
415         delete col;
416         delete someCollators[index];
417     }
418 }
419 
420 void
TestHashCode()421 CollationAPITest::TestHashCode(/* char* par */)
422 {
423     logln("hashCode tests begin.");
424     UErrorCode success = U_ZERO_ERROR;
425     Collator *col1 = 0;
426     col1 = Collator::createInstance(Locale::getEnglish(), success);
427     if (U_FAILURE(success))
428     {
429         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
430         return;
431     }
432 
433     Collator *col2 = 0;
434     Locale dk("da", "DK", "");
435     col2 = Collator::createInstance(dk, success);
436     if (U_FAILURE(success))
437     {
438         errln("Danish collation creation failed.");
439         return;
440     }
441 
442     Collator *col3 = 0;
443     col3 = Collator::createInstance(Locale::getEnglish(), success);
444     if (U_FAILURE(success))
445     {
446         errln("2nd default collation creation failed.");
447         return;
448     }
449 
450     logln("Collator::hashCode() testing ...");
451 
452     doAssert(col1->hashCode() != col2->hashCode(), "Hash test1 result incorrect" );
453     doAssert(!(col1->hashCode() == col2->hashCode()), "Hash test2 result incorrect" );
454     doAssert(col1->hashCode() == col3->hashCode(), "Hash result not equal" );
455 
456     logln("hashCode tests end.");
457     delete col1;
458     delete col2;
459 
460     UnicodeString test1("Abcda");
461     UnicodeString test2("abcda");
462 
463     CollationKey sortk1, sortk2, sortk3;
464     UErrorCode status = U_ZERO_ERROR;
465 
466     col3->getCollationKey(test1, sortk1, status);
467     col3->getCollationKey(test2, sortk2, status);
468     col3->getCollationKey(test2, sortk3, status);
469 
470     doAssert(sortk1.hashCode() != sortk2.hashCode(), "Hash test1 result incorrect");
471     doAssert(sortk2.hashCode() == sortk3.hashCode(), "Hash result not equal" );
472 
473     delete col3;
474 }
475 
476 //----------------------------------------------------------------------------
477 // CollationKey -- Tests the CollationKey methods
478 //
479 void
TestCollationKey()480 CollationAPITest::TestCollationKey(/* char* par */)
481 {
482     logln("testing CollationKey begins...");
483     Collator *col = 0;
484     UErrorCode success=U_ZERO_ERROR;
485     col = Collator::createInstance(Locale::getEnglish(), success);
486     if (U_FAILURE(success))
487     {
488         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
489         return;
490     }
491     col->setStrength(Collator::TERTIARY);
492 
493     CollationKey sortk1, sortk2;
494     UnicodeString test1("Abcda"), test2("abcda");
495     UErrorCode key1Status = U_ZERO_ERROR, key2Status = U_ZERO_ERROR;
496 
497     logln("Testing weird arguments");
498     // No string vs. empty string vs. completely-ignorable string:
499     // See ICU ticket #10495.
500     CollationKey sortkNone;
501     int32_t length;
502     sortkNone.getByteArray(length);
503     doAssert(!sortkNone.isBogus() && length == 0,
504              "Default-constructed collation key should be empty");
505     CollationKey sortkEmpty;
506     col->getCollationKey(NULL, 0, sortkEmpty, key1Status);
507     // key gets reset here
508     const uint8_t* byteArrayEmpty = sortkEmpty.getByteArray(length);
509     doAssert(sortkEmpty.isBogus() == FALSE && length == 3 &&
510              byteArrayEmpty[0] == 1 && byteArrayEmpty[1] == 1 && byteArrayEmpty[2] == 0,
511              "Empty string should return a collation key with empty levels");
512     doAssert(sortkNone.compareTo(sortkEmpty) == Collator::LESS,
513              "Expected no collation key < collation key for empty string");
514     doAssert(sortkEmpty.compareTo(sortkNone) == Collator::GREATER,
515              "Expected collation key for empty string > no collation key");
516 
517     CollationKey sortkIgnorable;
518     // Most control codes and CGJ are completely ignorable.
519     // A string with only completely ignorables must compare equal to an empty string.
520     col->getCollationKey(UnicodeString((UChar)1).append((UChar)0x34f), sortkIgnorable, key1Status);
521     sortkIgnorable.getByteArray(length);
522     doAssert(!sortkIgnorable.isBogus() && length == 3,
523              "Completely ignorable string should return a collation key with empty levels");
524     doAssert(sortkIgnorable.compareTo(sortkEmpty) == Collator::EQUAL,
525              "Completely ignorable string should compare equal to empty string");
526 
527     // bogus key returned here
528     key1Status = U_ILLEGAL_ARGUMENT_ERROR;
529     col->getCollationKey(NULL, 0, sortk1, key1Status);
530     doAssert(sortk1.isBogus() && (sortk1.getByteArray(length), length) == 0,
531         "Error code should return bogus collation key");
532 
533     key1Status = U_ZERO_ERROR;
534     logln("Use tertiary comparison level testing ....");
535 
536     col->getCollationKey(test1, sortk1, key1Status);
537     if (U_FAILURE(key1Status)) {
538         errln("getCollationKey(Abcda) failed - %s", u_errorName(key1Status));
539         return;
540     }
541     doAssert((sortk1.compareTo(col->getCollationKey(test2, sortk2, key2Status)))
542                  == Collator::GREATER,
543                 "Result should be \"Abcda\" >>> \"abcda\"");
544 
545     CollationKey sortk3(sortk2), sortkNew;
546 
547     sortkNew = sortk1;
548     doAssert((sortk1 != sortk2), "The sort keys should be different");
549     doAssert((sortk1.hashCode() != sortk2.hashCode()), "sort key hashCode() failed");
550     doAssert((sortk2 == sortk3), "The sort keys should be the same");
551     doAssert((sortk1 == sortkNew), "The sort keys assignment failed");
552     doAssert((sortk1.hashCode() == sortkNew.hashCode()), "sort key hashCode() failed");
553     doAssert((sortkNew != sortk3), "The sort keys should be different");
554     doAssert(sortk1.compareTo(sortk3) == Collator::GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
555     doAssert(sortk2.compareTo(sortk3) == Collator::EQUAL, "Result should be \"abcda\" == \"abcda\"");
556     doAssert(sortkEmpty.compareTo(sortk1) == Collator::LESS, "Result should be (empty key) <<< \"Abcda\"");
557     doAssert(sortk1.compareTo(sortkEmpty) == Collator::GREATER, "Result should be \"Abcda\" >>> (empty key)");
558     doAssert(sortkEmpty.compareTo(sortkEmpty) == Collator::EQUAL, "Result should be (empty key) == (empty key)");
559     doAssert(sortk1.compareTo(sortk3, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
560     doAssert(sortk2.compareTo(sortk3, success) == UCOL_EQUAL, "Result should be \"abcda\" == \"abcda\"");
561     doAssert(sortkEmpty.compareTo(sortk1, success) == UCOL_LESS, "Result should be (empty key) <<< \"Abcda\"");
562     doAssert(sortk1.compareTo(sortkEmpty, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> (empty key)");
563     doAssert(sortkEmpty.compareTo(sortkEmpty, success) == UCOL_EQUAL, "Result should be (empty key) == (empty key)");
564 
565     int32_t    cnt1, cnt2, cnt3, cnt4;
566 
567     const uint8_t* byteArray1 = sortk1.getByteArray(cnt1);
568     const uint8_t* byteArray2 = sortk2.getByteArray(cnt2);
569 
570     const uint8_t* byteArray3 = 0;
571     byteArray3 = sortk1.getByteArray(cnt3);
572 
573     const uint8_t* byteArray4 = 0;
574     byteArray4 = sortk2.getByteArray(cnt4);
575 
576     CollationKey sortk4(byteArray1, cnt1), sortk5(byteArray2, cnt2);
577     CollationKey sortk6(byteArray3, cnt3), sortk7(byteArray4, cnt4);
578 
579     doAssert(sortk1.compareTo(sortk4) == Collator::EQUAL, "CollationKey::toByteArray(sortk1) Failed.");
580     doAssert(sortk2.compareTo(sortk5) == Collator::EQUAL, "CollationKey::toByteArray(sortk2) Failed.");
581     doAssert(sortk4.compareTo(sortk5) == Collator::GREATER, "sortk4 >>> sortk5 Failed");
582     doAssert(sortk1.compareTo(sortk6) == Collator::EQUAL, "CollationKey::getByteArray(sortk1) Failed.");
583     doAssert(sortk2.compareTo(sortk7) == Collator::EQUAL, "CollationKey::getByteArray(sortk2) Failed.");
584     doAssert(sortk6.compareTo(sortk7) == Collator::GREATER, "sortk6 >>> sortk7 Failed");
585 
586     logln("Equality tests : ");
587     doAssert(sortk1 == sortk4, "sortk1 == sortk4 Failed.");
588     doAssert(sortk2 == sortk5, "sortk2 == sortk5 Failed.");
589     doAssert(sortk1 != sortk5, "sortk1 != sortk5 Failed.");
590     doAssert(sortk1 == sortk6, "sortk1 == sortk6 Failed.");
591     doAssert(sortk2 == sortk7, "sortk2 == sortk7 Failed.");
592     doAssert(sortk1 != sortk7, "sortk1 != sortk7 Failed.");
593 
594     byteArray1 = 0;
595     byteArray2 = 0;
596 
597     sortk3 = sortk1;
598     doAssert(sortk1 == sortk3, "sortk1 = sortk3 assignment Failed.");
599     doAssert(sortk2 != sortk3, "sortk2 != sortk3 Failed.");
600     logln("testing sortkey ends...");
601 
602     col->setStrength(Collator::SECONDARY);
603     doAssert(col->getCollationKey(test1, sortk1, key1Status).compareTo(
604                                   col->getCollationKey(test2, sortk2, key2Status))
605                                   == Collator::EQUAL,
606                                   "Result should be \"Abcda\" == \"abcda\"");
607     delete col;
608 }
609 
610 //----------------------------------------------------------------------------
611 // Tests the CollatorElementIterator class.
612 // ctor, RuleBasedCollator::createCollationElementIterator(), operator==, operator!=
613 //
614 void
TestElemIter()615 CollationAPITest::TestElemIter(/* char* par */)
616 {
617     logln("testing sortkey begins...");
618     Collator *col = 0;
619     UErrorCode success = U_ZERO_ERROR;
620     col = Collator::createInstance(Locale::getEnglish(), success);
621     if (U_FAILURE(success))
622     {
623         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
624         return;
625     }
626 
627     UnicodeString testString1("XFILE What subset of all possible test cases has the highest probability of detecting the most errors?");
628     UnicodeString testString2("Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?");
629     logln("Constructors and comparison testing....");
630     CollationElementIterator *iterator1 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
631 
632     CharacterIterator *chariter=new StringCharacterIterator(testString1);
633     CollationElementIterator *coliter=((RuleBasedCollator*)col)->createCollationElementIterator(*chariter);
634 
635     // copy ctor
636     CollationElementIterator *iterator2 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
637     CollationElementIterator *iterator3 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString2);
638 
639     int32_t offset = iterator1->getOffset();
640     if (offset != 0) {
641         errln("Error in getOffset for collation element iterator\n");
642         return;
643     }
644     iterator1->setOffset(6, success);
645     if (U_FAILURE(success)) {
646         errln("Error in setOffset for collation element iterator\n");
647         return;
648     }
649     iterator1->setOffset(0, success);
650     int32_t order1, order2, order3;
651     doAssert((*iterator1 == *iterator2), "The two iterators should be the same");
652     doAssert((*iterator1 != *iterator3), "The two iterators should be different");
653 
654     doAssert((*coliter == *iterator1), "The two iterators should be the same");
655     doAssert((*coliter == *iterator2), "The two iterators should be the same");
656     doAssert((*coliter != *iterator3), "The two iterators should be different");
657 
658     order1 = iterator1->next(success);
659     if (U_FAILURE(success))
660     {
661         errln("Somehow ran out of memory stepping through the iterator.");
662         return;
663     }
664 
665     doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
666     order2 = iterator2->getOffset();
667     doAssert((order1 != order2), "The order result should not be the same");
668     order2 = iterator2->next(success);
669     if (U_FAILURE(success))
670     {
671         errln("Somehow ran out of memory stepping through the iterator.");
672         return;
673     }
674 
675     doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
676     doAssert((order1 == order2), "The order result should be the same");
677     order3 = iterator3->next(success);
678     if (U_FAILURE(success))
679     {
680         errln("Somehow ran out of memory stepping through the iterator.");
681         return;
682     }
683 
684     doAssert((CollationElementIterator::primaryOrder(order1) ==
685         CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
686     doAssert((CollationElementIterator::secondaryOrder(order1) ==
687         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
688     doAssert((CollationElementIterator::tertiaryOrder(order1) ==
689         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
690 
691     order1 = iterator1->next(success); order3 = iterator3->next(success);
692     if (U_FAILURE(success))
693     {
694         errln("Somehow ran out of memory stepping through the iterator.");
695         return;
696     }
697 
698     doAssert((CollationElementIterator::primaryOrder(order1) ==
699         CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
700     doAssert((CollationElementIterator::tertiaryOrder(order1) !=
701         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
702 
703     order1 = iterator1->next(success);
704     order3 = iterator3->next(success);
705     /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
706     /*
707     doAssert((CollationElementIterator::secondaryOrder(order1) !=
708         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
709     */
710     doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
711 
712     iterator1->reset(); iterator2->reset(); iterator3->reset();
713     order1 = iterator1->next(success);
714     if (U_FAILURE(success))
715     {
716         errln("Somehow ran out of memory stepping through the iterator.");
717         return;
718     }
719 
720     doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
721 
722     order2 = iterator2->next(success);
723     if (U_FAILURE(success))
724     {
725         errln("Somehow ran out of memory stepping through the iterator.");
726         return;
727     }
728 
729     doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
730     doAssert((order1 == order2), "The order result should be the same");
731 
732     order3 = iterator3->next(success);
733     if (U_FAILURE(success))
734     {
735         errln("Somehow ran out of memory stepping through the iterator.");
736         return;
737     }
738 
739     doAssert((CollationElementIterator::primaryOrder(order1) ==
740         CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
741     doAssert((CollationElementIterator::secondaryOrder(order1) ==
742         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
743     doAssert((CollationElementIterator::tertiaryOrder(order1) ==
744         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
745 
746     order1 = iterator1->next(success); order2 = iterator2->next(success); order3 = iterator3->next(success);
747     if (U_FAILURE(success))
748     {
749         errln("Somehow ran out of memory stepping through the iterator.");
750         return;
751     }
752 
753     doAssert((CollationElementIterator::primaryOrder(order1) ==
754         CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
755     doAssert((CollationElementIterator::tertiaryOrder(order1) !=
756         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
757 
758     order1 = iterator1->next(success); order3 = iterator3->next(success);
759     if (U_FAILURE(success))
760     {
761         errln("Somehow ran out of memory stepping through the iterator.");
762         return;
763     }
764 
765     /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
766     /*
767     doAssert((CollationElementIterator::secondaryOrder(order1) !=
768         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
769     */
770     doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
771     doAssert((*iterator2 != *iterator3), "The iterators should be different");
772 
773 
774     //test error values
775     success=U_UNSUPPORTED_ERROR;
776     Collator *colerror=NULL;
777     colerror=Collator::createInstance(Locale::getEnglish(), success);
778     if (colerror != 0 || success == U_ZERO_ERROR){
779         errln("Error: createInstance(UErrorCode != U_ZERO_ERROR) should just return and not create an instance\n");
780     }
781     int32_t position=coliter->previous(success);
782     if(position != CollationElementIterator::NULLORDER){
783         errln((UnicodeString)"Expected NULLORDER got" + position);
784     }
785     coliter->reset();
786     coliter->setText(*chariter, success);
787     if(!U_FAILURE(success)){
788         errln("Expeceted error");
789     }
790     iterator1->setText((UnicodeString)"hello there", success);
791     if(!U_FAILURE(success)){
792         errln("Expeceted error");
793     }
794 
795     delete chariter;
796     delete coliter;
797     delete iterator1;
798     delete iterator2;
799     delete iterator3;
800     delete col;
801 
802 
803 
804     logln("testing CollationElementIterator ends...");
805 }
806 
807 // Test RuleBasedCollator ctor, dtor, operator==, operator!=, clone, copy, and getRules
808 void
TestOperators()809 CollationAPITest::TestOperators(/* char* par */)
810 {
811     UErrorCode success = U_ZERO_ERROR;
812     UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
813     UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
814     RuleBasedCollator *col1 = new RuleBasedCollator(ruleset1, success);
815     if (U_FAILURE(success)) {
816         errcheckln(success, "RuleBasedCollator creation failed. - %s", u_errorName(success));
817         return;
818     }
819     success = U_ZERO_ERROR;
820     RuleBasedCollator *col2 = new RuleBasedCollator(ruleset2, success);
821     if (U_FAILURE(success)) {
822         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set.");
823         return;
824     }
825     logln("The operator tests begin : ");
826     logln("testing operator==, operator!=, clone  methods ...");
827     doAssert((*col1 != *col2), "The two different table collations compared equal");
828     *col1 = *col2;
829     doAssert((*col1 == *col2), "Collator objects not equal after assignment (operator=)");
830 
831     success = U_ZERO_ERROR;
832     Collator *col3 = Collator::createInstance(Locale::getEnglish(), success);
833     if (U_FAILURE(success)) {
834         errln("Default collation creation failed.");
835         return;
836     }
837     doAssert((*col1 != *col3), "The two different table collations compared equal");
838     Collator* col4 = col1->clone();
839     Collator* col5 = col3->clone();
840     doAssert((*col1 == *col4), "Cloned collation objects not equal");
841     doAssert((*col3 != *col4), "Two different table collations compared equal");
842     doAssert((*col3 == *col5), "Cloned collation objects not equal");
843     doAssert((*col4 != *col5), "Two cloned collations compared equal");
844 
845     const UnicodeString& defRules = ((RuleBasedCollator*)col3)->getRules();
846     RuleBasedCollator* col6 = new RuleBasedCollator(defRules, success);
847     if (U_FAILURE(success)) {
848         errln("Creating default collation with rules failed.");
849         return;
850     }
851     doAssert((((RuleBasedCollator*)col3)->getRules() == col6->getRules()), "Default collator getRules failed");
852 
853     success = U_ZERO_ERROR;
854     RuleBasedCollator *col7 = new RuleBasedCollator(ruleset2, Collator::TERTIARY, success);
855     if (U_FAILURE(success)) {
856         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength.");
857         return;
858     }
859     success = U_ZERO_ERROR;
860     RuleBasedCollator *col8 = new RuleBasedCollator(ruleset2, UCOL_OFF, success);
861     if (U_FAILURE(success)) {
862         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with Normalizer::NO_OP.");
863         return;
864     }
865     success = U_ZERO_ERROR;
866     RuleBasedCollator *col9 = new RuleBasedCollator(ruleset2, Collator::PRIMARY, UCOL_ON, success);
867     if (U_FAILURE(success)) {
868         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength and Normalizer::NO_OP.");
869         return;
870     }
871   //  doAssert((*col7 == *col8), "The two equal table collations compared different");
872     doAssert((*col7 != *col9), "The two different table collations compared equal");
873     doAssert((*col8 != *col9), "The two different table collations compared equal");
874 
875     logln("operator tests ended.");
876     delete col1;
877     delete col2;
878     delete col3;
879     delete col4;
880     delete col5;
881     delete col6;
882     delete col7;
883     delete col8;
884     delete col9;
885 }
886 
887 // test clone and copy
888 void
TestDuplicate()889 CollationAPITest::TestDuplicate(/* char* par */)
890 {
891     UErrorCode status = U_ZERO_ERROR;
892     Collator *col1 = Collator::createInstance(Locale::getEnglish(), status);
893     if (U_FAILURE(status)) {
894         logln("Default collator creation failed.");
895         return;
896     }
897     Collator *col2 = col1->clone();
898     doAssert((*col1 == *col2), "Cloned object is not equal to the orginal");
899     UnicodeString ruleset("&9 < a, A < b, B < c, C < d, D, e, E");
900     RuleBasedCollator *col3 = new RuleBasedCollator(ruleset, status);
901     if (U_FAILURE(status)) {
902         logln("Collation tailoring failed.");
903         return;
904     }
905     doAssert((*col1 != *col3), "Cloned object is equal to some dummy");
906     *col3 = *((RuleBasedCollator*)col1);
907     doAssert((*col1 == *col3), "Copied object is not equal to the orginal");
908 
909     UCollationResult res;
910     UnicodeString first((UChar)0x0061);
911     UnicodeString second((UChar)0x0062);
912     UnicodeString copiedEnglishRules(((RuleBasedCollator*)col1)->getRules());
913 
914     delete col1;
915 
916     // Try using the cloned collators after deleting the original data
917     res = col2->compare(first, second, status);
918     if(res != UCOL_LESS) {
919         errln("a should be less then b after tailoring");
920     }
921     if (((RuleBasedCollator*)col2)->getRules() != copiedEnglishRules) {
922         errln(UnicodeString("English rule difference. ")
923             + copiedEnglishRules + UnicodeString("\ngetRules=") + ((RuleBasedCollator*)col2)->getRules());
924     }
925     res = col3->compare(first, second, status);
926     if(res != UCOL_LESS) {
927         errln("a should be less then b after tailoring");
928     }
929     if (col3->getRules() != copiedEnglishRules) {
930         errln(UnicodeString("English rule difference. ")
931             + copiedEnglishRules + UnicodeString("\ngetRules=") + col3->getRules());
932     }
933 
934     delete col2;
935     delete col3;
936 }
937 
938 void
TestCompare()939 CollationAPITest::TestCompare(/* char* par */)
940 {
941     logln("The compare tests begin : ");
942     Collator *col = 0;
943     UErrorCode success = U_ZERO_ERROR;
944     col = Collator::createInstance(Locale::getEnglish(), success);
945     if (U_FAILURE(success)) {
946         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
947         return;
948     }
949     UnicodeString test1("Abcda"), test2("abcda");
950     logln("Use tertiary comparison level testing ....");
951 
952     doAssert((!col->equals(test1, test2) ), "Result should be \"Abcda\" != \"abcda\"");
953     doAssert((col->greater(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
954     doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
955 
956     col->setStrength(Collator::SECONDARY);
957     logln("Use secondary comparison level testing ....");
958 
959     doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
960     doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
961     doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
962 
963     col->setStrength(Collator::PRIMARY);
964     logln("Use primary comparison level testing ....");
965 
966     doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
967     doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
968     doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
969 
970     // Test different APIs
971     const UChar* t1 = test1.getBuffer();
972     int32_t t1Len = test1.length();
973     const UChar* t2 = test2.getBuffer();
974     int32_t t2Len = test2.length();
975 
976     doAssert((col->compare(test1, test2) == Collator::EQUAL), "Problem");
977     doAssert((col->compare(test1, test2, success) == UCOL_EQUAL), "Problem");
978     doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::EQUAL), "Problem");
979     doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_EQUAL), "Problem");
980     doAssert((col->compare(test1, test2, t1Len) == Collator::EQUAL), "Problem");
981     doAssert((col->compare(test1, test2, t1Len, success) == UCOL_EQUAL), "Problem");
982 
983     col->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, success);
984     doAssert((col->compare(test1, test2) == Collator::GREATER), "Problem");
985     doAssert((col->compare(test1, test2, success) == UCOL_GREATER), "Problem");
986     doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::GREATER), "Problem");
987     doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_GREATER), "Problem");
988     doAssert((col->compare(test1, test2, t1Len) == Collator::GREATER), "Problem");
989     doAssert((col->compare(test1, test2, t1Len, success) == UCOL_GREATER), "Problem");
990 
991 
992 
993     logln("The compare tests end.");
994     delete col;
995 }
996 
997 void
TestGetAll()998 CollationAPITest::TestGetAll(/* char* par */)
999 {
1000     int32_t count1, count2;
1001     UErrorCode status = U_ZERO_ERROR;
1002 
1003     logln("Trying Collator::getAvailableLocales(int&)");
1004 
1005     const Locale* list = Collator::getAvailableLocales(count1);
1006     for (int32_t i = 0; i < count1; ++i) {
1007         UnicodeString dispName;
1008         logln(UnicodeString("Locale name: ")
1009             + UnicodeString(list[i].getName())
1010             + UnicodeString(" , the display name is : ")
1011             + UnicodeString(list[i].getDisplayName(dispName)));
1012     }
1013 
1014     if (count1 == 0 || list == NULL) {
1015         dataerrln("getAvailableLocales(int&) returned an empty list");
1016     }
1017 
1018     logln("Trying Collator::getAvailableLocales()");
1019     StringEnumeration* localeEnum = Collator::getAvailableLocales();
1020     const UnicodeString* locStr;
1021     const char *locCStr;
1022     count2 = 0;
1023 
1024     if (localeEnum == NULL) {
1025         dataerrln("getAvailableLocales() returned NULL");
1026         return;
1027     }
1028 
1029     while ((locStr = localeEnum->snext(status)) != NULL)
1030     {
1031         logln(UnicodeString("Locale name is: ") + *locStr);
1032         count2++;
1033     }
1034     if (count1 != count2) {
1035         errln("getAvailableLocales(int&) returned %d and getAvailableLocales() returned %d", count1, count2);
1036     }
1037 
1038     logln("Trying Collator::getAvailableLocales() clone");
1039     count1 = 0;
1040     StringEnumeration* localeEnum2 = localeEnum->clone();
1041     localeEnum2->reset(status);
1042     while ((locCStr = localeEnum2->next(NULL, status)) != NULL)
1043     {
1044         logln(UnicodeString("Locale name is: ") + UnicodeString(locCStr));
1045         count1++;
1046     }
1047     if (count1 != count2) {
1048         errln("getAvailableLocales(3rd time) returned %d and getAvailableLocales(2nd time) returned %d", count1, count2);
1049     }
1050     if (localeEnum->count(status) != count1) {
1051         errln("localeEnum->count() returned %d and getAvailableLocales() returned %d", localeEnum->count(status), count1);
1052     }
1053     delete localeEnum;
1054     delete localeEnum2;
1055 }
1056 
TestSortKey()1057 void CollationAPITest::TestSortKey()
1058 {
1059     UErrorCode status = U_ZERO_ERROR;
1060     /*
1061     this is supposed to open default date format, but later on it treats
1062     it like it is "en_US"
1063     - very bad if you try to run the tests on machine where default
1064       locale is NOT "en_US"
1065     */
1066     Collator *col = Collator::createInstance(Locale::getEnglish(), status);
1067     if (U_FAILURE(status)) {
1068         errcheckln(status, "ERROR: Default collation creation failed.: %s\n", u_errorName(status));
1069         return;
1070     }
1071 
1072     if (col->getStrength() != Collator::TERTIARY)
1073     {
1074         errln("ERROR: default collation did not have UCOL_DEFAULT_STRENGTH !\n");
1075     }
1076 
1077     /* Need to use identical strength */
1078     col->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, status);
1079 
1080     UChar test1[6] = {0x41, 0x62, 0x63, 0x64, 0x61, 0},
1081           test2[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0},
1082           test3[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0};
1083 
1084     uint8_t sortkey1[64];
1085     uint8_t sortkey2[64];
1086     uint8_t sortkey3[64];
1087 
1088     logln("Use tertiary comparison level testing ....\n");
1089 
1090     CollationKey key1;
1091     col->getCollationKey(test1, u_strlen(test1), key1, status);
1092 
1093     CollationKey key2;
1094     col->getCollationKey(test2, u_strlen(test2), key2, status);
1095 
1096     CollationKey key3;
1097     col->getCollationKey(test3, u_strlen(test3), key3, status);
1098 
1099     doAssert(key1.compareTo(key2) == Collator::GREATER,
1100         "Result should be \"Abcda\" > \"abcda\"");
1101     doAssert(key2.compareTo(key1) == Collator::LESS,
1102         "Result should be \"abcda\" < \"Abcda\"");
1103     doAssert(key2.compareTo(key3) == Collator::EQUAL,
1104         "Result should be \"abcda\" ==  \"abcda\"");
1105 
1106     // Clone the key2 sortkey for later.
1107     int32_t keylength = 0;
1108     const uint8_t *key2primary_alias = key2.getByteArray(keylength);
1109     LocalArray<uint8_t> key2primary(new uint8_t[keylength]);
1110     memcpy(key2primary.getAlias(), key2primary_alias, keylength);
1111 
1112     col->getSortKey(test1, sortkey1, 64);
1113     col->getSortKey(test2, sortkey2, 64);
1114     col->getSortKey(test3, sortkey3, 64);
1115 
1116     const uint8_t *tempkey = key1.getByteArray(keylength);
1117     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1118         "Test1 string should have the same collation key and sort key");
1119     tempkey = key2.getByteArray(keylength);
1120     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1121         "Test2 string should have the same collation key and sort key");
1122     tempkey = key3.getByteArray(keylength);
1123     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1124         "Test3 string should have the same collation key and sort key");
1125 
1126     col->getSortKey(test1, 5, sortkey1, 64);
1127     col->getSortKey(test2, 5, sortkey2, 64);
1128     col->getSortKey(test3, 5, sortkey3, 64);
1129 
1130     tempkey = key1.getByteArray(keylength);
1131     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1132         "Test1 string should have the same collation key and sort key");
1133     tempkey = key2.getByteArray(keylength);
1134     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1135         "Test2 string should have the same collation key and sort key");
1136     tempkey = key3.getByteArray(keylength);
1137     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1138         "Test3 string should have the same collation key and sort key");
1139 
1140     UnicodeString strtest1(test1);
1141     col->getSortKey(strtest1, sortkey1, 64);
1142     UnicodeString strtest2(test2);
1143     col->getSortKey(strtest2, sortkey2, 64);
1144     UnicodeString strtest3(test3);
1145     col->getSortKey(strtest3, sortkey3, 64);
1146 
1147     tempkey = key1.getByteArray(keylength);
1148     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1149         "Test1 string should have the same collation key and sort key");
1150     tempkey = key2.getByteArray(keylength);
1151     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1152         "Test2 string should have the same collation key and sort key");
1153     tempkey = key3.getByteArray(keylength);
1154     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1155         "Test3 string should have the same collation key and sort key");
1156 
1157     logln("Use secondary comparision level testing ...\n");
1158     col->setStrength(Collator::SECONDARY);
1159 
1160     col->getCollationKey(test1, u_strlen(test1), key1, status);
1161     col->getCollationKey(test2, u_strlen(test2), key2, status);
1162     col->getCollationKey(test3, u_strlen(test3), key3, status);
1163 
1164     doAssert(key1.compareTo(key2) == Collator::EQUAL,
1165         "Result should be \"Abcda\" == \"abcda\"");
1166     doAssert(key2.compareTo(key3) == Collator::EQUAL,
1167         "Result should be \"abcda\" ==  \"abcda\"");
1168 
1169     tempkey = key2.getByteArray(keylength);
1170     doAssert(memcmp(tempkey, key2primary.getAlias(), keylength - 1) == 0,
1171              "Binary format for 'abcda' sortkey different for secondary strength!");
1172 
1173     col->getSortKey(test1, sortkey1, 64);
1174     col->getSortKey(test2, sortkey2, 64);
1175     col->getSortKey(test3, sortkey3, 64);
1176 
1177     tempkey = key1.getByteArray(keylength);
1178     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1179         "Test1 string should have the same collation key and sort key");
1180     tempkey = key2.getByteArray(keylength);
1181     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1182         "Test2 string should have the same collation key and sort key");
1183     tempkey = key3.getByteArray(keylength);
1184     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1185         "Test3 string should have the same collation key and sort key");
1186 
1187     col->getSortKey(test1, 5, sortkey1, 64);
1188     col->getSortKey(test2, 5, sortkey2, 64);
1189     col->getSortKey(test3, 5, sortkey3, 64);
1190 
1191     tempkey = key1.getByteArray(keylength);
1192     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1193         "Test1 string should have the same collation key and sort key");
1194     tempkey = key2.getByteArray(keylength);
1195     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1196         "Test2 string should have the same collation key and sort key");
1197     tempkey = key3.getByteArray(keylength);
1198     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1199         "Test3 string should have the same collation key and sort key");
1200 
1201     col->getSortKey(strtest1, sortkey1, 64);
1202     col->getSortKey(strtest2, sortkey2, 64);
1203     col->getSortKey(strtest3, sortkey3, 64);
1204 
1205     tempkey = key1.getByteArray(keylength);
1206     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1207         "Test1 string should have the same collation key and sort key");
1208     tempkey = key2.getByteArray(keylength);
1209     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1210         "Test2 string should have the same collation key and sort key");
1211     tempkey = key3.getByteArray(keylength);
1212     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1213         "Test3 string should have the same collation key and sort key");
1214 
1215     logln("testing sortkey ends...");
1216     delete col;
1217 }
1218 
TestSortKeyOverflow()1219 void CollationAPITest::TestSortKeyOverflow() {
1220     IcuTestErrorCode errorCode(*this, "TestSortKeyOverflow()");
1221     LocalPointer<Collator> col(Collator::createInstance(Locale::getEnglish(), errorCode));
1222     if (errorCode.logDataIfFailureAndReset("Collator::createInstance(English) failed")) {
1223         return;
1224     }
1225     col->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode);
1226     UChar i_and_phi[] = { 0x438, 0x3c6 };  // Cyrillic small i & Greek small phi.
1227     // The sort key should be 6 bytes:
1228     // 2 bytes for the Cyrillic i, 1 byte for the primary-compression terminator,
1229     // 2 bytes for the Greek phi, and 1 byte for the NUL terminator.
1230     uint8_t sortKey[12];
1231     int32_t length = col->getSortKey(i_and_phi, 2, sortKey, UPRV_LENGTHOF(sortKey));
1232     uint8_t sortKey2[12];
1233     for (int32_t capacity = 0; capacity < length; ++capacity) {
1234         uprv_memset(sortKey2, 2, UPRV_LENGTHOF(sortKey2));
1235         int32_t length2 = col->getSortKey(i_and_phi, 2, sortKey2, capacity);
1236         if (length2 != length || 0 != uprv_memcmp(sortKey, sortKey2, capacity)) {
1237             errln("getSortKey(i_and_phi, capacity=%d) failed to write proper prefix", capacity);
1238         } else if (sortKey2[capacity] != 2 || sortKey2[capacity + 1] != 2) {
1239             errln("getSortKey(i_and_phi, capacity=%d) wrote beyond capacity", capacity);
1240         }
1241     }
1242 
1243     // Now try to break getCollationKey().
1244     // Internally, it always starts with a large stack buffer.
1245     // Since we cannot control the initial capacity, we throw an increasing number
1246     // of characters at it, with the problematic part at the end.
1247     const int32_t longCapacity = 2000;
1248     // Each 'a' in the prefix should result in one primary sort key byte.
1249     // For i_and_phi we expect 6 bytes, then the NUL terminator.
1250     const int32_t maxPrefixLength = longCapacity - 6 - 1;
1251     LocalArray<uint8_t> longSortKey(new uint8_t[longCapacity]);
1252     UnicodeString s(FALSE, i_and_phi, 2);
1253     for (int32_t prefixLength = 0; prefixLength < maxPrefixLength; ++prefixLength) {
1254         length = col->getSortKey(s, longSortKey.getAlias(), longCapacity);
1255         CollationKey collKey;
1256         col->getCollationKey(s, collKey, errorCode);
1257         int32_t collKeyLength;
1258         const uint8_t *collSortKey = collKey.getByteArray(collKeyLength);
1259         if (collKeyLength != length || 0 != uprv_memcmp(longSortKey.getAlias(), collSortKey, length)) {
1260             errln("getCollationKey(prefix[%d]+i_and_phi) failed to write proper sort key", prefixLength);
1261         }
1262 
1263         // Insert an 'a' to match ++prefixLength.
1264         s.insert(prefixLength, (UChar)0x61);
1265     }
1266 }
1267 
TestMaxExpansion()1268 void CollationAPITest::TestMaxExpansion()
1269 {
1270     UErrorCode          status = U_ZERO_ERROR;
1271     UChar               ch     = 0;
1272     UChar32             unassigned = 0xEFFFD;
1273     uint32_t            sorder = 0;
1274     uint32_t            temporder = 0;
1275 
1276     UnicodeString rule("&a < ab < c/aba < d < z < ch");
1277     RuleBasedCollator coll(rule, status);
1278     if(U_FAILURE(status)) {
1279       errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
1280       return;
1281     }
1282     UnicodeString str(ch);
1283     CollationElementIterator *iter =
1284                                   coll.createCollationElementIterator(str);
1285 
1286     while (ch < 0xFFFF && U_SUCCESS(status)) {
1287         int      count = 1;
1288         uint32_t order;
1289         int32_t  size = 0;
1290 
1291         ch ++;
1292 
1293         str.setCharAt(0, ch);
1294         iter->setText(str, status);
1295         order = iter->previous(status);
1296 
1297         /* thai management */
1298         if (order == 0)
1299             order = iter->previous(status);
1300 
1301         while (U_SUCCESS(status) && iter->previous(status) != CollationElementIterator::NULLORDER) {
1302             count ++;
1303         }
1304 
1305         size = coll.getMaxExpansion(order);
1306         if (U_FAILURE(status) || size < count) {
1307             errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1308                   ch, size, count);
1309         }
1310     }
1311 
1312     /* testing for exact max expansion */
1313     int32_t size;
1314     ch = 0;
1315     while (ch < 0x61) {
1316         uint32_t order;
1317         str.setCharAt(0, ch);
1318         iter->setText(str, status);
1319         order = iter->previous(status);
1320         size  = coll.getMaxExpansion(order);
1321         if (U_FAILURE(status) || size != 1) {
1322             errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1323                   ch, size, 1);
1324         }
1325         ch ++;
1326     }
1327 
1328     ch = 0x63;
1329     str.setTo(ch);
1330     iter->setText(str, status);
1331     temporder = iter->previous(status);
1332     size = coll.getMaxExpansion(temporder);
1333     if (U_FAILURE(status) || size != 3) {
1334         errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1335               ch, temporder, size, 3);
1336     }
1337 
1338     ch = 0x64;
1339     str.setTo(ch);
1340     iter->setText(str, status);
1341     temporder = iter->previous(status);
1342     size = coll.getMaxExpansion(temporder);
1343     if (U_FAILURE(status) || size != 1) {
1344         errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1345               ch, temporder, size, 1);
1346     }
1347 
1348     str.setTo(unassigned);
1349     iter->setText(str, status);
1350     sorder = iter->previous(status);
1351     size = coll.getMaxExpansion(sorder);
1352     if (U_FAILURE(status) || size != 2) {
1353         errln("Failure at supplementary codepoints, maximum expansion count %d < %d",
1354               size, 2);
1355     }
1356 
1357     /* testing jamo */
1358     ch = 0x1165;
1359     str.setTo(ch);
1360     iter->setText(str, status);
1361     temporder = iter->previous(status);
1362     size = coll.getMaxExpansion(temporder);
1363     if (U_FAILURE(status) || size > 3) {
1364         errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1365               ch, size, 3);
1366     }
1367 
1368     delete iter;
1369 
1370     /* testing special jamo &a<\u1160 */
1371     rule = CharsToUnicodeString("\\u0026\\u0071\\u003c\\u1165\\u002f\\u0071\\u0071\\u0071\\u0071");
1372 
1373     RuleBasedCollator jamocoll(rule, status);
1374     iter = jamocoll.createCollationElementIterator(str);
1375     temporder = iter->previous(status);
1376     size = iter->getMaxExpansion(temporder);
1377     if (U_FAILURE(status) || size != 6) {
1378         errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1379               ch, size, 5);
1380     }
1381 
1382     delete iter;
1383 }
1384 
TestDisplayName()1385 void CollationAPITest::TestDisplayName()
1386 {
1387     UErrorCode error = U_ZERO_ERROR;
1388     Collator *coll = Collator::createInstance("en_US", error);
1389     if (U_FAILURE(error)) {
1390         errcheckln(error, "Failure creating english collator - %s", u_errorName(error));
1391         return;
1392     }
1393     UnicodeString name;
1394     UnicodeString result;
1395     coll->getDisplayName(Locale::getCanadaFrench(), result);
1396     Locale::getCanadaFrench().getDisplayName(name);
1397     if (result.compare(name)) {
1398         errln("Failure getting the correct name for locale en_US");
1399     }
1400 
1401     coll->getDisplayName(Locale::getSimplifiedChinese(), result);
1402     Locale::getSimplifiedChinese().getDisplayName(name);
1403     if (result.compare(name)) {
1404         errln("Failure getting the correct name for locale zh_SG");
1405     }
1406     delete coll;
1407 }
1408 
TestAttribute()1409 void CollationAPITest::TestAttribute()
1410 {
1411     UErrorCode error = U_ZERO_ERROR;
1412     Collator *coll = Collator::createInstance(error);
1413 
1414     if (U_FAILURE(error)) {
1415         errcheckln(error, "Creation of default collator failed - %s", u_errorName(error));
1416         return;
1417     }
1418 
1419     coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_OFF, error);
1420     if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_OFF ||
1421         U_FAILURE(error)) {
1422         errln("Setting and retrieving of the french collation failed");
1423     }
1424 
1425     coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_ON, error);
1426     if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_ON ||
1427         U_FAILURE(error)) {
1428         errln("Setting and retrieving of the french collation failed");
1429     }
1430 
1431     coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, error);
1432     if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_SHIFTED ||
1433         U_FAILURE(error)) {
1434         errln("Setting and retrieving of the alternate handling failed");
1435     }
1436 
1437     coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, error);
1438     if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_NON_IGNORABLE ||
1439         U_FAILURE(error)) {
1440         errln("Setting and retrieving of the alternate handling failed");
1441     }
1442 
1443     coll->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, error);
1444     if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_LOWER_FIRST ||
1445         U_FAILURE(error)) {
1446         errln("Setting and retrieving of the case first attribute failed");
1447     }
1448 
1449     coll->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, error);
1450     if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_UPPER_FIRST ||
1451         U_FAILURE(error)) {
1452         errln("Setting and retrieving of the case first attribute failed");
1453     }
1454 
1455     coll->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, error);
1456     if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_ON ||
1457         U_FAILURE(error)) {
1458         errln("Setting and retrieving of the case level attribute failed");
1459     }
1460 
1461     coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, error);
1462     if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_OFF ||
1463         U_FAILURE(error)) {
1464         errln("Setting and retrieving of the case level attribute failed");
1465     }
1466 
1467     coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, error);
1468     if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_ON ||
1469         U_FAILURE(error)) {
1470         errln("Setting and retrieving of the normalization on/off attribute failed");
1471     }
1472 
1473     coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, error);
1474     if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_OFF ||
1475         U_FAILURE(error)) {
1476         errln("Setting and retrieving of the normalization on/off attribute failed");
1477     }
1478 
1479     coll->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, error);
1480     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_PRIMARY ||
1481         U_FAILURE(error)) {
1482         errln("Setting and retrieving of the collation strength failed");
1483     }
1484 
1485     coll->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, error);
1486     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_SECONDARY ||
1487         U_FAILURE(error)) {
1488         errln("Setting and retrieving of the collation strength failed");
1489     }
1490 
1491     coll->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, error);
1492     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_TERTIARY ||
1493         U_FAILURE(error)) {
1494         errln("Setting and retrieving of the collation strength failed");
1495     }
1496 
1497     coll->setAttribute(UCOL_STRENGTH, UCOL_QUATERNARY, error);
1498     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_QUATERNARY ||
1499         U_FAILURE(error)) {
1500         errln("Setting and retrieving of the collation strength failed");
1501     }
1502 
1503     coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, error);
1504     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_IDENTICAL ||
1505         U_FAILURE(error)) {
1506         errln("Setting and retrieving of the collation strength failed");
1507     }
1508 
1509     delete coll;
1510 }
1511 
TestVariableTopSetting()1512 void CollationAPITest::TestVariableTopSetting() {
1513   UErrorCode status = U_ZERO_ERROR;
1514 
1515   UChar vt[256] = { 0 };
1516 
1517   // Use the root collator, not the default collator.
1518   // This test fails with en_US_POSIX which tailors the dollar sign after 'A'.
1519   Collator *coll = Collator::createInstance(Locale::getRoot(), status);
1520   if(U_FAILURE(status)) {
1521     delete coll;
1522     errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
1523     return;
1524   }
1525 
1526   uint32_t oldVarTop = coll->getVariableTop(status);
1527 
1528   // ICU 53+: The character must be in a supported reordering group,
1529   // and the variable top is pinned to the end of that group.
1530   vt[0] = 0x0041;
1531 
1532   (void)coll->setVariableTop(vt, 1, status);
1533   if(status != U_ILLEGAL_ARGUMENT_ERROR) {
1534     errln("setVariableTop(letter) did not detect illegal argument - %s", u_errorName(status));
1535   }
1536 
1537   status = U_ZERO_ERROR;
1538   vt[0] = 0x24;  // dollar sign (currency symbol)
1539   uint32_t newVarTop = coll->setVariableTop(vt, 1, status);
1540   if(U_FAILURE(status)) {
1541     errln("setVariableTop(dollar sign) failed: %s", u_errorName(status));
1542     return;
1543   }
1544   if(newVarTop != coll->getVariableTop(status)) {
1545     errln("setVariableTop(dollar sign) != following getVariableTop()");
1546   }
1547 
1548   UnicodeString dollar((UChar)0x24);
1549   UnicodeString euro((UChar)0x20AC);
1550   uint32_t newVarTop2 = coll->setVariableTop(euro, status);
1551   assertEquals("setVariableTop(Euro sign) == following getVariableTop()",
1552                (int64_t)newVarTop2, (int64_t)coll->getVariableTop(status));
1553   assertEquals("setVariableTop(Euro sign) == setVariableTop(dollar sign) (should pin to top of currency group)",
1554                (int64_t)newVarTop2, (int64_t)newVarTop);
1555 
1556   coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
1557   assertEquals("empty==dollar", UCOL_EQUAL, coll->compare(UnicodeString(), dollar));
1558   assertEquals("empty==euro", UCOL_EQUAL, coll->compare(UnicodeString(), euro));
1559   assertEquals("dollar<zero", UCOL_LESS, coll->compare(dollar, UnicodeString((UChar)0x30)));
1560 
1561   coll->setVariableTop(oldVarTop, status);
1562 
1563   uint32_t newerVarTop = coll->setVariableTop(UnicodeString(vt, 1), status);
1564 
1565   if(newVarTop != newerVarTop) {
1566     errln("Didn't set vartop properly from UnicodeString!\n");
1567   }
1568 
1569   delete coll;
1570 
1571 }
1572 
TestMaxVariable()1573 void CollationAPITest::TestMaxVariable() {
1574   UErrorCode errorCode = U_ZERO_ERROR;
1575   LocalPointer<Collator> coll(Collator::createInstance(Locale::getRoot(), errorCode));
1576   if(U_FAILURE(errorCode)) {
1577     errcheckln(errorCode, "Collator creation failed with error %s", u_errorName(errorCode));
1578     return;
1579   }
1580 
1581   (void)coll->setMaxVariable(UCOL_REORDER_CODE_OTHERS, errorCode);
1582   if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
1583     errln("setMaxVariable(others) did not detect illegal argument - %s", u_errorName(errorCode));
1584   }
1585 
1586   errorCode = U_ZERO_ERROR;
1587   (void)coll->setMaxVariable(UCOL_REORDER_CODE_CURRENCY, errorCode);
1588 
1589   if(UCOL_REORDER_CODE_CURRENCY != coll->getMaxVariable()) {
1590     errln("setMaxVariable(currency) != following getMaxVariable()");
1591   }
1592 
1593   coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, errorCode);
1594   assertEquals("empty==dollar", UCOL_EQUAL, coll->compare(UnicodeString(), UnicodeString((UChar)0x24)));
1595   assertEquals("empty==euro", UCOL_EQUAL, coll->compare(UnicodeString(), UnicodeString((UChar)0x20AC)));
1596   assertEquals("dollar<zero", UCOL_LESS, coll->compare(UnicodeString((UChar)0x24), UnicodeString((UChar)0x30)));
1597 }
1598 
TestGetLocale()1599 void CollationAPITest::TestGetLocale() {
1600   UErrorCode status = U_ZERO_ERROR;
1601   const char *rules = "&a<x<y<z";
1602   UChar rlz[256] = {0};
1603 
1604   Collator *coll = Collator::createInstance("root", status);
1605   if(U_FAILURE(status)) {
1606     dataerrln("Failed to open collator for \"root\" with %s", u_errorName(status));
1607     return;
1608   }
1609   Locale locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1610   if(locale != Locale::getRoot()) {
1611     errln("Collator::createInstance(\"root\").getLocale(actual) != Locale::getRoot(); "
1612           "getLocale().getName() = \"%s\"",
1613           locale.getName());
1614   }
1615   delete coll;
1616 
1617   coll = Collator::createInstance("", status);
1618   if(U_FAILURE(status)) {
1619     dataerrln("Failed to open collator for \"\" with %s", u_errorName(status));
1620     return;
1621   }
1622   locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1623   if(locale != Locale::getRoot()) {
1624     errln("Collator::createInstance(\"\").getLocale(actual) != Locale::getRoot(); "
1625           "getLocale().getName() = \"%s\"",
1626           locale.getName());
1627   }
1628   delete coll;
1629 
1630   int32_t i = 0;
1631 
1632   static const struct {
1633     const char* requestedLocale;
1634     const char* validLocale;
1635     const char* actualLocale;
1636   } testStruct[] = {
1637     // Note: Locale::getRoot().getName() == "" not "root".
1638     { "de_DE", "de", "" },
1639     { "sr_RS", "sr_Cyrl_RS", "sr" },
1640     { "en_US_CALIFORNIA", "en_US", "" },
1641     { "fr_FR_NONEXISTANT", "fr", "" },
1642     // pinyin is the default, therefore suppressed.
1643     { "zh_CN", "zh_Hans_CN", "zh" },
1644     // zh_Hant has default=stroke but the data is in zh.
1645     { "zh_TW", "zh_Hant_TW", "zh@collation=stroke" },
1646     { "zh_TW@collation=pinyin", "zh_Hant_TW@collation=pinyin", "zh" },
1647     { "zh_CN@collation=stroke", "zh_Hans_CN@collation=stroke", "zh@collation=stroke" }
1648   };
1649 
1650   u_unescape(rules, rlz, 256);
1651 
1652   /* test opening collators for different locales */
1653   for(i = 0; i<(int32_t)UPRV_LENGTHOF(testStruct); i++) {
1654     status = U_ZERO_ERROR;
1655     coll = Collator::createInstance(testStruct[i].requestedLocale, status);
1656     if(U_FAILURE(status)) {
1657       errln("Failed to open collator for %s with %s", testStruct[i].requestedLocale, u_errorName(status));
1658       delete coll;
1659       continue;
1660     }
1661     // The requested locale may be the same as the valid locale,
1662     // or may not be supported at all. See ticket #10477.
1663     locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1664     if(U_SUCCESS(status) &&
1665         locale != testStruct[i].requestedLocale && locale != testStruct[i].validLocale) {
1666       errln("[Coll %s]: Error in requested locale, expected %s or %s, got %s",
1667             testStruct[i].requestedLocale,
1668             testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName());
1669     }
1670     status = U_ZERO_ERROR;
1671     locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1672     if(locale != testStruct[i].validLocale) {
1673       errln("[Coll %s]: Error in valid locale, expected %s, got %s",
1674             testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName());
1675     }
1676     locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1677     if(locale != testStruct[i].actualLocale) {
1678       errln("[Coll %s]: Error in actual locale, expected %s, got %s",
1679             testStruct[i].requestedLocale, testStruct[i].actualLocale, locale.getName());
1680     }
1681     // If we open a collator for the actual locale, we should get an equivalent one again.
1682     LocalPointer<Collator> coll2(Collator::createInstance(locale, status));
1683     if(U_FAILURE(status)) {
1684       errln("Failed to open collator for actual locale \"%s\" with %s",
1685             locale.getName(), u_errorName(status));
1686     } else {
1687       Locale actual2 = coll2->getLocale(ULOC_ACTUAL_LOCALE, status);
1688       if(actual2 != locale) {
1689         errln("[Coll actual \"%s\"]: Error in actual locale, got different one: \"%s\"",
1690               locale.getName(), actual2.getName());
1691       }
1692       if(*coll2 != *coll) {
1693         errln("[Coll actual \"%s\"]: Got different collator than before", locale.getName());
1694       }
1695     }
1696     delete coll;
1697   }
1698 
1699   /* completely non-existent locale for collator should get a root collator */
1700   {
1701     LocalPointer<Collator> coll(Collator::createInstance("blahaha", status));
1702     if(U_FAILURE(status)) {
1703       errln("Failed to open collator with %s", u_errorName(status));
1704       return;
1705     }
1706     Locale valid = coll->getLocale(ULOC_VALID_LOCALE, status);
1707     const char *name = valid.getName();
1708     if(*name != 0 && strcmp(name, "root") != 0) {
1709       errln("Valid locale for nonexisting-locale collator is \"%s\" not root", name);
1710     }
1711     Locale actual = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1712     name = actual.getName();
1713     if(*name != 0 && strcmp(name, "root") != 0) {
1714       errln("Actual locale for nonexisting-locale collator is \"%s\" not root", name);
1715     }
1716   }
1717 
1718 
1719 
1720   /* collator instantiated from rules should have all three locales NULL */
1721   coll = new RuleBasedCollator(rlz, status);
1722   locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1723   if(U_SUCCESS(status) && !locale.isBogus()) {
1724     errln("For collator instantiated from rules, requested locale %s is not bogus", locale.getName());
1725   }
1726   status = U_ZERO_ERROR;
1727   locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1728   if(!locale.isBogus()) {
1729     errln("For collator instantiated from rules, valid locale %s is not bogus", locale.getName());
1730   }
1731   locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1732   if(!locale.isBogus()) {
1733     errln("For collator instantiated from rules, actual locale %s is not bogus", locale.getName());
1734   }
1735   delete coll;
1736 }
1737 
// One entry of the name table sorted in TestBounds(): the escaped source text
// together with the sort key computed for it.
struct teststruct {
    const char *original;   // source text, written with \uXXXX escapes
    uint8_t key[256];       // sort key bytes; compared with strcmp() by compare_teststruct()
};
1742 
1743 
1744 
1745 U_CDECL_BEGIN
1746 static int U_CALLCONV
compare_teststruct(const void * string1,const void * string2)1747 compare_teststruct(const void *string1, const void *string2) {
1748   return(strcmp((const char *)((struct teststruct *)string1)->key, (const char *)((struct teststruct *)string2)->key));
1749 }
1750 U_CDECL_END
1751 
1752 
TestBounds(void)1753 void CollationAPITest::TestBounds(void) {
1754     UErrorCode status = U_ZERO_ERROR;
1755 
1756     Collator *coll = Collator::createInstance(Locale("sh"), status);
1757     if(U_FAILURE(status)) {
1758       delete coll;
1759       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
1760       return;
1761     }
1762 
1763     uint8_t sortkey[512], lower[512], upper[512];
1764     UChar buffer[512];
1765 
1766     static const char * const test[] = {
1767         "John Smith",
1768         "JOHN SMITH",
1769         "john SMITH",
1770         "j\\u00F6hn sm\\u00EFth",
1771         "J\\u00F6hn Sm\\u00EFth",
1772         "J\\u00D6HN SM\\u00CFTH",
1773         "john smithsonian",
1774         "John Smithsonian"
1775     };
1776 
1777     struct teststruct tests[] = {
1778         {"\\u010CAKI MIHALJ", {0}},
1779         {"\\u010CAKI MIHALJ", {0}},
1780         {"\\u010CAKI PIRO\\u0160KA", {0}},
1781         {"\\u010CABAI ANDRIJA", {0}},
1782         {"\\u010CABAI LAJO\\u0160", {0}},
1783         {"\\u010CABAI MARIJA", {0}},
1784         {"\\u010CABAI STEVAN", {0}},
1785         {"\\u010CABAI STEVAN", {0}},
1786         {"\\u010CABARKAPA BRANKO", {0}},
1787         {"\\u010CABARKAPA MILENKO", {0}},
1788         {"\\u010CABARKAPA MIROSLAV", {0}},
1789         {"\\u010CABARKAPA SIMO", {0}},
1790         {"\\u010CABARKAPA STANKO", {0}},
1791         {"\\u010CABARKAPA TAMARA", {0}},
1792         {"\\u010CABARKAPA TOMA\\u0160", {0}},
1793         {"\\u010CABDARI\\u0106 NIKOLA", {0}},
1794         {"\\u010CABDARI\\u0106 ZORICA", {0}},
1795         {"\\u010CABI NANDOR", {0}},
1796         {"\\u010CABOVI\\u0106 MILAN", {0}},
1797         {"\\u010CABRADI AGNEZIJA", {0}},
1798         {"\\u010CABRADI IVAN", {0}},
1799         {"\\u010CABRADI JELENA", {0}},
1800         {"\\u010CABRADI LJUBICA", {0}},
1801         {"\\u010CABRADI STEVAN", {0}},
1802         {"\\u010CABRDA MARTIN", {0}},
1803         {"\\u010CABRILO BOGDAN", {0}},
1804         {"\\u010CABRILO BRANISLAV", {0}},
1805         {"\\u010CABRILO LAZAR", {0}},
1806         {"\\u010CABRILO LJUBICA", {0}},
1807         {"\\u010CABRILO SPASOJA", {0}},
1808         {"\\u010CADE\\u0160 ZDENKA", {0}},
1809         {"\\u010CADESKI BLAGOJE", {0}},
1810         {"\\u010CADOVSKI VLADIMIR", {0}},
1811         {"\\u010CAGLJEVI\\u0106 TOMA", {0}},
1812         {"\\u010CAGOROVI\\u0106 VLADIMIR", {0}},
1813         {"\\u010CAJA VANKA", {0}},
1814         {"\\u010CAJI\\u0106 BOGOLJUB", {0}},
1815         {"\\u010CAJI\\u0106 BORISLAV", {0}},
1816         {"\\u010CAJI\\u0106 RADOSLAV", {0}},
1817         {"\\u010CAK\\u0160IRAN MILADIN", {0}},
1818         {"\\u010CAKAN EUGEN", {0}},
1819         {"\\u010CAKAN EVGENIJE", {0}},
1820         {"\\u010CAKAN IVAN", {0}},
1821         {"\\u010CAKAN JULIJAN", {0}},
1822         {"\\u010CAKAN MIHAJLO", {0}},
1823         {"\\u010CAKAN STEVAN", {0}},
1824         {"\\u010CAKAN VLADIMIR", {0}},
1825         {"\\u010CAKAN VLADIMIR", {0}},
1826         {"\\u010CAKAN VLADIMIR", {0}},
1827         {"\\u010CAKARA ANA", {0}},
1828         {"\\u010CAKAREVI\\u0106 MOMIR", {0}},
1829         {"\\u010CAKAREVI\\u0106 NEDELJKO", {0}},
1830         {"\\u010CAKI \\u0160ANDOR", {0}},
1831         {"\\u010CAKI AMALIJA", {0}},
1832         {"\\u010CAKI ANDRA\\u0160", {0}},
1833         {"\\u010CAKI LADISLAV", {0}},
1834         {"\\u010CAKI LAJO\\u0160", {0}},
1835         {"\\u010CAKI LASLO", {0}}
1836     };
1837 
1838 
1839 
1840     int32_t i = 0, j = 0, k = 0, buffSize = 0, skSize = 0, lowerSize = 0, upperSize = 0;
1841     int32_t arraySize = sizeof(tests)/sizeof(tests[0]);
1842 
1843     (void)lowerSize;  // Suppress unused variable warnings.
1844     (void)upperSize;
1845 
1846     for(i = 0; i<arraySize; i++) {
1847         buffSize = u_unescape(tests[i].original, buffer, 512);
1848         skSize = coll->getSortKey(buffer, buffSize, tests[i].key, 512);
1849     }
1850 
1851     qsort(tests, arraySize, sizeof(struct teststruct), compare_teststruct);
1852 
1853     for(i = 0; i < arraySize-1; i++) {
1854         for(j = i+1; j < arraySize; j++) {
1855             lowerSize = coll->getBound(tests[i].key, -1, UCOL_BOUND_LOWER, 1, lower, 512, status);
1856             upperSize = coll->getBound(tests[j].key, -1, UCOL_BOUND_UPPER, 1, upper, 512, status);
1857             for(k = i; k <= j; k++) {
1858                 if(strcmp((const char *)lower, (const char *)tests[k].key) > 0) {
1859                     errln("Problem with lower! j = %i (%s vs %s)", k, tests[k].original, tests[i].original);
1860                 }
1861                 if(strcmp((const char *)upper, (const char *)tests[k].key) <= 0) {
1862                     errln("Problem with upper! j = %i (%s vs %s)", k, tests[k].original, tests[j].original);
1863                 }
1864             }
1865         }
1866     }
1867 
1868 
1869     for(i = 0; i<(int32_t)(sizeof(test)/sizeof(test[0])); i++) {
1870         buffSize = u_unescape(test[i], buffer, 512);
1871         skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1872         lowerSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_LOWER, 1, lower, 512, &status);
1873         upperSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_UPPER_LONG, 1, upper, 512, &status);
1874         for(j = i+1; j<(int32_t)(sizeof(test)/sizeof(test[0])); j++) {
1875             buffSize = u_unescape(test[j], buffer, 512);
1876             skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1877             if(strcmp((const char *)lower, (const char *)sortkey) > 0) {
1878                 errln("Problem with lower! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
1879             }
1880             if(strcmp((const char *)upper, (const char *)sortkey) <= 0) {
1881                 errln("Problem with upper! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
1882             }
1883         }
1884     }
1885     delete coll;
1886 }
1887 
1888 
TestGetTailoredSet()1889 void CollationAPITest::TestGetTailoredSet()
1890 {
1891   struct {
1892     const char *rules;
1893     const char *tests[20];
1894     int32_t testsize;
1895   } setTest[] = {
1896     { "&a < \\u212b", { "\\u212b", "A\\u030a", "\\u00c5" }, 3},
1897     { "& S < \\u0161 <<< \\u0160", { "\\u0161", "s\\u030C", "\\u0160", "S\\u030C" }, 4}
1898   };
1899 
1900   int32_t i = 0, j = 0;
1901   UErrorCode status = U_ZERO_ERROR;
1902 
1903   UnicodeString buff;
1904   UnicodeSet *set = NULL;
1905 
1906   for(i = 0; i < UPRV_LENGTHOF(setTest); i++) {
1907     buff = UnicodeString(setTest[i].rules, -1, US_INV).unescape();
1908     RuleBasedCollator coll(buff, status);
1909     if(U_SUCCESS(status)) {
1910       set = coll.getTailoredSet(status);
1911       if(set->size() < setTest[i].testsize) {
1912         errln("Tailored set size smaller (%d) than expected (%d)", set->size(), setTest[i].testsize);
1913       }
1914       for(j = 0; j < setTest[i].testsize; j++) {
1915         buff = UnicodeString(setTest[i].tests[j], -1, US_INV).unescape();
1916         if(!set->contains(buff)) {
1917           errln("Tailored set doesn't contain %s... It should", setTest[i].tests[j]);
1918         }
1919       }
1920       delete set;
1921     } else {
1922       errcheckln(status, "Couldn't open collator with rules %s - %s", setTest[i].rules, u_errorName(status));
1923     }
1924   }
1925 }
1926 
TestUClassID()1927 void CollationAPITest::TestUClassID()
1928 {
1929     char id = *((char *)RuleBasedCollator::getStaticClassID());
1930     if (id != 0) {
1931         errln("Static class id for RuleBasedCollator should be 0");
1932     }
1933     UErrorCode status = U_ZERO_ERROR;
1934     RuleBasedCollator *coll
1935         = (RuleBasedCollator *)Collator::createInstance(status);
1936     if(U_FAILURE(status)) {
1937       delete coll;
1938       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
1939       return;
1940     }
1941     id = *((char *)coll->getDynamicClassID());
1942     if (id != 0) {
1943         errln("Dynamic class id for RuleBasedCollator should be 0");
1944     }
1945     id = *((char *)CollationKey::getStaticClassID());
1946     if (id != 0) {
1947         errln("Static class id for CollationKey should be 0");
1948     }
1949     CollationKey *key = new CollationKey();
1950     id = *((char *)key->getDynamicClassID());
1951     if (id != 0) {
1952         errln("Dynamic class id for CollationKey should be 0");
1953     }
1954     id = *((char *)CollationElementIterator::getStaticClassID());
1955     if (id != 0) {
1956         errln("Static class id for CollationElementIterator should be 0");
1957     }
1958     UnicodeString str("testing");
1959     CollationElementIterator *iter = coll->createCollationElementIterator(str);
1960     id = *((char *)iter->getDynamicClassID());
1961     if (id != 0) {
1962         errln("Dynamic class id for CollationElementIterator should be 0");
1963     }
1964     delete key;
1965     delete iter;
1966     delete coll;
1967 }
1968 
// Collator subclass declared by this test file. The member definitions follow
// the class: the compare() overloads delegate to UnicodeString::compare(),
// the key functions copy the extracted text, and most of the remaining
// overrides are stubs marked "api not used".
class TestCollator  : public Collator
{
public:
    // Returns a newly allocated TestCollator.
    virtual Collator* clone(void) const;

    // Keep the base-class compare() overloads visible next to the overrides below.
    using Collator::compare;

    // String comparison overrides.
    virtual UCollationResult compare(const UnicodeString& source,
                                      const UnicodeString& target,
                                      UErrorCode& status) const;
    virtual UCollationResult compare(const UnicodeString& source,
                                      const UnicodeString& target,
                                      int32_t length,
                                      UErrorCode& status) const;
    virtual UCollationResult compare(const UChar* source,
                                      int32_t sourceLength,
                                      const UChar* target,
                                      int32_t targetLength,
                                      UErrorCode& status) const;
    // Collation key overrides.
    virtual CollationKey& getCollationKey(const UnicodeString&  source,
                                          CollationKey& key,
                                          UErrorCode& status) const;
    virtual CollationKey& getCollationKey(const UChar*source,
                                          int32_t sourceLength,
                                          CollationKey& key,
                                          UErrorCode& status) const;
    virtual int32_t hashCode(void) const;
    virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
    virtual ECollationStrength getStrength(void) const;
    virtual void setStrength(ECollationStrength newStrength);
    virtual UClassID getDynamicClassID(void) const;
    virtual void getVersion(UVersionInfo info) const;
    // Attribute accessors.
    virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
                              UErrorCode &status);
    virtual UColAttributeValue getAttribute(UColAttribute attr,
                                            UErrorCode &status) const;
    // Variable-top accessors.
    virtual uint32_t setVariableTop(const UChar *varTop, int32_t len,
                                    UErrorCode &status);
    virtual uint32_t setVariableTop(const UnicodeString &varTop,
                                    UErrorCode &status);
    virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
    virtual uint32_t getVariableTop(UErrorCode &status) const;
    // Sort key overrides.
    virtual int32_t getSortKey(const UnicodeString& source,
                            uint8_t* result,
                            int32_t resultLength) const;
    virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
                             uint8_t*result, int32_t resultLength) const;
    virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
    // Identity comparison; see the inline definition after the class.
    virtual UBool operator==(const Collator& other) const;
    // Collator::operator!= calls !Collator::operator== which works for all subclasses.
    virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
    // Constructors forward directly to the Collator base class.
    TestCollator() : Collator() {};
    TestCollator(UCollationStrength collationStrength,
           UNormalizationMode decompositionMode) : Collator(collationStrength, decompositionMode) {};
};
2024 
operator ==(const Collator & other) const2025 inline UBool TestCollator::operator==(const Collator& other) const {
2026     // TestCollator has no fields, so we test for identity.
2027     return this == &other;
2028 
2029     // Normally, subclasses should do something like the following:
2030     //    if (this == &other) { return TRUE; }
2031     //    if (!Collator::operator==(other)) { return FALSE; }  // not the same class
2032     //
2033     //    const TestCollator &o = (const TestCollator&)other;
2034     //    (compare this vs. o's subclass fields)
2035 }
2036 
clone() const2037 Collator* TestCollator::clone() const
2038 {
2039     return new TestCollator();
2040 }
2041 
compare(const UnicodeString & source,const UnicodeString & target,UErrorCode & status) const2042 UCollationResult TestCollator::compare(const UnicodeString& source,
2043                                         const UnicodeString& target,
2044                                         UErrorCode& status) const
2045 {
2046   if(U_SUCCESS(status)) {
2047     return UCollationResult(source.compare(target));
2048   } else {
2049     return UCOL_EQUAL;
2050   }
2051 }
2052 
compare(const UnicodeString & source,const UnicodeString & target,int32_t length,UErrorCode & status) const2053 UCollationResult TestCollator::compare(const UnicodeString& source,
2054                                         const UnicodeString& target,
2055                                         int32_t length,
2056                                         UErrorCode& status) const
2057 {
2058   if(U_SUCCESS(status)) {
2059     return UCollationResult(source.compare(0, length, target));
2060   } else {
2061     return UCOL_EQUAL;
2062   }
2063 }
2064 
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength,UErrorCode & status) const2065 UCollationResult TestCollator::compare(const UChar* source,
2066                                         int32_t sourceLength,
2067                                         const UChar* target,
2068                                         int32_t targetLength,
2069                                         UErrorCode& status) const
2070 {
2071     UnicodeString s(source, sourceLength);
2072     UnicodeString t(target, targetLength);
2073     return compare(s, t, status);
2074 }
2075 
getCollationKey(const UnicodeString & source,CollationKey & key,UErrorCode & status) const2076 CollationKey& TestCollator::getCollationKey(const UnicodeString& source,
2077                                             CollationKey& key,
2078                                             UErrorCode& status) const
2079 {
2080     char temp[100];
2081     int length = 100;
2082     length = source.extract(temp, length, NULL, status);
2083     temp[length] = 0;
2084     CollationKey tempkey((uint8_t*)temp, length);
2085     key = tempkey;
2086     return key;
2087 }
2088 
getCollationKey(const UChar * source,int32_t sourceLength,CollationKey & key,UErrorCode & status) const2089 CollationKey& TestCollator::getCollationKey(const UChar*source,
2090                                           int32_t sourceLength,
2091                                           CollationKey& key,
2092                                           UErrorCode& status) const
2093 {
2094     //s tack allocation used since collationkey does not keep the unicodestring
2095     UnicodeString str(source, sourceLength);
2096     return getCollationKey(str, key, status);
2097 }
2098 
getSortKey(const UnicodeString & source,uint8_t * result,int32_t resultLength) const2099 int32_t TestCollator::getSortKey(const UnicodeString& source, uint8_t* result,
2100                                  int32_t resultLength) const
2101 {
2102     UErrorCode status = U_ZERO_ERROR;
2103     int32_t length = source.extract((char *)result, resultLength, NULL,
2104                                     status);
2105     result[length] = 0;
2106     return length;
2107 }
2108 
getSortKey(const UChar * source,int32_t sourceLength,uint8_t * result,int32_t resultLength) const2109 int32_t TestCollator::getSortKey(const UChar*source, int32_t sourceLength,
2110                                  uint8_t*result, int32_t resultLength) const
2111 {
2112     UnicodeString str(source, sourceLength);
2113     return getSortKey(str, result, resultLength);
2114 }
2115 
hashCode() const2116 int32_t TestCollator::hashCode() const
2117 {
2118     return 0;
2119 }
2120 
getLocale(ULocDataLocaleType type,UErrorCode & status) const2121 Locale TestCollator::getLocale(ULocDataLocaleType type, UErrorCode& status) const
2122 {
2123     // api not used, this is to make the compiler happy
2124     if (U_FAILURE(status)) {
2125         (void)type;
2126     }
2127     return NULL;
2128 }
2129 
getStrength() const2130 Collator::ECollationStrength TestCollator::getStrength() const
2131 {
2132     return TERTIARY;
2133 }
2134 
setStrength(Collator::ECollationStrength newStrength)2135 void TestCollator::setStrength(Collator::ECollationStrength newStrength)
2136 {
2137     // api not used, this is to make the compiler happy
2138     (void)newStrength;
2139 }
2140 
getDynamicClassID(void) const2141 UClassID TestCollator::getDynamicClassID(void) const
2142 {
2143     return 0;
2144 }
2145 
getVersion(UVersionInfo info) const2146 void TestCollator::getVersion(UVersionInfo info) const
2147 {
2148     // api not used, this is to make the compiler happy
2149     memset(info, 0, U_MAX_VERSION_LENGTH);
2150 }
2151 
setAttribute(UColAttribute,UColAttributeValue,UErrorCode &)2152 void TestCollator::setAttribute(UColAttribute /*attr*/, UColAttributeValue /*value*/,
2153                                 UErrorCode & /*status*/)
2154 {
2155 }
2156 
getAttribute(UColAttribute attr,UErrorCode & status) const2157 UColAttributeValue TestCollator::getAttribute(UColAttribute attr,
2158                                               UErrorCode &status) const
2159 {
2160     // api not used, this is to make the compiler happy
2161     if (U_FAILURE(status) || attr == UCOL_ATTRIBUTE_COUNT) {
2162         return UCOL_OFF;
2163     }
2164     return UCOL_DEFAULT;
2165 }
2166 
setVariableTop(const UChar * varTop,int32_t len,UErrorCode & status)2167 uint32_t TestCollator::setVariableTop(const UChar *varTop, int32_t len,
2168                                   UErrorCode &status)
2169 {
2170     // api not used, this is to make the compiler happy
2171     if (U_SUCCESS(status) && (varTop == 0 || len < -1)) {
2172         status = U_ILLEGAL_ARGUMENT_ERROR;
2173     }
2174     return 0;
2175 }
2176 
setVariableTop(const UnicodeString & varTop,UErrorCode & status)2177 uint32_t TestCollator::setVariableTop(const UnicodeString &varTop,
2178                                   UErrorCode &status)
2179 {
2180     // api not used, this is to make the compiler happy
2181     if (U_SUCCESS(status) && varTop.length() == 0) {
2182         status = U_ILLEGAL_ARGUMENT_ERROR;
2183     }
2184     return 0;
2185 }
2186 
setVariableTop(uint32_t varTop,UErrorCode & status)2187 void TestCollator::setVariableTop(uint32_t varTop, UErrorCode &status)
2188 {
2189     // api not used, this is to make the compiler happy
2190     if (U_SUCCESS(status) && varTop == 0) {
2191         status = U_ILLEGAL_ARGUMENT_ERROR;
2192     }
2193 }
2194 
getVariableTop(UErrorCode & status) const2195 uint32_t TestCollator::getVariableTop(UErrorCode &status) const
2196 {
2197 
2198     // api not used, this is to make the compiler happy
2199     if (U_SUCCESS(status)) {
2200         return 0;
2201     }
2202     return (uint32_t)(0xFFFFFFFFu);
2203 }
2204 
getTailoredSet(UErrorCode & status) const2205 UnicodeSet * TestCollator::getTailoredSet(UErrorCode &status) const
2206 {
2207     return Collator::getTailoredSet(status);
2208 }
2209 
setLocales(const Locale & requestedLocale,const Locale & validLocale,const Locale & actualLocale)2210 void TestCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale)
2211 {
2212     Collator::setLocales(requestedLocale, validLocale, actualLocale);
2213 }
2214 
2215 
TestSubclass()2216 void CollationAPITest::TestSubclass()
2217 {
2218     TestCollator col1;
2219     TestCollator col2;
2220     doAssert(col1 != col2, "2 instances of TestCollator should be different");
2221     if (col1.hashCode() != col2.hashCode()) {
2222         errln("Every TestCollator has the same hashcode");
2223     }
2224     UnicodeString abc("abc", 3);
2225     UnicodeString bcd("bcd", 3);
2226     if (col1.compare(abc, bcd) != abc.compare(bcd)) {
2227         errln("TestCollator compare should be the same as the default "
2228               "string comparison");
2229     }
2230     CollationKey key;
2231     UErrorCode status = U_ZERO_ERROR;
2232     col1.getCollationKey(abc, key, status);
2233     int32_t length = 0;
2234     const char* bytes = (const char *)key.getByteArray(length);
2235     UnicodeString keyarray(bytes, length, NULL, status);
2236     if (abc != keyarray) {
2237         errln("TestCollator collationkey API is returning wrong values");
2238     }
2239 
2240     UnicodeSet expectedset(0, 0x10FFFF);
2241     UnicodeSet *defaultset = col1.getTailoredSet(status);
2242     if (!defaultset->containsAll(expectedset)
2243         || !expectedset.containsAll(*defaultset)) {
2244         errln("Error: expected default tailoring to be 0 to 0x10ffff");
2245     }
2246     delete defaultset;
2247 
2248     // use base class implementation
2249     Locale loc1 = Locale::getGermany();
2250     Locale loc2 = Locale::getFrance();
2251     col1.setLocales(loc1, loc2, loc2); // default implementation has no effect
2252 
2253     UnicodeString displayName;
2254     col1.getDisplayName(loc1, loc2, displayName); // de_DE collator in fr_FR locale
2255 
2256     TestCollator col3(UCOL_TERTIARY, UNORM_NONE);
2257     UnicodeString a("a");
2258     UnicodeString b("b");
2259     Collator::EComparisonResult result = Collator::EComparisonResult(a.compare(b));
2260     if(col1.compare(a, b) != result) {
2261       errln("Collator doesn't give default result");
2262     }
2263     if(col1.compare(a, b, 1) != result) {
2264       errln("Collator doesn't give default result");
2265     }
2266     if(col1.compare(a.getBuffer(), a.length(), b.getBuffer(), b.length()) != result) {
2267       errln("Collator doesn't give default result");
2268     }
2269 }
2270 
TestNULLCharTailoring()2271 void CollationAPITest::TestNULLCharTailoring()
2272 {
2273     UErrorCode status = U_ZERO_ERROR;
2274     UChar buf[256] = {0};
2275     int32_t len = u_unescape("&a < '\\u0000'", buf, 256);
2276     UnicodeString first((UChar)0x0061);
2277     UnicodeString second((UChar)0);
2278     RuleBasedCollator *coll = new RuleBasedCollator(UnicodeString(buf, len), status);
2279     if(U_FAILURE(status)) {
2280         delete coll;
2281         errcheckln(status, "Failed to open collator - %s", u_errorName(status));
2282         return;
2283     }
2284     UCollationResult res = coll->compare(first, second, status);
2285     if(res != UCOL_LESS) {
2286         errln("a should be less then NULL after tailoring");
2287     }
2288     delete coll;
2289 }
2290 
TestClone()2291 void CollationAPITest::TestClone() {
2292     logln("\ninit c0");
2293     UErrorCode status = U_ZERO_ERROR;
2294     RuleBasedCollator* c0 = (RuleBasedCollator*)Collator::createInstance(status);
2295 
2296     if (U_FAILURE(status)) {
2297         errcheckln(status, "Collator::CreateInstance(status) failed with %s", u_errorName(status));
2298         return;
2299     }
2300 
2301     c0->setStrength(Collator::TERTIARY);
2302     dump("c0", c0, status);
2303 
2304     logln("\ninit c1");
2305     RuleBasedCollator* c1 = (RuleBasedCollator*)Collator::createInstance(status);
2306     c1->setStrength(Collator::TERTIARY);
2307     UColAttributeValue val = c1->getAttribute(UCOL_CASE_FIRST, status);
2308     if(val == UCOL_LOWER_FIRST){
2309         c1->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2310     }else{
2311         c1->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2312     }
2313     dump("c0", c0, status);
2314     dump("c1", c1, status);
2315 
2316     logln("\ninit c2");
2317     RuleBasedCollator* c2 = (RuleBasedCollator*)c1->clone();
2318     val = c2->getAttribute(UCOL_CASE_FIRST, status);
2319     if(val == UCOL_LOWER_FIRST){
2320         c2->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2321     }else{
2322         c2->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2323     }
2324     if(U_FAILURE(status)){
2325         errln("set and get attributes of collator failed. %s\n", u_errorName(status));
2326         return;
2327     }
2328     dump("c0", c0, status);
2329     dump("c1", c1, status);
2330     dump("c2", c2, status);
2331     if(*c1 == *c2){
2332         errln("The cloned objects refer to same data");
2333     }
2334     delete c0;
2335     delete c1;
2336     delete c2;
2337 }
2338 
TestCloneBinary()2339 void CollationAPITest::TestCloneBinary() {
2340     IcuTestErrorCode errorCode(*this, "TestCloneBinary");
2341     LocalPointer<Collator> root(Collator::createInstance(Locale::getRoot(), errorCode));
2342     LocalPointer<Collator> coll(Collator::createInstance("de@collation=phonebook", errorCode));
2343     if(errorCode.logDataIfFailureAndReset("Collator::createInstance(de@collation=phonebook)")) {
2344         return;
2345     }
2346     RuleBasedCollator *rbRoot = dynamic_cast<RuleBasedCollator *>(root.getAlias());
2347     RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(coll.getAlias());
2348     if(rbRoot == NULL || rbc == NULL) {
2349         infoln("root or de@collation=phonebook is not a RuleBasedCollator");
2350         return;
2351     }
2352     rbc->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode);
2353     UnicodeString uUmlaut((UChar)0xfc);
2354     UnicodeString ue = UNICODE_STRING_SIMPLE("ue");
2355     assertEquals("rbc/primary: u-umlaut==ue", UCOL_EQUAL, rbc->compare(uUmlaut, ue, errorCode));
2356     uint8_t bin[25000];
2357     int32_t binLength = rbc->cloneBinary(bin, UPRV_LENGTHOF(bin), errorCode);
2358     if(errorCode.logDataIfFailureAndReset("rbc->cloneBinary()")) {
2359         return;
2360     }
2361     logln("rbc->cloneBinary() -> %d bytes", (int)binLength);
2362 
2363     RuleBasedCollator rbc2(bin, binLength, rbRoot, errorCode);
2364     if(errorCode.logDataIfFailureAndReset("RuleBasedCollator(rbc binary)")) {
2365         return;
2366     }
2367     assertEquals("rbc2.strength==primary", UCOL_PRIMARY, rbc2.getAttribute(UCOL_STRENGTH, errorCode));
2368     assertEquals("rbc2: u-umlaut==ue", UCOL_EQUAL, rbc2.compare(uUmlaut, ue, errorCode));
2369     assertTrue("rbc==rbc2", *rbc == rbc2);
2370     uint8_t bin2[25000];
2371     int32_t bin2Length = rbc2.cloneBinary(bin2, UPRV_LENGTHOF(bin2), errorCode);
2372     assertEquals("len(rbc binary)==len(rbc2 binary)", binLength, bin2Length);
2373     assertTrue("rbc binary==rbc2 binary", binLength == bin2Length && memcmp(bin, bin2, binLength) == 0);
2374 
2375     RuleBasedCollator rbc3(bin, -1, rbRoot, errorCode);
2376     if(errorCode.logDataIfFailureAndReset("RuleBasedCollator(rbc binary, length<0)")) {
2377         return;
2378     }
2379     assertEquals("rbc3.strength==primary", UCOL_PRIMARY, rbc3.getAttribute(UCOL_STRENGTH, errorCode));
2380     assertEquals("rbc3: u-umlaut==ue", UCOL_EQUAL, rbc3.compare(uUmlaut, ue, errorCode));
2381     assertTrue("rbc==rbc3", *rbc == rbc3);
2382 }
2383 
TestIterNumeric()2384 void CollationAPITest::TestIterNumeric() {
2385     // Regression test for ticket #9915.
2386     // The collation code sometimes masked the continuation marker away
2387     // but later tested the result for isContinuation().
2388     // This test case failed because the third bytes of the computed numeric-collation primaries
2389     // were permutated with the script reordering table.
2390     // It should have been possible to reproduce this with the root collator
2391     // and characters with appropriate 3-byte primary weights.
2392     // The effectiveness of this test depends completely on the collation elements
2393     // and on the implementation code.
2394     IcuTestErrorCode errorCode(*this, "TestIterNumeric");
2395     RuleBasedCollator coll(UnicodeString("[reorder Hang Hani]"), errorCode);
2396     if(errorCode.logDataIfFailureAndReset("RuleBasedCollator constructor")) {
2397         return;
2398     }
2399     coll.setAttribute(UCOL_NUMERIC_COLLATION, UCOL_ON, errorCode);
2400     UCharIterator iter40, iter72;
2401     uiter_setUTF8(&iter40, "\x34\x30", 2);
2402     uiter_setUTF8(&iter72, "\x37\x32", 2);
2403     UCollationResult result = coll.compare(iter40, iter72, errorCode);
2404     assertEquals("40<72", (int32_t)UCOL_LESS, (int32_t)result);
2405 }
2406 
TestBadKeywords()2407 void CollationAPITest::TestBadKeywords() {
2408     // Test locale IDs with errors.
2409     // Valid locale IDs are tested via data-driven tests.
2410     UErrorCode errorCode = U_ZERO_ERROR;
2411     Locale bogusLocale(Locale::getRoot());
2412     bogusLocale.setToBogus();
2413     LocalPointer<Collator> coll(Collator::createInstance(bogusLocale, errorCode));
2414     if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2415         errln("Collator::createInstance(bogus locale) did not fail as expected - %s",
2416               u_errorName(errorCode));
2417     }
2418 
2419     // Unknown value.
2420     const char *localeID = "it-u-ks-xyz";
2421     errorCode = U_ZERO_ERROR;
2422     coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2423     if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2424         dataerrln("Collator::createInstance(%s) did not fail as expected - %s",
2425               localeID, u_errorName(errorCode));
2426     }
2427 
2428     // Unsupported attributes.
2429     localeID = "it@colHiraganaQuaternary=true";
2430     errorCode = U_ZERO_ERROR;
2431     coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2432     if(errorCode != U_UNSUPPORTED_ERROR) {
2433         if (errorCode == U_FILE_ACCESS_ERROR) {
2434             dataerrln("Collator::createInstance(it@colHiraganaQuaternary=true) : %s", u_errorName(errorCode));
2435         } else {
2436             errln("Collator::createInstance(%s) did not fail as expected - %s",
2437                   localeID, u_errorName(errorCode));
2438         }
2439     }
2440 
2441     localeID = "it-u-vt-u24";
2442     errorCode = U_ZERO_ERROR;
2443     coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2444     if(errorCode != U_UNSUPPORTED_ERROR) {
2445         if (errorCode == U_ILLEGAL_ARGUMENT_ERROR || errorCode == U_FILE_ACCESS_ERROR) {
2446             dataerrln("Collator::createInstance(it-u-vt-u24) : %s", u_errorName(errorCode));
2447         } else {
2448            errln("Collator::createInstance(%s) did not fail as expected - %s",
2449                   localeID, u_errorName(errorCode));
2450         }
2451     }
2452 }
2453 
dump(UnicodeString msg,RuleBasedCollator * c,UErrorCode & status)2454  void CollationAPITest::dump(UnicodeString msg, RuleBasedCollator* c, UErrorCode& status) {
2455     const char* bigone = "One";
2456     const char* littleone = "one";
2457 
2458     logln(msg + " " + c->compare(bigone, littleone) +
2459                         " s: " + c->getStrength() +
2460                         " u: " + c->getAttribute(UCOL_CASE_FIRST, status));
2461 }
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)2462 void CollationAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */)
2463 {
2464     if (exec) logln("TestSuite CollationAPITest: ");
2465     TESTCASE_AUTO_BEGIN;
2466     TESTCASE_AUTO(TestProperty);
2467     TESTCASE_AUTO(TestOperators);
2468     TESTCASE_AUTO(TestDuplicate);
2469     TESTCASE_AUTO(TestCompare);
2470     TESTCASE_AUTO(TestHashCode);
2471     TESTCASE_AUTO(TestCollationKey);
2472     TESTCASE_AUTO(TestElemIter);
2473     TESTCASE_AUTO(TestGetAll);
2474     TESTCASE_AUTO(TestRuleBasedColl);
2475     TESTCASE_AUTO(TestDecomposition);
2476     TESTCASE_AUTO(TestSafeClone);
2477     TESTCASE_AUTO(TestSortKey);
2478     TESTCASE_AUTO(TestSortKeyOverflow);
2479     TESTCASE_AUTO(TestMaxExpansion);
2480     TESTCASE_AUTO(TestDisplayName);
2481     TESTCASE_AUTO(TestAttribute);
2482     TESTCASE_AUTO(TestVariableTopSetting);
2483     TESTCASE_AUTO(TestMaxVariable);
2484     TESTCASE_AUTO(TestRules);
2485     TESTCASE_AUTO(TestGetLocale);
2486     TESTCASE_AUTO(TestBounds);
2487     TESTCASE_AUTO(TestGetTailoredSet);
2488     TESTCASE_AUTO(TestUClassID);
2489     TESTCASE_AUTO(TestSubclass);
2490     TESTCASE_AUTO(TestNULLCharTailoring);
2491     TESTCASE_AUTO(TestClone);
2492     TESTCASE_AUTO(TestCloneBinary);
2493     TESTCASE_AUTO(TestIterNumeric);
2494     TESTCASE_AUTO(TestBadKeywords);
2495     TESTCASE_AUTO_END;
2496 }
2497 
2498 #endif /* #if !UCONFIG_NO_COLLATION */
2499