1 /*
2  ******************************************************************************
3  * Copyright (C) 1996-2014, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ******************************************************************************
6  */
7 
8 /**
9  * File coll.cpp
10  *
11  * Created by: Helena Shih
12  *
13  * Modification History:
14  *
15  *  Date        Name        Description
16  *  2/5/97      aliu        Modified createDefault to load collation data from
17  *                          binary files when possible.  Added related methods
18  *                          createCollationFromFile, chopLocale, createPathName.
19  *  2/11/97     aliu        Added methods addToCache, findInCache, which implement
20  *                          a Collation cache.  Modified createDefault to look in
21  *                          cache first, and also to store newly created Collation
22  *                          objects in the cache.  Modified to not use gLocPath.
23  *  2/12/97     aliu        Modified to create objects from RuleBasedCollator cache.
24  *                          Moved cache out of Collation class.
25  *  2/13/97     aliu        Moved several methods out of this class and into
26  *                          RuleBasedCollator, with modifications.  Modified
27  *                          createDefault() to call new RuleBasedCollator(Locale&)
28  *                          constructor.  General clean up and documentation.
29  *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
30  *                          constructor.
31  * 05/06/97     helena      Added memory allocation error detection.
32  * 05/08/97     helena      Added createInstance().
33  *  6/20/97     helena      Java class name change.
34  * 04/23/99     stephen     Removed EDecompositionMode, merged with
35  *                          Normalizer::EMode
36  * 11/23/9      srl         Inlining of some critical functions
37  * 01/29/01     synwee      Modified into a C++ wrapper calling C APIs (ucol.h)
38  * 2012-2014    markus      Rewritten in C++ again.
39  */
40 
41 #include "utypeinfo.h"  // for 'typeid' to work
42 
43 #include "unicode/utypes.h"
44 
45 #if !UCONFIG_NO_COLLATION
46 
47 #include "unicode/coll.h"
48 #include "unicode/tblcoll.h"
49 #include "collationdata.h"
50 #include "collationroot.h"
51 #include "collationtailoring.h"
52 #include "ucol_imp.h"
53 #include "cstring.h"
54 #include "cmemory.h"
55 #include "umutex.h"
56 #include "servloc.h"
57 #include "uassert.h"
58 #include "ustrenum.h"
59 #include "uresimp.h"
60 #include "ucln_in.h"
61 
62 static icu::Locale* availableLocaleList = NULL;
63 static int32_t  availableLocaleListCount;
64 static icu::ICULocaleService* gService = NULL;
65 static icu::UInitOnce gServiceInitOnce = U_INITONCE_INITIALIZER;
66 static icu::UInitOnce gAvailableLocaleListInitOnce;
67 
68 /**
69  * Release all static memory held by collator.
70  */
71 U_CDECL_BEGIN
collator_cleanup(void)72 static UBool U_CALLCONV collator_cleanup(void) {
73 #if !UCONFIG_NO_SERVICE
74     if (gService) {
75         delete gService;
76         gService = NULL;
77     }
78     gServiceInitOnce.reset();
79 #endif
80     if (availableLocaleList) {
81         delete []availableLocaleList;
82         availableLocaleList = NULL;
83     }
84     availableLocaleListCount = 0;
85     gAvailableLocaleListInitOnce.reset();
86     return TRUE;
87 }
88 
89 U_CDECL_END
90 
91 U_NAMESPACE_BEGIN
92 
93 #if !UCONFIG_NO_SERVICE
94 
95 // ------------------------------------------
96 //
97 // Registration
98 //
99 
100 //-------------------------------------------
101 
~CollatorFactory()102 CollatorFactory::~CollatorFactory() {}
103 
104 //-------------------------------------------
105 
106 UBool
visible(void) const107 CollatorFactory::visible(void) const {
108     return TRUE;
109 }
110 
111 //-------------------------------------------
112 
113 UnicodeString&
getDisplayName(const Locale & objectLocale,const Locale & displayLocale,UnicodeString & result)114 CollatorFactory::getDisplayName(const Locale& objectLocale,
115                                 const Locale& displayLocale,
116                                 UnicodeString& result)
117 {
118   return objectLocale.getDisplayName(displayLocale, result);
119 }
120 
121 // -------------------------------------
122 
123 class ICUCollatorFactory : public ICUResourceBundleFactory {
124  public:
ICUCollatorFactory()125     ICUCollatorFactory() : ICUResourceBundleFactory(UnicodeString(U_ICUDATA_COLL, -1, US_INV)) { }
126     virtual ~ICUCollatorFactory();
127  protected:
128     virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
129 };
130 
~ICUCollatorFactory()131 ICUCollatorFactory::~ICUCollatorFactory() {}
132 
133 UObject*
create(const ICUServiceKey & key,const ICUService *,UErrorCode & status) const134 ICUCollatorFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const {
135     if (handlesKey(key, status)) {
136         const LocaleKey& lkey = (const LocaleKey&)key;
137         Locale loc;
138         // make sure the requested locale is correct
139         // default LocaleFactory uses currentLocale since that's the one vetted by handlesKey
140         // but for ICU rb resources we use the actual one since it will fallback again
141         lkey.canonicalLocale(loc);
142 
143         return Collator::makeInstance(loc, status);
144     }
145     return NULL;
146 }
147 
148 // -------------------------------------
149 
150 class ICUCollatorService : public ICULocaleService {
151 public:
ICUCollatorService()152     ICUCollatorService()
153         : ICULocaleService(UNICODE_STRING_SIMPLE("Collator"))
154     {
155         UErrorCode status = U_ZERO_ERROR;
156         registerFactory(new ICUCollatorFactory(), status);
157     }
158 
159     virtual ~ICUCollatorService();
160 
cloneInstance(UObject * instance) const161     virtual UObject* cloneInstance(UObject* instance) const {
162         return ((Collator*)instance)->clone();
163     }
164 
handleDefault(const ICUServiceKey & key,UnicodeString * actualID,UErrorCode & status) const165     virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualID, UErrorCode& status) const {
166         LocaleKey& lkey = (LocaleKey&)key;
167         if (actualID) {
168             // Ugly Hack Alert! We return an empty actualID to signal
169             // to callers that this is a default object, not a "real"
170             // service-created object. (TODO remove in 3.0) [aliu]
171             actualID->truncate(0);
172         }
173         Locale loc("");
174         lkey.canonicalLocale(loc);
175         return Collator::makeInstance(loc, status);
176     }
177 
getKey(ICUServiceKey & key,UnicodeString * actualReturn,UErrorCode & status) const178     virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const {
179         UnicodeString ar;
180         if (actualReturn == NULL) {
181             actualReturn = &ar;
182         }
183         return (Collator*)ICULocaleService::getKey(key, actualReturn, status);
184     }
185 
isDefault() const186     virtual UBool isDefault() const {
187         return countFactories() == 1;
188     }
189 };
190 
~ICUCollatorService()191 ICUCollatorService::~ICUCollatorService() {}
192 
193 // -------------------------------------
194 
initService()195 static void U_CALLCONV initService() {
196     gService = new ICUCollatorService();
197     ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR, collator_cleanup);
198 }
199 
200 
201 static ICULocaleService*
getService(void)202 getService(void)
203 {
204     umtx_initOnce(gServiceInitOnce, &initService);
205     return gService;
206 }
207 
208 // -------------------------------------
209 
210 static inline UBool
hasService(void)211 hasService(void)
212 {
213     UBool retVal = !gServiceInitOnce.isReset() && (getService() != NULL);
214     return retVal;
215 }
216 
217 #endif /* UCONFIG_NO_SERVICE */
218 
219 static void U_CALLCONV
initAvailableLocaleList(UErrorCode & status)220 initAvailableLocaleList(UErrorCode &status) {
221     U_ASSERT(availableLocaleListCount == 0);
222     U_ASSERT(availableLocaleList == NULL);
223     // for now, there is a hardcoded list, so just walk through that list and set it up.
224     UResourceBundle *index = NULL;
225     UResourceBundle installed;
226     int32_t i = 0;
227 
228     ures_initStackObject(&installed);
229     index = ures_openDirect(U_ICUDATA_COLL, "res_index", &status);
230     ures_getByKey(index, "InstalledLocales", &installed, &status);
231 
232     if(U_SUCCESS(status)) {
233         availableLocaleListCount = ures_getSize(&installed);
234         availableLocaleList = new Locale[availableLocaleListCount];
235 
236         if (availableLocaleList != NULL) {
237             ures_resetIterator(&installed);
238             while(ures_hasNext(&installed)) {
239                 const char *tempKey = NULL;
240                 ures_getNextString(&installed, NULL, &tempKey, &status);
241                 availableLocaleList[i++] = Locale(tempKey);
242             }
243         }
244         U_ASSERT(availableLocaleListCount == i);
245         ures_close(&installed);
246     }
247     ures_close(index);
248     ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR, collator_cleanup);
249 }
250 
isAvailableLocaleListInitialized(UErrorCode & status)251 static UBool isAvailableLocaleListInitialized(UErrorCode &status) {
252     umtx_initOnce(gAvailableLocaleListInitOnce, &initAvailableLocaleList, status);
253     return U_SUCCESS(status);
254 }
255 
256 
257 // Collator public methods -----------------------------------------------
258 
259 namespace {
260 
261 static const struct {
262     const char *name;
263     UColAttribute attr;
264 } collAttributes[] = {
265     { "colStrength", UCOL_STRENGTH },
266     { "colBackwards", UCOL_FRENCH_COLLATION },
267     { "colCaseLevel", UCOL_CASE_LEVEL },
268     { "colCaseFirst", UCOL_CASE_FIRST },
269     { "colAlternate", UCOL_ALTERNATE_HANDLING },
270     { "colNormalization", UCOL_NORMALIZATION_MODE },
271     { "colNumeric", UCOL_NUMERIC_COLLATION }
272 };
273 
274 static const struct {
275     const char *name;
276     UColAttributeValue value;
277 } collAttributeValues[] = {
278     { "primary", UCOL_PRIMARY },
279     { "secondary", UCOL_SECONDARY },
280     { "tertiary", UCOL_TERTIARY },
281     { "quaternary", UCOL_QUATERNARY },
282     // Note: Not supporting typo "quarternary" because it was never supported in locale IDs.
283     { "identical", UCOL_IDENTICAL },
284     { "no", UCOL_OFF },
285     { "yes", UCOL_ON },
286     { "shifted", UCOL_SHIFTED },
287     { "non-ignorable", UCOL_NON_IGNORABLE },
288     { "lower", UCOL_LOWER_FIRST },
289     { "upper", UCOL_UPPER_FIRST }
290 };
291 
292 static const char *collReorderCodes[UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST] = {
293     "space", "punct", "symbol", "currency", "digit"
294 };
295 
getReorderCode(const char * s)296 int32_t getReorderCode(const char *s) {
297     for (int32_t i = 0; i < UPRV_LENGTHOF(collReorderCodes); ++i) {
298         if (uprv_stricmp(s, collReorderCodes[i]) == 0) {
299             return UCOL_REORDER_CODE_FIRST + i;
300         }
301     }
302     // Not supporting "others" = UCOL_REORDER_CODE_OTHERS
303     // as a synonym for Zzzz = USCRIPT_UNKNOWN for now:
304     // Avoid introducing synonyms/aliases.
305     return -1;
306 }
307 
308 /**
 * Sets collation attributes according to locale keywords. See
 * http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Settings
 *
 * Using "alias" keywords and values where defined:
 * http://www.unicode.org/reports/tr35/tr35.html#Old_Locale_Extension_Syntax
 * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml
 *
 * Does nothing if errorCode already indicates a failure, or if the locale
 * name equals its base name (no keywords). Handles the keywords listed in
 * collAttributes (with values from collAttributeValues), "colReorder" and "kv".
 * A non-empty "colHiraganaQuaternary" or "variableTop" keyword is rejected
 * with U_UNSUPPORTED_ERROR; every other failure is reported as
 * U_ILLEGAL_ARGUMENT_ERROR.
 *
 * @param loc       locale whose keywords are read
 * @param coll      collator that receives the settings
 * @param errorCode in/out error code
 */
setAttributesFromKeywords(const Locale & loc,Collator & coll,UErrorCode & errorCode)316 void setAttributesFromKeywords(const Locale &loc, Collator &coll, UErrorCode &errorCode) {
317     if (U_FAILURE(errorCode)) {
318         return;
319     }
320     if (uprv_strcmp(loc.getName(), loc.getBaseName()) == 0) {
321         // No keywords.
322         return;
323     }
324     char value[1024];  // The reordering value could be long.
325     // Check for collation keywords that were already deprecated
326     // before any were supported in createInstance() (except for "collation").
327     int32_t length = loc.getKeywordValue("colHiraganaQuaternary", value, UPRV_LENGTHOF(value), errorCode);
328     if (U_FAILURE(errorCode)) {
329         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
330         return;
331     }
332     if (length != 0) {
333         errorCode = U_UNSUPPORTED_ERROR;
334         return;
335     }
336     length = loc.getKeywordValue("variableTop", value, UPRV_LENGTHOF(value), errorCode);
337     if (U_FAILURE(errorCode)) {
338         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
339         return;
340     }
341     if (length != 0) {
342         errorCode = U_UNSUPPORTED_ERROR;
343         return;
344     }
345     // Parse known collation keywords, ignore others.
        // A not-terminated warning left over from the two lookups above is
        // cleared here; from now on the same warning is treated as an error.
346     if (errorCode == U_STRING_NOT_TERMINATED_WARNING) {
347         errorCode = U_ZERO_ERROR;
348     }
        // For each known attribute keyword that is present, find its value in
        // collAttributeValues (case-insensitively) and apply it to coll.
349     for (int32_t i = 0; i < UPRV_LENGTHOF(collAttributes); ++i) {
350         length = loc.getKeywordValue(collAttributes[i].name, value, UPRV_LENGTHOF(value), errorCode);
351         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
352             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
353             return;
354         }
355         if (length == 0) { continue; }
356         for (int32_t j = 0;; ++j) {
                // Ran off the end of the table: the value is not a known one.
357             if (j == UPRV_LENGTHOF(collAttributeValues)) {
358                 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
359                 return;
360             }
361             if (uprv_stricmp(value, collAttributeValues[j].name) == 0) {
362                 coll.setAttribute(collAttributes[i].attr, collAttributeValues[j].value, errorCode);
363                 break;
364             }
365         }
366     }
        // "colReorder": a '-'-separated list of script and reorder codes.
367     length = loc.getKeywordValue("colReorder", value, UPRV_LENGTHOF(value), errorCode);
368     if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
369         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
370         return;
371     }
372     if (length != 0) {
373         int32_t codes[USCRIPT_CODE_LIMIT + UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST];
374         int32_t codesLength = 0;
375         char *scriptName = value;
376         for (;;) {
                // Refuse to write past the end of codes[].
377             if (codesLength == UPRV_LENGTHOF(codes)) {
378                 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
379                 return;
380             }
                // Find the end of the current item and terminate it in place;
                // c remembers whether a '-' (more items) or the final NUL was found.
381             char *limit = scriptName;
382             char c;
383             while ((c = *limit) != 0 && c != '-') { ++limit; }
384             *limit = 0;
385             int32_t code;
386             if ((limit - scriptName) == 4) {
387                 // Strict parsing, accept only 4-letter script codes, not long names.
388                 code = u_getPropertyValueEnum(UCHAR_SCRIPT, scriptName);
389             } else {
390                 code = getReorderCode(scriptName);
391             }
392             if (code < 0) {
393                 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
394                 return;
395             }
396             codes[codesLength++] = code;
397             if (c == 0) { break; }
398             scriptName = limit + 1;
399         }
400         coll.setReorderCodes(codes, codesLength, errorCode);
401     }
        // "kv": name of a reorder code that is passed to setMaxVariable().
402     length = loc.getKeywordValue("kv", value, UPRV_LENGTHOF(value), errorCode);
403     if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
404         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
405         return;
406     }
407     if (length != 0) {
408         int32_t code = getReorderCode(value);
409         if (code < 0) {
410             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
411             return;
412         }
413         coll.setMaxVariable((UColReorderCode)code, errorCode);
414     }
        // Whatever failure remains is reported to the caller as an illegal argument.
415     if (U_FAILURE(errorCode)) {
416         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
417     }
418 }
419 
420 }  // namespace
421 
createInstance(UErrorCode & success)422 Collator* U_EXPORT2 Collator::createInstance(UErrorCode& success)
423 {
424     return createInstance(Locale::getDefault(), success);
425 }
426 
createInstance(const Locale & desiredLocale,UErrorCode & status)427 Collator* U_EXPORT2 Collator::createInstance(const Locale& desiredLocale,
428                                    UErrorCode& status)
429 {
430     if (U_FAILURE(status))
431         return 0;
432     if (desiredLocale.isBogus()) {
433         // Locale constructed from malformed locale ID or language tag.
434         status = U_ILLEGAL_ARGUMENT_ERROR;
435         return NULL;
436     }
437 
438     Collator* coll;
439 #if !UCONFIG_NO_SERVICE
440     if (hasService()) {
441         Locale actualLoc;
442         coll = (Collator*)gService->get(desiredLocale, &actualLoc, status);
443     } else
444 #endif
445     {
446         coll = makeInstance(desiredLocale, status);
447     }
448     setAttributesFromKeywords(desiredLocale, *coll, status);
449     if (U_FAILURE(status)) {
450         delete coll;
451         return NULL;
452     }
453     return coll;
454 }
455 
456 
makeInstance(const Locale & desiredLocale,UErrorCode & status)457 Collator* Collator::makeInstance(const Locale&  desiredLocale, UErrorCode& status) {
458     const CollationCacheEntry *entry = CollationLoader::loadTailoring(desiredLocale, status);
459     if (U_SUCCESS(status)) {
460         Collator *result = new RuleBasedCollator(entry);
461         if (result != NULL) {
462             // Both the unified cache's get() and the RBC constructor
463             // did addRef(). Undo one of them.
464             entry->removeRef();
465             return result;
466         }
467         status = U_MEMORY_ALLOCATION_ERROR;
468     }
469     if (entry != NULL) {
470         // Undo the addRef() from the cache.get().
471         entry->removeRef();
472     }
473     return NULL;
474 }
475 
476 Collator *
safeClone() const477 Collator::safeClone() const {
478     return clone();
479 }
480 
481 // implement deprecated, previously abstract method
compare(const UnicodeString & source,const UnicodeString & target) const482 Collator::EComparisonResult Collator::compare(const UnicodeString& source,
483                                     const UnicodeString& target) const
484 {
485     UErrorCode ec = U_ZERO_ERROR;
486     return (EComparisonResult)compare(source, target, ec);
487 }
488 
489 // implement deprecated, previously abstract method
compare(const UnicodeString & source,const UnicodeString & target,int32_t length) const490 Collator::EComparisonResult Collator::compare(const UnicodeString& source,
491                                     const UnicodeString& target,
492                                     int32_t length) const
493 {
494     UErrorCode ec = U_ZERO_ERROR;
495     return (EComparisonResult)compare(source, target, length, ec);
496 }
497 
498 // implement deprecated, previously abstract method
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength) const499 Collator::EComparisonResult Collator::compare(const UChar* source, int32_t sourceLength,
500                                     const UChar* target, int32_t targetLength)
501                                     const
502 {
503     UErrorCode ec = U_ZERO_ERROR;
504     return (EComparisonResult)compare(source, sourceLength, target, targetLength, ec);
505 }
506 
compare(UCharIterator &,UCharIterator &,UErrorCode & status) const507 UCollationResult Collator::compare(UCharIterator &/*sIter*/,
508                                    UCharIterator &/*tIter*/,
509                                    UErrorCode &status) const {
510     if(U_SUCCESS(status)) {
511         // Not implemented in the base class.
512         status = U_UNSUPPORTED_ERROR;
513     }
514     return UCOL_EQUAL;
515 }
516 
compareUTF8(const StringPiece & source,const StringPiece & target,UErrorCode & status) const517 UCollationResult Collator::compareUTF8(const StringPiece &source,
518                                        const StringPiece &target,
519                                        UErrorCode &status) const {
520     if(U_FAILURE(status)) {
521         return UCOL_EQUAL;
522     }
523     UCharIterator sIter, tIter;
524     uiter_setUTF8(&sIter, source.data(), source.length());
525     uiter_setUTF8(&tIter, target.data(), target.length());
526     return compare(sIter, tIter, status);
527 }
528 
equals(const UnicodeString & source,const UnicodeString & target) const529 UBool Collator::equals(const UnicodeString& source,
530                        const UnicodeString& target) const
531 {
532     UErrorCode ec = U_ZERO_ERROR;
533     return (compare(source, target, ec) == UCOL_EQUAL);
534 }
535 
greaterOrEqual(const UnicodeString & source,const UnicodeString & target) const536 UBool Collator::greaterOrEqual(const UnicodeString& source,
537                                const UnicodeString& target) const
538 {
539     UErrorCode ec = U_ZERO_ERROR;
540     return (compare(source, target, ec) != UCOL_LESS);
541 }
542 
greater(const UnicodeString & source,const UnicodeString & target) const543 UBool Collator::greater(const UnicodeString& source,
544                         const UnicodeString& target) const
545 {
546     UErrorCode ec = U_ZERO_ERROR;
547     return (compare(source, target, ec) == UCOL_GREATER);
548 }
549 
550 // this API  ignores registered collators, since it returns an
551 // array of indefinite lifetime
getAvailableLocales(int32_t & count)552 const Locale* U_EXPORT2 Collator::getAvailableLocales(int32_t& count)
553 {
554     UErrorCode status = U_ZERO_ERROR;
555     Locale *result = NULL;
556     count = 0;
557     if (isAvailableLocaleListInitialized(status))
558     {
559         result = availableLocaleList;
560         count = availableLocaleListCount;
561     }
562     return result;
563 }
564 
getDisplayName(const Locale & objectLocale,const Locale & displayLocale,UnicodeString & name)565 UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
566                                         const Locale& displayLocale,
567                                         UnicodeString& name)
568 {
569 #if !UCONFIG_NO_SERVICE
570     if (hasService()) {
571         UnicodeString locNameStr;
572         LocaleUtility::initNameFromLocale(objectLocale, locNameStr);
573         return gService->getDisplayName(locNameStr, name, displayLocale);
574     }
575 #endif
576     return objectLocale.getDisplayName(displayLocale, name);
577 }
578 
getDisplayName(const Locale & objectLocale,UnicodeString & name)579 UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
580                                         UnicodeString& name)
581 {
582     return getDisplayName(objectLocale, Locale::getDefault(), name);
583 }
584 
585 /* This is useless information */
586 /*void Collator::getVersion(UVersionInfo versionInfo) const
587 {
588   if (versionInfo!=NULL)
589     uprv_memcpy(versionInfo, fVersion, U_MAX_VERSION_LENGTH);
590 }
591 */
592 
593 // UCollator protected constructor destructor ----------------------------
594 
595 /**
596 * Default constructor.
597 * Constructor is different from the old default Collator constructor.
598 * The task for determining the default collation strength and normalization mode
599 * is left to the child class.
600 */
Collator()601 Collator::Collator()
602 : UObject()
603 {
604 }
605 
606 /**
607 * Constructor.
608 * Empty constructor, does not handle the arguments.
609 * This constructor is done for backward compatibility with 1.7 and 1.8.
610 * The task for handling the argument collation strength and normalization
611 * mode is left to the child class.
612 * @param collationStrength collation strength
613 * @param decompositionMode
614 * @deprecated 2.4 use the default constructor instead
615 */
Collator(UCollationStrength,UNormalizationMode)616 Collator::Collator(UCollationStrength, UNormalizationMode )
617 : UObject()
618 {
619 }
620 
~Collator()621 Collator::~Collator()
622 {
623 }
624 
// Copy constructor: only the UObject base is copied.
Collator::Collator(const Collator &other) : UObject(other) {}
629 
operator ==(const Collator & other) const630 UBool Collator::operator==(const Collator& other) const
631 {
632     // Subclasses: Call this method and then add more specific checks.
633     return typeid(*this) == typeid(other);
634 }
635 
operator !=(const Collator & other) const636 UBool Collator::operator!=(const Collator& other) const
637 {
638     return (UBool)!(*this == other);
639 }
640 
getBound(const uint8_t * source,int32_t sourceLength,UColBoundMode boundType,uint32_t noOfLevels,uint8_t * result,int32_t resultLength,UErrorCode & status)641 int32_t U_EXPORT2 Collator::getBound(const uint8_t       *source,
642                            int32_t             sourceLength,
643                            UColBoundMode       boundType,
644                            uint32_t            noOfLevels,
645                            uint8_t             *result,
646                            int32_t             resultLength,
647                            UErrorCode          &status)
648 {
649     return ucol_getBound(source, sourceLength, boundType, noOfLevels, result, resultLength, &status);
650 }
651 
652 void
setLocales(const Locale &,const Locale &,const Locale &)653 Collator::setLocales(const Locale& /* requestedLocale */, const Locale& /* validLocale */, const Locale& /*actualLocale*/) {
654 }
655 
getTailoredSet(UErrorCode & status) const656 UnicodeSet *Collator::getTailoredSet(UErrorCode &status) const
657 {
658     if(U_FAILURE(status)) {
659         return NULL;
660     }
661     // everything can be changed
662     return new UnicodeSet(0, 0x10FFFF);
663 }
664 
665 // -------------------------------------
666 
667 #if !UCONFIG_NO_SERVICE
668 URegistryKey U_EXPORT2
registerInstance(Collator * toAdopt,const Locale & locale,UErrorCode & status)669 Collator::registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status)
670 {
671     if (U_SUCCESS(status)) {
672         // Set the collator locales while registering so that createInstance()
673         // need not guess whether the collator's locales are already set properly
674         // (as they are by the data loader).
675         toAdopt->setLocales(locale, locale, locale);
676         return getService()->registerInstance(toAdopt, locale, status);
677     }
678     return NULL;
679 }
680 
681 // -------------------------------------
682 
683 class CFactory : public LocaleKeyFactory {
684 private:
685     CollatorFactory* _delegate;
686     Hashtable* _ids;
687 
688 public:
CFactory(CollatorFactory * delegate,UErrorCode & status)689     CFactory(CollatorFactory* delegate, UErrorCode& status)
690         : LocaleKeyFactory(delegate->visible() ? VISIBLE : INVISIBLE)
691         , _delegate(delegate)
692         , _ids(NULL)
693     {
694         if (U_SUCCESS(status)) {
695             int32_t count = 0;
696             _ids = new Hashtable(status);
697             if (_ids) {
698                 const UnicodeString * idlist = _delegate->getSupportedIDs(count, status);
699                 for (int i = 0; i < count; ++i) {
700                     _ids->put(idlist[i], (void*)this, status);
701                     if (U_FAILURE(status)) {
702                         delete _ids;
703                         _ids = NULL;
704                         return;
705                     }
706                 }
707             } else {
708                 status = U_MEMORY_ALLOCATION_ERROR;
709             }
710         }
711     }
712 
713     virtual ~CFactory();
714 
715     virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
716 
717 protected:
getSupportedIDs(UErrorCode & status) const718     virtual const Hashtable* getSupportedIDs(UErrorCode& status) const
719     {
720         if (U_SUCCESS(status)) {
721             return _ids;
722         }
723         return NULL;
724     }
725 
726     virtual UnicodeString&
727         getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
728 };
729 
~CFactory()730 CFactory::~CFactory()
731 {
732     delete _delegate;
733     delete _ids;
734 }
735 
736 UObject*
create(const ICUServiceKey & key,const ICUService *,UErrorCode & status) const737 CFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const
738 {
739     if (handlesKey(key, status)) {
740         const LocaleKey& lkey = (const LocaleKey&)key;
741         Locale validLoc;
742         lkey.currentLocale(validLoc);
743         return _delegate->createCollator(validLoc);
744     }
745     return NULL;
746 }
747 
748 UnicodeString&
getDisplayName(const UnicodeString & id,const Locale & locale,UnicodeString & result) const749 CFactory::getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const
750 {
751     if ((_coverage & 0x1) == 0) {
752         UErrorCode status = U_ZERO_ERROR;
753         const Hashtable* ids = getSupportedIDs(status);
754         if (ids && (ids->get(id) != NULL)) {
755             Locale loc;
756             LocaleUtility::initLocaleFromName(id, loc);
757             return _delegate->getDisplayName(loc, locale, result);
758         }
759     }
760     result.setToBogus();
761     return result;
762 }
763 
764 URegistryKey U_EXPORT2
registerFactory(CollatorFactory * toAdopt,UErrorCode & status)765 Collator::registerFactory(CollatorFactory* toAdopt, UErrorCode& status)
766 {
767     if (U_SUCCESS(status)) {
768         CFactory* f = new CFactory(toAdopt, status);
769         if (f) {
770             return getService()->registerFactory(f, status);
771         }
772         status = U_MEMORY_ALLOCATION_ERROR;
773     }
774     return NULL;
775 }
776 
777 // -------------------------------------
778 
779 UBool U_EXPORT2
unregister(URegistryKey key,UErrorCode & status)780 Collator::unregister(URegistryKey key, UErrorCode& status)
781 {
782     if (U_SUCCESS(status)) {
783         if (hasService()) {
784             return gService->unregister(key, status);
785         }
786         status = U_ILLEGAL_ARGUMENT_ERROR;
787     }
788     return FALSE;
789 }
790 #endif /* UCONFIG_NO_SERVICE */
791 
792 class CollationLocaleListEnumeration : public StringEnumeration {
793 private:
794     int32_t index;
795 public:
796     static UClassID U_EXPORT2 getStaticClassID(void);
797     virtual UClassID getDynamicClassID(void) const;
798 public:
CollationLocaleListEnumeration()799     CollationLocaleListEnumeration()
800         : index(0)
801     {
802         // The global variables should already be initialized.
803         //isAvailableLocaleListInitialized(status);
804     }
805 
806     virtual ~CollationLocaleListEnumeration();
807 
clone() const808     virtual StringEnumeration * clone() const
809     {
810         CollationLocaleListEnumeration *result = new CollationLocaleListEnumeration();
811         if (result) {
812             result->index = index;
813         }
814         return result;
815     }
816 
count(UErrorCode &) const817     virtual int32_t count(UErrorCode &/*status*/) const {
818         return availableLocaleListCount;
819     }
820 
next(int32_t * resultLength,UErrorCode &)821     virtual const char* next(int32_t* resultLength, UErrorCode& /*status*/) {
822         const char* result;
823         if(index < availableLocaleListCount) {
824             result = availableLocaleList[index++].getName();
825             if(resultLength != NULL) {
826                 *resultLength = (int32_t)uprv_strlen(result);
827             }
828         } else {
829             if(resultLength != NULL) {
830                 *resultLength = 0;
831             }
832             result = NULL;
833         }
834         return result;
835     }
836 
snext(UErrorCode & status)837     virtual const UnicodeString* snext(UErrorCode& status) {
838         int32_t resultLength = 0;
839         const char *s = next(&resultLength, status);
840         return setChars(s, resultLength, status);
841     }
842 
reset(UErrorCode &)843     virtual void reset(UErrorCode& /*status*/) {
844         index = 0;
845     }
846 };
847 
~CollationLocaleListEnumeration()848 CollationLocaleListEnumeration::~CollationLocaleListEnumeration() {}
849 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationLocaleListEnumeration)850 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationLocaleListEnumeration)
851 
852 
853 // -------------------------------------
854 
855 StringEnumeration* U_EXPORT2
856 Collator::getAvailableLocales(void)
857 {
858 #if !UCONFIG_NO_SERVICE
859     if (hasService()) {
860         return getService()->getAvailableLocales();
861     }
862 #endif /* UCONFIG_NO_SERVICE */
863     UErrorCode status = U_ZERO_ERROR;
864     if (isAvailableLocaleListInitialized(status)) {
865         return new CollationLocaleListEnumeration();
866     }
867     return NULL;
868 }
869 
870 StringEnumeration* U_EXPORT2
getKeywords(UErrorCode & status)871 Collator::getKeywords(UErrorCode& status) {
872     return UStringEnumeration::fromUEnumeration(
873             ucol_getKeywords(&status), status);
874 }
875 
876 StringEnumeration* U_EXPORT2
getKeywordValues(const char * keyword,UErrorCode & status)877 Collator::getKeywordValues(const char *keyword, UErrorCode& status) {
878     return UStringEnumeration::fromUEnumeration(
879             ucol_getKeywordValues(keyword, &status), status);
880 }
881 
882 StringEnumeration* U_EXPORT2
getKeywordValuesForLocale(const char * key,const Locale & locale,UBool commonlyUsed,UErrorCode & status)883 Collator::getKeywordValuesForLocale(const char* key, const Locale& locale,
884                                     UBool commonlyUsed, UErrorCode& status) {
885     return UStringEnumeration::fromUEnumeration(
886             ucol_getKeywordValuesForLocale(
887                     key, locale.getName(), commonlyUsed, &status),
888             status);
889 }
890 
891 Locale U_EXPORT2
getFunctionalEquivalent(const char * keyword,const Locale & locale,UBool & isAvailable,UErrorCode & status)892 Collator::getFunctionalEquivalent(const char* keyword, const Locale& locale,
893                                   UBool& isAvailable, UErrorCode& status) {
894     // This is a wrapper over ucol_getFunctionalEquivalent
895     char loc[ULOC_FULLNAME_CAPACITY];
896     /*int32_t len =*/ ucol_getFunctionalEquivalent(loc, sizeof(loc),
897                     keyword, locale.getName(), &isAvailable, &status);
898     if (U_FAILURE(status)) {
899         *loc = 0; // root
900     }
901     return Locale::createFromName(loc);
902 }
903 
904 Collator::ECollationStrength
getStrength(void) const905 Collator::getStrength(void) const {
906     UErrorCode intStatus = U_ZERO_ERROR;
907     return (ECollationStrength)getAttribute(UCOL_STRENGTH, intStatus);
908 }
909 
910 void
setStrength(ECollationStrength newStrength)911 Collator::setStrength(ECollationStrength newStrength) {
912     UErrorCode intStatus = U_ZERO_ERROR;
913     setAttribute(UCOL_STRENGTH, (UColAttributeValue)newStrength, intStatus);
914 }
915 
916 Collator &
setMaxVariable(UColReorderCode,UErrorCode & errorCode)917 Collator::setMaxVariable(UColReorderCode /*group*/, UErrorCode &errorCode) {
918     if (U_SUCCESS(errorCode)) {
919         errorCode = U_UNSUPPORTED_ERROR;
920     }
921     return *this;
922 }
923 
924 UColReorderCode
getMaxVariable() const925 Collator::getMaxVariable() const {
926     return UCOL_REORDER_CODE_PUNCTUATION;
927 }
928 
929 int32_t
getReorderCodes(int32_t *,int32_t,UErrorCode & status) const930 Collator::getReorderCodes(int32_t* /* dest*/,
931                           int32_t /* destCapacity*/,
932                           UErrorCode& status) const
933 {
934     if (U_SUCCESS(status)) {
935         status = U_UNSUPPORTED_ERROR;
936     }
937     return 0;
938 }
939 
940 void
setReorderCodes(const int32_t *,int32_t,UErrorCode & status)941 Collator::setReorderCodes(const int32_t* /* reorderCodes */,
942                           int32_t /* reorderCodesLength */,
943                           UErrorCode& status)
944 {
945     if (U_SUCCESS(status)) {
946         status = U_UNSUPPORTED_ERROR;
947     }
948 }
949 
950 int32_t
getEquivalentReorderCodes(int32_t reorderCode,int32_t * dest,int32_t capacity,UErrorCode & errorCode)951 Collator::getEquivalentReorderCodes(int32_t reorderCode,
952                                     int32_t *dest, int32_t capacity,
953                                     UErrorCode &errorCode) {
954     if(U_FAILURE(errorCode)) { return 0; }
955     if(capacity < 0 || (dest == NULL && capacity > 0)) {
956         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
957         return 0;
958     }
959     const CollationData *baseData = CollationRoot::getData(errorCode);
960     if(U_FAILURE(errorCode)) { return 0; }
961     return baseData->getEquivalentScripts(reorderCode, dest, capacity, errorCode);
962 }
963 
964 int32_t
internalGetShortDefinitionString(const char *,char *,int32_t,UErrorCode & status) const965 Collator::internalGetShortDefinitionString(const char * /*locale*/,
966                                                              char * /*buffer*/,
967                                                              int32_t /*capacity*/,
968                                                              UErrorCode &status) const {
969   if(U_SUCCESS(status)) {
970     status = U_UNSUPPORTED_ERROR; /* Shouldn't happen, internal function */
971   }
972   return 0;
973 }
974 
975 UCollationResult
internalCompareUTF8(const char * left,int32_t leftLength,const char * right,int32_t rightLength,UErrorCode & errorCode) const976 Collator::internalCompareUTF8(const char *left, int32_t leftLength,
977                               const char *right, int32_t rightLength,
978                               UErrorCode &errorCode) const {
979     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
980     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
981         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
982         return UCOL_EQUAL;
983     }
984     return compareUTF8(
985             StringPiece(left, (leftLength < 0) ? uprv_strlen(left) : leftLength),
986             StringPiece(right, (rightLength < 0) ? uprv_strlen(right) : rightLength),
987             errorCode);
988 }
989 
990 int32_t
internalNextSortKeyPart(UCharIterator *,uint32_t[2],uint8_t *,int32_t,UErrorCode & errorCode) const991 Collator::internalNextSortKeyPart(UCharIterator * /*iter*/, uint32_t /*state*/[2],
992                                   uint8_t * /*dest*/, int32_t /*count*/, UErrorCode &errorCode) const {
993     if (U_SUCCESS(errorCode)) {
994         errorCode = U_UNSUPPORTED_ERROR;
995     }
996     return 0;
997 }
998 
999 // UCollator private data members ----------------------------------------
1000 
1001 /* This is useless information */
1002 /*const UVersionInfo Collator::fVersion = {1, 1, 0, 0};*/
1003 
1004 // -------------------------------------
1005 
1006 U_NAMESPACE_END
1007 
1008 #endif /* #if !UCONFIG_NO_COLLATION */
1009 
1010 /* eof */
1011