1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 * Copyright (C) 1996-2014, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ******************************************************************************
8 */
9
10 /**
11 * File coll.cpp
12 *
13 * Created by: Helena Shih
14 *
15 * Modification History:
16 *
17 * Date Name Description
18 * 2/5/97 aliu Modified createDefault to load collation data from
19 * binary files when possible. Added related methods
20 * createCollationFromFile, chopLocale, createPathName.
21 * 2/11/97 aliu Added methods addToCache, findInCache, which implement
22 * a Collation cache. Modified createDefault to look in
23 * cache first, and also to store newly created Collation
24 * objects in the cache. Modified to not use gLocPath.
25 * 2/12/97 aliu Modified to create objects from RuleBasedCollator cache.
26 * Moved cache out of Collation class.
27 * 2/13/97 aliu Moved several methods out of this class and into
28 * RuleBasedCollator, with modifications. Modified
29 * createDefault() to call new RuleBasedCollator(Locale&)
30 * constructor. General clean up and documentation.
31 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
32 * constructor.
33 * 05/06/97 helena Added memory allocation error detection.
34 * 05/08/97 helena Added createInstance().
35 * 6/20/97 helena Java class name change.
36 * 04/23/99 stephen Removed EDecompositionMode, merged with
37 * Normalizer::EMode
38 * 11/23/9 srl Inlining of some critical functions
39 * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
40 * 2012-2014 markus Rewritten in C++ again.
41 */
42
43 #include "utypeinfo.h" // for 'typeid' to work
44
45 #include "unicode/utypes.h"
46
47 #if !UCONFIG_NO_COLLATION
48
49 #include "unicode/coll.h"
50 #include "unicode/tblcoll.h"
51 #include "collationdata.h"
52 #include "collationroot.h"
53 #include "collationtailoring.h"
54 #include "ucol_imp.h"
55 #include "cstring.h"
56 #include "cmemory.h"
57 #include "umutex.h"
58 #include "servloc.h"
59 #include "uassert.h"
60 #include "ustrenum.h"
61 #include "uresimp.h"
62 #include "ucln_in.h"
63
64 static icu::Locale* availableLocaleList = NULL;
65 static int32_t availableLocaleListCount;
66 #if !UCONFIG_NO_SERVICE
67 static icu::ICULocaleService* gService = NULL;
68 static icu::UInitOnce gServiceInitOnce = U_INITONCE_INITIALIZER;
69 #endif
70 static icu::UInitOnce gAvailableLocaleListInitOnce;
71
72 /**
73 * Release all static memory held by collator.
74 */
75 U_CDECL_BEGIN
collator_cleanup(void)76 static UBool U_CALLCONV collator_cleanup(void) {
77 #if !UCONFIG_NO_SERVICE
78 if (gService) {
79 delete gService;
80 gService = NULL;
81 }
82 gServiceInitOnce.reset();
83 #endif
84 if (availableLocaleList) {
85 delete []availableLocaleList;
86 availableLocaleList = NULL;
87 }
88 availableLocaleListCount = 0;
89 gAvailableLocaleListInitOnce.reset();
90 return TRUE;
91 }
92
93 U_CDECL_END
94
95 U_NAMESPACE_BEGIN
96
97 #if !UCONFIG_NO_SERVICE
98
99 // ------------------------------------------
100 //
101 // Registration
102 //
103
104 //-------------------------------------------
105
~CollatorFactory()106 CollatorFactory::~CollatorFactory() {}
107
108 //-------------------------------------------
109
110 UBool
visible(void) const111 CollatorFactory::visible(void) const {
112 return TRUE;
113 }
114
115 //-------------------------------------------
116
117 UnicodeString&
getDisplayName(const Locale & objectLocale,const Locale & displayLocale,UnicodeString & result)118 CollatorFactory::getDisplayName(const Locale& objectLocale,
119 const Locale& displayLocale,
120 UnicodeString& result)
121 {
122 return objectLocale.getDisplayName(displayLocale, result);
123 }
124
125 // -------------------------------------
126
127 class ICUCollatorFactory : public ICUResourceBundleFactory {
128 public:
ICUCollatorFactory()129 ICUCollatorFactory() : ICUResourceBundleFactory(UnicodeString(U_ICUDATA_COLL, -1, US_INV)) { }
130 virtual ~ICUCollatorFactory();
131 protected:
132 virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
133 };
134
~ICUCollatorFactory()135 ICUCollatorFactory::~ICUCollatorFactory() {}
136
137 UObject*
create(const ICUServiceKey & key,const ICUService *,UErrorCode & status) const138 ICUCollatorFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const {
139 if (handlesKey(key, status)) {
140 const LocaleKey& lkey = (const LocaleKey&)key;
141 Locale loc;
142 // make sure the requested locale is correct
143 // default LocaleFactory uses currentLocale since that's the one vetted by handlesKey
144 // but for ICU rb resources we use the actual one since it will fallback again
145 lkey.canonicalLocale(loc);
146
147 return Collator::makeInstance(loc, status);
148 }
149 return NULL;
150 }
151
152 // -------------------------------------
153
154 class ICUCollatorService : public ICULocaleService {
155 public:
ICUCollatorService()156 ICUCollatorService()
157 : ICULocaleService(UNICODE_STRING_SIMPLE("Collator"))
158 {
159 UErrorCode status = U_ZERO_ERROR;
160 registerFactory(new ICUCollatorFactory(), status);
161 }
162
163 virtual ~ICUCollatorService();
164
cloneInstance(UObject * instance) const165 virtual UObject* cloneInstance(UObject* instance) const {
166 return ((Collator*)instance)->clone();
167 }
168
handleDefault(const ICUServiceKey & key,UnicodeString * actualID,UErrorCode & status) const169 virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualID, UErrorCode& status) const {
170 LocaleKey& lkey = (LocaleKey&)key;
171 if (actualID) {
172 // Ugly Hack Alert! We return an empty actualID to signal
173 // to callers that this is a default object, not a "real"
174 // service-created object. (TODO remove in 3.0) [aliu]
175 actualID->truncate(0);
176 }
177 Locale loc("");
178 lkey.canonicalLocale(loc);
179 return Collator::makeInstance(loc, status);
180 }
181
getKey(ICUServiceKey & key,UnicodeString * actualReturn,UErrorCode & status) const182 virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const {
183 UnicodeString ar;
184 if (actualReturn == NULL) {
185 actualReturn = &ar;
186 }
187 return (Collator*)ICULocaleService::getKey(key, actualReturn, status);
188 }
189
isDefault() const190 virtual UBool isDefault() const {
191 return countFactories() == 1;
192 }
193 };
194
~ICUCollatorService()195 ICUCollatorService::~ICUCollatorService() {}
196
197 // -------------------------------------
198
initService()199 static void U_CALLCONV initService() {
200 gService = new ICUCollatorService();
201 ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR, collator_cleanup);
202 }
203
204
205 static ICULocaleService*
getService(void)206 getService(void)
207 {
208 umtx_initOnce(gServiceInitOnce, &initService);
209 return gService;
210 }
211
212 // -------------------------------------
213
214 static inline UBool
hasService(void)215 hasService(void)
216 {
217 UBool retVal = !gServiceInitOnce.isReset() && (getService() != NULL);
218 return retVal;
219 }
220
221 #endif /* UCONFIG_NO_SERVICE */
222
223 static void U_CALLCONV
initAvailableLocaleList(UErrorCode & status)224 initAvailableLocaleList(UErrorCode &status) {
225 U_ASSERT(availableLocaleListCount == 0);
226 U_ASSERT(availableLocaleList == NULL);
227 // for now, there is a hardcoded list, so just walk through that list and set it up.
228 UResourceBundle *index = NULL;
229 UResourceBundle installed;
230 int32_t i = 0;
231
232 ures_initStackObject(&installed);
233 index = ures_openDirect(U_ICUDATA_COLL, "res_index", &status);
234 ures_getByKey(index, "InstalledLocales", &installed, &status);
235
236 if(U_SUCCESS(status)) {
237 availableLocaleListCount = ures_getSize(&installed);
238 availableLocaleList = new Locale[availableLocaleListCount];
239
240 if (availableLocaleList != NULL) {
241 ures_resetIterator(&installed);
242 while(ures_hasNext(&installed)) {
243 const char *tempKey = NULL;
244 ures_getNextString(&installed, NULL, &tempKey, &status);
245 availableLocaleList[i++] = Locale(tempKey);
246 }
247 }
248 U_ASSERT(availableLocaleListCount == i);
249 ures_close(&installed);
250 }
251 ures_close(index);
252 ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR, collator_cleanup);
253 }
254
isAvailableLocaleListInitialized(UErrorCode & status)255 static UBool isAvailableLocaleListInitialized(UErrorCode &status) {
256 umtx_initOnce(gAvailableLocaleListInitOnce, &initAvailableLocaleList, status);
257 return U_SUCCESS(status);
258 }
259
260
261 // Collator public methods -----------------------------------------------
262
263 namespace {
264
265 static const struct {
266 const char *name;
267 UColAttribute attr;
268 } collAttributes[] = {
269 { "colStrength", UCOL_STRENGTH },
270 { "colBackwards", UCOL_FRENCH_COLLATION },
271 { "colCaseLevel", UCOL_CASE_LEVEL },
272 { "colCaseFirst", UCOL_CASE_FIRST },
273 { "colAlternate", UCOL_ALTERNATE_HANDLING },
274 { "colNormalization", UCOL_NORMALIZATION_MODE },
275 { "colNumeric", UCOL_NUMERIC_COLLATION }
276 };
277
278 static const struct {
279 const char *name;
280 UColAttributeValue value;
281 } collAttributeValues[] = {
282 { "primary", UCOL_PRIMARY },
283 { "secondary", UCOL_SECONDARY },
284 { "tertiary", UCOL_TERTIARY },
285 { "quaternary", UCOL_QUATERNARY },
286 // Note: Not supporting typo "quarternary" because it was never supported in locale IDs.
287 { "identical", UCOL_IDENTICAL },
288 { "no", UCOL_OFF },
289 { "yes", UCOL_ON },
290 { "shifted", UCOL_SHIFTED },
291 { "non-ignorable", UCOL_NON_IGNORABLE },
292 { "lower", UCOL_LOWER_FIRST },
293 { "upper", UCOL_UPPER_FIRST }
294 };
295
296 static const char *collReorderCodes[UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST] = {
297 "space", "punct", "symbol", "currency", "digit"
298 };
299
getReorderCode(const char * s)300 int32_t getReorderCode(const char *s) {
301 for (int32_t i = 0; i < UPRV_LENGTHOF(collReorderCodes); ++i) {
302 if (uprv_stricmp(s, collReorderCodes[i]) == 0) {
303 return UCOL_REORDER_CODE_FIRST + i;
304 }
305 }
306 // Not supporting "others" = UCOL_REORDER_CODE_OTHERS
307 // as a synonym for Zzzz = USCRIPT_UNKNOWN for now:
308 // Avoid introducing synonyms/aliases.
309 return -1;
310 }
311
312 /**
313 * Sets collation attributes according to locale keywords. See
314 * http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Settings
315 *
316 * Using "alias" keywords and values where defined:
317 * http://www.unicode.org/reports/tr35/tr35.html#Old_Locale_Extension_Syntax
318 * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml
319 */
setAttributesFromKeywords(const Locale & loc,Collator & coll,UErrorCode & errorCode)320 void setAttributesFromKeywords(const Locale &loc, Collator &coll, UErrorCode &errorCode) {
321 if (U_FAILURE(errorCode)) {
322 return;
323 }
324 if (uprv_strcmp(loc.getName(), loc.getBaseName()) == 0) {
325 // No keywords.
326 return;
327 }
328 char value[1024]; // The reordering value could be long.
329 // Check for collation keywords that were already deprecated
330 // before any were supported in createInstance() (except for "collation").
331 int32_t length = loc.getKeywordValue("colHiraganaQuaternary", value, UPRV_LENGTHOF(value), errorCode);
332 if (U_FAILURE(errorCode)) {
333 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
334 return;
335 }
336 if (length != 0) {
337 errorCode = U_UNSUPPORTED_ERROR;
338 return;
339 }
340 length = loc.getKeywordValue("variableTop", value, UPRV_LENGTHOF(value), errorCode);
341 if (U_FAILURE(errorCode)) {
342 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
343 return;
344 }
345 if (length != 0) {
346 errorCode = U_UNSUPPORTED_ERROR;
347 return;
348 }
349 // Parse known collation keywords, ignore others.
350 if (errorCode == U_STRING_NOT_TERMINATED_WARNING) {
351 errorCode = U_ZERO_ERROR;
352 }
353 for (int32_t i = 0; i < UPRV_LENGTHOF(collAttributes); ++i) {
354 length = loc.getKeywordValue(collAttributes[i].name, value, UPRV_LENGTHOF(value), errorCode);
355 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
356 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
357 return;
358 }
359 if (length == 0) { continue; }
360 for (int32_t j = 0;; ++j) {
361 if (j == UPRV_LENGTHOF(collAttributeValues)) {
362 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
363 return;
364 }
365 if (uprv_stricmp(value, collAttributeValues[j].name) == 0) {
366 coll.setAttribute(collAttributes[i].attr, collAttributeValues[j].value, errorCode);
367 break;
368 }
369 }
370 }
371 length = loc.getKeywordValue("colReorder", value, UPRV_LENGTHOF(value), errorCode);
372 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
373 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
374 return;
375 }
376 if (length != 0) {
377 int32_t codes[USCRIPT_CODE_LIMIT + UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST];
378 int32_t codesLength = 0;
379 char *scriptName = value;
380 for (;;) {
381 if (codesLength == UPRV_LENGTHOF(codes)) {
382 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
383 return;
384 }
385 char *limit = scriptName;
386 char c;
387 while ((c = *limit) != 0 && c != '-') { ++limit; }
388 *limit = 0;
389 int32_t code;
390 if ((limit - scriptName) == 4) {
391 // Strict parsing, accept only 4-letter script codes, not long names.
392 code = u_getPropertyValueEnum(UCHAR_SCRIPT, scriptName);
393 } else {
394 code = getReorderCode(scriptName);
395 }
396 if (code < 0) {
397 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
398 return;
399 }
400 codes[codesLength++] = code;
401 if (c == 0) { break; }
402 scriptName = limit + 1;
403 }
404 coll.setReorderCodes(codes, codesLength, errorCode);
405 }
406 length = loc.getKeywordValue("kv", value, UPRV_LENGTHOF(value), errorCode);
407 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
408 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
409 return;
410 }
411 if (length != 0) {
412 int32_t code = getReorderCode(value);
413 if (code < 0) {
414 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
415 return;
416 }
417 coll.setMaxVariable((UColReorderCode)code, errorCode);
418 }
419 if (U_FAILURE(errorCode)) {
420 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
421 }
422 }
423
424 } // namespace
425
createInstance(UErrorCode & success)426 Collator* U_EXPORT2 Collator::createInstance(UErrorCode& success)
427 {
428 return createInstance(Locale::getDefault(), success);
429 }
430
createInstance(const Locale & desiredLocale,UErrorCode & status)431 Collator* U_EXPORT2 Collator::createInstance(const Locale& desiredLocale,
432 UErrorCode& status)
433 {
434 if (U_FAILURE(status))
435 return 0;
436 if (desiredLocale.isBogus()) {
437 // Locale constructed from malformed locale ID or language tag.
438 status = U_ILLEGAL_ARGUMENT_ERROR;
439 return NULL;
440 }
441
442 Collator* coll;
443 #if !UCONFIG_NO_SERVICE
444 if (hasService()) {
445 Locale actualLoc;
446 coll = (Collator*)gService->get(desiredLocale, &actualLoc, status);
447 } else
448 #endif
449 {
450 coll = makeInstance(desiredLocale, status);
451 // Either returns NULL with U_FAILURE(status), or non-NULL with U_SUCCESS(status)
452 }
453 // The use of *coll in setAttributesFromKeywords can cause the NULL check to be
454 // optimized out of the delete even though setAttributesFromKeywords returns
455 // immediately if U_FAILURE(status), so we add a check here.
456 if (U_FAILURE(status)) {
457 return NULL;
458 }
459 setAttributesFromKeywords(desiredLocale, *coll, status);
460 if (U_FAILURE(status)) {
461 delete coll;
462 return NULL;
463 }
464 return coll;
465 }
466
467
makeInstance(const Locale & desiredLocale,UErrorCode & status)468 Collator* Collator::makeInstance(const Locale& desiredLocale, UErrorCode& status) {
469 const CollationCacheEntry *entry = CollationLoader::loadTailoring(desiredLocale, status);
470 if (U_SUCCESS(status)) {
471 Collator *result = new RuleBasedCollator(entry);
472 if (result != NULL) {
473 // Both the unified cache's get() and the RBC constructor
474 // did addRef(). Undo one of them.
475 entry->removeRef();
476 return result;
477 }
478 status = U_MEMORY_ALLOCATION_ERROR;
479 }
480 if (entry != NULL) {
481 // Undo the addRef() from the cache.get().
482 entry->removeRef();
483 }
484 return NULL;
485 }
486
487 Collator *
safeClone() const488 Collator::safeClone() const {
489 return clone();
490 }
491
492 // implement deprecated, previously abstract method
compare(const UnicodeString & source,const UnicodeString & target) const493 Collator::EComparisonResult Collator::compare(const UnicodeString& source,
494 const UnicodeString& target) const
495 {
496 UErrorCode ec = U_ZERO_ERROR;
497 return (EComparisonResult)compare(source, target, ec);
498 }
499
500 // implement deprecated, previously abstract method
compare(const UnicodeString & source,const UnicodeString & target,int32_t length) const501 Collator::EComparisonResult Collator::compare(const UnicodeString& source,
502 const UnicodeString& target,
503 int32_t length) const
504 {
505 UErrorCode ec = U_ZERO_ERROR;
506 return (EComparisonResult)compare(source, target, length, ec);
507 }
508
509 // implement deprecated, previously abstract method
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength) const510 Collator::EComparisonResult Collator::compare(const UChar* source, int32_t sourceLength,
511 const UChar* target, int32_t targetLength)
512 const
513 {
514 UErrorCode ec = U_ZERO_ERROR;
515 return (EComparisonResult)compare(source, sourceLength, target, targetLength, ec);
516 }
517
compare(UCharIterator &,UCharIterator &,UErrorCode & status) const518 UCollationResult Collator::compare(UCharIterator &/*sIter*/,
519 UCharIterator &/*tIter*/,
520 UErrorCode &status) const {
521 if(U_SUCCESS(status)) {
522 // Not implemented in the base class.
523 status = U_UNSUPPORTED_ERROR;
524 }
525 return UCOL_EQUAL;
526 }
527
compareUTF8(const StringPiece & source,const StringPiece & target,UErrorCode & status) const528 UCollationResult Collator::compareUTF8(const StringPiece &source,
529 const StringPiece &target,
530 UErrorCode &status) const {
531 if(U_FAILURE(status)) {
532 return UCOL_EQUAL;
533 }
534 UCharIterator sIter, tIter;
535 uiter_setUTF8(&sIter, source.data(), source.length());
536 uiter_setUTF8(&tIter, target.data(), target.length());
537 return compare(sIter, tIter, status);
538 }
539
equals(const UnicodeString & source,const UnicodeString & target) const540 UBool Collator::equals(const UnicodeString& source,
541 const UnicodeString& target) const
542 {
543 UErrorCode ec = U_ZERO_ERROR;
544 return (compare(source, target, ec) == UCOL_EQUAL);
545 }
546
greaterOrEqual(const UnicodeString & source,const UnicodeString & target) const547 UBool Collator::greaterOrEqual(const UnicodeString& source,
548 const UnicodeString& target) const
549 {
550 UErrorCode ec = U_ZERO_ERROR;
551 return (compare(source, target, ec) != UCOL_LESS);
552 }
553
greater(const UnicodeString & source,const UnicodeString & target) const554 UBool Collator::greater(const UnicodeString& source,
555 const UnicodeString& target) const
556 {
557 UErrorCode ec = U_ZERO_ERROR;
558 return (compare(source, target, ec) == UCOL_GREATER);
559 }
560
561 // this API ignores registered collators, since it returns an
562 // array of indefinite lifetime
getAvailableLocales(int32_t & count)563 const Locale* U_EXPORT2 Collator::getAvailableLocales(int32_t& count)
564 {
565 UErrorCode status = U_ZERO_ERROR;
566 Locale *result = NULL;
567 count = 0;
568 if (isAvailableLocaleListInitialized(status))
569 {
570 result = availableLocaleList;
571 count = availableLocaleListCount;
572 }
573 return result;
574 }
575
getDisplayName(const Locale & objectLocale,const Locale & displayLocale,UnicodeString & name)576 UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
577 const Locale& displayLocale,
578 UnicodeString& name)
579 {
580 #if !UCONFIG_NO_SERVICE
581 if (hasService()) {
582 UnicodeString locNameStr;
583 LocaleUtility::initNameFromLocale(objectLocale, locNameStr);
584 return gService->getDisplayName(locNameStr, name, displayLocale);
585 }
586 #endif
587 return objectLocale.getDisplayName(displayLocale, name);
588 }
589
getDisplayName(const Locale & objectLocale,UnicodeString & name)590 UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
591 UnicodeString& name)
592 {
593 return getDisplayName(objectLocale, Locale::getDefault(), name);
594 }
595
596 /* This is useless information */
597 /*void Collator::getVersion(UVersionInfo versionInfo) const
598 {
599 if (versionInfo!=NULL)
600 uprv_memcpy(versionInfo, fVersion, U_MAX_VERSION_LENGTH);
601 }
602 */
603
604 // UCollator protected constructor destructor ----------------------------
605
606 /**
607 * Default constructor.
608 * Constructor is different from the old default Collator constructor.
609 * The task for determing the default collation strength and normalization mode
610 * is left to the child class.
611 */
Collator()612 Collator::Collator()
613 : UObject()
614 {
615 }
616
617 /**
618 * Constructor.
619 * Empty constructor, does not handle the arguments.
620 * This constructor is done for backward compatibility with 1.7 and 1.8.
621 * The task for handling the argument collation strength and normalization
622 * mode is left to the child class.
623 * @param collationStrength collation strength
624 * @param decompositionMode
625 * @deprecated 2.4 use the default constructor instead
626 */
Collator(UCollationStrength,UNormalizationMode)627 Collator::Collator(UCollationStrength, UNormalizationMode )
628 : UObject()
629 {
630 }
631
~Collator()632 Collator::~Collator()
633 {
634 }
635
// Copy constructor; only the UObject base is copied.
Collator::Collator(const Collator &other) : UObject(other) {}
640
operator ==(const Collator & other) const641 UBool Collator::operator==(const Collator& other) const
642 {
643 // Subclasses: Call this method and then add more specific checks.
644 return typeid(*this) == typeid(other);
645 }
646
operator !=(const Collator & other) const647 UBool Collator::operator!=(const Collator& other) const
648 {
649 return (UBool)!(*this == other);
650 }
651
getBound(const uint8_t * source,int32_t sourceLength,UColBoundMode boundType,uint32_t noOfLevels,uint8_t * result,int32_t resultLength,UErrorCode & status)652 int32_t U_EXPORT2 Collator::getBound(const uint8_t *source,
653 int32_t sourceLength,
654 UColBoundMode boundType,
655 uint32_t noOfLevels,
656 uint8_t *result,
657 int32_t resultLength,
658 UErrorCode &status)
659 {
660 return ucol_getBound(source, sourceLength, boundType, noOfLevels, result, resultLength, &status);
661 }
662
663 void
setLocales(const Locale &,const Locale &,const Locale &)664 Collator::setLocales(const Locale& /* requestedLocale */, const Locale& /* validLocale */, const Locale& /*actualLocale*/) {
665 }
666
getTailoredSet(UErrorCode & status) const667 UnicodeSet *Collator::getTailoredSet(UErrorCode &status) const
668 {
669 if(U_FAILURE(status)) {
670 return NULL;
671 }
672 // everything can be changed
673 return new UnicodeSet(0, 0x10FFFF);
674 }
675
676 // -------------------------------------
677
678 #if !UCONFIG_NO_SERVICE
679 URegistryKey U_EXPORT2
registerInstance(Collator * toAdopt,const Locale & locale,UErrorCode & status)680 Collator::registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status)
681 {
682 if (U_SUCCESS(status)) {
683 // Set the collator locales while registering so that createInstance()
684 // need not guess whether the collator's locales are already set properly
685 // (as they are by the data loader).
686 toAdopt->setLocales(locale, locale, locale);
687 return getService()->registerInstance(toAdopt, locale, status);
688 }
689 return NULL;
690 }
691
692 // -------------------------------------
693
694 class CFactory : public LocaleKeyFactory {
695 private:
696 CollatorFactory* _delegate;
697 Hashtable* _ids;
698
699 public:
CFactory(CollatorFactory * delegate,UErrorCode & status)700 CFactory(CollatorFactory* delegate, UErrorCode& status)
701 : LocaleKeyFactory(delegate->visible() ? VISIBLE : INVISIBLE)
702 , _delegate(delegate)
703 , _ids(NULL)
704 {
705 if (U_SUCCESS(status)) {
706 int32_t count = 0;
707 _ids = new Hashtable(status);
708 if (_ids) {
709 const UnicodeString * idlist = _delegate->getSupportedIDs(count, status);
710 for (int i = 0; i < count; ++i) {
711 _ids->put(idlist[i], (void*)this, status);
712 if (U_FAILURE(status)) {
713 delete _ids;
714 _ids = NULL;
715 return;
716 }
717 }
718 } else {
719 status = U_MEMORY_ALLOCATION_ERROR;
720 }
721 }
722 }
723
724 virtual ~CFactory();
725
726 virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
727
728 protected:
getSupportedIDs(UErrorCode & status) const729 virtual const Hashtable* getSupportedIDs(UErrorCode& status) const
730 {
731 if (U_SUCCESS(status)) {
732 return _ids;
733 }
734 return NULL;
735 }
736
737 virtual UnicodeString&
738 getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
739 };
740
~CFactory()741 CFactory::~CFactory()
742 {
743 delete _delegate;
744 delete _ids;
745 }
746
747 UObject*
create(const ICUServiceKey & key,const ICUService *,UErrorCode & status) const748 CFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const
749 {
750 if (handlesKey(key, status)) {
751 const LocaleKey& lkey = (const LocaleKey&)key;
752 Locale validLoc;
753 lkey.currentLocale(validLoc);
754 return _delegate->createCollator(validLoc);
755 }
756 return NULL;
757 }
758
759 UnicodeString&
getDisplayName(const UnicodeString & id,const Locale & locale,UnicodeString & result) const760 CFactory::getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const
761 {
762 if ((_coverage & 0x1) == 0) {
763 UErrorCode status = U_ZERO_ERROR;
764 const Hashtable* ids = getSupportedIDs(status);
765 if (ids && (ids->get(id) != NULL)) {
766 Locale loc;
767 LocaleUtility::initLocaleFromName(id, loc);
768 return _delegate->getDisplayName(loc, locale, result);
769 }
770 }
771 result.setToBogus();
772 return result;
773 }
774
775 URegistryKey U_EXPORT2
registerFactory(CollatorFactory * toAdopt,UErrorCode & status)776 Collator::registerFactory(CollatorFactory* toAdopt, UErrorCode& status)
777 {
778 if (U_SUCCESS(status)) {
779 CFactory* f = new CFactory(toAdopt, status);
780 if (f) {
781 return getService()->registerFactory(f, status);
782 }
783 status = U_MEMORY_ALLOCATION_ERROR;
784 }
785 return NULL;
786 }
787
788 // -------------------------------------
789
790 UBool U_EXPORT2
unregister(URegistryKey key,UErrorCode & status)791 Collator::unregister(URegistryKey key, UErrorCode& status)
792 {
793 if (U_SUCCESS(status)) {
794 if (hasService()) {
795 return gService->unregister(key, status);
796 }
797 status = U_ILLEGAL_ARGUMENT_ERROR;
798 }
799 return FALSE;
800 }
801 #endif /* UCONFIG_NO_SERVICE */
802
803 class CollationLocaleListEnumeration : public StringEnumeration {
804 private:
805 int32_t index;
806 public:
807 static UClassID U_EXPORT2 getStaticClassID(void);
808 virtual UClassID getDynamicClassID(void) const;
809 public:
CollationLocaleListEnumeration()810 CollationLocaleListEnumeration()
811 : index(0)
812 {
813 // The global variables should already be initialized.
814 //isAvailableLocaleListInitialized(status);
815 }
816
817 virtual ~CollationLocaleListEnumeration();
818
clone() const819 virtual StringEnumeration * clone() const
820 {
821 CollationLocaleListEnumeration *result = new CollationLocaleListEnumeration();
822 if (result) {
823 result->index = index;
824 }
825 return result;
826 }
827
count(UErrorCode &) const828 virtual int32_t count(UErrorCode &/*status*/) const {
829 return availableLocaleListCount;
830 }
831
next(int32_t * resultLength,UErrorCode &)832 virtual const char* next(int32_t* resultLength, UErrorCode& /*status*/) {
833 const char* result;
834 if(index < availableLocaleListCount) {
835 result = availableLocaleList[index++].getName();
836 if(resultLength != NULL) {
837 *resultLength = (int32_t)uprv_strlen(result);
838 }
839 } else {
840 if(resultLength != NULL) {
841 *resultLength = 0;
842 }
843 result = NULL;
844 }
845 return result;
846 }
847
snext(UErrorCode & status)848 virtual const UnicodeString* snext(UErrorCode& status) {
849 int32_t resultLength = 0;
850 const char *s = next(&resultLength, status);
851 return setChars(s, resultLength, status);
852 }
853
reset(UErrorCode &)854 virtual void reset(UErrorCode& /*status*/) {
855 index = 0;
856 }
857 };
858
~CollationLocaleListEnumeration()859 CollationLocaleListEnumeration::~CollationLocaleListEnumeration() {}
860
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationLocaleListEnumeration)861 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationLocaleListEnumeration)
862
863
864 // -------------------------------------
865
866 StringEnumeration* U_EXPORT2
867 Collator::getAvailableLocales(void)
868 {
869 #if !UCONFIG_NO_SERVICE
870 if (hasService()) {
871 return getService()->getAvailableLocales();
872 }
873 #endif /* UCONFIG_NO_SERVICE */
874 UErrorCode status = U_ZERO_ERROR;
875 if (isAvailableLocaleListInitialized(status)) {
876 return new CollationLocaleListEnumeration();
877 }
878 return NULL;
879 }
880
881 StringEnumeration* U_EXPORT2
getKeywords(UErrorCode & status)882 Collator::getKeywords(UErrorCode& status) {
883 return UStringEnumeration::fromUEnumeration(
884 ucol_getKeywords(&status), status);
885 }
886
887 StringEnumeration* U_EXPORT2
getKeywordValues(const char * keyword,UErrorCode & status)888 Collator::getKeywordValues(const char *keyword, UErrorCode& status) {
889 return UStringEnumeration::fromUEnumeration(
890 ucol_getKeywordValues(keyword, &status), status);
891 }
892
893 StringEnumeration* U_EXPORT2
getKeywordValuesForLocale(const char * key,const Locale & locale,UBool commonlyUsed,UErrorCode & status)894 Collator::getKeywordValuesForLocale(const char* key, const Locale& locale,
895 UBool commonlyUsed, UErrorCode& status) {
896 return UStringEnumeration::fromUEnumeration(
897 ucol_getKeywordValuesForLocale(
898 key, locale.getName(), commonlyUsed, &status),
899 status);
900 }
901
902 Locale U_EXPORT2
getFunctionalEquivalent(const char * keyword,const Locale & locale,UBool & isAvailable,UErrorCode & status)903 Collator::getFunctionalEquivalent(const char* keyword, const Locale& locale,
904 UBool& isAvailable, UErrorCode& status) {
905 // This is a wrapper over ucol_getFunctionalEquivalent
906 char loc[ULOC_FULLNAME_CAPACITY];
907 /*int32_t len =*/ ucol_getFunctionalEquivalent(loc, sizeof(loc),
908 keyword, locale.getName(), &isAvailable, &status);
909 if (U_FAILURE(status)) {
910 *loc = 0; // root
911 }
912 return Locale::createFromName(loc);
913 }
914
915 Collator::ECollationStrength
getStrength(void) const916 Collator::getStrength(void) const {
917 UErrorCode intStatus = U_ZERO_ERROR;
918 return (ECollationStrength)getAttribute(UCOL_STRENGTH, intStatus);
919 }
920
921 void
setStrength(ECollationStrength newStrength)922 Collator::setStrength(ECollationStrength newStrength) {
923 UErrorCode intStatus = U_ZERO_ERROR;
924 setAttribute(UCOL_STRENGTH, (UColAttributeValue)newStrength, intStatus);
925 }
926
927 Collator &
setMaxVariable(UColReorderCode,UErrorCode & errorCode)928 Collator::setMaxVariable(UColReorderCode /*group*/, UErrorCode &errorCode) {
929 if (U_SUCCESS(errorCode)) {
930 errorCode = U_UNSUPPORTED_ERROR;
931 }
932 return *this;
933 }
934
935 UColReorderCode
getMaxVariable() const936 Collator::getMaxVariable() const {
937 return UCOL_REORDER_CODE_PUNCTUATION;
938 }
939
940 int32_t
getReorderCodes(int32_t *,int32_t,UErrorCode & status) const941 Collator::getReorderCodes(int32_t* /* dest*/,
942 int32_t /* destCapacity*/,
943 UErrorCode& status) const
944 {
945 if (U_SUCCESS(status)) {
946 status = U_UNSUPPORTED_ERROR;
947 }
948 return 0;
949 }
950
951 void
setReorderCodes(const int32_t *,int32_t,UErrorCode & status)952 Collator::setReorderCodes(const int32_t* /* reorderCodes */,
953 int32_t /* reorderCodesLength */,
954 UErrorCode& status)
955 {
956 if (U_SUCCESS(status)) {
957 status = U_UNSUPPORTED_ERROR;
958 }
959 }
960
961 int32_t
getEquivalentReorderCodes(int32_t reorderCode,int32_t * dest,int32_t capacity,UErrorCode & errorCode)962 Collator::getEquivalentReorderCodes(int32_t reorderCode,
963 int32_t *dest, int32_t capacity,
964 UErrorCode &errorCode) {
965 if(U_FAILURE(errorCode)) { return 0; }
966 if(capacity < 0 || (dest == NULL && capacity > 0)) {
967 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
968 return 0;
969 }
970 const CollationData *baseData = CollationRoot::getData(errorCode);
971 if(U_FAILURE(errorCode)) { return 0; }
972 return baseData->getEquivalentScripts(reorderCode, dest, capacity, errorCode);
973 }
974
975 int32_t
internalGetShortDefinitionString(const char *,char *,int32_t,UErrorCode & status) const976 Collator::internalGetShortDefinitionString(const char * /*locale*/,
977 char * /*buffer*/,
978 int32_t /*capacity*/,
979 UErrorCode &status) const {
980 if(U_SUCCESS(status)) {
981 status = U_UNSUPPORTED_ERROR; /* Shouldn't happen, internal function */
982 }
983 return 0;
984 }
985
986 UCollationResult
internalCompareUTF8(const char * left,int32_t leftLength,const char * right,int32_t rightLength,UErrorCode & errorCode) const987 Collator::internalCompareUTF8(const char *left, int32_t leftLength,
988 const char *right, int32_t rightLength,
989 UErrorCode &errorCode) const {
990 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
991 if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
992 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
993 return UCOL_EQUAL;
994 }
995 return compareUTF8(
996 StringPiece(left, (leftLength < 0) ? static_cast<int32_t>(uprv_strlen(left)) : leftLength),
997 StringPiece(right, (rightLength < 0) ? static_cast<int32_t>(uprv_strlen(right)) : rightLength),
998 errorCode);
999 }
1000
1001 int32_t
internalNextSortKeyPart(UCharIterator *,uint32_t[2],uint8_t *,int32_t,UErrorCode & errorCode) const1002 Collator::internalNextSortKeyPart(UCharIterator * /*iter*/, uint32_t /*state*/[2],
1003 uint8_t * /*dest*/, int32_t /*count*/, UErrorCode &errorCode) const {
1004 if (U_SUCCESS(errorCode)) {
1005 errorCode = U_UNSUPPORTED_ERROR;
1006 }
1007 return 0;
1008 }
1009
1010 // UCollator private data members ----------------------------------------
1011
1012 /* This is useless information */
1013 /*const UVersionInfo Collator::fVersion = {1, 1, 0, 0};*/
1014
1015 // -------------------------------------
1016
1017 U_NAMESPACE_END
1018
1019 #endif /* #if !UCONFIG_NO_COLLATION */
1020
1021 /* eof */
1022