1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2015, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 * rulebasedcollator.cpp
7 *
8 * (replaced the former tblcoll.cpp)
9 *
10 * created on: 2012feb14 with new and old collation code
11 * created by: Markus W. Scherer
12 */
13 
14 #include "unicode/utypes.h"
15 
16 #if !UCONFIG_NO_COLLATION
17 
18 #include "unicode/coll.h"
19 #include "unicode/coleitr.h"
20 #include "unicode/localpointer.h"
21 #include "unicode/locid.h"
22 #include "unicode/sortkey.h"
23 #include "unicode/tblcoll.h"
24 #include "unicode/ucol.h"
25 #include "unicode/uiter.h"
26 #include "unicode/uloc.h"
27 #include "unicode/uniset.h"
28 #include "unicode/unistr.h"
29 #include "unicode/usetiter.h"
30 #include "unicode/utf8.h"
31 #include "unicode/uversion.h"
32 #include "bocsu.h"
33 #include "charstr.h"
34 #include "cmemory.h"
35 #include "collation.h"
36 #include "collationcompare.h"
37 #include "collationdata.h"
38 #include "collationdatareader.h"
39 #include "collationfastlatin.h"
40 #include "collationiterator.h"
41 #include "collationkeys.h"
42 #include "collationroot.h"
43 #include "collationsets.h"
44 #include "collationsettings.h"
45 #include "collationtailoring.h"
46 #include "cstring.h"
47 #include "uassert.h"
48 #include "ucol_imp.h"
49 #include "uhash.h"
50 #include "uitercollationiterator.h"
51 #include "ustr_imp.h"
52 #include "utf16collationiterator.h"
53 #include "utf8collationiterator.h"
54 #include "uvectr64.h"
55 
56 U_NAMESPACE_BEGIN
57 
58 namespace {
59 
60 class FixedSortKeyByteSink : public SortKeyByteSink {
61 public:
FixedSortKeyByteSink(char * dest,int32_t destCapacity)62     FixedSortKeyByteSink(char *dest, int32_t destCapacity)
63             : SortKeyByteSink(dest, destCapacity) {}
64     virtual ~FixedSortKeyByteSink();
65 
66 private:
67     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
68     virtual UBool Resize(int32_t appendCapacity, int32_t length);
69 };
70 
~FixedSortKeyByteSink()71 FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
72 
73 void
AppendBeyondCapacity(const char * bytes,int32_t,int32_t length)74 FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
75     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
76     // Fill the buffer completely.
77     int32_t available = capacity_ - length;
78     if (available > 0) {
79         uprv_memcpy(buffer_ + length, bytes, available);
80     }
81 }
82 
83 UBool
Resize(int32_t,int32_t)84 FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
85     return FALSE;
86 }
87 
88 }  // namespace
89 
90 // Not in an anonymous namespace, so that it can be a friend of CollationKey.
91 class CollationKeyByteSink : public SortKeyByteSink {
92 public:
CollationKeyByteSink(CollationKey & key)93     CollationKeyByteSink(CollationKey &key)
94             : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
95               key_(key) {}
96     virtual ~CollationKeyByteSink();
97 
98 private:
99     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
100     virtual UBool Resize(int32_t appendCapacity, int32_t length);
101 
102     CollationKey &key_;
103 };
104 
~CollationKeyByteSink()105 CollationKeyByteSink::~CollationKeyByteSink() {}
106 
107 void
AppendBeyondCapacity(const char * bytes,int32_t n,int32_t length)108 CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
109     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
110     if (Resize(n, length)) {
111         uprv_memcpy(buffer_ + length, bytes, n);
112     }
113 }
114 
115 UBool
Resize(int32_t appendCapacity,int32_t length)116 CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
117     if (buffer_ == NULL) {
118         return FALSE;  // allocation failed before already
119     }
120     int32_t newCapacity = 2 * capacity_;
121     int32_t altCapacity = length + 2 * appendCapacity;
122     if (newCapacity < altCapacity) {
123         newCapacity = altCapacity;
124     }
125     if (newCapacity < 200) {
126         newCapacity = 200;
127     }
128     uint8_t *newBuffer = key_.reallocate(newCapacity, length);
129     if (newBuffer == NULL) {
130         SetNotOk();
131         return FALSE;
132     }
133     buffer_ = reinterpret_cast<char *>(newBuffer);
134     capacity_ = newCapacity;
135     return TRUE;
136 }
137 
/**
 * Copy constructor: shares the other collator's data, settings, tailoring and
 * cache entry, and takes one more reference on the two ref-counted objects.
 */
RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
        : Collator(other),
          data(other.data),
          settings(other.settings),
          tailoring(other.tailoring),
          cacheEntry(other.cacheEntry),
          validLocale(other.validLocale),
          explicitlySetAttributes(other.explicitlySetAttributes),
          actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid) {
    // This object now co-owns both shared objects.
    cacheEntry->addRef();
    settings->addRef();
}
150 
/**
 * Builds a collator from a binary tailoring image.
 *
 * @param bin       the binary image; must not be NULL
 * @param length    the image length; must not be 0
 * @param base      the base collator; must not be NULL and must use the root tailoring
 * @param errorCode set to U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 *                  U_UNSUPPORTED_ERROR for a non-root base,
 *                  U_MEMORY_ALLOCATION_ERROR if the tailoring cannot be created,
 *                  or whatever the reader reports
 */
RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
                                     const RuleBasedCollator *base, UErrorCode &errorCode)
        : data(NULL), settings(NULL), tailoring(NULL), cacheEntry(NULL),
          validLocale(""),
          explicitlySetAttributes(0),
          actualLocaleIsSameAsValid(FALSE) {
    if(U_FAILURE(errorCode)) { return; }
    if(bin == NULL || length == 0 || base == NULL) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    const CollationTailoring *rootTailoring = CollationRoot::getRoot(errorCode);
    if(U_FAILURE(errorCode)) { return; }
    if(base->tailoring != rootTailoring) {
        // Only the root collator is accepted as the base.
        errorCode = U_UNSUPPORTED_ERROR;
        return;
    }
    LocalPointer<CollationTailoring> newTailoring(
            new CollationTailoring(base->tailoring->settings));
    if(newTailoring.isNull() || newTailoring->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    CollationDataReader::read(base->tailoring, bin, length, *newTailoring, errorCode);
    if(U_FAILURE(errorCode)) { return; }
    newTailoring->actualLocale.setToBogus();
    adoptTailoring(newTailoring.orphan(), errorCode);
}
181 
RuleBasedCollator(const CollationCacheEntry * entry)182 RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry)
183         : data(entry->tailoring->data),
184           settings(entry->tailoring->settings),
185           tailoring(entry->tailoring),
186           cacheEntry(entry),
187           validLocale(entry->validLocale),
188           explicitlySetAttributes(0),
189           actualLocaleIsSameAsValid(FALSE) {
190     settings->addRef();
191     cacheEntry->addRef();
192 }
193 
~RuleBasedCollator()194 RuleBasedCollator::~RuleBasedCollator() {
195     SharedObject::clearPtr(settings);
196     SharedObject::clearPtr(cacheEntry);
197 }
198 
199 void
adoptTailoring(CollationTailoring * t,UErrorCode & errorCode)200 RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode) {
201     if(U_FAILURE(errorCode)) {
202         t->deleteIfZeroRefCount();
203         return;
204     }
205     U_ASSERT(settings == NULL && data == NULL && tailoring == NULL && cacheEntry == NULL);
206     cacheEntry = new CollationCacheEntry(t->actualLocale, t);
207     if(cacheEntry == NULL) {
208         errorCode = U_MEMORY_ALLOCATION_ERROR;
209         t->deleteIfZeroRefCount();
210         return;
211     }
212     data = t->data;
213     settings = t->settings;
214     settings->addRef();
215     tailoring = t;
216     cacheEntry->addRef();
217     validLocale = t->actualLocale;
218     actualLocaleIsSameAsValid = FALSE;
219 }
220 
221 Collator *
clone() const222 RuleBasedCollator::clone() const {
223     return new RuleBasedCollator(*this);
224 }
225 
/**
 * Assignment: makes this collator share the other one's settings, tailoring
 * and cache entry, and copies the locale and attribute bookkeeping.
 * Self-assignment is a no-op.
 */
RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
    if(this != &other) {
        SharedObject::copyPtr(other.settings, settings);
        tailoring = other.tailoring;
        SharedObject::copyPtr(other.cacheEntry, cacheEntry);
        data = tailoring->data;
        validLocale = other.validLocale;
        explicitlySetAttributes = other.explicitlySetAttributes;
        actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
    }
    return *this;
}
237 
238 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
239 
240 UBool
241 RuleBasedCollator::operator==(const Collator& other) const {
242     if(this == &other) { return TRUE; }
243     if(!Collator::operator==(other)) { return FALSE; }
244     const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
245     if(*settings != *o.settings) { return FALSE; }
246     if(data == o.data) { return TRUE; }
247     UBool thisIsRoot = data->base == NULL;
248     UBool otherIsRoot = o.data->base == NULL;
249     U_ASSERT(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
250     if(thisIsRoot != otherIsRoot) { return FALSE; }
251     if((thisIsRoot || !tailoring->rules.isEmpty()) &&
252             (otherIsRoot || !o.tailoring->rules.isEmpty())) {
253         // Shortcut: If both collators have valid rule strings, then compare those.
254         if(tailoring->rules == o.tailoring->rules) { return TRUE; }
255     }
256     // Different rule strings can result in the same or equivalent tailoring.
257     // The rule strings are optional in ICU resource bundles, although included by default.
258     // cloneBinary() drops the rule string.
259     UErrorCode errorCode = U_ZERO_ERROR;
260     LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
261     LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
262     if(U_FAILURE(errorCode)) { return FALSE; }
263     if(*thisTailored != *otherTailored) { return FALSE; }
264     // For completeness, we should compare all of the mappings;
265     // or we should create a list of strings, sort it with one collator,
266     // and check if both collators compare adjacent strings the same
267     // (order & strength, down to quaternary); or similar.
268     // Testing equality of collators seems unusual.
269     return TRUE;
270 }
271 
272 int32_t
hashCode() const273 RuleBasedCollator::hashCode() const {
274     int32_t h = settings->hashCode();
275     if(data->base == NULL) { return h; }  // root collator
276     // Do not rely on the rule string, see comments in operator==().
277     UErrorCode errorCode = U_ZERO_ERROR;
278     LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));
279     if(U_FAILURE(errorCode)) { return 0; }
280     UnicodeSetIterator iter(*set);
281     while(iter.next() && !iter.isString()) {
282         h ^= data->getCE32(iter.getCodepoint());
283     }
284     return h;
285 }
286 
287 void
setLocales(const Locale & requested,const Locale & valid,const Locale & actual)288 RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
289                               const Locale &actual) {
290     if(actual == tailoring->actualLocale) {
291         actualLocaleIsSameAsValid = FALSE;
292     } else {
293         U_ASSERT(actual == valid);
294         actualLocaleIsSameAsValid = TRUE;
295     }
296     // Do not modify tailoring.actualLocale:
297     // We cannot be sure that that would be thread-safe.
298     validLocale = valid;
299     (void)requested;  // Ignore, see also ticket #10477.
300 }
301 
302 Locale
getLocale(ULocDataLocaleType type,UErrorCode & errorCode) const303 RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
304     if(U_FAILURE(errorCode)) {
305         return Locale::getRoot();
306     }
307     switch(type) {
308     case ULOC_ACTUAL_LOCALE:
309         return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
310     case ULOC_VALID_LOCALE:
311         return validLocale;
312     case ULOC_REQUESTED_LOCALE:
313     default:
314         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
315         return Locale::getRoot();
316     }
317 }
318 
319 const char *
internalGetLocaleID(ULocDataLocaleType type,UErrorCode & errorCode) const320 RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
321     if(U_FAILURE(errorCode)) {
322         return NULL;
323     }
324     const Locale *result;
325     switch(type) {
326     case ULOC_ACTUAL_LOCALE:
327         result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
328         break;
329     case ULOC_VALID_LOCALE:
330         result = &validLocale;
331         break;
332     case ULOC_REQUESTED_LOCALE:
333     default:
334         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
335         return NULL;
336     }
337     if(result->isBogus()) { return NULL; }
338     const char *id = result->getName();
339     return id[0] == 0 ? "root" : id;
340 }
341 
342 const UnicodeString&
getRules() const343 RuleBasedCollator::getRules() const {
344     return tailoring->rules;
345 }
346 
347 void
getRules(UColRuleOption delta,UnicodeString & buffer) const348 RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
349     if(delta == UCOL_TAILORING_ONLY) {
350         buffer = tailoring->rules;
351         return;
352     }
353     // UCOL_FULL_RULES
354     buffer.remove();
355     CollationLoader::appendRootRules(buffer);
356     buffer.append(tailoring->rules).getTerminatedBuffer();
357 }
358 
359 void
getVersion(UVersionInfo version) const360 RuleBasedCollator::getVersion(UVersionInfo version) const {
361     uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
362     version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
363 }
364 
365 UnicodeSet *
getTailoredSet(UErrorCode & errorCode) const366 RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
367     if(U_FAILURE(errorCode)) { return NULL; }
368     UnicodeSet *tailored = new UnicodeSet();
369     if(tailored == NULL) {
370         errorCode = U_MEMORY_ALLOCATION_ERROR;
371         return NULL;
372     }
373     if(data->base != NULL) {
374         TailoredSet(tailored).forData(data, errorCode);
375         if(U_FAILURE(errorCode)) {
376             delete tailored;
377             return NULL;
378         }
379     }
380     return tailored;
381 }
382 
383 void
internalGetContractionsAndExpansions(UnicodeSet * contractions,UnicodeSet * expansions,UBool addPrefixes,UErrorCode & errorCode) const384 RuleBasedCollator::internalGetContractionsAndExpansions(
385         UnicodeSet *contractions, UnicodeSet *expansions,
386         UBool addPrefixes, UErrorCode &errorCode) const {
387     if(U_FAILURE(errorCode)) { return; }
388     if(contractions != NULL) {
389         contractions->clear();
390     }
391     if(expansions != NULL) {
392         expansions->clear();
393     }
394     ContractionsAndExpansions(contractions, expansions, NULL, addPrefixes).forData(data, errorCode);
395 }
396 
397 void
internalAddContractions(UChar32 c,UnicodeSet & set,UErrorCode & errorCode) const398 RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
399     if(U_FAILURE(errorCode)) { return; }
400     ContractionsAndExpansions(&set, NULL, NULL, FALSE).forCodePoint(data, c, errorCode);
401 }
402 
403 const CollationSettings &
getDefaultSettings() const404 RuleBasedCollator::getDefaultSettings() const {
405     return *tailoring->settings;
406 }
407 
408 UColAttributeValue
getAttribute(UColAttribute attr,UErrorCode & errorCode) const409 RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
410     if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
411     int32_t option;
412     switch(attr) {
413     case UCOL_FRENCH_COLLATION:
414         option = CollationSettings::BACKWARD_SECONDARY;
415         break;
416     case UCOL_ALTERNATE_HANDLING:
417         return settings->getAlternateHandling();
418     case UCOL_CASE_FIRST:
419         return settings->getCaseFirst();
420     case UCOL_CASE_LEVEL:
421         option = CollationSettings::CASE_LEVEL;
422         break;
423     case UCOL_NORMALIZATION_MODE:
424         option = CollationSettings::CHECK_FCD;
425         break;
426     case UCOL_STRENGTH:
427         return (UColAttributeValue)settings->getStrength();
428     case UCOL_HIRAGANA_QUATERNARY_MODE:
429         // Deprecated attribute, unsettable.
430         return UCOL_OFF;
431     case UCOL_NUMERIC_COLLATION:
432         option = CollationSettings::NUMERIC;
433         break;
434     default:
435         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
436         return UCOL_DEFAULT;
437     }
438     return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;
439 }
440 
441 void
setAttribute(UColAttribute attr,UColAttributeValue value,UErrorCode & errorCode)442 RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
443                                 UErrorCode &errorCode) {
444     UColAttributeValue oldValue = getAttribute(attr, errorCode);
445     if(U_FAILURE(errorCode)) { return; }
446     if(value == oldValue) {
447         setAttributeExplicitly(attr);
448         return;
449     }
450     const CollationSettings &defaultSettings = getDefaultSettings();
451     if(settings == &defaultSettings) {
452         if(value == UCOL_DEFAULT) {
453             setAttributeDefault(attr);
454             return;
455         }
456     }
457     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
458     if(ownedSettings == NULL) {
459         errorCode = U_MEMORY_ALLOCATION_ERROR;
460         return;
461     }
462 
463     switch(attr) {
464     case UCOL_FRENCH_COLLATION:
465         ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
466                                defaultSettings.options, errorCode);
467         break;
468     case UCOL_ALTERNATE_HANDLING:
469         ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
470         break;
471     case UCOL_CASE_FIRST:
472         ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
473         break;
474     case UCOL_CASE_LEVEL:
475         ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
476                                defaultSettings.options, errorCode);
477         break;
478     case UCOL_NORMALIZATION_MODE:
479         ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
480                                defaultSettings.options, errorCode);
481         break;
482     case UCOL_STRENGTH:
483         ownedSettings->setStrength(value, defaultSettings.options, errorCode);
484         break;
485     case UCOL_HIRAGANA_QUATERNARY_MODE:
486         // Deprecated attribute. Check for valid values but do not change anything.
487         if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
488             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
489         }
490         break;
491     case UCOL_NUMERIC_COLLATION:
492         ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
493         break;
494     default:
495         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
496         break;
497     }
498     if(U_FAILURE(errorCode)) { return; }
499     setFastLatinOptions(*ownedSettings);
500     if(value == UCOL_DEFAULT) {
501         setAttributeDefault(attr);
502     } else {
503         setAttributeExplicitly(attr);
504     }
505 }
506 
507 Collator &
setMaxVariable(UColReorderCode group,UErrorCode & errorCode)508 RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
509     if(U_FAILURE(errorCode)) { return *this; }
510     // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
511     int32_t value;
512     if(group == UCOL_REORDER_CODE_DEFAULT) {
513         value = UCOL_DEFAULT;
514     } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) {
515         value = group - UCOL_REORDER_CODE_FIRST;
516     } else {
517         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
518         return *this;
519     }
520     CollationSettings::MaxVariable oldValue = settings->getMaxVariable();
521     if(value == oldValue) {
522         setAttributeExplicitly(ATTR_VARIABLE_TOP);
523         return *this;
524     }
525     const CollationSettings &defaultSettings = getDefaultSettings();
526     if(settings == &defaultSettings) {
527         if(value == UCOL_DEFAULT) {
528             setAttributeDefault(ATTR_VARIABLE_TOP);
529             return *this;
530         }
531     }
532     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
533     if(ownedSettings == NULL) {
534         errorCode = U_MEMORY_ALLOCATION_ERROR;
535         return *this;
536     }
537 
538     if(group == UCOL_REORDER_CODE_DEFAULT) {
539         group = (UColReorderCode)(UCOL_REORDER_CODE_FIRST + defaultSettings.getMaxVariable());
540     }
541     uint32_t varTop = data->getLastPrimaryForGroup(group);
542     U_ASSERT(varTop != 0);
543     ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
544     if(U_FAILURE(errorCode)) { return *this; }
545     ownedSettings->variableTop = varTop;
546     setFastLatinOptions(*ownedSettings);
547     if(value == UCOL_DEFAULT) {
548         setAttributeDefault(ATTR_VARIABLE_TOP);
549     } else {
550         setAttributeExplicitly(ATTR_VARIABLE_TOP);
551     }
552     return *this;
553 }
554 
555 UColReorderCode
getMaxVariable() const556 RuleBasedCollator::getMaxVariable() const {
557     return (UColReorderCode)(UCOL_REORDER_CODE_FIRST + settings->getMaxVariable());
558 }
559 
560 uint32_t
getVariableTop(UErrorCode &) const561 RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
562     return settings->variableTop;
563 }
564 
565 uint32_t
setVariableTop(const UChar * varTop,int32_t len,UErrorCode & errorCode)566 RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &errorCode) {
567     if(U_FAILURE(errorCode)) { return 0; }
568     if(varTop == NULL && len !=0) {
569         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
570         return 0;
571     }
572     if(len < 0) { len = u_strlen(varTop); }
573     if(len == 0) {
574         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
575         return 0;
576     }
577     UBool numeric = settings->isNumeric();
578     int64_t ce1, ce2;
579     if(settings->dontCheckFCD()) {
580         UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
581         ce1 = ci.nextCE(errorCode);
582         ce2 = ci.nextCE(errorCode);
583     } else {
584         FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
585         ce1 = ci.nextCE(errorCode);
586         ce2 = ci.nextCE(errorCode);
587     }
588     if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) {
589         errorCode = U_CE_NOT_FOUND_ERROR;
590         return 0;
591     }
592     setVariableTop((uint32_t)(ce1 >> 32), errorCode);
593     return settings->variableTop;
594 }
595 
596 uint32_t
setVariableTop(const UnicodeString & varTop,UErrorCode & errorCode)597 RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
598     return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);
599 }
600 
601 void
setVariableTop(uint32_t varTop,UErrorCode & errorCode)602 RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
603     if(U_FAILURE(errorCode)) { return; }
604     if(varTop != settings->variableTop) {
605         // Pin the variable top to the end of the reordering group which contains it.
606         // Only a few special groups are supported.
607         int32_t group = data->getGroupForPrimary(varTop);
608         if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
609             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
610             return;
611         }
612         uint32_t v = data->getLastPrimaryForGroup(group);
613         U_ASSERT(v != 0 && v >= varTop);
614         varTop = v;
615         if(varTop != settings->variableTop) {
616             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
617             if(ownedSettings == NULL) {
618                 errorCode = U_MEMORY_ALLOCATION_ERROR;
619                 return;
620             }
621             ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
622                                           getDefaultSettings().options, errorCode);
623             if(U_FAILURE(errorCode)) { return; }
624             ownedSettings->variableTop = varTop;
625             setFastLatinOptions(*ownedSettings);
626         }
627     }
628     if(varTop == getDefaultSettings().variableTop) {
629         setAttributeDefault(ATTR_VARIABLE_TOP);
630     } else {
631         setAttributeExplicitly(ATTR_VARIABLE_TOP);
632     }
633 }
634 
635 int32_t
getReorderCodes(int32_t * dest,int32_t capacity,UErrorCode & errorCode) const636 RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
637                                    UErrorCode &errorCode) const {
638     if(U_FAILURE(errorCode)) { return 0; }
639     if(capacity < 0 || (dest == NULL && capacity > 0)) {
640         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
641         return 0;
642     }
643     int32_t length = settings->reorderCodesLength;
644     if(length == 0) { return 0; }
645     if(length > capacity) {
646         errorCode = U_BUFFER_OVERFLOW_ERROR;
647         return length;
648     }
649     uprv_memcpy(dest, settings->reorderCodes, length * 4);
650     return length;
651 }
652 
653 void
setReorderCodes(const int32_t * reorderCodes,int32_t length,UErrorCode & errorCode)654 RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
655                                    UErrorCode &errorCode) {
656     if(U_FAILURE(errorCode)) { return; }
657     if(length < 0 || (reorderCodes == NULL && length > 0)) {
658         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
659         return;
660     }
661     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) {
662         length = 0;
663     }
664     if(length == settings->reorderCodesLength &&
665             uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {
666         return;
667     }
668     const CollationSettings &defaultSettings = getDefaultSettings();
669     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
670         if(settings != &defaultSettings) {
671             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
672             if(ownedSettings == NULL) {
673                 errorCode = U_MEMORY_ALLOCATION_ERROR;
674                 return;
675             }
676             ownedSettings->copyReorderingFrom(defaultSettings, errorCode);
677             setFastLatinOptions(*ownedSettings);
678         }
679         return;
680     }
681     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
682     if(ownedSettings == NULL) {
683         errorCode = U_MEMORY_ALLOCATION_ERROR;
684         return;
685     }
686     ownedSettings->setReordering(*data, reorderCodes, length, errorCode);
687     setFastLatinOptions(*ownedSettings);
688 }
689 
690 void
setFastLatinOptions(CollationSettings & ownedSettings) const691 RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
692     ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
693             data, ownedSettings,
694             ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLatinPrimaries));
695 }
696 
697 UCollationResult
compare(const UnicodeString & left,const UnicodeString & right,UErrorCode & errorCode) const698 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
699                            UErrorCode &errorCode) const {
700     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
701     return doCompare(left.getBuffer(), left.length(),
702                      right.getBuffer(), right.length(), errorCode);
703 }
704 
705 UCollationResult
compare(const UnicodeString & left,const UnicodeString & right,int32_t length,UErrorCode & errorCode) const706 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
707                            int32_t length, UErrorCode &errorCode) const {
708     if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }
709     if(length < 0) {
710         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
711         return UCOL_EQUAL;
712     }
713     int32_t leftLength = left.length();
714     int32_t rightLength = right.length();
715     if(leftLength > length) { leftLength = length; }
716     if(rightLength > length) { rightLength = length; }
717     return doCompare(left.getBuffer(), leftLength,
718                      right.getBuffer(), rightLength, errorCode);
719 }
720 
721 UCollationResult
compare(const UChar * left,int32_t leftLength,const UChar * right,int32_t rightLength,UErrorCode & errorCode) const722 RuleBasedCollator::compare(const UChar *left, int32_t leftLength,
723                            const UChar *right, int32_t rightLength,
724                            UErrorCode &errorCode) const {
725     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
726     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
727         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
728         return UCOL_EQUAL;
729     }
730     // Make sure both or neither strings have a known length.
731     // We do not optimize for mixed length/termination.
732     if(leftLength >= 0) {
733         if(rightLength < 0) { rightLength = u_strlen(right); }
734     } else {
735         if(rightLength >= 0) { leftLength = u_strlen(left); }
736     }
737     return doCompare(left, leftLength, right, rightLength, errorCode);
738 }
739 
740 UCollationResult
compareUTF8(const StringPiece & left,const StringPiece & right,UErrorCode & errorCode) const741 RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
742                                UErrorCode &errorCode) const {
743     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
744     const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
745     const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
746     if((leftBytes == NULL && !left.empty()) || (rightBytes == NULL && !right.empty())) {
747         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
748         return UCOL_EQUAL;
749     }
750     return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
751 }
752 
753 UCollationResult
internalCompareUTF8(const char * left,int32_t leftLength,const char * right,int32_t rightLength,UErrorCode & errorCode) const754 RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
755                                        const char *right, int32_t rightLength,
756                                        UErrorCode &errorCode) const {
757     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
758     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
759         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
760         return UCOL_EQUAL;
761     }
762     // Make sure both or neither strings have a known length.
763     // We do not optimize for mixed length/termination.
764     if(leftLength >= 0) {
765         if(rightLength < 0) { rightLength = uprv_strlen(right); }
766     } else {
767         if(rightLength >= 0) { leftLength = uprv_strlen(left); }
768     }
769     return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,
770                      reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);
771 }
772 
773 namespace {
774 
775 /**
776  * Abstract iterator for identical-level string comparisons.
777  * Returns FCD code points and handles temporary switching to NFD.
778  */
779 class NFDIterator : public UObject {
780 public:
NFDIterator()781     NFDIterator() : index(-1), length(0) {}
~NFDIterator()782     virtual ~NFDIterator() {}
783     /**
784      * Returns the next code point from the internal normalization buffer,
785      * or else the next text code point.
786      * Returns -1 at the end of the text.
787      */
nextCodePoint()788     UChar32 nextCodePoint() {
789         if(index >= 0) {
790             if(index == length) {
791                 index = -1;
792             } else {
793                 UChar32 c;
794                 U16_NEXT_UNSAFE(decomp, index, c);
795                 return c;
796             }
797         }
798         return nextRawCodePoint();
799     }
800     /**
801      * @param nfcImpl
802      * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
803      * @return the first code point in c's decomposition,
804      *         or c itself if it was decomposed already or if it does not decompose
805      */
nextDecomposedCodePoint(const Normalizer2Impl & nfcImpl,UChar32 c)806     UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
807         if(index >= 0) { return c; }
808         decomp = nfcImpl.getDecomposition(c, buffer, length);
809         if(decomp == NULL) { return c; }
810         index = 0;
811         U16_NEXT_UNSAFE(decomp, index, c);
812         return c;
813     }
814 protected:
815     /**
816      * Returns the next text code point in FCD order.
817      * Returns -1 at the end of the text.
818      */
819     virtual UChar32 nextRawCodePoint() = 0;
820 private:
821     const UChar *decomp;
822     UChar buffer[4];
823     int32_t index;
824     int32_t length;
825 };
826 
827 class UTF16NFDIterator : public NFDIterator {
828 public:
UTF16NFDIterator(const UChar * text,const UChar * textLimit)829     UTF16NFDIterator(const UChar *text, const UChar *textLimit) : s(text), limit(textLimit) {}
830 protected:
nextRawCodePoint()831     virtual UChar32 nextRawCodePoint() {
832         if(s == limit) { return U_SENTINEL; }
833         UChar32 c = *s++;
834         if(limit == NULL && c == 0) {
835             s = NULL;
836             return U_SENTINEL;
837         }
838         UChar trail;
839         if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {
840             ++s;
841             c = U16_GET_SUPPLEMENTARY(c, trail);
842         }
843         return c;
844     }
845 
846     const UChar *s;
847     const UChar *limit;
848 };
849 
850 class FCDUTF16NFDIterator : public UTF16NFDIterator {
851 public:
FCDUTF16NFDIterator(const Normalizer2Impl & nfcImpl,const UChar * text,const UChar * textLimit)852     FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const UChar *text, const UChar *textLimit)
853             : UTF16NFDIterator(NULL, NULL) {
854         UErrorCode errorCode = U_ZERO_ERROR;
855         const UChar *spanLimit = nfcImpl.makeFCD(text, textLimit, NULL, errorCode);
856         if(U_FAILURE(errorCode)) { return; }
857         if(spanLimit == textLimit || (textLimit == NULL && *spanLimit == 0)) {
858             s = text;
859             limit = spanLimit;
860         } else {
861             str.setTo(text, (int32_t)(spanLimit - text));
862             {
863                 ReorderingBuffer buffer(nfcImpl, str);
864                 if(buffer.init(str.length(), errorCode)) {
865                     nfcImpl.makeFCD(spanLimit, textLimit, &buffer, errorCode);
866                 }
867             }
868             if(U_SUCCESS(errorCode)) {
869                 s = str.getBuffer();
870                 limit = s + str.length();
871             }
872         }
873     }
874 private:
875     UnicodeString str;
876 };
877 
878 class UTF8NFDIterator : public NFDIterator {
879 public:
UTF8NFDIterator(const uint8_t * text,int32_t textLength)880     UTF8NFDIterator(const uint8_t *text, int32_t textLength)
881         : s(text), pos(0), length(textLength) {}
882 protected:
nextRawCodePoint()883     virtual UChar32 nextRawCodePoint() {
884         if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }
885         UChar32 c;
886         U8_NEXT_OR_FFFD(s, pos, length, c);
887         return c;
888     }
889 
890     const uint8_t *s;
891     int32_t pos;
892     int32_t length;
893 };
894 
895 class FCDUTF8NFDIterator : public NFDIterator {
896 public:
FCDUTF8NFDIterator(const CollationData * data,const uint8_t * text,int32_t textLength)897     FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
898             : u8ci(data, FALSE, text, 0, textLength) {}
899 protected:
nextRawCodePoint()900     virtual UChar32 nextRawCodePoint() {
901         UErrorCode errorCode = U_ZERO_ERROR;
902         return u8ci.nextCodePoint(errorCode);
903     }
904 private:
905     FCDUTF8CollationIterator u8ci;
906 };
907 
908 class UIterNFDIterator : public NFDIterator {
909 public:
UIterNFDIterator(UCharIterator & it)910     UIterNFDIterator(UCharIterator &it) : iter(it) {}
911 protected:
nextRawCodePoint()912     virtual UChar32 nextRawCodePoint() {
913         return uiter_next32(&iter);
914     }
915 private:
916     UCharIterator &iter;
917 };
918 
919 class FCDUIterNFDIterator : public NFDIterator {
920 public:
FCDUIterNFDIterator(const CollationData * data,UCharIterator & it,int32_t startIndex)921     FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
922             : uici(data, FALSE, it, startIndex) {}
923 protected:
nextRawCodePoint()924     virtual UChar32 nextRawCodePoint() {
925         UErrorCode errorCode = U_ZERO_ERROR;
926         return uici.nextCodePoint(errorCode);
927     }
928 private:
929     FCDUIterCollationIterator uici;
930 };
931 
compareNFDIter(const Normalizer2Impl & nfcImpl,NFDIterator & left,NFDIterator & right)932 UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
933                                 NFDIterator &left, NFDIterator &right) {
934     for(;;) {
935         // Fetch the next FCD code point from each string.
936         UChar32 leftCp = left.nextCodePoint();
937         UChar32 rightCp = right.nextCodePoint();
938         if(leftCp == rightCp) {
939             if(leftCp < 0) { break; }
940             continue;
941         }
942         // If they are different, then decompose each and compare again.
943         if(leftCp < 0) {
944             leftCp = -2;  // end of string
945         } else if(leftCp == 0xfffe) {
946             leftCp = -1;  // U+FFFE: merge separator
947         } else {
948             leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
949         }
950         if(rightCp < 0) {
951             rightCp = -2;  // end of string
952         } else if(rightCp == 0xfffe) {
953             rightCp = -1;  // U+FFFE: merge separator
954         } else {
955             rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
956         }
957         if(leftCp < rightCp) { return UCOL_LESS; }
958         if(leftCp > rightCp) { return UCOL_GREATER; }
959     }
960     return UCOL_EQUAL;
961 }
962 
963 }  // namespace
964 
/**
 * Compares two UTF-16 strings.
 *
 * Steps:
 * 1. Skip the longest prefix of identical code units.
 * 2. If the comparison would start at an "unsafe backward" code unit, back up.
 * 3. Try the CollationFastLatin comparison if the settings and the first
 *    remaining code units allow it.
 * 4. If the fast path was not used or bailed out, compare collation elements
 *    up to the quaternary level, with or without FCD checking.
 * 5. If the result is still equal and the strength is identical,
 *    compare the remaining text on the identical level.
 *
 * @param left        first string
 * @param leftLength  length of left, or negative if NUL-terminated
 * @param right       second string
 * @param rightLength length of right; when leftLength is negative, right is
 *                    also treated as NUL-terminated (callers visible in this file
 *                    make both lengths known or both negative before calling)
 * @param errorCode   in/out ICU error code; must not indicate a failure on entry
 * @return the comparison result
 */
UCollationResult
RuleBasedCollator::doCompare(const UChar *left, int32_t leftLength,
                             const UChar *right, int32_t rightLength,
                             UErrorCode &errorCode) const {
    // U_FAILURE(errorCode) checked by caller.
    // Same pointer and same length: trivially equal.
    if(left == right && leftLength == rightLength) {
        return UCOL_EQUAL;
    }

    // Identical-prefix test.
    const UChar *leftLimit;
    const UChar *rightLimit;
    int32_t equalPrefixLength = 0;
    if(leftLength < 0) {
        // NUL-terminated: NULL limits tell the iterators below to stop at the terminator.
        leftLimit = NULL;
        rightLimit = NULL;
        UChar c;
        while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
            // Both strings reached their terminators together.
            if(c == 0) { return UCOL_EQUAL; }
            ++equalPrefixLength;
        }
    } else {
        leftLimit = left + leftLength;
        rightLimit = right + rightLength;
        for(;;) {
            if(equalPrefixLength == leftLength) {
                // End of left: equal only if right ends here as well.
                if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
                break;
            } else if(equalPrefixLength == rightLength ||
                      left[equalPrefixLength] != right[equalPrefixLength]) {
                break;
            }
            ++equalPrefixLength;
        }
    }

    UBool numeric = settings->isNumeric();
    if(equalPrefixLength > 0) {
        // Only look at the first differing unit of a string if that string has not ended.
        if((equalPrefixLength != leftLength &&
                    data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||
                (equalPrefixLength != rightLength &&
                    data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
            // Identical prefix: Back up to the start of a contraction or reordering sequence.
            // Only left is inspected: the prefix is identical in both strings.
            while(--equalPrefixLength > 0 &&
                    data->isUnsafeBackward(left[equalPrefixLength], numeric)) {}
        }
        // Notes:
        // - A longer string can compare equal to a prefix of it if only ignorables follow.
        // - With a backward level, a longer string can compare less-than a prefix of it.

        // Pass the actual start of each string into the CollationIterators,
        // plus the equalPrefixLength position,
        // so that prefix matches back into the equal prefix work.
    }

    int32_t result;
    int32_t fastLatinOptions = settings->fastLatinOptions;
    // The fast path is attempted only if fastLatinOptions is non-negative and
    // each string either ends here or continues with a code unit up to LATIN_MAX.
    if(fastLatinOptions >= 0 &&
            (equalPrefixLength == leftLength ||
                left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
            (equalPrefixLength == rightLength ||
                right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
        if(leftLength >= 0) {
            result = CollationFastLatin::compareUTF16(data->fastLatinTable,
                                                      settings->fastLatinPrimaries,
                                                      fastLatinOptions,
                                                      left + equalPrefixLength,
                                                      leftLength - equalPrefixLength,
                                                      right + equalPrefixLength,
                                                      rightLength - equalPrefixLength);
        } else {
            // NUL-terminated: pass -1 for both lengths.
            result = CollationFastLatin::compareUTF16(data->fastLatinTable,
                                                      settings->fastLatinPrimaries,
                                                      fastLatinOptions,
                                                      left + equalPrefixLength, -1,
                                                      right + equalPrefixLength, -1);
        }
    } else {
        result = CollationFastLatin::BAIL_OUT_RESULT;
    }

    // Full comparison if the fast path was skipped or gave up.
    if(result == CollationFastLatin::BAIL_OUT_RESULT) {
        if(settings->dontCheckFCD()) {
            UTF16CollationIterator leftIter(data, numeric,
                                            left, left + equalPrefixLength, leftLimit);
            UTF16CollationIterator rightIter(data, numeric,
                                            right, right + equalPrefixLength, rightLimit);
            result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
        } else {
            FCDUTF16CollationIterator leftIter(data, numeric,
                                              left, left + equalPrefixLength, leftLimit);
            FCDUTF16CollationIterator rightIter(data, numeric,
                                                right, right + equalPrefixLength, rightLimit);
            result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
        }
    }
    // Done unless the strings are equal so far, the strength is identical, and nothing failed.
    if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
        return (UCollationResult)result;
    }

    // Note: If NUL-terminated, we could get the actual limits from the iterators now.
    // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
    // and the benefit seems unlikely to be measurable.

    // Compare identical level.
    const Normalizer2Impl &nfcImpl = data->nfcImpl;
    // The equal prefix cannot make a difference on this level: skip it.
    left += equalPrefixLength;
    right += equalPrefixLength;
    if(settings->dontCheckFCD()) {
        UTF16NFDIterator leftIter(left, leftLimit);
        UTF16NFDIterator rightIter(right, rightLimit);
        return compareNFDIter(nfcImpl, leftIter, rightIter);
    } else {
        FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);
        FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);
        return compareNFDIter(nfcImpl, leftIter, rightIter);
    }
}
1083 
/**
 * Compares two UTF-8 strings.
 * Follows the same steps as the UTF-16 doCompare(), with additional care
 * not to start the comparison in the middle of a multi-byte sequence.
 *
 * @param left        first string
 * @param leftLength  length of left in bytes, or negative if NUL-terminated
 * @param right       second string
 * @param rightLength length of right in bytes; when leftLength is negative,
 *                    right is also treated as NUL-terminated
 * @param errorCode   in/out ICU error code; must not indicate a failure on entry
 * @return the comparison result
 */
UCollationResult
RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
                             const uint8_t *right, int32_t rightLength,
                             UErrorCode &errorCode) const {
    // U_FAILURE(errorCode) checked by caller.
    // Same pointer and same length: trivially equal.
    if(left == right && leftLength == rightLength) {
        return UCOL_EQUAL;
    }

    // Identical-prefix test.
    int32_t equalPrefixLength = 0;
    if(leftLength < 0) {
        // NUL-terminated strings.
        uint8_t c;
        while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
            // Both strings reached their terminators together.
            if(c == 0) { return UCOL_EQUAL; }
            ++equalPrefixLength;
        }
    } else {
        for(;;) {
            if(equalPrefixLength == leftLength) {
                // End of left: equal only if right ends here as well.
                if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
                break;
            } else if(equalPrefixLength == rightLength ||
                      left[equalPrefixLength] != right[equalPrefixLength]) {
                break;
            }
            ++equalPrefixLength;
        }
    }
    // Back up to the start of a partially-equal code point.
    // (The byte-wise prefix may end between the bytes of a multi-byte sequence.)
    if(equalPrefixLength > 0 &&
            ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])) ||
            (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])))) {
        while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {}
    }

    UBool numeric = settings->isNumeric();
    if(equalPrefixLength > 0) {
        // Decode the first differing code point of each string that has not ended,
        // using a scratch index so that equalPrefixLength is not modified,
        // and test whether it is unsafe to start the comparison there.
        UBool unsafe = FALSE;
        if(equalPrefixLength != leftLength) {
            int32_t i = equalPrefixLength;
            UChar32 c;
            U8_NEXT_OR_FFFD(left, i, leftLength, c);
            unsafe = data->isUnsafeBackward(c, numeric);
        }
        if(!unsafe && equalPrefixLength != rightLength) {
            int32_t i = equalPrefixLength;
            UChar32 c;
            U8_NEXT_OR_FFFD(right, i, rightLength, c);
            unsafe = data->isUnsafeBackward(c, numeric);
        }
        if(unsafe) {
            // Identical prefix: Back up to the start of a contraction or reordering sequence.
            // U8_PREV_OR_FFFD moves equalPrefixLength back over the preceding code point.
            // Only left is inspected: the prefix is identical in both strings.
            UChar32 c;
            do {
                U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c);
            } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));
        }
        // See the notes in the UTF-16 version.

        // Pass the actual start of each string into the CollationIterators,
        // plus the equalPrefixLength position,
        // so that prefix matches back into the equal prefix work.
    }

    int32_t result;
    int32_t fastLatinOptions = settings->fastLatinOptions;
    // The fast path is attempted only if fastLatinOptions is non-negative and
    // each string either ends here or continues with a byte up to LATIN_MAX_UTF8_LEAD.
    if(fastLatinOptions >= 0 &&
            (equalPrefixLength == leftLength ||
                left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
            (equalPrefixLength == rightLength ||
                right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
        if(leftLength >= 0) {
            result = CollationFastLatin::compareUTF8(data->fastLatinTable,
                                                     settings->fastLatinPrimaries,
                                                     fastLatinOptions,
                                                     left + equalPrefixLength,
                                                     leftLength - equalPrefixLength,
                                                     right + equalPrefixLength,
                                                     rightLength - equalPrefixLength);
        } else {
            // NUL-terminated: pass -1 for both lengths.
            result = CollationFastLatin::compareUTF8(data->fastLatinTable,
                                                     settings->fastLatinPrimaries,
                                                     fastLatinOptions,
                                                     left + equalPrefixLength, -1,
                                                     right + equalPrefixLength, -1);
        }
    } else {
        result = CollationFastLatin::BAIL_OUT_RESULT;
    }

    // Full comparison if the fast path was skipped or gave up.
    if(result == CollationFastLatin::BAIL_OUT_RESULT) {
        if(settings->dontCheckFCD()) {
            UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
            UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
            result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
        } else {
            FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
            FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
            result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
        }
    }
    // Done unless the strings are equal so far, the strength is identical, and nothing failed.
    if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
        return (UCollationResult)result;
    }

    // Note: If NUL-terminated, we could get the actual limits from the iterators now.
    // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
    // and the benefit seems unlikely to be measurable.

    // Compare identical level.
    const Normalizer2Impl &nfcImpl = data->nfcImpl;
    // The equal prefix cannot make a difference on this level: skip it.
    left += equalPrefixLength;
    right += equalPrefixLength;
    // Adjust the lengths only for counted strings; negative lengths must stay negative.
    // (If leftLength is 0, then equalPrefixLength is 0 as well and there is nothing to adjust.)
    if(leftLength > 0) {
        leftLength -= equalPrefixLength;
        rightLength -= equalPrefixLength;
    }
    if(settings->dontCheckFCD()) {
        UTF8NFDIterator leftIter(left, leftLength);
        UTF8NFDIterator rightIter(right, rightLength);
        return compareNFDIter(nfcImpl, leftIter, rightIter);
    } else {
        FCDUTF8NFDIterator leftIter(data, left, leftLength);
        FCDUTF8NFDIterator rightIter(data, right, rightLength);
        return compareNFDIter(nfcImpl, leftIter, rightIter);
    }
}
1212 
/**
 * Compares the text of two UCharIterators.
 * Reading starts at the current position of each iterator, and both iterators are moved.
 *
 * The identical prefix is skipped by reading code units from both iterators;
 * the iterators are then stepped back to where the real comparison must start.
 * There is no fast-Latin path in this function.
 *
 * @param left      iterator over the first string
 * @param right     iterator over the second string
 * @param errorCode in/out ICU error code
 * @return the comparison result; UCOL_EQUAL if errorCode indicates a failure on entry
 *         or if left and right are the same iterator object
 */
UCollationResult
RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
                           UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
    UBool numeric = settings->isNumeric();

    // Identical-prefix test.
    int32_t equalPrefixLength = 0;
    {
        UChar32 leftUnit;
        UChar32 rightUnit;
        while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) {
            // A negative value from next() is treated as the end of the text:
            // both iterators ended together.
            if(leftUnit < 0) { return UCOL_EQUAL; }
            ++equalPrefixLength;
        }

        // Back out the code units that differed, for the real collation comparison.
        // (An iterator that returned a negative value is not stepped back.)
        if(leftUnit >= 0) { left.previous(&left); }
        if(rightUnit >= 0) { right.previous(&right); }

        if(equalPrefixLength > 0) {
            if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
                    (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
                // Identical prefix: Back up to the start of a contraction or reordering sequence.
                // Both iterators are moved back in lockstep; only the left unit is
                // inspected because the prefix is identical in both strings.
                do {
                    --equalPrefixLength;
                    leftUnit = left.previous(&left);
                    right.previous(&right);
                } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
            }
            // See the notes in the UTF-16 version.
        }
    }

    UCollationResult result;
    if(settings->dontCheckFCD()) {
        UIterCollationIterator leftIter(data, numeric, left);
        UIterCollationIterator rightIter(data, numeric, right);
        result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    } else {
        // The FCD iterators are additionally given equalPrefixLength as their start index.
        FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);
        FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);
        result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    }
    // Done unless the strings are equal so far, the strength is identical, and nothing failed.
    if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
        return result;
    }

    // Compare identical level.
    // The comparison above moved the iterators: reposition them to index equalPrefixLength.
    left.move(&left, equalPrefixLength, UITER_ZERO);
    right.move(&right, equalPrefixLength, UITER_ZERO);
    const Normalizer2Impl &nfcImpl = data->nfcImpl;
    if(settings->dontCheckFCD()) {
        UIterNFDIterator leftIter(left);
        UIterNFDIterator rightIter(right);
        return compareNFDIter(nfcImpl, leftIter, rightIter);
    } else {
        FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);
        FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);
        return compareNFDIter(nfcImpl, leftIter, rightIter);
    }
}
1275 
1276 CollationKey &
getCollationKey(const UnicodeString & s,CollationKey & key,UErrorCode & errorCode) const1277 RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
1278                                    UErrorCode &errorCode) const {
1279     return getCollationKey(s.getBuffer(), s.length(), key, errorCode);
1280 }
1281 
1282 CollationKey &
getCollationKey(const UChar * s,int32_t length,CollationKey & key,UErrorCode & errorCode) const1283 RuleBasedCollator::getCollationKey(const UChar *s, int32_t length, CollationKey& key,
1284                                    UErrorCode &errorCode) const {
1285     if(U_FAILURE(errorCode)) {
1286         return key.setToBogus();
1287     }
1288     if(s == NULL && length != 0) {
1289         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1290         return key.setToBogus();
1291     }
1292     key.reset();  // resets the "bogus" state
1293     CollationKeyByteSink sink(key);
1294     writeSortKey(s, length, sink, errorCode);
1295     if(U_FAILURE(errorCode)) {
1296         key.setToBogus();
1297     } else if(key.isBogus()) {
1298         errorCode = U_MEMORY_ALLOCATION_ERROR;
1299     } else {
1300         key.setLength(sink.NumberOfBytesAppended());
1301     }
1302     return key;
1303 }
1304 
1305 int32_t
getSortKey(const UnicodeString & s,uint8_t * dest,int32_t capacity) const1306 RuleBasedCollator::getSortKey(const UnicodeString &s,
1307                               uint8_t *dest, int32_t capacity) const {
1308     return getSortKey(s.getBuffer(), s.length(), dest, capacity);
1309 }
1310 
1311 int32_t
getSortKey(const UChar * s,int32_t length,uint8_t * dest,int32_t capacity) const1312 RuleBasedCollator::getSortKey(const UChar *s, int32_t length,
1313                               uint8_t *dest, int32_t capacity) const {
1314     if((s == NULL && length != 0) || capacity < 0 || (dest == NULL && capacity > 0)) {
1315         return 0;
1316     }
1317     uint8_t noDest[1] = { 0 };
1318     if(dest == NULL) {
1319         // Distinguish pure preflighting from an allocation error.
1320         dest = noDest;
1321         capacity = 0;
1322     }
1323     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
1324     UErrorCode errorCode = U_ZERO_ERROR;
1325     writeSortKey(s, length, sink, errorCode);
1326     return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;
1327 }
1328 
1329 void
writeSortKey(const UChar * s,int32_t length,SortKeyByteSink & sink,UErrorCode & errorCode) const1330 RuleBasedCollator::writeSortKey(const UChar *s, int32_t length,
1331                                 SortKeyByteSink &sink, UErrorCode &errorCode) const {
1332     if(U_FAILURE(errorCode)) { return; }
1333     const UChar *limit = (length >= 0) ? s + length : NULL;
1334     UBool numeric = settings->isNumeric();
1335     CollationKeys::LevelCallback callback;
1336     if(settings->dontCheckFCD()) {
1337         UTF16CollationIterator iter(data, numeric, s, s, limit);
1338         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1339                                                   sink, Collation::PRIMARY_LEVEL,
1340                                                   callback, TRUE, errorCode);
1341     } else {
1342         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1343         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1344                                                   sink, Collation::PRIMARY_LEVEL,
1345                                                   callback, TRUE, errorCode);
1346     }
1347     if(settings->getStrength() == UCOL_IDENTICAL) {
1348         writeIdenticalLevel(s, limit, sink, errorCode);
1349     }
1350     static const char terminator = 0;  // TERMINATOR_BYTE
1351     sink.Append(&terminator, 1);
1352 }
1353 
1354 void
writeIdenticalLevel(const UChar * s,const UChar * limit,SortKeyByteSink & sink,UErrorCode & errorCode) const1355 RuleBasedCollator::writeIdenticalLevel(const UChar *s, const UChar *limit,
1356                                        SortKeyByteSink &sink, UErrorCode &errorCode) const {
1357     // NFD quick check
1358     const UChar *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, NULL, errorCode);
1359     if(U_FAILURE(errorCode)) { return; }
1360     sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
1361     UChar32 prev = 0;
1362     if(nfdQCYesLimit != s) {
1363         prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), sink);
1364     }
1365     // Is there non-NFD text?
1366     int32_t destLengthEstimate;
1367     if(limit != NULL) {
1368         if(nfdQCYesLimit == limit) { return; }
1369         destLengthEstimate = (int32_t)(limit - nfdQCYesLimit);
1370     } else {
1371         // s is NUL-terminated
1372         if(*nfdQCYesLimit == 0) { return; }
1373         destLengthEstimate = -1;
1374     }
1375     UnicodeString nfd;
1376     data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
1377     u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
1378 }
1379 
1380 namespace {
1381 
1382 /**
1383  * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
1384  * with an instance of this callback class.
1385  * When another level is about to be written, the callback
1386  * records the level and the number of bytes that will be written until
1387  * the sink (which is actually a FixedSortKeyByteSink) fills up.
1388  *
1389  * When internalNextSortKeyPart() is called again, it restarts with the last level
1390  * and ignores as many bytes as were written previously for that level.
1391  */
1392 class PartLevelCallback : public CollationKeys::LevelCallback {
1393 public:
PartLevelCallback(const SortKeyByteSink & s)1394     PartLevelCallback(const SortKeyByteSink &s)
1395             : sink(s), level(Collation::PRIMARY_LEVEL) {
1396         levelCapacity = sink.GetRemainingCapacity();
1397     }
~PartLevelCallback()1398     virtual ~PartLevelCallback() {}
needToWrite(Collation::Level l)1399     virtual UBool needToWrite(Collation::Level l) {
1400         if(!sink.Overflowed()) {
1401             // Remember a level that will be at least partially written.
1402             level = l;
1403             levelCapacity = sink.GetRemainingCapacity();
1404             return TRUE;
1405         } else {
1406             return FALSE;
1407         }
1408     }
getLevel() const1409     Collation::Level getLevel() const { return level; }
getLevelCapacity() const1410     int32_t getLevelCapacity() const { return levelCapacity; }
1411 
1412 private:
1413     const SortKeyByteSink &sink;
1414     Collation::Level level;
1415     int32_t levelCapacity;
1416 };
1417 
1418 }  // namespace
1419 
/**
 * Writes the next part (up to count bytes) of the sort key for the iterator's text.
 *
 * The caller's state makes the function resumable:
 * state[0] is the level to resume with, and state[1] is the number of bytes
 * of that level which the sink is told to ignore on this call.
 * Each call restarts the iterator at its start and regenerates the key from
 * the level in state[0].
 *
 * @param iter      iterator over the input text; repositioned by this function
 * @param state     in/out resume state, see above
 * @param dest      output buffer; may be NULL only if count is 0
 * @param count     number of bytes to write to dest; must not be negative
 * @param errorCode in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR
 *                  for invalid arguments
 * @return count if dest was filled completely and more key bytes remain;
 *         otherwise the number of key bytes written on this call, in which case
 *         the rest of dest is filled with 00 bytes; 0 on failure or if count is 0
 */
int32_t
RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
                                           uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return 0; }
    if(iter == NULL || state == NULL || count < 0 || (count > 0 && dest == NULL)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(count == 0) { return 0; }

    FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
    // Skip the bytes of the resumed level that the state says to ignore.
    sink.IgnoreBytes((int32_t)state[1]);
    // Always regenerate from the start of the text.
    iter->move(iter, 0, UITER_START);

    Collation::Level level = (Collation::Level)state[0];
    if(level <= Collation::QUATERNARY_LEVEL) {
        UBool numeric = settings->isNumeric();
        PartLevelCallback callback(sink);
        if(settings->dontCheckFCD()) {
            UIterCollationIterator ci(data, numeric, *iter);
            CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
                                                      sink, level, callback, FALSE, errorCode);
        } else {
            FCDUIterCollationIterator ci(data, numeric, *iter, 0);
            CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
                                                      sink, level, callback, FALSE, errorCode);
        }
        if(U_FAILURE(errorCode)) { return 0; }
        if(sink.NumberOfBytesAppended() > count) {
            // More bytes were produced than fit into dest:
            // save the values recorded by the callback for the next call.
            state[0] = (uint32_t)callback.getLevel();
            state[1] = (uint32_t)callback.getLevelCapacity();
            return count;
        }
        // All of the normal levels are done.
        if(settings->getStrength() == UCOL_IDENTICAL) {
            level = Collation::IDENTICAL_LEVEL;
            // Rewind for the identical level, which reads the text again.
            iter->move(iter, 0, UITER_START);
        }
        // else fall through to setting ZERO_LEVEL
    }

    if(level == Collation::IDENTICAL_LEVEL) {
        // Remaining capacity before this level is written; saved in state[1] on overflow.
        int32_t levelCapacity = sink.GetRemainingCapacity();
        // writeIdenticalLevel() works on a UTF-16 array: copy the rest of the iterator's text.
        UnicodeString s;
        for(;;) {
            UChar32 c = iter->next(iter);
            if(c < 0) { break; }
            s.append((UChar)c);
        }
        const UChar *sArray = s.getBuffer();
        writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);
        if(U_FAILURE(errorCode)) { return 0; }
        if(sink.NumberOfBytesAppended() > count) {
            state[0] = (uint32_t)level;
            state[1] = (uint32_t)levelCapacity;
            return count;
        }
    }

    // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
    state[0] = (uint32_t)Collation::ZERO_LEVEL;
    state[1] = 0;
    int32_t length = sink.NumberOfBytesAppended();
    int32_t i = length;
    while(i < count) { dest[i++] = 0; }
    return length;
}
1487 
1488 void
internalGetCEs(const UnicodeString & str,UVector64 & ces,UErrorCode & errorCode) const1489 RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
1490                                   UErrorCode &errorCode) const {
1491     if(U_FAILURE(errorCode)) { return; }
1492     const UChar *s = str.getBuffer();
1493     const UChar *limit = s + str.length();
1494     UBool numeric = settings->isNumeric();
1495     if(settings->dontCheckFCD()) {
1496         UTF16CollationIterator iter(data, numeric, s, s, limit);
1497         int64_t ce;
1498         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1499             ces.addElement(ce, errorCode);
1500         }
1501     } else {
1502         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1503         int64_t ce;
1504         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1505             ces.addElement(ce, errorCode);
1506         }
1507     }
1508 }
1509 
1510 namespace {
1511 
appendSubtag(CharString & s,char letter,const char * subtag,int32_t length,UErrorCode & errorCode)1512 void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
1513                   UErrorCode &errorCode) {
1514     if(U_FAILURE(errorCode) || length == 0) { return; }
1515     if(!s.isEmpty()) {
1516         s.append('_', errorCode);
1517     }
1518     s.append(letter, errorCode);
1519     for(int32_t i = 0; i < length; ++i) {
1520         s.append(uprv_toupper(subtag[i]), errorCode);
1521     }
1522 }
1523 
appendAttribute(CharString & s,char letter,UColAttributeValue value,UErrorCode & errorCode)1524 void appendAttribute(CharString &s, char letter, UColAttributeValue value,
1525                      UErrorCode &errorCode) {
1526     if(U_FAILURE(errorCode)) { return; }
1527     if(!s.isEmpty()) {
1528         s.append('_', errorCode);
1529     }
1530     static const char *valueChars = "1234...........IXO..SN..LU......";
1531     s.append(letter, errorCode);
1532     s.append(valueChars[value], errorCode);
1533 }
1534 
1535 }  // namespace
1536 
1537 int32_t
internalGetShortDefinitionString(const char * locale,char * buffer,int32_t capacity,UErrorCode & errorCode) const1538 RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
1539                                                     char *buffer, int32_t capacity,
1540                                                     UErrorCode &errorCode) const {
1541     if(U_FAILURE(errorCode)) { return 0; }
1542     if(buffer == NULL ? capacity != 0 : capacity < 0) {
1543         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1544         return 0;
1545     }
1546     if(locale == NULL) {
1547         locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
1548     }
1549 
1550     char resultLocale[ULOC_FULLNAME_CAPACITY + 1];
1551     int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,
1552                                                   "collation", locale,
1553                                                   NULL, &errorCode);
1554     if(U_FAILURE(errorCode)) { return 0; }
1555     if(length == 0) {
1556         uprv_strcpy(resultLocale, "root");
1557     } else {
1558         resultLocale[length] = 0;
1559     }
1560 
1561     // Append items in alphabetic order of their short definition letters.
1562     CharString result;
1563     char subtag[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1564 
1565     if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
1566         appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
1567     }
1568     // ATTR_VARIABLE_TOP not supported because 'B' was broken.
1569     // See ICU tickets #10372 and #10386.
1570     if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
1571         appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
1572     }
1573     if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
1574         appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
1575     }
1576     if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
1577         appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
1578     }
1579     if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
1580         appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
1581     }
1582     // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
1583     length = uloc_getKeywordValue(resultLocale, "collation", subtag, UPRV_LENGTHOF(subtag), &errorCode);
1584     appendSubtag(result, 'K', subtag, length, errorCode);
1585     length = uloc_getLanguage(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1586     appendSubtag(result, 'L', subtag, length, errorCode);
1587     if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
1588         appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
1589     }
1590     length = uloc_getCountry(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1591     appendSubtag(result, 'R', subtag, length, errorCode);
1592     if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
1593         appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);
1594     }
1595     length = uloc_getVariant(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1596     appendSubtag(result, 'V', subtag, length, errorCode);
1597     length = uloc_getScript(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1598     appendSubtag(result, 'Z', subtag, length, errorCode);
1599 
1600     if(U_FAILURE(errorCode)) { return 0; }
1601     if(result.length() <= capacity) {
1602         uprv_memcpy(buffer, result.data(), result.length());
1603     }
1604     return u_terminateChars(buffer, capacity, result.length(), &errorCode);
1605 }
1606 
1607 UBool
isUnsafe(UChar32 c) const1608 RuleBasedCollator::isUnsafe(UChar32 c) const {
1609     return data->isUnsafeBackward(c, settings->isNumeric());
1610 }
1611 
1612 void
computeMaxExpansions(const CollationTailoring * t,UErrorCode & errorCode)1613 RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
1614     t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
1615 }
1616 
1617 UBool
initMaxExpansions(UErrorCode & errorCode) const1618 RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
1619     umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
1620     return U_SUCCESS(errorCode);
1621 }
1622 
1623 CollationElementIterator *
createCollationElementIterator(const UnicodeString & source) const1624 RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
1625     UErrorCode errorCode = U_ZERO_ERROR;
1626     if(!initMaxExpansions(errorCode)) { return NULL; }
1627     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1628     if(U_FAILURE(errorCode)) {
1629         delete cei;
1630         return NULL;
1631     }
1632     return cei;
1633 }
1634 
1635 CollationElementIterator *
createCollationElementIterator(const CharacterIterator & source) const1636 RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
1637     UErrorCode errorCode = U_ZERO_ERROR;
1638     if(!initMaxExpansions(errorCode)) { return NULL; }
1639     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1640     if(U_FAILURE(errorCode)) {
1641         delete cei;
1642         return NULL;
1643     }
1644     return cei;
1645 }
1646 
1647 int32_t
getMaxExpansion(int32_t order) const1648 RuleBasedCollator::getMaxExpansion(int32_t order) const {
1649     UErrorCode errorCode = U_ZERO_ERROR;
1650     (void)initMaxExpansions(errorCode);
1651     return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
1652 }
1653 
1654 U_NAMESPACE_END
1655 
1656 #endif  // !UCONFIG_NO_COLLATION
1657