1 /**
2  *******************************************************************************
3  * Copyright (C) 1996-2015, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  *******************************************************************************
6  */
7 package com.ibm.icu.text;
8 
9 import java.lang.reflect.InvocationTargetException;
10 import java.lang.reflect.Method;
11 import java.text.CharacterIterator;
12 import java.text.ParseException;
13 import java.util.Arrays;
14 import java.util.concurrent.locks.Lock;
15 import java.util.concurrent.locks.ReentrantLock;
16 
17 import com.ibm.icu.impl.ClassLoaderUtil;
18 import com.ibm.icu.impl.Normalizer2Impl;
19 import com.ibm.icu.impl.Normalizer2Impl.ReorderingBuffer;
20 import com.ibm.icu.impl.Utility;
21 import com.ibm.icu.impl.coll.BOCSU;
22 import com.ibm.icu.impl.coll.Collation;
23 import com.ibm.icu.impl.coll.CollationCompare;
24 import com.ibm.icu.impl.coll.CollationData;
25 import com.ibm.icu.impl.coll.CollationFastLatin;
26 import com.ibm.icu.impl.coll.CollationIterator;
27 import com.ibm.icu.impl.coll.CollationKeys;
28 import com.ibm.icu.impl.coll.CollationKeys.SortKeyByteSink;
29 import com.ibm.icu.impl.coll.CollationLoader;
30 import com.ibm.icu.impl.coll.CollationRoot;
31 import com.ibm.icu.impl.coll.CollationSettings;
32 import com.ibm.icu.impl.coll.CollationTailoring;
33 import com.ibm.icu.impl.coll.ContractionsAndExpansions;
34 import com.ibm.icu.impl.coll.FCDUTF16CollationIterator;
35 import com.ibm.icu.impl.coll.SharedObject;
36 import com.ibm.icu.impl.coll.TailoredSet;
37 import com.ibm.icu.impl.coll.UTF16CollationIterator;
38 import com.ibm.icu.lang.UScript;
39 import com.ibm.icu.util.ULocale;
40 import com.ibm.icu.util.VersionInfo;
41 
42 /**
43  * <p>
44  * RuleBasedCollator is a concrete subclass of Collator. It allows customization of the Collator via user-specified rule
45  * sets. RuleBasedCollator is designed to be fully compliant to the <a
46  * href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation Algorithm (UCA)</a> and conforms to ISO 14651.
47  * </p>
48  *
49  * <p>A Collator is thread-safe only when frozen. See {@link #isFrozen()} and {@link com.ibm.icu.util.Freezable}.
50  *
51  * <p>
52  * Users are strongly encouraged to read the <a href="http://userguide.icu-project.org/collation">User
53  * Guide</a> for more information about the collation service before using this class.
54  * </p>
55  *
56  * <p>
57  * Create a RuleBasedCollator from a locale by calling the getInstance(Locale) factory method in the base class
58  * Collator. Collator.getInstance(Locale) creates a RuleBasedCollator object based on the collation rules defined by the
59  * argument locale. If a customized collation ordering or attributes is required, use the RuleBasedCollator(String)
60  * constructor with the appropriate rules. The customized RuleBasedCollator will base its ordering on the CLDR root collation, while
61  * re-adjusting the attributes and orders of the characters in the specified rule accordingly.
62  * </p>
63  *
64  * <p>
65  * RuleBasedCollator provides correct collation orders for most locales supported in ICU. If specific data for a locale
66  * is not available, the order eventually falls back to the
67  * <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
68  * </p>
69  *
70  * <p>
71  * For information about the collation rule syntax and details about customization, please refer to the <a
72  * href="http://userguide.icu-project.org/collation/customization">Collation customization</a> section of the
73  * User Guide.
74  * </p>
75  *
76  * <p>
77  * <strong>Note</strong> that there are some differences between the Collation rule syntax used in Java and ICU4J:
78  *
79  * <ul>
80  * <li>According to the JDK documentation: <i>
81  * <p>
82  * Modifier '!' : Turns on Thai/Lao vowel-consonant swapping. If this rule is in force when a Thai vowel of the range
83  * &#92;U0E40-&#92;U0E44 precedes a Thai consonant of the range &#92;U0E01-&#92;U0E2E OR a Lao vowel of the range
84  * &#92;U0EC0-&#92;U0EC4 precedes a Lao consonant of the range &#92;U0E81-&#92;U0EAE then the vowel is placed after the
85  * consonant for collation purposes.
86  * </p>
87  * <p>
88  * If a rule is without the modifier '!', the Thai/Lao vowel-consonant swapping is not turned on.
89  * </p>
90  * </i>
91  * <p>
92  * ICU4J's RuleBasedCollator does not support turning off the Thai/Lao vowel-consonant swapping, since the UCA clearly
93  * states that it has to be supported to ensure a correct sorting order. If a '!' is encountered, it is ignored.
94  * </p>
95  * <li>As mentioned in the documentation of the base class Collator, compatibility decomposition mode is not supported.
96  * </ul>
97  * <p>
98  * <strong>Examples</strong>
99  * </p>
100  * <p>
101  * Creating Customized RuleBasedCollators: <blockquote>
102  *
103  * <pre>
104  * String simple = "&amp; a &lt; b &lt; c &lt; d";
105  * RuleBasedCollator simpleCollator = new RuleBasedCollator(simple);
106  *
107  * String norwegian = "&amp; a , A &lt; b , B &lt; c , C &lt; d , D &lt; e , E "
108  *                    + "&lt; f , F &lt; g , G &lt; h , H &lt; i , I &lt; j , "
109  *                    + "J &lt; k , K &lt; l , L &lt; m , M &lt; n , N &lt; "
110  *                    + "o , O &lt; p , P &lt; q , Q &lt; r , R &lt; s , S &lt; "
111  *                    + "t , T &lt; u , U &lt; v , V &lt; w , W &lt; x , X "
112  *                    + "&lt; y , Y &lt; z , Z &lt; &#92;u00E5 = a&#92;u030A "
113  *                    + ", &#92;u00C5 = A&#92;u030A ; aa , AA &lt; &#92;u00E6 "
114  *                    + ", &#92;u00C6 &lt; &#92;u00F8 , &#92;u00D8";
115  * RuleBasedCollator norwegianCollator = new RuleBasedCollator(norwegian);
116  * </pre>
117  *
118  * </blockquote>
119  *
120  * Concatenating rules to combine <code>Collator</code>s: <blockquote>
121  *
122  * <pre>
123  * // Create an en_US Collator object
124  * RuleBasedCollator en_USCollator = (RuleBasedCollator)
125  *     Collator.getInstance(new Locale("en", "US", ""));
126  * // Create a da_DK Collator object
127  * RuleBasedCollator da_DKCollator = (RuleBasedCollator)
128  *     Collator.getInstance(new Locale("da", "DK", ""));
129  * // Combine the two
130  * // First, get the collation rules from en_USCollator
131  * String en_USRules = en_USCollator.getRules();
132  * // Second, get the collation rules from da_DKCollator
133  * String da_DKRules = da_DKCollator.getRules();
134  * RuleBasedCollator newCollator =
135  *                             new RuleBasedCollator(en_USRules + da_DKRules);
136  * // newCollator has the combined rules
137  * </pre>
138  *
139  * </blockquote>
140  *
141  * Making changes to an existing RuleBasedCollator to create a new <code>Collator</code> object, by appending changes to
142  * the existing rule: <blockquote>
143  *
144  * <pre>
145  * // Create a new Collator object with additional rules
146  * String addRules = "&amp; C &lt; ch, cH, Ch, CH";
147  * RuleBasedCollator myCollator =
148  *     new RuleBasedCollator(en_USCollator.getRules() + addRules);
149  * // myCollator contains the new rules
150  * </pre>
151  *
152  * </blockquote>
153  *
154  * How to change the order of non-spacing accents: <blockquote>
155  *
156  * <pre>
157  * // old rule with main accents
158  * String oldRules = "= &#92;u0301 ; &#92;u0300 ; &#92;u0302 ; &#92;u0308 "
159  *                 + "; &#92;u0327 ; &#92;u0303 ; &#92;u0304 ; &#92;u0305 "
160  *                 + "; &#92;u0306 ; &#92;u0307 ; &#92;u0309 ; &#92;u030A "
161  *                 + "; &#92;u030B ; &#92;u030C ; &#92;u030D ; &#92;u030E "
162  *                 + "; &#92;u030F ; &#92;u0310 ; &#92;u0311 ; &#92;u0312 "
163  *                 + "&lt; a , A ; ae, AE ; &#92;u00e6 , &#92;u00c6 "
164  *                 + "&lt; b , B &lt; c, C &lt; e, E &amp; C &lt; d , D";
165  * // change the order of accent characters
166  * String addOn = "&amp; &#92;u0300 ; &#92;u0308 ; &#92;u0302";
167  * RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn);
168  * </pre>
169  *
170  * </blockquote>
171  *
172  * Putting in a new primary ordering before the default setting, e.g. sort English characters before or after Japanese
173  * characters in the Japanese <code>Collator</code>: <blockquote>
174  *
175  * <pre>
176  * // get en_US Collator rules
177  * RuleBasedCollator en_USCollator
178  *                        = (RuleBasedCollator)Collator.getInstance(Locale.US);
179  * // add a few Japanese characters to sort before English characters
180  * // suppose the last character before the first base letter 'a' in
181  * // the English collation rule is &#92;u2212
182  * String jaString = "&amp; &#92;u2212 &lt; &#92;u3041, &#92;u3042 &lt; &#92;u3043, "
183  *                   + "&#92;u3044";
184  * RuleBasedCollator myJapaneseCollator
185  *              = new RuleBasedCollator(en_USCollator.getRules() + jaString);
186  * </pre>
187  *
188  * </blockquote>
189  * </p>
190  * <p>
191  * This class is not subclassable
192  * </p>
193  *
194  * @author Syn Wee Quek
195  * @stable ICU 2.8
196  */
197 public final class RuleBasedCollator extends Collator {
198     // public constructors ---------------------------------------------------
199 
200     /**
201      * <p>
202      * Constructor that takes the argument rules for customization.
203      * The collator will be based on the CLDR root collation, with the
204      * attributes and re-ordering of the characters specified in the argument rules.
205      * </p>
206      * <p>
207      * See the User Guide's section on <a href="http://userguide.icu-project.org/collation/customization">
208      * Collation Customization</a> for details on the rule syntax.
209      * </p>
210      *
211      * @param rules
212      *            the collation rules to build the collation table from.
213      * @exception ParseException
214      *                and IOException thrown. ParseException thrown when argument rules have an invalid syntax.
215      *                IOException thrown when an error occurred while reading internal data.
216      * @stable ICU 2.8
217      */
RuleBasedCollator(String rules)218     public RuleBasedCollator(String rules) throws Exception {
219         if (rules == null) {
220             throw new IllegalArgumentException("Collation rules can not be null");
221         }
222         validLocale = ULocale.ROOT;
223         internalBuildTailoring(rules);
224     }
225 
226     /**
227      * Implements from-rule constructors.
228      * @param rules rule string
229      * @throws Exception
230      */
internalBuildTailoring(String rules)231     private final void internalBuildTailoring(String rules) throws Exception {
232         CollationTailoring base = CollationRoot.getRoot();
233         // Most code using Collator does not need to build a Collator from rules.
234         // By using reflection, most code will not have a static dependency on the builder code.
235         // CollationBuilder builder = new CollationBuilder(base);
236         ClassLoader classLoader = ClassLoaderUtil.getClassLoader(getClass());
237         CollationTailoring t;
238         try {
239             Class<?> builderClass = classLoader.loadClass("com.ibm.icu.impl.coll.CollationBuilder");
240             Object builder = builderClass.getConstructor(CollationTailoring.class).newInstance(base);
241             // builder.parseAndBuild(rules);
242             Method parseAndBuild = builderClass.getMethod("parseAndBuild", String.class);
243             t = (CollationTailoring)parseAndBuild.invoke(builder, rules);
244         } catch(InvocationTargetException e) {
245             throw (Exception)e.getTargetException();
246         }
247         t.actualLocale = null;
248         adoptTailoring(t);
249     }
250 
251     // public methods --------------------------------------------------------
252 
253     /**
254      * Clones the RuleBasedCollator
255      *
256      * @return a new instance of this RuleBasedCollator object
257      * @stable ICU 2.8
258      */
259     @Override
clone()260     public Object clone() throws CloneNotSupportedException {
261         if (isFrozen()) {
262             return this;
263         }
264         return cloneAsThawed();
265     }
266 
initMaxExpansions()267     private final void initMaxExpansions() {
268         synchronized(tailoring) {
269             if (tailoring.maxExpansions == null) {
270                 tailoring.maxExpansions = CollationElementIterator.computeMaxExpansions(tailoring.data);
271             }
272         }
273     }
274 
275     /**
276      * Return a CollationElementIterator for the given String.
277      *
278      * @see CollationElementIterator
279      * @stable ICU 2.8
280      */
getCollationElementIterator(String source)281     public CollationElementIterator getCollationElementIterator(String source) {
282         initMaxExpansions();
283         return new CollationElementIterator(source, this);
284     }
285 
286     /**
287      * Return a CollationElementIterator for the given CharacterIterator. The source iterator's integrity will be
288      * preserved since a new copy will be created for use.
289      *
290      * @see CollationElementIterator
291      * @stable ICU 2.8
292      */
getCollationElementIterator(CharacterIterator source)293     public CollationElementIterator getCollationElementIterator(CharacterIterator source) {
294         initMaxExpansions();
295         CharacterIterator newsource = (CharacterIterator) source.clone();
296         return new CollationElementIterator(newsource, this);
297     }
298 
299     /**
300      * Return a CollationElementIterator for the given UCharacterIterator. The source iterator's integrity will be
301      * preserved since a new copy will be created for use.
302      *
303      * @see CollationElementIterator
304      * @stable ICU 2.8
305      */
getCollationElementIterator(UCharacterIterator source)306     public CollationElementIterator getCollationElementIterator(UCharacterIterator source) {
307         initMaxExpansions();
308         return new CollationElementIterator(source, this);
309     }
310 
311     // Freezable interface implementation -------------------------------------------------
312 
313     /**
314      * Determines whether the object has been frozen or not.
315      *
316      * <p>An unfrozen Collator is mutable and not thread-safe.
317      * A frozen Collator is immutable and thread-safe.
318      *
319      * @stable ICU 4.8
320      */
321     @Override
isFrozen()322     public boolean isFrozen() {
323         return frozenLock != null;
324     }
325 
326     /**
327      * Freezes the collator.
328      * @return the collator itself.
329      * @stable ICU 4.8
330      */
331     @Override
freeze()332     public Collator freeze() {
333         if (!isFrozen()) {
334             frozenLock = new ReentrantLock();
335             if (collationBuffer == null) {
336                 collationBuffer = new CollationBuffer(data);
337             }
338         }
339         return this;
340     }
341 
342     /**
343      * Provides for the clone operation. Any clone is initially unfrozen.
344      * @stable ICU 4.8
345      */
346     @Override
cloneAsThawed()347     public RuleBasedCollator cloneAsThawed() {
348         try {
349             RuleBasedCollator result = (RuleBasedCollator) super.clone();
350             // since all collation data in the RuleBasedCollator do not change
351             // we can safely assign the result.fields to this collator
352             // except in cases where we can't
353             result.settings = settings.clone();
354             result.collationBuffer = null;
355             result.frozenLock = null;
356             return result;
357         } catch (CloneNotSupportedException e) {
358             // Clone is implemented
359             return null;
360         }
361     }
362 
363     // public setters --------------------------------------------------------
364 
checkNotFrozen()365     private void checkNotFrozen() {
366         if (isFrozen()) {
367             throw new UnsupportedOperationException("Attempt to modify frozen RuleBasedCollator");
368         }
369     }
370 
getOwnedSettings()371     private final CollationSettings getOwnedSettings() {
372         return settings.copyOnWrite();
373     }
374 
getDefaultSettings()375     private final CollationSettings getDefaultSettings() {
376         return tailoring.settings.readOnly();
377     }
378 
379     /**
380      * Sets the Hiragana Quaternary mode to be on or off. When the Hiragana Quaternary mode is turned on, the collator
381      * positions Hiragana characters before all non-ignorable characters in QUATERNARY strength. This is to produce a
382      * correct JIS collation order, distinguishing between Katakana and Hiragana characters.
383      *
384      * <p>This attribute was an implementation detail of the CLDR Japanese tailoring.
385      * Since ICU 50, this attribute is not settable any more via API functions.
386      * Since CLDR 25/ICU 53, explicit quaternary relations are used
387      * to achieve the same Japanese sort order.
388      *
389      * @param flag
390      *            true if Hiragana Quaternary mode is to be on, false otherwise
391      * @see #setHiraganaQuaternaryDefault
392      * @see #isHiraganaQuaternary
393      * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
394      */
395     @Deprecated
setHiraganaQuaternary(boolean flag)396     public void setHiraganaQuaternary(boolean flag) {
397         checkNotFrozen();
398     }
399 
400     /**
401      * Sets the Hiragana Quaternary mode to the initial mode set during construction of the RuleBasedCollator. See
402      * setHiraganaQuaternary(boolean) for more details.
403      *
404      * <p>This attribute was an implementation detail of the CLDR Japanese tailoring.
405      * Since ICU 50, this attribute is not settable any more via API functions.
406      * Since CLDR 25/ICU 53, explicit quaternary relations are used
407      * to achieve the same Japanese sort order.
408      *
409      * @see #setHiraganaQuaternary(boolean)
410      * @see #isHiraganaQuaternary
411      * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
412      */
413     @Deprecated
setHiraganaQuaternaryDefault()414     public void setHiraganaQuaternaryDefault() {
415         checkNotFrozen();
416     }
417 
418     /**
419      * Sets whether uppercase characters sort before lowercase characters or vice versa, in strength TERTIARY. The
420      * default mode is false, and so lowercase characters sort before uppercase characters. If true, sort upper case
421      * characters first.
422      *
423      * @param upperfirst
424      *            true to sort uppercase characters before lowercase characters, false to sort lowercase characters
425      *            before uppercase characters
426      * @see #isLowerCaseFirst
427      * @see #isUpperCaseFirst
428      * @see #setLowerCaseFirst
429      * @see #setCaseFirstDefault
430      * @stable ICU 2.8
431      */
setUpperCaseFirst(boolean upperfirst)432     public void setUpperCaseFirst(boolean upperfirst) {
433         checkNotFrozen();
434         if (upperfirst == isUpperCaseFirst()) { return; }
435         CollationSettings ownedSettings = getOwnedSettings();
436         ownedSettings.setCaseFirst(upperfirst ? CollationSettings.CASE_FIRST_AND_UPPER_MASK : 0);
437         setFastLatinOptions(ownedSettings);
438     }
439 
440     /**
441      * Sets the orders of lower cased characters to sort before upper cased characters, in strength TERTIARY. The
442      * default mode is false. If true is set, the RuleBasedCollator will sort lower cased characters before the upper
443      * cased ones. Otherwise, if false is set, the RuleBasedCollator will ignore case preferences.
444      *
445      * @param lowerfirst
446      *            true for sorting lower cased characters before upper cased characters, false to ignore case
447      *            preferences.
448      * @see #isLowerCaseFirst
449      * @see #isUpperCaseFirst
450      * @see #setUpperCaseFirst
451      * @see #setCaseFirstDefault
452      * @stable ICU 2.8
453      */
setLowerCaseFirst(boolean lowerfirst)454     public void setLowerCaseFirst(boolean lowerfirst) {
455         checkNotFrozen();
456         if (lowerfirst == isLowerCaseFirst()) { return; }
457         CollationSettings ownedSettings = getOwnedSettings();
458         ownedSettings.setCaseFirst(lowerfirst ? CollationSettings.CASE_FIRST : 0);
459         setFastLatinOptions(ownedSettings);
460     }
461 
462     /**
463      * Sets the case first mode to the initial mode set during construction of the RuleBasedCollator. See
464      * setUpperCaseFirst(boolean) and setLowerCaseFirst(boolean) for more details.
465      *
466      * @see #isLowerCaseFirst
467      * @see #isUpperCaseFirst
468      * @see #setLowerCaseFirst(boolean)
469      * @see #setUpperCaseFirst(boolean)
470      * @stable ICU 2.8
471      */
setCaseFirstDefault()472     public final void setCaseFirstDefault() {
473         checkNotFrozen();
474         CollationSettings defaultSettings = getDefaultSettings();
475         if(settings.readOnly() == defaultSettings) { return; }
476         CollationSettings ownedSettings = getOwnedSettings();
477         ownedSettings.setCaseFirstDefault(defaultSettings.options);
478         setFastLatinOptions(ownedSettings);
479     }
480 
481     /**
482      * Sets the alternate handling mode to the initial mode set during construction of the RuleBasedCollator. See
483      * setAlternateHandling(boolean) for more details.
484      *
485      * @see #setAlternateHandlingShifted(boolean)
486      * @see #isAlternateHandlingShifted()
487      * @stable ICU 2.8
488      */
setAlternateHandlingDefault()489     public void setAlternateHandlingDefault() {
490         checkNotFrozen();
491         CollationSettings defaultSettings = getDefaultSettings();
492         if(settings.readOnly() == defaultSettings) { return; }
493         CollationSettings ownedSettings = getOwnedSettings();
494         ownedSettings.setAlternateHandlingDefault(defaultSettings.options);
495         setFastLatinOptions(ownedSettings);
496     }
497 
498     /**
499      * Sets the case level mode to the initial mode set during construction of the RuleBasedCollator. See
500      * setCaseLevel(boolean) for more details.
501      *
502      * @see #setCaseLevel(boolean)
503      * @see #isCaseLevel
504      * @stable ICU 2.8
505      */
setCaseLevelDefault()506     public void setCaseLevelDefault() {
507         checkNotFrozen();
508         CollationSettings defaultSettings = getDefaultSettings();
509         if(settings.readOnly() == defaultSettings) { return; }
510         CollationSettings ownedSettings = getOwnedSettings();
511         ownedSettings.setFlagDefault(CollationSettings.CASE_LEVEL, defaultSettings.options);
512         setFastLatinOptions(ownedSettings);
513     }
514 
515     /**
516      * Sets the decomposition mode to the initial mode set during construction of the RuleBasedCollator. See
517      * setDecomposition(int) for more details.
518      *
519      * @see #getDecomposition
520      * @see #setDecomposition(int)
521      * @stable ICU 2.8
522      */
setDecompositionDefault()523     public void setDecompositionDefault() {
524         checkNotFrozen();
525         CollationSettings defaultSettings = getDefaultSettings();
526         if(settings.readOnly() == defaultSettings) { return; }
527         CollationSettings ownedSettings = getOwnedSettings();
528         ownedSettings.setFlagDefault(CollationSettings.CHECK_FCD, defaultSettings.options);
529         setFastLatinOptions(ownedSettings);
530     }
531 
532     /**
533      * Sets the French collation mode to the initial mode set during construction of the RuleBasedCollator. See
534      * setFrenchCollation(boolean) for more details.
535      *
536      * @see #isFrenchCollation
537      * @see #setFrenchCollation(boolean)
538      * @stable ICU 2.8
539      */
setFrenchCollationDefault()540     public void setFrenchCollationDefault() {
541         checkNotFrozen();
542         CollationSettings defaultSettings = getDefaultSettings();
543         if(settings.readOnly() == defaultSettings) { return; }
544         CollationSettings ownedSettings = getOwnedSettings();
545         ownedSettings.setFlagDefault(CollationSettings.BACKWARD_SECONDARY, defaultSettings.options);
546         setFastLatinOptions(ownedSettings);
547     }
548 
549     /**
550      * Sets the collation strength to the initial mode set during the construction of the RuleBasedCollator. See
551      * setStrength(int) for more details.
552      *
553      * @see #setStrength(int)
554      * @see #getStrength
555      * @stable ICU 2.8
556      */
setStrengthDefault()557     public void setStrengthDefault() {
558         checkNotFrozen();
559         CollationSettings defaultSettings = getDefaultSettings();
560         if(settings.readOnly() == defaultSettings) { return; }
561         CollationSettings ownedSettings = getOwnedSettings();
562         ownedSettings.setStrengthDefault(defaultSettings.options);
563         setFastLatinOptions(ownedSettings);
564     }
565 
566     /**
567      * Method to set numeric collation to its default value.
568      *
569      * @see #getNumericCollation
570      * @see #setNumericCollation
571      * @stable ICU 2.8
572      */
setNumericCollationDefault()573     public void setNumericCollationDefault() {
574         checkNotFrozen();
575         CollationSettings defaultSettings = getDefaultSettings();
576         if(settings.readOnly() == defaultSettings) { return; }
577         CollationSettings ownedSettings = getOwnedSettings();
578         ownedSettings.setFlagDefault(CollationSettings.NUMERIC, defaultSettings.options);
579         setFastLatinOptions(ownedSettings);
580     }
581 
582     /**
583      * Sets the mode for the direction of SECONDARY weights to be used in French collation. The default value is false,
584      * which treats SECONDARY weights in the order they appear. If set to true, the SECONDARY weights will be sorted
585      * backwards. See the section on <a href="http://userguide.icu-project.org/collation/architecture">
586      * French collation</a> for more information.
587      *
588      * @param flag
589      *            true to set the French collation on, false to set it off
590      * @stable ICU 2.8
591      * @see #isFrenchCollation
592      * @see #setFrenchCollationDefault
593      */
setFrenchCollation(boolean flag)594     public void setFrenchCollation(boolean flag) {
595         checkNotFrozen();
596         if(flag == isFrenchCollation()) { return; }
597         CollationSettings ownedSettings = getOwnedSettings();
598         ownedSettings.setFlag(CollationSettings.BACKWARD_SECONDARY, flag);
599         setFastLatinOptions(ownedSettings);
600     }
601 
602     /**
603      * Sets the alternate handling for QUATERNARY strength to be either shifted or non-ignorable. See the UCA definition
604      * on <a href="http://www.unicode.org/unicode/reports/tr10/#Variable_Weighting">Variable Weighting</a>. This
605      * attribute will only be effective when QUATERNARY strength is set. The default value for this mode is false,
606      * corresponding to the NON_IGNORABLE mode in UCA. In the NON_IGNORABLE mode, the RuleBasedCollator treats all
607      * the code points with non-ignorable primary weights in the same way. If the mode is set to true, the behavior
608      * corresponds to SHIFTED defined in UCA, this causes code points with PRIMARY orders that are equal or below the
609      * variable top value to be ignored in PRIMARY order and moved to the QUATERNARY order.
610      *
611      * @param shifted
612      *            true if SHIFTED behavior for alternate handling is desired, false for the NON_IGNORABLE behavior.
613      * @see #isAlternateHandlingShifted
614      * @see #setAlternateHandlingDefault
615      * @stable ICU 2.8
616      */
setAlternateHandlingShifted(boolean shifted)617     public void setAlternateHandlingShifted(boolean shifted) {
618         checkNotFrozen();
619         if(shifted == isAlternateHandlingShifted()) { return; }
620         CollationSettings ownedSettings = getOwnedSettings();
621         ownedSettings.setAlternateHandlingShifted(shifted);
622         setFastLatinOptions(ownedSettings);
623     }
624 
625     /**
626      * <p>
627      * When case level is set to true, an additional weight is formed between the SECONDARY and TERTIARY weight, known
628      * as the case level. The case level is used to distinguish large and small Japanese Kana characters. Case level
629      * could also be used in other situations. For example to distinguish certain Pinyin characters. The default value
630      * is false, which means the case level is not generated. The contents of the case level are affected by the case
631      * first mode. A simple way to ignore accent differences in a string is to set the strength to PRIMARY and enable
632      * case level.
633      * </p>
634      * <p>
635      * See the section on <a href="http://userguide.icu-project.org/collation/architecture">case
636      * level</a> for more information.
637      * </p>
638      *
639      * @param flag
640      *            true if case level sorting is required, false otherwise
641      * @stable ICU 2.8
642      * @see #setCaseLevelDefault
643      * @see #isCaseLevel
644      */
setCaseLevel(boolean flag)645     public void setCaseLevel(boolean flag) {
646         checkNotFrozen();
647         if(flag == isCaseLevel()) { return; }
648         CollationSettings ownedSettings = getOwnedSettings();
649         ownedSettings.setFlag(CollationSettings.CASE_LEVEL, flag);
650         setFastLatinOptions(ownedSettings);
651     }
652 
653     /**
654      * Sets the decomposition mode of this Collator.  Setting this
655      * decomposition attribute with CANONICAL_DECOMPOSITION allows the
656      * Collator to handle un-normalized text properly, producing the
657      * same results as if the text were normalized. If
658      * NO_DECOMPOSITION is set, it is the user's responsibility to
659      * insure that all text is already in the appropriate form before
660      * a comparison or before getting a CollationKey. Adjusting
661      * decomposition mode allows the user to select between faster and
662      * more complete collation behavior.</p>
663      *
664      * <p>Since a great many of the world's languages do not require
665      * text normalization, most locales set NO_DECOMPOSITION as the
666      * default decomposition mode.</p>
667      *
668      * The default decompositon mode for the Collator is
669      * NO_DECOMPOSITON, unless specified otherwise by the locale used
670      * to create the Collator.</p>
671      *
672      * <p>See getDecomposition for a description of decomposition
673      * mode.</p>
674      *
675      * @param decomposition the new decomposition mode
676      * @see #getDecomposition
677      * @see #NO_DECOMPOSITION
678      * @see #CANONICAL_DECOMPOSITION
679      * @throws IllegalArgumentException If the given value is not a valid
680      *            decomposition mode.
681      * @stable ICU 2.8
682      */
683     @Override
setDecomposition(int decomposition)684     public void setDecomposition(int decomposition)
685     {
686         checkNotFrozen();
687         boolean flag;
688         switch(decomposition) {
689         case NO_DECOMPOSITION:
690             flag = false;
691             break;
692         case CANONICAL_DECOMPOSITION:
693             flag = true;
694             break;
695         default:
696             throw new IllegalArgumentException("Wrong decomposition mode.");
697         }
698         if(flag == settings.readOnly().getFlag(CollationSettings.CHECK_FCD)) { return; }
699         CollationSettings ownedSettings = getOwnedSettings();
700         ownedSettings.setFlag(CollationSettings.CHECK_FCD, flag);
701         setFastLatinOptions(ownedSettings);
702     }
703 
704     /**
705      * Sets this Collator's strength attribute. The strength attribute determines the minimum level of difference
706      * considered significant during comparison.
707      *
708      * <p>See the Collator class description for an example of use.
709      *
710      * @param newStrength
711      *            the new strength value.
712      * @see #getStrength
713      * @see #setStrengthDefault
714      * @see #PRIMARY
715      * @see #SECONDARY
716      * @see #TERTIARY
717      * @see #QUATERNARY
718      * @see #IDENTICAL
719      * @exception IllegalArgumentException
720      *                If the new strength value is not one of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
721      * @stable ICU 2.8
722      */
723     @Override
setStrength(int newStrength)724     public void setStrength(int newStrength) {
725         checkNotFrozen();
726         if(newStrength == getStrength()) { return; }
727         CollationSettings ownedSettings = getOwnedSettings();
728         ownedSettings.setStrength(newStrength);
729         setFastLatinOptions(ownedSettings);
730     }
731 
732     /**
733      * {@icu} Sets the variable top to the top of the specified reordering group.
734      * The variable top determines the highest-sorting character
735      * which is affected by the alternate handling behavior.
736      * If that attribute is set to NON_IGNORABLE, then the variable top has no effect.
737      * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION,
738      *              Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY;
739      *              or Collator.ReorderCodes.DEFAULT to restore the default max variable group
740      * @return this
741      * @see #getMaxVariable
742      * @stable ICU 53
743      */
744     @Override
setMaxVariable(int group)745     public RuleBasedCollator setMaxVariable(int group) {
746         // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
747         int value;
748         if(group == Collator.ReorderCodes.DEFAULT) {
749             value = -1;  // UCOL_DEFAULT
750         } else if(Collator.ReorderCodes.FIRST <= group && group <= Collator.ReorderCodes.CURRENCY) {
751             value = group - Collator.ReorderCodes.FIRST;
752         } else {
753             throw new IllegalArgumentException("illegal max variable group " + group);
754         }
755         int oldValue = settings.readOnly().getMaxVariable();
756         if(value == oldValue) {
757             return this;
758         }
759         CollationSettings defaultSettings = getDefaultSettings();
760         if(settings.readOnly() == defaultSettings) {
761             if(value < 0) {  // UCOL_DEFAULT
762                 return this;
763             }
764         }
765         CollationSettings ownedSettings = getOwnedSettings();
766 
767         if(group == Collator.ReorderCodes.DEFAULT) {
768             group = Collator.ReorderCodes.FIRST + defaultSettings.getMaxVariable();
769         }
770         long varTop = data.getLastPrimaryForGroup(group);
771         assert(varTop != 0);
772         ownedSettings.setMaxVariable(value, defaultSettings.options);
773         ownedSettings.variableTop = varTop;
774         setFastLatinOptions(ownedSettings);
775         return this;
776     }
777 
778     /**
779      * {@icu} Returns the maximum reordering group whose characters are affected by
780      * the alternate handling behavior.
781      * @return the maximum variable reordering group.
782      * @see #setMaxVariable
783      * @stable ICU 53
784      */
785     @Override
getMaxVariable()786     public int getMaxVariable() {
787         return Collator.ReorderCodes.FIRST + settings.readOnly().getMaxVariable();
788     }
789 
790     /**
791      * {@icu} Sets the variable top to the primary weight of the specified string.
792      *
793      * <p>Beginning with ICU 53, the variable top is pinned to
794      * the top of one of the supported reordering groups,
795      * and it must not be beyond the last of those groups.
796      * See {@link #setMaxVariable(int)}.
797      *
798      * @param varTop
799      *            one or more (if contraction) characters to which the variable top should be set
800      * @return variable top primary weight
801      * @exception IllegalArgumentException
802      *                is thrown if varTop argument is not a valid variable top element. A variable top element is
803      *                invalid when
804      *                <ul>
805      *                <li>it is a contraction that does not exist in the Collation order
806      *                <li>the variable top is beyond
807      *                    the last reordering group supported by setMaxVariable()
808      *                <li>when the varTop argument is null or zero in length.
809      *                </ul>
810      * @see #getVariableTop
811      * @see RuleBasedCollator#setAlternateHandlingShifted
812      * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
813      */
814     @Override
815     @Deprecated
setVariableTop(String varTop)816     public int setVariableTop(String varTop) {
817         checkNotFrozen();
818         if (varTop == null || varTop.length() == 0) {
819             throw new IllegalArgumentException("Variable top argument string can not be null or zero in length.");
820         }
821         boolean numeric = settings.readOnly().isNumeric();
822         long ce1, ce2;
823         if(settings.readOnly().dontCheckFCD()) {
824             UTF16CollationIterator ci = new UTF16CollationIterator(data, numeric, varTop, 0);
825             ce1 = ci.nextCE();
826             ce2 = ci.nextCE();
827         } else {
828             FCDUTF16CollationIterator ci = new FCDUTF16CollationIterator(data, numeric, varTop, 0);
829             ce1 = ci.nextCE();
830             ce2 = ci.nextCE();
831         }
832         if(ce1 == Collation.NO_CE || ce2 != Collation.NO_CE) {
833             throw new IllegalArgumentException("Variable top argument string must map to exactly one collation element");
834         }
835         internalSetVariableTop(ce1 >>> 32);
836         return (int)settings.readOnly().variableTop;
837     }
838 
839     /**
840      * {@icu} Sets the variable top to the specified primary weight.
841      *
842      * <p>Beginning with ICU 53, the variable top is pinned to
843      * the top of one of the supported reordering groups,
844      * and it must not be beyond the last of those groups.
845      * See {@link #setMaxVariable(int)}.
846      *
847      * @param varTop primary weight, as returned by setVariableTop or getVariableTop
848      * @see #getVariableTop
849      * @see #setVariableTop(String)
850      * @deprecated ICU 53 Call setMaxVariable() instead.
851      */
852     @Override
853     @Deprecated
setVariableTop(int varTop)854     public void setVariableTop(int varTop) {
855         checkNotFrozen();
856         internalSetVariableTop(varTop & 0xffffffffL);
857     }
858 
internalSetVariableTop(long varTop)859     private void internalSetVariableTop(long varTop) {
860         if(varTop != settings.readOnly().variableTop) {
861             // Pin the variable top to the end of the reordering group which contains it.
862             // Only a few special groups are supported.
863             int group = data.getGroupForPrimary(varTop);
864             if(group < Collator.ReorderCodes.FIRST || Collator.ReorderCodes.CURRENCY < group) {
865                 throw new IllegalArgumentException("The variable top must be a primary weight in " +
866                         "the space/punctuation/symbols/currency symbols range");
867             }
868             long v = data.getLastPrimaryForGroup(group);
869             assert(v != 0 && v >= varTop);
870             varTop = v;
871             if(varTop != settings.readOnly().variableTop) {
872                 CollationSettings ownedSettings = getOwnedSettings();
873                 ownedSettings.setMaxVariable(group - Collator.ReorderCodes.FIRST,
874                         getDefaultSettings().options);
875                 ownedSettings.variableTop = varTop;
876                 setFastLatinOptions(ownedSettings);
877             }
878         }
879     }
880 
881     /**
882      * {@icu} When numeric collation is turned on, this Collator makes
883      * substrings of digits sort according to their numeric values.
884      *
885      * <p>This is a way to get '100' to sort AFTER '2'. Note that the longest
886      * digit substring that can be treated as a single unit is
887      * 254 digits (not counting leading zeros). If a digit substring is
888      * longer than that, the digits beyond the limit will be treated as a
889      * separate digit substring.
890      *
891      * <p>A "digit" in this sense is a code point with General_Category=Nd,
892      * which does not include circled numbers, roman numerals, etc.
893      * Only a contiguous digit substring is considered, that is,
894      * non-negative integers without separators.
895      * There is no support for plus/minus signs, decimals, exponents, etc.
896      *
897      * @param flag
898      *            true to turn numeric collation on and false to turn it off
899      * @see #getNumericCollation
900      * @see #setNumericCollationDefault
901      * @stable ICU 2.8
902      */
setNumericCollation(boolean flag)903     public void setNumericCollation(boolean flag) {
904         checkNotFrozen();
905         // sort substrings of digits as numbers
906         if(flag == getNumericCollation()) { return; }
907         CollationSettings ownedSettings = getOwnedSettings();
908         ownedSettings.setFlag(CollationSettings.NUMERIC, flag);
909         setFastLatinOptions(ownedSettings);
910     }
911 
912     /**
913      * {@inheritDoc}
914      *
915      * @param order the reordering codes to apply to this collator; if this is null or an empty array
916      * then this clears any existing reordering
917      * @throws IllegalArgumentException if the reordering codes are malformed in any way (e.g. duplicates, multiple reset codes, overlapping equivalent scripts)
918      * @see #getReorderCodes
919      * @see Collator#getEquivalentReorderCodes
920      * @see Collator.ReorderCodes
921      * @see UScript
922      * @stable ICU 4.8
923      */
924     @Override
setReorderCodes(int... order)925     public void setReorderCodes(int... order) {
926         checkNotFrozen();
927         int length = (order != null) ? order.length : 0;
928         if(length == 1 && order[0] == ReorderCodes.NONE) {
929             length = 0;
930         }
931         if(length == 0 ?
932                 settings.readOnly().reorderCodes.length == 0 :
933                 Arrays.equals(order, settings.readOnly().reorderCodes)) {
934             return;
935         }
936         CollationSettings defaultSettings = getDefaultSettings();
937         if(length == 1 && order[0] == Collator.ReorderCodes.DEFAULT) {
938             if(settings.readOnly() != defaultSettings) {
939                 CollationSettings ownedSettings = getOwnedSettings();
940                 ownedSettings.copyReorderingFrom(defaultSettings);
941                 setFastLatinOptions(ownedSettings);
942             }
943             return;
944         }
945         CollationSettings ownedSettings = getOwnedSettings();
946         if(length == 0) {
947             ownedSettings.resetReordering();
948         } else {
949             ownedSettings.setReordering(data, order.clone());
950         }
951         setFastLatinOptions(ownedSettings);
952     }
953 
setFastLatinOptions(CollationSettings ownedSettings)954     private void setFastLatinOptions(CollationSettings ownedSettings) {
955         ownedSettings.fastLatinOptions = CollationFastLatin.getOptions(
956                 data, ownedSettings, ownedSettings.fastLatinPrimaries);
957     }
958 
959     // public getters --------------------------------------------------------
960 
961     /**
962      * Gets the collation tailoring rules for this RuleBasedCollator.
963      * Equivalent to String getRules(false).
964      *
965      * @return the collation tailoring rules
966      * @see #getRules(boolean)
967      * @stable ICU 2.8
968      */
getRules()969     public String getRules() {
970         return tailoring.rules;
971     }
972 
973     /**
974      * Returns current rules.
975      * The argument defines whether full rules (root collation + tailored) rules are returned
976      * or just the tailoring.
977      *
978      * <p>The root collation rules are an <i>approximation</i> of the root collator's sort order.
979      * They are almost never used or useful at runtime and can be removed from the data.
980      * See <a href="http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales">User Guide:
981      * Collation Customization, Building on Existing Locales</a>
982      *
983      * <p>{@link #getRules()} should normally be used instead.
984      * @param fullrules
985      *            true if the rules that defines the full set of collation order is required, otherwise false for
986      *            returning only the tailored rules
987      * @return the current rules that defines this Collator.
988      * @see #getRules()
989      * @stable ICU 2.6
990      */
getRules(boolean fullrules)991     public String getRules(boolean fullrules) {
992         if (!fullrules) {
993             return tailoring.rules;
994         }
995         return CollationLoader.getRootRules() + tailoring.rules;
996     }
997 
998     /**
999      * Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
1000      *
1001      * @return a pointer to a UnicodeSet object containing all the code points and sequences that may sort differently
1002      *         than in the root collator.
1003      * @stable ICU 2.4
1004      */
1005     @Override
getTailoredSet()1006     public UnicodeSet getTailoredSet() {
1007         UnicodeSet tailored = new UnicodeSet();
1008         if(data.base != null) {
1009             new TailoredSet(tailored).forData(data);
1010         }
1011         return tailored;
1012     }
1013 
1014     /**
1015      * Gets unicode sets containing contractions and/or expansions of a collator
1016      *
1017      * @param contractions
1018      *            if not null, set to contain contractions
1019      * @param expansions
1020      *            if not null, set to contain expansions
1021      * @param addPrefixes
1022      *            add the prefix contextual elements to contractions
1023      * @throws Exception
1024      *             Throws an exception if any errors occurs.
1025      * @stable ICU 3.4
1026      */
getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, boolean addPrefixes)1027     public void getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, boolean addPrefixes)
1028             throws Exception {
1029         if (contractions != null) {
1030             contractions.clear();
1031         }
1032         if (expansions != null) {
1033             expansions.clear();
1034         }
1035         new ContractionsAndExpansions(contractions, expansions, null, addPrefixes).forData(data);
1036     }
1037 
1038     /**
1039      * Adds the contractions that start with character c to the set.
1040      * Ignores prefixes. Used by AlphabeticIndex.
1041      * @internal
1042      * @deprecated This API is ICU internal only.
1043      */
internalAddContractions(int c, UnicodeSet set)1044     void internalAddContractions(int c, UnicodeSet set) {
1045         new ContractionsAndExpansions(set, null, null, false).forCodePoint(data, c);
1046     }
1047 
1048     /**
1049      * <p>
1050      * Get a Collation key for the argument String source from this RuleBasedCollator.
1051      * </p>
1052      * <p>
1053      * General recommendation: <br>
1054      * If comparison are to be done to the same String multiple times, it would be more efficient to generate
1055      * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If the each
1056      * Strings are compared to only once, using the method RuleBasedCollator.compare(String, String) will have a better
1057      * performance.
1058      * </p>
1059      * <p>
1060      * See the class documentation for an explanation about CollationKeys.
1061      * </p>
1062      *
1063      * @param source
1064      *            the text String to be transformed into a collation key.
1065      * @return the CollationKey for the given String based on this RuleBasedCollator's collation rules. If the source
1066      *         String is null, a null CollationKey is returned.
1067      * @see CollationKey
1068      * @see #compare(String, String)
1069      * @see #getRawCollationKey
1070      * @stable ICU 2.8
1071      */
1072     @Override
getCollationKey(String source)1073     public CollationKey getCollationKey(String source) {
1074         if (source == null) {
1075             return null;
1076         }
1077         CollationBuffer buffer = null;
1078         try {
1079             buffer = getCollationBuffer();
1080             return getCollationKey(source, buffer);
1081         } finally {
1082             releaseCollationBuffer(buffer);
1083         }
1084     }
1085 
getCollationKey(String source, CollationBuffer buffer)1086     private CollationKey getCollationKey(String source, CollationBuffer buffer) {
1087         buffer.rawCollationKey = getRawCollationKey(source, buffer.rawCollationKey, buffer);
1088         return new CollationKey(source, buffer.rawCollationKey);
1089     }
1090 
1091     /**
1092      * Gets the simpler form of a CollationKey for the String source following the rules of this Collator and stores the
1093      * result into the user provided argument key. If key has a internal byte array of length that's too small for the
1094      * result, the internal byte array will be grown to the exact required size.
1095      *
1096      * @param source the text String to be transformed into a RawCollationKey
1097      * @param key output RawCollationKey to store results
1098      * @return If key is null, a new instance of RawCollationKey will be created and returned, otherwise the user
1099      *         provided key will be returned.
1100      * @see #getCollationKey
1101      * @see #compare(String, String)
1102      * @see RawCollationKey
1103      * @stable ICU 2.8
1104      */
1105     @Override
getRawCollationKey(String source, RawCollationKey key)1106     public RawCollationKey getRawCollationKey(String source, RawCollationKey key) {
1107         if (source == null) {
1108             return null;
1109         }
1110         CollationBuffer buffer = null;
1111         try {
1112             buffer = getCollationBuffer();
1113             return getRawCollationKey(source, key, buffer);
1114         } finally {
1115             releaseCollationBuffer(buffer);
1116         }
1117     }
1118 
1119     private static final class CollationKeyByteSink extends SortKeyByteSink {
CollationKeyByteSink(RawCollationKey key)1120         CollationKeyByteSink(RawCollationKey key) {
1121             super(key.bytes);
1122             key_ = key;
1123         }
1124 
1125         @Override
AppendBeyondCapacity(byte[] bytes, int start, int n, int length)1126         protected void AppendBeyondCapacity(byte[] bytes, int start, int n, int length) {
1127             // n > 0 && appended_ > capacity_
1128             if (Resize(n, length)) {
1129                 System.arraycopy(bytes, start, buffer_, length, n);
1130             }
1131         }
1132 
1133         @Override
Resize(int appendCapacity, int length)1134         protected boolean Resize(int appendCapacity, int length) {
1135             int newCapacity = 2 * buffer_.length;
1136             int altCapacity = length + 2 * appendCapacity;
1137             if (newCapacity < altCapacity) {
1138                 newCapacity = altCapacity;
1139             }
1140             if (newCapacity < 200) {
1141                 newCapacity = 200;
1142             }
1143             // Do not call key_.ensureCapacity(newCapacity) because we do not
1144             // keep key_.size in sync with appended_.
1145             // We only set it when we are done.
1146             byte[] newBytes = new byte[newCapacity];
1147             System.arraycopy(buffer_, 0, newBytes, 0, length);
1148             buffer_ = key_.bytes = newBytes;
1149             return true;
1150         }
1151 
1152         private RawCollationKey key_;
1153     }
1154 
getRawCollationKey(CharSequence source, RawCollationKey key, CollationBuffer buffer)1155     private RawCollationKey getRawCollationKey(CharSequence source, RawCollationKey key, CollationBuffer buffer) {
1156         if (key == null) {
1157             key = new RawCollationKey(simpleKeyLengthEstimate(source));
1158         } else if (key.bytes == null) {
1159             key.bytes = new byte[simpleKeyLengthEstimate(source)];
1160         }
1161         CollationKeyByteSink sink = new CollationKeyByteSink(key);
1162         writeSortKey(source, sink, buffer);
1163         key.size = sink.NumberOfBytesAppended();
1164         return key;
1165     }
1166 
simpleKeyLengthEstimate(CharSequence source)1167     private int simpleKeyLengthEstimate(CharSequence source) {
1168         return 2 * source.length() + 10;
1169     }
1170 
writeSortKey(CharSequence s, CollationKeyByteSink sink, CollationBuffer buffer)1171     private void writeSortKey(CharSequence s, CollationKeyByteSink sink, CollationBuffer buffer) {
1172         boolean numeric = settings.readOnly().isNumeric();
1173         if(settings.readOnly().dontCheckFCD()) {
1174             buffer.leftUTF16CollIter.setText(numeric, s, 0);
1175             CollationKeys.writeSortKeyUpToQuaternary(
1176                     buffer.leftUTF16CollIter, data.compressibleBytes, settings.readOnly(),
1177                     sink, Collation.PRIMARY_LEVEL,
1178                     CollationKeys.SIMPLE_LEVEL_FALLBACK, true);
1179         } else {
1180             buffer.leftFCDUTF16Iter.setText(numeric, s, 0);
1181             CollationKeys.writeSortKeyUpToQuaternary(
1182                     buffer.leftFCDUTF16Iter, data.compressibleBytes, settings.readOnly(),
1183                     sink, Collation.PRIMARY_LEVEL,
1184                     CollationKeys.SIMPLE_LEVEL_FALLBACK, true);
1185         }
1186         if(settings.readOnly().getStrength() == IDENTICAL) {
1187             writeIdenticalLevel(s, sink);
1188         }
1189         sink.Append(Collation.TERMINATOR_BYTE);
1190     }
1191 
writeIdenticalLevel(CharSequence s, CollationKeyByteSink sink)1192     private void writeIdenticalLevel(CharSequence s, CollationKeyByteSink sink) {
1193         // NFD quick check
1194         int nfdQCYesLimit = data.nfcImpl.decompose(s, 0, s.length(), null);
1195         sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
1196         // Sync the ByteArrayWrapper size with the key length.
1197         sink.key_.size = sink.NumberOfBytesAppended();
1198         int prev = 0;
1199         if(nfdQCYesLimit != 0) {
1200             prev = BOCSU.writeIdenticalLevelRun(prev, s, 0, nfdQCYesLimit, sink.key_);
1201         }
1202         // Is there non-NFD text?
1203         if(nfdQCYesLimit < s.length()) {
1204             int destLengthEstimate = s.length() - nfdQCYesLimit;
1205             StringBuilder nfd = new StringBuilder();
1206             data.nfcImpl.decompose(s, nfdQCYesLimit, s.length(), nfd, destLengthEstimate);
1207             BOCSU.writeIdenticalLevelRun(prev, nfd, 0, nfd.length(), sink.key_);
1208         }
1209         // Sync the key with the buffer again which got bytes appended and may have been reallocated.
1210         sink.setBufferAndAppended(sink.key_.bytes, sink.key_.size);
1211     }
1212 
1213     /**
1214      * Returns the CEs for the string.
1215      * @param str the string
1216      * @internal for tests & tools
1217      * @deprecated This API is ICU internal only.
1218      */
1219     @Deprecated
internalGetCEs(CharSequence str)1220     public long[] internalGetCEs(CharSequence str) {
1221         CollationBuffer buffer = null;
1222         try {
1223             buffer = getCollationBuffer();
1224             boolean numeric = settings.readOnly().isNumeric();
1225             CollationIterator iter;
1226             if(settings.readOnly().dontCheckFCD()) {
1227                 buffer.leftUTF16CollIter.setText(numeric, str, 0);
1228                 iter = buffer.leftUTF16CollIter;
1229             } else {
1230                 buffer.leftFCDUTF16Iter.setText(numeric, str, 0);
1231                 iter = buffer.leftFCDUTF16Iter;
1232             }
1233             int length = iter.fetchCEs() - 1;
1234             assert length >= 0 && iter.getCE(length) == Collation.NO_CE;
1235             long[] ces = new long[length];
1236             System.arraycopy(iter.getCEs(), 0, ces, 0, length);
1237             return ces;
1238         } finally {
1239             releaseCollationBuffer(buffer);
1240         }
1241     }
1242 
1243     /**
1244      * Returns this Collator's strength attribute. The strength attribute
1245      * determines the minimum level of difference considered significant.
1246      *
1247      * <p>{@icunote} This can return QUATERNARY strength, which is not supported by the
1248      * JDK version.
1249      *
1250      * <p>See the Collator class description for more details.
1251      *
1252      * @return this Collator's current strength attribute.
1253      * @see #setStrength
1254      * @see #PRIMARY
1255      * @see #SECONDARY
1256      * @see #TERTIARY
1257      * @see #QUATERNARY
1258      * @see #IDENTICAL
1259      * @stable ICU 2.8
1260      */
1261     @Override
getStrength()1262     public int getStrength() {
1263         return settings.readOnly().getStrength();
1264     }
1265 
1266     /**
1267      * Returns the decomposition mode of this Collator. The decomposition mode
1268      * determines how Unicode composed characters are handled.
1269      *
1270      * <p>See the Collator class description for more details.
1271      *
1272      * @return the decomposition mode
1273      * @see #setDecomposition
1274      * @see #NO_DECOMPOSITION
1275      * @see #CANONICAL_DECOMPOSITION
1276      * @stable ICU 2.8
1277      */
1278     @Override
getDecomposition()1279     public int getDecomposition() {
1280         return (settings.readOnly().options & CollationSettings.CHECK_FCD) != 0 ?
1281                 CANONICAL_DECOMPOSITION : NO_DECOMPOSITION;
1282     }
1283 
1284     /**
1285      * Return true if an uppercase character is sorted before the corresponding lowercase character. See
1286      * setCaseFirst(boolean) for details.
1287      *
1288      * @see #setUpperCaseFirst
1289      * @see #setLowerCaseFirst
1290      * @see #isLowerCaseFirst
1291      * @see #setCaseFirstDefault
1292      * @return true if upper cased characters are sorted before lower cased characters, false otherwise
1293      * @stable ICU 2.8
1294      */
isUpperCaseFirst()1295     public boolean isUpperCaseFirst() {
1296         return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST_AND_UPPER_MASK);
1297     }
1298 
1299     /**
1300      * Return true if a lowercase character is sorted before the corresponding uppercase character. See
1301      * setCaseFirst(boolean) for details.
1302      *
1303      * @see #setUpperCaseFirst
1304      * @see #setLowerCaseFirst
1305      * @see #isUpperCaseFirst
1306      * @see #setCaseFirstDefault
1307      * @return true lower cased characters are sorted before upper cased characters, false otherwise
1308      * @stable ICU 2.8
1309      */
isLowerCaseFirst()1310     public boolean isLowerCaseFirst() {
1311         return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST);
1312     }
1313 
1314     /**
1315      * Checks if the alternate handling behavior is the UCA defined SHIFTED or NON_IGNORABLE. If return value is true,
1316      * then the alternate handling attribute for the Collator is SHIFTED. Otherwise if return value is false, then the
1317      * alternate handling attribute for the Collator is NON_IGNORABLE See setAlternateHandlingShifted(boolean) for more
1318      * details.
1319      *
1320      * @return true or false
1321      * @see #setAlternateHandlingShifted(boolean)
1322      * @see #setAlternateHandlingDefault
1323      * @stable ICU 2.8
1324      */
isAlternateHandlingShifted()1325     public boolean isAlternateHandlingShifted() {
1326         return settings.readOnly().getAlternateHandling();
1327     }
1328 
1329     /**
1330      * Checks if case level is set to true. See setCaseLevel(boolean) for details.
1331      *
1332      * @return the case level mode
1333      * @see #setCaseLevelDefault
1334      * @see #isCaseLevel
1335      * @see #setCaseLevel(boolean)
1336      * @stable ICU 2.8
1337      */
isCaseLevel()1338     public boolean isCaseLevel() {
1339         return (settings.readOnly().options & CollationSettings.CASE_LEVEL) != 0;
1340     }
1341 
1342     /**
1343      * Checks if French Collation is set to true. See setFrenchCollation(boolean) for details.
1344      *
1345      * @return true if French Collation is set to true, false otherwise
1346      * @see #setFrenchCollation(boolean)
1347      * @see #setFrenchCollationDefault
1348      * @stable ICU 2.8
1349      */
isFrenchCollation()1350     public boolean isFrenchCollation() {
1351         return (settings.readOnly().options & CollationSettings.BACKWARD_SECONDARY) != 0;
1352     }
1353 
1354     /**
1355      * Checks if the Hiragana Quaternary mode is set on. See setHiraganaQuaternary(boolean) for more details.
1356      *
1357      * <p>This attribute was an implementation detail of the CLDR Japanese tailoring.
1358      * Since ICU 50, this attribute is not settable any more via API functions.
1359      * Since CLDR 25/ICU 53, explicit quaternary relations are used
1360      * to achieve the same Japanese sort order.
1361      *
1362      * @return false
1363      * @see #setHiraganaQuaternaryDefault
1364      * @see #setHiraganaQuaternary(boolean)
1365      * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
1366      */
1367     @Deprecated
isHiraganaQuaternary()1368     public boolean isHiraganaQuaternary() {
1369         return false;
1370     }
1371 
1372     /**
1373      * {@icu} Gets the variable top value of a Collator.
1374      *
1375      * @return the variable top primary weight
1376      * @see #getMaxVariable
1377      * @stable ICU 2.6
1378      */
1379     @Override
getVariableTop()1380     public int getVariableTop() {
1381         return (int)settings.readOnly().variableTop;
1382     }
1383 
1384     /**
1385      * Method to retrieve the numeric collation value. When numeric collation is turned on, this Collator generates a
1386      * collation key for the numeric value of substrings of digits. This is a way to get '100' to sort AFTER '2'
1387      *
1388      * @see #setNumericCollation
1389      * @see #setNumericCollationDefault
1390      * @return true if numeric collation is turned on, false otherwise
1391      * @stable ICU 2.8
1392      */
getNumericCollation()1393     public boolean getNumericCollation() {
1394         return (settings.readOnly().options & CollationSettings.NUMERIC) != 0;
1395     }
1396 
1397     /**
1398      * Retrieves the reordering codes for this collator.
1399      * These reordering codes are a combination of UScript codes and ReorderCodes.
1400      * @return a copy of the reordering codes for this collator;
1401      * if none are set then returns an empty array
1402      * @see #setReorderCodes
1403      * @see Collator#getEquivalentReorderCodes
1404      * @stable ICU 4.8
1405      */
1406     @Override
getReorderCodes()1407     public int[] getReorderCodes() {
1408         return settings.readOnly().reorderCodes.clone();
1409     }
1410 
1411     // public other methods -------------------------------------------------
1412 
1413     /**
1414      * {@inheritDoc}
1415      * @stable ICU 2.8
1416      */
1417     @Override
equals(Object obj)1418     public boolean equals(Object obj) {
1419         if (this == obj) {
1420             return true;
1421         }
1422         if (!super.equals(obj)) {
1423             return false;
1424         }
1425         RuleBasedCollator o = (RuleBasedCollator) obj;
1426         if(!settings.readOnly().equals(o.settings.readOnly())) { return false; }
1427         if(data == o.data) { return true; }
1428         boolean thisIsRoot = data.base == null;
1429         boolean otherIsRoot = o.data.base == null;
1430         assert(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
1431         if(thisIsRoot != otherIsRoot) { return false; }
1432         if((thisIsRoot || tailoring.rules.length() != 0) &&
1433                 (otherIsRoot || o.tailoring.rules.length() != 0)) {
1434             // Shortcut: If both collators have valid rule strings, then compare those.
1435             if(tailoring.rules.equals(o.tailoring.rules)) { return true; }
1436         }
1437         // Different rule strings can result in the same or equivalent tailoring.
1438         // The rule strings are optional in ICU resource bundles, although included by default.
1439         // cloneBinary() drops the rule string.
1440         UnicodeSet thisTailored = getTailoredSet();
1441         UnicodeSet otherTailored = o.getTailoredSet();
1442         if(!thisTailored.equals(otherTailored)) { return false; }
1443         // For completeness, we should compare all of the mappings;
1444         // or we should create a list of strings, sort it with one collator,
1445         // and check if both collators compare adjacent strings the same
1446         // (order & strength, down to quaternary); or similar.
1447         // Testing equality of collators seems unusual.
1448         return true;
1449     }
1450 
1451     /**
     * Generates a hash code for this RuleBasedCollator.
     *
     * @return the hash code for this Collator
1455      * @stable ICU 2.8
1456      */
1457     @Override
hashCode()1458     public int hashCode() {
1459         int h = settings.readOnly().hashCode();
1460         if(data.base == null) { return h; }  // root collator
1461         // Do not rely on the rule string, see comments in operator==().
1462         UnicodeSet set = getTailoredSet();
1463         UnicodeSetIterator iter = new UnicodeSetIterator(set);
1464         while(iter.next() && iter.codepoint != UnicodeSetIterator.IS_STRING) {
1465             h ^= data.getCE32(iter.codepoint);
1466         }
1467         return h;
1468     }
1469 
1470     /**
     * Compares the source text String to the target text String according to the collation rules, strength and
     * decomposition mode for this RuleBasedCollator. Returns an integer less than, equal to or greater than zero
     * depending on whether the source String is less than, equal to or greater than the target String. See the Collator
     * class description for an example of use.
1475      * <p>
     * General recommendation: <br>
     * If comparisons are to be done on the same String multiple times, it is more efficient to generate
     * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If speed
     * performance is critical and object instantiation is to be reduced, further optimization may be achieved by
     * generating a simpler key of the form RawCollationKey and reusing this RawCollationKey object with the method
     * RuleBasedCollator.getRawCollationKey. The internal byte representation can be directly accessed via
     * RawCollationKey and stored for future use. Like CollationKey, RawCollationKey provides a method
     * RawCollationKey.compareTo for key comparisons. If each String is compared only once, using the method
     * RuleBasedCollator.compare(String, String) will have better performance.
     * </p>
1486      *
1487      * @param source
1488      *            the source text String.
1489      * @param target
1490      *            the target text String.
1491      * @return Returns an integer value. Value is less than zero if source is less than target, value is zero if source
1492      *         and target are equal, value is greater than zero if source is greater than target.
1493      * @see CollationKey
1494      * @see #getCollationKey
1495      * @stable ICU 2.8
1496      */
1497     @Override
compare(String source, String target)1498     public int compare(String source, String target) {
1499         return doCompare(source, target);
1500     }
1501 
1502     /**
1503     * Abstract iterator for identical-level string comparisons.
1504     * Returns FCD code points and handles temporary switching to NFD.
1505     *
1506     * <p>As with CollationIterator,
1507     * Java NFDIterator instances are partially constructed and cached,
1508     * and completed when reset for use.
1509     * C++ NFDIterator instances are stack-allocated.
1510     */
1511     private static abstract class NFDIterator {
1512         /**
1513          * Partial constructor, must call reset().
1514          */
NFDIterator()1515         NFDIterator() {}
reset()1516         final void reset() {
1517             index = -1;
1518         }
1519 
1520         /**
1521          * Returns the next code point from the internal normalization buffer,
1522          * or else the next text code point.
1523          * Returns -1 at the end of the text.
1524          */
nextCodePoint()1525         final int nextCodePoint() {
1526             if(index >= 0) {
1527                 if(index == decomp.length()) {
1528                     index = -1;
1529                 } else {
1530                     int c = Character.codePointAt(decomp, index);
1531                     index += Character.charCount(c);
1532                     return c;
1533                 }
1534             }
1535             return nextRawCodePoint();
1536         }
1537         /**
1538          * @param nfcImpl
1539          * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
1540          * @return the first code point in c's decomposition,
1541          *         or c itself if it was decomposed already or if it does not decompose
1542          */
nextDecomposedCodePoint(Normalizer2Impl nfcImpl, int c)1543         final int nextDecomposedCodePoint(Normalizer2Impl nfcImpl, int c) {
1544             if(index >= 0) { return c; }
1545             decomp = nfcImpl.getDecomposition(c);
1546             if(decomp == null) { return c; }
1547             c = Character.codePointAt(decomp, 0);
1548             index = Character.charCount(c);
1549             return c;
1550         }
1551 
1552         /**
1553          * Returns the next text code point in FCD order.
1554          * Returns -1 at the end of the text.
1555          */
nextRawCodePoint()1556         protected abstract int nextRawCodePoint();
1557 
1558         private String decomp;
1559         private int index;
1560     }
1561 
1562     private static class UTF16NFDIterator extends NFDIterator {
UTF16NFDIterator()1563         UTF16NFDIterator() {}
setText(CharSequence seq, int start)1564         void setText(CharSequence seq, int start) {
1565             reset();
1566             s = seq;
1567             pos = start;
1568         }
1569 
1570         @Override
nextRawCodePoint()1571         protected int nextRawCodePoint() {
1572             if(pos == s.length()) { return Collation.SENTINEL_CP; }
1573             int c = Character.codePointAt(s, pos);
1574             pos += Character.charCount(c);
1575             return c;
1576         }
1577 
1578         protected CharSequence s;
1579         protected int pos;
1580     }
1581 
1582     private static final class FCDUTF16NFDIterator extends UTF16NFDIterator {
FCDUTF16NFDIterator()1583         FCDUTF16NFDIterator() {}
setText(Normalizer2Impl nfcImpl, CharSequence seq, int start)1584         void setText(Normalizer2Impl nfcImpl, CharSequence seq, int start) {
1585             reset();
1586             int spanLimit = nfcImpl.makeFCD(seq, start, seq.length(), null);
1587             if(spanLimit == seq.length()) {
1588                 s = seq;
1589                 pos = start;
1590             } else {
1591                 if(str == null) {
1592                     str = new StringBuilder();
1593                 } else {
1594                     str.setLength(0);
1595                 }
1596                 str.append(seq, start, spanLimit);
1597                 ReorderingBuffer buffer = new ReorderingBuffer(nfcImpl, str, seq.length() - start);
1598                 nfcImpl.makeFCD(seq, spanLimit, seq.length(), buffer);
1599                 s = str;
1600                 pos = 0;
1601             }
1602         }
1603 
1604         private StringBuilder str;
1605     }
1606 
compareNFDIter(Normalizer2Impl nfcImpl, NFDIterator left, NFDIterator right)1607     private static final int compareNFDIter(Normalizer2Impl nfcImpl, NFDIterator left, NFDIterator right) {
1608         for(;;) {
1609             // Fetch the next FCD code point from each string.
1610             int leftCp = left.nextCodePoint();
1611             int rightCp = right.nextCodePoint();
1612             if(leftCp == rightCp) {
1613                 if(leftCp < 0) { break; }
1614                 continue;
1615             }
1616             // If they are different, then decompose each and compare again.
1617             if(leftCp < 0) {
1618                 leftCp = -2;  // end of string
1619             } else if(leftCp == 0xfffe) {
1620                 leftCp = -1;  // U+FFFE: merge separator
1621             } else {
1622                 leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
1623             }
1624             if(rightCp < 0) {
1625                 rightCp = -2;  // end of string
1626             } else if(rightCp == 0xfffe) {
1627                 rightCp = -1;  // U+FFFE: merge separator
1628             } else {
1629                 rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
1630             }
1631             if(leftCp < rightCp) { return Collation.LESS; }
1632             if(leftCp > rightCp) { return Collation.GREATER; }
1633         }
1634         return Collation.EQUAL;
1635     }
1636 
1637     /**
1638      * Compares two CharSequences.
1639      * @internal
1640      * @deprecated This API is ICU internal only.
1641      */
1642     @Override
1643     @Deprecated
doCompare(CharSequence left, CharSequence right)1644     protected int doCompare(CharSequence left, CharSequence right) {
1645         if(left == right) {
1646             return Collation.EQUAL;
1647         }
1648 
1649         // Identical-prefix test.
1650         int equalPrefixLength = 0;
1651         for(;;) {
1652             if(equalPrefixLength == left.length()) {
1653                 if(equalPrefixLength == right.length()) { return Collation.EQUAL; }
1654                 break;
1655             } else if(equalPrefixLength == right.length() ||
1656                       left.charAt(equalPrefixLength) != right.charAt(equalPrefixLength)) {
1657                 break;
1658             }
1659             ++equalPrefixLength;
1660         }
1661 
1662         CollationSettings roSettings = settings.readOnly();
1663         boolean numeric = roSettings.isNumeric();
1664         if(equalPrefixLength > 0) {
1665             if((equalPrefixLength != left.length() &&
1666                         data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) ||
1667                     (equalPrefixLength != right.length() &&
1668                         data.isUnsafeBackward(right.charAt(equalPrefixLength), numeric))) {
1669                 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1670                 while(--equalPrefixLength > 0 &&
1671                         data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) {}
1672             }
1673             // Notes:
1674             // - A longer string can compare equal to a prefix of it if only ignorables follow.
1675             // - With a backward level, a longer string can compare less-than a prefix of it.
1676 
1677             // Pass the actual start of each string into the CollationIterators,
1678             // plus the equalPrefixLength position,
1679             // so that prefix matches back into the equal prefix work.
1680         }
1681 
1682         int result;
1683         int fastLatinOptions = roSettings.fastLatinOptions;
1684         if(fastLatinOptions >= 0 &&
1685                 (equalPrefixLength == left.length() ||
1686                     left.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX) &&
1687                 (equalPrefixLength == right.length() ||
1688                     right.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX)) {
1689             result = CollationFastLatin.compareUTF16(data.fastLatinTable,
1690                                                       roSettings.fastLatinPrimaries,
1691                                                       fastLatinOptions,
1692                                                       left, right, equalPrefixLength);
1693         } else {
1694             result = CollationFastLatin.BAIL_OUT_RESULT;
1695         }
1696 
1697         if(result == CollationFastLatin.BAIL_OUT_RESULT) {
1698             CollationBuffer buffer = null;
1699             try {
1700                 buffer = getCollationBuffer();
1701                 if(roSettings.dontCheckFCD()) {
1702                     buffer.leftUTF16CollIter.setText(numeric, left, equalPrefixLength);
1703                     buffer.rightUTF16CollIter.setText(numeric, right, equalPrefixLength);
1704                     result = CollationCompare.compareUpToQuaternary(
1705                             buffer.leftUTF16CollIter, buffer.rightUTF16CollIter, roSettings);
1706                 } else {
1707                     buffer.leftFCDUTF16Iter.setText(numeric, left, equalPrefixLength);
1708                     buffer.rightFCDUTF16Iter.setText(numeric, right, equalPrefixLength);
1709                     result = CollationCompare.compareUpToQuaternary(
1710                             buffer.leftFCDUTF16Iter, buffer.rightFCDUTF16Iter, roSettings);
1711                 }
1712             } finally {
1713                 releaseCollationBuffer(buffer);
1714             }
1715         }
1716         if(result != Collation.EQUAL || roSettings.getStrength() < Collator.IDENTICAL) {
1717             return result;
1718         }
1719 
1720         CollationBuffer buffer = null;
1721         try {
1722             buffer = getCollationBuffer();
1723             // Compare identical level.
1724             Normalizer2Impl nfcImpl = data.nfcImpl;
1725             if(roSettings.dontCheckFCD()) {
1726                 buffer.leftUTF16NFDIter.setText(left, equalPrefixLength);
1727                 buffer.rightUTF16NFDIter.setText(right, equalPrefixLength);
1728                 return compareNFDIter(nfcImpl, buffer.leftUTF16NFDIter, buffer.rightUTF16NFDIter);
1729             } else {
1730                 buffer.leftFCDUTF16NFDIter.setText(nfcImpl, left, equalPrefixLength);
1731                 buffer.rightFCDUTF16NFDIter.setText(nfcImpl, right, equalPrefixLength);
1732                 return compareNFDIter(nfcImpl, buffer.leftFCDUTF16NFDIter, buffer.rightFCDUTF16NFDIter);
1733             }
1734         } finally {
1735             releaseCollationBuffer(buffer);
1736         }
1737     }
1738 
1739     // package private constructors ------------------------------------------
1740 
RuleBasedCollator(CollationTailoring t, ULocale vl)1741     RuleBasedCollator(CollationTailoring t, ULocale vl) {
1742         data = t.data;
1743         settings = t.settings.clone();
1744         tailoring = t;
1745         validLocale = vl;
1746         actualLocaleIsSameAsValid = false;
1747     }
1748 
adoptTailoring(CollationTailoring t)1749     private void adoptTailoring(CollationTailoring t) {
1750         assert(settings == null && data == null && tailoring == null);
1751         data = t.data;
1752         settings = t.settings.clone();
1753         tailoring = t;
1754         validLocale = t.actualLocale;
1755         actualLocaleIsSameAsValid = false;
1756     }
1757 
1758     // package private methods -----------------------------------------------
1759 
1760     /**
1761      * Tests whether a character is "unsafe" for use as a collation starting point.
1762      *
1763      * @param c code point or code unit
1764      * @return true if c is unsafe
1765      * @see CollationElementIterator#setOffset(int)
1766      */
isUnsafe(int c)1767     final boolean isUnsafe(int c) {
1768         return data.isUnsafeBackward(c, settings.readOnly().isNumeric());
1769     }
1770 
    /**
     * Lock that getCollationBuffer() acquires and releaseCollationBuffer() releases
     * while the collator is frozen.
     */
1774     private Lock frozenLock;
1775 
1776     private static final class CollationBuffer {
CollationBuffer(CollationData data)1777         private CollationBuffer(CollationData data) {
1778             leftUTF16CollIter = new UTF16CollationIterator(data);
1779             rightUTF16CollIter = new UTF16CollationIterator(data);
1780             leftFCDUTF16Iter = new FCDUTF16CollationIterator(data);
1781             rightFCDUTF16Iter = new FCDUTF16CollationIterator(data);
1782             leftUTF16NFDIter = new UTF16NFDIterator();
1783             rightUTF16NFDIter = new UTF16NFDIterator();
1784             leftFCDUTF16NFDIter = new FCDUTF16NFDIterator();
1785             rightFCDUTF16NFDIter = new FCDUTF16NFDIterator();
1786         }
1787 
1788         UTF16CollationIterator leftUTF16CollIter;
1789         UTF16CollationIterator rightUTF16CollIter;
1790         FCDUTF16CollationIterator leftFCDUTF16Iter;
1791         FCDUTF16CollationIterator rightFCDUTF16Iter;
1792 
1793         UTF16NFDIterator leftUTF16NFDIter;
1794         UTF16NFDIterator rightUTF16NFDIter;
1795         FCDUTF16NFDIterator leftFCDUTF16NFDIter;
1796         FCDUTF16NFDIterator rightFCDUTF16NFDIter;
1797 
1798         RawCollationKey rawCollationKey;
1799     }
1800 
1801     /**
1802      * Get the version of this collator object.
1803      *
1804      * @return the version object associated with this collator
1805      * @stable ICU 2.8
1806      */
1807     @Override
getVersion()1808     public VersionInfo getVersion() {
1809         int version = tailoring.version;
1810         int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor();
1811         return VersionInfo.getInstance(
1812                 (version >>> 24) + (rtVersion << 4) + (rtVersion >> 4),
1813                 ((version >> 16) & 0xff), ((version >> 8) & 0xff), (version & 0xff));
1814     }
1815 
1816     /**
1817      * Get the UCA version of this collator object.
1818      *
1819      * @return the version object associated with this collator
1820      * @stable ICU 2.8
1821      */
1822     @Override
getUCAVersion()1823     public VersionInfo getUCAVersion() {
1824         VersionInfo v = getVersion();
1825         // Note: This is tied to how the current implementation encodes the UCA version
1826         // in the overall getVersion().
1827         // Alternatively, we could load the root collator and get at lower-level data from there.
1828         // Either way, it will reflect the input collator's UCA version only
1829         // if it is a known implementation.
1830         // (C++ comment) It would be cleaner to make this a virtual Collator method.
1831         // (In Java, it is virtual.)
1832         return VersionInfo.getInstance(v.getMinor() >> 3, v.getMinor() & 7, v.getMilli() >> 6, 0);
1833     }
1834 
1835     private CollationBuffer collationBuffer;
1836 
getCollationBuffer()1837     private final CollationBuffer getCollationBuffer() {
1838         if (isFrozen()) {
1839             frozenLock.lock();
1840         } else if (collationBuffer == null) {
1841             collationBuffer = new CollationBuffer(data);
1842         }
1843         return collationBuffer;
1844     }
1845 
releaseCollationBuffer(CollationBuffer buffer)1846     private final void releaseCollationBuffer(CollationBuffer buffer) {
1847         if (isFrozen()) {
1848             frozenLock.unlock();
1849         }
1850     }
1851 
1852     /**
1853      * {@inheritDoc}
1854      * @draft ICU 53 (retain)
1855      * @provisional This API might change or be removed in a future release.
1856      */
1857     @Override
getLocale(ULocale.Type type)1858     public ULocale getLocale(ULocale.Type type) {
1859         if (type == ULocale.ACTUAL_LOCALE) {
1860             return actualLocaleIsSameAsValid ? validLocale : tailoring.actualLocale;
1861         } else if(type == ULocale.VALID_LOCALE) {
1862             return validLocale;
1863         } else {
1864             throw new IllegalArgumentException("unknown ULocale.Type " + type);
1865         }
1866     }
1867 
1868     /**
1869      * {@inheritDoc}
1870      */
1871     @Override
setLocale(ULocale valid, ULocale actual)1872     void setLocale(ULocale valid, ULocale actual) {
1873         // This method is called
1874         // by other protected functions that checks and makes sure that
1875         // valid and actual are not null before passing
1876         assert (valid == null) == (actual == null);
1877         // Another check we could do is that the actual locale is at
1878         // the same level or less specific than the valid locale.
1879         // TODO: Starting with Java 7, use Objects.equals(a, b).
1880         if(Utility.objectEquals(actual, tailoring.actualLocale)) {
1881             actualLocaleIsSameAsValid = false;
1882         } else {
1883             assert(Utility.objectEquals(actual, valid));
1884             actualLocaleIsSameAsValid = true;
1885         }
1886         // Do not modify tailoring.actualLocale:
1887         // We cannot be sure that that would be thread-safe.
1888         validLocale = valid;
1889     }
1890 
    /** Collation data, taken from the tailoring by the constructor and adoptTailoring(). */
    CollationData data;
    /** This collator's settings, cloned from the tailoring's settings. */
    SharedObject.Reference<CollationSettings> settings;  // reference-counted
    /** The tailoring this collator was initialized from. */
    CollationTailoring tailoring;  // C++: reference-counted
    /** Locale returned by getLocale() for ULocale.VALID_LOCALE. */
    private ULocale validLocale;
    // Note: No need in Java to track which attributes have been set explicitly.
    // int or EnumSet  explicitlySetAttributes;

    /**
     * When true, getLocale() reports validLocale for ULocale.ACTUAL_LOCALE
     * instead of tailoring.actualLocale.
     */
    private boolean actualLocaleIsSameAsValid;
1899 }
1900