1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This code is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 only, as
8  * published by the Free Software Foundation.  Oracle designates this
9  * particular file as subject to the "Classpath" exception as provided
10  * by Oracle in the LICENSE file that accompanied this code.
11  *
12  * This code is distributed in the hope that it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15  * version 2 for more details (a copy is included in the LICENSE file that
16  * accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License version
19  * 2 along with this work; if not, write to the Free Software Foundation,
20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23  * or visit www.oracle.com if you need additional information or have any
24  * questions.
25  */
26 
27 /*
28  * (C) Copyright Taligent, Inc. 1996-1998 -  All Rights Reserved
29  * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
30  *
31  *   The original version of this source code and documentation is copyrighted
32  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
33  * materials are provided under terms of a License Agreement between Taligent
34  * and Sun. This technology is protected by multiple US and International
35  * patents. This notice and attribution to Taligent may not be removed.
36  *   Taligent is a registered trademark of Taligent, Inc.
37  *
38  */
39 
40 package java.text;
41 
42 import java.util.Locale;
43 
44 import libcore.icu.ICU;
45 
46 /**
47  * The {@code Collator} class performs locale-sensitive
48  * {@code String} comparison. You use this class to build
49  * searching and sorting routines for natural language text.
50  *
51  * <p>
52  * {@code Collator} is an abstract base class. Subclasses
53  * implement specific collation strategies. One subclass,
54  * {@code RuleBasedCollator}, is currently provided with
55  * the Java Platform and is applicable to a wide set of languages. Other
56  * subclasses may be created to handle more specialized needs.
57  *
58  * <p>
59  * Like other locale-sensitive classes, you can use the static
60  * factory method, {@code getInstance}, to obtain the appropriate
61  * {@code Collator} object for a given locale. You will only need
62  * to look at the subclasses of {@code Collator} if you need
63  * to understand the details of a particular collation strategy or
64  * if you need to modify that strategy.
65  *
66  * <p>
67  * The following example shows how to compare two strings using
68  * the {@code Collator} for the default locale.
69  * <blockquote>
70  * <pre>{@code
71  * // Compare two strings in the default locale
72  * Collator myCollator = Collator.getInstance();
73  * if( myCollator.compare("abc", "ABC") < 0 )
74  *     System.out.println("abc is less than ABC");
75  * else
76  *     System.out.println("abc is greater than or equal to ABC");
77  * }</pre>
78  * </blockquote>
79  *
80  * <p>
81  * You can set a {@code Collator}'s <em>strength</em> property
82  * to determine the level of difference considered significant in
83  * comparisons. Four strengths are provided: {@code PRIMARY},
84  * {@code SECONDARY}, {@code TERTIARY}, and {@code IDENTICAL}.
85  * The exact assignment of strengths to language features is
86  * locale dependent.  For example, in Czech, "e" and "f" are considered
87  * primary differences, while "e" and "&#283;" are secondary differences,
88  * "e" and "E" are tertiary differences and "e" and "e" are identical.
89  * The following shows how both case and accents could be ignored for
90  * US English.
91  * <blockquote>
92  * <pre>
93  * //Get the Collator for US English and set its strength to PRIMARY
94  * Collator usCollator = Collator.getInstance(Locale.US);
95  * usCollator.setStrength(Collator.PRIMARY);
96  * if( usCollator.compare("abc", "ABC") == 0 ) {
97  *     System.out.println("Strings are equivalent");
98  * }
99  * </pre>
100  * </blockquote>
101  * <p>
102  * For comparing {@code String}s exactly once, the {@code compare}
103  * method provides the best performance. When sorting a list of
104  * {@code String}s however, it is generally necessary to compare each
105  * {@code String} multiple times. In this case, {@code CollationKey}s
106  * provide better performance. The {@code CollationKey} class converts
107  * a {@code String} to a series of bits that can be compared bitwise
108  * against other {@code CollationKey}s. A {@code CollationKey} is
109  * created by a {@code Collator} object for a given {@code String}.
110  * <br>
 * <strong>Note:</strong> {@code CollationKey}s from different
 * {@code Collator}s cannot be compared. See the class description
 * for {@link CollationKey} for an example using
 * {@code CollationKey}s.
115  *
116  * @see         RuleBasedCollator
117  * @see         CollationKey
118  * @see         CollationElementIterator
119  * @see         Locale
120  * @author      Helena Shih, Laura Werner, Richard Gillam
121  * @since 1.1
122  */
123 
124 public abstract class Collator
125     implements java.util.Comparator<Object>, Cloneable
126 {
    /**
     * Collator strength value.  When set, only PRIMARY differences are
     * considered significant during comparison. The assignment of strengths
     * to language features is locale dependent. A common example is for
     * different base letters ("a" vs "b") to be considered a PRIMARY difference.
     * @see java.text.Collator#setStrength
     * @see java.text.Collator#getStrength
     */
    public static final int PRIMARY = 0;
    /**
     * Collator strength value.  When set, only SECONDARY and above differences are
     * considered significant during comparison. The assignment of strengths
     * to language features is locale dependent. A common example is for
     * different accented forms of the same base letter ("a" vs "\u00E4") to be
     * considered a SECONDARY difference.
     * @see java.text.Collator#setStrength
     * @see java.text.Collator#getStrength
     */
    public static final int SECONDARY = 1;
    /**
     * Collator strength value.  When set, only TERTIARY and above differences are
     * considered significant during comparison. The assignment of strengths
     * to language features is locale dependent. A common example is for
     * case differences ("a" vs "A") to be considered a TERTIARY difference.
     * @see java.text.Collator#setStrength
     * @see java.text.Collator#getStrength
     */
    public static final int TERTIARY = 2;

    /**
     * Collator strength value.  When set, all differences are
     * considered significant during comparison. The assignment of strengths
     * to language features is locale dependent. A common example is for control
     * characters ("&#092;u0001" vs "&#092;u0002") to be considered equal at the
     * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL
     * level.  Additionally, differences between pre-composed accents such as
     * "&#092;u00C0" (A-grave) and combining accents such as "A&#092;u0300"
     * (A, combining-grave) will be considered significant at the IDENTICAL
     * level if decomposition is set to NO_DECOMPOSITION.
     * <p>
     * This value is not the same number as
     * {@code android.icu.text.Collator.IDENTICAL}; {@link #getStrength} and
     * {@link #setStrength} translate between the two.
     * @see java.text.Collator#setStrength
     * @see java.text.Collator#getStrength
     */
    public static final int IDENTICAL = 3;
168 
    /**
     * Decomposition mode value. With NO_DECOMPOSITION
     * set, accented characters will not be decomposed for collation. This
     * is the default setting and provides the fastest collation but
     * will only produce correct results for languages that do not use accents.
     * @see java.text.Collator#getDecomposition
     * @see java.text.Collator#setDecomposition
     */
    public static final int NO_DECOMPOSITION = 0;

    /**
     * Decomposition mode value. With CANONICAL_DECOMPOSITION
     * set, characters that are canonical variants according to the Unicode
     * standard will be decomposed for collation. This should be used to get
     * correct collation of accented characters.
     * <p>
     * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
     * described in
     * <a href="http://www.unicode.org/reports/tr15/">Unicode
     * Standard Annex #15: Unicode Normalization Forms</a>.
     * @see java.text.Collator#getDecomposition
     * @see java.text.Collator#setDecomposition
     */
    public static final int CANONICAL_DECOMPOSITION = 1;

    /**
     * Decomposition mode value. With FULL_DECOMPOSITION
     * set, both Unicode canonical variants and Unicode compatibility variants
     * will be decomposed for collation.  This causes not only accented
     * characters to be collated, but also characters that have special formats
     * to be collated with their nominal form. For example, the half-width and
     * full-width ASCII and Katakana characters are then collated together.
     * FULL_DECOMPOSITION is the most complete and therefore the slowest
     * decomposition mode.
     * <p>
     * FULL_DECOMPOSITION corresponds to Normalization Form KD as
     * described in
     * <a href="http://www.unicode.org/reports/tr15/">Unicode
     * Standard Annex #15: Unicode Normalization Forms</a>.
     * <p>
     * Note: in this implementation {@link #setDecomposition} only maps
     * {@link #NO_DECOMPOSITION} and {@link #CANONICAL_DECOMPOSITION} to ICU
     * modes; passing this value results in an
     * {@code IllegalArgumentException}.
     * @see java.text.Collator#getDecomposition
     * @see java.text.Collator#setDecomposition
     */
    public static final int FULL_DECOMPOSITION = 2;
212 
213     /**
214      * Gets the Collator for the current default locale.
215      * The default locale is determined by java.util.Locale.getDefault.
216      * @return the Collator for the default locale.(for example, en_US)
217      * @see java.util.Locale#getDefault
218      */
getInstance()219     public static synchronized Collator getInstance() {
220         return getInstance(Locale.getDefault());
221     }
222 
223     /**
224      * Gets the Collator for the desired locale.
225      * @param desiredLocale the desired locale.
226      * @return the Collator for the desired locale.
227      * @see java.util.Locale
228      * @see java.util.ResourceBundle
229      */
getInstance(Locale desiredLocale)230     public static Collator getInstance(Locale desiredLocale) {
231         // BEGIN Android-changed: Switched to ICU.
232         synchronized(Collator.class) {
233             if (desiredLocale == null) {
234                 throw new NullPointerException("locale == null");
235             }
236             return new RuleBasedCollator((android.icu.text.RuleBasedCollator)
237                     android.icu.text.Collator.getInstance(desiredLocale));
238         }
239         // END Android-changed: Switched to ICU.
240     }
241 
    /**
     * Compares the source string to the target string according to the
     * collation rules for this Collator.  Returns an integer less than,
     * equal to or greater than zero depending on whether the source String is
     * less than, equal to or greater than the target string.  See the Collator
     * class description for an example of use.
     * <p>
     * For a one time comparison, this method has the best performance. If a
     * given String will be involved in multiple comparisons,
     * {@code CollationKey.compareTo} has the best performance. See the
     * Collator class description for an example using CollationKeys.
     *
     * @param source the source string.
     * @param target the target string.
     * @return an integer value: less than zero if source is less than
     * target, zero if source and target are equal, greater than zero
     * if source is greater than target.
     * @see java.text.CollationKey
     * @see java.text.Collator#getCollationKey
     */
    public abstract int compare(String source, String target);
262 
263     /**
264      * Compares its two arguments for order.  Returns a negative integer,
265      * zero, or a positive integer as the first argument is less than, equal
266      * to, or greater than the second.
267      * <p>
268      * This implementation merely returns
269      *  {@code  compare((String)o1, (String)o2) }.
270      *
271      * @return a negative integer, zero, or a positive integer as the
272      *         first argument is less than, equal to, or greater than the
273      *         second.
274      * @throws    ClassCastException the arguments cannot be cast to Strings.
275      * @see java.util.Comparator
276      * @since   1.2
277      */
278     @Override
compare(Object o1, Object o2)279     public int compare(Object o1, Object o2) {
280     return compare((String)o1, (String)o2);
281     }
282 
    /**
     * Transforms the String into a series of bits that can be compared bitwise
     * to other CollationKeys. CollationKeys provide better performance than
     * {@code Collator.compare} when Strings are involved in multiple
     * comparisons. See the Collator class description for an example using
     * CollationKeys.
     *
     * @param source the string to be transformed into a collation key.
     * @return the CollationKey for the given String based on this Collator's
     * collation rules. If the source String is null, a null CollationKey is
     * returned.
     * @see java.text.CollationKey
     * @see java.text.Collator#compare
     */
    public abstract CollationKey getCollationKey(String source);
295 
296     /**
297      * Convenience method for comparing the equality of two strings based on
298      * this Collator's collation rules.
299      * @param source the source string to be compared with.
300      * @param target the target string to be compared with.
301      * @return true if the strings are equal according to the collation
302      * rules.  false, otherwise.
303      * @see java.text.Collator#compare
304      */
equals(String source, String target)305     public boolean equals(String source, String target)
306     {
307         // Android-changed: remove use of unnecessary EQUAL constant.
308         return (compare(source, target) == 0);
309     }
310 
311     /**
312      * Returns this Collator's strength property.  The strength property determines
313      * the minimum level of difference considered significant during comparison.
314      * See the Collator class description for an example of use.
315      * @return this Collator's current strength property.
316      * @see java.text.Collator#setStrength
317      * @see java.text.Collator#PRIMARY
318      * @see java.text.Collator#SECONDARY
319      * @see java.text.Collator#TERTIARY
320      * @see java.text.Collator#IDENTICAL
321      */
getStrength()322     public synchronized int getStrength()
323     {
324         // Android-changed: Switched to ICU.
325         // The value for IDENTICAL in ICU differs from that used in this class.
326         int value = icuColl.getStrength();
327         return (value == android.icu.text.Collator.IDENTICAL) ? IDENTICAL : value;
328     }
329 
330     /**
331      * Sets this Collator's strength property.  The strength property determines
332      * the minimum level of difference considered significant during comparison.
333      * See the Collator class description for an example of use.
334      * @param newStrength  the new strength value.
335      * @see java.text.Collator#getStrength
336      * @see java.text.Collator#PRIMARY
337      * @see java.text.Collator#SECONDARY
338      * @see java.text.Collator#TERTIARY
339      * @see java.text.Collator#IDENTICAL
340      * @throws     IllegalArgumentException If the new strength value is not one of
341      * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
342      */
setStrength(int newStrength)343     public synchronized void setStrength(int newStrength) {
344         // Android-changed: Switched to ICU.
345         // The ICU value for IDENTICAL differs from that defined in this class.
346         if (newStrength == IDENTICAL) {
347             newStrength = android.icu.text.Collator.IDENTICAL;
348         }
349         icuColl.setStrength(newStrength);
350     }
351 
352     /**
353      * Get the decomposition mode of this Collator. Decomposition mode
354      * determines how Unicode composed characters are handled. Adjusting
355      * decomposition mode allows the user to select between faster and more
356      * complete collation behavior.
357      * <p>The three values for decomposition mode are:
358      * <UL>
359      * <LI>NO_DECOMPOSITION,
360      * <LI>CANONICAL_DECOMPOSITION
361      * <LI>FULL_DECOMPOSITION.
362      * </UL>
363      * See the documentation for these three constants for a description
364      * of their meaning.
365      * @return the decomposition mode
366      * @see java.text.Collator#setDecomposition
367      * @see java.text.Collator#NO_DECOMPOSITION
368      * @see java.text.Collator#CANONICAL_DECOMPOSITION
369      * @see java.text.Collator#FULL_DECOMPOSITION
370      */
getDecomposition()371     public synchronized int getDecomposition()
372     {
373         // Android-changed: Switched to ICU.
374         return decompositionMode_ICU_Java(icuColl.getDecomposition());
375     }
376     /**
377      * Set the decomposition mode of this Collator. See getDecomposition
378      * for a description of decomposition mode.
379      * @param decompositionMode  the new decomposition mode.
380      * @see java.text.Collator#getDecomposition
381      * @see java.text.Collator#NO_DECOMPOSITION
382      * @see java.text.Collator#CANONICAL_DECOMPOSITION
383      * @see java.text.Collator#FULL_DECOMPOSITION
384      * @throws    IllegalArgumentException If the given value is not a valid decomposition
385      * mode.
386      */
setDecomposition(int decompositionMode)387     public synchronized void setDecomposition(int decompositionMode) {
388         // Android-changed: Switched to ICU.
389         icuColl.setDecomposition(decompositionMode_Java_ICU(decompositionMode));
390     }
391 
392     // Android-changed: Removed javadoc references to CollatorProvider.
393     /**
394      * Returns an array of all locales for which the
395      * {@code getInstance} methods of this class can return
396      * localized instances.
397      *
398      * @return An array of locales for which localized
399      *         {@code Collator} instances are available.
400      */
getAvailableLocales()401     public static synchronized Locale[] getAvailableLocales() {
402         // Android-changed: Removed reference to CollatorProvider. Switched to ICU.
403         return android.icu.text.Collator.getAvailableLocales();
404     }
405 
406     // BEGIN Android-added: conversion method for decompositionMode constants.
decompositionMode_Java_ICU(int mode)407     private int decompositionMode_Java_ICU(int mode) {
408         switch (mode) {
409             case Collator.CANONICAL_DECOMPOSITION:
410                 return android.icu.text.Collator.CANONICAL_DECOMPOSITION;
411             case Collator.NO_DECOMPOSITION:
412                 return android.icu.text.Collator.NO_DECOMPOSITION;
413         }
414         throw new IllegalArgumentException("Bad mode: " + mode);
415     }
416 
decompositionMode_ICU_Java(int mode)417     private int decompositionMode_ICU_Java(int mode) {
418         int javaMode = mode;
419         switch (mode) {
420             case android.icu.text.Collator.NO_DECOMPOSITION:
421                 javaMode = Collator.NO_DECOMPOSITION;
422                 break;
423             case android.icu.text.Collator.CANONICAL_DECOMPOSITION:
424                 javaMode = Collator.CANONICAL_DECOMPOSITION;
425                 break;
426         }
427         return javaMode;
428     }
429     // END Android-added: conversion method for decompositionMode constants.
430 
431     // Android-changed: improve clone() documentation.
432     /**
433      * Returns a new collator with the same decomposition mode and
434      * strength value as this collator.
435      *
436      * @return a shallow copy of this collator.
437      * @see java.lang.Cloneable
438      */
439     @Override
clone()440     public Object clone()
441     {
442         try {
443             // Android-changed: Switched to ICU.
444             Collator clone = (Collator) super.clone();
445             clone.icuColl = (android.icu.text.Collator) icuColl.clone();
446             return clone;
447         } catch (CloneNotSupportedException e) {
448             throw new AssertionError(e);
449         }
450     }
451 
452     /**
453      * Compares the equality of two Collators.
454      * @param that the Collator to be compared with this.
455      * @return true if this Collator is the same as that Collator;
456      * false otherwise.
457      */
458     @Override
equals(Object that)459     public boolean equals(Object that)
460     {
461         if (this == that) {
462             return true;
463         }
464         if (that == null) {
465             return false;
466         }
467         if (getClass() != that.getClass()) {
468             return false;
469         }
470         Collator other = (Collator) that;
471         // Android-changed: Switched to ICU.
472         return icuColl == null ? other.icuColl == null : icuColl.equals(other.icuColl);
473     }
474 
    /**
     * Generates the hash code for this Collator. Declared abstract so that
     * every concrete subclass must supply its own implementation.
     *
     * @return the hash code for this Collator.
     */
    @Override
    public abstract int hashCode();
480 
481     /**
482      * Default constructor.  This constructor is
483      * protected so subclasses can get access to it. Users typically create
484      * a Collator sub-class by calling the factory method getInstance.
485      * @see java.text.Collator#getInstance
486      */
Collator()487     protected Collator()
488     {
489         // Android-changed: Switched to ICU.
490         // strength = TERTIARY;
491         // decmp = CANONICAL_DECOMPOSITION;
492         icuColl = android.icu.text.RuleBasedCollator.getInstance(Locale.getDefault());
493     }
494 
    // Android-added: ICU Collator this delegates to.
    // Package-private and non-final: clone() assigns a cloned delegate to the
    // copy, and equals() tolerates a null value.
    android.icu.text.Collator icuColl;
497 
    // Android-added: package-private constructor taking an ICU Collator.
    // Stores the supplied ICU collator as this instance's delegate without
    // copying or validating it.
    Collator(android.icu.text.Collator icuColl) {
        this.icuColl = icuColl;
    }
502 
503     // BEGIN Android-removed: Fields and constants.
504     /*
505     private int strength = 0;
506     private int decmp = 0;
507     private static final ConcurrentMap<Locale, SoftReference<Collator>> cache
508             = new ConcurrentHashMap<>();
509 
510     //
511     // FIXME: These three constants should be removed.
512     //
513     /**
514      * LESS is returned if source string is compared to be less than target
515      * string in the compare() method.
516      * @see java.text.Collator#compare
517      *
518     static final int LESS = -1;
519     /**
520      * EQUAL is returned if source string is compared to be equal to target
521      * string in the compare() method.
522      * @see java.text.Collator#compare
523      *
524     static final int EQUAL = 0;
525     /**
526      * GREATER is returned if source string is compared to be greater than
527      * target string in the compare() method.
528      * @see java.text.Collator#compare
529      *
530     static final int GREATER = 1;
531     */
532     // END Android-removed: Fields and constants.
533  }
534