1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This code is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 only, as
8  * published by the Free Software Foundation.  Oracle designates this
9  * particular file as subject to the "Classpath" exception as provided
10  * by Oracle in the LICENSE file that accompanied this code.
11  *
12  * This code is distributed in the hope that it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15  * version 2 for more details (a copy is included in the LICENSE file that
16  * accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License version
19  * 2 along with this work; if not, write to the Free Software Foundation,
20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23  * or visit www.oracle.com if you need additional information or have any
24  * questions.
25  */
26 
27 /*
28  * (C) Copyright Taligent, Inc. 1996-1998 -  All Rights Reserved
29  * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
30  *
31  *   The original version of this source code and documentation is copyrighted
32  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
33  * materials are provided under terms of a License Agreement between Taligent
34  * and Sun. This technology is protected by multiple US and International
35  * patents. This notice and attribution to Taligent may not be removed.
36  *   Taligent is a registered trademark of Taligent, Inc.
37  *
38  */
39 
40 package java.text;
41 
42 import java.util.Locale;
43 
44 import libcore.icu.ICU;
45 
46 /**
47  * The <code>Collator</code> class performs locale-sensitive
48  * <code>String</code> comparison. You use this class to build
49  * searching and sorting routines for natural language text.
50  *
51  * <p>
52  * <code>Collator</code> is an abstract base class. Subclasses
53  * implement specific collation strategies. One subclass,
54  * <code>RuleBasedCollator</code>, is currently provided with
55  * the Java Platform and is applicable to a wide set of languages. Other
56  * subclasses may be created to handle more specialized needs.
57  *
58  * <p>
59  * Like other locale-sensitive classes, you can use the static
60  * factory method, <code>getInstance</code>, to obtain the appropriate
61  * <code>Collator</code> object for a given locale. You will only need
62  * to look at the subclasses of <code>Collator</code> if you need
63  * to understand the details of a particular collation strategy or
64  * if you need to modify that strategy.
65  *
66  * <p>
67  * The following example shows how to compare two strings using
68  * the <code>Collator</code> for the default locale.
69  * <blockquote>
70  * <pre>{@code
71  * // Compare two strings in the default locale
72  * Collator myCollator = Collator.getInstance();
73  * if( myCollator.compare("abc", "ABC") < 0 )
74  *     System.out.println("abc is less than ABC");
75  * else
76  *     System.out.println("abc is greater than or equal to ABC");
77  * }</pre>
78  * </blockquote>
79  *
80  * <p>
81  * You can set a <code>Collator</code>'s <em>strength</em> property
82  * to determine the level of difference considered significant in
83  * comparisons. Four strengths are provided: <code>PRIMARY</code>,
84  * <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>.
85  * The exact assignment of strengths to language features is
 * locale dependent.  For example, in Czech, "e" and "f" are considered
87  * primary differences, while "e" and "&#283;" are secondary differences,
88  * "e" and "E" are tertiary differences and "e" and "e" are identical.
89  * The following shows how both case and accents could be ignored for
90  * US English.
91  * <blockquote>
92  * <pre>
93  * //Get the Collator for US English and set its strength to PRIMARY
94  * Collator usCollator = Collator.getInstance(Locale.US);
95  * usCollator.setStrength(Collator.PRIMARY);
96  * if( usCollator.compare("abc", "ABC") == 0 ) {
97  *     System.out.println("Strings are equivalent");
98  * }
99  * </pre>
100  * </blockquote>
101  * <p>
102  * For comparing <code>String</code>s exactly once, the <code>compare</code>
103  * method provides the best performance. When sorting a list of
104  * <code>String</code>s however, it is generally necessary to compare each
105  * <code>String</code> multiple times. In this case, <code>CollationKey</code>s
106  * provide better performance. The <code>CollationKey</code> class converts
107  * a <code>String</code> to a series of bits that can be compared bitwise
108  * against other <code>CollationKey</code>s. A <code>CollationKey</code> is
109  * created by a <code>Collator</code> object for a given <code>String</code>.
110  * <br>
111  * <strong>Note:</strong> <code>CollationKey</code>s from different
 * <code>Collator</code>s cannot be compared. See the class description
113  * for {@link CollationKey}
114  * for an example using <code>CollationKey</code>s.
115  *
116  * @see         RuleBasedCollator
117  * @see         CollationKey
118  * @see         CollationElementIterator
119  * @see         Locale
120  * @author      Helena Shih, Laura Werner, Richard Gillam
121  */
122 
123 public abstract class Collator
124     implements java.util.Comparator<Object>, Cloneable
125 {
126     /**
127      * Collator strength value.  When set, only PRIMARY differences are
128      * considered significant during comparison. The assignment of strengths
129      * to language features is locale dependant. A common example is for
130      * different base letters ("a" vs "b") to be considered a PRIMARY difference.
131      * @see java.text.Collator#setStrength
132      * @see java.text.Collator#getStrength
133      */
134     public final static int PRIMARY = 0;
135     /**
136      * Collator strength value.  When set, only SECONDARY and above differences are
137      * considered significant during comparison. The assignment of strengths
138      * to language features is locale dependant. A common example is for
139      * different accented forms of the same base letter ("a" vs "\u00E4") to be
140      * considered a SECONDARY difference.
141      * @see java.text.Collator#setStrength
142      * @see java.text.Collator#getStrength
143      */
144     public final static int SECONDARY = 1;
145     /**
146      * Collator strength value.  When set, only TERTIARY and above differences are
147      * considered significant during comparison. The assignment of strengths
148      * to language features is locale dependant. A common example is for
149      * case differences ("a" vs "A") to be considered a TERTIARY difference.
150      * @see java.text.Collator#setStrength
151      * @see java.text.Collator#getStrength
152      */
153     public final static int TERTIARY = 2;
154 
155     /**
156      * Collator strength value.  When set, all differences are
157      * considered significant during comparison. The assignment of strengths
158      * to language features is locale dependant. A common example is for control
159      * characters ("&#092;u0001" vs "&#092;u0002") to be considered equal at the
160      * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL
161      * level.  Additionally, differences between pre-composed accents such as
162      * "&#092;u00C0" (A-grave) and combining accents such as "A&#092;u0300"
163      * (A, combining-grave) will be considered significant at the IDENTICAL
164      * level if decomposition is set to NO_DECOMPOSITION.
165      */
166     public final static int IDENTICAL = 3;
167 
168     /**
169      * Decomposition mode value. With NO_DECOMPOSITION
170      * set, accented characters will not be decomposed for collation. This
171      * is the default setting and provides the fastest collation but
172      * will only produce correct results for languages that do not use accents.
173      * @see java.text.Collator#getDecomposition
174      * @see java.text.Collator#setDecomposition
175      */
176     public final static int NO_DECOMPOSITION = 0;
177 
178     /**
179      * Decomposition mode value. With CANONICAL_DECOMPOSITION
180      * set, characters that are canonical variants according to Unicode
181      * standard will be decomposed for collation. This should be used to get
182      * correct collation of accented characters.
183      * <p>
184      * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
185      * described in
186      * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
187      * Technical Report #15</a>.
188      * @see java.text.Collator#getDecomposition
189      * @see java.text.Collator#setDecomposition
190      */
191     public final static int CANONICAL_DECOMPOSITION = 1;
192 
193     /**
194      * Decomposition mode value. With FULL_DECOMPOSITION
195      * set, both Unicode canonical variants and Unicode compatibility variants
196      * will be decomposed for collation.  This causes not only accented
197      * characters to be collated, but also characters that have special formats
198      * to be collated with their norminal form. For example, the half-width and
199      * full-width ASCII and Katakana characters are then collated together.
200      * FULL_DECOMPOSITION is the most complete and therefore the slowest
201      * decomposition mode.
202      * <p>
203      * FULL_DECOMPOSITION corresponds to Normalization Form KD as
204      * described in
205      * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
206      * Technical Report #15</a>.
207      * @see java.text.Collator#getDecomposition
208      * @see java.text.Collator#setDecomposition
209      */
210     public final static int FULL_DECOMPOSITION = 2;
211 
212     /**
213      * Gets the Collator for the current default locale.
214      * The default locale is determined by java.util.Locale.getDefault.
215      * @return the Collator for the default locale.(for example, en_US)
216      * @see java.util.Locale#getDefault
217      */
getInstance()218     public static synchronized Collator getInstance() {
219         return getInstance(Locale.getDefault());
220     }
221 
222     /**
223      * Gets the Collator for the desired locale.
224      * @param desiredLocale the desired locale.
225      * @return the Collator for the desired locale.
226      * @see java.util.Locale
227      * @see java.util.ResourceBundle
228      */
229     public static synchronized
getInstance(Locale desiredLocale)230     Collator getInstance(Locale desiredLocale)
231     {
232         // Android-changed: Switched to ICU.
233         if (desiredLocale == null) {
234             throw new NullPointerException("locale == null");
235         }
236         return new RuleBasedCollator((android.icu.text.RuleBasedCollator)
237                 android.icu.text.Collator.getInstance(desiredLocale));
238     }
239 
240     /**
241      * Compares the source string to the target string according to the
242      * collation rules for this Collator.  Returns an integer less than,
243      * equal to or greater than zero depending on whether the source String is
244      * less than, equal to or greater than the target string.  See the Collator
245      * class description for an example of use.
246      * <p>
247      * For a one time comparison, this method has the best performance. If a
248      * given String will be involved in multiple comparisons, CollationKey.compareTo
249      * has the best performance. See the Collator class description for an example
250      * using CollationKeys.
251      * @param source the source string.
252      * @param target the target string.
253      * @return Returns an integer value. Value is less than zero if source is less than
254      * target, value is zero if source and target are equal, value is greater than zero
255      * if source is greater than target.
256      * @see java.text.CollationKey
257      * @see java.text.Collator#getCollationKey
258      */
compare(String source, String target)259     public abstract int compare(String source, String target);
260 
261     /**
262      * Compares its two arguments for order.  Returns a negative integer,
263      * zero, or a positive integer as the first argument is less than, equal
264      * to, or greater than the second.
265      * <p>
266      * This implementation merely returns
267      *  <code> compare((String)o1, (String)o2) </code>.
268      *
269      * @return a negative integer, zero, or a positive integer as the
270      *         first argument is less than, equal to, or greater than the
271      *         second.
272      * @exception ClassCastException the arguments cannot be cast to Strings.
273      * @see java.util.Comparator
274      * @since   1.2
275      */
276     @Override
compare(Object o1, Object o2)277     public int compare(Object o1, Object o2) {
278     return compare((String)o1, (String)o2);
279     }
280 
281     /**
282      * Transforms the String into a series of bits that can be compared bitwise
283      * to other CollationKeys. CollationKeys provide better performance than
284      * Collator.compare when Strings are involved in multiple comparisons.
285      * See the Collator class description for an example using CollationKeys.
286      * @param source the string to be transformed into a collation key.
287      * @return the CollationKey for the given String based on this Collator's collation
288      * rules. If the source String is null, a null CollationKey is returned.
289      * @see java.text.CollationKey
290      * @see java.text.Collator#compare
291      */
getCollationKey(String source)292     public abstract CollationKey getCollationKey(String source);
293 
294     /**
295      * Convenience method for comparing the equality of two strings based on
296      * this Collator's collation rules.
297      * @param source the source string to be compared with.
298      * @param target the target string to be compared with.
299      * @return true if the strings are equal according to the collation
300      * rules.  false, otherwise.
301      * @see java.text.Collator#compare
302      */
equals(String source, String target)303     public boolean equals(String source, String target)
304     {
305         return (compare(source, target) == 0);
306     }
307 
308     /**
309      * Returns this Collator's strength property.  The strength property determines
310      * the minimum level of difference considered significant during comparison.
311      * See the Collator class description for an example of use.
312      * @return this Collator's current strength property.
313      * @see java.text.Collator#setStrength
314      * @see java.text.Collator#PRIMARY
315      * @see java.text.Collator#SECONDARY
316      * @see java.text.Collator#TERTIARY
317      * @see java.text.Collator#IDENTICAL
318      */
getStrength()319     public synchronized int getStrength()
320     {
321         // Android-changed: Switched to ICU.
322         // The value for IDENTICAL in ICU differs from that used in this class.
323         int value = icuColl.getStrength();
324         return (value == android.icu.text.Collator.IDENTICAL) ? IDENTICAL : value;
325     }
326 
327     /**
328      * Sets this Collator's strength property.  The strength property determines
329      * the minimum level of difference considered significant during comparison.
330      * See the Collator class description for an example of use.
331      * @param newStrength  the new strength value.
332      * @see java.text.Collator#getStrength
333      * @see java.text.Collator#PRIMARY
334      * @see java.text.Collator#SECONDARY
335      * @see java.text.Collator#TERTIARY
336      * @see java.text.Collator#IDENTICAL
337      * @exception  IllegalArgumentException If the new strength value is not one of
338      * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
339      */
setStrength(int newStrength)340     public synchronized void setStrength(int newStrength) {
341         // Android-changed: Switched to ICU.
342         // The ICU value for IDENTICAL differs from that defined in this class.
343         if (newStrength == IDENTICAL) {
344             newStrength = android.icu.text.Collator.IDENTICAL;
345         }
346         icuColl.setStrength(newStrength);
347     }
348 
349     /**
350      * Get the decomposition mode of this Collator. Decomposition mode
351      * determines how Unicode composed characters are handled. Adjusting
352      * decomposition mode allows the user to select between faster and more
353      * complete collation behavior.
354      * <p>The three values for decomposition mode are:
355      * <UL>
356      * <LI>NO_DECOMPOSITION,
357      * <LI>CANONICAL_DECOMPOSITION
358      * <LI>FULL_DECOMPOSITION.
359      * </UL>
360      * See the documentation for these three constants for a description
361      * of their meaning.
362      * @return the decomposition mode
363      * @see java.text.Collator#setDecomposition
364      * @see java.text.Collator#NO_DECOMPOSITION
365      * @see java.text.Collator#CANONICAL_DECOMPOSITION
366      * @see java.text.Collator#FULL_DECOMPOSITION
367      */
getDecomposition()368     public synchronized int getDecomposition()
369     {
370         // Android-changed: Switched to ICU.
371         return decompositionMode_ICU_Java(icuColl.getDecomposition());
372     }
373     /**
374      * Set the decomposition mode of this Collator. See getDecomposition
375      * for a description of decomposition mode.
376      * @param decompositionMode  the new decomposition mode.
377      * @see java.text.Collator#getDecomposition
378      * @see java.text.Collator#NO_DECOMPOSITION
379      * @see java.text.Collator#CANONICAL_DECOMPOSITION
380      * @see java.text.Collator#FULL_DECOMPOSITION
381      * @exception IllegalArgumentException If the given value is not a valid decomposition
382      * mode.
383      */
setDecomposition(int decompositionMode)384     public synchronized void setDecomposition(int decompositionMode) {
385         // Android-changed: Switched to ICU.
386         icuColl.setDecomposition(decompositionMode_Java_ICU(decompositionMode));
387     }
388 
389     // Android-changed: Removed references to CollatorProvider.
390     /**
391      * Returns an array of all locales for which the
392      * <code>getInstance</code> methods of this class can return
393      * localized instances.
394      *
395      * @return An array of locales for which localized
396      *         <code>Collator</code> instances are available.
397      */
getAvailableLocales()398     public static synchronized Locale[] getAvailableLocales() {
399         // Android-changed: Removed reference to CollatorProvider. Switched to ICU.
400         return ICU.getAvailableCollatorLocales();
401     }
402 
decompositionMode_Java_ICU(int mode)403     private int decompositionMode_Java_ICU(int mode) {
404         switch (mode) {
405             case Collator.CANONICAL_DECOMPOSITION:
406                 return android.icu.text.Collator.CANONICAL_DECOMPOSITION;
407             case Collator.NO_DECOMPOSITION:
408                 return android.icu.text.Collator.NO_DECOMPOSITION;
409         }
410         throw new IllegalArgumentException("Bad mode: " + mode);
411     }
412 
decompositionMode_ICU_Java(int mode)413     private int decompositionMode_ICU_Java(int mode) {
414         int javaMode = mode;
415         switch (mode) {
416             case android.icu.text.Collator.NO_DECOMPOSITION:
417                 javaMode = Collator.NO_DECOMPOSITION;
418                 break;
419             case android.icu.text.Collator.CANONICAL_DECOMPOSITION:
420                 javaMode = Collator.CANONICAL_DECOMPOSITION;
421                 break;
422         }
423         return javaMode;
424     }
425 
426     /**
427      * Returns a new collator with the same decomposition mode and
428      * strength value as this collator.
429      *
430      * @return a shallow copy of this collator.
431      * @see java.lang.Cloneable
432      */
433     @Override
clone()434     public Object clone()
435     {
436         try {
437             // Android-changed: Switched to ICU.
438             Collator clone = (Collator) super.clone();
439             clone.icuColl = (android.icu.text.Collator) icuColl.clone();
440             return clone;
441         } catch (CloneNotSupportedException e) {
442             throw new AssertionError(e);
443         }
444     }
445 
446     /**
447      * Compares the equality of two Collators.
448      * @param that the Collator to be compared with this.
449      * @return true if this Collator is the same as that Collator;
450      * false otherwise.
451      */
452     @Override
equals(Object that)453     public boolean equals(Object that)
454     {
455         if (this == that) {
456             return true;
457         }
458         if (that == null) {
459             return false;
460         }
461         if (getClass() != that.getClass()) {
462             return false;
463         }
464         Collator other = (Collator) that;
465         // Android-changed: Switched to ICU.
466         return icuColl == null ? other.icuColl == null : icuColl.equals(other.icuColl);
467     }
468 
469     /**
470      * Generates the hash code for this Collator.
471      */
472     @Override
hashCode()473     abstract public int hashCode();
474 
475     /**
476      * Default constructor.  This constructor is
477      * protected so subclasses can get access to it. Users typically create
478      * a Collator sub-class by calling the factory method getInstance.
479      * @see java.text.Collator#getInstance
480      */
Collator()481     protected Collator()
482     {
483         // Android-changed: Switched to ICU.
484         icuColl = android.icu.text.RuleBasedCollator.getInstance(Locale.getDefault());
485     }
486 
487     android.icu.text.Collator icuColl;
488 
Collator(android.icu.text.Collator icuColl)489     Collator(android.icu.text.Collator icuColl) {
490         this.icuColl = icuColl;
491     }
492 }
493