/*
 * Copyright (C) 2014 The Android Open Source Project
 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * (C) Copyright Taligent, Inc. 1996-1998 - All Rights Reserved
 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
 *
 *   The original version of this source code and documentation is copyrighted
 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
 * materials are provided under terms of a License Agreement between Taligent
 * and Sun. This technology is protected by multiple US and International
 * patents. This notice and attribution to Taligent may not be removed.
 *   Taligent is a registered trademark of Taligent, Inc.
 *
 */

package java.text;

import java.util.Locale;

import libcore.icu.ICU;

/**
 * The {@code Collator} class performs locale-sensitive
 * {@code String} comparison. You use this class to build
 * searching and sorting routines for natural language text.
 *
 * <p>
 * {@code Collator} is an abstract base class. Subclasses
 * implement specific collation strategies. One subclass,
 * {@code RuleBasedCollator}, is currently provided with
 * the Java Platform and is applicable to a wide set of languages. Other
 * subclasses may be created to handle more specialized needs.
 *
 * <p>
 * Like other locale-sensitive classes, you can use the static
 * factory method, {@code getInstance}, to obtain the appropriate
 * {@code Collator} object for a given locale. You will only need
 * to look at the subclasses of {@code Collator} if you need
 * to understand the details of a particular collation strategy or
 * if you need to modify that strategy.
 *
 * <p>
 * The following example shows how to compare two strings using
 * the {@code Collator} for the default locale.
 * <blockquote>
 * <pre>{@code
 * // Compare two strings in the default locale
 * Collator myCollator = Collator.getInstance();
 * if( myCollator.compare("abc", "ABC") < 0 )
 *     System.out.println("abc is less than ABC");
 * else
 *     System.out.println("abc is greater than or equal to ABC");
 * }</pre>
 * </blockquote>
 *
 * <p>
 * You can set a {@code Collator}'s <em>strength</em> property
 * to determine the level of difference considered significant in
 * comparisons. Four strengths are provided: {@code PRIMARY},
 * {@code SECONDARY}, {@code TERTIARY}, and {@code IDENTICAL}.
 * The exact assignment of strengths to language features is
 * locale dependent.  For example, in Czech, "e" and "f" are considered
 * primary differences, while "e" and "ě" are secondary differences,
 * "e" and "E" are tertiary differences and "e" and "e" are identical.
 * The following shows how both case and accents could be ignored for
 * US English.
 * <blockquote>
 * <pre>
 * //Get the Collator for US English and set its strength to PRIMARY
 * Collator usCollator = Collator.getInstance(Locale.US);
 * usCollator.setStrength(Collator.PRIMARY);
 * if( usCollator.compare("abc", "ABC") == 0 ) {
 *     System.out.println("Strings are equivalent");
 * }
 * </pre>
 * </blockquote>
 * <p>
 * For comparing {@code String}s exactly once, the {@code compare}
 * method provides the best performance. When sorting a list of
 * {@code String}s however, it is generally necessary to compare each
 * {@code String} multiple times. In this case, {@code CollationKey}s
 * provide better performance. The {@code CollationKey} class converts
 * a {@code String} to a series of bits that can be compared bitwise
 * against other {@code CollationKey}s. A {@code CollationKey} is
 * created by a {@code Collator} object for a given {@code String}.
 * <br>
 * <strong>Note:</strong> {@code CollationKey}s from different
 * {@code Collator}s can not be compared. See the class description
 * for {@link CollationKey}
 * for an example using {@code CollationKey}s.
 *
 * @see         RuleBasedCollator
 * @see         CollationKey
 * @see         CollationElementIterator
 * @see         Locale
 * @author      Helena Shih, Laura Werner, Richard Gillam
 * @since 1.1
 */

public abstract class Collator
    implements java.util.Comparator<Object>, Cloneable
{
    /**
     * Collator strength value.  When set, only PRIMARY differences are
     * considered significant during comparison. The assignment of strengths
     * to language features is locale dependent. A common example is for
     * different base letters ("a" vs "b") to be considered a PRIMARY difference.
     * @see java.text.Collator#setStrength
     * @see java.text.Collator#getStrength
     */
    public static final int PRIMARY = 0;
    /**
     * Collator strength value.  When set, only SECONDARY and above differences are
     * considered significant during comparison. The assignment of strengths
     * to language features is locale dependent. A common example is for
     * different accented forms of the same base letter ("a" vs "\u00E4") to be
     * considered a SECONDARY difference.
     * @see java.text.Collator#setStrength
     * @see java.text.Collator#getStrength
     */
    public static final int SECONDARY = 1;
    /**
     * Collator strength value.  When set, only TERTIARY and above differences are
     * considered significant during comparison. The assignment of strengths
     * to language features is locale dependent. A common example is for
     * case differences ("a" vs "A") to be considered a TERTIARY difference.
     * @see java.text.Collator#setStrength
     * @see java.text.Collator#getStrength
     */
    public static final int TERTIARY = 2;

    /**
     * Collator strength value.  When set, all differences are
     * considered significant during comparison. The assignment of strengths
     * to language features is locale dependent. A common example is for control
     * characters ("\u0001" vs "\u0002") to be considered equal at the
     * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL
     * level.  Additionally, differences between pre-composed accents such as
     * "\u00C0" (A-grave) and combining accents such as "A\u0300"
     * (A, combining-grave) will be considered significant at the IDENTICAL
     * level if decomposition is set to NO_DECOMPOSITION.
     */
    public static final int IDENTICAL = 3;

    /**
     * Decomposition mode value. With NO_DECOMPOSITION
     * set, accented characters will not be decomposed for collation. This
     * is the default setting and provides the fastest collation but
     * will only produce correct results for languages that do not use accents.
     * @see java.text.Collator#getDecomposition
     * @see java.text.Collator#setDecomposition
     */
    public static final int NO_DECOMPOSITION = 0;

    /**
     * Decomposition mode value. With CANONICAL_DECOMPOSITION
     * set, characters that are canonical variants according to Unicode
     * standard will be decomposed for collation. This should be used to get
     * correct collation of accented characters.
     * <p>
     * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
     * described in
     * <a href="http://www.unicode.org/reports/tr15/">Unicode
     * Standard Annex #15: Unicode Normalization Forms</a>.
     * @see java.text.Collator#getDecomposition
     * @see java.text.Collator#setDecomposition
     */
    public static final int CANONICAL_DECOMPOSITION = 1;

    /**
     * Decomposition mode value. With FULL_DECOMPOSITION
     * set, both Unicode canonical variants and Unicode compatibility variants
     * will be decomposed for collation.  This causes not only accented
     * characters to be collated, but also characters that have special formats
     * to be collated with their nominal form. For example, the half-width and
     * full-width ASCII and Katakana characters are then collated together.
     * FULL_DECOMPOSITION is the most complete and therefore the slowest
     * decomposition mode.
     * <p>
     * FULL_DECOMPOSITION corresponds to Normalization Form KD as
     * described in
     * <a href="http://www.unicode.org/reports/tr15/">Unicode
     * Standard Annex #15: Unicode Normalization Forms</a>.
     * <p>
     * <strong>Note:</strong> in this implementation
     * {@link #setDecomposition(int)} only maps {@code NO_DECOMPOSITION} and
     * {@code CANONICAL_DECOMPOSITION} onto the underlying ICU collator; passing
     * {@code FULL_DECOMPOSITION} results in an
     * {@code IllegalArgumentException}.
     * @see java.text.Collator#getDecomposition
     * @see java.text.Collator#setDecomposition
     */
    public static final int FULL_DECOMPOSITION = 2;

    /**
     * Gets the Collator for the current default locale.
     * The default locale is determined by java.util.Locale.getDefault.
     * @return the Collator for the default locale.(for example, en_US)
     * @see java.util.Locale#getDefault
     */
    public static synchronized Collator getInstance() {
        return getInstance(Locale.getDefault());
    }

    /**
     * Gets the Collator for the desired locale.
     * @param desiredLocale the desired locale.
     * @return the Collator for the desired locale.
     * @throws NullPointerException if {@code desiredLocale} is {@code null}.
     * @see java.util.Locale
     * @see java.util.ResourceBundle
     */
    public static Collator getInstance(Locale desiredLocale) {
        // BEGIN Android-changed: Switched to ICU.
        synchronized(Collator.class) {
            if (desiredLocale == null) {
                throw new NullPointerException("locale == null");
            }
            // Wrap the ICU collator for the locale in the java.text facade.
            return new RuleBasedCollator((android.icu.text.RuleBasedCollator)
                    android.icu.text.Collator.getInstance(desiredLocale));
        }
        // END Android-changed: Switched to ICU.
    }

    /**
     * Compares the source string to the target string according to the
     * collation rules for this Collator.  Returns an integer less than,
     * equal to or greater than zero depending on whether the source String is
     * less than, equal to or greater than the target string.  See the Collator
     * class description for an example of use.
     * <p>
     * For a one time comparison, this method has the best performance. If a
     * given String will be involved in multiple comparisons, CollationKey.compareTo
     * has the best performance. See the Collator class description for an example
     * using CollationKeys.
     * @param source the source string.
     * @param target the target string.
     * @return Returns an integer value. Value is less than zero if source is less than
     * target, value is zero if source and target are equal, value is greater than zero
     * if source is greater than target.
     * @see java.text.CollationKey
     * @see java.text.Collator#getCollationKey
     */
    public abstract int compare(String source, String target);

    /**
     * Compares its two arguments for order.  Returns a negative integer,
     * zero, or a positive integer as the first argument is less than, equal
     * to, or greater than the second.
     * <p>
     * This implementation merely returns
     *  {@code compare((String)o1, (String)o2) }.
     *
     * @param o1 the first object to be compared; must be a {@code String}.
     * @param o2 the second object to be compared; must be a {@code String}.
     * @return a negative integer, zero, or a positive integer as the
     *         first argument is less than, equal to, or greater than the
     *         second.
     * @throws    ClassCastException the arguments cannot be cast to Strings.
     * @see java.util.Comparator
     * @since   1.2
     */
    @Override
    public int compare(Object o1, Object o2) {
        return compare((String)o1, (String)o2);
    }

    /**
     * Transforms the String into a series of bits that can be compared bitwise
     * to other CollationKeys. CollationKeys provide better performance than
     * Collator.compare when Strings are involved in multiple comparisons.
     * See the Collator class description for an example using CollationKeys.
     * @param source the string to be transformed into a collation key.
     * @return the CollationKey for the given String based on this Collator's collation
     * rules. If the source String is null, a null CollationKey is returned.
     * @see java.text.CollationKey
     * @see java.text.Collator#compare
     */
    public abstract CollationKey getCollationKey(String source);

    /**
     * Convenience method for comparing the equality of two strings based on
     * this Collator's collation rules.
     * @param source the source string to be compared with.
     * @param target the target string to be compared with.
     * @return true if the strings are equal according to the collation
     * rules.  false, otherwise.
     * @see java.text.Collator#compare
     */
    public boolean equals(String source, String target)
    {
        // Android-changed: remove use of unnecessary EQUAL constant.
        return (compare(source, target) == 0);
    }

    /**
     * Returns this Collator's strength property.  The strength property determines
     * the minimum level of difference considered significant during comparison.
     * See the Collator class description for an example of use.
     * @return this Collator's current strength property.
     * @see java.text.Collator#setStrength
     * @see java.text.Collator#PRIMARY
     * @see java.text.Collator#SECONDARY
     * @see java.text.Collator#TERTIARY
     * @see java.text.Collator#IDENTICAL
     */
    public synchronized int getStrength()
    {
        // Android-changed: Switched to ICU.
        // The value for IDENTICAL in ICU differs from that used in this class.
        int value = icuColl.getStrength();
        return (value == android.icu.text.Collator.IDENTICAL) ? IDENTICAL : value;
    }

    /**
     * Sets this Collator's strength property.  The strength property determines
     * the minimum level of difference considered significant during comparison.
     * See the Collator class description for an example of use.
     * @param newStrength  the new strength value.
     * @see java.text.Collator#getStrength
     * @see java.text.Collator#PRIMARY
     * @see java.text.Collator#SECONDARY
     * @see java.text.Collator#TERTIARY
     * @see java.text.Collator#IDENTICAL
     * @throws     IllegalArgumentException If the new strength value is not one of
     * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
     */
    public synchronized void setStrength(int newStrength) {
        // Android-changed: Switched to ICU.
        // The ICU value for IDENTICAL differs from that defined in this class.
        if (newStrength == IDENTICAL) {
            newStrength = android.icu.text.Collator.IDENTICAL;
        }
        icuColl.setStrength(newStrength);
    }

    /**
     * Get the decomposition mode of this Collator. Decomposition mode
     * determines how Unicode composed characters are handled. Adjusting
     * decomposition mode allows the user to select between faster and more
     * complete collation behavior.
     * <p>The three values for decomposition mode are:
     * <UL>
     * <LI>NO_DECOMPOSITION,
     * <LI>CANONICAL_DECOMPOSITION
     * <LI>FULL_DECOMPOSITION.
     * </UL>
     * See the documentation for these three constants for a description
     * of their meaning.
     * @return the decomposition mode
     * @see java.text.Collator#setDecomposition
     * @see java.text.Collator#NO_DECOMPOSITION
     * @see java.text.Collator#CANONICAL_DECOMPOSITION
     * @see java.text.Collator#FULL_DECOMPOSITION
     */
    public synchronized int getDecomposition()
    {
        // Android-changed: Switched to ICU.
        return decompositionMode_ICU_Java(icuColl.getDecomposition());
    }
    /**
     * Set the decomposition mode of this Collator. See getDecomposition
     * for a description of decomposition mode.
     * <p>
     * In this implementation only {@code NO_DECOMPOSITION} and
     * {@code CANONICAL_DECOMPOSITION} are translated to an ICU mode; every
     * other value, including {@code FULL_DECOMPOSITION}, is rejected.
     * @param decompositionMode  the new decomposition mode.
     * @see java.text.Collator#getDecomposition
     * @see java.text.Collator#NO_DECOMPOSITION
     * @see java.text.Collator#CANONICAL_DECOMPOSITION
     * @see java.text.Collator#FULL_DECOMPOSITION
     * @throws    IllegalArgumentException If the given value is not a valid decomposition
     * mode.
     */
    public synchronized void setDecomposition(int decompositionMode) {
        // Android-changed: Switched to ICU.
        icuColl.setDecomposition(decompositionMode_Java_ICU(decompositionMode));
    }

    // Android-changed: Removed javadoc references to CollatorProvider.
    /**
     * Returns an array of all locales for which the
     * {@code getInstance} methods of this class can return
     * localized instances.
     *
     * @return An array of locales for which localized
     *         {@code Collator} instances are available.
     */
    public static synchronized Locale[] getAvailableLocales() {
        // Android-changed: Removed reference to CollatorProvider. Switched to ICU.
        return android.icu.text.Collator.getAvailableLocales();
    }

    // BEGIN Android-added: conversion method for decompositionMode constants.
    /**
     * Translates a decomposition mode constant of this class into the
     * corresponding {@code android.icu.text.Collator} constant.
     *
     * @param mode {@link #NO_DECOMPOSITION} or {@link #CANONICAL_DECOMPOSITION}.
     * @return the matching ICU decomposition mode constant.
     * @throws IllegalArgumentException if {@code mode} is any other value
     *         (this includes {@link #FULL_DECOMPOSITION}, which has no case here).
     */
    private int decompositionMode_Java_ICU(int mode) {
        switch (mode) {
            case Collator.CANONICAL_DECOMPOSITION:
                return android.icu.text.Collator.CANONICAL_DECOMPOSITION;
            case Collator.NO_DECOMPOSITION:
                return android.icu.text.Collator.NO_DECOMPOSITION;
        }
        throw new IllegalArgumentException("Bad mode: " + mode);
    }

    /**
     * Translates an {@code android.icu.text.Collator} decomposition mode
     * constant into the corresponding constant of this class.
     *
     * @param mode the decomposition mode reported by the ICU collator.
     * @return {@link #NO_DECOMPOSITION} or {@link #CANONICAL_DECOMPOSITION} for
     *         the two ICU modes handled here; any other value is returned
     *         unchanged.
     */
    private int decompositionMode_ICU_Java(int mode) {
        // Falls through with the raw ICU value when no case matches.
        int javaMode = mode;
        switch (mode) {
            case android.icu.text.Collator.NO_DECOMPOSITION:
                javaMode = Collator.NO_DECOMPOSITION;
                break;
            case android.icu.text.Collator.CANONICAL_DECOMPOSITION:
                javaMode = Collator.CANONICAL_DECOMPOSITION;
                break;
        }
        return javaMode;
    }
    // END Android-added: conversion method for decompositionMode constants.

    // Android-changed: improve clone() documentation.
    /**
     * Returns a new collator with the same decomposition mode and
     * strength value as this collator.
     *
     * @return a copy of this collator that delegates to its own clone of
     *         this collator's ICU collator.
     * @see java.lang.Cloneable
     */
    @Override
    public Object clone()
    {
        try {
            // Android-changed: Switched to ICU.
            Collator clone = (Collator) super.clone();
            // Give the copy its own delegate so that later setStrength /
            // setDecomposition calls on one instance do not affect the other.
            clone.icuColl = (android.icu.text.Collator) icuColl.clone();
            return clone;
        } catch (CloneNotSupportedException e) {
            throw new AssertionError(e);
        }
    }

    /**
     * Compares the equality of two Collators.
     * <p>
     * Two collators are equal when they are of exactly the same class and
     * their underlying ICU collators are equal (or both absent).
     * @param that the Collator to be compared with this.
     * @return true if this Collator is the same as that Collator;
     * false otherwise.
     */
    @Override
    public boolean equals(Object that)
    {
        if (this == that) {
            return true;
        }
        if (that == null) {
            return false;
        }
        if (getClass() != that.getClass()) {
            return false;
        }
        Collator other = (Collator) that;
        // Android-changed: Switched to ICU.
        return icuColl == null ? other.icuColl == null : icuColl.equals(other.icuColl);
    }

    /**
     * Generates the hash code for this Collator.
     *
     * @return the hash code value for this Collator.
     */
    @Override
    public abstract int hashCode();

    /**
     * Default constructor.  This constructor is
     * protected so subclasses can get access to it. Users typically create
     * a Collator sub-class by calling the factory method getInstance.
     * @see java.text.Collator#getInstance
     */
    protected Collator()
    {
        // Android-changed: Switched to ICU.
        // strength = TERTIARY;
        // decmp = CANONICAL_DECOMPOSITION;
        icuColl = android.icu.text.RuleBasedCollator.getInstance(Locale.getDefault());
    }

    // Android-added: ICU Collator this delegates to.
    /**
     * The ICU collator that backs the strength, decomposition, clone and
     * equality operations implemented in this class.
     */
    android.icu.text.Collator icuColl;

    // Android-added: package-private constructor taking an ICU Collator.
    /**
     * Creates a Collator that delegates to the given ICU collator. The
     * argument is stored as-is, without copying or null checking.
     *
     * @param icuColl the ICU collator to delegate to.
     */
    Collator(android.icu.text.Collator icuColl) {
        this.icuColl = icuColl;
    }

    // BEGIN Android-removed: Fields and constants.
    /*
    private int strength = 0;
    private int decmp = 0;
    private static final ConcurrentMap<Locale, SoftReference<Collator>> cache
            = new ConcurrentHashMap<>();

    //
    // FIXME: These three constants should be removed.
    //
    /**
     * LESS is returned if source string is compared to be less than target
     * string in the compare() method.
     * @see java.text.Collator#compare
     *
    static final int LESS = -1;
    /**
     * EQUAL is returned if source string is compared to be equal to target
     * string in the compare() method.
     * @see java.text.Collator#compare
     *
    static final int EQUAL = 0;
    /**
     * GREATER is returned if source string is compared to be greater than
     * target string in the compare() method.
     * @see java.text.Collator#compare
     *
    static final int GREATER = 1;
    */
    // END Android-removed: Fields and constants.
}