1 /** 2 ******************************************************************************* 3 * Copyright (C) 1996-2015, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 package com.ibm.icu.text; 8 9 import java.lang.reflect.InvocationTargetException; 10 import java.lang.reflect.Method; 11 import java.text.CharacterIterator; 12 import java.text.ParseException; 13 import java.util.Arrays; 14 import java.util.concurrent.locks.Lock; 15 import java.util.concurrent.locks.ReentrantLock; 16 17 import com.ibm.icu.impl.ClassLoaderUtil; 18 import com.ibm.icu.impl.Normalizer2Impl; 19 import com.ibm.icu.impl.Normalizer2Impl.ReorderingBuffer; 20 import com.ibm.icu.impl.Utility; 21 import com.ibm.icu.impl.coll.BOCSU; 22 import com.ibm.icu.impl.coll.Collation; 23 import com.ibm.icu.impl.coll.CollationCompare; 24 import com.ibm.icu.impl.coll.CollationData; 25 import com.ibm.icu.impl.coll.CollationFastLatin; 26 import com.ibm.icu.impl.coll.CollationIterator; 27 import com.ibm.icu.impl.coll.CollationKeys; 28 import com.ibm.icu.impl.coll.CollationKeys.SortKeyByteSink; 29 import com.ibm.icu.impl.coll.CollationLoader; 30 import com.ibm.icu.impl.coll.CollationRoot; 31 import com.ibm.icu.impl.coll.CollationSettings; 32 import com.ibm.icu.impl.coll.CollationTailoring; 33 import com.ibm.icu.impl.coll.ContractionsAndExpansions; 34 import com.ibm.icu.impl.coll.FCDUTF16CollationIterator; 35 import com.ibm.icu.impl.coll.SharedObject; 36 import com.ibm.icu.impl.coll.TailoredSet; 37 import com.ibm.icu.impl.coll.UTF16CollationIterator; 38 import com.ibm.icu.lang.UScript; 39 import com.ibm.icu.util.ULocale; 40 import com.ibm.icu.util.VersionInfo; 41 42 /** 43 * <p> 44 * RuleBasedCollator is a concrete subclass of Collator. It allows customization of the Collator via user-specified rule 45 * sets. 
RuleBasedCollator is designed to be fully compliant with the <a 46 * href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation Algorithm (UCA)</a> and conforms to ISO 14651. 47 * </p> 48 * 49 * <p>A Collator is thread-safe only when frozen. See {@link #isFrozen()} and {@link com.ibm.icu.util.Freezable}. 50 * 51 * <p> 52 * Users are strongly encouraged to read the <a href="http://userguide.icu-project.org/collation">User 53 * Guide</a> for more information about the collation service before using this class. 54 * </p> 55 * 56 * <p> 57 * Create a RuleBasedCollator from a locale by calling the getInstance(Locale) factory method in the base class 58 * Collator. Collator.getInstance(Locale) creates a RuleBasedCollator object based on the collation rules defined by the 59 * argument locale. If a customized collation ordering or attributes are required, use the RuleBasedCollator(String) 60 * constructor with the appropriate rules. The customized RuleBasedCollator will base its ordering on the CLDR root collation, while 61 * re-adjusting the attributes and orders of the characters in the specified rule accordingly. 62 * </p> 63 * 64 * <p> 65 * RuleBasedCollator provides correct collation orders for most locales supported in ICU. If specific data for a locale 66 * is not available, the order eventually falls back to the 67 * <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>. 68 * </p> 69 * 70 * <p> 71 * For information about the collation rule syntax and details about customization, please refer to the <a 72 * href="http://userguide.icu-project.org/collation/customization">Collation customization</a> section of the 73 * User Guide. 74 * </p> 75 * 76 * <p> 77 * <strong>Note</strong> that there are some differences between the Collation rule syntax used in Java and ICU4J: 78 * 79 * <ul> 80 * <li>According to the JDK documentation: <i> 81 * <p> 82 * Modifier '!' : Turns on Thai/Lao vowel-consonant swapping. 
If this rule is in force when a Thai vowel of the range 83 * \U0E40-\U0E44 precedes a Thai consonant of the range \U0E01-\U0E2E OR a Lao vowel of the range 84 * \U0EC0-\U0EC4 precedes a Lao consonant of the range \U0E81-\U0EAE then the vowel is placed after the 85 * consonant for collation purposes. 86 * </p> 87 * <p> 88 * If a rule is without the modifier '!', the Thai/Lao vowel-consonant swapping is not turned on. 89 * </p> 90 * </i> 91 * <p> 92 * ICU4J's RuleBasedCollator does not support turning off the Thai/Lao vowel-consonant swapping, since the UCA clearly 93 * states that it has to be supported to ensure a correct sorting order. If a '!' is encountered, it is ignored. 94 * </p> 95 * <li>As mentioned in the documentation of the base class Collator, compatibility decomposition mode is not supported. 96 * </ul> 97 * <p> 98 * <strong>Examples</strong> 99 * </p> 100 * <p> 101 * Creating Customized RuleBasedCollators: <blockquote> 102 * 103 * <pre> 104 * String simple = "& a < b < c < d"; 105 * RuleBasedCollator simpleCollator = new RuleBasedCollator(simple); 106 * 107 * String norwegian = "& a , A < b , B < c , C < d , D < e , E " 108 * + "< f , F < g , G < h , H < i , I < j , " 109 * + "J < k , K < l , L < m , M < n , N < " 110 * + "o , O < p , P < q , Q < r , R < s , S < " 111 * + "t , T < u , U < v , V < w , W < x , X " 112 * + "< y , Y < z , Z < \u00E5 = a\u030A " 113 * + ", \u00C5 = A\u030A ; aa , AA < \u00E6 " 114 * + ", \u00C6 < \u00F8 , \u00D8"; 115 * RuleBasedCollator norwegianCollator = new RuleBasedCollator(norwegian); 116 * </pre> 117 * 118 * </blockquote> 119 * 120 * Concatenating rules to combine <code>Collator</code>s: <blockquote> 121 * 122 * <pre> 123 * // Create an en_US Collator object 124 * RuleBasedCollator en_USCollator = (RuleBasedCollator) 125 * Collator.getInstance(new Locale("en", "US", "")); 126 * // Create a da_DK Collator object 127 * RuleBasedCollator da_DKCollator = (RuleBasedCollator) 128 * Collator.getInstance(new Locale("da", 
"DK", "")); 129 * // Combine the two 130 * // First, get the collation rules from en_USCollator 131 * String en_USRules = en_USCollator.getRules(); 132 * // Second, get the collation rules from da_DKCollator 133 * String da_DKRules = da_DKCollator.getRules(); 134 * RuleBasedCollator newCollator = 135 * new RuleBasedCollator(en_USRules + da_DKRules); 136 * // newCollator has the combined rules 137 * </pre> 138 * 139 * </blockquote> 140 * 141 * Making changes to an existing RuleBasedCollator to create a new <code>Collator</code> object, by appending changes to 142 * the existing rule: <blockquote> 143 * 144 * <pre> 145 * // Create a new Collator object with additional rules 146 * String addRules = "& C < ch, cH, Ch, CH"; 147 * RuleBasedCollator myCollator = 148 * new RuleBasedCollator(en_USCollator.getRules() + addRules); 149 * // myCollator contains the new rules 150 * </pre> 151 * 152 * </blockquote> 153 * 154 * How to change the order of non-spacing accents: <blockquote> 155 * 156 * <pre> 157 * // old rule with main accents 158 * String oldRules = "= \u0301 ; \u0300 ; \u0302 ; \u0308 " 159 * + "; \u0327 ; \u0303 ; \u0304 ; \u0305 " 160 * + "; \u0306 ; \u0307 ; \u0309 ; \u030A " 161 * + "; \u030B ; \u030C ; \u030D ; \u030E " 162 * + "; \u030F ; \u0310 ; \u0311 ; \u0312 " 163 * + "< a , A ; ae, AE ; \u00e6 , \u00c6 " 164 * + "< b , B < c, C < e, E & C < d , D"; 165 * // change the order of accent characters 166 * String addOn = "& \u0300 ; \u0308 ; \u0302"; 167 * RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn); 168 * </pre> 169 * 170 * </blockquote> 171 * 172 * Putting in a new primary ordering before the default setting, e.g. 
sort English characters before or after Japanese 173 * characters in the Japanese <code>Collator</code>: <blockquote> 174 * 175 * <pre> 176 * // get en_US Collator rules 177 * RuleBasedCollator en_USCollator 178 * = (RuleBasedCollator)Collator.getInstance(Locale.US); 179 * // add a few Japanese characters to sort before English characters 180 * // suppose the last character before the first base letter 'a' in 181 * // the English collation rule is \u2212 182 * String jaString = "& \u2212 < \u3041, \u3042 < \u3043, " 183 * + "\u3044"; 184 * RuleBasedCollator myJapaneseCollator 185 * = new RuleBasedCollator(en_USCollator.getRules() + jaString); 186 * </pre> 187 * 188 * </blockquote> 189 * </p> 190 * <p> 191 * This class is not subclassable 192 * </p> 193 * 194 * @author Syn Wee Quek 195 * @stable ICU 2.8 196 */ 197 public final class RuleBasedCollator extends Collator { 198 // public constructors --------------------------------------------------- 199 200 /** 201 * <p> 202 * Constructor that takes the argument rules for customization. 203 * The collator will be based on the CLDR root collation, with the 204 * attributes and re-ordering of the characters specified in the argument rules. 205 * </p> 206 * <p> 207 * See the User Guide's section on <a href="http://userguide.icu-project.org/collation/customization"> 208 * Collation Customization</a> for details on the rule syntax. 209 * </p> 210 * 211 * @param rules 212 * the collation rules to build the collation table from. 213 * @exception ParseException 214 * and IOException thrown. ParseException thrown when argument rules have an invalid syntax. 215 * IOException thrown when an error occurred while reading internal data. 
216 * @stable ICU 2.8 217 */ RuleBasedCollator(String rules)218 public RuleBasedCollator(String rules) throws Exception { 219 if (rules == null) { 220 throw new IllegalArgumentException("Collation rules can not be null"); 221 } 222 validLocale = ULocale.ROOT; 223 internalBuildTailoring(rules); 224 } 225 226 /** 227 * Implements from-rule constructors. 228 * @param rules rule string 229 * @throws Exception 230 */ internalBuildTailoring(String rules)231 private final void internalBuildTailoring(String rules) throws Exception { 232 CollationTailoring base = CollationRoot.getRoot(); 233 // Most code using Collator does not need to build a Collator from rules. 234 // By using reflection, most code will not have a static dependency on the builder code. 235 // CollationBuilder builder = new CollationBuilder(base); 236 ClassLoader classLoader = ClassLoaderUtil.getClassLoader(getClass()); 237 CollationTailoring t; 238 try { 239 Class<?> builderClass = classLoader.loadClass("com.ibm.icu.impl.coll.CollationBuilder"); 240 Object builder = builderClass.getConstructor(CollationTailoring.class).newInstance(base); 241 // builder.parseAndBuild(rules); 242 Method parseAndBuild = builderClass.getMethod("parseAndBuild", String.class); 243 t = (CollationTailoring)parseAndBuild.invoke(builder, rules); 244 } catch(InvocationTargetException e) { 245 throw (Exception)e.getTargetException(); 246 } 247 t.actualLocale = null; 248 adoptTailoring(t); 249 } 250 251 // public methods -------------------------------------------------------- 252 253 /** 254 * Clones the RuleBasedCollator 255 * 256 * @return a new instance of this RuleBasedCollator object 257 * @stable ICU 2.8 258 */ 259 @Override clone()260 public Object clone() throws CloneNotSupportedException { 261 if (isFrozen()) { 262 return this; 263 } 264 return cloneAsThawed(); 265 } 266 initMaxExpansions()267 private final void initMaxExpansions() { 268 synchronized(tailoring) { 269 if (tailoring.maxExpansions == null) { 270 
tailoring.maxExpansions = CollationElementIterator.computeMaxExpansions(tailoring.data); 271 } 272 } 273 } 274 275 /** 276 * Return a CollationElementIterator for the given String. 277 * 278 * @see CollationElementIterator 279 * @stable ICU 2.8 280 */ getCollationElementIterator(String source)281 public CollationElementIterator getCollationElementIterator(String source) { 282 initMaxExpansions(); 283 return new CollationElementIterator(source, this); 284 } 285 286 /** 287 * Return a CollationElementIterator for the given CharacterIterator. The source iterator's integrity will be 288 * preserved since a new copy will be created for use. 289 * 290 * @see CollationElementIterator 291 * @stable ICU 2.8 292 */ getCollationElementIterator(CharacterIterator source)293 public CollationElementIterator getCollationElementIterator(CharacterIterator source) { 294 initMaxExpansions(); 295 CharacterIterator newsource = (CharacterIterator) source.clone(); 296 return new CollationElementIterator(newsource, this); 297 } 298 299 /** 300 * Return a CollationElementIterator for the given UCharacterIterator. The source iterator's integrity will be 301 * preserved since a new copy will be created for use. 302 * 303 * @see CollationElementIterator 304 * @stable ICU 2.8 305 */ getCollationElementIterator(UCharacterIterator source)306 public CollationElementIterator getCollationElementIterator(UCharacterIterator source) { 307 initMaxExpansions(); 308 return new CollationElementIterator(source, this); 309 } 310 311 // Freezable interface implementation ------------------------------------------------- 312 313 /** 314 * Determines whether the object has been frozen or not. 315 * 316 * <p>An unfrozen Collator is mutable and not thread-safe. 317 * A frozen Collator is immutable and thread-safe. 318 * 319 * @stable ICU 4.8 320 */ 321 @Override isFrozen()322 public boolean isFrozen() { 323 return frozenLock != null; 324 } 325 326 /** 327 * Freezes the collator. 328 * @return the collator itself. 
329 * @stable ICU 4.8 330 */ 331 @Override freeze()332 public Collator freeze() { 333 if (!isFrozen()) { 334 frozenLock = new ReentrantLock(); 335 if (collationBuffer == null) { 336 collationBuffer = new CollationBuffer(data); 337 } 338 } 339 return this; 340 } 341 342 /** 343 * Provides for the clone operation. Any clone is initially unfrozen. 344 * @stable ICU 4.8 345 */ 346 @Override cloneAsThawed()347 public RuleBasedCollator cloneAsThawed() { 348 try { 349 RuleBasedCollator result = (RuleBasedCollator) super.clone(); 350 // since all collation data in the RuleBasedCollator do not change 351 // we can safely assign the result.fields to this collator 352 // except in cases where we can't 353 result.settings = settings.clone(); 354 result.collationBuffer = null; 355 result.frozenLock = null; 356 return result; 357 } catch (CloneNotSupportedException e) { 358 // Clone is implemented 359 return null; 360 } 361 } 362 363 // public setters -------------------------------------------------------- 364 checkNotFrozen()365 private void checkNotFrozen() { 366 if (isFrozen()) { 367 throw new UnsupportedOperationException("Attempt to modify frozen RuleBasedCollator"); 368 } 369 } 370 getOwnedSettings()371 private final CollationSettings getOwnedSettings() { 372 return settings.copyOnWrite(); 373 } 374 getDefaultSettings()375 private final CollationSettings getDefaultSettings() { 376 return tailoring.settings.readOnly(); 377 } 378 379 /** 380 * Sets the Hiragana Quaternary mode to be on or off. When the Hiragana Quaternary mode is turned on, the collator 381 * positions Hiragana characters before all non-ignorable characters in QUATERNARY strength. This is to produce a 382 * correct JIS collation order, distinguishing between Katakana and Hiragana characters. 383 * 384 * <p>This attribute was an implementation detail of the CLDR Japanese tailoring. 385 * Since ICU 50, this attribute is not settable any more via API functions. 
386 * Since CLDR 25/ICU 53, explicit quaternary relations are used 387 * to achieve the same Japanese sort order. 388 * 389 * @param flag 390 * true if Hiragana Quaternary mode is to be on, false otherwise 391 * @see #setHiraganaQuaternaryDefault 392 * @see #isHiraganaQuaternary 393 * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. 394 */ 395 @Deprecated setHiraganaQuaternary(boolean flag)396 public void setHiraganaQuaternary(boolean flag) { 397 checkNotFrozen(); 398 } 399 400 /** 401 * Sets the Hiragana Quaternary mode to the initial mode set during construction of the RuleBasedCollator. See 402 * setHiraganaQuaternary(boolean) for more details. 403 * 404 * <p>This attribute was an implementation detail of the CLDR Japanese tailoring. 405 * Since ICU 50, this attribute is not settable any more via API functions. 406 * Since CLDR 25/ICU 53, explicit quaternary relations are used 407 * to achieve the same Japanese sort order. 408 * 409 * @see #setHiraganaQuaternary(boolean) 410 * @see #isHiraganaQuaternary 411 * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. 412 */ 413 @Deprecated setHiraganaQuaternaryDefault()414 public void setHiraganaQuaternaryDefault() { 415 checkNotFrozen(); 416 } 417 418 /** 419 * Sets whether uppercase characters sort before lowercase characters or vice versa, in strength TERTIARY. The 420 * default mode is false, and so lowercase characters sort before uppercase characters. If true, sort upper case 421 * characters first. 
422 * 423 * @param upperfirst 424 * true to sort uppercase characters before lowercase characters, false to sort lowercase characters 425 * before uppercase characters 426 * @see #isLowerCaseFirst 427 * @see #isUpperCaseFirst 428 * @see #setLowerCaseFirst 429 * @see #setCaseFirstDefault 430 * @stable ICU 2.8 431 */ setUpperCaseFirst(boolean upperfirst)432 public void setUpperCaseFirst(boolean upperfirst) { 433 checkNotFrozen(); 434 if (upperfirst == isUpperCaseFirst()) { return; } 435 CollationSettings ownedSettings = getOwnedSettings(); 436 ownedSettings.setCaseFirst(upperfirst ? CollationSettings.CASE_FIRST_AND_UPPER_MASK : 0); 437 setFastLatinOptions(ownedSettings); 438 } 439 440 /** 441 * Sets the orders of lower cased characters to sort before upper cased characters, in strength TERTIARY. The 442 * default mode is false. If true is set, the RuleBasedCollator will sort lower cased characters before the upper 443 * cased ones. Otherwise, if false is set, the RuleBasedCollator will ignore case preferences. 444 * 445 * @param lowerfirst 446 * true for sorting lower cased characters before upper cased characters, false to ignore case 447 * preferences. 448 * @see #isLowerCaseFirst 449 * @see #isUpperCaseFirst 450 * @see #setUpperCaseFirst 451 * @see #setCaseFirstDefault 452 * @stable ICU 2.8 453 */ setLowerCaseFirst(boolean lowerfirst)454 public void setLowerCaseFirst(boolean lowerfirst) { 455 checkNotFrozen(); 456 if (lowerfirst == isLowerCaseFirst()) { return; } 457 CollationSettings ownedSettings = getOwnedSettings(); 458 ownedSettings.setCaseFirst(lowerfirst ? CollationSettings.CASE_FIRST : 0); 459 setFastLatinOptions(ownedSettings); 460 } 461 462 /** 463 * Sets the case first mode to the initial mode set during construction of the RuleBasedCollator. See 464 * setUpperCaseFirst(boolean) and setLowerCaseFirst(boolean) for more details. 
465 * 466 * @see #isLowerCaseFirst 467 * @see #isUpperCaseFirst 468 * @see #setLowerCaseFirst(boolean) 469 * @see #setUpperCaseFirst(boolean) 470 * @stable ICU 2.8 471 */ setCaseFirstDefault()472 public final void setCaseFirstDefault() { 473 checkNotFrozen(); 474 CollationSettings defaultSettings = getDefaultSettings(); 475 if(settings.readOnly() == defaultSettings) { return; } 476 CollationSettings ownedSettings = getOwnedSettings(); 477 ownedSettings.setCaseFirstDefault(defaultSettings.options); 478 setFastLatinOptions(ownedSettings); 479 } 480 481 /** 482 * Sets the alternate handling mode to the initial mode set during construction of the RuleBasedCollator. See 483 * setAlternateHandling(boolean) for more details. 484 * 485 * @see #setAlternateHandlingShifted(boolean) 486 * @see #isAlternateHandlingShifted() 487 * @stable ICU 2.8 488 */ setAlternateHandlingDefault()489 public void setAlternateHandlingDefault() { 490 checkNotFrozen(); 491 CollationSettings defaultSettings = getDefaultSettings(); 492 if(settings.readOnly() == defaultSettings) { return; } 493 CollationSettings ownedSettings = getOwnedSettings(); 494 ownedSettings.setAlternateHandlingDefault(defaultSettings.options); 495 setFastLatinOptions(ownedSettings); 496 } 497 498 /** 499 * Sets the case level mode to the initial mode set during construction of the RuleBasedCollator. See 500 * setCaseLevel(boolean) for more details. 
501 * 502 * @see #setCaseLevel(boolean) 503 * @see #isCaseLevel 504 * @stable ICU 2.8 505 */ setCaseLevelDefault()506 public void setCaseLevelDefault() { 507 checkNotFrozen(); 508 CollationSettings defaultSettings = getDefaultSettings(); 509 if(settings.readOnly() == defaultSettings) { return; } 510 CollationSettings ownedSettings = getOwnedSettings(); 511 ownedSettings.setFlagDefault(CollationSettings.CASE_LEVEL, defaultSettings.options); 512 setFastLatinOptions(ownedSettings); 513 } 514 515 /** 516 * Sets the decomposition mode to the initial mode set during construction of the RuleBasedCollator. See 517 * setDecomposition(int) for more details. 518 * 519 * @see #getDecomposition 520 * @see #setDecomposition(int) 521 * @stable ICU 2.8 522 */ setDecompositionDefault()523 public void setDecompositionDefault() { 524 checkNotFrozen(); 525 CollationSettings defaultSettings = getDefaultSettings(); 526 if(settings.readOnly() == defaultSettings) { return; } 527 CollationSettings ownedSettings = getOwnedSettings(); 528 ownedSettings.setFlagDefault(CollationSettings.CHECK_FCD, defaultSettings.options); 529 setFastLatinOptions(ownedSettings); 530 } 531 532 /** 533 * Sets the French collation mode to the initial mode set during construction of the RuleBasedCollator. See 534 * setFrenchCollation(boolean) for more details. 535 * 536 * @see #isFrenchCollation 537 * @see #setFrenchCollation(boolean) 538 * @stable ICU 2.8 539 */ setFrenchCollationDefault()540 public void setFrenchCollationDefault() { 541 checkNotFrozen(); 542 CollationSettings defaultSettings = getDefaultSettings(); 543 if(settings.readOnly() == defaultSettings) { return; } 544 CollationSettings ownedSettings = getOwnedSettings(); 545 ownedSettings.setFlagDefault(CollationSettings.BACKWARD_SECONDARY, defaultSettings.options); 546 setFastLatinOptions(ownedSettings); 547 } 548 549 /** 550 * Sets the collation strength to the initial mode set during the construction of the RuleBasedCollator. 
See 551 * setStrength(int) for more details. 552 * 553 * @see #setStrength(int) 554 * @see #getStrength 555 * @stable ICU 2.8 556 */ setStrengthDefault()557 public void setStrengthDefault() { 558 checkNotFrozen(); 559 CollationSettings defaultSettings = getDefaultSettings(); 560 if(settings.readOnly() == defaultSettings) { return; } 561 CollationSettings ownedSettings = getOwnedSettings(); 562 ownedSettings.setStrengthDefault(defaultSettings.options); 563 setFastLatinOptions(ownedSettings); 564 } 565 566 /** 567 * Method to set numeric collation to its default value. 568 * 569 * @see #getNumericCollation 570 * @see #setNumericCollation 571 * @stable ICU 2.8 572 */ setNumericCollationDefault()573 public void setNumericCollationDefault() { 574 checkNotFrozen(); 575 CollationSettings defaultSettings = getDefaultSettings(); 576 if(settings.readOnly() == defaultSettings) { return; } 577 CollationSettings ownedSettings = getOwnedSettings(); 578 ownedSettings.setFlagDefault(CollationSettings.NUMERIC, defaultSettings.options); 579 setFastLatinOptions(ownedSettings); 580 } 581 582 /** 583 * Sets the mode for the direction of SECONDARY weights to be used in French collation. The default value is false, 584 * which treats SECONDARY weights in the order they appear. If set to true, the SECONDARY weights will be sorted 585 * backwards. See the section on <a href="http://userguide.icu-project.org/collation/architecture"> 586 * French collation</a> for more information. 
587 * 588 * @param flag 589 * true to set the French collation on, false to set it off 590 * @stable ICU 2.8 591 * @see #isFrenchCollation 592 * @see #setFrenchCollationDefault 593 */ setFrenchCollation(boolean flag)594 public void setFrenchCollation(boolean flag) { 595 checkNotFrozen(); 596 if(flag == isFrenchCollation()) { return; } 597 CollationSettings ownedSettings = getOwnedSettings(); 598 ownedSettings.setFlag(CollationSettings.BACKWARD_SECONDARY, flag); 599 setFastLatinOptions(ownedSettings); 600 } 601 602 /** 603 * Sets the alternate handling for QUATERNARY strength to be either shifted or non-ignorable. See the UCA definition 604 * on <a href="http://www.unicode.org/unicode/reports/tr10/#Variable_Weighting">Variable Weighting</a>. This 605 * attribute will only be effective when QUATERNARY strength is set. The default value for this mode is false, 606 * corresponding to the NON_IGNORABLE mode in UCA. In the NON_IGNORABLE mode, the RuleBasedCollator treats all 607 * the code points with non-ignorable primary weights in the same way. If the mode is set to true, the behavior 608 * corresponds to SHIFTED defined in UCA, this causes code points with PRIMARY orders that are equal or below the 609 * variable top value to be ignored in PRIMARY order and moved to the QUATERNARY order. 610 * 611 * @param shifted 612 * true if SHIFTED behavior for alternate handling is desired, false for the NON_IGNORABLE behavior. 
613 * @see #isAlternateHandlingShifted 614 * @see #setAlternateHandlingDefault 615 * @stable ICU 2.8 616 */ setAlternateHandlingShifted(boolean shifted)617 public void setAlternateHandlingShifted(boolean shifted) { 618 checkNotFrozen(); 619 if(shifted == isAlternateHandlingShifted()) { return; } 620 CollationSettings ownedSettings = getOwnedSettings(); 621 ownedSettings.setAlternateHandlingShifted(shifted); 622 setFastLatinOptions(ownedSettings); 623 } 624 625 /** 626 * <p> 627 * When case level is set to true, an additional weight is formed between the SECONDARY and TERTIARY weight, known 628 * as the case level. The case level is used to distinguish large and small Japanese Kana characters. Case level 629 * could also be used in other situations. For example to distinguish certain Pinyin characters. The default value 630 * is false, which means the case level is not generated. The contents of the case level are affected by the case 631 * first mode. A simple way to ignore accent differences in a string is to set the strength to PRIMARY and enable 632 * case level. 633 * </p> 634 * <p> 635 * See the section on <a href="http://userguide.icu-project.org/collation/architecture">case 636 * level</a> for more information. 637 * </p> 638 * 639 * @param flag 640 * true if case level sorting is required, false otherwise 641 * @stable ICU 2.8 642 * @see #setCaseLevelDefault 643 * @see #isCaseLevel 644 */ setCaseLevel(boolean flag)645 public void setCaseLevel(boolean flag) { 646 checkNotFrozen(); 647 if(flag == isCaseLevel()) { return; } 648 CollationSettings ownedSettings = getOwnedSettings(); 649 ownedSettings.setFlag(CollationSettings.CASE_LEVEL, flag); 650 setFastLatinOptions(ownedSettings); 651 } 652 653 /** 654 * Sets the decomposition mode of this Collator. Setting this 655 * decomposition attribute with CANONICAL_DECOMPOSITION allows the 656 * Collator to handle un-normalized text properly, producing the 657 * same results as if the text were normalized. 
If 658 * NO_DECOMPOSITION is set, it is the user's responsibility to 659 * insure that all text is already in the appropriate form before 660 * a comparison or before getting a CollationKey. Adjusting 661 * decomposition mode allows the user to select between faster and 662 * more complete collation behavior.</p> 663 * 664 * <p>Since a great many of the world's languages do not require 665 * text normalization, most locales set NO_DECOMPOSITION as the 666 * default decomposition mode.</p> 667 * 668 * The default decompositon mode for the Collator is 669 * NO_DECOMPOSITON, unless specified otherwise by the locale used 670 * to create the Collator.</p> 671 * 672 * <p>See getDecomposition for a description of decomposition 673 * mode.</p> 674 * 675 * @param decomposition the new decomposition mode 676 * @see #getDecomposition 677 * @see #NO_DECOMPOSITION 678 * @see #CANONICAL_DECOMPOSITION 679 * @throws IllegalArgumentException If the given value is not a valid 680 * decomposition mode. 681 * @stable ICU 2.8 682 */ 683 @Override setDecomposition(int decomposition)684 public void setDecomposition(int decomposition) 685 { 686 checkNotFrozen(); 687 boolean flag; 688 switch(decomposition) { 689 case NO_DECOMPOSITION: 690 flag = false; 691 break; 692 case CANONICAL_DECOMPOSITION: 693 flag = true; 694 break; 695 default: 696 throw new IllegalArgumentException("Wrong decomposition mode."); 697 } 698 if(flag == settings.readOnly().getFlag(CollationSettings.CHECK_FCD)) { return; } 699 CollationSettings ownedSettings = getOwnedSettings(); 700 ownedSettings.setFlag(CollationSettings.CHECK_FCD, flag); 701 setFastLatinOptions(ownedSettings); 702 } 703 704 /** 705 * Sets this Collator's strength attribute. The strength attribute determines the minimum level of difference 706 * considered significant during comparison. 707 * 708 * <p>See the Collator class description for an example of use. 709 * 710 * @param newStrength 711 * the new strength value. 
712 * @see #getStrength 713 * @see #setStrengthDefault 714 * @see #PRIMARY 715 * @see #SECONDARY 716 * @see #TERTIARY 717 * @see #QUATERNARY 718 * @see #IDENTICAL 719 * @exception IllegalArgumentException 720 * If the new strength value is not one of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL. 721 * @stable ICU 2.8 722 */ 723 @Override setStrength(int newStrength)724 public void setStrength(int newStrength) { 725 checkNotFrozen(); 726 if(newStrength == getStrength()) { return; } 727 CollationSettings ownedSettings = getOwnedSettings(); 728 ownedSettings.setStrength(newStrength); 729 setFastLatinOptions(ownedSettings); 730 } 731 732 /** 733 * {@icu} Sets the variable top to the top of the specified reordering group. 734 * The variable top determines the highest-sorting character 735 * which is affected by the alternate handling behavior. 736 * If that attribute is set to NON_IGNORABLE, then the variable top has no effect. 737 * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION, 738 * Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY; 739 * or Collator.ReorderCodes.DEFAULT to restore the default max variable group 740 * @return this 741 * @see #getMaxVariable 742 * @stable ICU 53 743 */ 744 @Override setMaxVariable(int group)745 public RuleBasedCollator setMaxVariable(int group) { 746 // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1. 
747 int value; 748 if(group == Collator.ReorderCodes.DEFAULT) { 749 value = -1; // UCOL_DEFAULT 750 } else if(Collator.ReorderCodes.FIRST <= group && group <= Collator.ReorderCodes.CURRENCY) { 751 value = group - Collator.ReorderCodes.FIRST; 752 } else { 753 throw new IllegalArgumentException("illegal max variable group " + group); 754 } 755 int oldValue = settings.readOnly().getMaxVariable(); 756 if(value == oldValue) { 757 return this; 758 } 759 CollationSettings defaultSettings = getDefaultSettings(); 760 if(settings.readOnly() == defaultSettings) { 761 if(value < 0) { // UCOL_DEFAULT 762 return this; 763 } 764 } 765 CollationSettings ownedSettings = getOwnedSettings(); 766 767 if(group == Collator.ReorderCodes.DEFAULT) { 768 group = Collator.ReorderCodes.FIRST + defaultSettings.getMaxVariable(); 769 } 770 long varTop = data.getLastPrimaryForGroup(group); 771 assert(varTop != 0); 772 ownedSettings.setMaxVariable(value, defaultSettings.options); 773 ownedSettings.variableTop = varTop; 774 setFastLatinOptions(ownedSettings); 775 return this; 776 } 777 778 /** 779 * {@icu} Returns the maximum reordering group whose characters are affected by 780 * the alternate handling behavior. 781 * @return the maximum variable reordering group. 782 * @see #setMaxVariable 783 * @stable ICU 53 784 */ 785 @Override getMaxVariable()786 public int getMaxVariable() { 787 return Collator.ReorderCodes.FIRST + settings.readOnly().getMaxVariable(); 788 } 789 790 /** 791 * {@icu} Sets the variable top to the primary weight of the specified string. 792 * 793 * <p>Beginning with ICU 53, the variable top is pinned to 794 * the top of one of the supported reordering groups, 795 * and it must not be beyond the last of those groups. 796 * See {@link #setMaxVariable(int)}. 
797 * 798 * @param varTop 799 * one or more (if contraction) characters to which the variable top should be set 800 * @return variable top primary weight 801 * @exception IllegalArgumentException 802 * is thrown if varTop argument is not a valid variable top element. A variable top element is 803 * invalid when 804 * <ul> 805 * <li>it is a contraction that does not exist in the Collation order 806 * <li>the variable top is beyond 807 * the last reordering group supported by setMaxVariable() 808 * <li>when the varTop argument is null or zero in length. 809 * </ul> 810 * @see #getVariableTop 811 * @see RuleBasedCollator#setAlternateHandlingShifted 812 * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead. 813 */ 814 @Override 815 @Deprecated setVariableTop(String varTop)816 public int setVariableTop(String varTop) { 817 checkNotFrozen(); 818 if (varTop == null || varTop.length() == 0) { 819 throw new IllegalArgumentException("Variable top argument string can not be null or zero in length."); 820 } 821 boolean numeric = settings.readOnly().isNumeric(); 822 long ce1, ce2; 823 if(settings.readOnly().dontCheckFCD()) { 824 UTF16CollationIterator ci = new UTF16CollationIterator(data, numeric, varTop, 0); 825 ce1 = ci.nextCE(); 826 ce2 = ci.nextCE(); 827 } else { 828 FCDUTF16CollationIterator ci = new FCDUTF16CollationIterator(data, numeric, varTop, 0); 829 ce1 = ci.nextCE(); 830 ce2 = ci.nextCE(); 831 } 832 if(ce1 == Collation.NO_CE || ce2 != Collation.NO_CE) { 833 throw new IllegalArgumentException("Variable top argument string must map to exactly one collation element"); 834 } 835 internalSetVariableTop(ce1 >>> 32); 836 return (int)settings.readOnly().variableTop; 837 } 838 839 /** 840 * {@icu} Sets the variable top to the specified primary weight. 841 * 842 * <p>Beginning with ICU 53, the variable top is pinned to 843 * the top of one of the supported reordering groups, 844 * and it must not be beyond the last of those groups. 
845 * See {@link #setMaxVariable(int)}. 846 * 847 * @param varTop primary weight, as returned by setVariableTop or getVariableTop 848 * @see #getVariableTop 849 * @see #setVariableTop(String) 850 * @deprecated ICU 53 Call setMaxVariable() instead. 851 */ 852 @Override 853 @Deprecated setVariableTop(int varTop)854 public void setVariableTop(int varTop) { 855 checkNotFrozen(); 856 internalSetVariableTop(varTop & 0xffffffffL); 857 } 858 internalSetVariableTop(long varTop)859 private void internalSetVariableTop(long varTop) { 860 if(varTop != settings.readOnly().variableTop) { 861 // Pin the variable top to the end of the reordering group which contains it. 862 // Only a few special groups are supported. 863 int group = data.getGroupForPrimary(varTop); 864 if(group < Collator.ReorderCodes.FIRST || Collator.ReorderCodes.CURRENCY < group) { 865 throw new IllegalArgumentException("The variable top must be a primary weight in " + 866 "the space/punctuation/symbols/currency symbols range"); 867 } 868 long v = data.getLastPrimaryForGroup(group); 869 assert(v != 0 && v >= varTop); 870 varTop = v; 871 if(varTop != settings.readOnly().variableTop) { 872 CollationSettings ownedSettings = getOwnedSettings(); 873 ownedSettings.setMaxVariable(group - Collator.ReorderCodes.FIRST, 874 getDefaultSettings().options); 875 ownedSettings.variableTop = varTop; 876 setFastLatinOptions(ownedSettings); 877 } 878 } 879 } 880 881 /** 882 * {@icu} When numeric collation is turned on, this Collator makes 883 * substrings of digits sort according to their numeric values. 884 * 885 * <p>This is a way to get '100' to sort AFTER '2'. Note that the longest 886 * digit substring that can be treated as a single unit is 887 * 254 digits (not counting leading zeros). If a digit substring is 888 * longer than that, the digits beyond the limit will be treated as a 889 * separate digit substring. 
890 * 891 * <p>A "digit" in this sense is a code point with General_Category=Nd, 892 * which does not include circled numbers, roman numerals, etc. 893 * Only a contiguous digit substring is considered, that is, 894 * non-negative integers without separators. 895 * There is no support for plus/minus signs, decimals, exponents, etc. 896 * 897 * @param flag 898 * true to turn numeric collation on and false to turn it off 899 * @see #getNumericCollation 900 * @see #setNumericCollationDefault 901 * @stable ICU 2.8 902 */ setNumericCollation(boolean flag)903 public void setNumericCollation(boolean flag) { 904 checkNotFrozen(); 905 // sort substrings of digits as numbers 906 if(flag == getNumericCollation()) { return; } 907 CollationSettings ownedSettings = getOwnedSettings(); 908 ownedSettings.setFlag(CollationSettings.NUMERIC, flag); 909 setFastLatinOptions(ownedSettings); 910 } 911 912 /** 913 * {@inheritDoc} 914 * 915 * @param order the reordering codes to apply to this collator; if this is null or an empty array 916 * then this clears any existing reordering 917 * @throws IllegalArgumentException if the reordering codes are malformed in any way (e.g. duplicates, multiple reset codes, overlapping equivalent scripts) 918 * @see #getReorderCodes 919 * @see Collator#getEquivalentReorderCodes 920 * @see Collator.ReorderCodes 921 * @see UScript 922 * @stable ICU 4.8 923 */ 924 @Override setReorderCodes(int... order)925 public void setReorderCodes(int... order) { 926 checkNotFrozen(); 927 int length = (order != null) ? order.length : 0; 928 if(length == 1 && order[0] == ReorderCodes.NONE) { 929 length = 0; 930 } 931 if(length == 0 ? 
932 settings.readOnly().reorderCodes.length == 0 : 933 Arrays.equals(order, settings.readOnly().reorderCodes)) { 934 return; 935 } 936 CollationSettings defaultSettings = getDefaultSettings(); 937 if(length == 1 && order[0] == Collator.ReorderCodes.DEFAULT) { 938 if(settings.readOnly() != defaultSettings) { 939 CollationSettings ownedSettings = getOwnedSettings(); 940 ownedSettings.copyReorderingFrom(defaultSettings); 941 setFastLatinOptions(ownedSettings); 942 } 943 return; 944 } 945 CollationSettings ownedSettings = getOwnedSettings(); 946 if(length == 0) { 947 ownedSettings.resetReordering(); 948 } else { 949 ownedSettings.setReordering(data, order.clone()); 950 } 951 setFastLatinOptions(ownedSettings); 952 } 953 setFastLatinOptions(CollationSettings ownedSettings)954 private void setFastLatinOptions(CollationSettings ownedSettings) { 955 ownedSettings.fastLatinOptions = CollationFastLatin.getOptions( 956 data, ownedSettings, ownedSettings.fastLatinPrimaries); 957 } 958 959 // public getters -------------------------------------------------------- 960 961 /** 962 * Gets the collation tailoring rules for this RuleBasedCollator. 963 * Equivalent to String getRules(false). 964 * 965 * @return the collation tailoring rules 966 * @see #getRules(boolean) 967 * @stable ICU 2.8 968 */ getRules()969 public String getRules() { 970 return tailoring.rules; 971 } 972 973 /** 974 * Returns current rules. 975 * The argument defines whether full rules (root collation + tailored) rules are returned 976 * or just the tailoring. 977 * 978 * <p>The root collation rules are an <i>approximation</i> of the root collator's sort order. 979 * They are almost never used or useful at runtime and can be removed from the data. 980 * See <a href="http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales">User Guide: 981 * Collation Customization, Building on Existing Locales</a> 982 * 983 * <p>{@link #getRules()} should normally be used instead. 
984 * @param fullrules 985 * true if the rules that defines the full set of collation order is required, otherwise false for 986 * returning only the tailored rules 987 * @return the current rules that defines this Collator. 988 * @see #getRules() 989 * @stable ICU 2.6 990 */ getRules(boolean fullrules)991 public String getRules(boolean fullrules) { 992 if (!fullrules) { 993 return tailoring.rules; 994 } 995 return CollationLoader.getRootRules() + tailoring.rules; 996 } 997 998 /** 999 * Get a UnicodeSet that contains all the characters and sequences tailored in this collator. 1000 * 1001 * @return a pointer to a UnicodeSet object containing all the code points and sequences that may sort differently 1002 * than in the root collator. 1003 * @stable ICU 2.4 1004 */ 1005 @Override getTailoredSet()1006 public UnicodeSet getTailoredSet() { 1007 UnicodeSet tailored = new UnicodeSet(); 1008 if(data.base != null) { 1009 new TailoredSet(tailored).forData(data); 1010 } 1011 return tailored; 1012 } 1013 1014 /** 1015 * Gets unicode sets containing contractions and/or expansions of a collator 1016 * 1017 * @param contractions 1018 * if not null, set to contain contractions 1019 * @param expansions 1020 * if not null, set to contain expansions 1021 * @param addPrefixes 1022 * add the prefix contextual elements to contractions 1023 * @throws Exception 1024 * Throws an exception if any errors occurs. 1025 * @stable ICU 3.4 1026 */ getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, boolean addPrefixes)1027 public void getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, boolean addPrefixes) 1028 throws Exception { 1029 if (contractions != null) { 1030 contractions.clear(); 1031 } 1032 if (expansions != null) { 1033 expansions.clear(); 1034 } 1035 new ContractionsAndExpansions(contractions, expansions, null, addPrefixes).forData(data); 1036 } 1037 1038 /** 1039 * Adds the contractions that start with character c to the set. 
* Ignores prefixes. Used by AlphabeticIndex.
     * @internal
     * @deprecated This API is ICU internal only.
     */
    void internalAddContractions(int c, UnicodeSet set) {
        // Collect contractions only: no expansions set, no sink, no prefixes.
        ContractionsAndExpansions collector = new ContractionsAndExpansions(set, null, null, false);
        collector.forCodePoint(data, c);
    }

    /**
     * <p>
     * Get a Collation key for the argument String source from this RuleBasedCollator.
     * </p>
     * <p>
     * General recommendation: <br>
     * If comparisons are to be done on the same String multiple times, it would be more efficient to generate
     * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If each
     * String is compared only once, using the method RuleBasedCollator.compare(String, String) will have a better
     * performance.
     * </p>
     * <p>
     * See the class documentation for an explanation about CollationKeys.
     * </p>
     *
     * @param source
     *            the text String to be transformed into a collation key.
     * @return the CollationKey for the given String based on this RuleBasedCollator's collation rules. If the source
     *         String is null, a null CollationKey is returned.
1067 * @see CollationKey 1068 * @see #compare(String, String) 1069 * @see #getRawCollationKey 1070 * @stable ICU 2.8 1071 */ 1072 @Override getCollationKey(String source)1073 public CollationKey getCollationKey(String source) { 1074 if (source == null) { 1075 return null; 1076 } 1077 CollationBuffer buffer = null; 1078 try { 1079 buffer = getCollationBuffer(); 1080 return getCollationKey(source, buffer); 1081 } finally { 1082 releaseCollationBuffer(buffer); 1083 } 1084 } 1085 getCollationKey(String source, CollationBuffer buffer)1086 private CollationKey getCollationKey(String source, CollationBuffer buffer) { 1087 buffer.rawCollationKey = getRawCollationKey(source, buffer.rawCollationKey, buffer); 1088 return new CollationKey(source, buffer.rawCollationKey); 1089 } 1090 1091 /** 1092 * Gets the simpler form of a CollationKey for the String source following the rules of this Collator and stores the 1093 * result into the user provided argument key. If key has a internal byte array of length that's too small for the 1094 * result, the internal byte array will be grown to the exact required size. 1095 * 1096 * @param source the text String to be transformed into a RawCollationKey 1097 * @param key output RawCollationKey to store results 1098 * @return If key is null, a new instance of RawCollationKey will be created and returned, otherwise the user 1099 * provided key will be returned. 
1100 * @see #getCollationKey 1101 * @see #compare(String, String) 1102 * @see RawCollationKey 1103 * @stable ICU 2.8 1104 */ 1105 @Override getRawCollationKey(String source, RawCollationKey key)1106 public RawCollationKey getRawCollationKey(String source, RawCollationKey key) { 1107 if (source == null) { 1108 return null; 1109 } 1110 CollationBuffer buffer = null; 1111 try { 1112 buffer = getCollationBuffer(); 1113 return getRawCollationKey(source, key, buffer); 1114 } finally { 1115 releaseCollationBuffer(buffer); 1116 } 1117 } 1118 1119 private static final class CollationKeyByteSink extends SortKeyByteSink { CollationKeyByteSink(RawCollationKey key)1120 CollationKeyByteSink(RawCollationKey key) { 1121 super(key.bytes); 1122 key_ = key; 1123 } 1124 1125 @Override AppendBeyondCapacity(byte[] bytes, int start, int n, int length)1126 protected void AppendBeyondCapacity(byte[] bytes, int start, int n, int length) { 1127 // n > 0 && appended_ > capacity_ 1128 if (Resize(n, length)) { 1129 System.arraycopy(bytes, start, buffer_, length, n); 1130 } 1131 } 1132 1133 @Override Resize(int appendCapacity, int length)1134 protected boolean Resize(int appendCapacity, int length) { 1135 int newCapacity = 2 * buffer_.length; 1136 int altCapacity = length + 2 * appendCapacity; 1137 if (newCapacity < altCapacity) { 1138 newCapacity = altCapacity; 1139 } 1140 if (newCapacity < 200) { 1141 newCapacity = 200; 1142 } 1143 // Do not call key_.ensureCapacity(newCapacity) because we do not 1144 // keep key_.size in sync with appended_. 1145 // We only set it when we are done. 
1146 byte[] newBytes = new byte[newCapacity]; 1147 System.arraycopy(buffer_, 0, newBytes, 0, length); 1148 buffer_ = key_.bytes = newBytes; 1149 return true; 1150 } 1151 1152 private RawCollationKey key_; 1153 } 1154 getRawCollationKey(CharSequence source, RawCollationKey key, CollationBuffer buffer)1155 private RawCollationKey getRawCollationKey(CharSequence source, RawCollationKey key, CollationBuffer buffer) { 1156 if (key == null) { 1157 key = new RawCollationKey(simpleKeyLengthEstimate(source)); 1158 } else if (key.bytes == null) { 1159 key.bytes = new byte[simpleKeyLengthEstimate(source)]; 1160 } 1161 CollationKeyByteSink sink = new CollationKeyByteSink(key); 1162 writeSortKey(source, sink, buffer); 1163 key.size = sink.NumberOfBytesAppended(); 1164 return key; 1165 } 1166 simpleKeyLengthEstimate(CharSequence source)1167 private int simpleKeyLengthEstimate(CharSequence source) { 1168 return 2 * source.length() + 10; 1169 } 1170 writeSortKey(CharSequence s, CollationKeyByteSink sink, CollationBuffer buffer)1171 private void writeSortKey(CharSequence s, CollationKeyByteSink sink, CollationBuffer buffer) { 1172 boolean numeric = settings.readOnly().isNumeric(); 1173 if(settings.readOnly().dontCheckFCD()) { 1174 buffer.leftUTF16CollIter.setText(numeric, s, 0); 1175 CollationKeys.writeSortKeyUpToQuaternary( 1176 buffer.leftUTF16CollIter, data.compressibleBytes, settings.readOnly(), 1177 sink, Collation.PRIMARY_LEVEL, 1178 CollationKeys.SIMPLE_LEVEL_FALLBACK, true); 1179 } else { 1180 buffer.leftFCDUTF16Iter.setText(numeric, s, 0); 1181 CollationKeys.writeSortKeyUpToQuaternary( 1182 buffer.leftFCDUTF16Iter, data.compressibleBytes, settings.readOnly(), 1183 sink, Collation.PRIMARY_LEVEL, 1184 CollationKeys.SIMPLE_LEVEL_FALLBACK, true); 1185 } 1186 if(settings.readOnly().getStrength() == IDENTICAL) { 1187 writeIdenticalLevel(s, sink); 1188 } 1189 sink.Append(Collation.TERMINATOR_BYTE); 1190 } 1191 writeIdenticalLevel(CharSequence s, CollationKeyByteSink sink)1192 
private void writeIdenticalLevel(CharSequence s, CollationKeyByteSink sink) { 1193 // NFD quick check 1194 int nfdQCYesLimit = data.nfcImpl.decompose(s, 0, s.length(), null); 1195 sink.Append(Collation.LEVEL_SEPARATOR_BYTE); 1196 // Sync the ByteArrayWrapper size with the key length. 1197 sink.key_.size = sink.NumberOfBytesAppended(); 1198 int prev = 0; 1199 if(nfdQCYesLimit != 0) { 1200 prev = BOCSU.writeIdenticalLevelRun(prev, s, 0, nfdQCYesLimit, sink.key_); 1201 } 1202 // Is there non-NFD text? 1203 if(nfdQCYesLimit < s.length()) { 1204 int destLengthEstimate = s.length() - nfdQCYesLimit; 1205 StringBuilder nfd = new StringBuilder(); 1206 data.nfcImpl.decompose(s, nfdQCYesLimit, s.length(), nfd, destLengthEstimate); 1207 BOCSU.writeIdenticalLevelRun(prev, nfd, 0, nfd.length(), sink.key_); 1208 } 1209 // Sync the key with the buffer again which got bytes appended and may have been reallocated. 1210 sink.setBufferAndAppended(sink.key_.bytes, sink.key_.size); 1211 } 1212 1213 /** 1214 * Returns the CEs for the string. 1215 * @param str the string 1216 * @internal for tests & tools 1217 * @deprecated This API is ICU internal only. 
1218 */ 1219 @Deprecated internalGetCEs(CharSequence str)1220 public long[] internalGetCEs(CharSequence str) { 1221 CollationBuffer buffer = null; 1222 try { 1223 buffer = getCollationBuffer(); 1224 boolean numeric = settings.readOnly().isNumeric(); 1225 CollationIterator iter; 1226 if(settings.readOnly().dontCheckFCD()) { 1227 buffer.leftUTF16CollIter.setText(numeric, str, 0); 1228 iter = buffer.leftUTF16CollIter; 1229 } else { 1230 buffer.leftFCDUTF16Iter.setText(numeric, str, 0); 1231 iter = buffer.leftFCDUTF16Iter; 1232 } 1233 int length = iter.fetchCEs() - 1; 1234 assert length >= 0 && iter.getCE(length) == Collation.NO_CE; 1235 long[] ces = new long[length]; 1236 System.arraycopy(iter.getCEs(), 0, ces, 0, length); 1237 return ces; 1238 } finally { 1239 releaseCollationBuffer(buffer); 1240 } 1241 } 1242 1243 /** 1244 * Returns this Collator's strength attribute. The strength attribute 1245 * determines the minimum level of difference considered significant. 1246 * 1247 * <p>{@icunote} This can return QUATERNARY strength, which is not supported by the 1248 * JDK version. 1249 * 1250 * <p>See the Collator class description for more details. 1251 * 1252 * @return this Collator's current strength attribute. 1253 * @see #setStrength 1254 * @see #PRIMARY 1255 * @see #SECONDARY 1256 * @see #TERTIARY 1257 * @see #QUATERNARY 1258 * @see #IDENTICAL 1259 * @stable ICU 2.8 1260 */ 1261 @Override getStrength()1262 public int getStrength() { 1263 return settings.readOnly().getStrength(); 1264 } 1265 1266 /** 1267 * Returns the decomposition mode of this Collator. The decomposition mode 1268 * determines how Unicode composed characters are handled. 1269 * 1270 * <p>See the Collator class description for more details. 
1271 * 1272 * @return the decomposition mode 1273 * @see #setDecomposition 1274 * @see #NO_DECOMPOSITION 1275 * @see #CANONICAL_DECOMPOSITION 1276 * @stable ICU 2.8 1277 */ 1278 @Override getDecomposition()1279 public int getDecomposition() { 1280 return (settings.readOnly().options & CollationSettings.CHECK_FCD) != 0 ? 1281 CANONICAL_DECOMPOSITION : NO_DECOMPOSITION; 1282 } 1283 1284 /** 1285 * Return true if an uppercase character is sorted before the corresponding lowercase character. See 1286 * setCaseFirst(boolean) for details. 1287 * 1288 * @see #setUpperCaseFirst 1289 * @see #setLowerCaseFirst 1290 * @see #isLowerCaseFirst 1291 * @see #setCaseFirstDefault 1292 * @return true if upper cased characters are sorted before lower cased characters, false otherwise 1293 * @stable ICU 2.8 1294 */ isUpperCaseFirst()1295 public boolean isUpperCaseFirst() { 1296 return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST_AND_UPPER_MASK); 1297 } 1298 1299 /** 1300 * Return true if a lowercase character is sorted before the corresponding uppercase character. See 1301 * setCaseFirst(boolean) for details. 1302 * 1303 * @see #setUpperCaseFirst 1304 * @see #setLowerCaseFirst 1305 * @see #isUpperCaseFirst 1306 * @see #setCaseFirstDefault 1307 * @return true lower cased characters are sorted before upper cased characters, false otherwise 1308 * @stable ICU 2.8 1309 */ isLowerCaseFirst()1310 public boolean isLowerCaseFirst() { 1311 return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST); 1312 } 1313 1314 /** 1315 * Checks if the alternate handling behavior is the UCA defined SHIFTED or NON_IGNORABLE. If return value is true, 1316 * then the alternate handling attribute for the Collator is SHIFTED. Otherwise if return value is false, then the 1317 * alternate handling attribute for the Collator is NON_IGNORABLE See setAlternateHandlingShifted(boolean) for more 1318 * details. 
1319 * 1320 * @return true or false 1321 * @see #setAlternateHandlingShifted(boolean) 1322 * @see #setAlternateHandlingDefault 1323 * @stable ICU 2.8 1324 */ isAlternateHandlingShifted()1325 public boolean isAlternateHandlingShifted() { 1326 return settings.readOnly().getAlternateHandling(); 1327 } 1328 1329 /** 1330 * Checks if case level is set to true. See setCaseLevel(boolean) for details. 1331 * 1332 * @return the case level mode 1333 * @see #setCaseLevelDefault 1334 * @see #isCaseLevel 1335 * @see #setCaseLevel(boolean) 1336 * @stable ICU 2.8 1337 */ isCaseLevel()1338 public boolean isCaseLevel() { 1339 return (settings.readOnly().options & CollationSettings.CASE_LEVEL) != 0; 1340 } 1341 1342 /** 1343 * Checks if French Collation is set to true. See setFrenchCollation(boolean) for details. 1344 * 1345 * @return true if French Collation is set to true, false otherwise 1346 * @see #setFrenchCollation(boolean) 1347 * @see #setFrenchCollationDefault 1348 * @stable ICU 2.8 1349 */ isFrenchCollation()1350 public boolean isFrenchCollation() { 1351 return (settings.readOnly().options & CollationSettings.BACKWARD_SECONDARY) != 0; 1352 } 1353 1354 /** 1355 * Checks if the Hiragana Quaternary mode is set on. See setHiraganaQuaternary(boolean) for more details. 1356 * 1357 * <p>This attribute was an implementation detail of the CLDR Japanese tailoring. 1358 * Since ICU 50, this attribute is not settable any more via API functions. 1359 * Since CLDR 25/ICU 53, explicit quaternary relations are used 1360 * to achieve the same Japanese sort order. 1361 * 1362 * @return false 1363 * @see #setHiraganaQuaternaryDefault 1364 * @see #setHiraganaQuaternary(boolean) 1365 * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. 1366 */ 1367 @Deprecated isHiraganaQuaternary()1368 public boolean isHiraganaQuaternary() { 1369 return false; 1370 } 1371 1372 /** 1373 * {@icu} Gets the variable top value of a Collator. 
1374 * 1375 * @return the variable top primary weight 1376 * @see #getMaxVariable 1377 * @stable ICU 2.6 1378 */ 1379 @Override getVariableTop()1380 public int getVariableTop() { 1381 return (int)settings.readOnly().variableTop; 1382 } 1383 1384 /** 1385 * Method to retrieve the numeric collation value. When numeric collation is turned on, this Collator generates a 1386 * collation key for the numeric value of substrings of digits. This is a way to get '100' to sort AFTER '2' 1387 * 1388 * @see #setNumericCollation 1389 * @see #setNumericCollationDefault 1390 * @return true if numeric collation is turned on, false otherwise 1391 * @stable ICU 2.8 1392 */ getNumericCollation()1393 public boolean getNumericCollation() { 1394 return (settings.readOnly().options & CollationSettings.NUMERIC) != 0; 1395 } 1396 1397 /** 1398 * Retrieves the reordering codes for this collator. 1399 * These reordering codes are a combination of UScript codes and ReorderCodes. 1400 * @return a copy of the reordering codes for this collator; 1401 * if none are set then returns an empty array 1402 * @see #setReorderCodes 1403 * @see Collator#getEquivalentReorderCodes 1404 * @stable ICU 4.8 1405 */ 1406 @Override getReorderCodes()1407 public int[] getReorderCodes() { 1408 return settings.readOnly().reorderCodes.clone(); 1409 } 1410 1411 // public other methods ------------------------------------------------- 1412 1413 /** 1414 * {@inheritDoc} 1415 * @stable ICU 2.8 1416 */ 1417 @Override equals(Object obj)1418 public boolean equals(Object obj) { 1419 if (this == obj) { 1420 return true; 1421 } 1422 if (!super.equals(obj)) { 1423 return false; 1424 } 1425 RuleBasedCollator o = (RuleBasedCollator) obj; 1426 if(!settings.readOnly().equals(o.settings.readOnly())) { return false; } 1427 if(data == o.data) { return true; } 1428 boolean thisIsRoot = data.base == null; 1429 boolean otherIsRoot = o.data.base == null; 1430 assert(!thisIsRoot || !otherIsRoot); // otherwise their data pointers should be 
== 1431 if(thisIsRoot != otherIsRoot) { return false; } 1432 if((thisIsRoot || tailoring.rules.length() != 0) && 1433 (otherIsRoot || o.tailoring.rules.length() != 0)) { 1434 // Shortcut: If both collators have valid rule strings, then compare those. 1435 if(tailoring.rules.equals(o.tailoring.rules)) { return true; } 1436 } 1437 // Different rule strings can result in the same or equivalent tailoring. 1438 // The rule strings are optional in ICU resource bundles, although included by default. 1439 // cloneBinary() drops the rule string. 1440 UnicodeSet thisTailored = getTailoredSet(); 1441 UnicodeSet otherTailored = o.getTailoredSet(); 1442 if(!thisTailored.equals(otherTailored)) { return false; } 1443 // For completeness, we should compare all of the mappings; 1444 // or we should create a list of strings, sort it with one collator, 1445 // and check if both collators compare adjacent strings the same 1446 // (order & strength, down to quaternary); or similar. 1447 // Testing equality of collators seems unusual. 1448 return true; 1449 } 1450 1451 /** 1452 * Generates a unique hash code for this RuleBasedCollator. 1453 * 1454 * @return the unique hash code for this Collator 1455 * @stable ICU 2.8 1456 */ 1457 @Override hashCode()1458 public int hashCode() { 1459 int h = settings.readOnly().hashCode(); 1460 if(data.base == null) { return h; } // root collator 1461 // Do not rely on the rule string, see comments in operator==(). 1462 UnicodeSet set = getTailoredSet(); 1463 UnicodeSetIterator iter = new UnicodeSetIterator(set); 1464 while(iter.next() && iter.codepoint != UnicodeSetIterator.IS_STRING) { 1465 h ^= data.getCE32(iter.codepoint); 1466 } 1467 return h; 1468 } 1469 1470 /** 1471 * Compares the source text String to the target text String according to the collation rules, strength and 1472 * decomposition mode for this RuleBasedCollator. 
Returns an integer less than, equal to or greater than zero 1473 * depending on whether the source String is less than, equal to or greater than the target String. See the Collator 1474 * class description for an example of use. </p> 1475 * <p> 1476 * General recommendation: <br> 1477 * If comparison are to be done to the same String multiple times, it would be more efficient to generate 1478 * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If speed 1479 * performance is critical and object instantiation is to be reduced, further optimization may be achieved by 1480 * generating a simpler key of the form RawCollationKey and reusing this RawCollationKey object with the method 1481 * RuleBasedCollator.getRawCollationKey. Internal byte representation can be directly accessed via RawCollationKey 1482 * and stored for future use. Like CollationKey, RawCollationKey provides a method RawCollationKey.compareTo for key 1483 * comparisons. If the each Strings are compared to only once, using the method RuleBasedCollator.compare(String, 1484 * String) will have a better performance. 1485 * </p> 1486 * 1487 * @param source 1488 * the source text String. 1489 * @param target 1490 * the target text String. 1491 * @return Returns an integer value. Value is less than zero if source is less than target, value is zero if source 1492 * and target are equal, value is greater than zero if source is greater than target. 1493 * @see CollationKey 1494 * @see #getCollationKey 1495 * @stable ICU 2.8 1496 */ 1497 @Override compare(String source, String target)1498 public int compare(String source, String target) { 1499 return doCompare(source, target); 1500 } 1501 1502 /** 1503 * Abstract iterator for identical-level string comparisons. 1504 * Returns FCD code points and handles temporary switching to NFD. 
1505 * 1506 * <p>As with CollationIterator, 1507 * Java NFDIterator instances are partially constructed and cached, 1508 * and completed when reset for use. 1509 * C++ NFDIterator instances are stack-allocated. 1510 */ 1511 private static abstract class NFDIterator { 1512 /** 1513 * Partial constructor, must call reset(). 1514 */ NFDIterator()1515 NFDIterator() {} reset()1516 final void reset() { 1517 index = -1; 1518 } 1519 1520 /** 1521 * Returns the next code point from the internal normalization buffer, 1522 * or else the next text code point. 1523 * Returns -1 at the end of the text. 1524 */ nextCodePoint()1525 final int nextCodePoint() { 1526 if(index >= 0) { 1527 if(index == decomp.length()) { 1528 index = -1; 1529 } else { 1530 int c = Character.codePointAt(decomp, index); 1531 index += Character.charCount(c); 1532 return c; 1533 } 1534 } 1535 return nextRawCodePoint(); 1536 } 1537 /** 1538 * @param nfcImpl 1539 * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint() 1540 * @return the first code point in c's decomposition, 1541 * or c itself if it was decomposed already or if it does not decompose 1542 */ nextDecomposedCodePoint(Normalizer2Impl nfcImpl, int c)1543 final int nextDecomposedCodePoint(Normalizer2Impl nfcImpl, int c) { 1544 if(index >= 0) { return c; } 1545 decomp = nfcImpl.getDecomposition(c); 1546 if(decomp == null) { return c; } 1547 c = Character.codePointAt(decomp, 0); 1548 index = Character.charCount(c); 1549 return c; 1550 } 1551 1552 /** 1553 * Returns the next text code point in FCD order. 1554 * Returns -1 at the end of the text. 
*/
        protected abstract int nextRawCodePoint();

        private String decomp;
        private int index;
    }

    /**
     * NFDIterator that reads its raw code points directly from a UTF-16 CharSequence.
     */
    private static class UTF16NFDIterator extends NFDIterator {
        UTF16NFDIterator() {}

        /**
         * Calls reset() and then points this iterator at index {@code start} of {@code seq}.
         *
         * @param seq the text to iterate over
         * @param start the code unit index in seq of the first code point to return
         */
        void setText(CharSequence seq, int start) {
            reset();
            s = seq;
            pos = start;
        }

        /**
         * Reads the code point at the current position and advances past it
         * (by two code units for a supplementary code point).
         *
         * @return the code point read, or Collation.SENTINEL_CP once the position has reached s.length()
         */
        @Override
        protected int nextRawCodePoint() {
            if(pos == s.length()) { return Collation.SENTINEL_CP; }
            int c = Character.codePointAt(s, pos);
            pos += Character.charCount(c);
            return c;
        }

        /** The text being iterated; assigned by setText(). */
        protected CharSequence s;
        /** Code unit index in s of the next code point to read. */
        protected int pos;
    }

    /**
     * UTF16NFDIterator whose setText() first passes the text through Normalizer2Impl.makeFCD().
     */
    private static final class FCDUTF16NFDIterator extends UTF16NFDIterator {
        FCDUTF16NFDIterator() {}

        /**
         * Calls reset() and prepares {@code seq}, from index {@code start} on, for iteration.
         * <p>
         * If the first makeFCD() call (made without an output buffer) returns seq.length(),
         * seq itself is iterated starting at start. Otherwise the code units in [start, spanLimit)
         * are copied into an internal StringBuilder that is reused across calls, a second makeFCD()
         * call processes [spanLimit, seq.length()) through a ReorderingBuffer that writes into that
         * builder, and iteration then runs over the builder from index 0.
         *
         * @param nfcImpl the normalizer implementation used for the makeFCD() calls
         * @param seq the text to iterate over
         * @param start the code unit index in seq at which iteration starts
         */
        void setText(Normalizer2Impl nfcImpl, CharSequence seq, int start) {
            reset();
            // NOTE(review): presumably returns the limit of the leading span that already
            // passes the FCD check -- confirm against Normalizer2Impl.makeFCD().
            int spanLimit = nfcImpl.makeFCD(seq, start, seq.length(), null);
            if(spanLimit == seq.length()) {
                s = seq;
                pos = start;
            } else {
                if(str == null) {
                    str = new StringBuilder();
                } else {
                    str.setLength(0);
                }
                str.append(seq, start, spanLimit);
                ReorderingBuffer buffer = new ReorderingBuffer(nfcImpl, str, seq.length() - start);
                nfcImpl.makeFCD(seq, spanLimit, seq.length(), buffer);
                s = str;
                pos = 0;
            }
        }

        /** Lazily created builder that receives the text when seq cannot be iterated in place. */
        private StringBuilder str;
    }

    /**
     * Compares the code point sequences delivered by two NFDIterators.
     * <p>
     * Code points are fetched in lock step. While they are equal the loop continues, and it ends
     * with Collation.EQUAL when both iterators return a negative value at the same step.
     * At the first difference, each side is mapped before the numeric comparison:
     * a negative value (end of string) becomes -2, U+FFFE becomes -1, and any other code point is
     * replaced by the result of nextDecomposedCodePoint(). End of string therefore orders before
     * U+FFFE, which orders before every other value. If the mapped values are equal, the loop
     * continues with the next code points.
     *
     * @param nfcImpl the normalizer implementation passed to nextDecomposedCodePoint()
     * @param left iterator over the left text
     * @param right iterator over the right text
     * @return Collation.LESS, Collation.EQUAL or Collation.GREATER
     */
    private static final int compareNFDIter(Normalizer2Impl nfcImpl, NFDIterator left, NFDIterator right) {
        for(;;) {
            // Fetch the next FCD code point from each string.
            int leftCp = left.nextCodePoint();
            int rightCp = right.nextCodePoint();
            if(leftCp == rightCp) {
                if(leftCp < 0) { break; }
                continue;
            }
            // If they are different, then decompose each and compare again.
            if(leftCp < 0) {
                leftCp = -2;  // end of string
            } else if(leftCp == 0xfffe) {
                leftCp = -1;  // U+FFFE: merge separator
            } else {
                leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
            }
            if(rightCp < 0) {
                rightCp = -2;  // end of string
            } else if(rightCp == 0xfffe) {
                rightCp = -1;  // U+FFFE: merge separator
            } else {
                rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
            }
            if(leftCp < rightCp) { return Collation.LESS; }
            if(leftCp > rightCp) { return Collation.GREATER; }
        }
        return Collation.EQUAL;
    }

    /**
     * Compares two CharSequences.
     * <p>
     * Steps, in order:
     * <ol>
     * <li>Returns Collation.EQUAL immediately for the same object or for code-unit-identical texts.</li>
     * <li>Skips the identical code unit prefix, then backs up while the code unit at the prefix
     *     boundary is reported as unsafe by data.isUnsafeBackward().</li>
     * <li>Tries CollationFastLatin.compareUTF16() when fastLatinOptions is non-negative and the code
     *     unit at the prefix boundary of each string (if any) is at most CollationFastLatin.LATIN_MAX.</li>
     * <li>If that path was not taken or returned BAIL_OUT_RESULT, compares with the collation
     *     iterators from the CollationBuffer via CollationCompare.compareUpToQuaternary().</li>
     * <li>If the result is EQUAL and the strength is at least Collator.IDENTICAL, compares the
     *     identical level with compareNFDIter().</li>
     * </ol>
     *
     * @param left the left text
     * @param right the right text
     * @return Collation.LESS, Collation.EQUAL or Collation.GREATER
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Override
    @Deprecated
    protected int doCompare(CharSequence left, CharSequence right) {
        if(left == right) {
            return Collation.EQUAL;
        }

        // Identical-prefix test.
        int equalPrefixLength = 0;
        for(;;) {
            if(equalPrefixLength == left.length()) {
                if(equalPrefixLength == right.length()) { return Collation.EQUAL; }
                break;
            } else if(equalPrefixLength == right.length() ||
                      left.charAt(equalPrefixLength) != right.charAt(equalPrefixLength)) {
                break;
            }
            ++equalPrefixLength;
        }

        CollationSettings roSettings = settings.readOnly();
        boolean numeric = roSettings.isNumeric();
        if(equalPrefixLength > 0) {
            if((equalPrefixLength != left.length() &&
                        data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) ||
                    (equalPrefixLength != right.length() &&
                        data.isUnsafeBackward(right.charAt(equalPrefixLength), numeric))) {
                // Identical prefix: Back up to the start of a contraction or reordering sequence.
                // Only the left text is tested here: both texts are identical below equalPrefixLength.
                while(--equalPrefixLength > 0 &&
                        data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) {}
            }
            // Notes:
            // - A longer string can compare equal to a prefix of it if only ignorables follow.
            // - With a backward level, a longer string can compare less-than a prefix of it.

            // Pass the actual start of each string into the CollationIterators,
            // plus the equalPrefixLength position,
            // so that prefix matches back into the equal prefix work.
        }

        int result;
        int fastLatinOptions = roSettings.fastLatinOptions;
        if(fastLatinOptions >= 0 &&
                (equalPrefixLength == left.length() ||
                    left.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX) &&
                (equalPrefixLength == right.length() ||
                    right.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX)) {
            result = CollationFastLatin.compareUTF16(data.fastLatinTable,
                                                      roSettings.fastLatinPrimaries,
                                                      fastLatinOptions,
                                                      left, right, equalPrefixLength);
        } else {
            // Not eligible for the fast path: force the full comparison below.
            result = CollationFastLatin.BAIL_OUT_RESULT;
        }

        if(result == CollationFastLatin.BAIL_OUT_RESULT) {
            CollationBuffer buffer = null;
            try {
                buffer = getCollationBuffer();
                if(roSettings.dontCheckFCD()) {
                    buffer.leftUTF16CollIter.setText(numeric, left, equalPrefixLength);
                    buffer.rightUTF16CollIter.setText(numeric, right, equalPrefixLength);
                    result = CollationCompare.compareUpToQuaternary(
                        buffer.leftUTF16CollIter, buffer.rightUTF16CollIter, roSettings);
                } else {
                    buffer.leftFCDUTF16Iter.setText(numeric, left, equalPrefixLength);
                    buffer.rightFCDUTF16Iter.setText(numeric, right, equalPrefixLength);
                    result = CollationCompare.compareUpToQuaternary(
                        buffer.leftFCDUTF16Iter, buffer.rightFCDUTF16Iter, roSettings);
                }
            } finally {
                releaseCollationBuffer(buffer);
            }
        }
        if(result != Collation.EQUAL || roSettings.getStrength() < Collator.IDENTICAL) {
            return result;
        }

        CollationBuffer buffer = null;
        try {
            buffer = getCollationBuffer();
            // Compare identical level.
            Normalizer2Impl nfcImpl = data.nfcImpl;
            if(roSettings.dontCheckFCD()) {
                buffer.leftUTF16NFDIter.setText(left, equalPrefixLength);
                buffer.rightUTF16NFDIter.setText(right, equalPrefixLength);
                return compareNFDIter(nfcImpl, buffer.leftUTF16NFDIter, buffer.rightUTF16NFDIter);
            } else {
                buffer.leftFCDUTF16NFDIter.setText(nfcImpl, left, equalPrefixLength);
                buffer.rightFCDUTF16NFDIter.setText(nfcImpl, right, equalPrefixLength);
                return compareNFDIter(nfcImpl, buffer.leftFCDUTF16NFDIter, buffer.rightFCDUTF16NFDIter);
            }
        } finally {
            releaseCollationBuffer(buffer);
        }
    }

    // package private constructors ------------------------------------------

    /**
     * Creates a collator that uses the data of the given tailoring and a clone of its settings.
     * The actual locale is initially reported from the tailoring (see getLocale()).
     *
     * @param t the tailoring to use
     * @param vl the valid locale to report
     */
    RuleBasedCollator(CollationTailoring t, ULocale vl) {
        data = t.data;
        settings = t.settings.clone();
        tailoring = t;
        validLocale = vl;
        actualLocaleIsSameAsValid = false;
    }

    /**
     * Initializes data, settings and tailoring from the given tailoring, and sets the valid locale
     * to the tailoring's actual locale. Asserts that none of the three has been set before.
     *
     * @param t the tailoring to adopt
     */
    private void adoptTailoring(CollationTailoring t) {
        assert(settings == null && data == null && tailoring == null);
        data = t.data;
        settings = t.settings.clone();
        tailoring = t;
        validLocale = t.actualLocale;
        actualLocaleIsSameAsValid = false;
    }

    // package private methods -----------------------------------------------

    /**
     * Tests whether a character is "unsafe" for use as a collation starting point.
     * Delegates to data.isUnsafeBackward() with the current numeric setting.
     *
     * @param c code point or code unit
     * @return true if c is unsafe
     * @see CollationElementIterator#setOffset(int)
     */
    final boolean isUnsafe(int c) {
        return data.isUnsafeBackward(c, settings.readOnly().isNumeric());
    }

    /**
     * Frozen state of the collator.
     * This lock is acquired by getCollationBuffer() and released by releaseCollationBuffer()
     * whenever isFrozen() returns true.
     */
    private Lock frozenLock;

    /**
     * Holder for the left/right iterator pairs that doCompare() uses,
     * plus a RawCollationKey slot that the constructor leaves unassigned.
     */
    private static final class CollationBuffer {
        /**
         * Creates all eight iterators; the four collation iterators are bound to the given data.
         *
         * @param data the collation data passed to the collation iterators
         */
        private CollationBuffer(CollationData data) {
            leftUTF16CollIter = new UTF16CollationIterator(data);
            rightUTF16CollIter = new UTF16CollationIterator(data);
            leftFCDUTF16Iter = new FCDUTF16CollationIterator(data);
            rightFCDUTF16Iter = new FCDUTF16CollationIterator(data);
            leftUTF16NFDIter = new UTF16NFDIterator();
            rightUTF16NFDIter = new UTF16NFDIterator();
            leftFCDUTF16NFDIter = new FCDUTF16NFDIterator();
            rightFCDUTF16NFDIter = new FCDUTF16NFDIterator();
        }

        // Collation iterators: used by doCompare() for compareUpToQuaternary().
        UTF16CollationIterator leftUTF16CollIter;
        UTF16CollationIterator rightUTF16CollIter;
        FCDUTF16CollationIterator leftFCDUTF16Iter;
        FCDUTF16CollationIterator rightFCDUTF16Iter;

        // NFD iterators: used by doCompare() for the identical level.
        UTF16NFDIterator leftUTF16NFDIter;
        UTF16NFDIterator rightUTF16NFDIter;
        FCDUTF16NFDIterator leftFCDUTF16NFDIter;
        FCDUTF16NFDIterator rightFCDUTF16NFDIter;

        RawCollationKey rawCollationKey;
    }

    /**
     * Get the version of this collator object.
     * <p>
     * The first version field is computed from the top byte of the tailoring version plus the
     * collation runtime major version shifted left by 4 and right by 4; the remaining three fields
     * are the lower three bytes of the tailoring version.
     *
     * @return the version object associated with this collator
     * @stable ICU 2.8
     */
    @Override
    public VersionInfo getVersion() {
        int version = tailoring.version;
        int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor();
        return VersionInfo.getInstance(
                (version >>> 24) + (rtVersion << 4) + (rtVersion >> 4),
                ((version >> 16) & 0xff), ((version >> 8) & 0xff), (version & 0xff));
    }

    /**
     * Get the UCA version of this collator object.
     * The value is unpacked from the minor and milli fields of getVersion().
     *
     * @return the version object associated with this collator
     * @stable ICU 2.8
     */
    @Override
    public VersionInfo getUCAVersion() {
        VersionInfo v = getVersion();
        // Note: This is tied to how the current implementation encodes the UCA version
        // in the overall getVersion().
        // Alternatively, we could load the root collator and get at lower-level data from there.
        // Either way, it will reflect the input collator's UCA version only
        // if it is a known implementation.
        // (C++ comment) It would be cleaner to make this a virtual Collator method.
        // (In Java, it is virtual.)
        return VersionInfo.getInstance(v.getMinor() >> 3, v.getMinor() & 7, v.getMilli() >> 6, 0);
    }

    /** Cached buffer handed out by getCollationBuffer(). */
    private CollationBuffer collationBuffer;

    /**
     * Returns the cached CollationBuffer.
     * When the collator is frozen, frozenLock is acquired first; the caller must pair this call
     * with releaseCollationBuffer(), normally in a finally block. When the collator is not frozen,
     * the buffer is created on first use and no lock is taken.
     *
     * @return the value of the collationBuffer field
     */
    private final CollationBuffer getCollationBuffer() {
        if (isFrozen()) {
            // NOTE(review): this path does not allocate, so the buffer is presumably
            // created when the collator is frozen -- confirm in freeze().
            frozenLock.lock();
        } else if (collationBuffer == null) {
            collationBuffer = new CollationBuffer(data);
        }
        return collationBuffer;
    }

    /**
     * Counterpart of getCollationBuffer(): releases frozenLock when the collator is frozen.
     *
     * @param buffer the buffer obtained from getCollationBuffer(); not used by this method
     */
    private final void releaseCollationBuffer(CollationBuffer buffer) {
        if (isFrozen()) {
            frozenLock.unlock();
        }
    }

    /**
     * {@inheritDoc}
     * @draft ICU 53 (retain)
     * @provisional This API might change or be removed in a future release.
     */
    @Override
    public ULocale getLocale(ULocale.Type type) {
        if (type == ULocale.ACTUAL_LOCALE) {
            // The actual locale is either the valid locale or the tailoring's own actual locale,
            // as recorded by setLocale().
            return actualLocaleIsSameAsValid ? validLocale : tailoring.actualLocale;
        } else if(type == ULocale.VALID_LOCALE) {
            return validLocale;
        } else {
            throw new IllegalArgumentException("unknown ULocale.Type " + type);
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    void setLocale(ULocale valid, ULocale actual) {
        // This method is called
        // by other protected functions that check and make sure that
        // valid and actual are not null before passing
        assert (valid == null) == (actual == null);
        // Another check we could do is that the actual locale is at
        // the same level or less specific than the valid locale.
        // TODO: Starting with Java 7, use Objects.equals(a, b).
        if(Utility.objectEquals(actual, tailoring.actualLocale)) {
            actualLocaleIsSameAsValid = false;
        } else {
            assert(Utility.objectEquals(actual, valid));
            actualLocaleIsSameAsValid = true;
        }
        // Do not modify tailoring.actualLocale:
        // We cannot be sure that that would be thread-safe.
        validLocale = valid;
    }

    CollationData data;
    SharedObject.Reference<CollationSettings> settings;  // reference-counted
    CollationTailoring tailoring;  // C++: reference-counted
    private ULocale validLocale;
    // Note: No need in Java to track which attributes have been set explicitly.
    // int or EnumSet explicitlySetAttributes;

    // True when getLocale(ACTUAL_LOCALE) should return validLocale instead of tailoring.actualLocale.
    private boolean actualLocaleIsSameAsValid;
}