// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/**
 *******************************************************************************
 * Copyright (C) 1996-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package com.ibm.icu.text;

import java.text.CharacterIterator;
import java.util.HashMap;
import java.util.Map;

import com.ibm.icu.impl.CharacterIteratorWrapper;
import com.ibm.icu.impl.coll.Collation;
import com.ibm.icu.impl.coll.CollationData;
import com.ibm.icu.impl.coll.CollationIterator;
import com.ibm.icu.impl.coll.ContractionsAndExpansions;
import com.ibm.icu.impl.coll.FCDIterCollationIterator;
import com.ibm.icu.impl.coll.FCDUTF16CollationIterator;
import com.ibm.icu.impl.coll.IterCollationIterator;
import com.ibm.icu.impl.coll.UTF16CollationIterator;
import com.ibm.icu.impl.coll.UVector32;

/**
 * <code>CollationElementIterator</code> is an iterator created by
 * a RuleBasedCollator to walk through a string. The return result of
 * each iteration is a 32-bit collation element (CE) that defines the
 * ordering priority of the next character or sequence of characters
 * in the source string.
 *
 * <p>For illustration, consider the following in Slovak and in traditional Spanish collation:
 * <blockquote>
 * <pre>
 * "ca" -&gt; the first collation element is CE('c') and the second
 *         collation element is CE('a').
 * "cha" -&gt; the first collation element is CE('ch') and the second
 *          collation element is CE('a').
 * </pre>
 * </blockquote>
 * And in German phonebook collation,
 * <blockquote>
 * <pre>
 * Since the character '&#230;' is a composed character of 'a' and 'e', the
 * iterator returns two collation elements for the single character '&#230;'
 *
 * "&#230;b" -&gt; the first collation element is collation_element('a'), the
 *         second collation element is collation_element('e'), and the
 *         third collation element is collation_element('b').
 * </pre>
 * </blockquote>
 *
 * <p>For collation ordering comparison, the collation element results
 * can not be compared simply by using basic arithmetic operators,
 * e.g. &lt;, == or &gt;, further processing has to be done. Details
 * can be found in the ICU
 * <a href="http://userguide.icu-project.org/collation/architecture">
 * User Guide</a>. An example of using the CollationElementIterator
 * for collation ordering comparison is the class
 * {@link com.ibm.icu.text.StringSearch}.
 *
 * <p>To construct a CollationElementIterator object, users
 * call the method getCollationElementIterator() on a
 * RuleBasedCollator that defines the desired sorting order.
 *
 * <p> Example:
 * <blockquote>
 * <pre>
 *  String testString = "This is a test";
 *  RuleBasedCollator rbc = new RuleBasedCollator("&amp;a&lt;b");
 *  CollationElementIterator iterator = rbc.getCollationElementIterator(testString);
 *  int order;
 *  while ((order = iterator.next()) != CollationElementIterator.NULLORDER) {
 *      if (order != CollationElementIterator.IGNORABLE) {
 *          // order is valid, not ignorable and we have not passed the end
 *          // of the iteration, we do something
 *          int primaryOrder = CollationElementIterator.primaryOrder(order);
 *          System.out.println("Next primary order 0x" +
 *                             Integer.toHexString(primaryOrder));
 *      }
 *  }
 * </pre>
 * </blockquote>
 * <p>
 * The method next() returns the collation order of the next character based on
 * the comparison level of the collator. The method previous() returns the
 * collation order of the previous character based on the comparison level of
 * the collator. The Collation Element Iterator moves only in one direction
 * between calls to reset(), setOffset(), or setText(). That is, calls to next() and
 * previous() can not be interleaved. Whenever previous() is to be called after
 * next() or vice versa, reset(), setOffset() or setText() has to be called first
 * to reset the status, shifting current position to either the end or the start of
 * the string (reset() or setText()), or the specified position (setOffset()).
 * Hence at the next call of next() or previous(), the first or last collation order,
 * or collation order at the specified position will be returned. If a change of
 * direction is attempted without one of these calls, next() and previous()
 * throw an {@link IllegalStateException}.
 * <p>
 * This class is not subclassable.
 * @see Collator
 * @see RuleBasedCollator
 * @see StringSearch
 * @author Syn Wee Quek
 * @stable ICU 2.8
 */
public final class CollationElementIterator
{
    private CollationIterator iter_;  // owned
    private final RuleBasedCollator rbc_;  // aliased
    /** Pending second (or, going backwards, first) 32-bit half of a split 64-bit CE; 0 if none. */
    private int otherHalf_;
    /**
     * &lt;0: backwards; 0: just after reset() (previous() begins from end);
     * 1: just after setOffset(); &gt;1: forward
     */
    private byte dir_;
    /**
     * Stores offsets from expansions and from unsafe-backwards iteration,
     * so that getOffset() returns intermediate offsets for the CEs
     * that are consistent with forward iteration.
     */
    private UVector32 offsets_;

    private String string_;  // TODO: needed in Java? if so, then add a UCharacterIterator field too?


    /**
     * This constant is returned by the iterator in the methods
     * next() and previous() when the end or the beginning of the
     * source string has been reached, and there are no more valid
     * collation elements to return.
     *
     * <p>See class documentation for an example of use.
     * @stable ICU 2.8
     * @see #next
     * @see #previous */
    public static final int NULLORDER = 0xffffffff;

    /**
     * This constant is returned by the iterator in the methods
     * next() and previous() when a collation element result is to be
     * ignored.
     *
     * <p>See class documentation for an example of use.
     * @stable ICU 2.8
     * @see #next
     * @see #previous */
    public static final int IGNORABLE = 0;

    /**
     * Return the primary order of the specified collation element,
     * i.e. the upper (most significant) 16 bits. This value is unsigned.
     * @param ce the collation element
     * @return the element's 16 bits primary order.
     * @stable ICU 2.8
     */
    public static final int primaryOrder(int ce) {
        return (ce >>> 16) & 0xffff;
    }

    /**
     * Return the secondary order of the specified collation element,
     * i.e. the 16th to 23rd bits counted from the most significant bit
     * (bits 8 to 15 counted from the least significant bit), inclusive.
     * This value is unsigned.
     * @param ce the collation element
     * @return the element's 8 bits secondary order
     * @stable ICU 2.8
     */
    public static final int secondaryOrder(int ce) {
        return (ce >>> 8) & 0xff;
    }

    /**
     * Return the tertiary order of the specified collation element, i.e. the last
     * (least significant) 8 bits. This value is unsigned.
     * @param ce the collation element
     * @return the element's 8 bits tertiary order
     * @stable ICU 2.8
     */
    public static final int tertiaryOrder(int ce) {
        return ce & 0xff;
    }


    /**
     * Builds the first old-style 32-bit CE from a 64-bit CE.
     * @param p the upper 32 bits of the 64-bit CE
     * @param lower32 the lower 32 bits of the 64-bit CE
     */
    private static final int getFirstHalf(long p, int lower32) {
        return ((int)p & 0xffff0000) | ((lower32 >> 16) & 0xff00) | ((lower32 >> 8) & 0xff);
    }

    /**
     * Builds the second old-style 32-bit CE from a 64-bit CE, without the continuation marker.
     * The result is 0 when the first half already carries all of the bits that are kept.
     * @param p the upper 32 bits of the 64-bit CE
     * @param lower32 the lower 32 bits of the 64-bit CE
     */
    private static final int getSecondHalf(long p, int lower32) {
        return ((int)p << 16) | ((lower32 >> 8) & 0xff00) | (lower32 & 0x3f);
    }

    /**
     * Returns true if the 64-bit CE has any of the bits that getSecondHalf() extracts,
     * i.e. if it must be split into two 32-bit CEs.
     */
    private static final boolean ceNeedsTwoParts(long ce) {
        return (ce & 0xffff00ff003fL) != 0;
    }

    /**
     * Common constructor: remembers the collator only.
     * The remaining fields keep their default values (null/0) until one of the
     * setText() overloads is called, which every other constructor does.
     */
    private CollationElementIterator(RuleBasedCollator collator) {
        rbc_ = collator;
    }

    /**
     * CollationElementIterator constructor. This takes a source
     * string and a RuleBasedCollator. The iterator will walk through
     * the source string based on the rules defined by the
     * collator. If the source string is empty, NULLORDER will be
     * returned on the first call to next().
     *
     * @param source the source string.
     * @param collator the RuleBasedCollator
     * @stable ICU 2.8
     */
    CollationElementIterator(String source, RuleBasedCollator collator) {
        this(collator);
        setText(source);
    }
    // Note: The constructors should take settings & tailoring, not a collator,
    // to avoid circular dependencies.
    // However, for equals() we would need to be able to compare tailoring data for equality
    // without making CollationData or CollationTailoring depend on TailoredSet.
    // (See the implementation of RuleBasedCollator.equals().)
    // That might require creating an intermediate class that would be used
    // by both CollationElementIterator and RuleBasedCollator
    // but only contain the part of RBC.equals() related to data and rules.

    /**
     * CollationElementIterator constructor. This takes a source
     * character iterator and a RuleBasedCollator. The iterator will
     * walk through the source string based on the rules defined by
     * the collator. If the source string is empty, NULLORDER will be
     * returned on the first call to next().
     *
     * @param source the source string iterator.
     * @param collator the RuleBasedCollator
     * @stable ICU 2.8
     */
    CollationElementIterator(CharacterIterator source, RuleBasedCollator collator) {
        this(collator);
        setText(source);
    }

    /**
     * CollationElementIterator constructor. This takes a source
     * character iterator and a RuleBasedCollator. The iterator will
     * walk through the source string based on the rules defined by
     * the collator. If the source string is empty, NULLORDER will be
     * returned on the first call to next().
     *
     * @param source the source string iterator.
     * @param collator the RuleBasedCollator
     * @stable ICU 2.8
     */
    CollationElementIterator(UCharacterIterator source, RuleBasedCollator collator) {
        this(collator);
        setText(source);
    }

    /**
     * Returns the character offset in the source string
     * corresponding to the next collation element. I.e., getOffset()
     * returns the position in the source string corresponding to the
     * collation element that will be returned by the next call to
     * next() or previous(). This value could be any of:
     * <ul>
     * <li> The index of the <b>first</b> character corresponding to
     * the next collation element. (This means that if
     * <code>setOffset(offset)</code> sets the index in the middle of
     * a contraction, <code>getOffset()</code> returns the index of
     * the first character in the contraction, which may not be equal
     * to the original offset that was set. Hence calling getOffset()
     * immediately after setOffset(offset) does not guarantee that the
     * original offset set will be returned.)
     * <li> If normalization is on, the index of the <b>immediate</b>
     * subsequent character, or composite character with the first
     * character, having a combining class of 0.
     * <li> The length of the source string, if iteration has reached
     * the end.
     * </ul>
     *
     * @return The character offset in the source string corresponding to the
     *         collation element that will be returned by the next call to
     *         next() or previous().
     * @stable ICU 2.8
     */
    public int getOffset() {
        if (dir_ < 0 && offsets_ != null && !offsets_.isEmpty()) {
            // CollationIterator.previousCE() decrements the CEs length
            // while it pops CEs from its internal buffer.
            int i = iter_.getCEsLength();
            if (otherHalf_ != 0) {
                // Return the trailing CE offset while we are in the middle of a 64-bit CE.
                ++i;
            }
            assert (i < offsets_.size());
            return offsets_.elementAti(i);
        }
        return iter_.getOffset();
    }

    /**
     * Get the next collation element in the source string.
     *
     * <p>This iterator iterates over a sequence of collation elements
     * that were built from the string. Because there isn't
     * necessarily a one-to-one mapping from characters to collation
     * elements, this doesn't mean the same thing as "return the
     * collation element [or ordering priority] of the next character
     * in the string".
     *
     * <p>This function returns the collation element that the
     * iterator is currently pointing to, and then updates the
     * internal pointer to point to the next element.
     *
     * @return the next collation element or NULLORDER if the end of the
     *         iteration has been reached.
     * @throws IllegalStateException if called during backwards iteration, i.e. after
     *         previous() without an intervening reset(), setOffset() or setText().
     * @stable ICU 2.8
     */
    public int next() {
        if (dir_ > 1) {
            // Continue forward iteration. Test this first.
            if (otherHalf_ != 0) {
                int oh = otherHalf_;
                otherHalf_ = 0;
                return oh;
            }
        } else if (dir_ == 1) {
            // next() after setOffset()
            dir_ = 2;
        } else if (dir_ == 0) {
            // The iter_ is already reset to the start of the text.
            dir_ = 2;
        } else /* dir_ < 0 */{
            // illegal change of direction
            // Java porting note: ICU4C sets U_INVALID_STATE_ERROR to the return status.
            throw new IllegalStateException("Illegal change of direction");
        }
        // No need to keep all CEs in the buffer when we iterate.
        iter_.clearCEsIfNoneRemaining();
        long ce = iter_.nextCE();
        if (ce == Collation.NO_CE) {
            return NULLORDER;
        }
        // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
        long p = ce >>> 32;
        int lower32 = (int) ce;
        int firstHalf = getFirstHalf(p, lower32);
        int secondHalf = getSecondHalf(p, lower32);
        if (secondHalf != 0) {
            // Remember the second half; the next call to next() returns it.
            otherHalf_ = secondHalf | 0xc0;  // continuation CE
        }
        return firstHalf;
    }

    /**
     * Get the previous collation element in the source string.
     *
     * <p>This iterator iterates over a sequence of collation elements
     * that were built from the string. Because there isn't
     * necessarily a one-to-one mapping from characters to collation
     * elements, this doesn't mean the same thing as "return the
     * collation element [or ordering priority] of the previous
     * character in the string".
     *
     * <p>This function updates the iterator's internal pointer to
     * point to the collation element preceding the one it's currently
     * pointing to and then returns that element, while next() returns
     * the current element and then updates the pointer.
     *
     * @return the previous collation element, or NULLORDER when the start of
     *         the iteration has been reached.
     * @throws IllegalStateException if called during forward iteration, i.e. after
     *         next() without an intervening reset(), setOffset() or setText().
     * @stable ICU 2.8
     */
    public int previous() {
        if (dir_ < 0) {
            // Continue backwards iteration. Test this first.
            if (otherHalf_ != 0) {
                int oh = otherHalf_;
                otherHalf_ = 0;
                return oh;
            }
        } else if (dir_ == 0) {
            // Just after reset()/setText(): backwards iteration begins at the end of the text.
            iter_.resetToOffset(string_.length());
            dir_ = -1;
        } else if (dir_ == 1) {
            // previous() after setOffset()
            dir_ = -1;
        } else /* dir_ > 1 */{
            // illegal change of direction
            // Java porting note: ICU4C sets U_INVALID_STATE_ERROR to the return status.
            throw new IllegalStateException("Illegal change of direction");
        }
        if (offsets_ == null) {
            offsets_ = new UVector32();
        }
        // If we already have expansion CEs, then we also have offsets.
        // Otherwise remember the trailing offset in case we need to
        // write offsets for an artificial expansion.
        int limitOffset = iter_.getCEsLength() == 0 ? iter_.getOffset() : 0;
        long ce = iter_.previousCE(offsets_);
        if (ce == Collation.NO_CE) {
            return NULLORDER;
        }
        // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
        long p = ce >>> 32;
        int lower32 = (int) ce;
        int firstHalf = getFirstHalf(p, lower32);
        int secondHalf = getSecondHalf(p, lower32);
        if (secondHalf != 0) {
            if (offsets_.isEmpty()) {
                // When we convert a single 64-bit CE into two 32-bit CEs,
                // we need to make this artificial expansion behave like a normal expansion.
                // See CollationIterator.previousCE().
                offsets_.addElement(iter_.getOffset());
                offsets_.addElement(limitOffset);
            }
            // Going backwards, the continuation comes first; the first half follows.
            otherHalf_ = firstHalf;
            return secondHalf | 0xc0;  // continuation CE
        }
        return firstHalf;
    }

    /**
     * Resets the cursor to the beginning of the string. The next
     * call to next() or previous() will return the first and last
     * collation element in the string, respectively.
     *
     * <p>If the RuleBasedCollator used by this iterator has had its
     * attributes changed, calling reset() will reinitialize the
     * iterator to use the new attributes.
     *
     * @stable ICU 2.8
     */
    public void reset() {
        // NOTE(review): this only rewinds iter_ and clears the direction state. The collator's
        // numeric and FCD-check settings are read when setText() builds the iterator, so
        // confirm whether the Javadoc claim about picking up changed attributes still holds.
        iter_.resetToOffset(0);
        otherHalf_ = 0;
        dir_ = 0;
    }

    /**
     * Sets the iterator to point to the collation element
     * corresponding to the character at the specified offset. The
     * value returned by the next call to next() will be the collation
     * element corresponding to the characters at offset.
     *
     * <p>If offset is in the middle of a contracting character
     * sequence, the iterator is adjusted to the start of the
     * contracting sequence. This means that getOffset() is not
     * guaranteed to return the same value set by this method.
     *
     * <p>If the decomposition mode is on, and offset is in the middle
     * of a decomposable range of source text, the iterator may not
     * return a correct result for the next forwards or backwards
     * iteration.  The user must ensure that the offset is not in the
     * middle of a decomposable range.
     *
     * @param newOffset the character offset into the original source string to
     *        set. Note that this is not an offset into the corresponding
     *        sequence of collation elements.
     * @stable ICU 2.8
     */
    public void setOffset(int newOffset) {
        if (0 < newOffset && newOffset < string_.length()) {
            int offset = newOffset;
            do {
                char c = string_.charAt(offset);
                if (!rbc_.isUnsafe(c) ||
                        (Character.isHighSurrogate(c) && !rbc_.isUnsafe(string_.codePointAt(offset)))) {
                    break;
                }
                // Back up to before this unsafe character.
                --offset;
            } while (offset > 0);
            if (offset < newOffset) {
                // We might have backed up more than necessary.
                // For example, contractions "ch" and "cu" make both 'h' and 'u' unsafe,
                // but for text "chu" setOffset(2) should remain at 2
                // although we initially back up to offset 0.
                // Find the last safe offset no greater than newOffset by iterating forward.
                int lastSafeOffset = offset;
                do {
                    iter_.resetToOffset(lastSafeOffset);
                    do {
                        iter_.nextCE();
                    } while ((offset = iter_.getOffset()) == lastSafeOffset);
                    if (offset <= newOffset) {
                        lastSafeOffset = offset;
                    }
                } while (offset < newOffset);
                newOffset = lastSafeOffset;
            }
        }
        iter_.resetToOffset(newOffset);
        otherHalf_ = 0;
        dir_ = 1;
    }

    /**
     * Set a new source string for iteration, and reset the offset
     * to the beginning of the text.
     *
     * @param source the new source string for iteration.
     * @stable ICU 2.8
     */
    public void setText(String source) {
        string_ = source; // TODO: do we need to remember the source string in a field?
        CollationIterator newIter;
        boolean numeric = rbc_.settings.readOnly().isNumeric();
        if (rbc_.settings.readOnly().dontCheckFCD()) {
            newIter = new UTF16CollationIterator(rbc_.data, numeric, string_, 0);
        } else {
            newIter = new FCDUTF16CollationIterator(rbc_.data, numeric, string_, 0);
        }
        installIterator(newIter);
    }

    /**
     * Set a new source string iterator for iteration, and reset the
     * offset to the beginning of the text.
     *
     * <p>The source iterator's integrity will be preserved since a new copy
     * will be created for use.
     * @param source the new source string iterator for iteration.
     * @stable ICU 2.8
     */
    public void setText(UCharacterIterator source) {
        string_ = source.getText(); // TODO: do we need to remember the source string in a field?
        // Note: In C++, we just setText(source.getText()).
        // In Java, we actually operate on a character iterator.
        // (The old code apparently did so only for a CharacterIterator;
        // for a UCharacterIterator it also just used source.getText()).
        // TODO: do we need to remember the cloned iterator in a field?
        UCharacterIterator src;
        try {
            src = (UCharacterIterator) source.clone();
        } catch (CloneNotSupportedException e) {
            // Fall back to ICU 52 behavior of iterating over the text contents
            // of the UCharacterIterator.
            setText(source.getText());
            return;
        }
        src.setToStart();
        installIterator(newIterCollationIterator(src));
    }

    /**
     * Set a new source string iterator for iteration, and reset the
     * offset to the beginning of the text.
     *
     * @param source the new source string iterator for iteration.
     * @stable ICU 2.8
     */
    public void setText(CharacterIterator source) {
        // Note: In C++, we just setText(source.getText()).
        // In Java, we actually operate on a character iterator.
        // TODO: do we need to remember the iterator in a field?
        // TODO: apparently we don't clone a CharacterIterator in Java,
        // we only clone the text for a UCharacterIterator?? see the old code in the constructors
        UCharacterIterator src = new CharacterIteratorWrapper(source);
        src.setToStart();
        string_ = src.getText(); // TODO: do we need to remember the source string in a field?
        installIterator(newIterCollationIterator(src));
    }

    /**
     * Creates the collation iterator over a UCharacterIterator, choosing the FCD-checking
     * variant unless the collator settings say that the FCD check can be skipped.
     */
    private CollationIterator newIterCollationIterator(UCharacterIterator src) {
        boolean numeric = rbc_.settings.readOnly().isNumeric();
        if (rbc_.settings.readOnly().dontCheckFCD()) {
            return new IterCollationIterator(rbc_.data, numeric, src);
        }
        return new FCDIterCollationIterator(rbc_.data, numeric, src, 0);
    }

    /** Adopts a freshly created collation iterator and puts this object into the "just reset" state. */
    private void installIterator(CollationIterator newIter) {
        iter_ = newIter;
        otherHalf_ = 0;
        dir_ = 0;
    }

    /**
     * CE sink that records, for each last 32-bit CE "half" of an expansion,
     * the largest number of 32-bit CE halves seen in any expansion ending with it.
     */
    private static final class MaxExpSink implements ContractionsAndExpansions.CESink {
        private final Map<Integer, Integer> maxExpansions;

        MaxExpSink(Map<Integer, Integer> h) {
            maxExpansions = h;
        }

        @Override
        public void handleCE(long ce) {
            // Single CEs are not expansions; nothing to record.
        }

        @Override
        public void handleExpansion(long[] ces, int start, int length) {
            if (length <= 1) {
                // We do not need to add single CEs into the map.
                return;
            }
            int count = 0;  // number of CE "halves"
            for (int i = 0; i < length; ++i) {
                count += ceNeedsTwoParts(ces[start + i]) ? 2 : 1;
            }
            // last "half" of the last CE
            long ce = ces[start + length - 1];
            long p = ce >>> 32;
            int lower32 = (int) ce;
            int lastHalf = getSecondHalf(p, lower32);
            if (lastHalf == 0) {
                lastHalf = getFirstHalf(p, lower32);
                assert (lastHalf != 0);
            } else {
                lastHalf |= 0xc0;  // old-style continuation CE
            }
            Integer oldCount = maxExpansions.get(lastHalf);
            if (oldCount == null || count > oldCount) {
                maxExpansions.put(lastHalf, count);
            }
        }
    }

    /**
     * Builds the map from the last 32-bit CE of an expansion to the maximum expansion length,
     * by enumerating the expansions of the given collation data.
     */
    static final Map<Integer, Integer> computeMaxExpansions(CollationData data) {
        Map<Integer, Integer> maxExpansions = new HashMap<>();
        MaxExpSink sink = new MaxExpSink(maxExpansions);
        new ContractionsAndExpansions(null, null, sink, true).forData(data);
        return maxExpansions;
    }

    /**
     * Returns the maximum length of any expansion sequence that ends with
     * the specified collation element. If there is no expansion with this
     * collation element as the last element, returns 1.
     *
     * @param ce a collation element returned by previous() or next().
     * @return the maximum length of any expansion sequence ending
     *         with the specified collation element.
     * @stable ICU 2.8
     */
    public int getMaxExpansion(int ce) {
        return getMaxExpansion(rbc_.tailoring.maxExpansions, ce);
    }

    /**
     * Looks up the maximum expansion length for a 32-bit CE.
     *
     * @param maxExpansions the map built by computeMaxExpansions(); may be null
     * @param order a 32-bit collation element
     * @return 1 for order 0; otherwise the mapped value if there is one; otherwise
     *         2 for an old-style continuation CE and 1 for anything else
     */
    static int getMaxExpansion(Map<Integer, Integer> maxExpansions, int order) {
        if (order == 0) {
            return 1;
        }
        Integer max;
        if (maxExpansions != null && (max = maxExpansions.get(order)) != null) {
            return max;
        }
        if ((order & 0xc0) == 0xc0) {
            // old-style continuation CE
            return 2;
        } else {
            return 1;
        }
    }

    /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */
    private byte normalizeDir() {
        return dir_ == 1 ? 0 : dir_;
    }

    /**
     * Tests that argument object is equal to this CollationElementIterator.
     * Iterators are equal if the objects use the same RuleBasedCollator,
     * the same source text and have the same current position in iteration.
     * @param that object to test if it is equal to this
     *             CollationElementIterator
     * @return true if the argument is a CollationElementIterator that is equal to this one
     * @stable ICU 2.8
     */
    @Override
    public boolean equals(Object that) {
        if (that == this) {
            return true;
        }
        if (that instanceof CollationElementIterator) {
            CollationElementIterator thatceiter = (CollationElementIterator) that;
            return rbc_.equals(thatceiter.rbc_)
                    && otherHalf_ == thatceiter.otherHalf_
                    && normalizeDir() == thatceiter.normalizeDir()
                    && string_.equals(thatceiter.string_)
                    && iter_.equals(thatceiter.iter_);
        }
        return false;
    }

    /**
     * Mock implementation of hashCode(). This implementation always returns a constant
     * value. When Java assertion is enabled, this method triggers an assertion failure.
     * @stable ICU 2.8
     */
    @Override
    public int hashCode() {
        // Deliberate: instances are not meant to be used as hash keys.
        assert false : "hashCode not designed";
        return 42;
    }

    /**
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public RuleBasedCollator getRuleBasedCollator() {
        return rbc_;
    }
}