/**
*******************************************************************************
* Copyright (C) 1996-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.text;

import java.text.CharacterIterator;
import java.util.HashMap;
import java.util.Map;

import com.ibm.icu.impl.CharacterIteratorWrapper;
import com.ibm.icu.impl.coll.Collation;
import com.ibm.icu.impl.coll.CollationData;
import com.ibm.icu.impl.coll.CollationIterator;
import com.ibm.icu.impl.coll.ContractionsAndExpansions;
import com.ibm.icu.impl.coll.FCDIterCollationIterator;
import com.ibm.icu.impl.coll.FCDUTF16CollationIterator;
import com.ibm.icu.impl.coll.IterCollationIterator;
import com.ibm.icu.impl.coll.UTF16CollationIterator;
import com.ibm.icu.impl.coll.UVector32;

/**
 * <p><code>CollationElementIterator</code> is an iterator created by
 * a RuleBasedCollator to walk through a string. The return result of
 * each iteration is a 32-bit collation element (CE) that defines the
 * ordering priority of the next character or sequence of characters
 * in the source string.</p>
 *
 * <p>For illustration, consider the following in Slovak and in traditional Spanish collation:
 * <blockquote>
 * <pre>
 * "ca" -&gt; the first collation element is CE('c') and the second
 *         collation element is CE('a').
 * "cha" -&gt; the first collation element is CE('ch') and the second
 *          collation element is CE('a').
 * </pre>
 * </blockquote>
 * And in German phonebook collation,
 * <blockquote>
 * <pre>
 * Since the character '&aelig;' is a composed character of 'a' and 'e', the
 * iterator returns two collation elements for the single character '&aelig;'
 *
 * "&aelig;b" -&gt; the first collation element is collation_element('a'), the
 *         second collation element is collation_element('e'), and the
 *         third collation element is collation_element('b').
 * </pre>
 * </blockquote>
 * </p>
 *
 * <p>For collation ordering comparison, the collation element results
 * can not be compared simply by using basic arithmetic operators,
 * e.g. &lt;, == or &gt;, further processing has to be done. Details
 * can be found in the ICU
 * <a href="http://userguide.icu-project.org/collation/architecture">
 * User Guide</a>. An example of using the CollationElementIterator
 * for collation ordering comparison is the class
 * {@link com.ibm.icu.text.StringSearch}.</p>
 *
 * <p>To construct a CollationElementIterator object, users
 * call the method getCollationElementIterator() on a
 * RuleBasedCollator that defines the desired sorting order.</p>
 *
 * <p> Example:
 * <blockquote>
 * <pre>
 *  String testString = "This is a test";
 *  RuleBasedCollator rbc = new RuleBasedCollator("&amp;a&lt;b");
 *  CollationElementIterator iterator = rbc.getCollationElementIterator(testString);
 *  int order;
 *  // next() keeps returning NULLORDER once the end has been reached,
 *  // so the loop must test the value returned by next() itself.
 *  while ((order = iterator.next()) != CollationElementIterator.NULLORDER) {
 *      if (order != CollationElementIterator.IGNORABLE) {
 *          // order is valid, not ignorable and we have not passed the end
 *          // of the iteration, we do something
 *          int primaryOrder = CollationElementIterator.primaryOrder(order);
 *          System.out.println("Next primary order 0x" +
 *                             Integer.toHexString(primaryOrder));
 *      }
 *  }
 * </pre>
 * </blockquote>
 * </p>
 * <p>
 * The method next() returns the collation order of the next character based on
 * the comparison level of the collator. The method previous() returns the
 * collation order of the previous character based on the comparison level of
 * the collator. The Collation Element Iterator moves only in one direction
 * between calls to reset(), setOffset(), or setText(). That is, next() and
 * previous() can not be inter-used. Whenever previous() is to be called after
 * next() or vice versa, reset(), setOffset() or setText() has to be called first
 * to reset the status, shifting current position to either the end or the start of
 * the string (reset() or setText()), or the specified position (setOffset()).
 * Hence at the next call of next() or previous(), the first or last collation order,
 * or collation order at the specified position will be returned. If a change of
 * direction is done without one of these calls, an IllegalStateException is thrown.
 * </p>
 * <p>
 * This class is not subclassable.
 * </p>
 * @see Collator
 * @see RuleBasedCollator
 * @see StringSearch
 * @author Syn Wee Quek
 * @stable ICU 2.8
 */
public final class CollationElementIterator
{
    private CollationIterator iter_;  // owned
    private final RuleBasedCollator rbc_;  // aliased
    /**
     * Pending second 32-bit "half" of a 64-bit CE that still has to be returned
     * by the next call to next() or previous(); 0 if there is none.
     */
    private int otherHalf_;
    /**
     * &lt;0: backwards; 0: just after reset() (previous() begins from end);
     * 1: just after setOffset(); &gt;1: forward
     */
    private byte dir_;
    /**
     * Stores offsets from expansions and from unsafe-backwards iteration,
     * so that getOffset() returns intermediate offsets for the CEs
     * that are consistent with forward iteration.
     */
    private UVector32 offsets_;

    private String string_;  // TODO: needed in Java? if so, then add a UCharacterIterator field too?


    /**
     * <p>This constant is returned by the iterator in the methods
     * next() and previous() when the end or the beginning of the
     * source string has been reached, and there are no more valid
     * collation elements to return.</p>
     *
     * <p>See class documentation for an example of use.</p>
     * @stable ICU 2.8
     * @see #next
     * @see #previous */
    public static final int NULLORDER = 0xffffffff;

    /**
     * <p>This constant is returned by the iterator in the methods
     * next() and previous() when a collation element result is to be
     * ignored.</p>
     *
     * <p>See class documentation for an example of use.</p>
     * @stable ICU 2.8
     * @see #next
     * @see #previous */
    public static final int IGNORABLE = 0;

    /**
     * Return the primary order of the specified collation element,
     * i.e. the first 16 bits.  This value is unsigned.
     * @param ce the collation element
     * @return the element's 16 bits primary order.
     * @stable ICU 2.8
     */
    public static final int primaryOrder(int ce) {
        return (ce >>> 16) & 0xffff;
    }

    /**
     * Return the secondary order of the specified collation element,
     * i.e. the 16th to 23rd bits, inclusive.  This value is unsigned.
     * @param ce the collation element
     * @return the element's 8 bits secondary order
     * @stable ICU 2.8
     */
    public static final int secondaryOrder(int ce) {
        return (ce >>> 8) & 0xff;
    }

    /**
     * Return the tertiary order of the specified collation element, i.e. the last
     * 8 bits.  This value is unsigned.
     * @param ce the collation element
     * @return the element's 8 bits tertiary order
     * @stable ICU 2.8
     */
    public static final int tertiaryOrder(int ce) {
        return ce & 0xff;
    }


    /**
     * Builds the first old-style 32-bit CE from a 64-bit CE.
     * @param p the upper 32 bits of the 64-bit CE, as computed by the callers (ce &gt;&gt;&gt; 32)
     * @param lower32 the lower 32 bits of the 64-bit CE
     */
    private static final int getFirstHalf(long p, int lower32) {
        return ((int)p & 0xffff0000) | ((lower32 >> 16) & 0xff00) | ((lower32 >> 8) & 0xff);
    }

    /**
     * Builds the second old-style 32-bit CE from a 64-bit CE, without the
     * continuation marker bits (callers OR in 0xc0 when the result is non-zero).
     * @param p the upper 32 bits of the 64-bit CE, as computed by the callers (ce &gt;&gt;&gt; 32)
     * @param lower32 the lower 32 bits of the 64-bit CE
     */
    private static final int getSecondHalf(long p, int lower32) {
        return ((int)p << 16) | ((lower32 >> 8) & 0xff00) | (lower32 & 0x3f);
    }

    /**
     * Returns true if the 64-bit CE has any of the bits set that end up in the
     * second 32-bit half (see getSecondHalf()).
     */
    private static final boolean ceNeedsTwoParts(long ce) {
        return (ce & 0xffff00ff003fL) != 0;
    }

    /**
     * Common initialization; the text iterator is set afterwards via setText().
     */
    private CollationElementIterator(RuleBasedCollator collator) {
        iter_ = null;
        rbc_ = collator;
        otherHalf_ = 0;
        dir_ = 0;
        offsets_ = null;
    }

    /**
     * <p>CollationElementIterator constructor. This takes a source
     * string and a RuleBasedCollator. The iterator will walk through
     * the source string based on the rules defined by the
     * collator. If the source string is empty, NULLORDER will be
     * returned on the first call to next().</p>
     *
     * @param source the source string.
     * @param collator the RuleBasedCollator
     * @stable ICU 2.8
     */
    CollationElementIterator(String source, RuleBasedCollator collator) {
        this(collator);
        setText(source);
    }
    // Note: The constructors should take settings & tailoring, not a collator,
    // to avoid circular dependencies.
    // However, for equals() we would need to be able to compare tailoring data for equality
    // without making CollationData or CollationTailoring depend on TailoredSet.
    // (See the implementation of RuleBasedCollator.equals().)
    // That might require creating an intermediate class that would be used
    // by both CollationElementIterator and RuleBasedCollator
    // but only contain the part of RBC.equals() related to data and rules.

    /**
     * <p>CollationElementIterator constructor. This takes a source
     * character iterator and a RuleBasedCollator. The iterator will
     * walk through the source string based on the rules defined by
     * the collator. If the source string is empty, NULLORDER will be
     * returned on the first call to next().</p>
     *
     * @param source the source string iterator.
     * @param collator the RuleBasedCollator
     * @stable ICU 2.8
     */
    CollationElementIterator(CharacterIterator source, RuleBasedCollator collator) {
        this(collator);
        setText(source);
    }

    /**
     * <p>CollationElementIterator constructor. This takes a source
     * character iterator and a RuleBasedCollator. The iterator will
     * walk through the source string based on the rules defined by
     * the collator. If the source string is empty, NULLORDER will be
     * returned on the first call to next().</p>
     *
     * @param source the source string iterator.
     * @param collator the RuleBasedCollator
     * @stable ICU 2.8
     */
    CollationElementIterator(UCharacterIterator source, RuleBasedCollator collator) {
        this(collator);
        setText(source);
    }

    /**
     * <p>Returns the character offset in the source string
     * corresponding to the next collation element. I.e., getOffset()
     * returns the position in the source string corresponding to the
     * collation element that will be returned by the next call to
     * next() or previous(). This value could be any of:
     * <ul>
     * <li> The index of the <b>first</b> character corresponding to
     * the next collation element. (This means that if
     * <code>setOffset(offset)</code> sets the index in the middle of
     * a contraction, <code>getOffset()</code> returns the index of
     * the first character in the contraction, which may not be equal
     * to the original offset that was set. Hence calling getOffset()
     * immediately after setOffset(offset) does not guarantee that the
     * original offset set will be returned.)
     * <li> If normalization is on, the index of the <b>immediate</b>
     * subsequent character, or composite character with the first
     * character, having a combining class of 0.
     * <li> The length of the source string, if iteration has reached
     * the end.
     *</ul>
     * </p>
     * @return The character offset in the source string corresponding to the
     *         collation element that will be returned by the next call to
     *         next() or previous().
     * @stable ICU 2.8
     */
    public int getOffset() {
        if (dir_ < 0 && offsets_ != null && !offsets_.isEmpty()) {
            // CollationIterator.previousCE() decrements the CEs length
            // while it pops CEs from its internal buffer.
            int i = iter_.getCEsLength();
            if (otherHalf_ != 0) {
                // Return the trailing CE offset while we are in the middle of a 64-bit CE.
                ++i;
            }
            assert (i < offsets_.size());
            return offsets_.elementAti(i);
        }
        return iter_.getOffset();
    }

    /**
     * <p>Get the next collation element in the source string.</p>
     *
     * <p>This iterator iterates over a sequence of collation elements
     * that were built from the string. Because there isn't
     * necessarily a one-to-one mapping from characters to collation
     * elements, this doesn't mean the same thing as "return the
     * collation element [or ordering priority] of the next character
     * in the string".</p>
     *
     * <p>This function returns the collation element that the
     * iterator is currently pointing to, and then updates the
     * internal pointer to point to the next element.</p>
     *
     * @return the next collation element or NULLORDER if the end of the
     *         iteration has been reached.
     * @throws IllegalStateException if the iterator is currently iterating backwards,
     *         i.e. previous() was called without a subsequent reset(), setOffset()
     *         or setText().
     * @stable ICU 2.8
     */
    public int next() {
        if (dir_ > 1) {
            // Continue forward iteration. Test this first.
            if (otherHalf_ != 0) {
                int oh = otherHalf_;
                otherHalf_ = 0;
                return oh;
            }
        } else if (dir_ == 1) {
            // next() after setOffset()
            dir_ = 2;
        } else if (dir_ == 0) {
            // The iter_ is already reset to the start of the text.
            dir_ = 2;
        } else /* dir_ < 0 */{
            // illegal change of direction
            // Java porting note: ICU4C sets U_INVALID_STATE_ERROR to the return status.
            throw new IllegalStateException("Illegal change of direction");
        }
        // No need to keep all CEs in the buffer when we iterate.
        iter_.clearCEsIfNoneRemaining();
        long ce = iter_.nextCE();
        if (ce == Collation.NO_CE) {
            return NULLORDER;
        }
        // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
        long p = ce >>> 32;
        int lower32 = (int) ce;
        int firstHalf = getFirstHalf(p, lower32);
        int secondHalf = getSecondHalf(p, lower32);
        if (secondHalf != 0) {
            otherHalf_ = secondHalf | 0xc0; // continuation CE
        }
        return firstHalf;
    }

    /**
     * <p>Get the previous collation element in the source string.</p>
     *
     * <p>This iterator iterates over a sequence of collation elements
     * that were built from the string. Because there isn't
     * necessarily a one-to-one mapping from characters to collation
     * elements, this doesn't mean the same thing as "return the
     * collation element [or ordering priority] of the previous
     * character in the string".</p>
     *
     * <p>This function updates the iterator's internal pointer to
     * point to the collation element preceding the one it's currently
     * pointing to and then returns that element, while next() returns
     * the current element and then updates the pointer.</p>
     *
     * @return the previous collation element, or NULLORDER when the start of
     *         the iteration has been reached.
     * @throws IllegalStateException if the iterator is currently iterating forwards,
     *         i.e. next() was called without a subsequent reset(), setOffset()
     *         or setText().
     * @stable ICU 2.8
     */
    public int previous() {
        if (dir_ < 0) {
            // Continue backwards iteration. Test this first.
            if (otherHalf_ != 0) {
                int oh = otherHalf_;
                otherHalf_ = 0;
                return oh;
            }
        } else if (dir_ == 0) {
            // Just after reset()/setText(): backwards iteration begins from the end.
            iter_.resetToOffset(string_.length());
            dir_ = -1;
        } else if (dir_ == 1) {
            // previous() after setOffset()
            dir_ = -1;
        } else /* dir_ > 1 */{
            // illegal change of direction
            // Java porting note: ICU4C sets U_INVALID_STATE_ERROR to the return status.
            throw new IllegalStateException("Illegal change of direction");
        }
        if (offsets_ == null) {
            offsets_ = new UVector32();
        }
        // If we already have expansion CEs, then we also have offsets.
        // Otherwise remember the trailing offset in case we need to
        // write offsets for an artificial expansion.
        int limitOffset = iter_.getCEsLength() == 0 ? iter_.getOffset() : 0;
        long ce = iter_.previousCE(offsets_);
        if (ce == Collation.NO_CE) {
            return NULLORDER;
        }
        // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
        long p = ce >>> 32;
        int lower32 = (int) ce;
        int firstHalf = getFirstHalf(p, lower32);
        int secondHalf = getSecondHalf(p, lower32);
        if (secondHalf != 0) {
            if (offsets_.isEmpty()) {
                // When we convert a single 64-bit CE into two 32-bit CEs,
                // we need to make this artificial expansion behave like a normal expansion.
                // See CollationIterator.previousCE().
                offsets_.addElement(iter_.getOffset());
                offsets_.addElement(limitOffset);
            }
            // Going backwards, the continuation half comes out first.
            otherHalf_ = firstHalf;
            return secondHalf | 0xc0; // continuation CE
        }
        return firstHalf;
    }

    /**
     * <p> Resets the cursor to the beginning of the string. The next
     * call to next() or previous() will return the first and last
     * collation element in the string, respectively.</p>
     *
     * <p>If the RuleBasedCollator used by this iterator has had its
     * attributes changed, calling reset() will reinitialize the
     * iterator to use the new attributes.</p>
     *
     * @stable ICU 2.8
     */
    public void reset() {
        iter_.resetToOffset(0);
        otherHalf_ = 0;
        dir_ = 0;
    }

    /**
     * <p> Sets the iterator to point to the collation element
     * corresponding to the character at the specified offset. The
     * value returned by the next call to next() will be the collation
     * element corresponding to the characters at offset.</p>
     *
     * <p>If offset is in the middle of a contracting character
     * sequence, the iterator is adjusted to the start of the
     * contracting sequence. This means that getOffset() is not
     * guaranteed to return the same value set by this method.</p>
     *
     * <p>If the decomposition mode is on, and offset is in the middle
     * of a decomposable range of source text, the iterator may not
     * return a correct result for the next forwards or backwards
     * iteration.  The user must ensure that the offset is not in the
     * middle of a decomposable range.</p>
     *
     * @param newOffset the character offset into the original source string to
     *        set. Note that this is not an offset into the corresponding
     *        sequence of collation elements.
     * @stable ICU 2.8
     */
    public void setOffset(int newOffset) {
        if (0 < newOffset && newOffset < string_.length()) {
            int offset = newOffset;
            do {
                char c = string_.charAt(offset);
                if (!rbc_.isUnsafe(c) ||
                        (Character.isHighSurrogate(c) && !rbc_.isUnsafe(string_.codePointAt(offset)))) {
                    break;
                }
                // Back up to before this unsafe character.
                --offset;
            } while (offset > 0);
            if (offset < newOffset) {
                // We might have backed up more than necessary.
                // For example, contractions "ch" and "cu" make both 'h' and 'u' unsafe,
                // but for text "chu" setOffset(2) should remain at 2
                // although we initially back up to offset 0.
                // Find the last safe offset no greater than newOffset by iterating forward.
                int lastSafeOffset = offset;
                do {
                    iter_.resetToOffset(lastSafeOffset);
                    do {
                        iter_.nextCE();
                    } while ((offset = iter_.getOffset()) == lastSafeOffset);
                    if (offset <= newOffset) {
                        lastSafeOffset = offset;
                    }
                } while (offset < newOffset);
                newOffset = lastSafeOffset;
            }
        }
        iter_.resetToOffset(newOffset);
        otherHalf_ = 0;
        dir_ = 1;
    }

    /**
     * <p>Set a new source string for iteration, and reset the offset
     * to the beginning of the text.</p>
     *
     * @param source the new source string for iteration.
     * @stable ICU 2.8
     */
    public void setText(String source) {
        string_ = source; // TODO: do we need to remember the source string in a field?
        CollationIterator newIter;
        boolean numeric = rbc_.settings.readOnly().isNumeric();
        if (rbc_.settings.readOnly().dontCheckFCD()) {
            newIter = new UTF16CollationIterator(rbc_.data, numeric, string_, 0);
        } else {
            newIter = new FCDUTF16CollationIterator(rbc_.data, numeric, string_, 0);
        }
        iter_ = newIter;
        otherHalf_ = 0;
        dir_ = 0;
    }

    /**
     * <p>Set a new source string iterator for iteration, and reset the
     * offset to the beginning of the text.
     * </p>
     * <p>The source iterator's integrity will be preserved since a new copy
     * will be created for use.</p>
     * @param source the new source string iterator for iteration.
     * @stable ICU 2.8
     */
    public void setText(UCharacterIterator source) {
        string_ = source.getText(); // TODO: do we need to remember the source string in a field?
        // Note: In C++, we just setText(source.getText()).
        // In Java, we actually operate on a character iterator.
        // (The old code apparently did so only for a CharacterIterator;
        // for a UCharacterIterator it also just used source.getText()).
        // TODO: do we need to remember the cloned iterator in a field?
        UCharacterIterator src;
        try {
            src = (UCharacterIterator) source.clone();
        } catch (CloneNotSupportedException e) {
            // Fall back to ICU 52 behavior of iterating over the text contents
            // of the UCharacterIterator.
            setText(source.getText());
            return;
        }
        src.setToStart();
        CollationIterator newIter;
        boolean numeric = rbc_.settings.readOnly().isNumeric();
        if (rbc_.settings.readOnly().dontCheckFCD()) {
            newIter = new IterCollationIterator(rbc_.data, numeric, src);
        } else {
            newIter = new FCDIterCollationIterator(rbc_.data, numeric, src, 0);
        }
        iter_ = newIter;
        otherHalf_ = 0;
        dir_ = 0;
    }

    /**
     * <p>Set a new source string iterator for iteration, and reset the
     * offset to the beginning of the text.
     * </p>
     * @param source the new source string iterator for iteration.
     * @stable ICU 2.8
     */
    public void setText(CharacterIterator source) {
        // Note: In C++, we just setText(source.getText()).
        // In Java, we actually operate on a character iterator.
        // TODO: do we need to remember the iterator in a field?
        // TODO: apparently we don't clone a CharacterIterator in Java,
        // we only clone the text for a UCharacterIterator?? see the old code in the constructors
        UCharacterIterator src = new CharacterIteratorWrapper(source);
        src.setToStart();
        string_ = src.getText(); // TODO: do we need to remember the source string in a field?
        CollationIterator newIter;
        boolean numeric = rbc_.settings.readOnly().isNumeric();
        if (rbc_.settings.readOnly().dontCheckFCD()) {
            newIter = new IterCollationIterator(rbc_.data, numeric, src);
        } else {
            newIter = new FCDIterCollationIterator(rbc_.data, numeric, src, 0);
        }
        iter_ = newIter;
        otherHalf_ = 0;
        dir_ = 0;
    }

    // Java porting note: This method is @stable ICU 2.0 in ICU4C, but not available
    // in ICU4J. For now, keep it package local.
    /**
     * Gets the comparison order in the desired strength. Ignore the other
     * differences.
     * @param order The order value
     * @return the order with the bits below the collator's strength masked off;
     *         unchanged for strengths other than PRIMARY and SECONDARY
     */
    int strengthOrder(int order) {
        int s = rbc_.settings.readOnly().getStrength();
        // Mask off the unwanted differences.
        if (s == Collator.PRIMARY) {
            order &= 0xffff0000;
        }
        else if (s == Collator.SECONDARY) {
            order &= 0xffffff00;
        }

        return order;
    }


    /**
     * CE sink that records, for each old-style 32-bit CE that ends an expansion,
     * the largest number of 32-bit CE "halves" seen in any such expansion.
     */
    private static final class MaxExpSink implements ContractionsAndExpansions.CESink {
        private final Map<Integer, Integer> maxExpansions;

        MaxExpSink(Map<Integer, Integer> h) {
            maxExpansions = h;
        }

        // Java 6: @Override
        public void handleCE(long ce) {
            // Single CEs are not expansions; nothing to record.
        }

        // Java 6: @Override
        public void handleExpansion(long[] ces, int start, int length) {
            if (length <= 1) {
                // We do not need to add single CEs into the map.
                return;
            }
            int count = 0;  // number of CE "halves"
            for (int i = 0; i < length; ++i) {
                count += ceNeedsTwoParts(ces[start + i]) ? 2 : 1;
            }
            // last "half" of the last CE
            long ce = ces[start + length - 1];
            long p = ce >>> 32;
            int lower32 = (int) ce;
            int lastHalf = getSecondHalf(p, lower32);
            if (lastHalf == 0) {
                lastHalf = getFirstHalf(p, lower32);
                assert (lastHalf != 0);
            } else {
                lastHalf |= 0xc0;  // old-style continuation CE
            }
            Integer oldCount = maxExpansions.get(lastHalf);
            if (oldCount == null || count > oldCount) {
                maxExpansions.put(lastHalf, count);
            }
        }
    }

    /**
     * Builds the map from the last 32-bit CE of each expansion in the data
     * to the maximum expansion length (counted in 32-bit CEs) ending with that CE.
     */
    static final Map<Integer, Integer> computeMaxExpansions(CollationData data) {
        Map<Integer, Integer> maxExpansions = new HashMap<Integer, Integer>();
        MaxExpSink sink = new MaxExpSink(maxExpansions);
        new ContractionsAndExpansions(null, null, sink, true).forData(data);
        return maxExpansions;
    }

    /**
     * <p> Returns the maximum length of any expansion sequence that ends with
     * the specified collation element. If there is no expansion with this
     * collation element as the last element, returns 1.
     * </p>
     * @param ce a collation element returned by previous() or next().
     * @return the maximum length of any expansion sequence ending
     *         with the specified collation element.
     * @stable ICU 2.8
     */
    public int getMaxExpansion(int ce) {
        return getMaxExpansion(rbc_.tailoring.maxExpansions, ce);
    }

    /**
     * Looks up the maximum expansion length for a 32-bit CE.
     * @param maxExpansions the map built by computeMaxExpansions(); may be null
     * @param order a 32-bit collation element
     * @return 1 for order 0; otherwise the mapped value if present;
     *         otherwise 2 for an old-style continuation CE, else 1
     */
    static int getMaxExpansion(Map<Integer, Integer> maxExpansions, int order) {
        if (order == 0) {
            return 1;
        }
        Integer max;
        if (maxExpansions != null && (max = maxExpansions.get(order)) != null) {
            return max;
        }
        if ((order & 0xc0) == 0xc0) {
            // old-style continuation CE
            return 2;
        } else {
            return 1;
        }
    }

    /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */
    private byte normalizeDir() {
        return dir_ == 1 ? 0 : dir_;
    }

    /**
     * Tests that argument object is equal to this CollationElementIterator.
     * Iterators are equal if the objects use the same RuleBasedCollator,
     * the same source text and have the same current position in iteration.
     * @param that object to test if it is equal to this
     *             CollationElementIterator
     * @return true if the argument is an equal CollationElementIterator
     * @stable ICU 2.8
     */
    @Override
    public boolean equals(Object that) {
        if (that == this) {
            return true;
        }
        if (that instanceof CollationElementIterator) {
            CollationElementIterator thatceiter = (CollationElementIterator) that;
            return rbc_.equals(thatceiter.rbc_)
                    && otherHalf_ == thatceiter.otherHalf_
                    && normalizeDir() == thatceiter.normalizeDir()
                    && string_.equals(thatceiter.string_)
                    && iter_.equals(thatceiter.iter_);
        }
        return false;
    }

    /**
     * Mock implementation of hashCode(). This implementation always returns a constant
     * value. When Java assertion is enabled, this method triggers an assertion failure.
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Override
    @Deprecated
    public int hashCode() {
        assert false : "hashCode not designed";
        return 42;
    }

    /**
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public RuleBasedCollator getRuleBasedCollator() {
        return rbc_;
    }
}