1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /**
4 *******************************************************************************
5 * Copyright (C) 1996-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9 package com.ibm.icu.text;
10 
11 import java.text.CharacterIterator;
12 import java.util.HashMap;
13 import java.util.Map;
14 
15 import com.ibm.icu.impl.CharacterIteratorWrapper;
16 import com.ibm.icu.impl.coll.Collation;
17 import com.ibm.icu.impl.coll.CollationData;
18 import com.ibm.icu.impl.coll.CollationIterator;
19 import com.ibm.icu.impl.coll.ContractionsAndExpansions;
20 import com.ibm.icu.impl.coll.FCDIterCollationIterator;
21 import com.ibm.icu.impl.coll.FCDUTF16CollationIterator;
22 import com.ibm.icu.impl.coll.IterCollationIterator;
23 import com.ibm.icu.impl.coll.UTF16CollationIterator;
24 import com.ibm.icu.impl.coll.UVector32;
25 
26 /**
27  * <code>CollationElementIterator</code> is an iterator created by
28  * a RuleBasedCollator to walk through a string. The return result of
29  * each iteration is a 32-bit collation element (CE) that defines the
30  * ordering priority of the next character or sequence of characters
31  * in the source string.
32  *
33  * <p>For illustration, consider the following in Slovak and in traditional Spanish collation:
34  * <blockquote>
35  * <pre>
36  * "ca" -&gt; the first collation element is CE('c') and the second
37  *         collation element is CE('a').
38  * "cha" -&gt; the first collation element is CE('ch') and the second
39  *          collation element is CE('a').
40  * </pre>
41  * </blockquote>
42  * And in German phonebook collation,
43  * <blockquote>
44  * <pre>
45  * Since the character '&#230;' is a composed character of 'a' and 'e', the
46  * iterator returns two collation elements for the single character '&#230;'
47  *
48  * "&#230;b" -&gt; the first collation element is collation_element('a'), the
49  *              second collation element is collation_element('e'), and the
50  *              third collation element is collation_element('b').
51  * </pre>
52  * </blockquote>
53  *
54  * <p>For collation ordering comparison, the collation element results
55  * can not be compared simply by using basic arithmetic operators,
56  * e.g. &lt;, == or &gt;, further processing has to be done. Details
57  * can be found in the ICU
58  * <a href="http://userguide.icu-project.org/collation/architecture">
59  * User Guide</a>. An example of using the CollationElementIterator
60  * for collation ordering comparison is the class
61  * {@link com.ibm.icu.text.StringSearch}.
62  *
63  * <p>To construct a CollationElementIterator object, users
64  * call the method getCollationElementIterator() on a
65  * RuleBasedCollator that defines the desired sorting order.
66  *
67  * <p> Example:
68  * <blockquote>
69  * <pre>
 *  String testString = "This is a test";
 *  RuleBasedCollator rbc = new RuleBasedCollator("&amp;a&lt;b");
 *  CollationElementIterator iterator = rbc.getCollationElementIterator(testString);
 *  int order;
 *  while ((order = iterator.next()) != CollationElementIterator.NULLORDER) {
 *      if (order != CollationElementIterator.IGNORABLE) {
 *          // order is valid, not ignorable and we have not passed the end
 *          // of the iteration, we do something
 *          int primaryOrder = CollationElementIterator.primaryOrder(order);
 *          System.out.println("Next primary order 0x" +
 *                             Integer.toHexString(primaryOrder));
 *      }
 *  }
85  * </pre>
86  * </blockquote>
87  * <p>
 * The method next() returns the collation order of the next character based on
 * the comparison level of the collator. The method previous() returns the
 * collation order of the previous character based on the comparison level of
 * the collator. The Collation Element Iterator moves only in one direction
 * between calls to reset(), setOffset(), or setText(). That is, calls to next()
 * and previous() cannot be interleaved. Whenever previous() is to be called after
 * next() or vice versa, reset(), setOffset() or setText() has to be called first
 * to reset the status. reset() and setText() position the iterator so that next()
 * starts from the beginning of the string and previous() starts from its end;
 * setOffset() positions it at the specified offset.
 * Hence at the next call of next() or previous(), the first or last collation order,
 * or collation order at the specified position will be returned. If the direction
 * is changed without one of these calls, an IllegalStateException is thrown.
100  * <p>
101  * This class is not subclassable.
102  * @see Collator
103  * @see RuleBasedCollator
104  * @see StringSearch
105  * @author Syn Wee Quek
106  * @stable ICU 2.8
107  */
108 public final class CollationElementIterator
109 {
    /** Low-level iterator over the current text; a new one is created by every setText(). */
    private CollationIterator iter_;  // owned
    /** Collator whose settings and data are used to build iter_. */
    private RuleBasedCollator rbc_;  // aliased
    /**
     * Pending 32-bit half of a 64-bit CE that next() or previous() split into two
     * old-style CEs; 0 when no half is pending.
     */
    private int otherHalf_;
    /**
     * &lt;0: backwards; 0: just after reset() (previous() begins from end);
     * 1: just after setOffset(); >1: forward
     */
    private byte dir_;
    /**
     * Stores offsets from expansions and from unsafe-backwards iteration,
     * so that getOffset() returns intermediate offsets for the CEs
     * that are consistent with forward iteration.
     * Allocated lazily by the first call to previous().
     */
    private UVector32 offsets_;

    /** Source text; consulted by previous(), setOffset() and equals(). */
    private String string_;  // TODO: needed in Java? if so, then add a UCharacterIterator field too?
126 
127 
128     /**
129      * This constant is returned by the iterator in the methods
130      * next() and previous() when the end or the beginning of the
131      * source string has been reached, and there are no more valid
132      * collation elements to return.
133      *
134      * <p>See class documentation for an example of use.
135      * @stable ICU 2.8
136      * @see #next
137      * @see #previous */
138     public final static int NULLORDER = 0xffffffff;
139 
140     /**
141      * This constant is returned by the iterator in the methods
142      * next() and previous() when a collation element result is to be
143      * ignored.
144      *
145      * <p>See class documentation for an example of use.
146      * @stable ICU 2.8
147      * @see #next
148      * @see #previous */
149     public static final int IGNORABLE = 0;
150 
151     /**
152      * Return the primary order of the specified collation element,
153      * i.e. the first 16 bits.  This value is unsigned.
154      * @param ce the collation element
155      * @return the element's 16 bits primary order.
156      * @stable ICU 2.8
157      */
primaryOrder(int ce)158     public final static int primaryOrder(int ce) {
159         return (ce >>> 16) & 0xffff;
160     }
161 
162     /**
163      * Return the secondary order of the specified collation element,
164      * i.e. the 16th to 23th bits, inclusive.  This value is unsigned.
165      * @param ce the collation element
166      * @return the element's 8 bits secondary order
167      * @stable ICU 2.8
168      */
secondaryOrder(int ce)169     public final static int secondaryOrder(int ce) {
170         return (ce >>> 8) & 0xff;
171     }
172 
173     /**
174      * Return the tertiary order of the specified collation element, i.e. the last
175      * 8 bits.  This value is unsigned.
176      * @param ce the collation element
177      * @return the element's 8 bits tertiary order
178      * @stable ICU 2.8
179      */
tertiaryOrder(int ce)180     public final static int tertiaryOrder(int ce) {
181         return ce & 0xff;
182     }
183 
184 
getFirstHalf(long p, int lower32)185     private static final int getFirstHalf(long p, int lower32) {
186         return ((int)p & 0xffff0000) | ((lower32 >> 16) & 0xff00) | ((lower32 >> 8) & 0xff);
187     }
188 
getSecondHalf(long p, int lower32)189     private static final int getSecondHalf(long p, int lower32) {
190         return ((int)p << 16) | ((lower32 >> 8) & 0xff00) | (lower32 & 0x3f);
191     }
192 
ceNeedsTwoParts(long ce)193     private static final boolean ceNeedsTwoParts(long ce) {
194         return (ce & 0xffff00ff003fL) != 0;
195     }
196 
CollationElementIterator(RuleBasedCollator collator)197     private CollationElementIterator(RuleBasedCollator collator) {
198         iter_ = null;
199         rbc_ = collator;
200         otherHalf_ = 0;
201         dir_ = 0;
202         offsets_ = null;
203     }
204 
    /**
     * CollationElementIterator constructor. This takes a source
     * string and a RuleBasedCollator. The iterator will walk through
     * the source string based on the rules defined by the
     * collator. If the source string is empty, NULLORDER will be
     * returned on the first call to next().
     *
     * <p>Delegates to the collator-only constructor and then to
     * {@link #setText(String)}.
     *
     * @param source the source string.
     * @param collator the RuleBasedCollator
     * @stable ICU 2.8
     */
    CollationElementIterator(String source, RuleBasedCollator collator) {
        this(collator);
        setText(source);
    }
220     // Note: The constructors should take settings & tailoring, not a collator,
221     // to avoid circular dependencies.
222     // However, for equals() we would need to be able to compare tailoring data for equality
223     // without making CollationData or CollationTailoring depend on TailoredSet.
224     // (See the implementation of RuleBasedCollator.equals().)
225     // That might require creating an intermediate class that would be used
226     // by both CollationElementIterator and RuleBasedCollator
227     // but only contain the part of RBC.equals() related to data and rules.
228 
    /**
     * CollationElementIterator constructor. This takes a source
     * character iterator and a RuleBasedCollator. The iterator will
     * walk through the source text based on the rules defined by
     * the collator. If the source text is empty, NULLORDER will be
     * returned on the first call to next().
     *
     * <p>Delegates to the collator-only constructor and then to
     * {@link #setText(CharacterIterator)}.
     *
     * @param source the source string iterator.
     * @param collator the RuleBasedCollator
     * @stable ICU 2.8
     */
    CollationElementIterator(CharacterIterator source, RuleBasedCollator collator) {
        this(collator);
        setText(source);
    }
244 
    /**
     * CollationElementIterator constructor. This takes a source
     * UCharacterIterator and a RuleBasedCollator. The iterator will
     * walk through the source text based on the rules defined by
     * the collator. If the source text is empty, NULLORDER will be
     * returned on the first call to next().
     *
     * <p>Delegates to the collator-only constructor and then to
     * {@link #setText(UCharacterIterator)}.
     *
     * @param source the source string iterator.
     * @param collator the RuleBasedCollator
     * @stable ICU 2.8
     */
    CollationElementIterator(UCharacterIterator source, RuleBasedCollator collator) {
        this(collator);
        setText(source);
    }
260 
261     /**
262      * Returns the character offset in the source string
263      * corresponding to the next collation element. I.e., getOffset()
264      * returns the position in the source string corresponding to the
265      * collation element that will be returned by the next call to
266      * next() or previous(). This value could be any of:
267      * <ul>
268      * <li> The index of the <b>first</b> character corresponding to
269      * the next collation element. (This means that if
270      * <code>setOffset(offset)</code> sets the index in the middle of
271      * a contraction, <code>getOffset()</code> returns the index of
272      * the first character in the contraction, which may not be equal
273      * to the original offset that was set. Hence calling getOffset()
274      * immediately after setOffset(offset) does not guarantee that the
275      * original offset set will be returned.)
276      * <li> If normalization is on, the index of the <b>immediate</b>
277      * subsequent character, or composite character with the first
278      * character, having a combining class of 0.
279      * <li> The length of the source string, if iteration has reached
280      * the end.
281      *</ul>
282      *
283      * @return The character offset in the source string corresponding to the
284      *         collation element that will be returned by the next call to
285      *         next() or previous().
286      * @stable ICU 2.8
287      */
getOffset()288     public int getOffset() {
289         if (dir_ < 0 && offsets_ != null && !offsets_.isEmpty()) {
290             // CollationIterator.previousCE() decrements the CEs length
291             // while it pops CEs from its internal buffer.
292             int i = iter_.getCEsLength();
293             if (otherHalf_ != 0) {
294                 // Return the trailing CE offset while we are in the middle of a 64-bit CE.
295                 ++i;
296             }
297             assert (i < offsets_.size());
298             return offsets_.elementAti(i);
299         }
300         return iter_.getOffset();
301     }
302 
303     /**
304      * Get the next collation element in the source string.
305      *
306      * <p>This iterator iterates over a sequence of collation elements
307      * that were built from the string. Because there isn't
308      * necessarily a one-to-one mapping from characters to collation
309      * elements, this doesn't mean the same thing as "return the
310      * collation element [or ordering priority] of the next character
311      * in the string".
312      *
313      * <p>This function returns the collation element that the
314      * iterator is currently pointing to, and then updates the
315      * internal pointer to point to the next element.
316      *
317      * @return the next collation element or NULLORDER if the end of the
318      *         iteration has been reached.
319      * @stable ICU 2.8
320      */
321     public int next() {
322         if (dir_ > 1) {
323             // Continue forward iteration. Test this first.
324             if (otherHalf_ != 0) {
325                 int oh = otherHalf_;
326                 otherHalf_ = 0;
327                 return oh;
328             }
329         } else if (dir_ == 1) {
330             // next() after setOffset()
331             dir_ = 2;
332         } else if (dir_ == 0) {
333             // The iter_ is already reset to the start of the text.
334             dir_ = 2;
335         } else /* dir_ < 0 */{
336             // illegal change of direction
337             throw new IllegalStateException("Illegal change of direction");
338             // Java porting note: ICU4C sets U_INVALID_STATE_ERROR to the return status.
339         }
340         // No need to keep all CEs in the buffer when we iterate.
341         iter_.clearCEsIfNoneRemaining();
342         long ce = iter_.nextCE();
343         if (ce == Collation.NO_CE) {
344             return NULLORDER;
345         }
346         // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
347         long p = ce >>> 32;
348         int lower32 = (int) ce;
349         int firstHalf = getFirstHalf(p, lower32);
350         int secondHalf = getSecondHalf(p, lower32);
351         if (secondHalf != 0) {
352             otherHalf_ = secondHalf | 0xc0; // continuation CE
353         }
354         return firstHalf;
355     }
356 
357     /**
358      * Get the previous collation element in the source string.
359      *
360      * <p>This iterator iterates over a sequence of collation elements
361      * that were built from the string. Because there isn't
362      * necessarily a one-to-one mapping from characters to collation
363      * elements, this doesn't mean the same thing as "return the
364      * collation element [or ordering priority] of the previous
365      * character in the string".
366      *
367      * <p>This function updates the iterator's internal pointer to
368      * point to the collation element preceding the one it's currently
369      * pointing to and then returns that element, while next() returns
370      * the current element and then updates the pointer.
371      *
372      * @return the previous collation element, or NULLORDER when the start of
373      *             the iteration has been reached.
374      * @stable ICU 2.8
375      */
previous()376     public int previous() {
377         if (dir_ < 0) {
378             // Continue backwards iteration. Test this first.
379             if (otherHalf_ != 0) {
380                 int oh = otherHalf_;
381                 otherHalf_ = 0;
382                 return oh;
383             }
384         } else if (dir_ == 0) {
385             iter_.resetToOffset(string_.length());
386             dir_ = -1;
387         } else if (dir_ == 1) {
388             // previous() after setOffset()
389             dir_ = -1;
390         } else /* dir_ > 1 */{
391             // illegal change of direction
392             throw new IllegalStateException("Illegal change of direction");
393             // Java porting note: ICU4C sets U_INVALID_STATE_ERROR to the return status.
394         }
395         if (offsets_ == null) {
396             offsets_ = new UVector32();
397         }
398         // If we already have expansion CEs, then we also have offsets.
399         // Otherwise remember the trailing offset in case we need to
400         // write offsets for an artificial expansion.
401         int limitOffset = iter_.getCEsLength() == 0 ? iter_.getOffset() : 0;
402         long ce = iter_.previousCE(offsets_);
403         if (ce == Collation.NO_CE) {
404             return NULLORDER;
405         }
406         // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
407         long p = ce >>> 32;
408         int lower32 = (int) ce;
409         int firstHalf = getFirstHalf(p, lower32);
410         int secondHalf = getSecondHalf(p, lower32);
411         if (secondHalf != 0) {
412             if (offsets_.isEmpty()) {
413                 // When we convert a single 64-bit CE into two 32-bit CEs,
414                 // we need to make this artificial expansion behave like a normal expansion.
415                 // See CollationIterator.previousCE().
416                 offsets_.addElement(iter_.getOffset());
417                 offsets_.addElement(limitOffset);
418             }
419             otherHalf_ = firstHalf;
420             return secondHalf | 0xc0; // continuation CE
421         }
422         return firstHalf;
423     }
424 
425     /**
426      * Resets the cursor to the beginning of the string. The next
427      * call to next() or previous() will return the first and last
428      * collation element in the string, respectively.
429      *
430      * <p>If the RuleBasedCollator used by this iterator has had its
431      * attributes changed, calling reset() will reinitialize the
432      * iterator to use the new attributes.
433      *
434      * @stable ICU 2.8
435      */
reset()436     public void reset() {
437         iter_ .resetToOffset(0);
438         otherHalf_ = 0;
439         dir_ = 0;
440     }
441 
    /**
     * Sets the iterator to point to the collation element
     * corresponding to the character at the specified offset. The
     * value returned by the next call to next() will be the collation
     * element corresponding to the characters at offset.
     *
     * <p>If offset is in the middle of a contracting character
     * sequence, the iterator is adjusted to the start of the
     * contracting sequence. This means that getOffset() is not
     * guaranteed to return the same value set by this method.
     *
     * <p>If the decomposition mode is on, and offset is in the middle
     * of a decomposable range of source text, the iterator may not
     * return a correct result for the next forwards or backwards
     * iteration.  The user must ensure that the offset is not in the
     * middle of a decomposable range.
     *
     * <p>Offsets that are not strictly inside the text (0, negative, or at or
     * beyond the text length) are passed to the underlying iterator unchanged.
     *
     * @param newOffset the character offset into the original source string to
     *        set. Note that this is not an offset into the corresponding
     *        sequence of collation elements.
     * @stable ICU 2.8
     */
    public void setOffset(int newOffset) {
        // Only an offset strictly inside the text can fall in the middle of a sequence.
        if (0 < newOffset && newOffset < string_.length()) {
            int offset = newOffset;
            // Phase 1: walk backwards until the character at offset is not unsafe
            // according to the collator, or until the start of the text is reached.
            do {
                char c = string_.charAt(offset);
                // For a lead surrogate, the whole code point is tested as well.
                if (!rbc_.isUnsafe(c) ||
                        (Character.isHighSurrogate(c) && !rbc_.isUnsafe(string_.codePointAt(offset)))) {
                    break;
                }
                // Back up to before this unsafe character.
                --offset;
            } while (offset > 0);
            if (offset < newOffset) {
                // We might have backed up more than necessary.
                // For example, contractions "ch" and "cu" make both 'h' and 'u' unsafe,
                // but for text "chu" setOffset(2) should remain at 2
                // although we initially back up to offset 0.
                // Find the last safe offset no greater than newOffset by iterating forward.
                int lastSafeOffset = offset;
                do {
                    iter_.resetToOffset(lastSafeOffset);
                    // Fetch CEs until the text position moves past lastSafeOffset
                    // (several CEs may come from the same position).
                    do {
                        iter_.nextCE();
                    } while ((offset = iter_.getOffset()) == lastSafeOffset);
                    // Accept the new boundary only if it does not overshoot the request.
                    if (offset <= newOffset) {
                        lastSafeOffset = offset;
                    }
                } while (offset < newOffset);
                newOffset = lastSafeOffset;
            }
        }
        iter_.resetToOffset(newOffset);
        otherHalf_ = 0;
        // 1 = "just after setOffset()": either next() or previous() may follow.
        dir_ = 1;
    }
499 
500     /**
501      * Set a new source string for iteration, and reset the offset
502      * to the beginning of the text.
503      *
504      * @param source the new source string for iteration.
505      * @stable ICU 2.8
506      */
setText(String source)507     public void setText(String source) {
508         string_ = source; // TODO: do we need to remember the source string in a field?
509         CollationIterator newIter;
510         boolean numeric = rbc_.settings.readOnly().isNumeric();
511         if (rbc_.settings.readOnly().dontCheckFCD()) {
512             newIter = new UTF16CollationIterator(rbc_.data, numeric, string_, 0);
513         } else {
514             newIter = new FCDUTF16CollationIterator(rbc_.data, numeric, string_, 0);
515         }
516         iter_ = newIter;
517         otherHalf_ = 0;
518         dir_ = 0;
519     }
520 
521     /**
522      * Set a new source string iterator for iteration, and reset the
523      * offset to the beginning of the text.
524      *
525      * <p>The source iterator's integrity will be preserved since a new copy
526      * will be created for use.
527      * @param source the new source string iterator for iteration.
528      * @stable ICU 2.8
529      */
setText(UCharacterIterator source)530     public void setText(UCharacterIterator source) {
531         string_ = source.getText(); // TODO: do we need to remember the source string in a field?
532         // Note: In C++, we just setText(source.getText()).
533         // In Java, we actually operate on a character iterator.
534         // (The old code apparently did so only for a CharacterIterator;
535         // for a UCharacterIterator it also just used source.getText()).
536         // TODO: do we need to remember the cloned iterator in a field?
537         UCharacterIterator src;
538         try {
539             src = (UCharacterIterator) source.clone();
540         } catch (CloneNotSupportedException e) {
541             // Fall back to ICU 52 behavior of iterating over the text contents
542             // of the UCharacterIterator.
543             setText(source.getText());
544             return;
545         }
546         src.setToStart();
547         CollationIterator newIter;
548         boolean numeric = rbc_.settings.readOnly().isNumeric();
549         if (rbc_.settings.readOnly().dontCheckFCD()) {
550             newIter = new IterCollationIterator(rbc_.data, numeric, src);
551         } else {
552             newIter = new FCDIterCollationIterator(rbc_.data, numeric, src, 0);
553         }
554         iter_ = newIter;
555         otherHalf_ = 0;
556         dir_ = 0;
557     }
558 
559     /**
560      * Set a new source string iterator for iteration, and reset the
561      * offset to the beginning of the text.
562      *
563      * @param source the new source string iterator for iteration.
564      * @stable ICU 2.8
565      */
setText(CharacterIterator source)566     public void setText(CharacterIterator source) {
567         // Note: In C++, we just setText(source.getText()).
568         // In Java, we actually operate on a character iterator.
569         // TODO: do we need to remember the iterator in a field?
570         // TODO: apparently we don't clone a CharacterIterator in Java,
571         // we only clone the text for a UCharacterIterator?? see the old code in the constructors
572         UCharacterIterator src = new CharacterIteratorWrapper(source);
573         src.setToStart();
574         string_ = src.getText(); // TODO: do we need to remember the source string in a field?
575         CollationIterator newIter;
576         boolean numeric = rbc_.settings.readOnly().isNumeric();
577         if (rbc_.settings.readOnly().dontCheckFCD()) {
578             newIter = new IterCollationIterator(rbc_.data, numeric, src);
579         } else {
580             newIter = new FCDIterCollationIterator(rbc_.data, numeric, src, 0);
581         }
582         iter_ = newIter;
583         otherHalf_ = 0;
584         dir_ = 0;
585     }
586 
587     private static final class MaxExpSink implements ContractionsAndExpansions.CESink {
MaxExpSink(Map<Integer, Integer> h)588         MaxExpSink(Map<Integer, Integer> h) {
589             maxExpansions = h;
590         }
591 
592         @Override
handleCE(long ce)593         public void handleCE(long ce) {
594         }
595 
596         @Override
handleExpansion(long ces[], int start, int length)597         public void handleExpansion(long ces[], int start, int length) {
598             if (length <= 1) {
599                 // We do not need to add single CEs into the map.
600                 return;
601             }
602             int count = 0; // number of CE "halves"
603             for (int i = 0; i < length; ++i) {
604                 count += ceNeedsTwoParts(ces[start + i]) ? 2 : 1;
605             }
606             // last "half" of the last CE
607             long ce = ces[start + length - 1];
608             long p = ce >>> 32;
609             int lower32 = (int) ce;
610             int lastHalf = getSecondHalf(p, lower32);
611             if (lastHalf == 0) {
612                 lastHalf = getFirstHalf(p, lower32);
613                 assert (lastHalf != 0);
614             } else {
615                 lastHalf |= 0xc0; // old-style continuation CE
616             }
617             Integer oldCount = maxExpansions.get(lastHalf);
618             if (oldCount == null || count > oldCount) {
619                 maxExpansions.put(lastHalf, count);
620             }
621         }
622 
623         private Map<Integer, Integer> maxExpansions;
624     }
625 
computeMaxExpansions(CollationData data)626     static final Map<Integer, Integer> computeMaxExpansions(CollationData data) {
627         Map<Integer, Integer> maxExpansions = new HashMap<>();
628         MaxExpSink sink = new MaxExpSink(maxExpansions);
629         new ContractionsAndExpansions(null, null, sink, true).forData(data);
630         return maxExpansions;
631     }
632 
633     /**
634      * Returns the maximum length of any expansion sequence that ends with
635      * the specified collation element. If there is no expansion with this
636      * collation element as the last element, returns 1.
637      *
638      * @param ce a collation element returned by previous() or next().
639      * @return the maximum length of any expansion sequence ending
640      *         with the specified collation element.
641      * @stable ICU 2.8
642      */
getMaxExpansion(int ce)643     public int getMaxExpansion(int ce) {
644         return getMaxExpansion(rbc_.tailoring.maxExpansions, ce);
645     }
646 
getMaxExpansion(Map<Integer, Integer> maxExpansions, int order)647     static int getMaxExpansion(Map<Integer, Integer> maxExpansions, int order) {
648         if (order == 0) {
649             return 1;
650         }
651         Integer max;
652         if (maxExpansions != null && (max = maxExpansions.get(order)) != null) {
653             return max;
654         }
655         if ((order & 0xc0) == 0xc0) {
656             // old-style continuation CE
657             return 2;
658         } else {
659             return 1;
660         }
661     }
662 
663     /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */
normalizeDir()664     private byte normalizeDir() {
665         return dir_ == 1 ? 0 : dir_;
666     }
667 
668     /**
669      * Tests that argument object is equals to this CollationElementIterator.
670      * Iterators are equal if the objects uses the same RuleBasedCollator,
671      * the same source text and have the same current position in iteration.
672      * @param that object to test if it is equals to this
673      *             CollationElementIterator
674      * @stable ICU 2.8
675      */
676     @Override
equals(Object that)677     public boolean equals(Object that) {
678         if (that == this) {
679             return true;
680         }
681         if (that instanceof CollationElementIterator) {
682             CollationElementIterator thatceiter = (CollationElementIterator) that;
683             return rbc_.equals(thatceiter.rbc_)
684                     && otherHalf_ == thatceiter.otherHalf_
685                     && normalizeDir() == thatceiter.normalizeDir()
686                     && string_.equals(thatceiter.string_)
687                     && iter_.equals(thatceiter.iter_);
688         }
689         return false;
690     }
691 
692     /**
693      * Mock implementation of hashCode(). This implementation always returns a constant
694      * value. When Java assertion is enabled, this method triggers an assertion failure.
695      * @stable ICU 2.8
696      */
697     @Override
hashCode()698     public int hashCode() {
699         assert false : "hashCode not designed";
700         return 42;
701     }
702 
    /**
     * Returns the RuleBasedCollator that this iterator was created with.
     * The collator is returned as is, not copied.
     *
     * @return the collator used by this iterator
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public RuleBasedCollator getRuleBasedCollator() {
        return rbc_;
    }
711 }
712