// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2013, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.impl;

import java.text.CharacterIterator;

import com.ibm.icu.text.UTF16;

/**
 * Static helpers that read 32-bit code points through a {@link CharacterIterator},
 * which on its own only hands out 16-bit UTF-16 code units.
 */
public final class CharacterIteration {
    // Utility class: not meant to be instantiated.
    private CharacterIteration() { }

    /**
     * 32-bit value returned when the iterator has run out of range.
     * It is deliberately positive so that the fast case (not at the end, not a
     * surrogate) can be detected with a single comparison.
     */
    public static final int DONE32 = 0x7fffffff;

    /**
     * Advance the iterator to the next code point and return that code point,
     * leaving the iterator positioned at the char returned.
     * For supplementary characters the iterator is left on the lead surrogate.
     *
     * @param ci The character iterator
     * @return The next code point, or {@link #DONE32} when the end has been reached.
     */
    public static int next32(CharacterIterator ci) {
        // When we start on a surrogate pair, hop onto its trail surrogate first so
        // that the underlying iterator's next() lands on the following code point.
        int codePoint = ci.current();
        boolean startsOnLead = codePoint >= UTF16.LEAD_SURROGATE_MIN_VALUE
                && codePoint <= UTF16.LEAD_SURROGATE_MAX_VALUE;
        if (startsOnLead) {
            codePoint = ci.next();
            boolean followedByTrail = codePoint >= UTF16.TRAIL_SURROGATE_MIN_VALUE
                    && codePoint <= UTF16.TRAIL_SURROGATE_MAX_VALUE;
            if (!followedByTrail) {
                // Unpaired lead: undo the hop.
                ci.previous();
            }
        }

        // This is the step that actually moves to the next code point.
        codePoint = ci.next();
        if (codePoint < UTF16.LEAD_SURROGATE_MIN_VALUE) {
            // Fast path: an ordinary BMP char below the surrogate range.
            return codePoint;
        }

        // Possibly a lead surrogate (or the end of the text): peek ahead for the
        // trail even though we do not want to stay positioned there.
        codePoint = nextTrail32(ci, codePoint);
        if (codePoint != DONE32 && codePoint >= UTF16.SUPPLEMENTARY_MIN_VALUE) {
            // Supplementary char: back up to the position of its lead surrogate.
            ci.previous();
        }
        return codePoint;
    }

    /**
     * Out-of-line portion of the in-line next32 code.
     * The call site does an initial {@code ci.next()} and calls this function when
     * the 16-bit value it gets is &gt;= {@code UTF16.LEAD_SURROGATE_MIN_VALUE}.
     * <p>
     * NOTE: when a surrogate pair is combined, the underlying char iterator is left
     * positioned in the middle of that pair. {@code ci.next()} will work correctly
     * from there, but {@code ci.getIndex()} will be wrong and needs adjustment.
     *
     * @param ci   The character iterator, already advanced onto {@code lead}
     * @param lead The 16-bit value obtained from the preceding {@code ci.next()}
     * @return {@link #DONE32} if {@code lead} is {@link CharacterIterator#DONE} and the
     *         iterator index is at or past the end index; the combined supplementary
     *         code point if {@code lead} is followed by a trail surrogate; otherwise
     *         {@code lead} unchanged.
     */
    public static int nextTrail32(CharacterIterator ci, int lead) {
        if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        if (lead > UTF16.LEAD_SURROGATE_MAX_VALUE) {
            // Above the lead-surrogate range: nothing to pair up.
            return lead;
        }

        char following = ci.next();
        if (!UTF16.isTrailSurrogate(following)) {
            // No trail after the lead: step back and report the lead by itself.
            ci.previous();
            return lead;
        }
        return combineSurrogates(lead, following);
    }

    /**
     * Move the iterator back by one code point and return that code point.
     * When a lead/trail surrogate pair precedes the starting position, the iterator
     * is left on the lead surrogate and the combined code point is returned.
     *
     * @param ci The character iterator
     * @return The previous code point, or {@link #DONE32} if the iterator index is
     *         already at or before the begin index.
     */
    public static int previous32(CharacterIterator ci) {
        if (ci.getIndex() <= ci.getBeginIndex()) {
            return DONE32;
        }

        char trail = ci.previous();
        if (!UTF16.isTrailSurrogate(trail) || ci.getIndex() <= ci.getBeginIndex()) {
            // Not a trail surrogate, or nothing before it that could be its lead.
            return trail;
        }

        char lead = ci.previous();
        if (!UTF16.isLeadSurrogate(lead)) {
            // Unpaired trail: return to it and report it by itself.
            ci.next();
            return trail;
        }
        return combineSurrogates(lead, trail);
    }

    /**
     * Return the code point at the iterator's current position.
     * When the current char is a lead surrogate, the following char is inspected
     * with {@code ci.next()} immediately followed by {@code ci.previous()}.
     *
     * @param ci The character iterator
     * @return The code point at the current position, or {@link #DONE32} if the
     *         current char is {@link CharacterIterator#DONE} and the iterator index
     *         is at or past the end index.
     */
    public static int current32(CharacterIterator ci) {
        char unit = ci.current();
        if (unit < UTF16.LEAD_SURROGATE_MIN_VALUE) {
            // Fast path: below the surrogate range.
            return unit;
        }

        if (UTF16.isLeadSurrogate(unit)) {
            // Look at the next char, then step straight back.
            int following = (int) ci.next();
            ci.previous();
            if (UTF16.isTrailSurrogate((char) following)) {
                return combineSurrogates(unit, following);
            }
            return unit;
        }

        // 0xFFFF only means "done" when the iterator really is at the end.
        if (unit == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        return unit;
    }

    // Combine a lead and a trail surrogate value into one supplementary code point.
    private static int combineSurrogates(int lead, int trail) {
        return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
                + (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
                + UTF16.SUPPLEMENTARY_MIN_VALUE;
    }
}