/*
 *******************************************************************************
 * Copyright (C) 2013, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.impl;

import java.text.CharacterIterator;

import com.ibm.icu.text.UTF16;

/**
 * Static helpers that read whole code points (rather than single UTF-16 code units)
 * through a {@link CharacterIterator}.
 */
public final class CharacterIteration {
    // Static utility holder; never instantiated.
    private CharacterIteration() { }

    /**
     * 32 bit value returned when an iterator has run out of range.
     * It is a positive value so that the fast case (not at the end, not a surrogate)
     * can be recognized with a single comparison.
     */
    public static final int DONE32 = 0x7fffffff;

    /**
     * Advance the iterator to the next code point and return that code point,
     * leaving the iterator positioned at the char returned.
     * For supplementary code points the iterator is left on the lead surrogate.
     *
     * @param ci The character iterator
     * @return The next code point, or {@link #DONE32} when the iterator has run off the end.
     */
    public static int next32(CharacterIterator ci) {
        // When currently sitting on the lead half of a surrogate pair, step onto the
        // trail half first so that the plain next() below lands on the following
        // code point. An unpaired lead is left where it is.
        final char here = ci.current();
        if (UTF16.isLeadSurrogate(here)) {
            final char following = ci.next();
            if (!UTF16.isTrailSurrogate(following)) {
                ci.previous();
            }
        }

        // For BMP chars this single step is all that is needed.
        int codePoint = ci.next();

        // Anything at or above the lead surrogate range might be a lead surrogate
        // (or DONE): peek ahead for the trail unit and combine the two.
        if (codePoint >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
            codePoint = nextTrail32(ci, codePoint);
        }

        // The peek left the iterator on the trail surrogate of a supplementary
        // code point; move back so that it rests on the lead surrogate.
        final boolean supplementary =
                codePoint != DONE32 && codePoint >= UTF16.SUPPLEMENTARY_MIN_VALUE;
        if (supplementary) {
            ci.previous();
        }
        return codePoint;
    }

    /**
     * Out-of-line portion of the in-line next32 code.
     * The call site does an initial {@code ci.next()} and calls this function
     * if the 16 bit value it gets is &gt;= {@code UTF16.LEAD_SURROGATE_MIN_VALUE}.
     *
     * <p>NOTE: after a surrogate pair has been combined, the underlying iterator is left
     * positioned in the middle of that pair. {@code ci.next()} will work correctly from
     * there, but {@code ci.getIndex()} will be wrong and needs adjustment.
     *
     * @param ci   The character iterator, already advanced onto {@code lead}
     * @param lead The 16 bit value just obtained from {@code ci.next()}
     * @return {@link #DONE32} if {@code lead} is the end-of-range marker, the combined
     *         supplementary code point if {@code lead} is followed by a trail surrogate,
     *         otherwise {@code lead} itself.
     */
    public static int nextTrail32(CharacterIterator ci, int lead) {
        // DONE is also a legal char value (U+FFFF); it only means "end" when the
        // iterator index confirms that the range is exhausted.
        if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        // Values above the lead surrogate range cannot start a pair.
        if (lead > UTF16.LEAD_SURROGATE_MAX_VALUE) {
            return lead;
        }
        final char candidate = ci.next();
        if (UTF16.isTrailSurrogate(candidate)) {
            return toCodePoint(lead, candidate);
        }
        // No trail surrogate follows: undo the peek and report the unit unchanged.
        ci.previous();
        return lead;
    }

    /**
     * Move the iterator back to the previous code point and return that code point.
     * For supplementary code points the iterator is left on the lead surrogate.
     *
     * @param ci The character iterator
     * @return The previous code point, or {@link #DONE32} if the iterator is already
     *         at (or before) its begin index.
     */
    public static int previous32(CharacterIterator ci) {
        if (ci.getIndex() <= ci.getBeginIndex()) {
            return DONE32;
        }
        final char unit = ci.previous();
        // Only a trail surrogate with at least one unit in front of it can be the
        // second half of a pair.
        final boolean mayEndPair =
                UTF16.isTrailSurrogate(unit) && ci.getIndex() > ci.getBeginIndex();
        if (!mayEndPair) {
            return unit;
        }
        final char before = ci.previous();
        if (UTF16.isLeadSurrogate(before)) {
            return toCodePoint(before, unit);
        }
        // The preceding unit is not a lead surrogate: step forward again so the
        // iterator rests on the unpaired trail that is being returned.
        ci.next();
        return unit;
    }

    /**
     * Return the code point at the iterator's current position without changing
     * that position.
     *
     * @param ci The character iterator
     * @return The current code point, or {@link #DONE32} if the iterator is at the end
     *         of its range.
     */
    public static int current32(CharacterIterator ci) {
        final char unit = ci.current();
        // Fast path: everything below the surrogate range is a complete code point.
        if (unit < UTF16.LEAD_SURROGATE_MIN_VALUE) {
            return unit;
        }
        if (UTF16.isLeadSurrogate(unit)) {
            // Peek at the following unit, then restore the position right away.
            final char peeked = ci.next();
            ci.previous();
            if (UTF16.isTrailSurrogate(peeked)) {
                return toCodePoint(unit, peeked);
            }
            return unit;
        }
        // DONE is a legal char value as well; treat it as end-of-range only when
        // the index says so.
        if (unit == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        return unit;
    }

    // Combines a lead and a trail surrogate value into the supplementary code point they encode.
    private static int toCodePoint(int lead, int trail) {
        return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
                + (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
                + UTF16.SUPPLEMENTARY_MIN_VALUE;
    }
}