1 /* 2 ******************************************************************************* 3 * Copyright (C) 1996-2014, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 package com.ibm.icu.text; 8 9 10 import java.text.CharacterIterator; 11 12 import com.ibm.icu.impl.CharacterIteratorWrapper; 13 import com.ibm.icu.impl.ReplaceableUCharacterIterator; 14 import com.ibm.icu.impl.UCharArrayIterator; 15 import com.ibm.icu.impl.UCharacterIteratorWrapper; 16 import com.ibm.icu.impl.UCharacterProperty; 17 18 19 /** 20 * Abstract class that defines an API for iteration on text objects.This is an 21 * interface for forward and backward iteration and random access into a text 22 * object. Forward iteration is done with post-increment and backward iteration 23 * is done with pre-decrement semantics, while the 24 * <code>java.text.CharacterIterator</code> interface methods provided forward 25 * iteration with "pre-increment" and backward iteration with pre-decrement 26 * semantics. This API is more efficient for forward iteration over code points. 27 * The other major difference is that this API can do both code unit and code point 28 * iteration, <code>java.text.CharacterIterator</code> can only iterate over 29 * code units and is limited to BMP (0 - 0xFFFF) 30 * @author Ram 31 * @stable ICU 2.4 32 */ 33 public abstract class UCharacterIterator 34 implements Cloneable,UForwardCharacterIterator { 35 36 /** 37 * Protected default constructor for the subclasses 38 * @stable ICU 2.4 39 */ UCharacterIterator()40 protected UCharacterIterator(){ 41 } 42 43 // static final methods ---------------------------------------------------- 44 45 /** 46 * Returns a <code>UCharacterIterator</code> object given a 47 * <code>Replaceable</code> object. 48 * @param source a valid source as a <code>Replaceable</code> object 49 * @return UCharacterIterator object 50 * @exception IllegalArgumentException if the argument is null 51 * @stable ICU 2.4 52 */ getInstance(Replaceable source)53 public static final UCharacterIterator getInstance(Replaceable source){ 54 return new ReplaceableUCharacterIterator(source); 55 } 56 57 /** 58 * Returns a <code>UCharacterIterator</code> object given a 59 * source string. 60 * @param source a string 61 * @return UCharacterIterator object 62 * @exception IllegalArgumentException if the argument is null 63 * @stable ICU 2.4 64 */ getInstance(String source)65 public static final UCharacterIterator getInstance(String source){ 66 return new ReplaceableUCharacterIterator(source); 67 } 68 69 /** 70 * Returns a <code>UCharacterIterator</code> object given a 71 * source character array. 72 * @param source an array of UTF-16 code units 73 * @return UCharacterIterator object 74 * @exception IllegalArgumentException if the argument is null 75 * @stable ICU 2.4 76 */ getInstance(char[] source)77 public static final UCharacterIterator getInstance(char[] source){ 78 return getInstance(source,0,source.length); 79 } 80 81 /** 82 * Returns a <code>UCharacterIterator</code> object given a 83 * source character array. 84 * @param source an array of UTF-16 code units 85 * @return UCharacterIterator object 86 * @exception IllegalArgumentException if the argument is null 87 * @stable ICU 2.4 88 */ getInstance(char[] source, int start, int limit)89 public static final UCharacterIterator getInstance(char[] source, int start, int limit){ 90 return new UCharArrayIterator(source,start,limit); 91 } 92 /** 93 * Returns a <code>UCharacterIterator</code> object given a 94 * source StringBuffer. 95 * @param source an string buffer of UTF-16 code units 96 * @return UCharacterIterator object 97 * @exception IllegalArgumentException if the argument is null 98 * @stable ICU 2.4 99 */ getInstance(StringBuffer source)100 public static final UCharacterIterator getInstance(StringBuffer source){ 101 return new ReplaceableUCharacterIterator(source); 102 } 103 104 /** 105 * Returns a <code>UCharacterIterator</code> object given a 106 * CharacterIterator. 107 * @param source a valid CharacterIterator object. 108 * @return UCharacterIterator object 109 * @exception IllegalArgumentException if the argument is null 110 * @stable ICU 2.4 111 */ getInstance(CharacterIterator source)112 public static final UCharacterIterator getInstance(CharacterIterator source){ 113 return new CharacterIteratorWrapper(source); 114 } 115 116 // public methods ---------------------------------------------------------- 117 /** 118 * Returns a <code>java.text.CharacterIterator</code> object for 119 * the underlying text of this iterator. The returned iterator is 120 * independent of this iterator. 121 * @return java.text.CharacterIterator object 122 * @stable ICU 2.4 123 */ getCharacterIterator()124 public CharacterIterator getCharacterIterator(){ 125 return new UCharacterIteratorWrapper(this); 126 } 127 128 /** 129 * Returns the code unit at the current index. If index is out 130 * of range, returns DONE. Index is not changed. 131 * @return current code unit 132 * @stable ICU 2.4 133 */ current()134 public abstract int current(); 135 136 /** 137 * Returns the codepoint at the current index. 138 * If the current index is invalid, DONE is returned. 139 * If the current index points to a lead surrogate, and there is a following 140 * trail surrogate, then the code point is returned. Otherwise, the code 141 * unit at index is returned. Index is not changed. 142 * @return current codepoint 143 * @stable ICU 2.4 144 */ currentCodePoint()145 public int currentCodePoint(){ 146 int ch = current(); 147 if(UTF16.isLeadSurrogate((char)ch)){ 148 // advance the index to get the 149 // next code point 150 next(); 151 // due to post increment semantics 152 // current() after next() actually 153 // returns the char we want 154 int ch2 = current(); 155 // current should never change 156 // the current index so back off 157 previous(); 158 159 if(UTF16.isTrailSurrogate((char)ch2)){ 160 // we found a surrogate pair 161 // return the codepoint 162 return UCharacterProperty.getRawSupplementary( 163 (char)ch,(char)ch2 164 ); 165 } 166 } 167 return ch; 168 } 169 170 /** 171 * Returns the length of the text 172 * @return length of the text 173 * @stable ICU 2.4 174 */ getLength()175 public abstract int getLength(); 176 177 178 /** 179 * Gets the current index in text. 180 * @return current index in text. 181 * @stable ICU 2.4 182 */ getIndex()183 public abstract int getIndex(); 184 185 186 /** 187 * Returns the UTF16 code unit at index, and increments to the next 188 * code unit (post-increment semantics). If index is out of 189 * range, DONE is returned, and the iterator is reset to the limit 190 * of the text. 191 * @return the next UTF16 code unit, or DONE if the index is at the limit 192 * of the text. 193 * @stable ICU 2.4 194 */ next()195 public abstract int next(); 196 197 /** 198 * Returns the code point at index, and increments to the next code 199 * point (post-increment semantics). If index does not point to a 200 * valid surrogate pair, the behavior is the same as 201 * <code>next()</code>. Otherwise the iterator is incremented past 202 * the surrogate pair, and the code point represented by the pair 203 * is returned. 204 * @return the next codepoint in text, or DONE if the index is at 205 * the limit of the text. 206 * @stable ICU 2.4 207 */ nextCodePoint()208 public int nextCodePoint(){ 209 int ch1 = next(); 210 if(UTF16.isLeadSurrogate((char)ch1)){ 211 int ch2 = next(); 212 if(UTF16.isTrailSurrogate((char)ch2)){ 213 return UCharacterProperty.getRawSupplementary((char)ch1, 214 (char)ch2); 215 }else if (ch2 != DONE) { 216 // unmatched surrogate so back out 217 previous(); 218 } 219 } 220 return ch1; 221 } 222 223 /** 224 * Decrement to the position of the previous code unit in the 225 * text, and return it (pre-decrement semantics). If the 226 * resulting index is less than 0, the index is reset to 0 and 227 * DONE is returned. 228 * @return the previous code unit in the text, or DONE if the new 229 * index is before the start of the text. 230 * @stable ICU 2.4 231 */ previous()232 public abstract int previous(); 233 234 235 /** 236 * Retreat to the start of the previous code point in the text, 237 * and return it (pre-decrement semantics). If the index is not 238 * preceeded by a valid surrogate pair, the behavior is the same 239 * as <code>previous()</code>. Otherwise the iterator is 240 * decremented to the start of the surrogate pair, and the code 241 * point represented by the pair is returned. 242 * @return the previous code point in the text, or DONE if the new 243 * index is before the start of the text. 244 * @stable ICU 2.4 245 */ previousCodePoint()246 public int previousCodePoint(){ 247 int ch1 = previous(); 248 if(UTF16.isTrailSurrogate((char)ch1)){ 249 int ch2 = previous(); 250 if(UTF16.isLeadSurrogate((char)ch2)){ 251 return UCharacterProperty.getRawSupplementary((char)ch2, 252 (char)ch1); 253 }else if (ch2 != DONE) { 254 //unmatched trail surrogate so back out 255 next(); 256 } 257 } 258 return ch1; 259 } 260 261 /** 262 * Sets the index to the specified index in the text. 263 * @param index the index within the text. 264 * @exception IndexOutOfBoundsException is thrown if an invalid index is 265 * supplied 266 * @stable ICU 2.4 267 */ setIndex(int index)268 public abstract void setIndex(int index); 269 270 /** 271 * Sets the current index to the limit. 272 * @stable ICU 2.4 273 */ setToLimit()274 public void setToLimit() { 275 setIndex(getLength()); 276 } 277 278 /** 279 * Sets the current index to the start. 280 * @stable ICU 2.4 281 */ setToStart()282 public void setToStart() { 283 setIndex(0); 284 } 285 286 /** 287 * Fills the buffer with the underlying text storage of the iterator 288 * If the buffer capacity is not enough a exception is thrown. The capacity 289 * of the fill in buffer should at least be equal to length of text in the 290 * iterator obtained by calling <code>getLength()</code>). 291 * <b>Usage:</b> 292 * 293 * <code> 294 * <pre> 295 * UChacterIterator iter = new UCharacterIterator.getInstance(text); 296 * char[] buf = new char[iter.getLength()]; 297 * iter.getText(buf); 298 * 299 * OR 300 * char[] buf= new char[1]; 301 * int len = 0; 302 * for(;;){ 303 * try{ 304 * len = iter.getText(buf); 305 * break; 306 * }catch(IndexOutOfBoundsException e){ 307 * buf = new char[iter.getLength()]; 308 * } 309 * } 310 * </pre> 311 * </code> 312 * 313 * @param fillIn an array of chars to fill with the underlying UTF-16 code 314 * units. 315 * @param offset the position within the array to start putting the data. 316 * @return the number of code units added to fillIn, as a convenience 317 * @exception IndexOutOfBoundsException exception if there is not enough 318 * room after offset in the array, or if offset < 0. 319 * @stable ICU 2.4 320 */ getText(char[] fillIn, int offset)321 public abstract int getText(char[] fillIn, int offset); 322 323 /** 324 * Convenience override for <code>getText(char[], int)</code> that provides 325 * an offset of 0. 326 * @param fillIn an array of chars to fill with the underlying UTF-16 code 327 * units. 328 * @return the number of code units added to fillIn, as a convenience 329 * @exception IndexOutOfBoundsException exception if there is not enough 330 * room in the array. 331 * @stable ICU 2.4 332 */ getText(char[] fillIn)333 public final int getText(char[] fillIn) { 334 return getText(fillIn, 0); 335 } 336 337 /** 338 * Convenience method for returning the underlying text storage as as string 339 * @return the underlying text storage in the iterator as a string 340 * @stable ICU 2.4 341 */ getText()342 public String getText() { 343 char[] text = new char[getLength()]; 344 getText(text); 345 return new String(text); 346 } 347 348 /** 349 * Moves the current position by the number of code units 350 * specified, either forward or backward depending on the sign 351 * of delta (positive or negative respectively). If the resulting 352 * index would be less than zero, the index is set to zero, and if 353 * the resulting index would be greater than limit, the index is 354 * set to limit. 355 * 356 * @param delta the number of code units to move the current 357 * index. 358 * @return the new index. 359 * @exception IndexOutOfBoundsException is thrown if an invalid index is 360 * supplied 361 * @stable ICU 2.4 362 * 363 */ moveIndex(int delta)364 public int moveIndex(int delta) { 365 int x = Math.max(0, Math.min(getIndex() + delta, getLength())); 366 setIndex(x); 367 return x; 368 } 369 370 /** 371 * Moves the current position by the number of code points 372 * specified, either forward or backward depending on the sign of 373 * delta (positive or negative respectively). If the current index 374 * is at a trail surrogate then the first adjustment is by code 375 * unit, and the remaining adjustments are by code points. If the 376 * resulting index would be less than zero, the index is set to 377 * zero, and if the resulting index would be greater than limit, 378 * the index is set to limit. 379 * @param delta the number of code units to move the current index. 380 * @return the new index 381 * @exception IndexOutOfBoundsException is thrown if an invalid delta is 382 * supplied 383 * @stable ICU 2.4 384 */ moveCodePointIndex(int delta)385 public int moveCodePointIndex(int delta){ 386 if(delta>0){ 387 while(delta>0 && nextCodePoint() != DONE){delta--;} 388 }else{ 389 while(delta<0 && previousCodePoint() != DONE){delta++;} 390 } 391 if(delta!=0){ 392 throw new IndexOutOfBoundsException(); 393 } 394 395 return getIndex(); 396 } 397 398 /** 399 * Creates a copy of this iterator, independent from other iterators. 400 * If it is not possible to clone the iterator, returns null. 401 * @return copy of this iterator 402 * @stable ICU 2.4 403 */ clone()404 public Object clone() throws CloneNotSupportedException{ 405 return super.clone(); 406 } 407 408 } 409 410