1 /*
2  *******************************************************************************
3  * Copyright (C) 1996-2014, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package com.ibm.icu.text;
8 
9 
10 import java.text.CharacterIterator;
11 
12 import com.ibm.icu.impl.CharacterIteratorWrapper;
13 import com.ibm.icu.impl.ReplaceableUCharacterIterator;
14 import com.ibm.icu.impl.UCharArrayIterator;
15 import com.ibm.icu.impl.UCharacterIteratorWrapper;
16 import com.ibm.icu.impl.UCharacterProperty;
17 
18 
19 /**
 * Abstract class that defines an API for iteration on text objects. This is an
 * interface for forward and backward iteration and random access into a text
 * object. Forward iteration is done with post-increment and backward iteration
 * is done with pre-decrement semantics, while the
 * <code>java.text.CharacterIterator</code> interface methods provide forward
 * iteration with "pre-increment" and backward iteration with pre-decrement
 * semantics. This API is more efficient for forward iteration over code points.
 * The other major difference is that this API can do both code unit and code point
 * iteration, whereas <code>java.text.CharacterIterator</code> can only iterate over
 * code units and is limited to the BMP (0 - 0xFFFF).
30  * @author Ram
31  * @stable ICU 2.4
32  */
33 public abstract class UCharacterIterator
34                       implements Cloneable,UForwardCharacterIterator {
35 
36     /**
37      * Protected default constructor for the subclasses
38      * @stable ICU 2.4
39      */
UCharacterIterator()40     protected UCharacterIterator(){
41     }
42 
43     // static final methods ----------------------------------------------------
44 
45     /**
46      * Returns a <code>UCharacterIterator</code> object given a
47      * <code>Replaceable</code> object.
48      * @param source a valid source as a <code>Replaceable</code> object
49      * @return UCharacterIterator object
50      * @exception IllegalArgumentException if the argument is null
51      * @stable ICU 2.4
52      */
getInstance(Replaceable source)53     public static final UCharacterIterator getInstance(Replaceable source){
54         return new ReplaceableUCharacterIterator(source);
55     }
56 
57     /**
58      * Returns a <code>UCharacterIterator</code> object given a
59      * source string.
60      * @param source a string
61      * @return UCharacterIterator object
62      * @exception IllegalArgumentException if the argument is null
63      * @stable ICU 2.4
64      */
getInstance(String source)65     public static final UCharacterIterator getInstance(String source){
66         return new ReplaceableUCharacterIterator(source);
67     }
68 
69     /**
70      * Returns a <code>UCharacterIterator</code> object given a
71      * source character array.
72      * @param source an array of UTF-16 code units
73      * @return UCharacterIterator object
74      * @exception IllegalArgumentException if the argument is null
75      * @stable ICU 2.4
76      */
getInstance(char[] source)77     public static final UCharacterIterator getInstance(char[] source){
78         return getInstance(source,0,source.length);
79     }
80 
81     /**
82      * Returns a <code>UCharacterIterator</code> object given a
83      * source character array.
84      * @param source an array of UTF-16 code units
85      * @return UCharacterIterator object
86      * @exception IllegalArgumentException if the argument is null
87      * @stable ICU 2.4
88      */
getInstance(char[] source, int start, int limit)89     public static final UCharacterIterator getInstance(char[] source, int start, int limit){
90         return new UCharArrayIterator(source,start,limit);
91     }
92     /**
93      * Returns a <code>UCharacterIterator</code> object given a
94      * source StringBuffer.
95      * @param source an string buffer of UTF-16 code units
96      * @return UCharacterIterator object
97      * @exception IllegalArgumentException if the argument is null
98      * @stable ICU 2.4
99      */
getInstance(StringBuffer source)100     public static final UCharacterIterator getInstance(StringBuffer source){
101         return new ReplaceableUCharacterIterator(source);
102     }
103 
104     /**
105      * Returns a <code>UCharacterIterator</code> object given a
106      * CharacterIterator.
107      * @param source a valid CharacterIterator object.
108      * @return UCharacterIterator object
109      * @exception IllegalArgumentException if the argument is null
110      * @stable ICU 2.4
111      */
getInstance(CharacterIterator source)112     public static final UCharacterIterator getInstance(CharacterIterator source){
113         return new CharacterIteratorWrapper(source);
114     }
115 
116     // public methods ----------------------------------------------------------
117     /**
118      * Returns a <code>java.text.CharacterIterator</code> object for
119      * the underlying text of this iterator.  The returned iterator is
120      * independent of this iterator.
121      * @return java.text.CharacterIterator object
122      * @stable ICU 2.4
123      */
getCharacterIterator()124     public CharacterIterator getCharacterIterator(){
125         return new UCharacterIteratorWrapper(this);
126     }
127 
128     /**
129      * Returns the code unit at the current index.  If index is out
130      * of range, returns DONE.  Index is not changed.
131      * @return current code unit
132      * @stable ICU 2.4
133      */
current()134     public abstract int current();
135 
136     /**
137      * Returns the codepoint at the current index.
138      * If the current index is invalid, DONE is returned.
139      * If the current index points to a lead surrogate, and there is a following
140      * trail surrogate, then the code point is returned.  Otherwise, the code
141      * unit at index is returned.  Index is not changed.
142      * @return current codepoint
143      * @stable ICU 2.4
144      */
currentCodePoint()145     public int currentCodePoint(){
146         int ch = current();
147         if(UTF16.isLeadSurrogate((char)ch)){
148             // advance the index to get the
149             // next code point
150             next();
151             // due to post increment semantics
152             // current() after next() actually
153             // returns the char we want
154             int ch2 = current();
155             // current should never change
156             // the current index so back off
157             previous();
158 
159             if(UTF16.isTrailSurrogate((char)ch2)){
160                 // we found a surrogate pair
161                 // return the codepoint
162                 return UCharacterProperty.getRawSupplementary(
163                                                           (char)ch,(char)ch2
164                                                              );
165             }
166         }
167         return ch;
168     }
169 
170     /**
171      * Returns the length of the text
172      * @return length of the text
173      * @stable ICU 2.4
174      */
getLength()175     public abstract int getLength();
176 
177 
178     /**
179      * Gets the current index in text.
180      * @return current index in text.
181      * @stable ICU 2.4
182      */
getIndex()183     public abstract int getIndex();
184 
185 
186     /**
187      * Returns the UTF16 code unit at index, and increments to the next
188      * code unit (post-increment semantics).  If index is out of
189      * range, DONE is returned, and the iterator is reset to the limit
190      * of the text.
191      * @return the next UTF16 code unit, or DONE if the index is at the limit
192      *         of the text.
193      * @stable ICU 2.4
194      */
next()195     public abstract int next();
196 
197     /**
198      * Returns the code point at index, and increments to the next code
199      * point (post-increment semantics).  If index does not point to a
200      * valid surrogate pair, the behavior is the same as
201      * <code>next()</code>.  Otherwise the iterator is incremented past
202      * the surrogate pair, and the code point represented by the pair
203      * is returned.
204      * @return the next codepoint in text, or DONE if the index is at
205      *         the limit of the text.
206      * @stable ICU 2.4
207      */
nextCodePoint()208     public int nextCodePoint(){
209         int ch1 = next();
210         if(UTF16.isLeadSurrogate((char)ch1)){
211             int ch2 = next();
212             if(UTF16.isTrailSurrogate((char)ch2)){
213                 return UCharacterProperty.getRawSupplementary((char)ch1,
214                                                               (char)ch2);
215             }else if (ch2 != DONE) {
216                 // unmatched surrogate so back out
217                 previous();
218             }
219         }
220         return ch1;
221     }
222 
223     /**
224      * Decrement to the position of the previous code unit in the
225      * text, and return it (pre-decrement semantics).  If the
226      * resulting index is less than 0, the index is reset to 0 and
227      * DONE is returned.
228      * @return the previous code unit in the text, or DONE if the new
229      *         index is before the start of the text.
230      * @stable ICU 2.4
231      */
previous()232     public abstract int previous();
233 
234 
235     /**
236      * Retreat to the start of the previous code point in the text,
237      * and return it (pre-decrement semantics).  If the index is not
238      * preceeded by a valid surrogate pair, the behavior is the same
239      * as <code>previous()</code>.  Otherwise the iterator is
240      * decremented to the start of the surrogate pair, and the code
241      * point represented by the pair is returned.
242      * @return the previous code point in the text, or DONE if the new
243      *         index is before the start of the text.
244      * @stable ICU 2.4
245      */
previousCodePoint()246     public int previousCodePoint(){
247         int ch1 = previous();
248         if(UTF16.isTrailSurrogate((char)ch1)){
249             int ch2 = previous();
250             if(UTF16.isLeadSurrogate((char)ch2)){
251                 return UCharacterProperty.getRawSupplementary((char)ch2,
252                                                               (char)ch1);
253             }else if (ch2 != DONE) {
254                 //unmatched trail surrogate so back out
255                 next();
256             }
257         }
258         return ch1;
259     }
260 
261     /**
262      * Sets the index to the specified index in the text.
263      * @param index the index within the text.
264      * @exception IndexOutOfBoundsException is thrown if an invalid index is
265      *            supplied
266      * @stable ICU 2.4
267      */
setIndex(int index)268     public abstract void setIndex(int index);
269 
270     /**
271      * Sets the current index to the limit.
272      * @stable ICU 2.4
273      */
setToLimit()274     public void setToLimit() {
275         setIndex(getLength());
276     }
277 
278     /**
279      * Sets the current index to the start.
280      * @stable ICU 2.4
281      */
setToStart()282     public void setToStart() {
283         setIndex(0);
284     }
285 
286     /**
287      * Fills the buffer with the underlying text storage of the iterator
288      * If the buffer capacity is not enough a exception is thrown. The capacity
289      * of the fill in buffer should at least be equal to length of text in the
290      * iterator obtained by calling <code>getLength()</code>).
291      * <b>Usage:</b>
292      *
293      * <code>
294      * <pre>
295      *         UChacterIterator iter = new UCharacterIterator.getInstance(text);
296      *         char[] buf = new char[iter.getLength()];
297      *         iter.getText(buf);
298      *
299      *         OR
300      *         char[] buf= new char[1];
301      *         int len = 0;
302      *         for(;;){
303      *             try{
304      *                 len = iter.getText(buf);
305      *                 break;
306      *             }catch(IndexOutOfBoundsException e){
307      *                 buf = new char[iter.getLength()];
308      *             }
309      *         }
310      * </pre>
311      * </code>
312      *
313      * @param fillIn an array of chars to fill with the underlying UTF-16 code
314      *         units.
315      * @param offset the position within the array to start putting the data.
316      * @return the number of code units added to fillIn, as a convenience
317      * @exception IndexOutOfBoundsException exception if there is not enough
318      *            room after offset in the array, or if offset < 0.
319      * @stable ICU 2.4
320      */
getText(char[] fillIn, int offset)321     public abstract int getText(char[] fillIn, int offset);
322 
323     /**
324      * Convenience override for <code>getText(char[], int)</code> that provides
325      * an offset of 0.
326      * @param fillIn an array of chars to fill with the underlying UTF-16 code
327      *         units.
328      * @return the number of code units added to fillIn, as a convenience
329      * @exception IndexOutOfBoundsException exception if there is not enough
330      *            room in the array.
331      * @stable ICU 2.4
332      */
getText(char[] fillIn)333     public final int getText(char[] fillIn) {
334         return getText(fillIn, 0);
335     }
336 
337     /**
338      * Convenience method for returning the underlying text storage as as string
339      * @return the underlying text storage in the iterator as a string
340      * @stable ICU 2.4
341      */
getText()342     public String getText() {
343         char[] text = new char[getLength()];
344         getText(text);
345         return new String(text);
346     }
347 
348     /**
349      * Moves the current position by the number of code units
350      * specified, either forward or backward depending on the sign
351      * of delta (positive or negative respectively).  If the resulting
352      * index would be less than zero, the index is set to zero, and if
353      * the resulting index would be greater than limit, the index is
354      * set to limit.
355      *
356      * @param delta the number of code units to move the current
357      *              index.
358      * @return the new index.
359      * @exception IndexOutOfBoundsException is thrown if an invalid index is
360      *            supplied
361      * @stable ICU 2.4
362      *
363      */
moveIndex(int delta)364     public int moveIndex(int delta) {
365         int x = Math.max(0, Math.min(getIndex() + delta, getLength()));
366         setIndex(x);
367         return x;
368     }
369 
370     /**
371      * Moves the current position by the number of code points
372      * specified, either forward or backward depending on the sign of
373      * delta (positive or negative respectively). If the current index
374      * is at a trail surrogate then the first adjustment is by code
375      * unit, and the remaining adjustments are by code points.  If the
376      * resulting index would be less than zero, the index is set to
377      * zero, and if the resulting index would be greater than limit,
378      * the index is set to limit.
379      * @param delta the number of code units to move the current index.
380      * @return the new index
381      * @exception IndexOutOfBoundsException is thrown if an invalid delta is
382      *            supplied
383      * @stable ICU 2.4
384      */
moveCodePointIndex(int delta)385     public int moveCodePointIndex(int delta){
386         if(delta>0){
387             while(delta>0 && nextCodePoint() != DONE){delta--;}
388         }else{
389             while(delta<0 && previousCodePoint() != DONE){delta++;}
390         }
391         if(delta!=0){
392             throw new IndexOutOfBoundsException();
393         }
394 
395         return getIndex();
396     }
397 
398     /**
399      * Creates a copy of this iterator, independent from other iterators.
400      * If it is not possible to clone the iterator, returns null.
401      * @return copy of this iterator
402      * @stable ICU 2.4
403      */
clone()404     public Object clone() throws CloneNotSupportedException{
405         return super.clone();
406     }
407 
408 }
409 
410