1 /*
2  *******************************************************************************
3  * Copyright (C) 1996-2014, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package com.ibm.icu.text;
8 
9 import java.util.Iterator;
10 
/**
 * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
 * iterates over either code points or code point ranges.  After all
 * code points or ranges have been returned, it returns the
 * multicharacter strings of the UnicodeSet, if any.
 *
 * <p>To iterate over code points and multicharacter strings,
 * use a loop like this:
 * <pre>
 * for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
 *   processString(it.getString());
 * }
 * </pre>
 *
 * <p>To iterate over code point ranges, use a loop like this:
 * <pre>
 * for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.nextRange();) {
 *   if (it.codepoint != UnicodeSetIterator.IS_STRING) {
 *     processCodepointRange(it.codepoint, it.codepointEnd);
 *   } else {
 *     processString(it.getString());
 *   }
 * }
 * </pre>
 * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
 * Do not alter the UnicodeSet while iterating.
 * @author M. Davis
 * @stable ICU 2.0
 */
40 public class UnicodeSetIterator {
41 
42     /**
43      * Value of <tt>codepoint</tt> if the iterator points to a string.
44      * If <tt>codepoint == IS_STRING</tt>, then examine
45      * <tt>string</tt> for the current iteration result.
46      * @stable ICU 2.0
47      */
48     public static int IS_STRING = -1;
49 
50     /**
51      * Current code point, or the special value <tt>IS_STRING</tt>, if
52      * the iterator points to a string.
53      * @stable ICU 2.0
54      */
55     public int codepoint;
56 
57     /**
58      * When iterating over ranges using <tt>nextRange()</tt>,
59      * <tt>codepointEnd</tt> contains the inclusive end of the
60      * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
61      * iterating over code points using <tt>next()</tt>, or if
62      * <tt>codepoint == IS_STRING</tt>, then the value of
63      * <tt>codepointEnd</tt> is undefined.
64      * @stable ICU 2.0
65      */
66     public int codepointEnd;
67 
68     /**
69      * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
70      * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
71      * value of <tt>string</tt> is undefined.
72      * @stable ICU 2.0
73      */
74     public String string;
75 
76     /**
77      * Create an iterator over the given set.
78      * @param set set to iterate over
79      * @stable ICU 2.0
80      */
UnicodeSetIterator(UnicodeSet set)81     public UnicodeSetIterator(UnicodeSet set) {
82         reset(set);
83     }
84 
85     /**
86      * Create an iterator over nothing.  <tt>next()</tt> and
87      * <tt>nextRange()</tt> return false. This is a convenience
88      * constructor allowing the target to be set later.
89      * @stable ICU 2.0
90      */
UnicodeSetIterator()91     public UnicodeSetIterator() {
92         reset(new UnicodeSet());
93     }
94 
95     /**
96      * Returns the next element in the set, either a single code point
97      * or a string.  If there are no more elements in the set, return
98      * false.  If <tt>codepoint == IS_STRING</tt>, the value is a
99      * string in the <tt>string</tt> field.  Otherwise the value is a
100      * single code point in the <tt>codepoint</tt> field.
101      *
102      * <p>The order of iteration is all code points in sorted order,
103      * followed by all strings sorted order.  <tt>codepointEnd</tt> is
104      * undefined after calling this method.  <tt>string</tt> is
105      * undefined unless <tt>codepoint == IS_STRING</tt>.  Do not mix
106      * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
107      * calling <tt>reset()</tt> between them.  The results of doing so
108      * are undefined.
109      * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
110      * Do not alter the UnicodeSet while iterating.
111      * @return true if there was another element in the set and this
112      * object contains the element.
113      * @stable ICU 2.0
114      */
next()115     public boolean next() {
116         if (nextElement <= endElement) {
117             codepoint = codepointEnd = nextElement++;
118             return true;
119         }
120         if (range < endRange) {
121             loadRange(++range);
122             codepoint = codepointEnd = nextElement++;
123             return true;
124         }
125 
126         // stringIterator == null iff there are no string elements remaining
127 
128         if (stringIterator == null) {
129             return false;
130         }
131         codepoint = IS_STRING; // signal that value is actually a string
132         string = stringIterator.next();
133         if (!stringIterator.hasNext()) {
134             stringIterator = null;
135         }
136         return true;
137     }
138 
139     /**
140      * Returns the next element in the set, either a code point range
141      * or a string.  If there are no more elements in the set, return
142      * false.  If <tt>codepoint == IS_STRING</tt>, the value is a
143      * string in the <tt>string</tt> field.  Otherwise the value is a
144      * range of one or more code points from <tt>codepoint</tt> to
145      * <tt>codepointeEnd</tt> inclusive.
146      *
147      * <p>The order of iteration is all code points ranges in sorted
148      * order, followed by all strings sorted order.  Ranges are
149      * disjoint and non-contiguous.  <tt>string</tt> is undefined
150      * unless <tt>codepoint == IS_STRING</tt>.  Do not mix calls to
151      * <tt>next()</tt> and <tt>nextRange()</tt> without calling
152      * <tt>reset()</tt> between them.  The results of doing so are
153      * undefined.
154      *
155      * @return true if there was another element in the set and this
156      * object contains the element.
157      * @stable ICU 2.0
158      */
nextRange()159     public boolean nextRange() {
160         if (nextElement <= endElement) {
161             codepointEnd = endElement;
162             codepoint = nextElement;
163             nextElement = endElement+1;
164             return true;
165         }
166         if (range < endRange) {
167             loadRange(++range);
168             codepointEnd = endElement;
169             codepoint = nextElement;
170             nextElement = endElement+1;
171             return true;
172         }
173 
174         // stringIterator == null iff there are no string elements remaining
175 
176         if (stringIterator == null) {
177             return false;
178         }
179         codepoint = IS_STRING; // signal that value is actually a string
180         string = stringIterator.next();
181         if (!stringIterator.hasNext()) {
182             stringIterator = null;
183         }
184         return true;
185     }
186 
187     /**
188      * Sets this iterator to visit the elements of the given set and
189      * resets it to the start of that set.  The iterator is valid only
190      * so long as <tt>set</tt> is valid.
191      * @param uset the set to iterate over.
192      * @stable ICU 2.0
193      */
reset(UnicodeSet uset)194     public void reset(UnicodeSet uset) {
195         set = uset;
196         reset();
197     }
198 
199     /**
200      * Resets this iterator to the start of the set.
201      * @stable ICU 2.0
202      */
reset()203     public void reset() {
204         endRange = set.getRangeCount() - 1;
205         range = 0;
206         endElement = -1;
207         nextElement = 0;
208         if (endRange >= 0) {
209             loadRange(range);
210         }
211         stringIterator = null;
212         if (set.strings != null) {
213             stringIterator = set.strings.iterator();
214             if (!stringIterator.hasNext()) {
215                 stringIterator = null;
216             }
217         }
218     }
219 
220     /**
221      * Gets the current string from the iterator. Only use after calling next(), not nextRange().
222      * @stable ICU 4.0
223      */
getString()224     public String getString() {
225         if (codepoint != IS_STRING) {
226             return UTF16.valueOf(codepoint);
227         }
228         return string;
229     }
230 
231     // ======================= PRIVATES ===========================
232 
233     private UnicodeSet set;
234     private int endRange = 0;
235     private int range = 0;
236 
237     /**
238      * @internal
239      * @deprecated This API is ICU internal only.
240      */
241     @Deprecated
getSet()242     public UnicodeSet getSet() {
243         return set;
244     }
245 
246     /**
247      * @internal
248      * @deprecated This API is ICU internal only.
249      */
250     @Deprecated
251     protected int endElement;
252     /**
253      * @internal
254      * @deprecated This API is ICU internal only.
255      */
256     @Deprecated
257     protected int nextElement;
258     private Iterator<String> stringIterator = null;
259 
260     /**
261      * Invariant: stringIterator is null when there are no (more) strings remaining
262      */
263 
264     /**
265      * @internal
266      * @deprecated This API is ICU internal only.
267      */
268     @Deprecated
loadRange(int aRange)269     protected void loadRange(int aRange) {
270         nextElement = set.getRangeStart(aRange);
271         endElement = set.getRangeEnd(aRange);
272     }
273 }
274