1 /*
2 **********************************************************************
3 * Copyright (c) 2002-2014, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 */
7 #ifndef USETITER_H
8 #define USETITER_H
9 
10 #include "unicode/utypes.h"
11 #include "unicode/uobject.h"
12 #include "unicode/unistr.h"
13 
14 /**
15  * \file
16  * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
17  */
18 
19 U_NAMESPACE_BEGIN
20 
// Forward declarations: this header refers to both types only through
// pointers and references, so complete definitions are not required here.
class UnicodeSet;
class UnicodeString;
23 
24 /**
25  *
26  * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
27  * iterates over either code points or code point ranges.  After all
28  * code points or ranges have been returned, it returns the
29  * multicharacter strings of the UnicodeSet, if any.
30  *
31  * This class is not intended to be subclassed.  Consider any fields
32  *  or methods declared as "protected" to be private.  The use of
33  *  protected in this class is an artifact of history.
34  *
35  * <p>To iterate over code points and strings, use a loop like this:
36  * <pre>
37  * UnicodeSetIterator it(set);
38  * while (it.next()) {
39  *     processItem(it.getString());
40  * }
41  * </pre>
42  * <p>Each item in the set is accessed as a string.  Set elements
43  *    consisting of single code points are returned as strings containing
44  *    just the one code point.
45  *
46  * <p>To iterate over code point ranges, instead of individual code points,
47  *    use a loop like this:
48  * <pre>
49  * UnicodeSetIterator it(set);
50  * while (it.nextRange()) {
51  *   if (it.isString()) {
52  *     processString(it.getString());
53  *   } else {
54  *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
55  *   }
56  * }
57  * </pre>
58  * @author M. Davis
59  * @stable ICU 2.4
60  */
class U_COMMON_API UnicodeSetIterator : public UObject {

 protected:

    /**
     * Value of <tt>codepoint</tt> if the iterator points to a string.
     * If <tt>codepoint == IS_STRING</tt>, then examine
     * <tt>string</tt> for the current iteration result.
     * @stable ICU 2.4
     */
    enum { IS_STRING = -1 };

    /**
     * Current code point, or the special value <tt>IS_STRING</tt>, if
     * the iterator points to a string.
     * @stable ICU 2.4
     */
    UChar32 codepoint;

    /**
     * When iterating over ranges using <tt>nextRange()</tt>,
     * <tt>codepointEnd</tt> contains the inclusive end of the
     * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
     * iterating over code points using <tt>next()</tt>, or if
     * <tt>codepoint == IS_STRING</tt>, then the value of
     * <tt>codepointEnd</tt> is undefined.
     * @stable ICU 2.4
     */
    UChar32 codepointEnd;

    /**
     * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
     * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
     * value of <tt>string</tt> is undefined.
     * @stable ICU 2.4
     */
    const UnicodeString* string;

 public:

    /**
     * Create an iterator over the given set.  The iterator keeps a
     * pointer to <tt>set</tt> and is valid only so long as
     * <tt>set</tt> is valid.
     * @param set set to iterate over
     * @stable ICU 2.4
     */
    UnicodeSetIterator(const UnicodeSet& set);

    /**
     * Create an iterator over nothing.  <tt>next()</tt> and
     * <tt>nextRange()</tt> return false.  This is a convenience
     * constructor allowing the target to be set later with
     * <tt>reset(const UnicodeSet&)</tt>.
     * @stable ICU 2.4
     */
    UnicodeSetIterator();

    /**
     * Destructor.
     * @stable ICU 2.4
     */
    virtual ~UnicodeSetIterator();

    /**
     * Returns true if the current element is a string.  If so, the
     * caller can retrieve it with <tt>getString()</tt>.  If this
     * method returns false, the current element is a code point or
     * code point range, depending on whether <tt>next()</tt> or
     * <tt>nextRange()</tt> was called.
     * Elements of types string and codepoint can both be retrieved
     * with the function <tt>getString()</tt>.
     * Elements of type codepoint can also be retrieved with
     * <tt>getCodepoint()</tt>.
     * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
     * of the range, and <tt>getCodepointEnd()</tt> returns the end
     * of the range.
     * @return true if the current element is a string.
     * @stable ICU 2.4
     */
    inline UBool isString() const;

    /**
     * Returns the current code point, if <tt>isString()</tt> returned
     * false.  Otherwise returns an undefined result.
     * @return the current code point, or the start of the current range.
     * @stable ICU 2.4
     */
    inline UChar32 getCodepoint() const;

    /**
     * Returns the end of the current code point range, if
     * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
     * called.  Otherwise returns an undefined result.
     * @return the inclusive end of the current code point range.
     * @stable ICU 2.4
     */
    inline UChar32 getCodepointEnd() const;

    /**
     * Returns the current string, if <tt>isString()</tt> returned
     * true.  If the current iteration item is a code point, a UnicodeString
     * containing that single code point is returned.
     *
     * Ownership of the returned string remains with the iterator.
     * The string is guaranteed to remain valid only until the iterator is
     * advanced to the next item, or until the iterator is deleted.
     *
     * NOTE(review): this accessor is not const, presumably because it may
     * populate <tt>cpString</tt> for code point items -- confirm against
     * the implementation.
     *
     * @return a reference to the string for the current element.
     * @stable ICU 2.4
     */
    const UnicodeString& getString();

    /**
     * Advances the iteration position to the next element in the set,
     * which can be either a single code point or a string.
     * If there are no more elements in the set, return false.
     *
     * <p>
     * If <tt>isString() == TRUE</tt>, the value is a
     * string, otherwise the value is a
     * single code point.  Elements of either type can be retrieved
     * with the function <tt>getString()</tt>, while elements
     * consisting of a single code point can also be retrieved with
     * <tt>getCodepoint()</tt>.
     *
     * <p>The order of iteration is all code points in sorted order,
     * followed by all strings in sorted order.  Do not mix
     * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
     * calling <tt>reset()</tt> between them.  The results of doing so
     * are undefined.
     *
     * @return true if there was another element in the set.
     * @stable ICU 2.4
     */
    UBool next();

    /**
     * Returns the next element in the set, either a code point range
     * or a string.  If there are no more elements in the set, return
     * false.  If <tt>isString() == TRUE</tt>, the value is a
     * string and can be accessed with <tt>getString()</tt>.  Otherwise the value is a
     * range of one or more code points from <tt>getCodepoint()</tt> to
     * <tt>getCodepointEnd()</tt> inclusive.
     *
     * <p>The order of iteration is all code point ranges in sorted
     * order, followed by all strings in sorted order.  Ranges are
     * disjoint and non-contiguous.  The value returned from <tt>getString()</tt>
     * is undefined unless <tt>isString() == TRUE</tt>.  Do not mix calls to
     * <tt>next()</tt> and <tt>nextRange()</tt> without calling
     * <tt>reset()</tt> between them.  The results of doing so are
     * undefined.
     *
     * @return true if there was another element in the set.
     * @stable ICU 2.4
     */
    UBool nextRange();

    /**
     * Sets this iterator to visit the elements of the given set and
     * resets it to the start of that set.  The iterator is valid only
     * so long as <tt>set</tt> is valid.
     * @param set the set to iterate over.
     * @stable ICU 2.4
     */
    void reset(const UnicodeSet& set);

    /**
     * Resets this iterator to the start of the set.
     * @stable ICU 2.4
     */
    void reset();

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     *
     * @return the class ID shared by all instances of this class.
     * @stable ICU 2.4
     */
    static UClassID U_EXPORT2 getStaticClassID();

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     *
     * @return the class ID of this object's actual class.
     * @stable ICU 2.4
     */
    virtual UClassID getDynamicClassID() const;

    // ======================= PRIVATES ===========================
    // Everything below is declared "protected" for historical reasons
    // only; treat it as private implementation detail.

 protected:

    /** The set being iterated over.  Not owned: callers must keep the
     * set valid for as long as the iterator is used.
     * @stable ICU 2.4
     */
    const UnicodeSet* set;
    /** End range
     * (NOTE(review): presumably the index of the last range in the set --
     * confirm against the implementation.)
     * @stable ICU 2.4
     */
    int32_t endRange;
    /** Range
     * (NOTE(review): presumably the index of the range currently being
     * iterated -- confirm against the implementation.)
     * @stable ICU 2.4
     */
    int32_t range;

    // endElement and nextElement are really UChar32's, but we keep
    // them as signed int32_t's so we can do comparisons with
    // endElement set to -1.  Leave them as int32_t's.
    /** End element
     * @stable ICU 2.4
     */
    int32_t endElement;
    /** Next element
     * @stable ICU 2.4
     */
    int32_t nextElement;
    //UBool abbreviated;
    /** Next string
     * @stable ICU 2.4
     */
    int32_t nextString;
    /** String count
     * @stable ICU 2.4
     */
    int32_t stringCount;

    /**
     *  Points to the string to use when the caller asks for a
     *  string and the current iteration item is a code point, not a string.
     *  @internal
     */
    UnicodeString *cpString;

    /** Copy constructor. Disallowed: declared non-public so that client
     * code cannot copy an iterator.
     * @stable ICU 2.4
     */
    UnicodeSetIterator(const UnicodeSetIterator&); // disallow

    /** Assignment operator. Disallowed: declared non-public so that client
     * code cannot assign one iterator to another.
     * @stable ICU 2.4
     */
    UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow

    /** Load range
     * @param range the range to load (NOTE(review): presumably an index
     *              into the set's ranges -- confirm against the
     *              implementation).
     * @stable ICU 2.4
     */
    virtual void loadRange(int32_t range);

};
302 
isString()303 inline UBool UnicodeSetIterator::isString() const {
304     return codepoint == (UChar32)IS_STRING;
305 }
306 
getCodepoint()307 inline UChar32 UnicodeSetIterator::getCodepoint() const {
308     return codepoint;
309 }
310 
getCodepointEnd()311 inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
312     return codepointEnd;
313 }
314 
315 
316 U_NAMESPACE_END
317 
318 #endif
319