1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (c) 2002-2014, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 */
9 #ifndef USETITER_H
10 #define USETITER_H
11 
12 #include "unicode/utypes.h"
13 #include "unicode/uobject.h"
14 #include "unicode/unistr.h"
15 
16 /**
17  * \file
18  * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
19  */
20 
21 U_NAMESPACE_BEGIN
22 
23 class UnicodeSet;
24 class UnicodeString;
25 
26 /**
27  *
28  * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
29  * iterates over either code points or code point ranges.  After all
30  * code points or ranges have been returned, it returns the
31  * multicharacter strings of the UnicodeSet, if any.
32  *
33  * This class is not intended to be subclassed.  Consider any fields
34  *  or methods declared as "protected" to be private.  The use of
35  *  protected in this class is an artifact of history.
36  *
37  * <p>To iterate over code points and strings, use a loop like this:
38  * <pre>
39  * UnicodeSetIterator it(set);
40  * while (it.next()) {
41  *     processItem(it.getString());
42  * }
43  * </pre>
44  * <p>Each item in the set is accessed as a string.  Set elements
45  *    consisting of single code points are returned as strings containing
46  *    just the one code point.
47  *
48  * <p>To iterate over code point ranges, instead of individual code points,
49  *    use a loop like this:
50  * <pre>
51  * UnicodeSetIterator it(set);
52  * while (it.nextRange()) {
53  *   if (it.isString()) {
54  *     processString(it.getString());
55  *   } else {
56  *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
57  *   }
58  * }
59  * </pre>
60  * @author M. Davis
61  * @stable ICU 2.4
62  */
63 class U_COMMON_API UnicodeSetIterator : public UObject {
64 
65  protected:
66 
67     /**
68      * Value of <tt>codepoint</tt> if the iterator points to a string.
69      * If <tt>codepoint == IS_STRING</tt>, then examine
70      * <tt>string</tt> for the current iteration result.
71      * @stable ICU 2.4
72      */
73     enum { IS_STRING = -1 };
74 
75     /**
76      * Current code point, or the special value <tt>IS_STRING</tt>, if
77      * the iterator points to a string.
78      * @stable ICU 2.4
79      */
80     UChar32 codepoint;
81 
82     /**
83      * When iterating over ranges using <tt>nextRange()</tt>,
84      * <tt>codepointEnd</tt> contains the inclusive end of the
85      * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
86      * iterating over code points using <tt>next()</tt>, or if
87      * <tt>codepoint == IS_STRING</tt>, then the value of
88      * <tt>codepointEnd</tt> is undefined.
89      * @stable ICU 2.4
90      */
91     UChar32 codepointEnd;
92 
93     /**
94      * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
95      * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
96      * value of <tt>string</tt> is undefined.
97      * @stable ICU 2.4
98      */
99     const UnicodeString* string;
100 
101  public:
102 
103     /**
104      * Create an iterator over the given set.  The iterator is valid
105      * only so long as <tt>set</tt> is valid.
106      * @param set set to iterate over
107      * @stable ICU 2.4
108      */
109     UnicodeSetIterator(const UnicodeSet& set);
110 
111     /**
112      * Create an iterator over nothing.  <tt>next()</tt> and
113      * <tt>nextRange()</tt> return false. This is a convenience
114      * constructor allowing the target to be set later.
115      * @stable ICU 2.4
116      */
117     UnicodeSetIterator();
118 
119     /**
120      * Destructor.
121      * @stable ICU 2.4
122      */
123     virtual ~UnicodeSetIterator();
124 
125     /**
126      * Returns true if the current element is a string.  If so, the
127      * caller can retrieve it with <tt>getString()</tt>.  If this
128      * method returns false, the current element is a code point or
129      * code point range, depending on whether <tt>next()</tt> or
130      * <tt>nextRange()</tt> was called.
131      * Elements of types string and codepoint can both be retrieved
132      * with the function <tt>getString()</tt>.
133      * Elements of type codepoint can also be retrieved with
134      * <tt>getCodepoint()</tt>.
135      * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
136      * of the range, and <tt>getCodepointEnd()</tt> returns the end
137      * of the range.
138      * @stable ICU 2.4
139      */
140     inline UBool isString() const;
141 
142     /**
143      * Returns the current code point, if <tt>isString()</tt> returned
144      * false.  Otherwise returns an undefined result.
145      * @stable ICU 2.4
146      */
147     inline UChar32 getCodepoint() const;
148 
149     /**
150      * Returns the end of the current code point range, if
151      * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
152      * called.  Otherwise returns an undefined result.
153      * @stable ICU 2.4
154      */
155     inline UChar32 getCodepointEnd() const;
156 
157     /**
158      * Returns the current string, if <tt>isString()</tt> returned
159      * true.  If the current iteration item is a code point, a UnicodeString
160      * containing that single code point is returned.
161      *
162      * Ownership of the returned string remains with the iterator.
163      * The string is guaranteed to remain valid only until the iterator is
164      *   advanced to the next item, or until the iterator is deleted.
165      *
166      * @stable ICU 2.4
167      */
168     const UnicodeString& getString();
169 
170     /**
171      * Advances the iteration position to the next element in the set,
172      * which can be either a single code point or a string.
173      * If there are no more elements in the set, return false.
174      *
175      * <p>
176      * If <tt>isString() == TRUE</tt>, the value is a
177      * string, otherwise the value is a
178      * single code point.  Elements of either type can be retrieved
179      * with the function <tt>getString()</tt>, while elements of
180      * consisting of a single code point can be retrieved with
181      * <tt>getCodepoint()</tt>
182      *
183      * <p>The order of iteration is all code points in sorted order,
184      * followed by all strings sorted order.    Do not mix
185      * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
186      * calling <tt>reset()</tt> between them.  The results of doing so
187      * are undefined.
188      *
189      * @return true if there was another element in the set.
190      * @stable ICU 2.4
191      */
192     UBool next();
193 
194     /**
195      * Returns the next element in the set, either a code point range
196      * or a string.  If there are no more elements in the set, return
197      * false.  If <tt>isString() == TRUE</tt>, the value is a
198      * string and can be accessed with <tt>getString()</tt>.  Otherwise the value is a
199      * range of one or more code points from <tt>getCodepoint()</tt> to
200      * <tt>getCodepointeEnd()</tt> inclusive.
201      *
202      * <p>The order of iteration is all code points ranges in sorted
203      * order, followed by all strings sorted order.  Ranges are
204      * disjoint and non-contiguous.  The value returned from <tt>getString()</tt>
205      * is undefined unless <tt>isString() == TRUE</tt>.  Do not mix calls to
206      * <tt>next()</tt> and <tt>nextRange()</tt> without calling
207      * <tt>reset()</tt> between them.  The results of doing so are
208      * undefined.
209      *
210      * @return true if there was another element in the set.
211      * @stable ICU 2.4
212      */
213     UBool nextRange();
214 
215     /**
216      * Sets this iterator to visit the elements of the given set and
217      * resets it to the start of that set.  The iterator is valid only
218      * so long as <tt>set</tt> is valid.
219      * @param set the set to iterate over.
220      * @stable ICU 2.4
221      */
222     void reset(const UnicodeSet& set);
223 
224     /**
225      * Resets this iterator to the start of the set.
226      * @stable ICU 2.4
227      */
228     void reset();
229 
230     /**
231      * ICU "poor man's RTTI", returns a UClassID for this class.
232      *
233      * @stable ICU 2.4
234      */
235     static UClassID U_EXPORT2 getStaticClassID();
236 
237     /**
238      * ICU "poor man's RTTI", returns a UClassID for the actual class.
239      *
240      * @stable ICU 2.4
241      */
242     virtual UClassID getDynamicClassID() const;
243 
244     // ======================= PRIVATES ===========================
245 
246  protected:
247 
248     // endElement and nextElements are really UChar32's, but we keep
249     // them as signed int32_t's so we can do comparisons with
250     // endElement set to -1.  Leave them as int32_t's.
251     /** The set
252      * @stable ICU 2.4
253      */
254     const UnicodeSet* set;
255     /** End range
256      * @stable ICU 2.4
257      */
258     int32_t endRange;
259     /** Range
260      * @stable ICU 2.4
261      */
262     int32_t range;
263     /** End element
264      * @stable ICU 2.4
265      */
266     int32_t endElement;
267     /** Next element
268      * @stable ICU 2.4
269      */
270     int32_t nextElement;
271     //UBool abbreviated;
272     /** Next string
273      * @stable ICU 2.4
274      */
275     int32_t nextString;
276     /** String count
277      * @stable ICU 2.4
278      */
279     int32_t stringCount;
280 
281     /**
282      *  Points to the string to use when the caller asks for a
283      *  string and the current iteration item is a code point, not a string.
284      *  @internal
285      */
286     UnicodeString *cpString;
287 
288     /** Copy constructor. Disallowed.
289      * @stable ICU 2.4
290      */
291     UnicodeSetIterator(const UnicodeSetIterator&); // disallow
292 
293     /** Assignment operator. Disallowed.
294      * @stable ICU 2.4
295      */
296     UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
297 
298     /** Load range
299      * @stable ICU 2.4
300      */
301     virtual void loadRange(int32_t range);
302 
303 };
304 
isString()305 inline UBool UnicodeSetIterator::isString() const {
306     return codepoint == (UChar32)IS_STRING;
307 }
308 
getCodepoint()309 inline UChar32 UnicodeSetIterator::getCodepoint() const {
310     return codepoint;
311 }
312 
getCodepointEnd()313 inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
314     return codepointEnd;
315 }
316 
317 
318 U_NAMESPACE_END
319 
320 #endif
321