/*
 *******************************************************************************
 * Copyright (C) 1996-2014, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.text;

import java.util.Iterator;

/**
 * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
 * iterates over either code points or code point ranges.  After all
 * code points or ranges have been returned, it returns the
 * multicharacter strings of the UnicodeSet, if any.
 *
 * <p>To iterate over code points and multicharacter strings,
 * use a loop like this:
 * <pre>
 * for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
 *   processString(it.getString());
 * }
 * </pre>
 *
 * <p>To iterate over code point ranges, use a loop like this:
 * <pre>
 * for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.nextRange();) {
 *   if (it.codepoint != UnicodeSetIterator.IS_STRING) {
 *     processCodepointRange(it.codepoint, it.codepointEnd);
 *   } else {
 *     processString(it.getString());
 *   }
 * }
 * </pre>
 * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
 * Do not alter the UnicodeSet while iterating.
 * @author M. Davis
 * @stable ICU 2.0
 */
public class UnicodeSetIterator {

    /**
     * Value of <tt>codepoint</tt> if the iterator points to a string.
     * If <tt>codepoint == IS_STRING</tt>, then examine
     * <tt>string</tt> for the current iteration result.
     *
     * <p>Note: this field is declared non-final; treat it as a constant
     * and never assign to it.
     * @stable ICU 2.0
     */
    public static int IS_STRING = -1;

    /**
     * Current code point, or the special value <tt>IS_STRING</tt>, if
     * the iterator points to a string.
     * @stable ICU 2.0
     */
    public int codepoint;

    /**
     * When iterating over ranges using <tt>nextRange()</tt>,
     * <tt>codepointEnd</tt> contains the inclusive end of the
     * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
     * iterating over code points using <tt>next()</tt>, or if
     * <tt>codepoint == IS_STRING</tt>, then the value of
     * <tt>codepointEnd</tt> is undefined.
     * @stable ICU 2.0
     */
    public int codepointEnd;

    /**
     * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
     * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
     * value of <tt>string</tt> is undefined.
     * @stable ICU 2.0
     */
    public String string;

    /**
     * Create an iterator over the given set.
     * @param set set to iterate over
     * @stable ICU 2.0
     */
    public UnicodeSetIterator(UnicodeSet set) {
        reset(set);
    }

    /**
     * Create an iterator over nothing.  <tt>next()</tt> and
     * <tt>nextRange()</tt> return false. This is a convenience
     * constructor allowing the target to be set later.
     * @stable ICU 2.0
     */
    public UnicodeSetIterator() {
        reset(new UnicodeSet());
    }

    /**
     * Returns the next element in the set, either a single code point
     * or a string.  If there are no more elements in the set, return
     * false.  If <tt>codepoint == IS_STRING</tt>, the value is a
     * string in the <tt>string</tt> field.  Otherwise the value is a
     * single code point in the <tt>codepoint</tt> field.
     *
     * <p>The order of iteration is all code points in sorted order,
     * followed by all strings in sorted order.  <tt>codepointEnd</tt> is
     * undefined after calling this method.  <tt>string</tt> is
     * undefined unless <tt>codepoint == IS_STRING</tt>.  Do not mix
     * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
     * calling <tt>reset()</tt> between them.  The results of doing so
     * are undefined.
     * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
     * Do not alter the UnicodeSet while iterating.
     * @return true if there was another element in the set and this
     * object contains the element.
     * @stable ICU 2.0
     */
    public boolean next() {
        // Still inside the currently loaded range: hand out one code point.
        if (nextElement <= endElement) {
            codepoint = codepointEnd = nextElement++;
            return true;
        }
        // Current range exhausted: load the following one, if any.
        if (range < endRange) {
            loadRange(++range);
            codepoint = codepointEnd = nextElement++;
            return true;
        }
        // All code points consumed; continue with the strings.
        return nextString();
    }

    /**
     * Returns the next element in the set, either a code point range
     * or a string.  If there are no more elements in the set, return
     * false.  If <tt>codepoint == IS_STRING</tt>, the value is a
     * string in the <tt>string</tt> field.  Otherwise the value is a
     * range of one or more code points from <tt>codepoint</tt> to
     * <tt>codepointEnd</tt> inclusive.
     *
     * <p>The order of iteration is all code point ranges in sorted
     * order, followed by all strings in sorted order.  Ranges are
     * disjoint and non-contiguous.  <tt>string</tt> is undefined
     * unless <tt>codepoint == IS_STRING</tt>.  Do not mix calls to
     * <tt>next()</tt> and <tt>nextRange()</tt> without calling
     * <tt>reset()</tt> between them.  The results of doing so are
     * undefined.
     *
     * @return true if there was another element in the set and this
     * object contains the element.
     * @stable ICU 2.0
     */
    public boolean nextRange() {
        // Remainder of the currently loaded range is returned in one step.
        if (nextElement <= endElement) {
            codepointEnd = endElement;
            codepoint = nextElement;
            nextElement = endElement + 1;
            return true;
        }
        // Current range exhausted: load the following one, if any.
        if (range < endRange) {
            loadRange(++range);
            codepointEnd = endElement;
            codepoint = nextElement;
            nextElement = endElement + 1;
            return true;
        }
        // All ranges consumed; continue with the strings.
        return nextString();
    }

    /**
     * Sets this iterator to visit the elements of the given set and
     * resets it to the start of that set.  The iterator is valid only
     * so long as <tt>set</tt> is valid.
     * @param uset the set to iterate over.
     * @stable ICU 2.0
     */
    public void reset(UnicodeSet uset) {
        set = uset;
        reset();
    }

    /**
     * Resets this iterator to the start of the set.
     * @stable ICU 2.0
     */
    public void reset() {
        endRange = set.getRangeCount() - 1;
        range = 0;
        // endElement < nextElement marks "no range loaded", so an empty set
        // falls straight through to the string phase.
        endElement = -1;
        nextElement = 0;
        if (endRange >= 0) {
            loadRange(range);
        }
        stringIterator = null;
        if (set.strings != null) {
            stringIterator = set.strings.iterator();
            // Maintain the invariant: a non-null iterator always has a next element.
            if (!stringIterator.hasNext()) {
                stringIterator = null;
            }
        }
    }

    /**
     * Gets the current string from the iterator. Only use after calling next(), not nextRange().
     *
     * <p>If the current element is a string, that string is returned.
     * Otherwise the result is <tt>UTF16.valueOf(codepoint)</tt>; after
     * <tt>nextRange()</tt> that covers only the first code point of the
     * current range.
     * @return the current element as a string
     * @stable ICU 4.0
     */
    public String getString() {
        if (codepoint != IS_STRING) {
            return UTF16.valueOf(codepoint);
        }
        return string;
    }

    // ======================= PRIVATES ===========================

    private UnicodeSet set;
    private int endRange = 0;
    private int range = 0;

    /**
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public UnicodeSet getSet() {
        return set;
    }

    /**
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    protected int endElement;
    /**
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    protected int nextElement;

    /**
     * Iterator over the set's strings.
     * Invariant: stringIterator is null when there are no (more) strings remaining
     */
    private Iterator<String> stringIterator = null;

    /**
     * Advances to the next string element, shared by <tt>next()</tt> and
     * <tt>nextRange()</tt> once all code points have been returned.
     * Sets <tt>codepoint</tt> to <tt>IS_STRING</tt> and stores the element
     * in <tt>string</tt>.
     * @return true if a string was available, false if iteration is finished
     */
    private boolean nextString() {
        // stringIterator == null iff there are no string elements remaining
        if (stringIterator == null) {
            return false;
        }
        codepoint = IS_STRING; // signal that value is actually a string
        string = stringIterator.next();
        if (!stringIterator.hasNext()) {
            stringIterator = null;
        }
        return true;
    }

    /**
     * Loads the bounds of the given range of the set into
     * <tt>nextElement</tt> (range start) and <tt>endElement</tt> (range end).
     * @param aRange index of the range to load
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    protected void loadRange(int aRange) {
        nextElement = set.getRangeStart(aRange);
        endElement = set.getRangeEnd(aRange);
    }
}