1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (c) 2003-2011, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * Author: Alan Liu 9 * Created: September 24 2003 10 * Since: ICU 2.8 11 ********************************************************************** 12 */ 13 #include "ruleiter.h" 14 #include "unicode/parsepos.h" 15 #include "unicode/symtable.h" 16 #include "unicode/unistr.h" 17 #include "unicode/utf16.h" 18 #include "patternprops.h" 19 20 /* \U87654321 or \ud800\udc00 */ 21 #define MAX_U_NOTATION_LEN 12 22 23 U_NAMESPACE_BEGIN 24 25 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym, 26 ParsePosition& thePos) : 27 text(theText), 28 pos(thePos), 29 sym(theSym), 30 buf(0), 31 bufPos(0) 32 {} 33 34 UBool RuleCharacterIterator::atEnd() const { 35 return buf == 0 && pos.getIndex() == text.length(); 36 } 37 38 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { 39 if (U_FAILURE(ec)) return DONE; 40 41 UChar32 c = DONE; 42 isEscaped = FALSE; 43 44 for (;;) { 45 c = _current(); 46 _advance(U16_LENGTH(c)); 47 48 if (c == SymbolTable::SYMBOL_REF && buf == 0 && 49 (options & PARSE_VARIABLES) != 0 && sym != 0) { 50 UnicodeString name = sym->parseReference(text, pos, text.length()); 51 // If name is empty there was an isolated SYMBOL_REF; 52 // return it. Caller must be prepared for this. 53 if (name.length() == 0) { 54 break; 55 } 56 bufPos = 0; 57 buf = sym->lookup(name); 58 if (buf == 0) { 59 ec = U_UNDEFINED_VARIABLE; 60 return DONE; 61 } 62 // Handle empty variable value 63 if (buf->length() == 0) { 64 buf = 0; 65 } 66 continue; 67 } 68 69 if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { 70 continue; 71 } 72 73 if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { 74 UnicodeString tempEscape; 75 int32_t offset = 0; 76 c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset); 77 jumpahead(offset); 78 isEscaped = TRUE; 79 if (c < 0) { 80 ec = U_MALFORMED_UNICODE_ESCAPE; 81 return DONE; 82 } 83 } 84 85 break; 86 } 87 88 return c; 89 } 90 91 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { 92 p.buf = buf; 93 p.pos = pos.getIndex(); 94 p.bufPos = bufPos; 95 } 96 97 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { 98 buf = p.buf; 99 pos.setIndex(p.pos); 100 bufPos = p.bufPos; 101 } 102 103 void RuleCharacterIterator::skipIgnored(int32_t options) { 104 if ((options & SKIP_WHITESPACE) != 0) { 105 for (;;) { 106 UChar32 a = _current(); 107 if (!PatternProps::isWhiteSpace(a)) break; 108 _advance(U16_LENGTH(a)); 109 } 110 } 111 } 112 113 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const { 114 if (maxLookAhead < 0) { 115 maxLookAhead = 0x7FFFFFFF; 116 } 117 if (buf != 0) { 118 buf->extract(bufPos, maxLookAhead, result); 119 } else { 120 text.extract(pos.getIndex(), maxLookAhead, result); 121 } 122 return result; 123 } 124 125 void RuleCharacterIterator::jumpahead(int32_t count) { 126 _advance(count); 127 } 128 129 /* 130 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { 131 int32_t b = pos.getIndex(); 132 text.extract(0, b, result); 133 return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index 134 } 135 */ 136 137 UChar32 RuleCharacterIterator::_current() const { 138 if (buf != 0) { 139 return buf->char32At(bufPos); 140 } else { 141 int i = pos.getIndex(); 142 return (i < text.length()) ? text.char32At(i) : (UChar32)DONE; 143 } 144 } 145 146 void RuleCharacterIterator::_advance(int32_t count) { 147 if (buf != 0) { 148 bufPos += count; 149 if (bufPos == buf->length()) { 150 buf = 0; 151 } 152 } else { 153 pos.setIndex(pos.getIndex() + count); 154 if (pos.getIndex() > text.length()) { 155 pos.setIndex(text.length()); 156 } 157 } 158 } 159 160 U_NAMESPACE_END 161 162 //eof 163