1 /*
2 **********************************************************************
3 * Copyright (c) 2003-2011, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 * Author: Alan Liu
7 * Created: September 24 2003
8 * Since: ICU 2.8
9 **********************************************************************
10 */
11 #include "ruleiter.h"
12 #include "unicode/parsepos.h"
13 #include "unicode/symtable.h"
14 #include "unicode/unistr.h"
15 #include "unicode/utf16.h"
16 #include "patternprops.h"
17 
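/* Lookahead limit, in UTF-16 code units, used when parsing a backslash
 * escape; sized to cover the forms \U87654321 or \ud800\udc00 */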
19 #define MAX_U_NOTATION_LEN 12
20 
21 U_NAMESPACE_BEGIN
22 
RuleCharacterIterator(const UnicodeString & theText,const SymbolTable * theSym,ParsePosition & thePos)23 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
24                       ParsePosition& thePos) :
25     text(theText),
26     pos(thePos),
27     sym(theSym),
28     buf(0),
29     bufPos(0)
30 {}
31 
atEnd() const32 UBool RuleCharacterIterator::atEnd() const {
33     return buf == 0 && pos.getIndex() == text.length();
34 }
35 
next(int32_t options,UBool & isEscaped,UErrorCode & ec)36 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
37     if (U_FAILURE(ec)) return DONE;
38 
39     UChar32 c = DONE;
40     isEscaped = FALSE;
41 
42     for (;;) {
43         c = _current();
44         _advance(U16_LENGTH(c));
45 
46         if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
47             (options & PARSE_VARIABLES) != 0 && sym != 0) {
48             UnicodeString name = sym->parseReference(text, pos, text.length());
49             // If name is empty there was an isolated SYMBOL_REF;
50             // return it.  Caller must be prepared for this.
51             if (name.length() == 0) {
52                 break;
53             }
54             bufPos = 0;
55             buf = sym->lookup(name);
56             if (buf == 0) {
57                 ec = U_UNDEFINED_VARIABLE;
58                 return DONE;
59             }
60             // Handle empty variable value
61             if (buf->length() == 0) {
62                 buf = 0;
63             }
64             continue;
65         }
66 
67         if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
68             continue;
69         }
70 
71         if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
72             UnicodeString tempEscape;
73             int32_t offset = 0;
74             c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
75             jumpahead(offset);
76             isEscaped = TRUE;
77             if (c < 0) {
78                 ec = U_MALFORMED_UNICODE_ESCAPE;
79                 return DONE;
80             }
81         }
82 
83         break;
84     }
85 
86     return c;
87 }
88 
getPos(RuleCharacterIterator::Pos & p) const89 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
90     p.buf = buf;
91     p.pos = pos.getIndex();
92     p.bufPos = bufPos;
93 }
94 
setPos(const RuleCharacterIterator::Pos & p)95 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
96     buf = p.buf;
97     pos.setIndex(p.pos);
98     bufPos = p.bufPos;
99 }
100 
skipIgnored(int32_t options)101 void RuleCharacterIterator::skipIgnored(int32_t options) {
102     if ((options & SKIP_WHITESPACE) != 0) {
103         for (;;) {
104             UChar32 a = _current();
105             if (!PatternProps::isWhiteSpace(a)) break;
106             _advance(U16_LENGTH(a));
107         }
108     }
109 }
110 
lookahead(UnicodeString & result,int32_t maxLookAhead) const111 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
112     if (maxLookAhead < 0) {
113         maxLookAhead = 0x7FFFFFFF;
114     }
115     if (buf != 0) {
116         buf->extract(bufPos, maxLookAhead, result);
117     } else {
118         text.extract(pos.getIndex(), maxLookAhead, result);
119     }
120     return result;
121 }
122 
jumpahead(int32_t count)123 void RuleCharacterIterator::jumpahead(int32_t count) {
124     _advance(count);
125 }
126 
127 /*
128 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
129     int32_t b = pos.getIndex();
130     text.extract(0, b, result);
131     return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
132 }
133 */
134 
_current() const135 UChar32 RuleCharacterIterator::_current() const {
136     if (buf != 0) {
137         return buf->char32At(bufPos);
138     } else {
139         int i = pos.getIndex();
140         return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
141     }
142 }
143 
_advance(int32_t count)144 void RuleCharacterIterator::_advance(int32_t count) {
145     if (buf != 0) {
146         bufPos += count;
147         if (bufPos == buf->length()) {
148             buf = 0;
149         }
150     } else {
151         pos.setIndex(pos.getIndex() + count);
152         if (pos.getIndex() > text.length()) {
153             pos.setIndex(text.length());
154         }
155     }
156 }
157 
158 U_NAMESPACE_END
159 
160 //eof
161