1 //
2 //  file:  rbbistbl.cpp    Implementation of the ICU RBBISymbolTable class
3 //
4 /*
5 ***************************************************************************
6 *   Copyright (C) 2002-2014 International Business Machines Corporation
7 *   and others. All rights reserved.
8 ***************************************************************************
9 */
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_BREAK_ITERATION
14 
15 #include "unicode/unistr.h"
16 #include "unicode/uniset.h"
17 #include "unicode/uchar.h"
18 #include "unicode/parsepos.h"
19 
20 #include "umutex.h"
21 
22 #include "rbbirb.h"
23 #include "rbbinode.h"
24 
25 
26 //
27 //  RBBISymbolTableEntry_deleter    Used by the UHashTable to delete the contents
28 //                                  when the hash table is deleted.
29 //
30 U_CDECL_BEGIN
RBBISymbolTableEntry_deleter(void * p)31 static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
32     icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p;
33     delete px;
34 }
35 U_CDECL_END
36 
37 
38 
39 U_NAMESPACE_BEGIN
40 
RBBISymbolTable(RBBIRuleScanner * rs,const UnicodeString & rules,UErrorCode & status)41 RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
42     :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))
43 {
44     fHashTable       = NULL;
45     fCachedSetLookup = NULL;
46 
47     fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
48     // uhash_open checks status
49     if (U_FAILURE(status)) {
50         return;
51     }
52     uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
53 }
54 
55 
56 
~RBBISymbolTable()57 RBBISymbolTable::~RBBISymbolTable()
58 {
59     uhash_close(fHashTable);
60 }
61 
62 
63 //
//  RBBISymbolTable::lookup       This function from the abstract symbol table interface
65 //                                looks up a variable name and returns a UnicodeString
66 //                                containing the substitution text.
67 //
68 //                                The variable name does NOT include the leading $.
69 //
const UnicodeString  *RBBISymbolTable::lookup(const UnicodeString& s) const
{
    // The method is const, yet it has to record which set (if any) the next
    // lookupMatcher() call should return, so constness is cast away here.
    RBBISymbolTable *self = const_cast<RBBISymbolTable *>(this);

    RBBISymbolTableEntry *entry =
        static_cast<RBBISymbolTableEntry *>(uhash_get(fHashTable, &s));
    if (entry == NULL) {
        // No variable with this name; the cached set is left untouched.
        return NULL;
    }

    // The entry's value is the variable reference node; its left child is the
    // root node of the expression assigned to the variable.
    RBBINode *expression = entry->val->fLeftChild;

    if (expression->fType != RBBINode::setRef) {
        // The variable stands for something other than a single set:
        // return the original source text of the expression.
        self->fCachedSetLookup = NULL;
        return &expression->fText;
    }

    // The variable refers to exactly one UnicodeSet.  Remember that set and
    // return ffffString, whose character lookupMatcher() treats as the
    // stand-in for the remembered set.
    RBBINode *setNode = expression->fLeftChild;
    self->fCachedSetLookup = setNode->fInputSet;
    return &ffffString;
}
103 
104 
105 
106 //
107 //  RBBISymbolTable::lookupMatcher   This function from the abstract symbol table
108 //                                   interface maps a single stand-in character to a
109 //                                   pointer to a Unicode Set.   The Unicode Set code uses this
110 //                                   mechanism to get all references to the same $variable
111 //                                   name to refer to a single common Unicode Set instance.
112 //
113 //    This implementation cheats a little, and does not maintain a map of stand-in chars
114 //    to sets.  Instead, it takes advantage of the fact that  the UnicodeSet
115 //    constructor will always call this function right after calling lookup(),
116 //    and we just need to remember what set to return between these two calls.
lookupMatcher(UChar32 ch) const117 const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
118 {
119     UnicodeSet *retVal = NULL;
120     RBBISymbolTable *This = (RBBISymbolTable *)this;   // cast off const
121     if (ch == 0xffff) {
122         retVal = fCachedSetLookup;
123         This->fCachedSetLookup = 0;
124     }
125     return retVal;
126 }
127 
128 //
129 // RBBISymbolTable::parseReference   This function from the abstract symbol table interface
130 //                                   looks for a $variable name in the source text.
131 //                                   It does not look it up, only scans for it.
132 //                                   It is used by the UnicodeSet parser.
133 //
134 //                                   This implementation is lifted pretty much verbatim
135 //                                   from the rules based transliterator implementation.
136 //                                   I didn't see an obvious way of sharing it.
137 //
UnicodeString   RBBISymbolTable::parseReference(const UnicodeString& text,
                                                ParsePosition& pos, int32_t limit) const
{
    const int32_t nameStart = pos.getIndex();
    int32_t       nameEnd   = nameStart;

    // Advance over name characters, one UTF-16 code unit at a time.  The first
    // unit must satisfy both u_isIDStart() and u_isIDPart(); later units only
    // need u_isIDPart().
    for (; nameEnd < limit; ++nameEnd) {
        const UChar c = text.charAt(nameEnd);
        const bool startOK = (nameEnd != nameStart) || u_isIDStart(c);
        if (!(startOK && u_isIDPart(c))) {
            break;
        }
    }

    UnicodeString name;
    if (nameEnd != nameStart) {
        // A name was found: move the parse position past it and return its text.
        pos.setIndex(nameEnd);
        text.extractBetween(nameStart, nameEnd, name);
    }
    // Otherwise pos is left unchanged and the empty string signals failure.
    return name;
}
158 
159 
160 
161 //
162 // RBBISymbolTable::lookupNode      Given a key (a variable name), return the
163 //                                  corresponding RBBI Node.  If there is no entry
164 //                                  in the table for this name, return NULL.
165 //
lookupNode(const UnicodeString & key) const166 RBBINode       *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
167 
168     RBBINode             *retNode = NULL;
169     RBBISymbolTableEntry *el;
170 
171     el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
172     if (el != NULL) {
173         retNode = el->val;
174     }
175     return retNode;
176 }
177 
178 
179 //
180 //    RBBISymbolTable::addEntry     Add a new entry to the symbol table.
181 //                                  Indicate an error if the name already exists -
182 //                                    this will only occur in the case of duplicate
183 //                                    variable assignments.
184 //
addEntry(const UnicodeString & key,RBBINode * val,UErrorCode & err)185 void            RBBISymbolTable::addEntry  (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
186     RBBISymbolTableEntry *e;
187     /* test for buffer overflows */
188     if (U_FAILURE(err)) {
189         return;
190     }
191     e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
192     if (e != NULL) {
193         err = U_BRK_VARIABLE_REDFINITION;
194         return;
195     }
196 
197     e = new RBBISymbolTableEntry;
198     if (e == NULL) {
199         err = U_MEMORY_ALLOCATION_ERROR;
200         return;
201     }
202     e->key = key;
203     e->val = val;
204     uhash_put( fHashTable, &e->key, e, &err);
205 }
206 
207 
RBBISymbolTableEntry()208 RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {}
209 
~RBBISymbolTableEntry()210 RBBISymbolTableEntry::~RBBISymbolTableEntry() {
211     // The "val" of a symbol table entry is a variable reference node.
212     // The l. child of the val is the rhs expression from the assignment.
213     // Unlike other node types, children of variable reference nodes are not
214     //    automatically recursively deleted.  We do it manually here.
215     delete val->fLeftChild;
216     val->fLeftChild = NULL;
217 
218     delete  val;
219 
220     // Note: the key UnicodeString is destructed by virtue of being in the object by value.
221 }
222 
223 
224 //
//  RBBISymbolTable::rbbiSymtablePrint    Debugging function, dump out the symbol table contents.
226 //
227 #ifdef RBBI_DEBUG
rbbiSymtablePrint() const228 void RBBISymbolTable::rbbiSymtablePrint() const {
229     RBBIDebugPrintf("Variable Definitions\n"
230            "Name               Node Val     String Val\n"
231            "----------------------------------------------------------------------\n");
232 
233     int32_t pos = UHASH_FIRST;
234     const UHashElement  *e   = NULL;
235     for (;;) {
236         e = uhash_nextElement(fHashTable,  &pos);
237         if (e == NULL ) {
238             break;
239         }
240         RBBISymbolTableEntry  *s   = (RBBISymbolTableEntry *)e->value.pointer;
241 
242         RBBI_DEBUG_printUnicodeString(s->key, 15);
243         RBBIDebugPrintf("   %8p   ", (void *)s->val);
244         RBBI_DEBUG_printUnicodeString(s->val->fLeftChild->fText);
245         RBBIDebugPrintf("\n");
246     }
247 
248     RBBIDebugPrintf("\nParsed Variable Definitions\n");
249     pos = -1;
250     for (;;) {
251         e = uhash_nextElement(fHashTable,  &pos);
252         if (e == NULL ) {
253             break;
254         }
255         RBBISymbolTableEntry  *s   = (RBBISymbolTableEntry *)e->value.pointer;
256         RBBI_DEBUG_printUnicodeString(s->key);
257         s->val->fLeftChild->printTree(TRUE);
258         RBBIDebugPrintf("\n");
259     }
260 }
261 #endif
262 
263 
264 
265 
266 
267 U_NAMESPACE_END
268 
269 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
270