// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
***************************************************************************
*   Copyright (C) 2002-2009 International Business Machines Corporation   *
*   and others. All rights reserved.                                      *
***************************************************************************
*/
9 package com.ibm.icu.text;
10 
11 import java.text.ParsePosition;
12 import java.util.HashMap;
13 
14 import com.ibm.icu.lang.UCharacter;
15 
16 class RBBISymbolTable implements SymbolTable{
17 
18     HashMap<String, RBBISymbolTableEntry> fHashTable;
19     RBBIRuleScanner      fRuleScanner;
20 
21     // These next two fields are part of the mechanism for passing references to
22     //   already-constructed UnicodeSets back to the UnicodeSet constructor
23     //   when the pattern includes $variable references.
24     String               ffffString;
25     UnicodeSet           fCachedSetLookup;
26 
27 
28 
29     static class RBBISymbolTableEntry  {
30         String          key;
31         RBBINode        val;
32     }
33 
34 
RBBISymbolTable(RBBIRuleScanner rs)35     RBBISymbolTable(RBBIRuleScanner rs) {
36         fRuleScanner = rs;
37         fHashTable = new HashMap<String, RBBISymbolTableEntry>();
38         ffffString = "\uffff";
39     }
40 
41     //
42     //  RBBISymbolTable::lookup       This function from the abstract symbol table inteface
43     //                                looks up a variable name and returns a UnicodeString
44     //                                containing the substitution text.
45     //
46     //                                The variable name does NOT include the leading $.
47     //
lookup(String s)48     public char[] lookup(String s) {
49         RBBISymbolTableEntry el;
50         RBBINode varRefNode;
51         RBBINode exprNode;
52 
53         RBBINode usetNode;
54         String retString;
55 
56         el = fHashTable.get(s);
57         if (el == null) {
58             return null;
59         }
60 
61         // Walk through any chain of variable assignments that ultimately resolve to a Set Ref.
62         varRefNode = el.val;
63         while (varRefNode.fLeftChild.fType == RBBINode.varRef) {
64             varRefNode = varRefNode.fLeftChild;
65         }
66 
67         exprNode = varRefNode.fLeftChild; // Root node of expression for variable
68         if (exprNode.fType == RBBINode.setRef) {
69             // The $variable refers to a single UnicodeSet
70             //   return the ffffString, which will subsequently be interpreted as a
71             //   stand-in character for the set by RBBISymbolTable::lookupMatcher()
72             usetNode = exprNode.fLeftChild;
73             fCachedSetLookup = usetNode.fInputSet;
74             retString = ffffString;
75         } else {
76             // The variable refers to something other than just a set.
77             // This is an error in the rules being compiled.  $Variables inside of UnicodeSets
78             //   must refer only to another set, not to some random non-set expression.
79             //   Note:  single characters are represented as sets, so they are ok.
80             fRuleScanner.error(RBBIRuleBuilder.U_BRK_MALFORMED_SET);
81             retString = exprNode.fText;
82             fCachedSetLookup = null;
83         }
84         return retString.toCharArray();
85     }
86 
87     //
88     //  RBBISymbolTable::lookupMatcher   This function from the abstract symbol table
89     //                                   interface maps a single stand-in character to a
90     //                                   pointer to a Unicode Set.   The Unicode Set code uses this
91     //                                   mechanism to get all references to the same $variable
92     //                                   name to refer to a single common Unicode Set instance.
93     //
94     //    This implementation cheats a little, and does not maintain a map of stand-in chars
95     //    to sets.  Instead, it takes advantage of the fact that  the UnicodeSet
96     //    constructor will always call this function right after calling lookup(),
97     //    and we just need to remember what set to return between these two calls.
lookupMatcher(int ch)98     public UnicodeMatcher lookupMatcher(int ch) {
99         UnicodeSet retVal = null;
100         if (ch == 0xffff) {
101             retVal = fCachedSetLookup;
102             fCachedSetLookup = null;
103         }
104         return retVal;
105     }
106 
107     //
108     // RBBISymbolTable::parseReference   This function from the abstract symbol table interface
109     //                                   looks for a $variable name in the source text.
110     //                                   It does not look it up, only scans for it.
111     //                                   It is used by the UnicodeSet parser.
112     //
parseReference(String text, ParsePosition pos, int limit)113     public String parseReference(String text, ParsePosition pos, int limit) {
114         int start = pos.getIndex();
115         int i = start;
116         String result = "";
117         while (i < limit) {
118             int c = UTF16.charAt(text, i);
119             if ((i == start && !UCharacter.isUnicodeIdentifierStart(c))
120                     || !UCharacter.isUnicodeIdentifierPart(c)) {
121                 break;
122             }
123             i += UTF16.getCharCount(c);
124         }
125         if (i == start) { // No valid name chars
126             return result; // Indicate failure with empty string
127         }
128         pos.setIndex(i);
129         result = text.substring(start, i);
130         return result;
131     }
132 
133     //
134     // RBBISymbolTable::lookupNode      Given a key (a variable name), return the
135     //                                  corresponding RBBI Node.  If there is no entry
136     //                                  in the table for this name, return NULL.
137     //
lookupNode(String key)138     RBBINode lookupNode(String key) {
139 
140         RBBINode retNode = null;
141         RBBISymbolTableEntry el;
142 
143         el = fHashTable.get(key);
144         if (el != null) {
145             retNode = el.val;
146         }
147         return retNode;
148     }
149 
150     //
151     //    RBBISymbolTable::addEntry     Add a new entry to the symbol table.
152     //                                  Indicate an error if the name already exists -
153     //                                    this will only occur in the case of duplicate
154     //                                    variable assignments.
155     //
addEntry(String key, RBBINode val)156     void addEntry(String key, RBBINode val) {
157         RBBISymbolTableEntry e;
158         e = fHashTable.get(key);
159         if (e != null) {
160             fRuleScanner.error(RBBIRuleBuilder.U_BRK_VARIABLE_REDFINITION);
161             return;
162         }
163 
164         e = new RBBISymbolTableEntry();
165         e.key = key;
166         e.val = val;
167         fHashTable.put(e.key, e);
168     }
169 
170     //
171     //  RBBISymbolTable::print    Debugging function, dump out the symbol table contents.
172     //
173     ///CLOVER:OFF
rbbiSymtablePrint()174     void rbbiSymtablePrint() {
175         System.out
176                 .print("Variable Definitions\n"
177                         + "Name               Node Val     String Val\n"
178                         + "----------------------------------------------------------------------\n");
179 
180         RBBISymbolTableEntry[] syms = fHashTable.values().toArray(new RBBISymbolTableEntry[0]);
181 
182         for (int i = 0; i < syms.length; i++) {
183             RBBISymbolTableEntry s = syms[i];
184 
185             System.out.print("  " + s.key + "  "); // TODO:  format output into columns.
186             System.out.print("  " + s.val + "  ");
187             System.out.print(s.val.fLeftChild.fText);
188             System.out.print("\n");
189         }
190 
191         System.out.println("\nParsed Variable Definitions\n");
192         for (int i = 0; i < syms.length; i++) {
193             RBBISymbolTableEntry s = syms[i];
194             System.out.print(s.key);
195             s.val.fLeftChild.printTree(true);
196             System.out.print("\n");
197         }
198     }
199     ///CLOVER:ON
200 
201 }
202