1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 *************************************************************************** 5 * Copyright (C) 2002-2009 International Business Machines Corporation * 6 * and others. All rights reserved. * 7 *************************************************************************** 8 */ 9 package com.ibm.icu.text; 10 11 import java.text.ParsePosition; 12 import java.util.HashMap; 13 14 import com.ibm.icu.lang.UCharacter; 15 16 class RBBISymbolTable implements SymbolTable{ 17 18 HashMap<String, RBBISymbolTableEntry> fHashTable; 19 RBBIRuleScanner fRuleScanner; 20 21 // These next two fields are part of the mechanism for passing references to 22 // already-constructed UnicodeSets back to the UnicodeSet constructor 23 // when the pattern includes $variable references. 24 String ffffString; 25 UnicodeSet fCachedSetLookup; 26 27 28 29 static class RBBISymbolTableEntry { 30 String key; 31 RBBINode val; 32 } 33 34 RBBISymbolTable(RBBIRuleScanner rs)35 RBBISymbolTable(RBBIRuleScanner rs) { 36 fRuleScanner = rs; 37 fHashTable = new HashMap<String, RBBISymbolTableEntry>(); 38 ffffString = "\uffff"; 39 } 40 41 // 42 // RBBISymbolTable::lookup This function from the abstract symbol table inteface 43 // looks up a variable name and returns a UnicodeString 44 // containing the substitution text. 45 // 46 // The variable name does NOT include the leading $. 47 // lookup(String s)48 public char[] lookup(String s) { 49 RBBISymbolTableEntry el; 50 RBBINode varRefNode; 51 RBBINode exprNode; 52 53 RBBINode usetNode; 54 String retString; 55 56 el = fHashTable.get(s); 57 if (el == null) { 58 return null; 59 } 60 61 // Walk through any chain of variable assignments that ultimately resolve to a Set Ref. 62 varRefNode = el.val; 63 while (varRefNode.fLeftChild.fType == RBBINode.varRef) { 64 varRefNode = varRefNode.fLeftChild; 65 } 66 67 exprNode = varRefNode.fLeftChild; // Root node of expression for variable 68 if (exprNode.fType == RBBINode.setRef) { 69 // The $variable refers to a single UnicodeSet 70 // return the ffffString, which will subsequently be interpreted as a 71 // stand-in character for the set by RBBISymbolTable::lookupMatcher() 72 usetNode = exprNode.fLeftChild; 73 fCachedSetLookup = usetNode.fInputSet; 74 retString = ffffString; 75 } else { 76 // The variable refers to something other than just a set. 77 // This is an error in the rules being compiled. $Variables inside of UnicodeSets 78 // must refer only to another set, not to some random non-set expression. 79 // Note: single characters are represented as sets, so they are ok. 80 fRuleScanner.error(RBBIRuleBuilder.U_BRK_MALFORMED_SET); 81 retString = exprNode.fText; 82 fCachedSetLookup = null; 83 } 84 return retString.toCharArray(); 85 } 86 87 // 88 // RBBISymbolTable::lookupMatcher This function from the abstract symbol table 89 // interface maps a single stand-in character to a 90 // pointer to a Unicode Set. The Unicode Set code uses this 91 // mechanism to get all references to the same $variable 92 // name to refer to a single common Unicode Set instance. 93 // 94 // This implementation cheats a little, and does not maintain a map of stand-in chars 95 // to sets. Instead, it takes advantage of the fact that the UnicodeSet 96 // constructor will always call this function right after calling lookup(), 97 // and we just need to remember what set to return between these two calls. lookupMatcher(int ch)98 public UnicodeMatcher lookupMatcher(int ch) { 99 UnicodeSet retVal = null; 100 if (ch == 0xffff) { 101 retVal = fCachedSetLookup; 102 fCachedSetLookup = null; 103 } 104 return retVal; 105 } 106 107 // 108 // RBBISymbolTable::parseReference This function from the abstract symbol table interface 109 // looks for a $variable name in the source text. 110 // It does not look it up, only scans for it. 111 // It is used by the UnicodeSet parser. 112 // parseReference(String text, ParsePosition pos, int limit)113 public String parseReference(String text, ParsePosition pos, int limit) { 114 int start = pos.getIndex(); 115 int i = start; 116 String result = ""; 117 while (i < limit) { 118 int c = UTF16.charAt(text, i); 119 if ((i == start && !UCharacter.isUnicodeIdentifierStart(c)) 120 || !UCharacter.isUnicodeIdentifierPart(c)) { 121 break; 122 } 123 i += UTF16.getCharCount(c); 124 } 125 if (i == start) { // No valid name chars 126 return result; // Indicate failure with empty string 127 } 128 pos.setIndex(i); 129 result = text.substring(start, i); 130 return result; 131 } 132 133 // 134 // RBBISymbolTable::lookupNode Given a key (a variable name), return the 135 // corresponding RBBI Node. If there is no entry 136 // in the table for this name, return NULL. 137 // lookupNode(String key)138 RBBINode lookupNode(String key) { 139 140 RBBINode retNode = null; 141 RBBISymbolTableEntry el; 142 143 el = fHashTable.get(key); 144 if (el != null) { 145 retNode = el.val; 146 } 147 return retNode; 148 } 149 150 // 151 // RBBISymbolTable::addEntry Add a new entry to the symbol table. 152 // Indicate an error if the name already exists - 153 // this will only occur in the case of duplicate 154 // variable assignments. 155 // addEntry(String key, RBBINode val)156 void addEntry(String key, RBBINode val) { 157 RBBISymbolTableEntry e; 158 e = fHashTable.get(key); 159 if (e != null) { 160 fRuleScanner.error(RBBIRuleBuilder.U_BRK_VARIABLE_REDFINITION); 161 return; 162 } 163 164 e = new RBBISymbolTableEntry(); 165 e.key = key; 166 e.val = val; 167 fHashTable.put(e.key, e); 168 } 169 170 // 171 // RBBISymbolTable::print Debugging function, dump out the symbol table contents. 172 // 173 ///CLOVER:OFF rbbiSymtablePrint()174 void rbbiSymtablePrint() { 175 System.out 176 .print("Variable Definitions\n" 177 + "Name Node Val String Val\n" 178 + "----------------------------------------------------------------------\n"); 179 180 RBBISymbolTableEntry[] syms = fHashTable.values().toArray(new RBBISymbolTableEntry[0]); 181 182 for (int i = 0; i < syms.length; i++) { 183 RBBISymbolTableEntry s = syms[i]; 184 185 System.out.print(" " + s.key + " "); // TODO: format output into columns. 186 System.out.print(" " + s.val + " "); 187 System.out.print(s.val.fLeftChild.fText); 188 System.out.print("\n"); 189 } 190 191 System.out.println("\nParsed Variable Definitions\n"); 192 for (int i = 0; i < syms.length; i++) { 193 RBBISymbolTableEntry s = syms[i]; 194 System.out.print(s.key); 195 s.val.fLeftChild.printTree(true); 196 System.out.print("\n"); 197 } 198 } 199 ///CLOVER:ON 200 201 } 202