1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 2001-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 9 package com.ibm.icu.text; 10 11 import java.util.HashSet; 12 import java.util.List; 13 import java.util.Set; 14 15 import com.ibm.icu.impl.Assert; 16 17 /** 18 * This class represents a node in the parse tree created by the RBBI Rule compiler. 19 */ 20 class RBBINode { 21 22 23 // enum NodeType { 24 static final int setRef = 0; 25 static final int uset = 1; 26 static final int varRef = 2; 27 static final int leafChar = 3; 28 static final int lookAhead = 4; 29 static final int tag = 5; 30 static final int endMark = 6; 31 static final int opStart = 7; 32 static final int opCat = 8; 33 static final int opOr = 9; 34 static final int opStar = 10; 35 static final int opPlus = 11; 36 static final int opQuestion = 12; 37 static final int opBreak = 13; 38 static final int opReverse = 14; 39 static final int opLParen = 15; 40 static final int nodeTypeLimit = 16; // For Assertion checking only. 41 42 static final String [] nodeTypeNames = { 43 "setRef", 44 "uset", 45 "varRef", 46 "leafChar", 47 "lookAhead", 48 "tag", 49 "endMark", 50 "opStart", 51 "opCat", 52 "opOr", 53 "opStar", 54 "opPlus", 55 "opQuestion", 56 "opBreak", 57 "opReverse", 58 "opLParen" 59 }; 60 61 // enum OpPrecedence { 62 static final int precZero = 0; 63 static final int precStart = 1; 64 static final int precLParen = 2; 65 static final int precOpOr = 3; 66 static final int precOpCat = 4; 67 68 int fType; // enum NodeType 69 RBBINode fParent; 70 RBBINode fLeftChild; 71 RBBINode fRightChild; 72 UnicodeSet fInputSet; // For uset nodes only. 73 int fPrecedence = precZero; // enum OpPrecedence, For binary ops only. 74 75 String fText; // Text corresponding to this node. 76 // May be lazily evaluated when (if) needed 77 // for some node types. 78 int fFirstPos; // Position in the rule source string of the 79 // first text associated with the node. 80 // If there's a left child, this will be the same 81 // as that child's left pos. 82 int fLastPos; // Last position in the rule source string 83 // of any text associated with this node. 84 // If there's a right child, this will be the same 85 // as that child's last postion. 86 87 boolean fNullable; // See Aho DFA table generation algorithm 88 int fVal; // For leafChar nodes, the value. 89 // Values are the character category, 90 // corresponds to columns in the final 91 // state transition table. 92 93 boolean fLookAheadEnd; // For endMark nodes, set TRUE if 94 // marking the end of a look-ahead rule. 95 96 boolean fRuleRoot; // True if this node is the root of a rule. 97 boolean fChainIn; // True if chaining into this rule is allowed 98 // (no '^' present). 99 100 101 Set<RBBINode> fFirstPosSet; // See Aho DFA table generation algorithm 102 Set<RBBINode> fLastPosSet; // See Aho. 103 Set<RBBINode> fFollowPos; // See Aho. 104 105 int fSerialNum; // Debugging aids. Each node gets a unique serial number. 106 static int gLastSerial; 107 RBBINode(int t)108 RBBINode(int t) { 109 Assert.assrt(t < nodeTypeLimit); 110 fSerialNum = ++gLastSerial; 111 fType = t; 112 113 fFirstPosSet = new HashSet<RBBINode>(); 114 fLastPosSet = new HashSet<RBBINode>(); 115 fFollowPos = new HashSet<RBBINode>(); 116 if (t == opCat) { 117 fPrecedence = precOpCat; 118 } else if (t == opOr) { 119 fPrecedence = precOpOr; 120 } else if (t == opStart) { 121 fPrecedence = precStart; 122 } else if (t == opLParen) { 123 fPrecedence = precLParen; 124 } else { 125 fPrecedence = precZero; 126 } 127 } 128 129 RBBINode(RBBINode other) { 130 fSerialNum = ++gLastSerial; 131 fType = other.fType; 132 fInputSet = other.fInputSet; 133 fPrecedence = other.fPrecedence; 134 fText = other.fText; 135 fFirstPos = other.fFirstPos; 136 fLastPos = other.fLastPos; 137 fNullable = other.fNullable; 138 fVal = other.fVal; 139 fRuleRoot = false; 140 fChainIn = other.fChainIn; 141 fFirstPosSet = new HashSet<RBBINode>(other.fFirstPosSet); 142 fLastPosSet = new HashSet<RBBINode>(other.fLastPosSet); 143 fFollowPos = new HashSet<RBBINode>(other.fFollowPos); 144 } 145 146 //------------------------------------------------------------------------- 147 // 148 // cloneTree Make a copy of the subtree rooted at this node. 149 // Discard any variable references encountered along the way, 150 // and replace with copies of the variable's definitions. 151 // Used to replicate the expression underneath variable 152 // references in preparation for generating the DFA tables. 153 // 154 //------------------------------------------------------------------------- 155 RBBINode cloneTree() { 156 RBBINode n; 157 158 if (fType == RBBINode.varRef) { 159 // If the current node is a variable reference, skip over it 160 // and clone the definition of the variable instead. 161 n = fLeftChild.cloneTree(); 162 } else if (fType == RBBINode.uset) { 163 n = this; 164 } else { 165 n = new RBBINode(this); 166 if (fLeftChild != null) { 167 n.fLeftChild = fLeftChild.cloneTree(); 168 n.fLeftChild.fParent = n; 169 } 170 if (fRightChild != null) { 171 n.fRightChild = fRightChild.cloneTree(); 172 n.fRightChild.fParent = n; 173 } 174 } 175 return n; 176 } 177 178 179 180 //------------------------------------------------------------------------- 181 // 182 // flattenVariables Walk a parse tree, replacing any variable 183 // references with a copy of the variable's definition. 184 // Aside from variables, the tree is not changed. 185 // 186 // Return the root of the tree. If the root was not a variable 187 // reference, it remains unchanged - the root we started with 188 // is the root we return. If, however, the root was a variable 189 // reference, the root of the newly cloned replacement tree will 190 // be returned, and the original tree deleted. 191 // 192 // This function works by recursively walking the tree 193 // without doing anything until a variable reference is 194 // found, then calling cloneTree() at that point. Any 195 // nested references are handled by cloneTree(), not here. 196 // 197 //------------------------------------------------------------------------- 198 RBBINode flattenVariables() { 199 if (fType == varRef) { 200 RBBINode retNode = fLeftChild.cloneTree(); 201 retNode.fRuleRoot = this.fRuleRoot; 202 retNode.fChainIn = this.fChainIn; 203 return retNode; 204 } 205 206 if (fLeftChild != null) { 207 fLeftChild = fLeftChild.flattenVariables(); 208 fLeftChild.fParent = this; 209 } 210 if (fRightChild != null) { 211 fRightChild = fRightChild.flattenVariables(); 212 fRightChild.fParent = this; 213 } 214 return this; 215 } 216 217 //------------------------------------------------------------------------- 218 // 219 // flattenSets Walk the parse tree, replacing any nodes of type setRef 220 // with a copy of the expression tree for the set. A set's 221 // equivalent expression tree is precomputed and saved as 222 // the left child of the uset node. 223 // 224 //------------------------------------------------------------------------- 225 void flattenSets() { 226 Assert.assrt(fType != setRef); 227 228 if (fLeftChild != null) { 229 if (fLeftChild.fType == setRef) { 230 RBBINode setRefNode = fLeftChild; 231 RBBINode usetNode = setRefNode.fLeftChild; 232 RBBINode replTree = usetNode.fLeftChild; 233 fLeftChild = replTree.cloneTree(); 234 fLeftChild.fParent = this; 235 } else { 236 fLeftChild.flattenSets(); 237 } 238 } 239 240 if (fRightChild != null) { 241 if (fRightChild.fType == setRef) { 242 RBBINode setRefNode = fRightChild; 243 RBBINode usetNode = setRefNode.fLeftChild; 244 RBBINode replTree = usetNode.fLeftChild; 245 fRightChild = replTree.cloneTree(); 246 fRightChild.fParent = this; 247 // delete setRefNode; 248 } else { 249 fRightChild.flattenSets(); 250 } 251 } 252 } 253 254 //------------------------------------------------------------------------- 255 // 256 // findNodes() Locate all the nodes of the specified type, starting 257 // at the specified root. 258 // 259 //------------------------------------------------------------------------- 260 void findNodes(List<RBBINode> dest, int kind) { 261 if (fType == kind) { 262 dest.add(this); 263 } 264 if (fLeftChild != null) { 265 fLeftChild.findNodes(dest, kind); 266 } 267 if (fRightChild != null) { 268 fRightChild.findNodes(dest, kind); 269 } 270 } 271 272 273 274 //------------------------------------------------------------------------- 275 // 276 // print. Print out a single node, for debugging. 277 // 278 //------------------------------------------------------------------------- 279 ///CLOVER:OFF 280 static void printNode(RBBINode n) { 281 282 if (n==null) { 283 System.out.print (" -- null --\n"); 284 } else { 285 RBBINode.printInt( n.fSerialNum, 10); 286 RBBINode.printString(nodeTypeNames[n.fType], 11); 287 RBBINode.printInt(n.fParent==null? 0 : n.fParent.fSerialNum, 11); 288 RBBINode.printInt(n.fLeftChild==null? 0 : n.fLeftChild.fSerialNum, 11); 289 RBBINode.printInt(n.fRightChild==null? 0 : n.fRightChild.fSerialNum, 12); 290 RBBINode.printInt(n.fFirstPos, 12); 291 RBBINode.printInt(n.fVal, 7); 292 293 if (n.fType == varRef) { 294 System.out.print(" " + n.fText); 295 } 296 } 297 System.out.println(""); 298 } 299 ///CLOVER:ON 300 301 302 // Print a String in a fixed field size. 303 // Debugging function. 304 ///CLOVER:OFF 305 static void printString(String s, int minWidth) { 306 for (int i = minWidth; i < 0; i++) { 307 // negative width means pad leading spaces, not fixed width. 308 System.out.print(' '); 309 } 310 for (int i = s.length(); i < minWidth; i++) { 311 System.out.print(' '); 312 } 313 System.out.print(s); 314 } 315 ///CLOVER:ON 316 317 // 318 // Print an int in a fixed size field. 319 // Debugging function. 320 // 321 ///CLOVER:OFF 322 static void printInt(int i, int minWidth) { 323 String s = Integer.toString(i); 324 printString(s, Math.max(minWidth, s.length() + 1)); 325 } 326 ///CLOVER:ON 327 328 ///CLOVER:OFF 329 static void printHex(int i, int minWidth) { 330 String s = Integer.toString(i, 16); 331 String leadingZeroes = "00000" 332 .substring(0, Math.max(0, 5 - s.length())); 333 s = leadingZeroes + s; 334 printString(s, minWidth); 335 } 336 ///CLOVER:ON 337 338 339 // ------------------------------------------------------------------------- 340 // 341 // print. Print out the tree of nodes rooted at "this" 342 // 343 // ------------------------------------------------------------------------- 344 ///CLOVER:OFF 345 void printTree(boolean printHeading) { 346 if (printHeading) { 347 System.out.println( "-------------------------------------------------------------------"); 348 System.out.println(" Serial type Parent LeftChild RightChild position value"); 349 } 350 printNode(this); 351 // Only dump the definition under a variable reference if asked to. 352 // Unconditinally dump children of all other node types. 353 if (fType != varRef) { 354 if (fLeftChild != null) { 355 fLeftChild.printTree(false); 356 } 357 358 if (fRightChild != null) { 359 fRightChild.printTree(false); 360 } 361 } 362 } 363 ///CLOVER:ON 364 365 } 366