1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 *************************************************************************** 5 * Copyright (C) 2002-2016 International Business Machines Corporation * 6 * and others. All rights reserved. * 7 *************************************************************************** 8 */ 9 10 // 11 // File: rbbinode.cpp 12 // 13 // Implementation of class RBBINode, which represents a node in the 14 // tree generated when parsing the Rules Based Break Iterator rules. 15 // 16 // This "Class" is actually closer to a struct. 17 // Code using it is expected to directly access fields much of the time. 18 // 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_BREAK_ITERATION 23 24 #include "unicode/unistr.h" 25 #include "unicode/uniset.h" 26 #include "unicode/uchar.h" 27 #include "unicode/parsepos.h" 28 29 #include "cstr.h" 30 #include "uvector.h" 31 32 #include "rbbirb.h" 33 #include "rbbinode.h" 34 35 #include "uassert.h" 36 37 38 U_NAMESPACE_BEGIN 39 40 #ifdef RBBI_DEBUG 41 static int gLastSerial = 0; 42 #endif 43 44 45 //------------------------------------------------------------------------- 46 // 47 // Constructor. Just set the fields to reasonable default values. 48 // 49 //------------------------------------------------------------------------- 50 RBBINode::RBBINode(NodeType t) : UMemory() { 51 #ifdef RBBI_DEBUG 52 fSerialNum = ++gLastSerial; 53 #endif 54 fType = t; 55 fParent = NULL; 56 fLeftChild = NULL; 57 fRightChild = NULL; 58 fInputSet = NULL; 59 fFirstPos = 0; 60 fLastPos = 0; 61 fNullable = FALSE; 62 fLookAheadEnd = FALSE; 63 fRuleRoot = FALSE; 64 fChainIn = FALSE; 65 fVal = 0; 66 fPrecedence = precZero; 67 68 UErrorCode status = U_ZERO_ERROR; 69 fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere 70 fLastPosSet = new UVector(status); 71 fFollowPos = new UVector(status); 72 if (t==opCat) {fPrecedence = precOpCat;} 73 else if (t==opOr) {fPrecedence = precOpOr;} 74 else if (t==opStart) {fPrecedence = precStart;} 75 else if (t==opLParen) {fPrecedence = precLParen;} 76 77 } 78 79 80 RBBINode::RBBINode(const RBBINode &other) : UMemory(other) { 81 #ifdef RBBI_DEBUG 82 fSerialNum = ++gLastSerial; 83 #endif 84 fType = other.fType; 85 fParent = NULL; 86 fLeftChild = NULL; 87 fRightChild = NULL; 88 fInputSet = other.fInputSet; 89 fPrecedence = other.fPrecedence; 90 fText = other.fText; 91 fFirstPos = other.fFirstPos; 92 fLastPos = other.fLastPos; 93 fNullable = other.fNullable; 94 fVal = other.fVal; 95 fRuleRoot = FALSE; 96 fChainIn = other.fChainIn; 97 UErrorCode status = U_ZERO_ERROR; 98 fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere 99 fLastPosSet = new UVector(status); 100 fFollowPos = new UVector(status); 101 } 102 103 104 //------------------------------------------------------------------------- 105 // 106 // Destructor. Deletes both this node AND any child nodes, 107 // except in the case of variable reference nodes. For 108 // these, the l. child points back to the definition, which 109 // is common for all references to the variable, meaning 110 // it can't be deleted here. 111 // 112 //------------------------------------------------------------------------- 113 RBBINode::~RBBINode() { 114 // printf("deleting node %8x serial %4d\n", this, this->fSerialNum); 115 delete fInputSet; 116 fInputSet = NULL; 117 118 switch (this->fType) { 119 case varRef: 120 case setRef: 121 // for these node types, multiple instances point to the same "children" 122 // Storage ownership of children handled elsewhere. Don't delete here. 123 break; 124 125 default: 126 delete fLeftChild; 127 fLeftChild = NULL; 128 delete fRightChild; 129 fRightChild = NULL; 130 } 131 132 133 delete fFirstPosSet; 134 delete fLastPosSet; 135 delete fFollowPos; 136 137 } 138 139 140 //------------------------------------------------------------------------- 141 // 142 // cloneTree Make a copy of the subtree rooted at this node. 143 // Discard any variable references encountered along the way, 144 // and replace with copies of the variable's definitions. 145 // Used to replicate the expression underneath variable 146 // references in preparation for generating the DFA tables. 147 // 148 //------------------------------------------------------------------------- 149 RBBINode *RBBINode::cloneTree() { 150 RBBINode *n; 151 152 if (fType == RBBINode::varRef) { 153 // If the current node is a variable reference, skip over it 154 // and clone the definition of the variable instead. 155 n = fLeftChild->cloneTree(); 156 } else if (fType == RBBINode::uset) { 157 n = this; 158 } else { 159 n = new RBBINode(*this); 160 // Check for null pointer. 161 if (n != NULL) { 162 if (fLeftChild != NULL) { 163 n->fLeftChild = fLeftChild->cloneTree(); 164 n->fLeftChild->fParent = n; 165 } 166 if (fRightChild != NULL) { 167 n->fRightChild = fRightChild->cloneTree(); 168 n->fRightChild->fParent = n; 169 } 170 } 171 } 172 return n; 173 } 174 175 176 177 //------------------------------------------------------------------------- 178 // 179 // flattenVariables Walk a parse tree, replacing any variable 180 // references with a copy of the variable's definition. 181 // Aside from variables, the tree is not changed. 182 // 183 // Return the root of the tree. If the root was not a variable 184 // reference, it remains unchanged - the root we started with 185 // is the root we return. If, however, the root was a variable 186 // reference, the root of the newly cloned replacement tree will 187 // be returned, and the original tree deleted. 188 // 189 // This function works by recursively walking the tree 190 // without doing anything until a variable reference is 191 // found, then calling cloneTree() at that point. Any 192 // nested references are handled by cloneTree(), not here. 193 // 194 //------------------------------------------------------------------------- 195 RBBINode *RBBINode::flattenVariables() { 196 if (fType == varRef) { 197 RBBINode *retNode = fLeftChild->cloneTree(); 198 if (retNode != NULL) { 199 retNode->fRuleRoot = this->fRuleRoot; 200 retNode->fChainIn = this->fChainIn; 201 } 202 delete this; // TODO: undefined behavior. Fix. 203 return retNode; 204 } 205 206 if (fLeftChild != NULL) { 207 fLeftChild = fLeftChild->flattenVariables(); 208 fLeftChild->fParent = this; 209 } 210 if (fRightChild != NULL) { 211 fRightChild = fRightChild->flattenVariables(); 212 fRightChild->fParent = this; 213 } 214 return this; 215 } 216 217 218 //------------------------------------------------------------------------- 219 // 220 // flattenSets Walk the parse tree, replacing any nodes of type setRef 221 // with a copy of the expression tree for the set. A set's 222 // equivalent expression tree is precomputed and saved as 223 // the left child of the uset node. 224 // 225 //------------------------------------------------------------------------- 226 void RBBINode::flattenSets() { 227 U_ASSERT(fType != setRef); 228 229 if (fLeftChild != NULL) { 230 if (fLeftChild->fType==setRef) { 231 RBBINode *setRefNode = fLeftChild; 232 RBBINode *usetNode = setRefNode->fLeftChild; 233 RBBINode *replTree = usetNode->fLeftChild; 234 fLeftChild = replTree->cloneTree(); 235 fLeftChild->fParent = this; 236 delete setRefNode; 237 } else { 238 fLeftChild->flattenSets(); 239 } 240 } 241 242 if (fRightChild != NULL) { 243 if (fRightChild->fType==setRef) { 244 RBBINode *setRefNode = fRightChild; 245 RBBINode *usetNode = setRefNode->fLeftChild; 246 RBBINode *replTree = usetNode->fLeftChild; 247 fRightChild = replTree->cloneTree(); 248 fRightChild->fParent = this; 249 delete setRefNode; 250 } else { 251 fRightChild->flattenSets(); 252 } 253 } 254 } 255 256 257 258 //------------------------------------------------------------------------- 259 // 260 // findNodes() Locate all the nodes of the specified type, starting 261 // at the specified root. 262 // 263 //------------------------------------------------------------------------- 264 void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) { 265 /* test for buffer overflows */ 266 if (U_FAILURE(status)) { 267 return; 268 } 269 if (fType == kind) { 270 dest->addElement(this, status); 271 } 272 if (fLeftChild != NULL) { 273 fLeftChild->findNodes(dest, kind, status); 274 } 275 if (fRightChild != NULL) { 276 fRightChild->findNodes(dest, kind, status); 277 } 278 } 279 280 281 //------------------------------------------------------------------------- 282 // 283 // print. Print out a single node, for debugging. 284 // 285 //------------------------------------------------------------------------- 286 #ifdef RBBI_DEBUG 287 288 static int32_t serial(const RBBINode *node) { 289 return (node == NULL? -1 : node->fSerialNum); 290 } 291 292 293 void RBBINode::printNode(const RBBINode *node) { 294 static const char * const nodeTypeNames[] = { 295 "setRef", 296 "uset", 297 "varRef", 298 "leafChar", 299 "lookAhead", 300 "tag", 301 "endMark", 302 "opStart", 303 "opCat", 304 "opOr", 305 "opStar", 306 "opPlus", 307 "opQuestion", 308 "opBreak", 309 "opReverse", 310 "opLParen" 311 }; 312 313 if (node==NULL) { 314 RBBIDebugPrintf("%10p", (void *)node); 315 } else { 316 RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ", 317 (void *)node, node->fSerialNum, nodeTypeNames[node->fType], 318 node->fRuleRoot?'R':' ', node->fChainIn?'C':' ', 319 serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent), 320 node->fFirstPos, node->fVal); 321 if (node->fType == varRef) { 322 RBBI_DEBUG_printUnicodeString(node->fText); 323 } 324 } 325 RBBIDebugPrintf("\n"); 326 } 327 #endif 328 329 330 #ifdef RBBI_DEBUG 331 U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) { 332 RBBIDebugPrintf("%*s", minWidth, CStr(s)()); 333 } 334 #endif 335 336 337 //------------------------------------------------------------------------- 338 // 339 // print. Print out the tree of nodes rooted at "this" 340 // 341 //------------------------------------------------------------------------- 342 #ifdef RBBI_DEBUG 343 void RBBINode::printNodeHeader() { 344 RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n"); 345 } 346 347 void RBBINode::printTree(const RBBINode *node, UBool printHeading) { 348 if (printHeading) { 349 printNodeHeader(); 350 } 351 printNode(node); 352 if (node != NULL) { 353 // Only dump the definition under a variable reference if asked to. 354 // Unconditinally dump children of all other node types. 355 if (node->fType != varRef) { 356 if (node->fLeftChild != NULL) { 357 printTree(node->fLeftChild, FALSE); 358 } 359 360 if (node->fRightChild != NULL) { 361 printTree(node->fRightChild, FALSE); 362 } 363 } 364 } 365 } 366 #endif 367 368 369 370 U_NAMESPACE_END 371 372 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 373