1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ********************************************************************** 5 * Copyright (c) 2002-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 */ 9 10 package com.ibm.icu.text; 11 12 import java.util.ArrayList; 13 import java.util.Arrays; 14 import java.util.Collection; 15 import java.util.HashSet; 16 import java.util.List; 17 import java.util.Set; 18 import java.util.SortedSet; 19 import java.util.TreeSet; 20 21 import com.ibm.icu.impl.Assert; 22 import com.ibm.icu.impl.RBBIDataWrapper; 23 import com.ibm.icu.lang.UCharacter; 24 import com.ibm.icu.lang.UProperty; 25 import com.ibm.icu.text.RBBIRuleBuilder.IntPair; 26 27 /** 28 * This class is part of the RBBI rule compiler. 29 * It builds the state transition table used by the RBBI runtime 30 * from the expression syntax tree generated by the rule scanner. 31 * 32 * This class is part of the RBBI implementation only. 33 * There is no user-visible public API here. 34 */ 35 class RBBITableBuilder { 36 37 // 38 // RBBIStateDescriptor - The DFA is initially constructed as a set of these descriptors, 39 // one for each state. 40 static class RBBIStateDescriptor { 41 boolean fMarked; 42 int fAccepting; 43 int fLookAhead; 44 SortedSet<Integer> fTagVals; 45 int fTagsIdx; 46 Set<RBBINode> fPositions; // Set of parse tree positions associated 47 // with this state. Unordered (it's a set). 48 // UVector contents are RBBINode * 49 50 int[] fDtran; // Transitions out of this state. 51 // indexed by input character 52 // contents is int index of dest state 53 // in RBBITableBuilder.fDStates 54 RBBIStateDescriptor(int maxInputSymbol)55 RBBIStateDescriptor(int maxInputSymbol) { 56 fTagVals = new TreeSet<Integer>(); 57 fPositions = new HashSet<RBBINode>(); 58 fDtran = new int[maxInputSymbol+1]; // fDtran needs to be pre-sized. 59 // It is indexed by input symbols, and will 60 // hold the next state number for each 61 // symbol. 62 } 63 } 64 65 66 private RBBIRuleBuilder fRB; 67 68 /** The array index into RBBIRuleBuilder.fTreeRoots for the parse tree to operate on. */ 69 private int fRootIx; 70 71 /** D states (Aho's terminology). Index is state number. */ 72 private List<RBBIStateDescriptor> fDStates; 73 74 /** Synthesized safe table, a List of row arrays. */ 75 private List<short[]> fSafeTable; 76 77 //----------------------------------------------------------------------------- 78 // 79 // Constructor for RBBITableBuilder. 80 // 81 // rootNode is an index into the array of root nodes that is held by 82 // the overall RBBIRuleBuilder. 83 //----------------------------------------------------------------------------- RBBITableBuilder(RBBIRuleBuilder rb, int rootNodeIx)84 RBBITableBuilder(RBBIRuleBuilder rb, int rootNodeIx) { 85 fRootIx = rootNodeIx; 86 fRB = rb; 87 fDStates = new ArrayList<RBBIStateDescriptor>(); 88 } 89 90 91 92 93 //----------------------------------------------------------------------------- 94 // 95 // RBBITableBuilder::buildForwardTable - This is the main function for building 96 // the DFA state transition table from the RBBI rules parse tree. 97 // 98 //----------------------------------------------------------------------------- buildForwardTable()99 void buildForwardTable() { 100 // If there were no rules, just return. This situation can easily arise 101 // for the reverse rules. 102 if (fRB.fTreeRoots[fRootIx]==null) { 103 return; 104 } 105 106 // 107 // Walk through the tree, replacing any references to $variables with a copy of the 108 // parse tree for the substition expression. 109 // 110 fRB.fTreeRoots[fRootIx] = fRB.fTreeRoots[fRootIx].flattenVariables(); 111 if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("ftree")>=0) { 112 System.out.println("Parse tree after flattening variable references."); 113 fRB.fTreeRoots[fRootIx].printTree(true); 114 } 115 116 // 117 // If the rules contained any references to {bof} 118 // add a {bof} <cat> <former root of tree> to the 119 // tree. Means that all matches must start out with the 120 // {bof} fake character. 121 // 122 if (fRB.fSetBuilder.sawBOF()) { 123 RBBINode bofTop = new RBBINode(RBBINode.opCat); 124 RBBINode bofLeaf = new RBBINode(RBBINode.leafChar); 125 bofTop.fLeftChild = bofLeaf; 126 bofTop.fRightChild = fRB.fTreeRoots[fRootIx]; 127 bofLeaf.fParent = bofTop; 128 bofLeaf.fVal = 2; // Reserved value for {bof}. 129 fRB.fTreeRoots[fRootIx] = bofTop; 130 } 131 132 // 133 // Add a unique right-end marker to the expression. 134 // Appears as a cat-node, left child being the original tree, 135 // right child being the end marker. 136 // 137 RBBINode cn = new RBBINode(RBBINode.opCat); 138 cn.fLeftChild = fRB.fTreeRoots[fRootIx]; 139 fRB.fTreeRoots[fRootIx].fParent = cn; 140 cn.fRightChild = new RBBINode(RBBINode.endMark); 141 cn.fRightChild.fParent = cn; 142 fRB.fTreeRoots[fRootIx] = cn; 143 144 // 145 // Replace all references to UnicodeSets with the tree for the equivalent 146 // expression. 147 // 148 fRB.fTreeRoots[fRootIx].flattenSets(); 149 if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("stree")>=0) { 150 System.out.println("Parse tree after flattening Unicode Set references."); 151 fRB.fTreeRoots[fRootIx].printTree(true); 152 } 153 154 155 // 156 // calculate the functions nullable, firstpos, lastpos and followpos on 157 // nodes in the parse tree. 158 // See the alogrithm description in Aho. 159 // Understanding how this works by looking at the code alone will be 160 // nearly impossible. 161 // 162 calcNullable(fRB.fTreeRoots[fRootIx]); 163 calcFirstPos(fRB.fTreeRoots[fRootIx]); 164 calcLastPos(fRB.fTreeRoots[fRootIx]); 165 calcFollowPos(fRB.fTreeRoots[fRootIx]); 166 if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("pos")>=0) { 167 System.out.print("\n"); 168 printPosSets(fRB.fTreeRoots[fRootIx]); 169 } 170 171 // 172 // For "chained" rules, modify the followPos sets 173 // 174 if (fRB.fChainRules) { 175 calcChainedFollowPos(fRB.fTreeRoots[fRootIx]); 176 } 177 178 // 179 // BOF (start of input) test fixup. 180 // 181 if (fRB.fSetBuilder.sawBOF()) { 182 bofFixup(); 183 } 184 185 // 186 // Build the DFA state transition tables. 187 // 188 buildStateTable(); 189 flagAcceptingStates(); 190 flagLookAheadStates(); 191 flagTaggedStates(); 192 193 // 194 // Update the global table of rule status {tag} values 195 // The rule builder has a global vector of status values that are common 196 // for all tables. Merge the ones from this table into the global set. 197 // 198 mergeRuleStatusVals(); 199 } 200 201 202 203 //----------------------------------------------------------------------------- 204 // 205 // calcNullable. Impossible to explain succinctly. See Aho, section 3.9 206 // 207 //----------------------------------------------------------------------------- calcNullable(RBBINode n)208 void calcNullable(RBBINode n) { 209 if (n == null) { 210 return; 211 } 212 if (n.fType == RBBINode.setRef || 213 n.fType == RBBINode.endMark ) { 214 // These are non-empty leaf node types. 215 n.fNullable = false; 216 return; 217 } 218 219 if (n.fType == RBBINode.lookAhead || n.fType == RBBINode.tag) { 220 // Lookahead marker node. It's a leaf, so no recursion on children. 221 // It's nullable because it does not match any literal text from the input stream. 222 n.fNullable = true; 223 return; 224 } 225 226 227 // The node is not a leaf. 228 // Calculate nullable on its children. 229 calcNullable(n.fLeftChild); 230 calcNullable(n.fRightChild); 231 232 // Apply functions from table 3.40 in Aho 233 if (n.fType == RBBINode.opOr) { 234 n.fNullable = n.fLeftChild.fNullable || n.fRightChild.fNullable; 235 } 236 else if (n.fType == RBBINode.opCat) { 237 n.fNullable = n.fLeftChild.fNullable && n.fRightChild.fNullable; 238 } 239 else if (n.fType == RBBINode.opStar || n.fType == RBBINode.opQuestion) { 240 n.fNullable = true; 241 } 242 else { 243 n.fNullable = false; 244 } 245 } 246 247 248 249 250 //----------------------------------------------------------------------------- 251 // 252 // calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9 253 // 254 //----------------------------------------------------------------------------- calcFirstPos(RBBINode n)255 void calcFirstPos(RBBINode n) { 256 if (n == null) { 257 return; 258 } 259 if (n.fType == RBBINode.leafChar || 260 n.fType == RBBINode.endMark || 261 n.fType == RBBINode.lookAhead || 262 n.fType == RBBINode.tag) { 263 // These are non-empty leaf node types. 264 n.fFirstPosSet.add(n); 265 return; 266 } 267 268 // The node is not a leaf. 269 // Calculate firstPos on its children. 270 calcFirstPos(n.fLeftChild); 271 calcFirstPos(n.fRightChild); 272 273 // Apply functions from table 3.40 in Aho 274 if (n.fType == RBBINode.opOr) { 275 n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet); 276 n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet); 277 } 278 else if (n.fType == RBBINode.opCat) { 279 n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet); 280 if (n.fLeftChild.fNullable) { 281 n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet); 282 } 283 } 284 else if (n.fType == RBBINode.opStar || 285 n.fType == RBBINode.opQuestion || 286 n.fType == RBBINode.opPlus) { 287 n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet); 288 } 289 } 290 291 292 293 //----------------------------------------------------------------------------- 294 // 295 // calcLastPos. Impossible to explain succinctly. See Aho, section 3.9 296 // 297 //----------------------------------------------------------------------------- calcLastPos(RBBINode n)298 void calcLastPos(RBBINode n) { 299 if (n == null) { 300 return; 301 } 302 if (n.fType == RBBINode.leafChar || 303 n.fType == RBBINode.endMark || 304 n.fType == RBBINode.lookAhead || 305 n.fType == RBBINode.tag) { 306 // These are non-empty leaf node types. 307 n.fLastPosSet.add(n); 308 return; 309 } 310 311 // The node is not a leaf. 312 // Calculate lastPos on its children. 313 calcLastPos(n.fLeftChild); 314 calcLastPos(n.fRightChild); 315 316 // Apply functions from table 3.40 in Aho 317 if (n.fType == RBBINode.opOr) { 318 n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet); 319 n.fLastPosSet.addAll(n.fRightChild.fLastPosSet); 320 } 321 else if (n.fType == RBBINode.opCat) { 322 n.fLastPosSet.addAll(n.fRightChild.fLastPosSet); 323 if (n.fRightChild.fNullable) { 324 n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet); 325 } 326 } 327 else if (n.fType == RBBINode.opStar || 328 n.fType == RBBINode.opQuestion || 329 n.fType == RBBINode.opPlus) { 330 n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet); 331 } 332 } 333 334 335 336 //----------------------------------------------------------------------------- 337 // 338 // calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9 339 // 340 //----------------------------------------------------------------------------- calcFollowPos(RBBINode n)341 void calcFollowPos(RBBINode n) { 342 if (n == null || 343 n.fType == RBBINode.leafChar || 344 n.fType == RBBINode.endMark) { 345 return; 346 } 347 348 calcFollowPos(n.fLeftChild); 349 calcFollowPos(n.fRightChild); 350 351 // Aho rule #1 352 if (n.fType == RBBINode.opCat) { 353 for (RBBINode i /* is 'i' in Aho's description */ : n.fLeftChild.fLastPosSet) { 354 i.fFollowPos.addAll(n.fRightChild.fFirstPosSet); 355 } 356 } 357 358 // Aho rule #2 359 if (n.fType == RBBINode.opStar || 360 n.fType == RBBINode.opPlus) { 361 for (RBBINode i /* again, n and i are the names from Aho's description */ : n.fLastPosSet) { 362 i.fFollowPos.addAll(n.fFirstPosSet); 363 } 364 } 365 } 366 367 //----------------------------------------------------------------------------- 368 // 369 // addRuleRootNodes Recursively walk a parse tree, adding all nodes flagged 370 // as roots of a rule to a destination vector. 371 // 372 //----------------------------------------------------------------------------- addRuleRootNodes(List<RBBINode> dest, RBBINode node)373 void addRuleRootNodes(List<RBBINode> dest, RBBINode node) { 374 if (node == null) { 375 return; 376 } 377 if (node.fRuleRoot) { 378 dest.add(node); 379 // Note: rules cannot nest. If we found a rule start node, 380 // no child node can also be a start node. 381 return; 382 } 383 addRuleRootNodes(dest, node.fLeftChild); 384 addRuleRootNodes(dest, node.fRightChild); 385 } 386 387 //----------------------------------------------------------------------------- 388 // 389 // calcChainedFollowPos. Modify the previously calculated followPos sets 390 // to implement rule chaining. NOT described by Aho 391 // 392 //----------------------------------------------------------------------------- calcChainedFollowPos(RBBINode tree)393 void calcChainedFollowPos(RBBINode tree) { 394 395 List<RBBINode> endMarkerNodes = new ArrayList<RBBINode>(); 396 List<RBBINode> leafNodes = new ArrayList<RBBINode>(); 397 398 // get a list of all endmarker nodes. 399 tree.findNodes(endMarkerNodes, RBBINode.endMark); 400 401 // get a list all leaf nodes 402 tree.findNodes(leafNodes, RBBINode.leafChar); 403 404 // Collect all leaf nodes that can start matches for rules 405 // with inbound chaining enabled, which is the union of the 406 // firstPosition sets from each of the rule root nodes. 407 408 List<RBBINode> ruleRootNodes = new ArrayList<RBBINode>(); 409 addRuleRootNodes(ruleRootNodes, tree); 410 411 Set<RBBINode> matchStartNodes = new HashSet<RBBINode>(); 412 for (RBBINode node: ruleRootNodes) { 413 if (node.fChainIn) { 414 matchStartNodes.addAll(node.fFirstPosSet); 415 } 416 } 417 418 // Iterate over all leaf nodes, 419 // 420 for (RBBINode tNode : leafNodes) { 421 RBBINode endNode = null; 422 423 // Identify leaf nodes that correspond to overall rule match positions. 424 // These include an endMarkerNode in their followPos sets. 425 for (RBBINode endMarkerNode : endMarkerNodes) { 426 if (tNode.fFollowPos.contains(endMarkerNode)) { 427 endNode = tNode; 428 break; 429 } 430 } 431 if (endNode == null) { 432 // node wasn't an end node. Try again with the next. 433 continue; 434 } 435 436 // We've got a node that can end a match. 437 438 // Line Break Specific hack: If this node's val correspond to the $CM char class, 439 // don't chain from it. 440 // TODO: Add rule syntax for this behavior, get specifics out of here and 441 // into the rule file. 442 if (fRB.fLBCMNoChain) { 443 int c = this.fRB.fSetBuilder.getFirstChar(endNode.fVal); 444 if (c != -1) { 445 // c == -1 occurs with sets containing only the {eof} marker string. 446 int cLBProp = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK); 447 if (cLBProp == UCharacter.LineBreak.COMBINING_MARK) { 448 continue; 449 } 450 } 451 } 452 453 454 // Now iterate over the nodes that can start a match, looking for ones 455 // with the same char class as our ending node. 456 for (RBBINode startNode : matchStartNodes) { 457 if (startNode.fType != RBBINode.leafChar) { 458 continue; 459 } 460 461 if (endNode.fVal == startNode.fVal) { 462 // The end val (character class) of one possible match is the 463 // same as the start of another. 464 465 // Add all nodes from the followPos of the start node to the 466 // followPos set of the end node, which will have the effect of 467 // letting matches transition from a match state at endNode 468 // to the second char of a match starting with startNode. 469 endNode.fFollowPos.addAll(startNode.fFollowPos); 470 } 471 } 472 } 473 } 474 475 476 //----------------------------------------------------------------------------- 477 // 478 // bofFixup. Fixup for state tables that include {bof} beginning of input testing. 479 // Do an swizzle similar to chaining, modifying the followPos set of 480 // the bofNode to include the followPos nodes from other {bot} nodes 481 // scattered through the tree. 482 // 483 // This function has much in common with calcChainedFollowPos(). 484 // 485 //----------------------------------------------------------------------------- bofFixup()486 void bofFixup() { 487 // 488 // The parse tree looks like this ... 489 // fTree root --. <cat> 490 // / \ 491 // <cat> <#end node> 492 // / \ 493 // <bofNode> rest 494 // of tree 495 // 496 // We will be adding things to the followPos set of the <bofNode> 497 // 498 RBBINode bofNode = fRB.fTreeRoots[fRootIx].fLeftChild.fLeftChild; 499 Assert.assrt(bofNode.fType == RBBINode.leafChar); 500 Assert.assrt(bofNode.fVal == 2); 501 502 // Get all nodes that can be the start a match of the user-written rules 503 // (excluding the fake bofNode) 504 // We want the nodes that can start a match in the 505 // part labeled "rest of tree" 506 // 507 Set<RBBINode> matchStartNodes = fRB.fTreeRoots[fRootIx].fLeftChild.fRightChild.fFirstPosSet; 508 for (RBBINode startNode : matchStartNodes) { 509 if (startNode.fType != RBBINode.leafChar) { 510 continue; 511 } 512 513 if (startNode.fVal == bofNode.fVal) { 514 // We found a leaf node corresponding to a {bof} that was 515 // explicitly written into a rule. 516 // Add everything from the followPos set of this node to the 517 // followPos set of the fake bofNode at the start of the tree. 518 // 519 bofNode.fFollowPos.addAll(startNode.fFollowPos); 520 } 521 } 522 } 523 524 //----------------------------------------------------------------------------- 525 // 526 // buildStateTable() Determine the set of runtime DFA states and the 527 // transition tables for these states, by the algorithm 528 // of fig. 3.44 in Aho. 529 // 530 // Most of the comments are quotes of Aho's psuedo-code. 531 // 532 //----------------------------------------------------------------------------- buildStateTable()533 void buildStateTable() { 534 // 535 // Add a dummy state 0 - the stop state. Not from Aho. 536 int lastInputSymbol = fRB.fSetBuilder.getNumCharCategories() - 1; 537 RBBIStateDescriptor failState = new RBBIStateDescriptor(lastInputSymbol); 538 fDStates.add(failState); 539 540 // initially, the only unmarked state in Dstates is firstpos(root), 541 // where toot is the root of the syntax tree for (r)#; 542 RBBIStateDescriptor initialState = new RBBIStateDescriptor(lastInputSymbol); 543 initialState.fPositions.addAll(fRB.fTreeRoots[fRootIx].fFirstPosSet); 544 fDStates.add(initialState); 545 546 // while there is an unmarked state T in Dstates do begin 547 for (;;) { 548 RBBIStateDescriptor T = null; 549 int tx; 550 for (tx=1; tx<fDStates.size(); tx++) { 551 RBBIStateDescriptor temp = fDStates.get(tx); 552 if (temp.fMarked == false) { 553 T = temp; 554 break; 555 } 556 } 557 if (T == null) { 558 break; 559 } 560 561 // mark T; 562 T.fMarked = true; 563 564 // for each input symbol a do begin 565 int a; 566 for (a = 1; a<=lastInputSymbol; a++) { 567 // let U be the set of positions that are in followpos(p) 568 // for some position p in T 569 // such that the symbol at position p is a; 570 Set<RBBINode> U = null; 571 for (RBBINode p : T.fPositions) { 572 if ((p.fType == RBBINode.leafChar) && (p.fVal == a)) { 573 if (U == null) { 574 U = new HashSet<RBBINode>(); 575 } 576 U.addAll(p.fFollowPos); 577 } 578 } 579 580 // if U is not empty and not in DStates then 581 int ux = 0; 582 boolean UinDstates = false; 583 if (U != null) { 584 Assert.assrt(U.size() > 0); 585 int ix; 586 for (ix=0; ix<fDStates.size(); ix++) { 587 RBBIStateDescriptor temp2; 588 temp2 = fDStates.get(ix); 589 if (U.equals(temp2.fPositions)) { 590 U = temp2.fPositions; 591 ux = ix; 592 UinDstates = true; 593 break; 594 } 595 } 596 597 // Add U as an unmarked state to Dstates 598 if (!UinDstates) 599 { 600 RBBIStateDescriptor newState = new RBBIStateDescriptor(lastInputSymbol); 601 newState.fPositions = U; 602 fDStates.add(newState); 603 ux = fDStates.size()-1; 604 } 605 606 // Dtran[T, a] := U; 607 T.fDtran[a] = ux; 608 } 609 } 610 } 611 } 612 613 614 615 //----------------------------------------------------------------------------- 616 // 617 // flagAcceptingStates Identify accepting states. 618 // First get a list of all of the end marker nodes. 619 // Then, for each state s, 620 // if s contains one of the end marker nodes in its list of tree positions then 621 // s is an accepting state. 622 // 623 //----------------------------------------------------------------------------- flagAcceptingStates()624 void flagAcceptingStates() { 625 List<RBBINode> endMarkerNodes = new ArrayList<RBBINode>(); 626 RBBINode endMarker; 627 int i; 628 int n; 629 630 fRB.fTreeRoots[fRootIx].findNodes(endMarkerNodes, RBBINode.endMark); 631 632 for (i=0; i<endMarkerNodes.size(); i++) { 633 endMarker = endMarkerNodes.get(i); 634 for (n=0; n<fDStates.size(); n++) { 635 RBBIStateDescriptor sd = fDStates.get(n); 636 //if (sd.fPositions.indexOf(endMarker) >= 0) { 637 if (sd.fPositions.contains(endMarker)) { 638 // Any non-zero value for fAccepting means this is an accepting node. 639 // The value is what will be returned to the user as the break status. 640 // If no other value was specified, force it to -1. 641 642 if (sd.fAccepting==0) { 643 // State hasn't been marked as accepting yet. Do it now. 644 sd.fAccepting = endMarker.fVal; 645 if (sd.fAccepting == 0) { 646 sd.fAccepting = -1; 647 } 648 } 649 if (sd.fAccepting==-1 && endMarker.fVal != 0) { 650 // Both lookahead and non-lookahead accepting for this state. 651 // Favor the look-ahead. Expedient for line break. 652 // TODO: need a more elegant resolution for conflicting rules. 653 sd.fAccepting = endMarker.fVal; 654 } 655 // implicit else: 656 // if sd.fAccepting already had a value other than 0 or -1, leave it be. 657 658 // If the end marker node is from a look-ahead rule, set 659 // the fLookAhead field for this state also. 660 if (endMarker.fLookAheadEnd) { 661 // TODO: don't change value if already set? 662 // TODO: allow for more than one active look-ahead rule in engine. 663 // Make value here an index to a side array in engine? 664 sd.fLookAhead = sd.fAccepting; 665 } 666 } 667 } 668 } 669 } 670 671 672 //----------------------------------------------------------------------------- 673 // 674 // flagLookAheadStates Very similar to flagAcceptingStates, above. 675 // 676 //----------------------------------------------------------------------------- flagLookAheadStates()677 void flagLookAheadStates() { 678 List<RBBINode> lookAheadNodes = new ArrayList<RBBINode>(); 679 RBBINode lookAheadNode; 680 int i; 681 int n; 682 683 fRB.fTreeRoots[fRootIx].findNodes(lookAheadNodes, RBBINode.lookAhead); 684 for (i=0; i<lookAheadNodes.size(); i++) { 685 lookAheadNode = lookAheadNodes.get(i); 686 687 for (n=0; n<fDStates.size(); n++) { 688 RBBIStateDescriptor sd = fDStates.get(n); 689 if (sd.fPositions.contains(lookAheadNode)) { 690 sd.fLookAhead = lookAheadNode.fVal; 691 } 692 } 693 } 694 } 695 696 697 698 699 //----------------------------------------------------------------------------- 700 // 701 // flagTaggedStates 702 // 703 //----------------------------------------------------------------------------- flagTaggedStates()704 void flagTaggedStates() { 705 List<RBBINode> tagNodes = new ArrayList<RBBINode>(); 706 RBBINode tagNode; 707 int i; 708 int n; 709 710 fRB.fTreeRoots[fRootIx].findNodes(tagNodes, RBBINode.tag); 711 for (i=0; i<tagNodes.size(); i++) { // For each tag node t (all of 'em) 712 tagNode = tagNodes.get(i); 713 714 for (n=0; n<fDStates.size(); n++) { // For each state s (row in the state table) 715 RBBIStateDescriptor sd = fDStates.get(n); 716 if (sd.fPositions.contains(tagNode)) { // if s include the tag node t 717 sd.fTagVals.add(Integer.valueOf(tagNode.fVal)); 718 } 719 } 720 } 721 } 722 723 724 725 //----------------------------------------------------------------------------- 726 // 727 // mergeRuleStatusVals 728 // 729 // Allocate positions in the global array of rule status {tag} values 730 // 731 // The RBBI runtime uses an array of {sets of status values} that can 732 // be returned for boundaries. Each accepting state that has non-zero 733 // status includes an index into this array. The format of the array 734 // is 735 // Num of status values in group 1 736 // status val 737 // status val 738 // ... 739 // Num of status vals in group 2 740 // status val 741 // status val 742 // ... 743 // etc. 744 // 745 // 746 //----------------------------------------------------------------------------- 747 mergeRuleStatusVals()748 void mergeRuleStatusVals() { 749 // 750 // The basic outline of what happens here is this... 751 // 752 // for each state in this state table 753 // if the status tag list for this state is in the global statuses list 754 // record where and 755 // continue with the next state 756 // else 757 // add the tag list for this state to the global list. 758 // 759 int n; 760 761 // Pre-load a single tag of {0} into the table. 762 // We will need this as a default, for rule sets with no explicit tagging, 763 // or with explicit tagging of {0}. 764 if (fRB.fRuleStatusVals.size() == 0) { 765 fRB.fRuleStatusVals.add(Integer.valueOf(1)); // Num of statuses in group 766 fRB.fRuleStatusVals.add(Integer.valueOf(0)); // and our single status of zero 767 768 SortedSet<Integer> s0 = new TreeSet<Integer>(); 769 Integer izero = Integer.valueOf(0); 770 fRB.fStatusSets.put(s0, izero); 771 SortedSet<Integer> s1 = new TreeSet<Integer>(); 772 s1.add(izero); 773 fRB.fStatusSets.put(s0, izero); 774 } 775 776 // For each state, check whether the state's status tag values are 777 // already entered into the status values array, and add them if not. 778 for (n=0; n<fDStates.size(); n++) { 779 RBBIStateDescriptor sd = fDStates.get(n); 780 Set<Integer> statusVals = sd.fTagVals; 781 Integer arrayIndexI = fRB.fStatusSets.get(statusVals); 782 if (arrayIndexI == null) { 783 // This is the first encounter of this set of status values. 784 // Add them to the statusSets map, This map associates 785 // the set of status values with an index in the runtime status 786 // values array. 787 arrayIndexI = Integer.valueOf(fRB.fRuleStatusVals.size()); 788 fRB.fStatusSets.put(statusVals, arrayIndexI); 789 790 // Add the new set of status values to the vector of values that 791 // will eventually become the array used by the runtime engine. 792 fRB.fRuleStatusVals.add(Integer.valueOf(statusVals.size())); 793 fRB.fRuleStatusVals.addAll(statusVals); 794 } 795 796 // Save the runtime array index back into the state descriptor. 797 sd.fTagsIdx = arrayIndexI.intValue(); 798 } 799 } 800 801 802 803 804 805 806 807 //----------------------------------------------------------------------------- 808 // 809 // printPosSets Debug function. Dump Nullable, firstpos, lastpos and followpos 810 // for each node in the tree. 811 // 812 //----------------------------------------------------------------------------- 813 printPosSets(RBBINode n)814 void printPosSets(RBBINode n) { 815 if (n==null) { 816 return; 817 } 818 RBBINode.printNode(n); 819 System.out.print(" Nullable: " + n.fNullable); 820 821 System.out.print(" firstpos: "); 822 printSet(n.fFirstPosSet); 823 824 System.out.print(" lastpos: "); 825 printSet(n.fLastPosSet); 826 827 System.out.print(" followpos: "); 828 printSet(n.fFollowPos); 829 830 printPosSets(n.fLeftChild); 831 printPosSets(n.fRightChild); 832 } 833 834 835 836 /** 837 * Find duplicate (redundant) character classes. Begin looking with categories.first. 838 * Duplicates, if found are returned in the categories parameter. 839 * This is an iterator-like function, used to identify character classes 840 * (state table columns) that can be eliminated. 841 * @param categories in/out parameter, specifies where to start looking for duplicates, 842 * and returns the first pair of duplicates found, if any. 843 * @return true if duplicate char classes were found, false otherwise. 844 * @internal 845 */ findDuplCharClassFrom(RBBIRuleBuilder.IntPair categories)846 boolean findDuplCharClassFrom(RBBIRuleBuilder.IntPair categories) { 847 int numStates = fDStates.size(); 848 int numCols = fRB.fSetBuilder.getNumCharCategories(); 849 850 int table_base = 0; 851 int table_dupl = 0; 852 for (; categories.first < numCols-1; ++categories.first) { 853 for (categories.second=categories.first+1; categories.second < numCols; ++categories.second) { 854 for (int state=0; state<numStates; state++) { 855 RBBIStateDescriptor sd = fDStates.get(state); 856 table_base = sd.fDtran[categories.first]; 857 table_dupl = sd.fDtran[categories.second]; 858 if (table_base != table_dupl) { 859 break; 860 } 861 } 862 if (table_base == table_dupl) { 863 return true; 864 } 865 } 866 } 867 return false; 868 } 869 870 /** 871 * Remove a column from the state table. Used when two character categories 872 * have been found equivalent, and merged together, to eliminate the unneeded table column. 873 */ removeColumn(int column)874 void removeColumn(int column) { 875 int numStates = fDStates.size(); 876 for (int state=0; state<numStates; state++) { 877 RBBIStateDescriptor sd = fDStates.get(state); 878 assert(column < sd.fDtran.length); 879 int[] newArray = Arrays.copyOf(sd.fDtran, sd.fDtran.length - 1); 880 System.arraycopy(sd.fDtran, column+1, newArray, column, newArray.length - column); 881 sd.fDtran = newArray; 882 } 883 } 884 885 886 /** 887 * Find duplicate (redundant) states, beginning at the specified pair, 888 * within this state table. This is an iterator-like function, used to 889 * identify states (state table rows) that can be eliminated. 890 * @param states in/out parameter, specifies where to start looking for duplicates, 891 * and returns the first pair of duplicates found, if any. 892 * @return true if duplicate states were found, false otherwise. 893 * @internal 894 */ 895 boolean findDuplicateState(RBBIRuleBuilder.IntPair states) { 896 int numStates = fDStates.size(); 897 int numCols = fRB.fSetBuilder.getNumCharCategories(); 898 899 for (; states.first<numStates-1; ++states.first) { 900 RBBIStateDescriptor firstSD = fDStates.get(states.first); 901 for (states.second=states.first+1; states.second<numStates; ++states.second) { 902 RBBIStateDescriptor duplSD = fDStates.get(states.second); 903 if (firstSD.fAccepting != duplSD.fAccepting || 904 firstSD.fLookAhead != duplSD.fLookAhead || 905 firstSD.fTagsIdx != duplSD.fTagsIdx) { 906 continue; 907 } 908 boolean rowsMatch = true; 909 for (int col=0; col < numCols; ++col) { 910 int firstVal = firstSD.fDtran[col]; 911 int duplVal = duplSD.fDtran[col]; 912 if (!((firstVal == duplVal) || 913 ((firstVal == states.first || firstVal == states.second) && 914 (duplVal == states.first || duplVal == states.second)))) { 915 rowsMatch = false; 916 break; 917 } 918 } 919 if (rowsMatch) { 920 return true; 921 } 922 } 923 } 924 return false; 925 } 926 927 /** 928 * Find the next duplicate state in the safe reverse table. An iterator function. 929 * @param states in/out parameter, specifies where to start looking for duplicates, 930 * and returns the first pair of duplicates found, if any. 931 * @return true if duplicate states were found, false otherwise. 932 * @internal 933 */ 934 boolean findDuplicateSafeState(RBBIRuleBuilder.IntPair states) { 935 int numStates = fSafeTable.size(); 936 937 for (; states.first<numStates-1; ++states.first) { 938 short[] firstRow = fSafeTable.get(states.first); 939 for (states.second=states.first+1; states.second<numStates; ++states.second) { 940 short[] duplRow = fSafeTable.get(states.second); 941 boolean rowsMatch = true; 942 int numCols = firstRow.length; 943 for (int col=0; col < numCols; ++col) { 944 int firstVal = firstRow[col]; 945 int duplVal = duplRow[col]; 946 if (!((firstVal == duplVal) || 947 ((firstVal == states.first || firstVal == states.second) && 948 (duplVal == states.first || duplVal == states.second)))) { 949 rowsMatch = false; 950 break; 951 } 952 } 953 if (rowsMatch) { 954 return true; 955 } 956 } 957 } 958 return false; 959 } 960 961 /** 962 * Remove a duplicate state (row) from the state table. All references to the deleted (second) state 963 * are redirected to first state. 964 * @param duplStates The duplicate pair of states. 965 * @internal 966 */ 967 void removeState(IntPair duplStates) { 968 final int keepState = duplStates.first; 969 final int duplState = duplStates.second; 970 assert(keepState < duplState); 971 assert(duplState < fDStates.size()); 972 973 fDStates.remove(duplState); 974 975 int numStates = fDStates.size(); 976 int numCols = fRB.fSetBuilder.getNumCharCategories(); 977 for (int state=0; state<numStates; ++state) { 978 RBBIStateDescriptor sd = fDStates.get(state); 979 for (int col=0; col<numCols; col++) { 980 int existingVal = sd.fDtran[col]; 981 int newVal = existingVal; 982 if (existingVal == duplState) { 983 newVal = keepState; 984 } else if (existingVal > duplState) { 985 newVal = existingVal - 1; 986 } 987 sd.fDtran[col] = newVal; 988 } 989 if (sd.fAccepting == duplState) { 990 sd.fAccepting = keepState; 991 } else if (sd.fAccepting > duplState) { 992 sd.fAccepting--; 993 } 994 if (sd.fLookAhead == duplState) { 995 sd.fLookAhead = keepState; 996 } else if (sd.fLookAhead > duplState) { 997 sd.fLookAhead--; 998 } 999 } 1000 } 1001 1002 /** 1003 * Remove a duplicate state from the safe table. 1004 * @param duplStates The duplicate pair of states. The first is kept, the second is removed. 1005 * All references to the second in the state table are retargeted 1006 * to the first. 1007 * @internal 1008 */ 1009 void removeSafeState(IntPair duplStates) { 1010 final int keepState = duplStates.first; 1011 final int duplState = duplStates.second; 1012 assert(keepState < duplState); 1013 assert(duplState < fSafeTable.size()); 1014 1015 fSafeTable.remove(duplState); 1016 int numStates = fSafeTable.size(); 1017 for (int state=0; state<numStates; ++state) { 1018 short[] row = fSafeTable.get(state); 1019 for (int col=0; col<row.length; col++) { 1020 int existingVal = row[col]; 1021 int newVal = existingVal; 1022 if (existingVal == duplState) { 1023 newVal = keepState; 1024 } else if (existingVal > duplState) { 1025 newVal = existingVal - 1; 1026 } 1027 row[col] = (short)newVal; 1028 } 1029 } 1030 } 1031 1032 1033 /** 1034 * Check for, and remove duplicate states (table rows). 1035 * @return the number of states removed. 1036 * @internal 1037 */ 1038 int removeDuplicateStates() { 1039 IntPair dupls = new IntPair(3, 0); 1040 int numStatesRemoved = 0; 1041 1042 while (findDuplicateState(dupls)) { 1043 // System.out.printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second); 1044 removeState(dupls); 1045 ++numStatesRemoved; 1046 } 1047 return numStatesRemoved; 1048 } 1049 1050 1051 /** 1052 * Calculate the size in bytes of the serialized form of this state transition table, 1053 * which is identical to the ICU4C runtime form. 1054 * Refer to common/rbbidata.h from ICU4C for the declarations of the structures 1055 * being matched by this calculation. 1056 */ 1057 int getTableSize() { 1058 if (fRB.fTreeRoots[fRootIx] == null) { 1059 return 0; 1060 } 1061 int size = 16; // The header of 4 ints, with no rows to the table. 1062 int numRows = fDStates.size(); 1063 int numCols = fRB.fSetBuilder.getNumCharCategories(); 1064 int rowSize = 8 + 2*numCols; 1065 size += numRows * rowSize; 1066 size = (size + 7) & ~7; // round up to a multiple of 8 bytes 1067 return size; 1068 } 1069 1070 1071 1072 /** 1073 * Create a RBBIDataWrapper.RBBIStateTable for a newly compiled table. 1074 * RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C, 1075 * in common/rbbidata.h 1076 */ 1077 RBBIDataWrapper.RBBIStateTable exportTable() { 1078 int state; 1079 int col; 1080 1081 RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable(); 1082 if (fRB.fTreeRoots[fRootIx] == null) { 1083 return table; 1084 } 1085 1086 Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff && 1087 fDStates.size() < 0x7fff); 1088 table.fNumStates = fDStates.size(); 1089 1090 // Size of table size in shorts. 1091 // the "4" is the size of struct RBBIStateTableRow, the row header part only. 1092 int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories(); // Row Length in shorts. 1093 int tableSize = (getTableSize() - 16) / 2; // fTable length in shorts. 1094 table.fTable = new short[tableSize]; 1095 table.fRowLen = rowLen * 2; // Row length in bytes. 1096 1097 if (fRB.fLookAheadHardBreak) { 1098 table.fFlags |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK; 1099 } 1100 if (fRB.fSetBuilder.sawBOF()) { 1101 table.fFlags |= RBBIDataWrapper.RBBI_BOF_REQUIRED; 1102 } 1103 1104 int numCharCategories = fRB.fSetBuilder.getNumCharCategories(); 1105 for (state=0; state<table.fNumStates; state++) { 1106 RBBIStateDescriptor sd = fDStates.get(state); 1107 int row = state*rowLen; 1108 Assert.assrt (-32768 < sd.fAccepting && sd.fAccepting <= 32767); 1109 Assert.assrt (-32768 < sd.fLookAhead && sd.fLookAhead <= 32767); 1110 table.fTable[row + RBBIDataWrapper.ACCEPTING] = (short)sd.fAccepting; 1111 table.fTable[row + RBBIDataWrapper.LOOKAHEAD] = (short)sd.fLookAhead; 1112 table.fTable[row + RBBIDataWrapper.TAGIDX] = (short)sd.fTagsIdx; 1113 for (col=0; col<numCharCategories; col++) { 1114 table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = (short)sd.fDtran[col]; 1115 } 1116 } 1117 return table; 1118 } 1119 1120 /** 1121 * Synthesize a safe state table from the main state table. 1122 */ 1123 void buildSafeReverseTable() { 1124 // Safe Reverse table construction is described in more detail in the corresponding 1125 // function in ICU4C, in source/common/rbbitblb.cpp. Not duplicated here because 1126 // it is too likely to get out of sync. 1127 1128 // Each safe pair is stored as two chars in the safePair stringBuilder. 1129 StringBuilder safePairs = new StringBuilder(); 1130 1131 int numCharClasses = fRB.fSetBuilder.getNumCharCategories(); 1132 int numStates = fDStates.size(); 1133 1134 for (int c1=0; c1<numCharClasses; ++c1) { 1135 for (int c2=0; c2 < numCharClasses; ++c2) { 1136 int wantedEndState = -1; 1137 int endState = 0; 1138 for (int startState = 1; startState < numStates; ++startState) { 1139 RBBIStateDescriptor startStateD = fDStates.get(startState); 1140 int s2 = startStateD.fDtran[c1]; 1141 RBBIStateDescriptor s2StateD = fDStates.get(s2); 1142 endState = s2StateD.fDtran[c2]; 1143 if (wantedEndState < 0) { 1144 wantedEndState = endState; 1145 } else { 1146 if (wantedEndState != endState) { 1147 break; 1148 } 1149 } 1150 } 1151 if (wantedEndState == endState) { 1152 safePairs.append((char)c1); 1153 safePairs.append((char)c2); 1154 // System.out.printf("(%d, %d) ", c1, c2); 1155 } 1156 } 1157 // System.out.printf("\n"); 1158 } 1159 1160 // Populate the initial safe table. 1161 // The table as a whole is a List<short[]> 1162 // Row 0 is the stop state. 1163 // Row 1 is the start sate. 1164 // Row 2 and beyond are other states, initially one per char class, but 1165 // after initial construction, many of the states will be combined, compacting the table.) 1166 // The String holds the nextState data only. The four leading fields of a row, fAccepting, 1167 // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building. 1168 1169 assert(fSafeTable == null); 1170 fSafeTable = new ArrayList<short[]>(); 1171 for (int row=0; row<numCharClasses + 2; ++row) { 1172 fSafeTable.add(new short[numCharClasses]); 1173 } 1174 1175 // From the start state, each input char class transitions to the state for that input. 1176 short[] startState = fSafeTable.get(1); 1177 for (int charClass=0; charClass < numCharClasses; ++charClass) { 1178 // Note: +2 to skip the start & stop state rows. 1179 startState[charClass] = (short)(charClass+2); 1180 } 1181 1182 // Initially make every other state table row look like the start state row 1183 // (except for the stop state, which remains all 0) 1184 for (int row=2; row<numCharClasses+2; ++row) { 1185 System.arraycopy(startState, 0, fSafeTable.get(row), 0, startState.length); 1186 } 1187 1188 // Run through the safe pairs, set the next state to zero when pair has been seen. 1189 // Zero being the stop state, meaning we found a safe point. 1190 for (int pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) { 1191 int c1 = safePairs.charAt(pairIdx); 1192 int c2 = safePairs.charAt(pairIdx + 1); 1193 1194 short[] rowState = fSafeTable.get(c2 + 2); 1195 rowState[c1] = 0; 1196 } 1197 1198 // Remove duplicate or redundant rows from the table. 1199 RBBIRuleBuilder.IntPair states = new RBBIRuleBuilder.IntPair(1, 0); 1200 while (findDuplicateSafeState(states)) { 1201 // System.out.printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second); 1202 removeSafeState(states); 1203 } 1204 } 1205 1206 1207 /** 1208 * Calculate the size of the runtime form of this safe state table. 1209 */ 1210 int getSafeTableSize() { 1211 if (fSafeTable == null) { 1212 return 0; 1213 } 1214 int size = 16; // The header of 4 ints, with no rows to the table. 1215 int numRows = fSafeTable.size(); 1216 int numCols = fSafeTable.get(0).length; 1217 int rowSize = 8 + 2*numCols; 1218 size += numRows * rowSize; 1219 // TODO: there are redundant round-up. Figure out best place, get rid of the rest. 1220 size = (size + 7) & ~7; // round up to a multiple of 8 bytes 1221 return size; 1222 } 1223 1224 1225 /** 1226 * Create a RBBIDataWrapper.RBBIStateTable for the safe reverse table. 1227 * RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C, 1228 * in common/rbbidata.h 1229 */ 1230 RBBIDataWrapper.RBBIStateTable exportSafeTable() { 1231 RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable(); 1232 table.fNumStates = fSafeTable.size(); 1233 int numCharCategories = fSafeTable.get(0).length; 1234 1235 // Size of table size in shorts. 1236 // the "4" is the size of struct RBBIStateTableRow, the row header part only. 1237 int rowLen = 4 + numCharCategories; 1238 // TODO: tableSize is basically numStates * numCharCategories, 1239 // except for alignment padding. Clean up here, and in main exportTable(). 1240 int tableSize = (getSafeTableSize() - 16) / 2; // fTable length in shorts. 1241 table.fTable = new short[tableSize]; 1242 table.fRowLen = rowLen * 2; // Row length in bytes. 1243 1244 for (int state=0; state<table.fNumStates; state++) { 1245 short[] rowArray = fSafeTable.get(state); 1246 int row = state * rowLen; 1247 1248 for (int col=0; col<numCharCategories; col++) { 1249 table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = rowArray[col]; 1250 } 1251 } 1252 return table; 1253 } 1254 1255 1256 //----------------------------------------------------------------------------- 1257 // 1258 // printSet Debug function. Print the contents of a set of Nodes 1259 // 1260 //----------------------------------------------------------------------------- 1261 1262 void printSet(Collection<RBBINode> s) { 1263 for (RBBINode n : s) { 1264 RBBINode.printInt(n.fSerialNum, 8); 1265 } 1266 System.out.println(); 1267 } 1268 1269 1270 1271 //----------------------------------------------------------------------------- 1272 // 1273 // printStates Debug Function. Dump the fully constructed state transition table. 1274 // 1275 //----------------------------------------------------------------------------- 1276 1277 void printStates() { 1278 int c; // input "character" 1279 int n; // state number 1280 1281 System.out.print("state | i n p u t s y m b o l s \n"); 1282 System.out.print(" | Acc LA Tag"); 1283 for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) { 1284 RBBINode.printInt(c, 3); 1285 } 1286 System.out.print("\n"); 1287 System.out.print(" |---------------"); 1288 for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) { 1289 System.out.print("---"); 1290 } 1291 System.out.print("\n"); 1292 1293 for (n=0; n<fDStates.size(); n++) { 1294 RBBIStateDescriptor sd = fDStates.get(n); 1295 RBBINode.printInt(n, 5); 1296 System.out.print(" | "); 1297 1298 RBBINode.printInt(sd.fAccepting, 3); 1299 RBBINode.printInt(sd.fLookAhead, 4); 1300 RBBINode.printInt(sd.fTagsIdx, 6); 1301 System.out.print(" "); 1302 for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) { 1303 RBBINode.printInt(sd.fDtran[c], 3); 1304 } 1305 System.out.print("\n"); 1306 } 1307 System.out.print("\n\n"); 1308 } 1309 1310 1311 /** 1312 * Debug Function. Dump the fully constructed safe reverse table. 1313 */ 1314 void printReverseTable() { 1315 int c; // input "character" 1316 1317 System.out.printf(" Safe Reverse Table \n"); 1318 if (fSafeTable == null) { 1319 System.out.printf(" --- nullptr ---\n"); 1320 return; 1321 } 1322 int numCharCategories = fSafeTable.get(0).length; 1323 System.out.printf("state | i n p u t s y m b o l s \n"); 1324 System.out.printf(" | Acc LA Tag"); 1325 for (c=0; c< numCharCategories; c++) { 1326 System.out.printf(" %2d", c); 1327 } 1328 System.out.printf("\n"); 1329 System.out.printf(" |---------------"); 1330 for (c=0; c<numCharCategories; c++) { 1331 System.out.printf("---"); 1332 } 1333 System.out.printf("\n"); 1334 1335 for (int n=0; n<fSafeTable.size(); n++) { 1336 short rowArray[] = fSafeTable.get(n); 1337 System.out.printf(" %3d | " , n); 1338 System.out.printf("%3d %3d %5d ", 0, 0, 0); // Accepting, LookAhead, Tags 1339 for (c=0; c<numCharCategories; c++) { 1340 System.out.printf(" %2d", rowArray[c]); 1341 } 1342 System.out.printf("\n"); 1343 } 1344 System.out.printf("\n\n"); 1345 } 1346 1347 1348 1349 1350 1351 //----------------------------------------------------------------------------- 1352 // 1353 // printRuleStatusTable Debug Function. Dump the common rule status table 1354 // 1355 //----------------------------------------------------------------------------- 1356 1357 void printRuleStatusTable() { 1358 int thisRecord = 0; 1359 int nextRecord = 0; 1360 int i; 1361 List<Integer> tbl = fRB.fRuleStatusVals; 1362 1363 System.out.print("index | tags \n"); 1364 System.out.print("-------------------\n"); 1365 1366 while (nextRecord < tbl.size()) { 1367 thisRecord = nextRecord; 1368 nextRecord = thisRecord + tbl.get(thisRecord).intValue() + 1; 1369 RBBINode.printInt(thisRecord, 7); 1370 for (i=thisRecord+1; i<nextRecord; i++) { 1371 int val = tbl.get(i).intValue(); 1372 RBBINode.printInt(val, 7); 1373 } 1374 System.out.print("\n"); 1375 } 1376 System.out.print("\n\n"); 1377 } 1378 1379 1380 1381 } 1382