1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4 **********************************************************************
5 *   Copyright (c) 2002-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 */
9 
10 package com.ibm.icu.text;
11 
12 import java.util.ArrayList;
13 import java.util.Arrays;
14 import java.util.Collection;
15 import java.util.HashSet;
16 import java.util.List;
17 import java.util.Set;
18 import java.util.SortedSet;
19 import java.util.TreeSet;
20 
21 import com.ibm.icu.impl.Assert;
22 import com.ibm.icu.impl.RBBIDataWrapper;
23 import com.ibm.icu.lang.UCharacter;
24 import com.ibm.icu.lang.UProperty;
25 import com.ibm.icu.text.RBBIRuleBuilder.IntPair;
26 
27 /**
28  *  This class is part of the RBBI rule compiler.
29  *  It builds the state transition table used by the RBBI runtime
30  *  from the expression syntax tree generated by the rule scanner.
31  *
32  *  This class is part of the RBBI implementation only.
33  *  There is no user-visible public API here.
34  */
35 class RBBITableBuilder {
36 
37     //
38     //  RBBIStateDescriptor - The DFA is initially constructed as a set of these descriptors,
39     //                        one for each state.
40     static class RBBIStateDescriptor {
41         boolean      fMarked;
42         int          fAccepting;
43         int          fLookAhead;
44         SortedSet<Integer> fTagVals;
45         int          fTagsIdx;
46         Set<RBBINode> fPositions;                 // Set of parse tree positions associated
47                                                   //   with this state.  Unordered (it's a set).
48                                                   //   UVector contents are RBBINode *
49 
50         int[]        fDtran;                      // Transitions out of this state.
51                                                    //   indexed by input character
52                                                    //   contents is int index of dest state
53                                                    //   in RBBITableBuilder.fDStates
54 
RBBIStateDescriptor(int maxInputSymbol)55         RBBIStateDescriptor(int maxInputSymbol) {
56             fTagVals = new TreeSet<Integer>();
57             fPositions = new HashSet<RBBINode>();
58             fDtran = new int[maxInputSymbol+1];    // fDtran needs to be pre-sized.
59                                                     //   It is indexed by input symbols, and will
60                                                     //   hold  the next state number for each
61                                                     //   symbol.
62         }
63     }
64 
65 
66     private  RBBIRuleBuilder  fRB;
67 
68     /** The array index into RBBIRuleBuilder.fTreeRoots for the parse tree to operate on. */
69     private  int  fRootIx;
70 
71     /** D states (Aho's terminology). Index is state number. */
72     private  List<RBBIStateDescriptor> fDStates;
73 
74     /** Synthesized safe table, a List of row arrays.  */
75     private List<short[]>    fSafeTable;
76 
77     //-----------------------------------------------------------------------------
78     //
79     //  Constructor    for RBBITableBuilder.
80     //
81     //                 rootNode is an index into the array of root nodes that is held by
82     //                          the overall RBBIRuleBuilder.
83     //-----------------------------------------------------------------------------
RBBITableBuilder(RBBIRuleBuilder rb, int rootNodeIx)84     RBBITableBuilder(RBBIRuleBuilder rb,  int rootNodeIx)  {
85            fRootIx     = rootNodeIx;
86            fRB         = rb;
87            fDStates    = new ArrayList<RBBIStateDescriptor>();
88         }
89 
90 
91 
92 
93        //-----------------------------------------------------------------------------
94        //
95        //   RBBITableBuilder::buildForwardTable  -  This is the main function for building
96        //                          the DFA state transition table from the RBBI rules parse tree.
97        //
98        //-----------------------------------------------------------------------------
buildForwardTable()99        void  buildForwardTable() {
100            // If there were no rules, just return.  This situation can easily arise
101            //   for the reverse rules.
102            if (fRB.fTreeRoots[fRootIx]==null) {
103                return;
104            }
105 
106            //
107            // Walk through the tree, replacing any references to $variables with a copy of the
108            //   parse tree for the substition expression.
109            //
110            fRB.fTreeRoots[fRootIx] = fRB.fTreeRoots[fRootIx].flattenVariables();
111            if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("ftree")>=0) {
112                System.out.println("Parse tree after flattening variable references.");
113                fRB.fTreeRoots[fRootIx].printTree(true);
114            }
115 
116            //
117            // If the rules contained any references to {bof}
118            //   add a {bof} <cat> <former root of tree> to the
119            //   tree.  Means that all matches must start out with the
120            //   {bof} fake character.
121            //
122            if (fRB.fSetBuilder.sawBOF()) {
123                RBBINode bofTop    = new RBBINode(RBBINode.opCat);
124                RBBINode bofLeaf   = new RBBINode(RBBINode.leafChar);
125                bofTop.fLeftChild  = bofLeaf;
126                bofTop.fRightChild = fRB.fTreeRoots[fRootIx];
127                bofLeaf.fParent    = bofTop;
128                bofLeaf.fVal       = 2;      // Reserved value for {bof}.
129                fRB.fTreeRoots[fRootIx] = bofTop;
130            }
131 
132            //
133            // Add a unique right-end marker to the expression.
134            //   Appears as a cat-node, left child being the original tree,
135            //   right child being the end marker.
136            //
137            RBBINode cn = new RBBINode(RBBINode.opCat);
138            cn.fLeftChild = fRB.fTreeRoots[fRootIx];
139            fRB.fTreeRoots[fRootIx].fParent = cn;
140            cn.fRightChild = new RBBINode(RBBINode.endMark);
141            cn.fRightChild.fParent = cn;
142            fRB.fTreeRoots[fRootIx] = cn;
143 
144            //
145            //  Replace all references to UnicodeSets with the tree for the equivalent
146            //      expression.
147            //
148            fRB.fTreeRoots[fRootIx].flattenSets();
149            if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("stree")>=0) {
150                System.out.println("Parse tree after flattening Unicode Set references.");
151                fRB.fTreeRoots[fRootIx].printTree(true);
152            }
153 
154 
155            //
156            // calculate the functions nullable, firstpos, lastpos and followpos on
157            // nodes in the parse tree.
158            //    See the alogrithm description in Aho.
159            //    Understanding how this works by looking at the code alone will be
160            //       nearly impossible.
161            //
162            calcNullable(fRB.fTreeRoots[fRootIx]);
163            calcFirstPos(fRB.fTreeRoots[fRootIx]);
164            calcLastPos(fRB.fTreeRoots[fRootIx]);
165            calcFollowPos(fRB.fTreeRoots[fRootIx]);
166            if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("pos")>=0) {
167                System.out.print("\n");
168                printPosSets(fRB.fTreeRoots[fRootIx]);
169            }
170 
171            //
172            //  For "chained" rules, modify the followPos sets
173            //
174            if (fRB.fChainRules) {
175                calcChainedFollowPos(fRB.fTreeRoots[fRootIx]);
176            }
177 
178            //
179            //  BOF (start of input) test fixup.
180            //
181            if (fRB.fSetBuilder.sawBOF()) {
182                bofFixup();
183            }
184 
185            //
186            // Build the DFA state transition tables.
187            //
188            buildStateTable();
189            flagAcceptingStates();
190            flagLookAheadStates();
191            flagTaggedStates();
192 
193            //
194            // Update the global table of rule status {tag} values
195            // The rule builder has a global vector of status values that are common
196            //    for all tables.  Merge the ones from this table into the global set.
197            //
198            mergeRuleStatusVals();
199        }
200 
201 
202 
203        //-----------------------------------------------------------------------------
204        //
205        //   calcNullable.    Impossible to explain succinctly.  See Aho, section 3.9
206        //
207        //-----------------------------------------------------------------------------
calcNullable(RBBINode n)208        void calcNullable(RBBINode n) {
209            if (n == null) {
210                return;
211            }
212            if (n.fType == RBBINode.setRef ||
213                n.fType == RBBINode.endMark ) {
214                // These are non-empty leaf node types.
215                n.fNullable = false;
216                return;
217            }
218 
219            if (n.fType == RBBINode.lookAhead || n.fType == RBBINode.tag) {
220                // Lookahead marker node.  It's a leaf, so no recursion on children.
221                // It's nullable because it does not match any literal text from the input stream.
222                n.fNullable = true;
223                return;
224            }
225 
226 
227            // The node is not a leaf.
228            //  Calculate nullable on its children.
229            calcNullable(n.fLeftChild);
230            calcNullable(n.fRightChild);
231 
232            // Apply functions from table 3.40 in Aho
233            if (n.fType == RBBINode.opOr) {
234                n.fNullable = n.fLeftChild.fNullable || n.fRightChild.fNullable;
235            }
236            else if (n.fType == RBBINode.opCat) {
237                n.fNullable = n.fLeftChild.fNullable && n.fRightChild.fNullable;
238            }
239            else if (n.fType == RBBINode.opStar || n.fType == RBBINode.opQuestion) {
240                n.fNullable = true;
241            }
242            else {
243                n.fNullable = false;
244            }
245        }
246 
247 
248 
249 
250        //-----------------------------------------------------------------------------
251        //
252        //   calcFirstPos.    Impossible to explain succinctly.  See Aho, section 3.9
253        //
254        //-----------------------------------------------------------------------------
calcFirstPos(RBBINode n)255        void calcFirstPos(RBBINode n) {
256            if (n == null) {
257                return;
258            }
259            if (n.fType == RBBINode.leafChar  ||
260                n.fType == RBBINode.endMark   ||
261                n.fType == RBBINode.lookAhead ||
262                n.fType == RBBINode.tag) {
263                // These are non-empty leaf node types.
264                n.fFirstPosSet.add(n);
265                return;
266            }
267 
268            // The node is not a leaf.
269            //  Calculate firstPos on its children.
270            calcFirstPos(n.fLeftChild);
271            calcFirstPos(n.fRightChild);
272 
273            // Apply functions from table 3.40 in Aho
274            if (n.fType == RBBINode.opOr) {
275                n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
276                n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet);
277            }
278            else if (n.fType == RBBINode.opCat) {
279                n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
280                if (n.fLeftChild.fNullable) {
281                    n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet);
282                }
283            }
284            else if (n.fType == RBBINode.opStar ||
285                     n.fType == RBBINode.opQuestion ||
286                     n.fType == RBBINode.opPlus) {
287                n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
288            }
289        }
290 
291 
292 
293        //-----------------------------------------------------------------------------
294        //
295        //   calcLastPos.    Impossible to explain succinctly.  See Aho, section 3.9
296        //
297        //-----------------------------------------------------------------------------
calcLastPos(RBBINode n)298        void calcLastPos(RBBINode n) {
299            if (n == null) {
300                return;
301            }
302            if (n.fType == RBBINode.leafChar  ||
303                n.fType == RBBINode.endMark   ||
304                n.fType == RBBINode.lookAhead ||
305                n.fType == RBBINode.tag) {
306                // These are non-empty leaf node types.
307                n.fLastPosSet.add(n);
308                return;
309            }
310 
311            // The node is not a leaf.
312            //  Calculate lastPos on its children.
313            calcLastPos(n.fLeftChild);
314            calcLastPos(n.fRightChild);
315 
316            // Apply functions from table 3.40 in Aho
317            if (n.fType == RBBINode.opOr) {
318                n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
319                n.fLastPosSet.addAll(n.fRightChild.fLastPosSet);
320            }
321            else if (n.fType == RBBINode.opCat) {
322                n.fLastPosSet.addAll(n.fRightChild.fLastPosSet);
323                if (n.fRightChild.fNullable) {
324                    n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
325                }
326            }
327            else if (n.fType == RBBINode.opStar     ||
328                     n.fType == RBBINode.opQuestion ||
329                     n.fType == RBBINode.opPlus) {
330                n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
331            }
332        }
333 
334 
335 
336        //-----------------------------------------------------------------------------
337        //
338        //   calcFollowPos.    Impossible to explain succinctly.  See Aho, section 3.9
339        //
340        //-----------------------------------------------------------------------------
calcFollowPos(RBBINode n)341        void calcFollowPos(RBBINode n) {
342            if (n == null ||
343                n.fType == RBBINode.leafChar ||
344                n.fType == RBBINode.endMark) {
345                return;
346            }
347 
348            calcFollowPos(n.fLeftChild);
349            calcFollowPos(n.fRightChild);
350 
351            // Aho rule #1
352            if (n.fType == RBBINode.opCat) {
353                for (RBBINode i /* is 'i' in Aho's description */ : n.fLeftChild.fLastPosSet) {
354                    i.fFollowPos.addAll(n.fRightChild.fFirstPosSet);
355                }
356            }
357 
358            // Aho rule #2
359            if (n.fType == RBBINode.opStar ||
360                n.fType == RBBINode.opPlus) {
361                for (RBBINode i /* again, n and i are the names from Aho's description */ : n.fLastPosSet) {
362                    i.fFollowPos.addAll(n.fFirstPosSet);
363                }
364            }
365        }
366 
367        //-----------------------------------------------------------------------------
368        //
369        //           addRuleRootNodes    Recursively walk a parse tree, adding all nodes flagged
370        //                               as roots of a rule to a destination vector.
371        //
372        //-----------------------------------------------------------------------------
addRuleRootNodes(List<RBBINode> dest, RBBINode node)373        void addRuleRootNodes(List<RBBINode> dest, RBBINode node) {
374            if (node == null) {
375                return;
376            }
377            if (node.fRuleRoot) {
378                dest.add(node);
379                // Note: rules cannot nest. If we found a rule start node,
380                //       no child node can also be a start node.
381                return;
382            }
383            addRuleRootNodes(dest, node.fLeftChild);
384            addRuleRootNodes(dest, node.fRightChild);
385        }
386 
387        //-----------------------------------------------------------------------------
388        //
389        //   calcChainedFollowPos.    Modify the previously calculated followPos sets
390        //                            to implement rule chaining.  NOT described by Aho
391        //
392        //-----------------------------------------------------------------------------
calcChainedFollowPos(RBBINode tree)393        void calcChainedFollowPos(RBBINode tree) {
394 
395            List<RBBINode> endMarkerNodes = new ArrayList<RBBINode>();
396            List<RBBINode> leafNodes      = new ArrayList<RBBINode>();
397 
398             // get a list of all endmarker nodes.
399            tree.findNodes(endMarkerNodes, RBBINode.endMark);
400 
401            // get a list all leaf nodes
402            tree.findNodes(leafNodes, RBBINode.leafChar);
403 
404            // Collect all leaf nodes that can start matches for rules
405            // with inbound chaining enabled, which is the union of the
406            // firstPosition sets from each of the rule root nodes.
407 
408            List<RBBINode> ruleRootNodes = new ArrayList<RBBINode>();
409            addRuleRootNodes(ruleRootNodes, tree);
410 
411            Set<RBBINode> matchStartNodes = new HashSet<RBBINode>();
412            for (RBBINode node: ruleRootNodes) {
413                if (node.fChainIn) {
414                    matchStartNodes.addAll(node.fFirstPosSet);
415                }
416            }
417 
418            // Iterate over all leaf nodes,
419            //
420            for (RBBINode tNode : leafNodes) {
421                RBBINode endNode = null;
422 
423                // Identify leaf nodes that correspond to overall rule match positions.
424                //   These include an endMarkerNode in their followPos sets.
425                for (RBBINode endMarkerNode : endMarkerNodes) {
426                    if (tNode.fFollowPos.contains(endMarkerNode)) {
427                        endNode = tNode;
428                        break;
429                    }
430                }
431                if (endNode == null) {
432                    // node wasn't an end node.  Try again with the next.
433                    continue;
434                }
435 
436                // We've got a node that can end a match.
437 
438                // Line Break Specific hack:  If this node's val correspond to the $CM char class,
439                //                            don't chain from it.
440                // TODO:  Add rule syntax for this behavior, get specifics out of here and
441                //        into the rule file.
442                if (fRB.fLBCMNoChain) {
443                    int c = this.fRB.fSetBuilder.getFirstChar(endNode.fVal);
444                    if (c != -1) {
445                        // c == -1 occurs with sets containing only the {eof} marker string.
446                        int cLBProp = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK);
447                        if (cLBProp == UCharacter.LineBreak.COMBINING_MARK) {
448                            continue;
449                        }
450                    }
451                }
452 
453 
454                // Now iterate over the nodes that can start a match, looking for ones
455                //   with the same char class as our ending node.
456                for (RBBINode startNode : matchStartNodes) {
457                    if (startNode.fType != RBBINode.leafChar) {
458                        continue;
459                    }
460 
461                    if (endNode.fVal == startNode.fVal) {
462                        // The end val (character class) of one possible match is the
463                        //   same as the start of another.
464 
465                        // Add all nodes from the followPos of the start node to the
466                        //  followPos set of the end node, which will have the effect of
467                        //  letting matches transition from a match state at endNode
468                        //  to the second char of a match starting with startNode.
469                        endNode.fFollowPos.addAll(startNode.fFollowPos);
470                    }
471                }
472            }
473        }
474 
475 
476        //-----------------------------------------------------------------------------
477        //
478        //   bofFixup.    Fixup for state tables that include {bof} beginning of input testing.
479        //                Do an swizzle similar to chaining, modifying the followPos set of
480        //                the bofNode to include the followPos nodes from other {bot} nodes
481        //                scattered through the tree.
482        //
483        //                This function has much in common with calcChainedFollowPos().
484        //
485        //-----------------------------------------------------------------------------
bofFixup()486        void bofFixup() {
487            //
488            //   The parse tree looks like this ...
489            //         fTree root  --.       <cat>
490            //                               /     \
491            //                            <cat>   <#end node>
492            //                           /     \
493            //                     <bofNode>   rest
494            //                               of tree
495            //
496            //    We will be adding things to the followPos set of the <bofNode>
497            //
498            RBBINode  bofNode = fRB.fTreeRoots[fRootIx].fLeftChild.fLeftChild;
499            Assert.assrt(bofNode.fType == RBBINode.leafChar);
500            Assert.assrt(bofNode.fVal == 2);
501 
502            // Get all nodes that can be the start a match of the user-written rules
503            //  (excluding the fake bofNode)
504            //  We want the nodes that can start a match in the
505            //     part labeled "rest of tree"
506            //
507            Set<RBBINode> matchStartNodes = fRB.fTreeRoots[fRootIx].fLeftChild.fRightChild.fFirstPosSet;
508            for (RBBINode startNode : matchStartNodes) {
509                if (startNode.fType != RBBINode.leafChar) {
510                    continue;
511                }
512 
513                if (startNode.fVal == bofNode.fVal) {
514                    //  We found a leaf node corresponding to a {bof} that was
515                    //    explicitly written into a rule.
516                    //  Add everything from the followPos set of this node to the
517                    //    followPos set of the fake bofNode at the start of the tree.
518                    //
519                    bofNode.fFollowPos.addAll(startNode.fFollowPos);
520                }
521            }
522        }
523 
524        //-----------------------------------------------------------------------------
525        //
526        //   buildStateTable()    Determine the set of runtime DFA states and the
527        //                        transition tables for these states, by the algorithm
528        //                        of fig. 3.44 in Aho.
529        //
530        //                        Most of the comments are quotes of Aho's psuedo-code.
531        //
532        //-----------------------------------------------------------------------------
buildStateTable()533        void buildStateTable() {
534            //
535            // Add a dummy state 0 - the stop state.  Not from Aho.
536            int      lastInputSymbol = fRB.fSetBuilder.getNumCharCategories() - 1;
537            RBBIStateDescriptor failState = new RBBIStateDescriptor(lastInputSymbol);
538            fDStates.add(failState);
539 
540            // initially, the only unmarked state in Dstates is firstpos(root),
541            //       where toot is the root of the syntax tree for (r)#;
542            RBBIStateDescriptor initialState = new RBBIStateDescriptor(lastInputSymbol);
543            initialState.fPositions.addAll(fRB.fTreeRoots[fRootIx].fFirstPosSet);
544            fDStates.add(initialState);
545 
546            // while there is an unmarked state T in Dstates do begin
547            for (;;) {
548                RBBIStateDescriptor T = null;
549                int              tx;
550                for (tx=1; tx<fDStates.size(); tx++) {
551                    RBBIStateDescriptor temp  = fDStates.get(tx);
552                    if (temp.fMarked == false) {
553                        T = temp;
554                        break;
555                    }
556                }
557                if (T == null) {
558                    break;
559                }
560 
561                // mark T;
562                T.fMarked = true;
563 
564                // for each input symbol a do begin
565                int  a;
566                for (a = 1; a<=lastInputSymbol; a++) {
567                    // let U be the set of positions that are in followpos(p)
568                    //    for some position p in T
569                    //    such that the symbol at position p is a;
570                    Set<RBBINode> U = null;
571                    for (RBBINode p : T.fPositions) {
572                        if ((p.fType == RBBINode.leafChar) &&  (p.fVal == a)) {
573                            if (U == null) {
574                                U = new HashSet<RBBINode>();
575                            }
576                            U.addAll(p.fFollowPos);
577                        }
578                    }
579 
580                    // if U is not empty and not in DStates then
581                    int  ux = 0;
582                    boolean    UinDstates = false;
583                    if (U != null) {
584                        Assert.assrt(U.size() > 0);
585                        int  ix;
586                        for (ix=0; ix<fDStates.size(); ix++) {
587                            RBBIStateDescriptor temp2;
588                            temp2 = fDStates.get(ix);
589                            if (U.equals(temp2.fPositions)) {
590                                U  = temp2.fPositions;
591                                ux = ix;
592                                UinDstates = true;
593                                break;
594                            }
595                        }
596 
597                        // Add U as an unmarked state to Dstates
598                        if (!UinDstates)
599                        {
600                            RBBIStateDescriptor newState = new RBBIStateDescriptor(lastInputSymbol);
601                            newState.fPositions = U;
602                            fDStates.add(newState);
603                            ux = fDStates.size()-1;
604                        }
605 
606                        // Dtran[T, a] := U;
607                        T.fDtran[a] = ux;
608                    }
609                }
610            }
611        }
612 
613 
614 
615        //-----------------------------------------------------------------------------
616        //
617        //   flagAcceptingStates    Identify accepting states.
618        //                          First get a list of all of the end marker nodes.
619        //                          Then, for each state s,
620        //                              if s contains one of the end marker nodes in its list of tree positions then
621        //                                  s is an accepting state.
622        //
623        //-----------------------------------------------------------------------------
flagAcceptingStates()624        void     flagAcceptingStates() {
625            List<RBBINode> endMarkerNodes = new ArrayList<RBBINode>();
626            RBBINode    endMarker;
627            int     i;
628            int     n;
629 
630            fRB.fTreeRoots[fRootIx].findNodes(endMarkerNodes, RBBINode.endMark);
631 
632            for (i=0; i<endMarkerNodes.size(); i++) {
633                endMarker = endMarkerNodes.get(i);
634                for (n=0; n<fDStates.size(); n++) {
635                    RBBIStateDescriptor sd = fDStates.get(n);
636                    //if (sd.fPositions.indexOf(endMarker) >= 0) {
637                    if (sd.fPositions.contains(endMarker)) {
638                        // Any non-zero value for fAccepting means this is an accepting node.
639                        // The value is what will be returned to the user as the break status.
640                        // If no other value was specified, force it to -1.
641 
642                        if (sd.fAccepting==0) {
643                         // State hasn't been marked as accepting yet.  Do it now.
644                            sd.fAccepting = endMarker.fVal;
645                            if (sd.fAccepting == 0) {
646                                sd.fAccepting = -1;
647                         }
648                        }
649                        if (sd.fAccepting==-1 && endMarker.fVal != 0) {
650                         // Both lookahead and non-lookahead accepting for this state.
651                         // Favor the look-ahead.  Expedient for line break.
652                         // TODO:  need a more elegant resolution for conflicting rules.
653                         sd.fAccepting = endMarker.fVal;
654                     }
655                         // implicit else:
656                         // if sd.fAccepting already had a value other than 0 or -1, leave it be.
657 
658                        // If the end marker node is from a look-ahead rule, set
659                        //   the fLookAhead field for this state also.
660                        if (endMarker.fLookAheadEnd) {
661                         // TODO:  don't change value if already set?
662                         // TODO:  allow for more than one active look-ahead rule in engine.
663                         //        Make value here an index to a side array in engine?
664                            sd.fLookAhead = sd.fAccepting;
665                        }
666                    }
667                }
668            }
669        }
670 
671 
672        //-----------------------------------------------------------------------------
673        //
674        //    flagLookAheadStates   Very similar to flagAcceptingStates, above.
675        //
676        //-----------------------------------------------------------------------------
flagLookAheadStates()677        void     flagLookAheadStates() {
678            List<RBBINode> lookAheadNodes = new ArrayList<RBBINode>();
679            RBBINode    lookAheadNode;
680            int     i;
681            int     n;
682 
683            fRB.fTreeRoots[fRootIx].findNodes(lookAheadNodes, RBBINode.lookAhead);
684            for (i=0; i<lookAheadNodes.size(); i++) {
685                lookAheadNode = lookAheadNodes.get(i);
686 
687                for (n=0; n<fDStates.size(); n++) {
688                    RBBIStateDescriptor sd = fDStates.get(n);
689                    if (sd.fPositions.contains(lookAheadNode)) {
690                        sd.fLookAhead = lookAheadNode.fVal;
691                    }
692                }
693            }
694        }
695 
696 
697 
698 
699        //-----------------------------------------------------------------------------
700        //
701        //    flagTaggedStates
702        //
703        //-----------------------------------------------------------------------------
flagTaggedStates()704        void     flagTaggedStates() {
705            List<RBBINode> tagNodes = new ArrayList<RBBINode>();
706            RBBINode    tagNode;
707            int     i;
708            int     n;
709 
710            fRB.fTreeRoots[fRootIx].findNodes(tagNodes, RBBINode.tag);
711            for (i=0; i<tagNodes.size(); i++) {                   // For each tag node t (all of 'em)
712                tagNode = tagNodes.get(i);
713 
714                for (n=0; n<fDStates.size(); n++) {              //    For each state  s (row in the state table)
715                    RBBIStateDescriptor sd = fDStates.get(n);
716                    if (sd.fPositions.contains(tagNode)) {       //       if  s include the tag node t
717                        sd.fTagVals.add(Integer.valueOf(tagNode.fVal));
718                    }
719                }
720            }
721        }
722 
723 
724 
725        //-----------------------------------------------------------------------------
726        //
727        //  mergeRuleStatusVals
728        //
729        //      Allocate positions in the  global array of rule status {tag} values
730        //
731        //      The RBBI runtime uses an array of {sets of status values} that can
732        //      be returned for boundaries.  Each accepting state that has non-zero
733        //      status includes an index into this array.  The format of the array
734        //      is
735        //           Num of status values in group 1
736        //              status val
737        //              status val
738        //              ...
739        //           Num of status vals in group 2
740        //              status val
741        //              status val
742        //              ...
743        //           etc.
744        //
745        //
746        //-----------------------------------------------------------------------------
747 
mergeRuleStatusVals()748        void  mergeRuleStatusVals() {
749            //
750            //  The basic outline of what happens here is this...
751            //
752            //    for each state in this state table
753            //       if the status tag list for this state is in the global statuses list
754            //           record where and
755            //           continue with the next state
756            //       else
757            //           add the tag list for this state to the global list.
758            //
759            int n;
760 
761            // Pre-load a single tag of {0} into the table.
762            //   We will need this as a default, for rule sets with no explicit tagging,
763            //   or with explicit tagging of {0}.
764            if (fRB.fRuleStatusVals.size() == 0) {
765                fRB.fRuleStatusVals.add(Integer.valueOf(1));    // Num of statuses in group
766                fRB.fRuleStatusVals.add(Integer.valueOf(0));    //   and our single status of zero
767 
768                SortedSet<Integer> s0 = new TreeSet<Integer>();
769                Integer izero = Integer.valueOf(0);
770                fRB.fStatusSets.put(s0, izero);
771                SortedSet<Integer> s1 = new TreeSet<Integer>();
772                s1.add(izero);
773                fRB.fStatusSets.put(s0, izero);
774            }
775 
776            //    For each state, check whether the state's status tag values are
777            //       already entered into the status values array, and add them if not.
778            for (n=0; n<fDStates.size(); n++) {
779                RBBIStateDescriptor sd = fDStates.get(n);
780                Set<Integer> statusVals = sd.fTagVals;
781                Integer arrayIndexI = fRB.fStatusSets.get(statusVals);
782                if (arrayIndexI == null) {
783                    // This is the first encounter of this set of status values.
784                    //   Add them to the statusSets map, This map associates
785                    //   the set of status values with an index in the runtime status
786                    //   values array.
787                    arrayIndexI = Integer.valueOf(fRB.fRuleStatusVals.size());
788                    fRB.fStatusSets.put(statusVals, arrayIndexI);
789 
790                    // Add the new set of status values to the vector of values that
791                    //   will eventually become the array used by the runtime engine.
792                    fRB.fRuleStatusVals.add(Integer.valueOf(statusVals.size()));
793                    fRB.fRuleStatusVals.addAll(statusVals);
794                }
795 
796                // Save the runtime array index back into the state descriptor.
797                sd.fTagsIdx = arrayIndexI.intValue();
798            }
799        }
800 
801 
802 
803 
804 
805 
806 
807        //-----------------------------------------------------------------------------
808        //
809        //  printPosSets   Debug function.  Dump Nullable, firstpos, lastpos and followpos
810        //                 for each node in the tree.
811        //
812        //-----------------------------------------------------------------------------
813 
printPosSets(RBBINode n)814        void printPosSets(RBBINode n) {
815            if (n==null) {
816                return;
817            }
818            RBBINode.printNode(n);
819            System.out.print("         Nullable:  " + n.fNullable);
820 
821            System.out.print("         firstpos:  ");
822            printSet(n.fFirstPosSet);
823 
824            System.out.print("         lastpos:   ");
825            printSet(n.fLastPosSet);
826 
827            System.out.print("         followpos: ");
828            printSet(n.fFollowPos);
829 
830            printPosSets(n.fLeftChild);
831            printPosSets(n.fRightChild);
832        }
833 
834 
835 
836        /**
837         *  Find duplicate (redundant) character classes. Begin looking with categories.first.
838         *  Duplicates, if found are returned in the categories parameter.
839         *  This is an iterator-like function, used to identify character classes
840         *  (state table columns) that can be eliminated.
841         *  @param categories in/out parameter, specifies where to start looking for duplicates,
842         *                and returns the first pair of duplicates found, if any.
843         *  @return true if duplicate char classes were found, false otherwise.
844         *  @internal
845         */
findDuplCharClassFrom(RBBIRuleBuilder.IntPair categories)846        boolean findDuplCharClassFrom(RBBIRuleBuilder.IntPair categories) {
847            int numStates = fDStates.size();
848            int numCols = fRB.fSetBuilder.getNumCharCategories();
849 
850            int table_base = 0;
851            int table_dupl = 0;
852            for (; categories.first < numCols-1; ++categories.first) {
853                for (categories.second=categories.first+1; categories.second < numCols; ++categories.second) {
854                    for (int state=0; state<numStates; state++) {
855                        RBBIStateDescriptor sd = fDStates.get(state);
856                        table_base = sd.fDtran[categories.first];
857                        table_dupl = sd.fDtran[categories.second];
858                        if (table_base != table_dupl) {
859                            break;
860                        }
861                    }
862                    if (table_base == table_dupl) {
863                        return true;
864                    }
865                }
866            }
867            return false;
868        }
869 
870        /**
871         * Remove a column from the state table. Used when two character categories
872         * have been found equivalent, and merged together, to eliminate the unneeded table column.
873         */
removeColumn(int column)874        void removeColumn(int column) {
875            int numStates = fDStates.size();
876            for (int state=0; state<numStates; state++) {
877                RBBIStateDescriptor sd = fDStates.get(state);
878                assert(column < sd.fDtran.length);
879                int[] newArray = Arrays.copyOf(sd.fDtran, sd.fDtran.length - 1);
880                System.arraycopy(sd.fDtran, column+1, newArray, column, newArray.length - column);
881                sd.fDtran = newArray;
882            }
883        }
884 
885 
886        /**
887         *  Find duplicate (redundant) states, beginning at the specified pair,
888         *  within this state table. This is an iterator-like function, used to
889         *  identify states (state table rows) that can be eliminated.
890         *  @param states in/out parameter, specifies where to start looking for duplicates,
891         *                and returns the first pair of duplicates found, if any.
892         *  @return true if duplicate states were found, false otherwise.
893         *  @internal
894         */
895        boolean findDuplicateState(RBBIRuleBuilder.IntPair states) {
896            int numStates = fDStates.size();
897            int numCols = fRB.fSetBuilder.getNumCharCategories();
898 
899            for (; states.first<numStates-1; ++states.first) {
900                RBBIStateDescriptor firstSD = fDStates.get(states.first);
901                for (states.second=states.first+1; states.second<numStates; ++states.second) {
902                    RBBIStateDescriptor duplSD = fDStates.get(states.second);
903                    if (firstSD.fAccepting != duplSD.fAccepting ||
904                            firstSD.fLookAhead != duplSD.fLookAhead ||
905                            firstSD.fTagsIdx   != duplSD.fTagsIdx) {
906                        continue;
907                    }
908                    boolean rowsMatch = true;
909                    for (int col=0; col < numCols; ++col) {
910                        int firstVal = firstSD.fDtran[col];
911                        int duplVal = duplSD.fDtran[col];
912                        if (!((firstVal == duplVal) ||
913                                ((firstVal == states.first || firstVal == states.second) &&
914                                        (duplVal  == states.first || duplVal  == states.second)))) {
915                            rowsMatch = false;
916                            break;
917                        }
918                    }
919                    if (rowsMatch) {
920                        return true;
921                    }
922                }
923            }
924            return false;
925        }
926 
927        /**
928         *  Find the next duplicate state in the safe reverse table. An iterator function.
929         *  @param states in/out parameter, specifies where to start looking for duplicates,
930         *                and returns the first pair of duplicates found, if any.
931         *  @return true if duplicate states were found, false otherwise.
932         *  @internal
933         */
934        boolean findDuplicateSafeState(RBBIRuleBuilder.IntPair states) {
935            int numStates = fSafeTable.size();
936 
937            for (; states.first<numStates-1; ++states.first) {
938                short[] firstRow = fSafeTable.get(states.first);
939                for (states.second=states.first+1; states.second<numStates; ++states.second) {
940                    short[] duplRow = fSafeTable.get(states.second);
941                    boolean rowsMatch = true;
942                    int numCols = firstRow.length;
943                    for (int col=0; col < numCols; ++col) {
944                        int firstVal = firstRow[col];
945                        int duplVal = duplRow[col];
946                        if (!((firstVal == duplVal) ||
947                                ((firstVal == states.first || firstVal == states.second) &&
948                                        (duplVal  == states.first || duplVal  == states.second)))) {
949                            rowsMatch = false;
950                            break;
951                        }
952                    }
953                    if (rowsMatch) {
954                        return true;
955                    }
956                }
957            }
958            return false;
959        }
960 
961        /**
962         * Remove a duplicate state (row) from the state table. All references to the deleted (second) state
963         * are redirected to first state.
964         * @param duplStates The duplicate pair of states.
965         * @internal
966         */
967        void removeState(IntPair duplStates) {
968            final int keepState = duplStates.first;
969            final int duplState = duplStates.second;
970            assert(keepState < duplState);
971            assert(duplState < fDStates.size());
972 
973            fDStates.remove(duplState);
974 
975            int numStates = fDStates.size();
976            int numCols = fRB.fSetBuilder.getNumCharCategories();
977            for (int state=0; state<numStates; ++state) {
978                RBBIStateDescriptor sd = fDStates.get(state);
979                for (int col=0; col<numCols; col++) {
980                    int existingVal = sd.fDtran[col];
981                    int newVal = existingVal;
982                    if (existingVal == duplState) {
983                        newVal = keepState;
984                    } else if (existingVal > duplState) {
985                        newVal = existingVal - 1;
986                    }
987                    sd.fDtran[col] = newVal;
988                }
989                if (sd.fAccepting == duplState) {
990                    sd.fAccepting = keepState;
991                } else if (sd.fAccepting > duplState) {
992                    sd.fAccepting--;
993                }
994                if (sd.fLookAhead == duplState) {
995                    sd.fLookAhead = keepState;
996                } else if (sd.fLookAhead > duplState) {
997                    sd.fLookAhead--;
998                }
999            }
1000        }
1001 
1002        /**
1003         * Remove a duplicate state from the safe table.
1004         * @param duplStates The duplicate pair of states.  The first is kept, the second is removed.
1005         *                   All references to the second in the state table are retargeted
1006         *                   to the first.
1007         * @internal
1008         */
1009        void removeSafeState(IntPair duplStates) {
1010            final int keepState = duplStates.first;
1011            final int duplState = duplStates.second;
1012            assert(keepState < duplState);
1013            assert(duplState < fSafeTable.size());
1014 
1015            fSafeTable.remove(duplState);
1016            int numStates = fSafeTable.size();
1017            for (int state=0; state<numStates; ++state) {
1018                short[] row = fSafeTable.get(state);
1019                for (int col=0; col<row.length; col++) {
1020                    int existingVal = row[col];
1021                    int newVal = existingVal;
1022                    if (existingVal == duplState) {
1023                        newVal = keepState;
1024                    } else if (existingVal > duplState) {
1025                        newVal = existingVal - 1;
1026                    }
1027                    row[col] = (short)newVal;
1028                }
1029            }
1030        }
1031 
1032 
1033        /**
1034         *  Check for, and remove duplicate states (table rows).
1035         *  @return the number of states removed.
1036         *  @internal
1037         */
1038        int removeDuplicateStates() {
1039            IntPair dupls = new IntPair(3, 0);
1040            int numStatesRemoved = 0;
1041 
1042            while (findDuplicateState(dupls)) {
1043                // System.out.printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
1044                removeState(dupls);
1045                ++numStatesRemoved;
1046            }
1047            return numStatesRemoved;
1048        }
1049 
1050 
1051        /**
1052         *  Calculate the size in bytes of the serialized form of this state transition table,
1053         *  which is identical to the ICU4C runtime form.
1054         *  Refer to common/rbbidata.h from ICU4C for the declarations of the structures
1055         *  being matched by this calculation.
1056         */
1057        int  getTableSize()  {
1058            if (fRB.fTreeRoots[fRootIx] == null) {
1059                return 0;
1060            }
1061            int size    = 16;    // The header of 4 ints, with no rows to the table.
1062            int numRows = fDStates.size();
1063            int numCols = fRB.fSetBuilder.getNumCharCategories();
1064            int rowSize = 8 + 2*numCols;
1065            size   += numRows * rowSize;
1066            size = (size + 7) & ~7;   // round up to a multiple of 8 bytes
1067            return size;
1068        }
1069 
1070 
1071 
1072        /**
1073         * Create a RBBIDataWrapper.RBBIStateTable for a newly compiled table.
1074         * RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C,
1075         * in common/rbbidata.h
1076         */
1077        RBBIDataWrapper.RBBIStateTable exportTable() {
1078            int                state;
1079            int                col;
1080 
1081            RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable();
1082            if (fRB.fTreeRoots[fRootIx] == null) {
1083                return table;
1084            }
1085 
1086            Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff &&
1087                fDStates.size() < 0x7fff);
1088            table.fNumStates = fDStates.size();
1089 
1090            // Size of table size in shorts.
1091            //  the "4" is the size of struct RBBIStateTableRow, the row header part only.
1092            int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories();   // Row Length in shorts.
1093            int tableSize = (getTableSize() - 16) / 2;       // fTable length in shorts.
1094            table.fTable = new short[tableSize];
1095            table.fRowLen = rowLen * 2;                      // Row length in bytes.
1096 
1097            if (fRB.fLookAheadHardBreak) {
1098                table.fFlags  |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK;
1099            }
1100            if (fRB.fSetBuilder.sawBOF()) {
1101                table.fFlags  |= RBBIDataWrapper.RBBI_BOF_REQUIRED;
1102            }
1103 
1104            int numCharCategories = fRB.fSetBuilder.getNumCharCategories();
1105            for (state=0; state<table.fNumStates; state++) {
1106                RBBIStateDescriptor sd = fDStates.get(state);
1107                int row = state*rowLen;
1108                Assert.assrt (-32768 < sd.fAccepting && sd.fAccepting <= 32767);
1109                Assert.assrt (-32768 < sd.fLookAhead && sd.fLookAhead <= 32767);
1110                table.fTable[row + RBBIDataWrapper.ACCEPTING] = (short)sd.fAccepting;
1111                table.fTable[row + RBBIDataWrapper.LOOKAHEAD] = (short)sd.fLookAhead;
1112                table.fTable[row + RBBIDataWrapper.TAGIDX]    = (short)sd.fTagsIdx;
1113                for (col=0; col<numCharCategories; col++) {
1114                    table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = (short)sd.fDtran[col];
1115                }
1116            }
1117            return table;
1118        }
1119 
1120        /**
1121         *   Synthesize a safe state table from the main state table.
1122         */
1123        void buildSafeReverseTable() {
1124            // Safe Reverse table construction is described in more detail in the corresponding
1125            // function in ICU4C, in source/common/rbbitblb.cpp. Not duplicated here because
1126            // it is too likely to get out of sync.
1127 
1128            // Each safe pair is stored as two chars in the safePair stringBuilder.
1129            StringBuilder safePairs = new StringBuilder();
1130 
1131            int numCharClasses = fRB.fSetBuilder.getNumCharCategories();
1132            int numStates = fDStates.size();
1133 
1134            for (int c1=0; c1<numCharClasses; ++c1) {
1135                for (int c2=0; c2 < numCharClasses; ++c2) {
1136                    int wantedEndState = -1;
1137                    int endState = 0;
1138                    for (int startState = 1; startState < numStates; ++startState) {
1139                        RBBIStateDescriptor startStateD = fDStates.get(startState);
1140                        int s2 = startStateD.fDtran[c1];
1141                        RBBIStateDescriptor s2StateD = fDStates.get(s2);
1142                        endState = s2StateD.fDtran[c2];
1143                        if (wantedEndState < 0) {
1144                            wantedEndState = endState;
1145                        } else {
1146                            if (wantedEndState != endState) {
1147                                break;
1148                            }
1149                        }
1150                    }
1151                    if (wantedEndState == endState) {
1152                        safePairs.append((char)c1);
1153                        safePairs.append((char)c2);
1154                        // System.out.printf("(%d, %d) ", c1, c2);
1155                    }
1156                }
1157                // System.out.printf("\n");
1158            }
1159 
1160            // Populate the initial safe table.
1161            // The table as a whole is a List<short[]>
1162            // Row 0 is the stop state.
1163            // Row 1 is the start sate.
1164            // Row 2 and beyond are other states, initially one per char class, but
1165            //   after initial construction, many of the states will be combined, compacting the table.)
1166            // The String holds the nextState data only. The four leading fields of a row, fAccepting,
1167            // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building.
1168 
1169            assert(fSafeTable == null);
1170            fSafeTable = new ArrayList<short[]>();
1171            for (int row=0; row<numCharClasses + 2; ++row) {
1172                fSafeTable.add(new short[numCharClasses]);
1173            }
1174 
1175            // From the start state, each input char class transitions to the state for that input.
1176            short[] startState = fSafeTable.get(1);
1177            for (int charClass=0; charClass < numCharClasses; ++charClass) {
1178                // Note: +2 to skip the start & stop state rows.
1179                startState[charClass] = (short)(charClass+2);
1180            }
1181 
1182            // Initially make every other state table row look like the start state row
1183            //    (except for the stop state, which remains all 0)
1184            for (int row=2; row<numCharClasses+2; ++row) {
1185                System.arraycopy(startState, 0, fSafeTable.get(row), 0, startState.length);
1186            }
1187 
1188            // Run through the safe pairs, set the next state to zero when pair has been seen.
1189            // Zero being the stop state, meaning we found a safe point.
1190            for (int pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) {
1191                int c1 = safePairs.charAt(pairIdx);
1192                int c2 = safePairs.charAt(pairIdx + 1);
1193 
1194                short[] rowState = fSafeTable.get(c2 + 2);
1195                rowState[c1] = 0;
1196            }
1197 
1198            // Remove duplicate or redundant rows from the table.
1199            RBBIRuleBuilder.IntPair states = new RBBIRuleBuilder.IntPair(1, 0);
1200            while (findDuplicateSafeState(states)) {
1201                // System.out.printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
1202                removeSafeState(states);
1203            }
1204        }
1205 
1206 
1207        /**
1208         *  Calculate the size of the runtime form of this safe state table.
1209         */
1210        int getSafeTableSize() {
1211            if (fSafeTable == null) {
1212                return 0;
1213            }
1214            int size    = 16;    // The header of 4 ints, with no rows to the table.
1215            int numRows = fSafeTable.size();
1216            int numCols = fSafeTable.get(0).length;
1217            int rowSize = 8 + 2*numCols;
1218            size += numRows * rowSize;
1219            // TODO: there are redundant round-up. Figure out best place, get rid of the rest.
1220            size = (size + 7) & ~7;   // round up to a multiple of 8 bytes
1221            return size;
1222        }
1223 
1224 
1225        /**
1226         *  Create a RBBIDataWrapper.RBBIStateTable for the safe reverse table.
1227         *  RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C,
1228         *  in common/rbbidata.h
1229         */
1230        RBBIDataWrapper.RBBIStateTable exportSafeTable() {
1231            RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable();
1232            table.fNumStates = fSafeTable.size();
1233            int numCharCategories = fSafeTable.get(0).length;
1234 
1235            // Size of table size in shorts.
1236            //  the "4" is the size of struct RBBIStateTableRow, the row header part only.
1237            int rowLen = 4 + numCharCategories;
1238            // TODO: tableSize is basically numStates * numCharCategories,
1239            //       except for alignment padding. Clean up here, and in main exportTable().
1240            int tableSize = (getSafeTableSize() - 16) / 2;   // fTable length in shorts.
1241            table.fTable = new short[tableSize];
1242            table.fRowLen = rowLen * 2;                      // Row length in bytes.
1243 
1244            for (int state=0; state<table.fNumStates; state++) {
1245                short[] rowArray = fSafeTable.get(state);
1246                int row = state * rowLen;
1247 
1248                for (int col=0; col<numCharCategories; col++) {
1249                    table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = rowArray[col];
1250                }
1251            }
1252            return table;
1253        }
1254 
1255 
1256        //-----------------------------------------------------------------------------
1257        //
1258        //   printSet    Debug function.   Print the contents of a set of Nodes
1259        //
1260        //-----------------------------------------------------------------------------
1261 
1262        void printSet(Collection<RBBINode> s) {
1263            for (RBBINode n : s) {
1264                RBBINode.printInt(n.fSerialNum, 8);
1265            }
1266            System.out.println();
1267        }
1268 
1269 
1270 
1271        //-----------------------------------------------------------------------------
1272        //
1273        //   printStates    Debug Function.  Dump the fully constructed state transition table.
1274        //
1275        //-----------------------------------------------------------------------------
1276 
1277        void printStates() {
1278            int     c;    // input "character"
1279            int     n;    // state number
1280 
1281            System.out.print("state |           i n p u t     s y m b o l s \n");
1282            System.out.print("      | Acc  LA    Tag");
1283            for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
1284                RBBINode.printInt(c, 3);
1285            }
1286            System.out.print("\n");
1287            System.out.print("      |---------------");
1288            for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
1289                System.out.print("---");
1290            }
1291            System.out.print("\n");
1292 
1293            for (n=0; n<fDStates.size(); n++) {
1294                RBBIStateDescriptor sd = fDStates.get(n);
1295                RBBINode.printInt(n, 5);
1296                System.out.print(" | ");
1297 
1298                RBBINode.printInt(sd.fAccepting, 3);
1299                RBBINode.printInt(sd.fLookAhead, 4);
1300                RBBINode.printInt(sd.fTagsIdx, 6);
1301                System.out.print(" ");
1302                for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
1303                    RBBINode.printInt(sd.fDtran[c], 3);
1304                }
1305                System.out.print("\n");
1306            }
1307            System.out.print("\n\n");
1308        }
1309 
1310 
1311        /**
1312         * Debug Function.  Dump the fully constructed safe reverse table.
1313         */
1314        void printReverseTable() {
1315            int     c;    // input "character"
1316 
1317            System.out.printf("    Safe Reverse Table \n");
1318            if (fSafeTable == null) {
1319                System.out.printf("   --- nullptr ---\n");
1320                return;
1321            }
1322            int numCharCategories = fSafeTable.get(0).length;
1323            System.out.printf("state |           i n p u t     s y m b o l s \n");
1324            System.out.printf("      | Acc  LA    Tag");
1325            for (c=0; c< numCharCategories;  c++) {
1326                System.out.printf(" %2d", c);
1327            }
1328            System.out.printf("\n");
1329            System.out.printf("      |---------------");
1330            for (c=0; c<numCharCategories; c++) {
1331                System.out.printf("---");
1332            }
1333            System.out.printf("\n");
1334 
1335            for (int n=0; n<fSafeTable.size(); n++) {
1336                short rowArray[]  = fSafeTable.get(n);
1337                System.out.printf("  %3d | " , n);
1338                System.out.printf("%3d %3d %5d ", 0, 0, 0);  // Accepting, LookAhead, Tags
1339                for (c=0; c<numCharCategories; c++) {
1340                    System.out.printf(" %2d", rowArray[c]);
1341                }
1342                System.out.printf("\n");
1343            }
1344            System.out.printf("\n\n");
1345        }
1346 
1347 
1348 
1349 
1350 
1351        //-----------------------------------------------------------------------------
1352        //
1353        //   printRuleStatusTable    Debug Function.  Dump the common rule status table
1354        //
1355        //-----------------------------------------------------------------------------
1356 
1357        void printRuleStatusTable() {
1358            int  thisRecord = 0;
1359            int  nextRecord = 0;
1360            int      i;
1361            List<Integer> tbl = fRB.fRuleStatusVals;
1362 
1363            System.out.print("index |  tags \n");
1364            System.out.print("-------------------\n");
1365 
1366            while (nextRecord < tbl.size()) {
1367                thisRecord = nextRecord;
1368                nextRecord = thisRecord + tbl.get(thisRecord).intValue() + 1;
1369                RBBINode.printInt(thisRecord, 7);
1370                for (i=thisRecord+1; i<nextRecord; i++) {
1371                    int val = tbl.get(i).intValue();
1372                    RBBINode.printInt(val, 7);
1373                }
1374                System.out.print("\n");
1375            }
1376            System.out.print("\n\n");
1377        }
1378 
1379 
1380 
1381 }
1382