1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 2001-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 
9 package com.ibm.icu.text;
10 
11 import java.util.HashSet;
12 import java.util.List;
13 import java.util.Set;
14 
15 import com.ibm.icu.impl.Assert;
16 
17 /**
18  *   This class represents a node in the parse tree created by the RBBI Rule compiler.
19  */
20 class RBBINode {
21 
22 
23  //   enum NodeType {
24      static final int    setRef = 0;
25      static final int    uset = 1;
26      static final int    varRef = 2;
27      static final int    leafChar = 3;
28      static final int    lookAhead = 4;
29      static final int    tag = 5;
30      static final int    endMark = 6;
31      static final int    opStart = 7;
32      static final int    opCat = 8;
33      static final int    opOr = 9;
34      static final int    opStar = 10;
35      static final int    opPlus = 11;
36      static final int    opQuestion = 12;
37      static final int    opBreak = 13;
38      static final int    opReverse = 14;
39      static final int    opLParen = 15;
40      static final int    nodeTypeLimit = 16;    //  For Assertion checking only.
41 
42      static final String []  nodeTypeNames = {
43          "setRef",
44          "uset",
45          "varRef",
46          "leafChar",
47          "lookAhead",
48          "tag",
49          "endMark",
50          "opStart",
51          "opCat",
52          "opOr",
53          "opStar",
54          "opPlus",
55          "opQuestion",
56          "opBreak",
57          "opReverse",
58          "opLParen"
59      };
60 
61 //    enum OpPrecedence {
62     static final int    precZero   = 0;
63     static final int    precStart  = 1;
64     static final int    precLParen = 2;
65     static final int    precOpOr   = 3;
66     static final int    precOpCat  = 4;
67 
68     int          fType;   // enum NodeType
69     RBBINode      fParent;
70     RBBINode      fLeftChild;
71     RBBINode      fRightChild;
72     UnicodeSet    fInputSet;           // For uset nodes only.
73     int          fPrecedence = precZero;   // enum OpPrecedence, For binary ops only.
74 
75     String       fText;                 // Text corresponding to this node.
76                                         //   May be lazily evaluated when (if) needed
77                                         //   for some node types.
78     int           fFirstPos;            // Position in the rule source string of the
79                                         //   first text associated with the node.
80                                         //   If there's a left child, this will be the same
81                                         //   as that child's left pos.
82     int           fLastPos;             //  Last position in the rule source string
83                                         //    of any text associated with this node.
84                                         //    If there's a right child, this will be the same
85                                         //    as that child's last postion.
86 
87     boolean      fNullable;            //  See Aho DFA table generation algorithm
88     int           fVal;                 // For leafChar nodes, the value.
89                                         //   Values are the character category,
90                                         //   corresponds to columns in the final
91                                         //   state transition table.
92 
93     boolean      fLookAheadEnd;        // For endMark nodes, set TRUE if
94                                        //   marking the end of a look-ahead rule.
95 
96     boolean      fRuleRoot;             // True if this node is the root of a rule.
97     boolean      fChainIn;              // True if chaining into this rule is allowed
98                                         //     (no '^' present).
99 
100 
101     Set<RBBINode> fFirstPosSet;         // See Aho DFA table generation algorithm
102     Set<RBBINode> fLastPosSet;          // See Aho.
103     Set<RBBINode> fFollowPos;           // See Aho.
104 
105     int           fSerialNum;           //  Debugging aids.  Each node gets a unique serial number.
106     static int    gLastSerial;
107 
RBBINode(int t)108     RBBINode(int t) {
109         Assert.assrt(t < nodeTypeLimit);
110         fSerialNum = ++gLastSerial;
111         fType = t;
112 
113         fFirstPosSet = new HashSet<RBBINode>();
114         fLastPosSet = new HashSet<RBBINode>();
115         fFollowPos = new HashSet<RBBINode>();
116         if (t == opCat) {
117             fPrecedence = precOpCat;
118         } else if (t == opOr) {
119             fPrecedence = precOpOr;
120         } else if (t == opStart) {
121             fPrecedence = precStart;
122         } else if (t == opLParen) {
123             fPrecedence = precLParen;
124         } else {
125             fPrecedence = precZero;
126         }
127     }
128 
129     RBBINode(RBBINode other) {
130         fSerialNum = ++gLastSerial;
131         fType = other.fType;
132         fInputSet = other.fInputSet;
133         fPrecedence = other.fPrecedence;
134         fText = other.fText;
135         fFirstPos = other.fFirstPos;
136         fLastPos = other.fLastPos;
137         fNullable = other.fNullable;
138         fVal = other.fVal;
139         fRuleRoot = false;
140         fChainIn = other.fChainIn;
141         fFirstPosSet = new HashSet<RBBINode>(other.fFirstPosSet);
142         fLastPosSet = new HashSet<RBBINode>(other.fLastPosSet);
143         fFollowPos = new HashSet<RBBINode>(other.fFollowPos);
144     }
145 
146     //-------------------------------------------------------------------------
147     //
148     //        cloneTree Make a copy of the subtree rooted at this node.
149     //                      Discard any variable references encountered along the way,
150     //                      and replace with copies of the variable's definitions.
151     //                      Used to replicate the expression underneath variable
152     //                      references in preparation for generating the DFA tables.
153     //
154     //-------------------------------------------------------------------------
155     RBBINode cloneTree() {
156         RBBINode n;
157 
158         if (fType == RBBINode.varRef) {
159             // If the current node is a variable reference, skip over it
160             //   and clone the definition of the variable instead.
161             n = fLeftChild.cloneTree();
162         } else if (fType == RBBINode.uset) {
163             n = this;
164         } else {
165             n = new RBBINode(this);
166             if (fLeftChild != null) {
167                 n.fLeftChild = fLeftChild.cloneTree();
168                 n.fLeftChild.fParent = n;
169             }
170             if (fRightChild != null) {
171                 n.fRightChild = fRightChild.cloneTree();
172                 n.fRightChild.fParent = n;
173             }
174         }
175         return n;
176     }
177 
178 
179 
180     //-------------------------------------------------------------------------
181     //
182     //       flattenVariables Walk a parse tree, replacing any variable
183     //                          references with a copy of the variable's definition.
184     //                          Aside from variables, the tree is not changed.
185     //
186     //                          Return the root of the tree. If the root was not a variable
187     //                          reference, it remains unchanged - the root we started with
188     //                          is the root we return. If, however, the root was a variable
189     //                          reference, the root of the newly cloned replacement tree will
190     //                          be returned, and the original tree deleted.
191     //
192     //                          This function works by recursively walking the tree
193     //                          without doing anything until a variable reference is
194     //                          found, then calling cloneTree() at that point. Any
195     //                          nested references are handled by cloneTree(), not here.
196     //
197     //-------------------------------------------------------------------------
198     RBBINode flattenVariables() {
199         if (fType == varRef) {
200             RBBINode retNode  = fLeftChild.cloneTree();
201             retNode.fRuleRoot = this.fRuleRoot;
202             retNode.fChainIn  = this.fChainIn;
203             return retNode;
204         }
205 
206         if (fLeftChild != null) {
207             fLeftChild = fLeftChild.flattenVariables();
208             fLeftChild.fParent = this;
209         }
210         if (fRightChild != null) {
211             fRightChild = fRightChild.flattenVariables();
212             fRightChild.fParent = this;
213         }
214         return this;
215     }
216 
217     //-------------------------------------------------------------------------
218     //
219     //      flattenSets Walk the parse tree, replacing any nodes of type setRef
220     //                     with a copy of the expression tree for the set. A set's
221     //                     equivalent expression tree is precomputed and saved as
222     //                     the left child of the uset node.
223     //
224     //-------------------------------------------------------------------------
225     void flattenSets() {
226         Assert.assrt(fType != setRef);
227 
228         if (fLeftChild != null) {
229             if (fLeftChild.fType == setRef) {
230                 RBBINode setRefNode = fLeftChild;
231                 RBBINode usetNode = setRefNode.fLeftChild;
232                 RBBINode replTree = usetNode.fLeftChild;
233                 fLeftChild = replTree.cloneTree();
234                 fLeftChild.fParent = this;
235             } else {
236                 fLeftChild.flattenSets();
237             }
238         }
239 
240         if (fRightChild != null) {
241             if (fRightChild.fType == setRef) {
242                 RBBINode setRefNode = fRightChild;
243                 RBBINode usetNode = setRefNode.fLeftChild;
244                 RBBINode replTree = usetNode.fLeftChild;
245                 fRightChild = replTree.cloneTree();
246                 fRightChild.fParent = this;
247                 // delete setRefNode;
248             } else {
249                 fRightChild.flattenSets();
250             }
251         }
252     }
253 
254     //-------------------------------------------------------------------------
255     //
256     //       findNodes() Locate all the nodes of the specified type, starting
257     //                       at the specified root.
258     //
259     //-------------------------------------------------------------------------
260     void findNodes(List<RBBINode> dest, int kind) {
261         if (fType == kind) {
262             dest.add(this);
263         }
264         if (fLeftChild != null) {
265             fLeftChild.findNodes(dest, kind);
266         }
267         if (fRightChild != null) {
268             fRightChild.findNodes(dest, kind);
269         }
270     }
271 
272 
273 
274     //-------------------------------------------------------------------------
275     //
276     //        print. Print out a single node, for debugging.
277     //
278     //-------------------------------------------------------------------------
279     ///CLOVER:OFF
280     static void printNode(RBBINode n) {
281 
282         if (n==null) {
283             System.out.print (" -- null --\n");
284         } else {
285             RBBINode.printInt( n.fSerialNum, 10);
286             RBBINode.printString(nodeTypeNames[n.fType], 11);
287             RBBINode.printInt(n.fParent==null? 0     : n.fParent.fSerialNum, 11);
288             RBBINode.printInt(n.fLeftChild==null? 0  : n.fLeftChild.fSerialNum, 11);
289             RBBINode.printInt(n.fRightChild==null? 0 : n.fRightChild.fSerialNum, 12);
290             RBBINode.printInt(n.fFirstPos, 12);
291             RBBINode.printInt(n.fVal, 7);
292 
293             if (n.fType == varRef) {
294                 System.out.print(" " + n.fText);
295             }
296         }
297         System.out.println("");
298     }
299     ///CLOVER:ON
300 
301 
302     // Print a String in a fixed field size.
303     // Debugging function.
304     ///CLOVER:OFF
305     static void printString(String s, int minWidth) {
306         for (int i = minWidth; i < 0; i++) {
307             // negative width means pad leading spaces, not fixed width.
308             System.out.print(' ');
309         }
310         for (int i = s.length(); i < minWidth; i++) {
311             System.out.print(' ');
312         }
313         System.out.print(s);
314     }
315     ///CLOVER:ON
316 
317     //
318     //  Print an int in a fixed size field.
319     //  Debugging function.
320     //
321     ///CLOVER:OFF
322     static void printInt(int i, int minWidth) {
323         String s = Integer.toString(i);
324         printString(s, Math.max(minWidth, s.length() + 1));
325     }
326     ///CLOVER:ON
327 
328     ///CLOVER:OFF
329     static void printHex(int i, int minWidth) {
330         String s = Integer.toString(i, 16);
331         String leadingZeroes = "00000"
332                 .substring(0, Math.max(0, 5 - s.length()));
333         s = leadingZeroes + s;
334         printString(s, minWidth);
335     }
336     ///CLOVER:ON
337 
338 
339     // -------------------------------------------------------------------------
340     //
341     //        print. Print out the tree of nodes rooted at "this"
342     //
343     // -------------------------------------------------------------------------
344     ///CLOVER:OFF
345     void printTree(boolean printHeading) {
346         if (printHeading) {
347             System.out.println( "-------------------------------------------------------------------");
348             System.out.println("    Serial       type     Parent  LeftChild  RightChild    position  value");
349         }
350         printNode(this);
351             // Only dump the definition under a variable reference if asked to.
352             // Unconditinally dump children of all other node types.
353             if (fType != varRef) {
354                 if (fLeftChild != null) {
355                     fLeftChild.printTree(false);
356                 }
357 
358                 if (fRightChild != null) {
359                     fRightChild.printTree(false);
360                 }
361             }
362     }
363     ///CLOVER:ON
364 
365 }
366