1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.analysis;
29 
30 import org.antlr.misc.IntSet;
31 import org.antlr.misc.IntervalSet;
32 import org.antlr.tool.Grammar;
33 
34 /** A state machine transition label.  A label can be either a simple
35  *  label such as a token or character.  A label can be a set of char or
36  *  tokens.  It can be an epsilon transition.  It can be a semantic predicate
37  *  (which assumes an epsilon transition) or a tree of predicates (in a DFA).
38  *  Special label types have to be < 0 to avoid conflict with char.
39  */
40 public class Label implements Comparable<Label>, Cloneable {
41     public static final int INVALID = -7;
42 
43 	public static final int ACTION = -6;
44 
45 	public static final int EPSILON = -5;
46 
47     public static final String EPSILON_STR = "<EPSILON>";
48 
49     /** label is a semantic predicate; implies label is epsilon also */
50     public static final int SEMPRED = -4;
51 
52     /** label is a set of tokens or char */
53     public static final int SET = -3;
54 
55     /** End of Token is like EOF for lexer rules.  It implies that no more
56      *  characters are available and that NFA conversion should terminate
57      *  for this path.  For example
58      *
59      *  A : 'a' 'b' | 'a' ;
60      *
61      *  yields a DFA predictor:
62      *
63      *  o-a-&gt;o-b-&gt;1   predict alt 1
64      *       |
65      *       |-EOT-&gt;o predict alt 2
66      *
67      *  To generate code for EOT, treat it as the "default" path, which
68      *  implies there is no way to mismatch a char for the state from
69      *  which the EOT emanates.
70      */
71     public static final int EOT = -2;
72 
73     public static final int EOF = -1;
74 
75 	/** We have labels like EPSILON that are below 0; it's hard to
76 	 *  store them in an array with negative index so use this
77 	 *  constant as an index shift when accessing arrays based upon
78 	 *  token type.  If real token type is i, then array index would be
79 	 *  NUM_FAUX_LABELS + i.
80 	 */
81 	public static final int NUM_FAUX_LABELS = -INVALID;
82 
83     /** Anything at this value or larger can be considered a simple atom int
84      *  for easy comparison during analysis only; faux labels are not used
85 	 *  during parse time for real token types or char values.
86      */
87     public static final int MIN_ATOM_VALUE = EOT;
88 
89     // TODO: is 0 a valid unicode char? max is FFFF -1, right?
90     public static final int MIN_CHAR_VALUE = '\u0000';
91     public static final int MAX_CHAR_VALUE = '\uFFFF';
92 
93 	/** End of rule token type; imaginary token type used only for
94 	 *  local, partial FOLLOW sets to indicate that the local FOLLOW
95 	 *  hit the end of rule.  During error recovery, the local FOLLOW
96 	 *  of a token reference may go beyond the end of the rule and have
97 	 *  to use FOLLOW(rule).  I have to just shift the token types to 2..n
98 	 *  rather than 1..n to accommodate this imaginary token in my bitsets.
99 	 *  If I didn't use a bitset implementation for runtime sets, I wouldn't
100 	 *  need this.  EOF is another candidate for a run time token type for
101 	 *  parsers.  Follow sets are not computed for lexers so we do not have
102 	 *  this issue.
103 	 */
104 	public static final int EOR_TOKEN_TYPE =
105 		org.antlr.runtime.Token.EOR_TOKEN_TYPE;
106 
107 	public static final int DOWN = org.antlr.runtime.Token.DOWN;
108 	public static final int UP = org.antlr.runtime.Token.UP;
109 
110     /** tokens and char range overlap; tokens are MIN_TOKEN_TYPE..n */
111 	public static final int MIN_TOKEN_TYPE =
112 		org.antlr.runtime.Token.MIN_TOKEN_TYPE;
113 
114     /** The wildcard '.' char atom implies all valid characters==UNICODE */
115     //public static final IntSet ALLCHAR = IntervalSet.of(MIN_CHAR_VALUE,MAX_CHAR_VALUE);
116 
117     /** The token type or character value; or, signifies special label. */
118     protected int label;
119 
120     /** A set of token types or character codes if label==SET */
121 	// TODO: try IntervalSet for everything
122     protected IntSet labelSet;
123 
Label(int label)124     public Label(int label) {
125         this.label = label;
126     }
127 
128     /** Make a set label */
Label(IntSet labelSet)129     public Label(IntSet labelSet) {
130 		if ( labelSet==null ) {
131 			this.label = SET;
132 			this.labelSet = IntervalSet.of(INVALID);
133 			return;
134 		}
135 		int singleAtom = labelSet.getSingleElement();
136         if ( singleAtom!=INVALID ) {
137             // convert back to a single atomic element if |labelSet|==1
138             label = singleAtom;
139             return;
140         }
141         this.label = SET;
142         this.labelSet = labelSet;
143     }
144 
145 	@Override
clone()146 	public Object clone() {
147 		Label l;
148 		try {
149 			l = (Label)super.clone();
150 			l.label = this.label;
151             l.labelSet = new IntervalSet();
152 			l.labelSet.addAll(this.labelSet);
153 		}
154 		catch (CloneNotSupportedException e) {
155 			throw new InternalError();
156 		}
157 		return l;
158 	}
159 
add(Label a)160 	public void add(Label a) {
161 		if ( isAtom() ) {
162 			labelSet = IntervalSet.of(label);
163 			label=SET;
164 			if ( a.isAtom() ) {
165 				labelSet.add(a.getAtom());
166 			}
167 			else if ( a.isSet() ) {
168 				labelSet.addAll(a.getSet());
169 			}
170 			else {
171 				throw new IllegalStateException("can't add element to Label of type "+label);
172 			}
173 			return;
174 		}
175 		if ( isSet() ) {
176 			if ( a.isAtom() ) {
177 				labelSet.add(a.getAtom());
178 			}
179 			else if ( a.isSet() ) {
180 				labelSet.addAll(a.getSet());
181 			}
182 			else {
183 				throw new IllegalStateException("can't add element to Label of type "+label);
184 			}
185 			return;
186 		}
187 		throw new IllegalStateException("can't add element to Label of type "+label);
188 	}
189 
isAtom()190     public boolean isAtom() {
191         return label>=MIN_ATOM_VALUE;
192     }
193 
isEpsilon()194     public boolean isEpsilon() {
195         return label==EPSILON;
196     }
197 
isSemanticPredicate()198 	public boolean isSemanticPredicate() {
199 		return false;
200 	}
201 
isAction()202 	public boolean isAction() {
203 		return false;
204 	}
205 
isSet()206     public boolean isSet() {
207         return label==SET;
208     }
209 
210     /** return the single atom label or INVALID if not a single atom */
getAtom()211     public int getAtom() {
212         if ( isAtom() ) {
213             return label;
214         }
215         return INVALID;
216     }
217 
getSet()218     public IntSet getSet() {
219         if ( label!=SET ) {
220             // convert single element to a set if they ask for it.
221             return IntervalSet.of(label);
222         }
223         return labelSet;
224     }
225 
setSet(IntSet set)226     public void setSet(IntSet set) {
227         label=SET;
228         labelSet = set;
229     }
230 
getSemanticContext()231     public SemanticContext getSemanticContext() {
232         return null;
233     }
234 
matches(int atom)235 	public boolean matches(int atom) {
236 		if ( label==atom ) {
237 			return true; // handle the single atom case efficiently
238 		}
239 		if ( isSet() ) {
240 			return labelSet.member(atom);
241 		}
242 		return false;
243 	}
244 
matches(IntSet set)245 	public boolean matches(IntSet set) {
246 		if ( isAtom() ) {
247 			return set.member(getAtom());
248 		}
249 		if ( isSet() ) {
250 			// matches if intersection non-nil
251 			return !getSet().and(set).isNil();
252 		}
253 		return false;
254 	}
255 
256 
matches(Label other)257 	public boolean matches(Label other) {
258 		if ( other.isSet() ) {
259 			return matches(other.getSet());
260 		}
261 		if ( other.isAtom() ) {
262 			return matches(other.getAtom());
263 		}
264 		return false;
265 	}
266 
267 	@Override
hashCode()268     public int hashCode() {
269         if (label==SET) {
270             return labelSet.hashCode();
271 		}
272 		else {
273 			return label;
274 		}
275 	}
276 
277 	// TODO: do we care about comparing set {A} with atom A? Doesn't now.
278 	@Override
equals(Object o)279 	public boolean equals(Object o) {
280 		if ( o==null ) {
281 			return false;
282 		}
283 		if ( this == o ) {
284 			return true; // equals if same object
285 		}
286 		// labels must be the same even if epsilon or set or sempred etc...
287         if ( label!=((Label)o).label ) {
288             return false;
289         }
290 		if ( label==SET ) {
291 			return this.labelSet.equals(((Label)o).labelSet);
292 		}
293 		return true;  // label values are same, so true
294     }
295 
296 	@Override
compareTo(Label o)297     public int compareTo(Label o) {
298         return this.label-o.label;
299     }
300 
301     /** Predicates are lists of AST nodes from the NFA created from the
302      *  grammar, but the same predicate could be cut/paste into multiple
303      *  places in the grammar.  I must compare the text of all the
304      *  predicates to truly answer whether {p1,p2} .equals {p1,p2}.
305      *  Unfortunately, I cannot rely on the AST.equals() to work properly
306      *  so I must do a brute force O(n^2) nested traversal of the Set
307      *  doing a String compare.
308      *
309      *  At this point, Labels are not compared for equals when they are
310      *  predicates, but here's the code for future use.
311      */
312     /*
313     protected boolean predicatesEquals(Set others) {
314         Iterator iter = semanticContext.iterator();
315         while (iter.hasNext()) {
316             AST predAST = (AST) iter.next();
317             Iterator inner = semanticContext.iterator();
318             while (inner.hasNext()) {
319                 AST otherPredAST = (AST) inner.next();
320                 if ( !predAST.getText().equals(otherPredAST.getText()) ) {
321                     return false;
322                 }
323             }
324         }
325         return true;
326     }
327       */
328 
329 	@Override
toString()330     public String toString() {
331         switch (label) {
332             case SET :
333                 return labelSet.toString();
334             default :
335                 return String.valueOf(label);
336         }
337     }
338 
toString(Grammar g)339     public String toString(Grammar g) {
340         switch (label) {
341             case SET :
342                 return labelSet.toString(g);
343             default :
344                 return g.getTokenDisplayName(label);
345         }
346     }
347 
348     /*
349     public String predicatesToString() {
350         if ( semanticContext==NFAConfiguration.DEFAULT_CLAUSE_SEMANTIC_CONTEXT ) {
351             return "!other preds";
352         }
353         StringBuffer buf = new StringBuffer();
354         Iterator iter = semanticContext.iterator();
355         while (iter.hasNext()) {
356             AST predAST = (AST) iter.next();
357             buf.append(predAST.getText());
358             if ( iter.hasNext() ) {
359                 buf.append("&");
360             }
361         }
362         return buf.toString();
363     }
364     */
365 
intersect(Label label, Label edgeLabel)366 	public static boolean intersect(Label label, Label edgeLabel) {
367 		boolean hasIntersection = false;
368 		boolean labelIsSet = label.isSet();
369 		boolean edgeIsSet = edgeLabel.isSet();
370 		if ( !labelIsSet && !edgeIsSet && edgeLabel.label==label.label ) {
371 			hasIntersection = true;
372 		}
373 		else if ( labelIsSet && edgeIsSet &&
374 				  !edgeLabel.getSet().and(label.getSet()).isNil() ) {
375 			hasIntersection = true;
376 		}
377 		else if ( labelIsSet && !edgeIsSet &&
378 				  label.getSet().member(edgeLabel.label) ) {
379 			hasIntersection = true;
380 		}
381 		else if ( !labelIsSet && edgeIsSet &&
382 				  edgeLabel.getSet().member(label.label) ) {
383 			hasIntersection = true;
384 		}
385 		return hasIntersection;
386 	}
387 }
388