1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.analysis;
29 
30 import org.antlr.grammar.v3.ANTLRParser;
31 import org.antlr.misc.IntSet;
32 import org.antlr.misc.IntervalSet;
33 import org.antlr.tool.Grammar;
34 import org.antlr.tool.Rule;
35 
36 import java.util.HashMap;
37 import java.util.HashSet;
38 import java.util.Map;
39 import java.util.Set;
40 
41 /**
42  * Created by IntelliJ IDEA.
43  * User: parrt
44  * Date: Dec 31, 2007
45  * Time: 1:31:16 PM
46  * To change this template use File | Settings | File Templates.
47  */
48 public class LL1Analyzer {
49 	/**	0	if we hit end of rule and invoker should keep going (epsilon) */
50 	public static final int DETECT_PRED_EOR = 0;
51 	/**	1	if we found a nonautobacktracking pred */
52 	public static final int DETECT_PRED_FOUND = 1;
53 	/**	2	if we didn't find such a pred */
54 	public static final int DETECT_PRED_NOT_FOUND = 2;
55 
56 	public Grammar grammar;
57 
58 	/** Used during LOOK to detect computation cycles */
59 	protected Set<NFAState> lookBusy = new HashSet<NFAState>();
60 
61 	public Map<NFAState, LookaheadSet> FIRSTCache = new HashMap<NFAState, LookaheadSet>();
62 	public Map<Rule, LookaheadSet> FOLLOWCache = new HashMap<Rule, LookaheadSet>();
63 
LL1Analyzer(Grammar grammar)64 	public LL1Analyzer(Grammar grammar) {
65 		this.grammar = grammar;
66 	}
67 
68 	/*
69 	public void computeRuleFIRSTSets() {
70 		if ( getNumberOfDecisions()==0 ) {
71 			createNFAs();
72 		}
73 		for (Iterator it = getRules().iterator(); it.hasNext();) {
74 			Rule r = (Rule)it.next();
75 			if ( r.isSynPred ) {
76 				continue;
77 			}
78 			LookaheadSet s = FIRST(r);
79 			System.out.println("FIRST("+r.name+")="+s);
80 		}
81 	}
82 	*/
83 
84 	/*
85 	public Set<String> getOverriddenRulesWithDifferentFIRST() {
86 		// walk every rule in this grammar and compare FIRST set with
87 		// those in imported grammars.
88 		Set<String> rules = new HashSet();
89 		for (Iterator it = getRules().iterator(); it.hasNext();) {
90 			Rule r = (Rule)it.next();
91 			//System.out.println(r.name+" FIRST="+r.FIRST);
92 			for (int i = 0; i < delegates.size(); i++) {
93 				Grammar g = delegates.get(i);
94 				Rule importedRule = g.getRule(r.name);
95 				if ( importedRule != null ) { // exists in imported grammar
96 					// System.out.println(r.name+" exists in imported grammar: FIRST="+importedRule.FIRST);
97 					if ( !r.FIRST.equals(importedRule.FIRST) ) {
98 						rules.add(r.name);
99 					}
100 				}
101 			}
102 		}
103 		return rules;
104 	}
105 
106 	public Set<Rule> getImportedRulesSensitiveToOverriddenRulesDueToLOOK() {
107 		Set<String> diffFIRSTs = getOverriddenRulesWithDifferentFIRST();
108 		Set<Rule> rules = new HashSet();
109 		for (Iterator it = diffFIRSTs.iterator(); it.hasNext();) {
110 			String r = (String) it.next();
111 			for (int i = 0; i < delegates.size(); i++) {
112 				Grammar g = delegates.get(i);
113 				Set<Rule> callers = g.ruleSensitivity.get(r);
114 				// somebody invokes rule whose FIRST changed in subgrammar?
115 				if ( callers!=null ) {
116 					rules.addAll(callers);
117 					//System.out.println(g.name+" rules "+callers+" sensitive to "+r+"; dup 'em");
118 				}
119 			}
120 		}
121 		return rules;
122 	}
123 */
124 
125 	/*
126 	public LookaheadSet LOOK(Rule r) {
127 		if ( r.FIRST==null ) {
128 			r.FIRST = FIRST(r.startState);
129 		}
130 		return r.FIRST;
131 	}
132 */
133 
134 	/** From an NFA state, s, find the set of all labels reachable from s.
135 	 *  Used to compute follow sets for error recovery.  Never computes
136 	 *  a FOLLOW operation.  FIRST stops at end of rules, returning EOR, unless
137 	 *  invoked from another rule.  I.e., routine properly handles
138 	 *
139 	 *     a : b A ;
140 	 *
141 	 *  where b is nullable.
142 	 *
143 	 *  We record with EOR_TOKEN_TYPE if we hit the end of a rule so we can
144 	 *  know at runtime (when these sets are used) to start walking up the
145 	 *  follow chain to compute the real, correct follow set (as opposed to
146 	 *  the FOLLOW, which is a superset).
147 	 *
148 	 *  This routine will only be used on parser and tree parser grammars.
149 	 */
FIRST(NFAState s)150 	public LookaheadSet FIRST(NFAState s) {
151 		//System.out.println("> FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule);
152 		lookBusy.clear();
153 		LookaheadSet look = _FIRST(s, false);
154 		//System.out.println("< FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule+"="+look.toString(this.grammar));
155 		return look;
156 	}
157 
FOLLOW(Rule r)158 	public LookaheadSet FOLLOW(Rule r) {
159         //System.out.println("> FOLLOW("+r.name+") in rule "+r.startState.enclosingRule);
160 		LookaheadSet f = FOLLOWCache.get(r);
161 		if ( f!=null ) {
162 			return f;
163 		}
164 		f = _FIRST(r.stopState, true);
165 		FOLLOWCache.put(r, f);
166         //System.out.println("< FOLLOW("+r+") in rule "+r.startState.enclosingRule+"="+f.toString(this.grammar));
167 		return f;
168 	}
169 
LOOK(NFAState s)170 	public LookaheadSet LOOK(NFAState s) {
171 		if ( NFAToDFAConverter.debug ) {
172 			System.out.println("> LOOK("+s+")");
173 		}
174 		lookBusy.clear();
175 		LookaheadSet look = _FIRST(s, true);
176 		// FOLLOW makes no sense (at the moment!) for lexical rules.
177 		if ( grammar.type!=Grammar.LEXER && look.member(Label.EOR_TOKEN_TYPE) ) {
178 			// avoid altering FIRST reset as it is cached
179 			LookaheadSet f = FOLLOW(s.enclosingRule);
180 			f.orInPlace(look);
181 			f.remove(Label.EOR_TOKEN_TYPE);
182 			look = f;
183 			//look.orInPlace(FOLLOW(s.enclosingRule));
184 		}
185 		else if ( grammar.type==Grammar.LEXER && look.member(Label.EOT) ) {
186 			// if this has EOT, lookahead is all char (all char can follow rule)
187 			//look = new LookaheadSet(Label.EOT);
188 			look = new LookaheadSet(IntervalSet.COMPLETE_SET);
189 		}
190 		if ( NFAToDFAConverter.debug ) {
191 			System.out.println("< LOOK("+s+")="+look.toString(grammar));
192 		}
193 		return look;
194 	}
195 
_FIRST(NFAState s, boolean chaseFollowTransitions)196 	protected LookaheadSet _FIRST(NFAState s, boolean chaseFollowTransitions) {
197 		/*
198 		System.out.println("_LOOK("+s+") in rule "+s.enclosingRule);
199 		if ( s.transition[0] instanceof RuleClosureTransition ) {
200 			System.out.println("go to rule "+((NFAState)s.transition[0].target).enclosingRule);
201 		}
202 		*/
203 		if ( !chaseFollowTransitions && s.isAcceptState() ) {
204 			if ( grammar.type==Grammar.LEXER ) {
205 				// FOLLOW makes no sense (at the moment!) for lexical rules.
206 				// assume all char can follow
207 				return new LookaheadSet(IntervalSet.COMPLETE_SET);
208 			}
209 			return new LookaheadSet(Label.EOR_TOKEN_TYPE);
210 		}
211 
212 		if ( lookBusy.contains(s) ) {
213 			// return a copy of an empty set; we may modify set inline
214 			return new LookaheadSet();
215 		}
216 		lookBusy.add(s);
217 
218 		Transition transition0 = s.transition[0];
219 		if ( transition0==null ) {
220 			return null;
221 		}
222 
223 		if ( transition0.label.isAtom() ) {
224 			int atom = transition0.label.getAtom();
225 			return new LookaheadSet(atom);
226 		}
227 		if ( transition0.label.isSet() ) {
228 			IntSet sl = transition0.label.getSet();
229 			return new LookaheadSet(sl);
230 		}
231 
232 		// compute FIRST of transition 0
233 		LookaheadSet tset = null;
234 		// if transition 0 is a rule call and we don't want FOLLOW, check cache
235 		if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) {
236 			tset = FIRSTCache.get((NFAState)transition0.target);
237 		}
238 
239 		// if not in cache, must compute
240 		if ( tset==null ) {
241 			tset = _FIRST((NFAState)transition0.target, chaseFollowTransitions);
242 			// save FIRST cache for transition 0 if rule call
243 			if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) {
244 				FIRSTCache.put((NFAState)transition0.target, tset);
245 			}
246 		}
247 
248         LookaheadSet tsetCached = tset; // tset is stored in cache. We can't return the same instance
249 
250 		// did we fall off the end?
251 		if ( grammar.type!=Grammar.LEXER && tset.member(Label.EOR_TOKEN_TYPE) ) {
252 			if ( transition0 instanceof RuleClosureTransition ) {
253 				// we called a rule that found the end of the rule.
254 				// That means the rule is nullable and we need to
255 				// keep looking at what follows the rule ref.  E.g.,
256 				// a : b A ; where b is nullable means that LOOK(a)
257 				// should include A.
258 				RuleClosureTransition ruleInvocationTrans =
259 					(RuleClosureTransition)transition0;
260 				// remove the EOR and get what follows
261 				//tset.remove(Label.EOR_TOKEN_TYPE);
262 				NFAState following = ruleInvocationTrans.followState;
263 				LookaheadSet fset =	_FIRST(following, chaseFollowTransitions);
264 				fset.orInPlace(tset); // tset cached; or into new set
265 				fset.remove(Label.EOR_TOKEN_TYPE);
266 				tset = fset;
267 			}
268 		}
269 
270 		Transition transition1 = s.transition[1];
271 		if ( transition1!=null ) {
272 			LookaheadSet tset1 =
273 				_FIRST((NFAState)transition1.target, chaseFollowTransitions);
274 			tset1.orInPlace(tset);
275 			tset = tset1;
276 		}
277 
278 		// never return a cached set; clone
279 		return tset==tsetCached ? new LookaheadSet(tset) : tset;
280 	}
281 
282 	/** Is there a non-syn-pred predicate visible from s that is not in
283 	 *  the rule enclosing s?  This accounts for most predicate situations
284 	 *  and lets ANTLR do a simple LL(1)+pred computation.
285 	 *
286 	 *  TODO: what about gated vs regular preds?
287 	 */
detectConfoundingPredicates(NFAState s)288 	public boolean detectConfoundingPredicates(NFAState s) {
289 		lookBusy.clear();
290 		Rule r = s.enclosingRule;
291 		return _detectConfoundingPredicates(s, r, false) == DETECT_PRED_FOUND;
292 	}
293 
_detectConfoundingPredicates(NFAState s, Rule enclosingRule, boolean chaseFollowTransitions)294 	protected int _detectConfoundingPredicates(NFAState s,
295 											   Rule enclosingRule,
296 											   boolean chaseFollowTransitions)
297 	{
298 		//System.out.println("_detectNonAutobacktrackPredicates("+s+")");
299 		if ( !chaseFollowTransitions && s.isAcceptState() ) {
300 			if ( grammar.type==Grammar.LEXER ) {
301 				// FOLLOW makes no sense (at the moment!) for lexical rules.
302 				// assume all char can follow
303 				return DETECT_PRED_NOT_FOUND;
304 			}
305 			return DETECT_PRED_EOR;
306 		}
307 
308 		if ( lookBusy.contains(s) ) {
309 			// return a copy of an empty set; we may modify set inline
310 			return DETECT_PRED_NOT_FOUND;
311 		}
312 		lookBusy.add(s);
313 
314 		Transition transition0 = s.transition[0];
315 		if ( transition0==null ) {
316 			return DETECT_PRED_NOT_FOUND;
317 		}
318 
319 		if ( !(transition0.label.isSemanticPredicate()||
320 			   transition0.label.isEpsilon()) ) {
321 			return DETECT_PRED_NOT_FOUND;
322 		}
323 
324 		if ( transition0.label.isSemanticPredicate() ) {
325 			//System.out.println("pred "+transition0.label);
326 			SemanticContext ctx = transition0.label.getSemanticContext();
327 			SemanticContext.Predicate p = (SemanticContext.Predicate)ctx;
328 			if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED ) {
329 				return DETECT_PRED_FOUND;
330 			}
331 		}
332 
333 		/*
334 		if ( transition0.label.isSemanticPredicate() ) {
335 			System.out.println("pred "+transition0.label);
336 			SemanticContext ctx = transition0.label.getSemanticContext();
337 			SemanticContext.Predicate p = (SemanticContext.Predicate)ctx;
338 			// if a non-syn-pred found not in enclosingRule, say we found one
339 			if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED &&
340 				 !p.predicateAST.enclosingRuleName.equals(enclosingRule.name) )
341 			{
342 				System.out.println("found pred "+p+" not in "+enclosingRule.name);
343 				return DETECT_PRED_FOUND;
344 			}
345 		}
346 		*/
347 
348 		int result = _detectConfoundingPredicates((NFAState)transition0.target,
349 												  enclosingRule,
350 												  chaseFollowTransitions);
351 		if ( result == DETECT_PRED_FOUND ) {
352 			return DETECT_PRED_FOUND;
353 		}
354 
355 		if ( result == DETECT_PRED_EOR ) {
356 			if ( transition0 instanceof RuleClosureTransition ) {
357 				// we called a rule that found the end of the rule.
358 				// That means the rule is nullable and we need to
359 				// keep looking at what follows the rule ref.  E.g.,
360 				// a : b A ; where b is nullable means that LOOK(a)
361 				// should include A.
362 				RuleClosureTransition ruleInvocationTrans =
363 					(RuleClosureTransition)transition0;
364 				NFAState following = ruleInvocationTrans.followState;
365 				int afterRuleResult =
366 					_detectConfoundingPredicates(following,
367 												 enclosingRule,
368 												 chaseFollowTransitions);
369 				if ( afterRuleResult == DETECT_PRED_FOUND ) {
370 					return DETECT_PRED_FOUND;
371 				}
372 			}
373 		}
374 
375 		Transition transition1 = s.transition[1];
376 		if ( transition1!=null ) {
377 			int t1Result =
378 				_detectConfoundingPredicates((NFAState)transition1.target,
379 											 enclosingRule,
380 											 chaseFollowTransitions);
381 			if ( t1Result == DETECT_PRED_FOUND ) {
382 				return DETECT_PRED_FOUND;
383 			}
384 		}
385 
386 		return DETECT_PRED_NOT_FOUND;
387 	}
388 
389 	/** Return predicate expression found via epsilon edges from s.  Do
390 	 *  not look into other rules for now.  Do something simple.  Include
391 	 *  backtracking synpreds.
392 	 */
getPredicates(NFAState altStartState)393 	public SemanticContext getPredicates(NFAState altStartState) {
394 		lookBusy.clear();
395 		return _getPredicates(altStartState, altStartState);
396 	}
397 
_getPredicates(NFAState s, NFAState altStartState)398 	protected SemanticContext _getPredicates(NFAState s, NFAState altStartState) {
399 		//System.out.println("_getPredicates("+s+")");
400 		if ( s.isAcceptState() ) {
401 			return null;
402 		}
403 
404 		// avoid infinite loops from (..)* etc...
405 		if ( lookBusy.contains(s) ) {
406 			return null;
407 		}
408 		lookBusy.add(s);
409 
410 		Transition transition0 = s.transition[0];
411 		// no transitions
412 		if ( transition0==null ) {
413 			return null;
414 		}
415 
416 		// not a predicate and not even an epsilon
417 		if ( !(transition0.label.isSemanticPredicate()||
418 			   transition0.label.isEpsilon()) ) {
419 			return null;
420 		}
421 
422 		SemanticContext p = null;
423 		SemanticContext p0;
424 		SemanticContext p1 = null;
425 		if ( transition0.label.isSemanticPredicate() ) {
426 			//System.out.println("pred "+transition0.label);
427 			p = transition0.label.getSemanticContext();
428 			// ignore backtracking preds not on left edge for this decision
429 			if ( ((SemanticContext.Predicate)p).predicateAST.getType() ==
430 				  ANTLRParser.BACKTRACK_SEMPRED  &&
431 				 s == altStartState.transition[0].target )
432 			{
433 				p = null; // don't count
434 			}
435 		}
436 
437 		// get preds from beyond this state
438 		p0 = _getPredicates((NFAState)transition0.target, altStartState);
439 
440 		// get preds from other transition
441 		Transition transition1 = s.transition[1];
442 		if ( transition1!=null ) {
443 			p1 = _getPredicates((NFAState)transition1.target, altStartState);
444 		}
445 
446 		// join this&following-right|following-down
447 		return SemanticContext.and(p,SemanticContext.or(p0,p1));
448 	}
449 }
450