1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.analysis;
29 
30 import org.antlr.grammar.v3.ANTLRParser;
31 import org.antlr.misc.MultiMap;
32 import org.antlr.misc.Utils;
33 import org.antlr.runtime.Token;
34 import org.antlr.tool.ErrorManager;
35 import org.antlr.tool.Grammar;
36 import org.antlr.tool.GrammarAST;
37 
38 import java.util.*;
39 
40 /** Collection of information about what is wrong with a decision as
41  *  discovered while building the DFA predictor.
42  *
43  *  The information is collected during NFA→DFA conversion and, while
44  *  some of this is available elsewhere, it is nice to have it all tracked
45  *  in one spot so a great error message can be easily had.  I also like
46  *  the fact that this object tracks it all for later perusing to make an
47  *  excellent error message instead of lots of imprecise on-the-fly warnings
48  *  (during conversion).
49  *
50  *  A decision normally only has one problem; e.g., some input sequence
51  *  can be matched by multiple alternatives.  Unfortunately, some decisions
52  *  such as
53  *
54  *  a : ( A | B ) | ( A | B ) | A ;
55  *
56  *  have multiple problems.  So in general, you should approach a decision
57  *  as having multiple flaws each one uniquely identified by a DFAState.
58  *  For example, statesWithSyntacticallyAmbiguousAltsSet tracks the set of
59  *  all DFAStates where ANTLR has discovered a problem.  Recall that a decision
60  *  is represented internall with a DFA comprised of multiple states, each of
61  *  which could potentially have problems.
62  *
63  *  Because of this, you need to iterate over this list of DFA states.  You'll
64  *  note that most of the informational methods like
65  *  getSampleNonDeterministicInputSequence() require a DFAState.  This state
66  *  will be one of the iterated states from stateToSyntacticallyAmbiguousAltsSet.
67  *
68  *  This class is not thread safe due to shared use of visited maps etc...
69  *  Only one thread should really need to access one DecisionProbe anyway.
70  */
71 public class DecisionProbe {
72 	public DFA dfa;
73 
74 	/** Track all DFA states with nondeterministic alternatives.
75 	 *  By reaching the same DFA state, a path through the NFA for some input
76 	 *  is able to reach the same NFA state by starting at more than one
77 	 *  alternative's left edge.  Though, later, we may find that predicates
78 	 *  resolve the issue, but track info anyway.
79 	 *  Note that from the DFA state, you can ask for
80 	 *  which alts are nondeterministic.
81 	 */
82 	protected Set<DFAState> statesWithSyntacticallyAmbiguousAltsSet = new HashSet<DFAState>();
83 
84 	/** Track just like stateToSyntacticallyAmbiguousAltsMap, but only
85 	 *  for nondeterminisms that arise in the Tokens rule such as keyword vs
86 	 *  ID rule.  The state maps to the list of Tokens rule alts that are
87 	 *  in conflict.
88 	 */
89 	protected Map<DFAState, Set<Integer>> stateToSyntacticallyAmbiguousTokensRuleAltsMap =
90 		new HashMap<DFAState, Set<Integer>>();
91 
92 	/** Was a syntactic ambiguity resolved with predicates?  Any DFA
93 	 *  state that predicts more than one alternative, must be resolved
94 	 *  with predicates or it should be reported to the user.
95 	 */
96 	protected Set<DFAState> statesResolvedWithSemanticPredicatesSet = new HashSet<DFAState>();
97 
98 	/** Track the predicates for each alt per DFA state;
99 	 *  more than one DFA state might have syntactically ambig alt prediction.
100 	 *  Maps DFA state to another map, mapping alt number to a
101 	 *  SemanticContext (pred(s) to execute to resolve syntactic ambiguity).
102 	 */
103 	protected Map<DFAState, Map<Integer,SemanticContext>> stateToAltSetWithSemanticPredicatesMap =
104 		new HashMap<DFAState, Map<Integer,SemanticContext>>();
105 
106 	/** Tracks alts insufficiently covered.
107 	 *  For example, p1||true gets reduced to true and so leaves
108 	 *  whole alt uncovered.  This maps DFA state to the set of alts
109 	 */
110 	protected Map<DFAState,Map<Integer, Set<Token>>> stateToIncompletelyCoveredAltsMap =
111 		new HashMap<DFAState,Map<Integer, Set<Token>>>();
112 
113 	/** The set of states w/o emanating edges and w/o resolving sem preds. */
114 	protected Set<DFAState> danglingStates = new HashSet<DFAState>();
115 
116 	/** The overall list of alts within the decision that have at least one
117 	 *  conflicting input sequence.
118 	 */
119 	protected Set<Integer> altsWithProblem = new HashSet<Integer>();
120 
121 	/** If decision with &gt; 1 alt has recursion in &gt; 1 alt, it's (likely) nonregular
122 	 *  lookahead.  The decision cannot be made with a DFA.
123 	 *  the alts are stored in altsWithProblem.
124 	 */
125 	public boolean nonLLStarDecision = false;
126 
127 	/** Recursion is limited to a particular depth.  If that limit is exceeded
128 	 *  the proposed new NFAConfiguration is recorded for the associated DFA state.
129 	 */
130 	protected MultiMap<Integer, NFAConfiguration> stateToRecursionOverflowConfigurationsMap =
131 		new MultiMap<Integer, NFAConfiguration>();
132 	/*
133 	protected Map<Integer, List<NFAConfiguration>> stateToRecursionOverflowConfigurationsMap =
134 		new HashMap<Integer, List<NFAConfiguration>>();
135 		*/
136 
137 	/** Left recursion discovered.  The proposed new NFAConfiguration
138 	 *  is recorded for the associated DFA state.
139 	protected Map<Integer,List<NFAConfiguration>> stateToLeftRecursiveConfigurationsMap =
140 		new HashMap<Integer,List<NFAConfiguration>>();
141 	 */
142 
143 	/** Did ANTLR have to terminate early on the analysis of this decision? */
144 	protected boolean timedOut = false;
145 
146 	/** Used to find paths through syntactically ambiguous DFA. If we've
147 	 *  seen statement number before, what did we learn?
148 	 */
149 	protected Map<Integer, Integer> stateReachable;
150 
151 	public static final Integer REACHABLE_BUSY = Utils.integer(-1);
152 	public static final Integer REACHABLE_NO = Utils.integer(0);
153 	public static final Integer REACHABLE_YES = Utils.integer(1);
154 
155 	/** Used while finding a path through an NFA whose edge labels match
156 	 *  an input sequence.  Tracks the input position
157 	 *  we were at the last time at this node.  If same input position, then
158 	 *  we'd have reached same state without consuming input...probably an
159 	 *  infinite loop.  Stop.  Set&lt;String&gt;.  The strings look like
160 	 *  stateNumber_labelIndex.
161 	 */
162 	protected Set<String> statesVisitedAtInputDepth;
163 
164 	protected Set<Integer> statesVisitedDuringSampleSequence;
165 
166 	public static boolean verbose = false;
167 
DecisionProbe(DFA dfa)168 	public DecisionProbe(DFA dfa) {
169 		this.dfa = dfa;
170 	}
171 
172 	// I N F O R M A T I O N  A B O U T  D E C I S I O N
173 
174 	/** Return a string like "3:22: ( A {;} | B )" that describes this
175 	 *  decision.
176 	 */
getDescription()177 	public String getDescription() {
178 		return dfa.getNFADecisionStartState().getDescription();
179 	}
180 
isReduced()181 	public boolean isReduced() {
182 		return dfa.isReduced();
183 	}
184 
isCyclic()185 	public boolean isCyclic() {
186 		return dfa.isCyclic();
187 	}
188 
189 	/** If no states are dead-ends, no alts are unreachable, there are
190 	 *  no nondeterminisms unresolved by syn preds, all is ok with decision.
191 	 */
isDeterministic()192 	public boolean isDeterministic() {
193 		if ( danglingStates.isEmpty() &&
194 			 statesWithSyntacticallyAmbiguousAltsSet.isEmpty() &&
195 			 dfa.getUnreachableAlts().isEmpty() )
196 		{
197 			return true;
198 		}
199 
200 		if ( statesWithSyntacticallyAmbiguousAltsSet.size()>0 ) {
201 			for (DFAState d : statesWithSyntacticallyAmbiguousAltsSet) {
202 				if ( !statesResolvedWithSemanticPredicatesSet.contains(d) ) {
203 					return false;
204 				}
205 			}
206 			// no syntactically ambig alts were left unresolved by predicates
207 			return true;
208 		}
209 		return false;
210 	}
211 
212 	/** Did the analysis complete it's work? */
213 //	public boolean analysisTimedOut() {
214 //		return timedOut;
215 //	}
216 
217 	/** Took too long to analyze a DFA */
analysisOverflowed()218 	public boolean analysisOverflowed() {
219 		return stateToRecursionOverflowConfigurationsMap.size()>0;
220 	}
221 
222 	/** Found recursion in &gt; 1 alt */
isNonLLStarDecision()223 	public boolean isNonLLStarDecision() {
224 		return nonLLStarDecision;
225 	}
226 
227 	/** How many states does the DFA predictor have? */
getNumberOfStates()228 	public int getNumberOfStates() {
229 		return dfa.getNumberOfStates();
230 	}
231 
232 	/** Get a list of all unreachable alternatives for this decision.  There
233 	 *  may be multiple alternatives with ambiguous input sequences, but this
234 	 *  is the overall list of unreachable alternatives (either due to
235 	 *  conflict resolution or alts w/o accept states).
236 	 */
getUnreachableAlts()237 	public List<Integer> getUnreachableAlts() {
238 		return dfa.getUnreachableAlts();
239 	}
240 
241 	/** return set of states w/o emanating edges and w/o resolving sem preds.
242 	 *  These states come about because the analysis algorithm had to
243 	 *  terminate early to avoid infinite recursion for example (due to
244 	 *  left recursion perhaps).
245 	 */
getDanglingStates()246 	public Set<DFAState> getDanglingStates() {
247 		return danglingStates;
248 	}
249 
getNonDeterministicAlts()250     public Set<Integer> getNonDeterministicAlts() {
251         return altsWithProblem;
252 	}
253 
254 	/** Return the sorted list of alts that conflict within a single state.
255 	 *  Note that predicates may resolve the conflict.
256 	 */
getNonDeterministicAltsForState(DFAState targetState)257 	public List<Integer> getNonDeterministicAltsForState(DFAState targetState) {
258 		Set<Integer> nondetAlts = targetState.getNonDeterministicAlts();
259 		if ( nondetAlts==null ) {
260 			return null;
261 		}
262 		List<Integer> sorted = new LinkedList<Integer>();
263 		sorted.addAll(nondetAlts);
264 		Collections.sort(sorted); // make sure it's 1, 2, ...
265 		return sorted;
266 	}
267 
268 	/** Return all DFA states in this DFA that have NFA configurations that
269 	 *  conflict.  You must report a problem for each state in this set
270 	 *  because each state represents a different input sequence.
271 	 */
getDFAStatesWithSyntacticallyAmbiguousAlts()272 	public Set<DFAState> getDFAStatesWithSyntacticallyAmbiguousAlts() {
273 		return statesWithSyntacticallyAmbiguousAltsSet;
274 	}
275 
276 	/** Which alts were specifically turned off to resolve nondeterminisms?
277 	 *  This is different than the unreachable alts.  Disabled doesn't mean that
278 	 *  the alternative is totally unreachable necessarily, it just means
279 	 *  that for this DFA state, that alt is disabled.  There may be other
280 	 *  accept states for that alt that make an alt reachable.
281 	 */
getDisabledAlternatives(DFAState d)282 	public Set<Integer> getDisabledAlternatives(DFAState d) {
283 		return d.getDisabledAlternatives();
284 	}
285 
286 	/** If a recursion overflow is resolve with predicates, then we need
287 	 *  to shut off the warning that would be generated.
288 	 */
removeRecursiveOverflowState(DFAState d)289 	public void removeRecursiveOverflowState(DFAState d) {
290 		Integer stateI = Utils.integer(d.stateNumber);
291 		stateToRecursionOverflowConfigurationsMap.remove(stateI);
292 	}
293 
294 	/** Return a List&lt;Label&gt; indicating an input sequence that can be matched
295 	 *  from the start state of the DFA to the targetState (which is known
296 	 *  to have a problem).
297 	 */
getSampleNonDeterministicInputSequence(DFAState targetState)298 	public List<Label> getSampleNonDeterministicInputSequence(DFAState targetState) {
299 		Set<DFAState> dfaStates = getDFAPathStatesToTarget(targetState);
300 		statesVisitedDuringSampleSequence = new HashSet<Integer>();
301 		List<Label> labels = new ArrayList<Label>(); // may access ith element; use array
302 		if ( dfa==null || dfa.startState==null ) {
303 			return labels;
304 		}
305 		getSampleInputSequenceUsingStateSet(dfa.startState,
306 											targetState,
307 											dfaStates,
308 											labels);
309 		return labels;
310 	}
311 
312 	/** Given List&lt;Label&gt;, return a String with a useful representation
313 	 *  of the associated input string.  One could show something different
314 	 *  for lexers and parsers, for example.
315 	 */
getInputSequenceDisplay(List<? extends Label> labels)316 	public String getInputSequenceDisplay(List<? extends Label> labels) {
317         Grammar g = dfa.nfa.grammar;
318 		StringBuilder buf = new StringBuilder();
319 		for (Iterator<? extends Label> it = labels.iterator(); it.hasNext();) {
320 			Label label = it.next();
321 			buf.append(label.toString(g));
322 			if ( it.hasNext() && g.type!=Grammar.LEXER ) {
323 				buf.append(' ');
324 			}
325 		}
326 		return buf.toString();
327 	}
328 
329     /** Given an alternative associated with a nondeterministic DFA state,
330 	 *  find the path of NFA states associated with the labels sequence.
331 	 *  Useful tracing where in the NFA, a single input sequence can be
332 	 *  matched.  For different alts, you should get different NFA paths.
333 	 *
334 	 *  The first NFA state for all NFA paths will be the same: the starting
335 	 *  NFA state of the first nondeterministic alt.  Imagine (A|B|A|A):
336 	 *
337 	 * 	5-&gt;9-A-&gt;o
338 	 *  |
339 	 *  6-&gt;10-B-&gt;o
340 	 *  |
341 	 *  7-&gt;11-A-&gt;o
342 	 *  |
343 	 *  8-&gt;12-A-&gt;o
344 	 *
345 	 *  There are 3 nondeterministic alts.  The paths should be:
346 	 *  5 9 ...
347 	 *  5 6 7 11 ...
348 	 *  5 6 7 8 12 ...
349 	 *
350 	 *  The NFA path matching the sample input sequence (labels) is computed
351 	 *  using states 9, 11, and 12 rather than 5, 7, 8 because state 5, for
352 	 *  example can get to all ambig paths.  Must isolate for each alt (hence,
353 	 *  the extra state beginning each alt in my NFA structures).  Here,
354 	 *  firstAlt=1.
355 	 */
getNFAPathStatesForAlt(int firstAlt, int alt, List<? extends Label> labels)356 	public List<? extends NFAState> getNFAPathStatesForAlt(int firstAlt,
357 									   int alt,
358 									   List<? extends Label> labels)
359 	{
360 		NFAState nfaStart = dfa.getNFADecisionStartState();
361 		List<NFAState> path = new LinkedList<NFAState>();
362 		// first add all NFA states leading up to altStart state
363 		for (int a=firstAlt; a<=alt; a++) {
364 			NFAState s =
365 				dfa.nfa.grammar.getNFAStateForAltOfDecision(nfaStart,a);
366 			path.add(s);
367 		}
368 
369 		// add first state of actual alt
370 		NFAState altStart = dfa.nfa.grammar.getNFAStateForAltOfDecision(nfaStart,alt);
371 		NFAState isolatedAltStart = (NFAState)altStart.transition[0].target;
372 		path.add(isolatedAltStart);
373 
374 		// add the actual path now
375 		statesVisitedAtInputDepth = new HashSet<String>();
376 		getNFAPath(isolatedAltStart,
377 				   0,
378 				   labels,
379 				   path);
380         return path;
381 	}
382 
383 	/** Each state in the DFA represents a different input sequence for an
384 	 *  alt of the decision.  Given a DFA state, what is the semantic
385 	 *  predicate context for a particular alt.
386 	 */
getSemanticContextForAlt(DFAState d, int alt)387     public SemanticContext getSemanticContextForAlt(DFAState d, int alt) {
388 		Map<Integer, SemanticContext> altToPredMap = stateToAltSetWithSemanticPredicatesMap.get(d);
389 		if ( altToPredMap==null ) {
390 			return null;
391 		}
392 		return altToPredMap.get(Utils.integer(alt));
393 	}
394 
395 	/** At least one alt refs a sem or syn pred */
hasPredicate()396 	public boolean hasPredicate() {
397 		return stateToAltSetWithSemanticPredicatesMap.size()>0;
398 	}
399 
getNondeterministicStatesResolvedWithSemanticPredicate()400 	public Set<DFAState> getNondeterministicStatesResolvedWithSemanticPredicate() {
401 		return statesResolvedWithSemanticPredicatesSet;
402 	}
403 
404 	/** Return a list of alts whose predicate context was insufficient to
405 	 *  resolve a nondeterminism for state d.
406 	 */
getIncompletelyCoveredAlts(DFAState d)407 	public Map<Integer, Set<Token>> getIncompletelyCoveredAlts(DFAState d) {
408 		return stateToIncompletelyCoveredAltsMap.get(d);
409 	}
410 
issueWarnings()411 	public void issueWarnings() {
412 		// NONREGULAR DUE TO RECURSION > 1 ALTS
413 		// Issue this before aborted analysis, which might also occur
414 		// if we take too long to terminate
415 		if ( nonLLStarDecision && !dfa.getAutoBacktrackMode() ) {
416 			ErrorManager.nonLLStarDecision(this);
417 		}
418 
419 		issueRecursionWarnings();
420 
421 		// generate a separate message for each problem state in DFA
422 		Set<DFAState> resolvedStates = getNondeterministicStatesResolvedWithSemanticPredicate();
423 		Set<DFAState> problemStates = getDFAStatesWithSyntacticallyAmbiguousAlts();
424 		if ( problemStates.size()>0 ) {
425 			Iterator<DFAState> it =
426 				problemStates.iterator();
427 			while (	it.hasNext() && !dfa.nfa.grammar.NFAToDFAConversionExternallyAborted() ) {
428 				DFAState d = it.next();
429 				Map<Integer, Set<Token>> insufficientAltToLocations = getIncompletelyCoveredAlts(d);
430 				if ( insufficientAltToLocations!=null && insufficientAltToLocations.size()>0 ) {
431 					ErrorManager.insufficientPredicates(this,d,insufficientAltToLocations);
432 				}
433 				// don't report problem if resolved
434 				if ( resolvedStates==null || !resolvedStates.contains(d) ) {
435 					// first strip last alt from disableAlts if it's wildcard
436 					// then don't print error if no more disable alts
437 					Set<Integer> disabledAlts = getDisabledAlternatives(d);
438 					stripWildCardAlts(disabledAlts);
439 					if ( disabledAlts.size()>0 ) {
440 						// nondeterminism; same input predicts multiple alts.
441 						// but don't emit error if greedy=true explicitly set
442 						boolean explicitlyGreedy = false;
443 						GrammarAST blockAST =
444 							d.dfa.nfa.grammar.getDecisionBlockAST(d.dfa.decisionNumber);
445 						if ( blockAST!=null ) {
446 							String greedyS = (String)blockAST.getBlockOption("greedy");
447 							if ( greedyS!=null && greedyS.equals("true") ) explicitlyGreedy = true;
448 						}
449 						if ( !explicitlyGreedy) ErrorManager.nondeterminism(this,d);
450 					}
451 				}
452 			}
453 		}
454 
455 		Set<DFAState> danglingStates = getDanglingStates();
456 		if ( danglingStates.size()>0 ) {
457 			//System.err.println("no emanating edges for states: "+danglingStates);
458 			for (DFAState d : danglingStates) {
459 				ErrorManager.danglingState(this,d);
460 			}
461 		}
462 
463 		if ( !nonLLStarDecision ) {
464 			List<Integer> unreachableAlts = dfa.getUnreachableAlts();
465 			if ( unreachableAlts!=null && unreachableAlts.size()>0 ) {
466 				// give different msg if it's an empty Tokens rule from delegate
467 				boolean isInheritedTokensRule = false;
468 				if ( dfa.isTokensRuleDecision() ) {
469 					for (Integer altI : unreachableAlts) {
470 						GrammarAST decAST = dfa.getDecisionASTNode();
471 						GrammarAST altAST = (GrammarAST)decAST.getChild(altI-1);
472 						GrammarAST delegatedTokensAlt =
473 							(GrammarAST)altAST.getFirstChildWithType(ANTLRParser.DOT);
474 						if ( delegatedTokensAlt !=null ) {
475 							isInheritedTokensRule = true;
476 							ErrorManager.grammarWarning(ErrorManager.MSG_IMPORTED_TOKENS_RULE_EMPTY,
477 														dfa.nfa.grammar,
478 														null,
479 														dfa.nfa.grammar.name,
480 														delegatedTokensAlt.getChild(0).getText());
481 						}
482 					}
483 				}
484 				if ( isInheritedTokensRule ) {
485 				}
486 				else {
487 					ErrorManager.unreachableAlts(this,unreachableAlts);
488 				}
489 			}
490 		}
491 	}
492 
493 	/** Get the last disabled alt number and check in the grammar to see
494 	 *  if that alt is a simple wildcard.  If so, treat like an else clause
495 	 *  and don't emit the error.  Strip out the last alt if it's wildcard.
496 	 */
stripWildCardAlts(Set<Integer> disabledAlts)497 	protected void stripWildCardAlts(Set<Integer> disabledAlts) {
498 		List<Integer> sortedDisableAlts = new ArrayList<Integer>(disabledAlts);
499 		Collections.sort(sortedDisableAlts);
500 		Integer lastAlt =
501 			sortedDisableAlts.get(sortedDisableAlts.size()-1);
502 		GrammarAST blockAST =
503 			dfa.nfa.grammar.getDecisionBlockAST(dfa.decisionNumber);
504 		//System.out.println("block with error = "+blockAST.toStringTree());
505 		GrammarAST lastAltAST;
506 		if ( blockAST.getChild(0).getType()==ANTLRParser.OPTIONS ) {
507 			// if options, skip first child: ( options { ( = greedy false ) )
508 			lastAltAST = (GrammarAST)blockAST.getChild(lastAlt.intValue());
509 		}
510 		else {
511 			lastAltAST = (GrammarAST)blockAST.getChild(lastAlt -1);
512 		}
513 		//System.out.println("last alt is "+lastAltAST.toStringTree());
514 		// if last alt looks like ( ALT . <end-of-alt> ) then wildcard
515 		// Avoid looking at optional blocks etc... that have last alt
516 		// as the EOB:
517 		// ( BLOCK ( ALT 'else' statement <end-of-alt> ) <end-of-block> )
518 		if ( lastAltAST.getType()!=ANTLRParser.EOB &&
519 			 lastAltAST.getChild(0).getType()== ANTLRParser.WILDCARD &&
520 			 lastAltAST.getChild(1).getType()== ANTLRParser.EOA )
521 		{
522 			//System.out.println("wildcard");
523 			disabledAlts.remove(lastAlt);
524 		}
525 	}
526 
issueRecursionWarnings()527 	protected void issueRecursionWarnings() {
528 		// RECURSION OVERFLOW
529 		Set<Integer> dfaStatesWithRecursionProblems =
530 			stateToRecursionOverflowConfigurationsMap.keySet();
531 		// now walk truly unique (unaliased) list of dfa states with inf recur
532 		// Goal: create a map from alt to map<target,List<callsites>>
533 		// Map<Map<String target, List<NFAState call sites>>
534 		Map<Integer, Map<String, Set<NFAState>>> altToTargetToCallSitesMap =
535 			new HashMap<Integer, Map<String, Set<NFAState>>>();
536 		// track a single problem DFA state for each alt
537 		Map<Integer, DFAState> altToDFAState = new HashMap<Integer, DFAState>();
538 		computeAltToProblemMaps(dfaStatesWithRecursionProblems,
539 								stateToRecursionOverflowConfigurationsMap,
540 								altToTargetToCallSitesMap, // output param
541 								altToDFAState);            // output param
542 
543 		// walk each alt with recursion overflow problems and generate error
544 		Set<Integer> alts = altToTargetToCallSitesMap.keySet();
545 		List<Integer> sortedAlts = new ArrayList<Integer>(alts);
546 		Collections.sort(sortedAlts);
547 		for (Integer altI : sortedAlts) {
548 			Map<String, Set<NFAState>> targetToCallSiteMap =
549 				altToTargetToCallSitesMap.get(altI);
550 			Set<String> targetRules = targetToCallSiteMap.keySet();
551 			Collection<Set<NFAState>> callSiteStates = targetToCallSiteMap.values();
552 			DFAState sampleBadState = altToDFAState.get(altI);
553 			ErrorManager.recursionOverflow(this,
554 										   sampleBadState,
555 										   altI,
556 										   targetRules,
557 										   callSiteStates);
558 		}
559 	}
560 
computeAltToProblemMaps(Set<Integer> dfaStatesUnaliased, Map<Integer, List<NFAConfiguration>> configurationsMap, Map<Integer, Map<String, Set<NFAState>>> altToTargetToCallSitesMap, Map<Integer, DFAState> altToDFAState)561 	private void computeAltToProblemMaps(Set<Integer> dfaStatesUnaliased,
562 										 Map<Integer, List<NFAConfiguration>> configurationsMap,
563 										 Map<Integer, Map<String, Set<NFAState>>> altToTargetToCallSitesMap,
564 										 Map<Integer, DFAState> altToDFAState)
565 	{
566 		for (Integer stateI : dfaStatesUnaliased) {
567 			// walk this DFA's config list
568 			List<? extends NFAConfiguration> configs = configurationsMap.get(stateI);
569 			for (int i = 0; i < configs.size(); i++) {
570 				NFAConfiguration c = configs.get(i);
571 				NFAState ruleInvocationState = dfa.nfa.getState(c.state);
572 				Transition transition0 = ruleInvocationState.transition[0];
573 				RuleClosureTransition ref = (RuleClosureTransition)transition0;
574 				String targetRule = ((NFAState) ref.target).enclosingRule.name;
575 				Integer altI = Utils.integer(c.alt);
576 				Map<String, Set<NFAState>> targetToCallSiteMap =
577 					altToTargetToCallSitesMap.get(altI);
578 				if ( targetToCallSiteMap==null ) {
579 					targetToCallSiteMap = new HashMap<String, Set<NFAState>>();
580 					altToTargetToCallSitesMap.put(altI, targetToCallSiteMap);
581 				}
582 				Set<NFAState> callSites =
583 					targetToCallSiteMap.get(targetRule);
584 				if ( callSites==null ) {
585 					callSites = new HashSet<NFAState>();
586 					targetToCallSiteMap.put(targetRule, callSites);
587 				}
588 				callSites.add(ruleInvocationState);
589 				// track one problem DFA state per alt
590 				if ( altToDFAState.get(altI)==null ) {
591 					DFAState sampleBadState = dfa.getState(stateI);
592 					altToDFAState.put(altI, sampleBadState);
593 				}
594 			}
595 		}
596 	}
597 
getUnaliasedDFAStateSet(Set<Integer> dfaStatesWithRecursionProblems)598 	private Set<Integer> getUnaliasedDFAStateSet(Set<Integer> dfaStatesWithRecursionProblems) {
599 		Set<Integer> dfaStatesUnaliased = new HashSet<Integer>();
600 		for (Integer stateI : dfaStatesWithRecursionProblems) {
601 			DFAState d = dfa.getState(stateI);
602 			dfaStatesUnaliased.add(Utils.integer(d.stateNumber));
603 		}
604 		return dfaStatesUnaliased;
605 	}
606 
607 
608 	// T R A C K I N G  M E T H O D S
609 
610     /** Report the fact that DFA state d is not a state resolved with
611      *  predicates and yet it has no emanating edges.  Usually this
612      *  is a result of the closure/reach operations being unable to proceed
613      */
reportDanglingState(DFAState d)614 	public void reportDanglingState(DFAState d) {
615 		danglingStates.add(d);
616 	}
617 
618 //	public void reportAnalysisTimeout() {
619 //		timedOut = true;
620 //		dfa.nfa.grammar.setOfDFAWhoseAnalysisTimedOut.add(dfa);
621 //	}
622 
623 	/** Report that at least 2 alts have recursive constructs.  There is
624 	 *  no way to build a DFA so we terminated.
625 	 */
reportNonLLStarDecision(DFA dfa)626 	public void reportNonLLStarDecision(DFA dfa) {
627 		/*
628 		System.out.println("non-LL(*) DFA "+dfa.decisionNumber+", alts: "+
629 						   dfa.recursiveAltSet.toList());
630 						   */
631 		nonLLStarDecision = true;
632 		dfa.nfa.grammar.numNonLLStar++;
633 		altsWithProblem.addAll(dfa.recursiveAltSet.toList());
634 	}
635 
reportRecursionOverflow(DFAState d, NFAConfiguration recursionNFAConfiguration)636 	public void reportRecursionOverflow(DFAState d,
637 										NFAConfiguration recursionNFAConfiguration)
638 	{
639 		// track the state number rather than the state as d will change
640 		// out from underneath us; hash wouldn't return any value
641 
642 		// left-recursion is detected in start state.  Since we can't
643 		// call resolveNondeterminism() on the start state (it would
644 		// not look k=1 to get min single token lookahead), we must
645 		// prevent errors derived from this state.  Avoid start state
646 		if ( d.stateNumber > 0 ) {
647 			Integer stateI = Utils.integer(d.stateNumber);
648 			stateToRecursionOverflowConfigurationsMap.map(stateI, recursionNFAConfiguration);
649 		}
650 	}
651 
reportNondeterminism(DFAState d, Set<Integer> nondeterministicAlts)652 	public void reportNondeterminism(DFAState d, Set<Integer> nondeterministicAlts) {
653 		altsWithProblem.addAll(nondeterministicAlts); // track overall list
654 		statesWithSyntacticallyAmbiguousAltsSet.add(d);
655 		dfa.nfa.grammar.setOfNondeterministicDecisionNumbers.add(
656 			Utils.integer(dfa.getDecisionNumber())
657 		);
658 	}
659 
660 	/** Currently the analysis reports issues between token definitions, but
661 	 *  we don't print out warnings in favor of just picking the first token
662 	 *  definition found in the grammar ala lex/flex.
663 	 */
reportLexerRuleNondeterminism(DFAState d, Set<Integer> nondeterministicAlts)664 	public void reportLexerRuleNondeterminism(DFAState d, Set<Integer> nondeterministicAlts) {
665 		stateToSyntacticallyAmbiguousTokensRuleAltsMap.put(d,nondeterministicAlts);
666 	}
667 
reportNondeterminismResolvedWithSemanticPredicate(DFAState d)668 	public void reportNondeterminismResolvedWithSemanticPredicate(DFAState d) {
669 		// First, prevent a recursion warning on this state due to
670 		// pred resolution
671 		if ( d.abortedDueToRecursionOverflow ) {
672 			d.dfa.probe.removeRecursiveOverflowState(d);
673 		}
674 		statesResolvedWithSemanticPredicatesSet.add(d);
675 		//System.out.println("resolved with pred: "+d);
676 		dfa.nfa.grammar.setOfNondeterministicDecisionNumbersResolvedWithPredicates.add(
677 			Utils.integer(dfa.getDecisionNumber())
678 		);
679 	}
680 
681 	/** Report the list of predicates found for each alternative; copy
682 	 *  the list because this set gets altered later by the method
683 	 *  tryToResolveWithSemanticPredicates() while flagging NFA configurations
684 	 *  in d as resolved.
685 	 */
reportAltPredicateContext(DFAState d, Map<Integer, ? extends SemanticContext> altPredicateContext)686 	public void reportAltPredicateContext(DFAState d, Map<Integer, ? extends SemanticContext> altPredicateContext) {
687 		Map<Integer, SemanticContext> copy = new HashMap<Integer, SemanticContext>();
688 		copy.putAll(altPredicateContext);
689 		stateToAltSetWithSemanticPredicatesMap.put(d,copy);
690 	}
691 
reportIncompletelyCoveredAlts(DFAState d, Map<Integer, Set<Token>> altToLocationsReachableWithoutPredicate)692 	public void reportIncompletelyCoveredAlts(DFAState d,
693 											  Map<Integer, Set<Token>> altToLocationsReachableWithoutPredicate)
694 	{
695 		stateToIncompletelyCoveredAltsMap.put(d, altToLocationsReachableWithoutPredicate);
696 	}
697 
698 	// S U P P O R T
699 
700 	/** Given a start state and a target state, return true if start can reach
701 	 *  target state.  Also, compute the set of DFA states
702 	 *  that are on a path from start to target; return in states parameter.
703 	 */
reachesState(DFAState startState, DFAState targetState, Set<DFAState> states)704 	protected boolean reachesState(DFAState startState,
705 								   DFAState targetState,
706 								   Set<DFAState> states) {
707 		if ( startState==targetState ) {
708 			states.add(targetState);
709 			//System.out.println("found target DFA state "+targetState.getStateNumber());
710 			stateReachable.put(startState.stateNumber, REACHABLE_YES);
711 			return true;
712 		}
713 
714 		DFAState s = startState;
715 		// avoid infinite loops
716 		stateReachable.put(s.stateNumber, REACHABLE_BUSY);
717 
718 		// look for a path to targetState among transitions for this state
719 		// stop when you find the first one; I'm pretty sure there is
720 		// at most one path to any DFA state with conflicting predictions
721 		for (int i=0; i<s.getNumberOfTransitions(); i++) {
722 			Transition t = s.transition(i);
723 			DFAState edgeTarget = (DFAState)t.target;
724 			Integer targetStatus = stateReachable.get(edgeTarget.stateNumber);
725 			if ( targetStatus==REACHABLE_BUSY ) { // avoid cycles; they say nothing
726 				continue;
727 			}
728 			if ( targetStatus==REACHABLE_YES ) { // return success!
729 				stateReachable.put(s.stateNumber, REACHABLE_YES);
730 				return true;
731 			}
732 			if ( targetStatus==REACHABLE_NO ) { // try another transition
733 				continue;
734 			}
735 			// if null, target must be REACHABLE_UNKNOWN (i.e., unvisited)
736 			if ( reachesState(edgeTarget, targetState, states) ) {
737 				states.add(s);
738 				stateReachable.put(s.stateNumber, REACHABLE_YES);
739 				return true;
740 			}
741 		}
742 
743 		stateReachable.put(s.stateNumber, REACHABLE_NO);
744 		return false; // no path to targetState found.
745 	}
746 
getDFAPathStatesToTarget(DFAState targetState)747 	protected Set<DFAState> getDFAPathStatesToTarget(DFAState targetState) {
748 		Set<DFAState> dfaStates = new HashSet<DFAState>();
749 		stateReachable = new HashMap<Integer, Integer>();
750 		if ( dfa==null || dfa.startState==null ) {
751 			return dfaStates;
752 		}
753 		boolean reaches = reachesState(dfa.startState, targetState, dfaStates);
754 		return dfaStates;
755 	}
756 
757 	/** Given a start state and a final state, find a list of edge labels
758 	 *  between the two ignoring epsilon.  Limit your scan to a set of states
759 	 *  passed in.  This is used to show a sample input sequence that is
760 	 *  nondeterministic with respect to this decision.  Return List&lt;Label&gt; as
761 	 *  a parameter.  The incoming states set must be all states that lead
762 	 *  from startState to targetState and no others so this algorithm doesn't
763 	 *  take a path that eventually leads to a state other than targetState.
764 	 *  Don't follow loops, leading to short (possibly shortest) path.
765 	 */
getSampleInputSequenceUsingStateSet(State startState, State targetState, Set<DFAState> states, List<Label> labels)766 	protected void getSampleInputSequenceUsingStateSet(State startState,
767 													   State targetState,
768 													   Set<DFAState> states,
769 													   List<Label> labels)
770 	{
771 		statesVisitedDuringSampleSequence.add(startState.stateNumber);
772 
773 		// pick the first edge in states as the one to traverse
774 		for (int i=0; i<startState.getNumberOfTransitions(); i++) {
775 			Transition t = startState.transition(i);
776 			DFAState edgeTarget = (DFAState)t.target;
777 			if ( states.contains(edgeTarget) &&
778 				 !statesVisitedDuringSampleSequence.contains(edgeTarget.stateNumber) )
779 			{
780 				labels.add(t.label); // traverse edge and track label
781 				if ( edgeTarget!=targetState ) {
782 					// get more labels if not at target
783 					getSampleInputSequenceUsingStateSet(edgeTarget,
784 														targetState,
785 														states,
786 														labels);
787 				}
788 				// done with this DFA state as we've found a good path to target
789 				return;
790 			}
791 		}
792 		labels.add(new Label(Label.EPSILON)); // indicate no input found
793 		// this happens on a : {p1}? a | A ;
794 		//ErrorManager.error(ErrorManager.MSG_CANNOT_COMPUTE_SAMPLE_INPUT_SEQ);
795 	}
796 
797 	/** Given a sample input sequence, you usually would like to know the
798 	 *  path taken through the NFA.  Return the list of NFA states visited
799 	 *  while matching a list of labels.  This cannot use the usual
800 	 *  interpreter, which does a deterministic walk.  We need to be able to
801 	 *  take paths that are turned off during nondeterminism resolution. So,
802 	 *  just do a depth-first walk traversing edges labeled with the current
803 	 *  label.  Return true if a path was found emanating from state s.
804 	 */
getNFAPath(NFAState s, int labelIndex, List<? extends Label> labels, List<? super NFAState> path)805 	protected boolean getNFAPath(NFAState s,     // starting where?
806 								 int labelIndex, // 0..labels.size()-1
807 								 List<? extends Label> labels,    // input sequence
808 								 List<? super NFAState> path)      // output list of NFA states
809 	{
810 		// track a visit to state s at input index labelIndex if not seen
811 		String thisStateKey = getStateLabelIndexKey(s.stateNumber,labelIndex);
812 		if ( statesVisitedAtInputDepth.contains(thisStateKey) ) {
813 			/*
814 			System.out.println("### already visited "+s.stateNumber+" previously at index "+
815 						   labelIndex);
816 			*/
817 			return false;
818 		}
819 		statesVisitedAtInputDepth.add(thisStateKey);
820 
821 		/*
822 		System.out.println("enter state "+s.stateNumber+" visited states: "+
823 						   statesVisitedAtInputDepth);
824         */
825 
826 		// pick the first edge whose target is in states and whose
827 		// label is labels[labelIndex]
828 		for (int i=0; i<s.getNumberOfTransitions(); i++) {
829 			Transition t = s.transition[i];
830 			NFAState edgeTarget = (NFAState)t.target;
831 			Label label = (Label)labels.get(labelIndex);
832 			/*
833 			System.out.println(s.stateNumber+"-"+
834 							   t.label.toString(dfa.nfa.grammar)+"->"+
835 							   edgeTarget.stateNumber+" =="+
836 							   label.toString(dfa.nfa.grammar)+"?");
837 			*/
838 			if ( t.label.isEpsilon() || t.label.isSemanticPredicate() ) {
839 				// nondeterministically backtrack down epsilon edges
840 				path.add(edgeTarget);
841 				boolean found =
842 					getNFAPath(edgeTarget, labelIndex, labels, path);
843 				if ( found ) {
844 					statesVisitedAtInputDepth.remove(thisStateKey);
845 					return true; // return to "calling" state
846 				}
847 				path.remove(path.size()-1); // remove; didn't work out
848 				continue; // look at the next edge
849 			}
850 			if ( t.label.matches(label) ) {
851 				path.add(edgeTarget);
852 				/*
853 				System.out.println("found label "+
854 								   t.label.toString(dfa.nfa.grammar)+
855 								   " at state "+s.stateNumber+"; labelIndex="+labelIndex);
856 				*/
857 				if ( labelIndex==labels.size()-1 ) {
858 					// found last label; done!
859 					statesVisitedAtInputDepth.remove(thisStateKey);
860 					return true;
861 				}
862 				// otherwise try to match remaining input
863 				boolean found =
864 					getNFAPath(edgeTarget, labelIndex+1, labels, path);
865 				if ( found ) {
866 					statesVisitedAtInputDepth.remove(thisStateKey);
867 					return true;
868 				}
869 				/*
870 				System.out.println("backtrack; path from "+s.stateNumber+"->"+
871 								   t.label.toString(dfa.nfa.grammar)+" didn't work");
872 				*/
873 				path.remove(path.size()-1); // remove; didn't work out
874 				continue; // keep looking for a path for labels
875 			}
876 		}
877 		//System.out.println("no epsilon or matching edge; removing "+thisStateKey);
878 		// no edge was found matching label; is ok, some state will have it
879 		statesVisitedAtInputDepth.remove(thisStateKey);
880 		return false;
881 	}
882 
getStateLabelIndexKey(int s, int i)883 	protected String getStateLabelIndexKey(int s, int i) {
884 		StringBuilder buf = new StringBuilder();
885 		buf.append(s);
886 		buf.append('_');
887 		buf.append(i);
888 		return buf.toString();
889 	}
890 
891 	/** From an alt number associated with artificial Tokens rule, return
892 	 *  the name of the token that is associated with that alt.
893 	 */
getTokenNameForTokensRuleAlt(int alt)894 	public String getTokenNameForTokensRuleAlt(int alt) {
895 		NFAState decisionState = dfa.getNFADecisionStartState();
896 		NFAState altState =
897 			dfa.nfa.grammar.getNFAStateForAltOfDecision(decisionState,alt);
898 		NFAState decisionLeft = (NFAState)altState.transition[0].target;
899 		RuleClosureTransition ruleCallEdge =
900 			(RuleClosureTransition)decisionLeft.transition[0];
901 		NFAState ruleStartState = (NFAState)ruleCallEdge.target;
902 		//System.out.println("alt = "+decisionLeft.getEnclosingRule());
903 		return ruleStartState.enclosingRule.name;
904 	}
905 
reset()906 	public void reset() {
907 		stateToRecursionOverflowConfigurationsMap.clear();
908 	}
909 }
910