1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.analysis;
29 
30 import org.antlr.misc.OrderedHashSet;
31 import org.antlr.misc.Utils;
32 import org.antlr.runtime.Token;
33 import org.antlr.tool.ErrorManager;
34 
35 import java.util.*;
36 
37 /** Code that embodies the NFA conversion to DFA. A new object is needed
38  *  per DFA (also required for thread safety if multiple conversions
39  *  launched).
40  */
41 public class NFAToDFAConverter {
42 	/** A list of DFA states we still need to process during NFA conversion */
43 	protected List<DFAState> work = new LinkedList<DFAState>();
44 
45 	/** While converting NFA, we must track states that
46 	 *  reference other rule's NFAs so we know what to do
47 	 *  at the end of a rule.  We need to know what context invoked
48 	 *  this rule so we can know where to continue looking for NFA
49 	 *  states.  I'm tracking a context tree (record of rule invocation
50 	 *  stack trace) for each alternative that could be predicted.
51 	 */
52 	protected NFAContext[] contextTrees;
53 
54 	/** We are converting which DFA? */
55 	protected DFA dfa;
56 
57 	public static boolean debug = false;
58 
59 	/** Should ANTLR launch multiple threads to convert NFAs to DFAs?
60 	 *  With a 2-CPU box, I note that it's about the same single or
61 	 *  multithreaded.  Both CPU meters are going even when single-threaded
62 	 *  so I assume the GC is killing us.  Could be the compiler.  When I
63 	 *  run java -Xint mode, I get about 15% speed improvement with multiple
64 	 *  threads.
65 	 */
66 	public static boolean SINGLE_THREADED_NFA_CONVERSION = true;
67 
68 	protected boolean computingStartState = false;
69 
NFAToDFAConverter(DFA dfa)70 	public NFAToDFAConverter(DFA dfa) {
71 		this.dfa = dfa;
72 		int nAlts = dfa.getNumberOfAlts();
73 		initContextTrees(nAlts);
74 	}
75 
convert()76 	public void convert() {
77 		//dfa.conversionStartTime = System.currentTimeMillis();
78 
79 		// create the DFA start state
80 		dfa.startState = computeStartState();
81 
82 		// while more DFA states to check, process them
83 		while ( work.size()>0 &&
84 				!dfa.nfa.grammar.NFAToDFAConversionExternallyAborted() )
85 		{
86 			DFAState d = work.get(0);
87 			if ( dfa.nfa.grammar.composite.watchNFAConversion ) {
88 				System.out.println("convert DFA state "+d.stateNumber+
89 								   " ("+d.nfaConfigurations.size()+" nfa states)");
90 			}
91 			int k = dfa.getUserMaxLookahead();
92 			if ( k>0 && k==d.getLookaheadDepth() ) {
93 				// we've hit max lookahead, make this a stop state
94 				//System.out.println("stop state @k="+k+" (terminated early)");
95 				/*
96 				List<Label> sampleInputLabels = d.dfa.probe.getSampleNonDeterministicInputSequence(d);
97 				String input = d.dfa.probe.getInputSequenceDisplay(sampleInputLabels);
98 				System.out.println("sample input: "+input);
99 				 */
100 				resolveNonDeterminisms(d);
101 				// Check to see if we need to add any semantic predicate transitions
102 				if ( d.isResolvedWithPredicates() ) {
103 					addPredicateTransitions(d);
104 				}
105 				else {
106 					d.setAcceptState(true); // must convert to accept state at k
107 				}
108 			}
109 			else {
110 				findNewDFAStatesAndAddDFATransitions(d);
111 			}
112 			work.remove(0); // done with it; remove from work list
113 		}
114 
115 		// Find all manual syn preds (gated).  These are not discovered
116 		// in tryToResolveWithSemanticPredicates because they are implicitly
117 		// added to every edge by code gen, DOT generation etc...
118 		dfa.findAllGatedSynPredsUsedInDFAAcceptStates();
119 	}
120 
121 	/** From this first NFA state of a decision, create a DFA.
122 	 *  Walk each alt in decision and compute closure from the start of that
123 	 *  rule, making sure that the closure does not include other alts within
124 	 *  that same decision.  The idea is to associate a specific alt number
125 	 *  with the starting closure so we can trace the alt number for all states
126 	 *  derived from this.  At a stop state in the DFA, we can return this alt
127 	 *  number, indicating which alt is predicted.
128 	 *
129 	 *  If this DFA is derived from an loop back NFA state, then the first
130 	 *  transition is actually the exit branch of the loop.  Rather than make
131 	 *  this alternative one, let's make this alt n+1 where n is the number of
132 	 *  alts in this block.  This is nice to keep the alts of the block 1..n;
133 	 *  helps with error messages.
134 	 *
135 	 *  I handle nongreedy in findNewDFAStatesAndAddDFATransitions
136 	 *  when nongreedy and EOT transition.  Make state with EOT emanating
137 	 *  from it the accept state.
138 	 */
computeStartState()139 	protected DFAState computeStartState() {
140 		NFAState alt = dfa.decisionNFAStartState;
141 		DFAState startState = dfa.newState();
142 		computingStartState = true;
143 		int i = 0;
144 		int altNum = 1;
145 		while ( alt!=null ) {
146 			// find the set of NFA states reachable without consuming
147 			// any input symbols for each alt.  Keep adding to same
148 			// overall closure that will represent the DFA start state,
149 			// but track the alt number
150 			NFAContext initialContext = contextTrees[i];
151 			// if first alt is derived from loopback/exit branch of loop,
152 			// make alt=n+1 for n alts instead of 1
153 			if ( i==0 &&
154 				 dfa.getNFADecisionStartState().decisionStateType==NFAState.LOOPBACK )
155 			{
156 				int numAltsIncludingExitBranch = dfa.nfa.grammar
157 					.getNumberOfAltsForDecisionNFA(dfa.decisionNFAStartState);
158 				altNum = numAltsIncludingExitBranch;
159 				closure((NFAState)alt.transition[0].target,
160 						altNum,
161 						initialContext,
162 						SemanticContext.EMPTY_SEMANTIC_CONTEXT,
163 						startState,
164 						true
165 				);
166 				altNum = 1; // make next alt the first
167 			}
168 			else {
169 				closure((NFAState)alt.transition[0].target,
170 						altNum,
171 						initialContext,
172 						SemanticContext.EMPTY_SEMANTIC_CONTEXT,
173 						startState,
174 						true
175 				);
176 				altNum++;
177 			}
178 			i++;
179 
180 			// move to next alternative
181 			if ( alt.transition[1] ==null ) {
182 				break;
183 			}
184 			alt = (NFAState)alt.transition[1].target;
185 		}
186 
187 		// now DFA start state has the complete closure for the decision
188 		// but we have tracked which alt is associated with which
189 		// NFA states.
190 		dfa.addState(startState); // make sure dfa knows about this state
191 		work.add(startState);
192 		computingStartState = false;
193 		return startState;
194 	}
195 
196 	/** From this node, add a d--a--&gt;t transition for all
197 	 *  labels 'a' where t is a DFA node created
198 	 *  from the set of NFA states reachable from any NFA
199 	 *  state in DFA state d.
200 	 */
findNewDFAStatesAndAddDFATransitions(DFAState d)201 	protected void findNewDFAStatesAndAddDFATransitions(DFAState d) {
202 		//System.out.println("work on DFA state "+d);
203 		OrderedHashSet<Label> labels = d.getReachableLabels();
204 		//System.out.println("reachable labels="+labels);
205 
206 		/*
207 		System.out.println("|reachable|/|nfaconfigs|="+
208 				labels.size()+"/"+d.getNFAConfigurations().size()+"="+
209 				labels.size()/(float)d.getNFAConfigurations().size());
210 		*/
211 
212 		// normally EOT is the "default" clause and decisions just
213 		// choose that last clause when nothing else matches.  DFA conversion
214 		// continues searching for a unique sequence that predicts the
215 		// various alts or until it finds EOT.  So this rule
216 		//
217 		// DUH : ('x'|'y')* "xy!";
218 		//
219 		// does not need a greedy indicator.  The following rule works fine too
220 		//
221 		// A : ('x')+ ;
222 		//
223 		// When the follow branch could match what is in the loop, by default,
224 		// the nondeterminism is resolved in favor of the loop.  You don't
225 		// get a warning because the only way to get this condition is if
226 		// the DFA conversion hits the end of the token.  In that case,
227 		// we're not *sure* what will happen next, but it could be anything.
228 		// Anyway, EOT is the default case which means it will never be matched
229 		// as resolution goes to the lowest alt number.  Exit branches are
230 		// always alt n+1 for n alts in a block.
231 		//
232 		// When a loop is nongreedy and we find an EOT transition, the DFA
233 		// state should become an accept state, predicting exit of loop.  It's
234 		// just reversing the resolution of ambiguity.
235 		// TODO: should this be done in the resolveAmbig method?
236 		Label EOTLabel = new Label(Label.EOT);
237 		boolean containsEOT = labels!=null && labels.contains(EOTLabel);
238 		if ( !dfa.isGreedy() && containsEOT ) {
239 			convertToEOTAcceptState(d);
240 			return; // no more work to do on this accept state
241 		}
242 
243 		// if in filter mode for lexer, want to match shortest not longest
244 		// string so if we see an EOT edge emanating from this state, then
245 		// convert this state to an accept state.  This only counts for
246 		// The Tokens rule as all other decisions must continue to look for
247 		// longest match.
248 		// [Taking back out a few days later on Jan 17, 2006.  This could
249 		//  be an option for the future, but this was wrong soluion for
250 		//  filtering.]
251 		/*
252 		if ( dfa.nfa.grammar.type==Grammar.LEXER && containsEOT ) {
253 			String filterOption = (String)dfa.nfa.grammar.getOption("filter");
254 			boolean filterMode = filterOption!=null && filterOption.equals("true");
255 			if ( filterMode && d.dfa.isTokensRuleDecision() ) {
256 				DFAState t = reach(d, EOTLabel);
257 				if ( t.getNFAConfigurations().size()>0 ) {
258 					convertToEOTAcceptState(d);
259 					//System.out.println("state "+d+" has EOT target "+t.stateNumber);
260 					return;
261 				}
262 			}
263 		}
264 		*/
265 
266 		int numberOfEdgesEmanating = 0;
267 		Map<Integer, Transition> targetToLabelMap = new HashMap<Integer, Transition>();
268 		// for each label that could possibly emanate from NFAStates of d
269 		int numLabels = 0;
270 		if ( labels!=null ) {
271 			numLabels = labels.size();
272 		}
273 		for (int i=0; i<numLabels; i++) {
274 			Label label = labels.get(i);
275 			DFAState t = reach(d, label);
276 			if ( debug ) {
277 				System.out.println("DFA state after reach "+label+" "+d+"-" +
278 								   label.toString(dfa.nfa.grammar)+"->"+t);
279 			}
280 			if ( t==null ) {
281 				// nothing was reached by label due to conflict resolution
282 				// EOT also seems to be in here occasionally probably due
283 				// to an end-of-rule state seeing it even though we'll pop
284 				// an invoking state off the state; don't bother to conflict
285 				// as this labels set is a covering approximation only.
286 				continue;
287 			}
288 			//System.out.println("dfa.k="+dfa.getUserMaxLookahead());
289 			if ( t.getUniqueAlt()==NFA.INVALID_ALT_NUMBER ) {
290 				// Only compute closure if a unique alt number is not known.
291 				// If a unique alternative is mentioned among all NFA
292 				// configurations then there is no possibility of needing to look
293 				// beyond this state; also no possibility of a nondeterminism.
294 				// This optimization May 22, 2006 just dropped -Xint time
295 				// for analysis of Java grammar from 11.5s to 2s!  Wow.
296 				closure(t);  // add any NFA states reachable via epsilon
297 			}
298 
299 			/*
300 			System.out.println("DFA state after closure "+d+"-"+
301 							   label.toString(dfa.nfa.grammar)+
302 							   "->"+t);
303 							   */
304 
305 			// add if not in DFA yet and then make d-label->t
306 			DFAState targetState = addDFAStateToWorkList(t);
307 
308 			numberOfEdgesEmanating +=
309 				addTransition(d, label, targetState, targetToLabelMap);
310 
311 			// lookahead of target must be one larger than d's k
312 			// We are possibly setting the depth of a pre-existing state
313 			// that is equal to one we just computed...not sure if that's
314 			// ok.
315 			targetState.setLookaheadDepth(d.getLookaheadDepth() + 1);
316 		}
317 
318 		//System.out.println("DFA after reach / closures:\n"+dfa);
319 		if ( !d.isResolvedWithPredicates() && numberOfEdgesEmanating==0 ) {
320 			//System.out.println("dangling DFA state "+d+"\nAfter reach / closures:\n"+dfa);
321 			// TODO: can fixed lookahead hit a dangling state case?
322 			// TODO: yes, with left recursion
323 			//System.err.println("dangling state alts: "+d.getAltSet());
324 			dfa.probe.reportDanglingState(d);
325 			// turn off all configurations except for those associated with
326 			// min alt number; somebody has to win else some input will not
327 			// predict any alt.
328 			int minAlt = resolveByPickingMinAlt(d, null);
329 			// force it to be an accept state
330 			// don't call convertToAcceptState() which merges stop states.
331 			// other states point at us; don't want them pointing to dead states
332 			d.setAcceptState(true); // might be adding new accept state for alt
333 			dfa.setAcceptState(minAlt, d);
334 			//convertToAcceptState(d, minAlt); // force it to be an accept state
335 		}
336 
337 		// Check to see if we need to add any semantic predicate transitions
338 		// might have both token and predicated edges from d
339 		if ( d.isResolvedWithPredicates() ) {
340 			addPredicateTransitions(d);
341 		}
342 	}
343 
344 	/** Add a transition from state d to targetState with label in normal case.
345 	 *  if COLLAPSE_ALL_INCIDENT_EDGES, however, try to merge all edges from
346 	 *  d to targetState; this means merging their labels.  Another optimization
347 	 *  is to reduce to a single EOT edge any set of edges from d to targetState
348 	 *  where there exists an EOT state.  EOT is like the wildcard so don't
349 	 *  bother to test any other edges.  Example:
350 	 *
351 	 *  NUM_INT
352 	 *    : '1'..'9' ('0'..'9')* ('l'|'L')?
353      *    | '0' ('x'|'X') ('0'..'9'|'a'..'f'|'A'..'F')+ ('l'|'L')?
354      *    | '0' ('0'..'7')* ('l'|'L')?
355 	 *    ;
356 	 *
357 	 *  The normal decision to predict alts 1, 2, 3 is:
358 	 *
359 	 *  if ( (input.LA(1)&gt;='1' &amp;&amp; input.LA(1)&lt;='9') ) {
360      *       alt7=1;
361      *  }
362      *  else if ( input.LA(1)=='0' ) {
363      *      if ( input.LA(2)=='X'||input.LA(2)=='x' ) {
364      *          alt7=2;
365      *      }
366      *      else if ( (input.LA(2)&gt;='0' &amp;&amp; input.LA(2)&lt;='7') ) {
367      *           alt7=3;
368      *      }
369      *      else if ( input.LA(2)=='L'||input.LA(2)=='l' ) {
370      *           alt7=3;
371      *      }
372      *      else {
373      *           alt7=3;
374      *      }
375      *  }
376      *  else error
377 	 *
378      *  Clearly, alt 3 is predicted with extra work since it tests 0..7
379 	 *  and [lL] before finally realizing that any character is actually
380 	 *  ok at k=2.
381 	 *
382 	 *  A better decision is as follows:
383      *
384 	 *  if ( (input.LA(1)&gt;='1' &amp;&amp; input.LA(1)&lt;='9') ) {
385 	 *      alt7=1;
386 	 *  }
387 	 *  else if ( input.LA(1)=='0' ) {
388 	 *      if ( input.LA(2)=='X'||input.LA(2)=='x' ) {
389 	 *          alt7=2;
390 	 *      }
391 	 *      else {
392 	 *          alt7=3;
393 	 *      }
394 	 *  }
395 	 *
396 	 *  The DFA originally has 3 edges going to the state the predicts alt 3,
397 	 *  but upon seeing the EOT edge (the "else"-clause), this method
398 	 *  replaces the old merged label (which would have (0..7|l|L)) with EOT.
399 	 *  The code generator then leaves alt 3 predicted with a simple else-
400 	 *  clause. :)
401 	 *
402 	 *  The only time the EOT optimization makes no sense is in the Tokens
403 	 *  rule.  We want EOT to truly mean you have matched an entire token
404 	 *  so don't bother actually rewinding to execute that rule unless there
405 	 *  are actions in that rule.  For now, since I am not preventing
406 	 *  backtracking from Tokens rule, I will simply allow the optimization.
407 	 */
addTransition(DFAState d, Label label, DFAState targetState, Map<Integer, Transition> targetToLabelMap)408 	protected static int addTransition(DFAState d,
409 									   Label label,
410 									   DFAState targetState,
411 									   Map<Integer, Transition> targetToLabelMap)
412 	{
413 		//System.out.println(d.stateNumber+"-"+label.toString(dfa.nfa.grammar)+"->"+targetState.stateNumber);
414 		int n = 0;
415 		if ( DFAOptimizer.COLLAPSE_ALL_PARALLEL_EDGES ) {
416 			// track which targets we've hit
417 			Integer tI = Utils.integer(targetState.stateNumber);
418 			Transition oldTransition = targetToLabelMap.get(tI);
419 			if ( oldTransition!=null ) {
420 				//System.out.println("extra transition to "+tI+" upon "+label.toString(dfa.nfa.grammar));
421 				// already seen state d to target transition, just add label
422 				// to old label unless EOT
423 				if ( label.getAtom()==Label.EOT ) {
424 					// merge with EOT means old edge can go away
425 					oldTransition.label = new Label(Label.EOT);
426 				}
427 				else {
428 					// don't add anything to EOT, it's essentially the wildcard
429 					if ( oldTransition.label.getAtom()!=Label.EOT ) {
430 						// ok, not EOT, add in this label to old label
431 						oldTransition.label.add(label);
432 					}
433 					//System.out.println("label updated to be "+oldTransition.label.toString(dfa.nfa.grammar));
434 				}
435 			}
436 			else {
437 				// make a transition from d to t upon 'a'
438 				n = 1;
439 				label = (Label)label.clone(); // clone in case we alter later
440 				int transitionIndex = d.addTransition(targetState, label);
441 				Transition trans = d.getTransition(transitionIndex);
442 				// track target/transition pairs
443 				targetToLabelMap.put(tI, trans);
444 			}
445 		}
446 		else {
447 			n = 1;
448 			d.addTransition(targetState, label);
449 		}
450 		return n;
451 	}
452 
453 	/** For all NFA states (configurations) merged in d,
454 	 *  compute the epsilon closure; that is, find all NFA states reachable
455 	 *  from the NFA states in d via purely epsilon transitions.
456 	 */
closure(DFAState d)457 	public void closure(DFAState d) {
458 		if ( debug ) {
459 			System.out.println("closure("+d+")");
460 		}
461 
462 		List<NFAConfiguration> configs = new ArrayList<NFAConfiguration>();
463 		// Because we are adding to the configurations in closure
464 		// must clone initial list so we know when to stop doing closure
465 		configs.addAll(d.nfaConfigurations);
466 		// for each NFA configuration in d (abort if we detect non-LL(*) state)
467 		int numConfigs = configs.size();
468 		for (int i = 0; i < numConfigs; i++) {
469 			NFAConfiguration c = configs.get(i);
470 			if ( c.singleAtomTransitionEmanating ) {
471 				continue; // ignore NFA states w/o epsilon transitions
472 			}
473 			//System.out.println("go do reach for NFA state "+c.state);
474 			// figure out reachable NFA states from each of d's nfa states
475 			// via epsilon transitions.
476 			// Fill configsInClosure rather than altering d configs inline
477 			closure(dfa.nfa.getState(c.state),
478 					c.alt,
479 					c.context,
480 					c.semanticContext,
481 					d,
482 					false);
483 		}
484 		//System.out.println("after closure d="+d);
485 		d.closureBusy = null; // wack all that memory used during closure
486 	}
487 
488 	/** Where can we get from NFA state p traversing only epsilon transitions?
489 	 *  Add new NFA states + context to DFA state d.  Also add semantic
490 	 *  predicates to semantic context if collectPredicates is set.  We only
491 	 *  collect predicates at hoisting depth 0, meaning before any token/char
492 	 *  have been recognized.  This corresponds, during analysis, to the
493 	 *  initial DFA start state construction closure() invocation.
494 	 *
495 	 *  There are four cases of interest (the last being the usual transition):
496 	 *
497 	 *   1. Traverse an edge that takes us to the start state of another
498 	 *      rule, r.  We must push this state so that if the DFA
499 	 *      conversion hits the end of rule r, then it knows to continue
500 	 *      the conversion at state following state that "invoked" r. By
501 	 *      construction, there is a single transition emanating from a rule
502 	 *      ref node.
503 	 *
504 	 *   2. Reach an NFA state associated with the end of a rule, r, in the
505 	 *      grammar from which it was built.  We must add an implicit (i.e.,
506 	 *      don't actually add an epsilon transition) epsilon transition
507 	 *      from r's end state to the NFA state following the NFA state
508 	 *      that transitioned to rule r's start state.  Because there are
509 	 *      many states that could reach r, the context for a rule invocation
510 	 *      is part of a call tree not a simple stack.  When we fall off end
511 	 *      of rule, "pop" a state off the call tree and add that state's
512 	 *      "following" node to d's NFA configuration list.  The context
513 	 *      for this new addition will be the new "stack top" in the call tree.
514 	 *
515 	 *   3. Like case 2, we reach an NFA state associated with the end of a
516 	 *      rule, r, in the grammar from which NFA was built.  In this case,
517 	 *      however, we realize that during this NFA&rarr;DFA conversion, no state
518 	 *      invoked the current rule's NFA.  There is no choice but to add
519 	 *      all NFA states that follow references to r's start state.  This is
520 	 *      analogous to computing the FOLLOW(r) in the LL(k) world.  By
521 	 *      construction, even rule stop state has a chain of nodes emanating
522 	 *      from it that points to every possible following node.  This case
523 	 *      is conveniently handled then by the 4th case.
524 	 *
525 	 *   4. Normal case.  If p can reach another NFA state q, then add
526 	 *      q to d's configuration list, copying p's context for q's context.
527 	 *      If there is a semantic predicate on the transition, then AND it
528 	 *      with any existing semantic context.
529 	 *
530 	 *   Current state p is always added to d's configuration list as it's part
531 	 *   of the closure as well.
532 	 *
533 	 *  When is a closure operation in a cycle condition?  While it is
534 	 *  very possible to have the same NFA state mentioned twice
535 	 *  within the same DFA state, there are two situations that
536 	 *  would lead to nontermination of closure operation:
537 	 *
538 	 *  o   Whenever closure reaches a configuration where the same state
539 	 *      with same or a suffix context already exists.  This catches
540 	 *      the IF-THEN-ELSE tail recursion cycle and things like
541 	 *
542 	 *      a : A a | B ;
543 	 *
544 	 *      the context will be $ (empty stack).
545 	 *
546 	 *      We have to check
547 	 *      larger context stacks because of (...)+ loops.  For
548 	 *      example, the context of a (...)+ can be nonempty if the
549 	 *      surrounding rule is invoked by another rule:
550 	 *
551 	 *      a : b A | X ;
552 	 *      b : (B|)+ ;  // nondeterministic by the way
553 	 *
554 	 *      The context of the (B|)+ loop is "invoked from item
555 	 *      a : . b A ;" and then the empty alt of the loop can reach back
556 	 *      to itself.  The context stack will have one "return
557 	 *      address" element and so we must check for same state, same
558 	 *      context for arbitrary context stacks.
559 	 *
560 	 *      Idea: If we've seen this configuration before during closure, stop.
561 	 *      We also need to avoid reaching same state with conflicting context.
562 	 *      Ultimately analysis would stop and we'd find the conflict, but we
563 	 *      should stop the computation.  Previously I only checked for
564 	 *      exact config.  Need to check for same state, suffix context
565 	 * 		not just exact context.
566 	 *
567 	 *  o   Whenever closure reaches a configuration where state p
568 	 *      is present in its own context stack.  This means that
569 	 *      p is a rule invocation state and the target rule has
570 	 *      been called before.  NFAContext.MAX_RECURSIVE_INVOCATIONS
571 	 *      (See the comment there also) determines how many times
572 	 *      it's possible to recurse; clearly we cannot recurse forever.
573 	 *      Some grammars such as the following actually require at
574 	 *      least one recursive call to correctly compute the lookahead:
575 	 *
576 	 *      a : L ID R
577 	 *        | b
578 	 *        ;
579 	 *      b : ID
580 	 *        | L a R
581 	 *        ;
582 	 *
583 	 *      Input L ID R is ambiguous but to figure this out, ANTLR
584 	 *      needs to go a-&gt;b-&gt;a-&gt;b to find the L ID sequence.
585 	 *
586 	 *      Do not allow closure to add a configuration that would
587 	 *      allow too much recursion.
588 	 *
589 	 *      This case also catches infinite left recursion.
590 	 */
closure(NFAState p, int alt, NFAContext context, SemanticContext semanticContext, DFAState d, boolean collectPredicates)591 	public void closure(NFAState p,
592 						int alt,
593 						NFAContext context,
594 						SemanticContext semanticContext,
595 						DFAState d,
596 						boolean collectPredicates)
597 	{
598 		if ( debug ){
599 			System.out.println("closure at "+p.enclosingRule.name+" state "+p.stateNumber+"|"+
600 							   alt+" filling DFA state "+d.stateNumber+" with context "+context
601 							   );
602 		}
603 
604 //		if ( DFA.MAX_TIME_PER_DFA_CREATION>0 &&
605 //			 System.currentTimeMillis() - d.dfa.conversionStartTime >=
606 //			 DFA.MAX_TIME_PER_DFA_CREATION )
607 //		{
608 //			// bail way out; we've blown up somehow
609 //			throw new AnalysisTimeoutException(d.dfa);
610 //		}
611 
612 		NFAConfiguration proposedNFAConfiguration =
613 				new NFAConfiguration(p.stateNumber,
614 						alt,
615 						context,
616 						semanticContext);
617 
618 		// Avoid infinite recursion
619 		if ( closureIsBusy(d, proposedNFAConfiguration) ) {
620 			if ( debug ) {
621 				System.out.println("avoid visiting exact closure computation NFA config: "+
622 								   proposedNFAConfiguration+" in "+p.enclosingRule.name);
623 				System.out.println("state is "+d.dfa.decisionNumber+"."+d.stateNumber);
624 			}
625 			return;
626 		}
627 
628 		// set closure to be busy for this NFA configuration
629 		d.closureBusy.add(proposedNFAConfiguration);
630 
631 		// p itself is always in closure
632 		d.addNFAConfiguration(p, proposedNFAConfiguration);
633 
634 		// Case 1: are we a reference to another rule?
635 		Transition transition0 = p.transition[0];
636 		if ( transition0 instanceof RuleClosureTransition ) {
637 			int depth = context.recursionDepthEmanatingFromState(p.stateNumber);
638 			// Detect recursion by more than a single alt, which indicates
639 			// that the decision's lookahead language is potentially non-regular; terminate
640 			if ( depth == 1 && d.dfa.getUserMaxLookahead()==0 ) { // k=* only
641 				d.dfa.recursiveAltSet.add(alt); // indicate that this alt is recursive
642 				if ( d.dfa.recursiveAltSet.size()>1 ) {
643 					//System.out.println("recursive alts: "+d.dfa.recursiveAltSet.toString());
644 					d.abortedDueToMultipleRecursiveAlts = true;
645 					throw new NonLLStarDecisionException(d.dfa);
646 				}
647 				/*
648 				System.out.println("alt "+alt+" in rule "+p.enclosingRule+" dec "+d.dfa.decisionNumber+
649 					" ctx: "+context);
650 				System.out.println("d="+d);
651 				*/
652 			}
653 			// Detect an attempt to recurse too high
654 			// if this context has hit the max recursions for p.stateNumber,
655 			// don't allow it to enter p.stateNumber again
656 			if ( depth >= NFAContext.MAX_SAME_RULE_INVOCATIONS_PER_NFA_CONFIG_STACK ) {
657 				/*
658 				System.out.println("OVF state "+d);
659 				System.out.println("proposed "+proposedNFAConfiguration);
660 				*/
661 				d.abortedDueToRecursionOverflow = true;
662 				d.dfa.probe.reportRecursionOverflow(d, proposedNFAConfiguration);
663 				if ( debug ) {
664 					System.out.println("analysis overflow in closure("+d.stateNumber+")");
665 				}
666 				return;
667 			}
668 
669 			// otherwise, it's cool to (re)enter target of this rule ref
670 			RuleClosureTransition ref = (RuleClosureTransition)transition0;
671 			// first create a new context and push onto call tree,
672 			// recording the fact that we are invoking a rule and
673 			// from which state (case 2 below will get the following state
674 			// via the RuleClosureTransition emanating from the invoking state
675 			// pushed on the stack).
676 			// Reset the context to reflect the fact we invoked rule
677 			NFAContext newContext = new NFAContext(context, p);
678 			//System.out.println("invoking rule "+ref.rule.name);
679 			// System.out.println(" context="+context);
680 			// traverse epsilon edge to new rule
681 			NFAState ruleTarget = (NFAState)ref.target;
682 			closure(ruleTarget, alt, newContext, semanticContext, d, collectPredicates);
683 		}
684 		// Case 2: end of rule state, context (i.e., an invoker) exists
685 		else if ( p.isAcceptState() && context.parent!=null ) {
686 			NFAState whichStateInvokedRule = context.invokingState;
687 			RuleClosureTransition edgeToRule =
688 				(RuleClosureTransition)whichStateInvokedRule.transition[0];
689 			NFAState continueState = edgeToRule.followState;
690 			NFAContext newContext = context.parent; // "pop" invoking state
691 			closure(continueState, alt, newContext, semanticContext, d, collectPredicates);
692 		}
693 		// Case 3: end of rule state, nobody invoked this rule (no context)
694 		//    Fall thru to be handled by case 4 automagically.
695 		// Case 4: ordinary NFA->DFA conversion case: simple epsilon transition
696 		else {
697 			// recurse down any epsilon transitions
698 			if ( transition0!=null && transition0.isEpsilon() ) {
699 				boolean collectPredicatesAfterAction = collectPredicates;
700 				if ( transition0.isAction() && collectPredicates ) {
701 					collectPredicatesAfterAction = false;
702 					/*
703 					if ( computingStartState ) {
704 						System.out.println("found action during prediction closure "+((ActionLabel)transition0.label).actionAST.token);
705 					}
706 					 */
707 				}
708 				closure((NFAState)transition0.target,
709 						alt,
710 						context,
711 						semanticContext,
712 						d,
713 						collectPredicatesAfterAction
714 				);
715 			}
716 			else if ( transition0!=null && transition0.isSemanticPredicate() ) {
717                 SemanticContext labelContext = transition0.label.getSemanticContext();
718                 if ( computingStartState ) {
719                     if ( collectPredicates ) {
720                         // only indicate we can see a predicate if we're collecting preds
721                         // Could be computing start state & seen an action before this.
722                         dfa.predicateVisible = true;
723                     }
724                     else {
725                         // this state has a pred, but we can't see it.
726                         dfa.hasPredicateBlockedByAction = true;
727                         // System.out.println("found pred during prediction but blocked by action found previously");
728                     }
729                 }
730                 // continue closure here too, but add the sem pred to ctx
731                 SemanticContext newSemanticContext = semanticContext;
732                 if ( collectPredicates ) {
733                     // AND the previous semantic context with new pred
734                     // do not hoist syn preds from other rules; only get if in
735                     // starting state's rule (i.e., context is empty)
736                     int walkAlt =
737 						dfa.decisionNFAStartState.translateDisplayAltToWalkAlt(alt);
738 					NFAState altLeftEdge =
739 						dfa.nfa.grammar.getNFAStateForAltOfDecision(dfa.decisionNFAStartState,walkAlt);
740 					/*
741 					System.out.println("state "+p.stateNumber+" alt "+alt+" walkAlt "+walkAlt+" trans to "+transition0.target);
742 					System.out.println("DFA start state "+dfa.decisionNFAStartState.stateNumber);
743 					System.out.println("alt left edge "+altLeftEdge.stateNumber+
744 						", epsilon target "+
745 						altLeftEdge.transition(0).target.stateNumber);
746 					*/
747 					if ( !labelContext.isSyntacticPredicate() ||
748 						 p==altLeftEdge.transition[0].target )
749 					{
750 						//System.out.println("&"+labelContext+" enclosingRule="+p.enclosingRule);
751 						newSemanticContext =
752 							SemanticContext.and(semanticContext, labelContext);
753 					}
754 				}
755 				closure((NFAState)transition0.target,
756 						alt,
757 						context,
758 						newSemanticContext,
759 						d,
760 						collectPredicates);
761 			}
762 			Transition transition1 = p.transition[1];
763 			if ( transition1!=null && transition1.isEpsilon() ) {
764 				closure((NFAState)transition1.target,
765 						alt,
766 						context,
767 						semanticContext,
768 						d,
769 						collectPredicates);
770 			}
771 		}
772 
773 		// don't remove "busy" flag as we want to prevent all
774 		// references to same config of state|alt|ctx|semCtx even
775 		// if resulting from another NFA state
776 	}
777 
778 	/** A closure operation should abort if that computation has already
779 	 *  been done or a computation with a conflicting context has already
780 	 *  been done.  If proposed NFA config's state and alt are the same
781 	 *  there is potentially a problem.  If the stack context is identical
782 	 *  then clearly the exact same computation is proposed.  If a context
783 	 *  is a suffix of the other, then again the computation is in an
784 	 *  identical context.  ?$ and ??$ are considered the same stack.
785 	 *  We could walk configurations linearly doing the comparison instead
786 	 *  of a set for exact matches but it's much slower because you can't
787 	 *  do a Set lookup.  I use exact match as ANTLR
788 	 *  always detect the conflict later when checking for context suffixes...
789 	 *  I check for left-recursive stuff and terminate before analysis to
790 	 *  avoid need to do this more expensive computation.
791 	 *
792 	 *  12-31-2007: I had to use the loop again rather than simple
793 	 *  closureBusy.contains(proposedNFAConfiguration) lookup.  The
794 	 *  semantic context should not be considered when determining if
795 	 *  a closure operation is busy.  I saw a FOLLOW closure operation
796 	 *  spin until time out because the predicate context kept increasing
797 	 *  in size even though it's same boolean value.  This seems faster also
798 	 *  because I'm not doing String.equals on the preds all the time.
799 	 *
800 	 *  05-05-2008: Hmm...well, i think it was a mistake to remove the sem
801 	 *  ctx check below...adding back in.  Coincides with report of ANTLR
802 	 *  getting super slow: http://www.antlr.org:8888/browse/ANTLR-235
803 	 *  This could be because it doesn't properly compute then resolve
804 	 *  a predicate expression.  Seems to fix unit test:
805 	 *  TestSemanticPredicates.testSemanticContextPreventsEarlyTerminationOfClosure()
806 	 *  Changing back to Set from List.  Changed a large grammar from 8 minutes
807 	 *  to 11 seconds.  Cool.  Closing ANTLR-235.
808 	 */
closureIsBusy(DFAState d, NFAConfiguration proposedNFAConfiguration)809 	public static boolean closureIsBusy(DFAState d,
810 										NFAConfiguration proposedNFAConfiguration)
811 	{
812 		return d.closureBusy.contains(proposedNFAConfiguration);
813 /*
814 		int numConfigs = d.closureBusy.size();
815 		// Check epsilon cycle (same state, same alt, same context)
816 		for (int i = 0; i < numConfigs; i++) {
817 			NFAConfiguration c = (NFAConfiguration) d.closureBusy.get(i);
818 			if ( proposedNFAConfiguration.state==c.state &&
819 				 proposedNFAConfiguration.alt==c.alt &&
820 				 proposedNFAConfiguration.semanticContext.equals(c.semanticContext) &&
821 				 proposedNFAConfiguration.context.suffix(c.context) )
822 			{
823 				return true;
824 			}
825 		}
826 		return false;
827 		*/
828 	}
829 
830 	/** Given the set of NFA states in DFA state d, find all NFA states
831 	 *  reachable traversing label arcs.  By definition, there can be
832 	 *  only one DFA state reachable by an atom from DFA state d so we must
833 	 *  find and merge all NFA states reachable via label.  Return a new
834 	 *  DFAState that has all of those NFA states with their context (i.e.,
835 	 *  which alt do they predict and where to return to if they fall off
836 	 *  end of a rule).
837 	 *
838 	 *  Because we cannot jump to another rule nor fall off the end of a rule
839 	 *  via a non-epsilon transition, NFA states reachable from d have the
840 	 *  same configuration as the NFA state in d.  So if NFA state 7 in d's
841 	 *  configurations can reach NFA state 13 then 13 will be added to the
842 	 *  new DFAState (labelDFATarget) with the same configuration as state
843 	 *  7 had.
844 	 *
845 	 *  This method does not see EOT transitions off the end of token rule
846 	 *  accept states if the rule was invoked by somebody.
847 	 */
reach(DFAState d, Label label)848 	public DFAState reach(DFAState d, Label label) {
849 		//System.out.println("reach "+label.toString(dfa.nfa.grammar)+" from "+d.stateNumber);
850 		DFAState labelDFATarget = dfa.newState();
851 
852 		// for each NFA state in d with a labeled edge,
853 		// add in target states for label
854 		//System.out.println("size(d.state="+d.stateNumber+")="+d.nfaConfigurations.size());
855 		//System.out.println("size(labeled edge states)="+d.configurationsWithLabeledEdges.size());
856 		List<NFAConfiguration> configs = d.configurationsWithLabeledEdges;
857 		int numConfigs = configs.size();
858 		for (int i = 0; i < numConfigs; i++) {
859 			NFAConfiguration c = configs.get(i);
860 			if ( c.resolved || c.resolveWithPredicate ) {
861 				continue; // the conflict resolver indicates we must leave alone
862 			}
863 			NFAState p = dfa.nfa.getState(c.state);
864 			// by design of the grammar->NFA conversion, only transition 0
865 			// may have a non-epsilon edge.
866 			Transition edge = p.transition[0];
867 			if ( edge==null || !c.singleAtomTransitionEmanating ) {
868 				continue;
869 			}
870 			Label edgeLabel = edge.label;
871 
872 			// SPECIAL CASE
873 			// if it's an EOT transition on end of lexer rule, but context
874 			// stack is not empty, then don't see the EOT; the closure
875 			// will have added in the proper states following the reference
876 			// to this rule in the invoking rule.  In other words, if
877 			// somebody called this rule, don't see the EOT emanating from
878 			// this accept state.
879 			if ( c.context.parent!=null && edgeLabel.label==Label.EOT )	{
880 				continue;
881 			}
882 
883 			// Labels not unique at this point (not until addReachableLabels)
884 			// so try simple int label match before general set intersection
885 			//System.out.println("comparing "+edgeLabel+" with "+label);
886 			if ( Label.intersect(label, edgeLabel) ) {
887 				// found a transition with label;
888 				// add NFA target to (potentially) new DFA state
889 				NFAConfiguration newC = labelDFATarget.addNFAConfiguration(
890 					(NFAState)edge.target,
891 					c.alt,
892 					c.context,
893 					c.semanticContext);
894 			}
895 		}
896 		if ( labelDFATarget.nfaConfigurations.size()==0 ) {
897 			// kill; it's empty
898 			dfa.setState(labelDFATarget.stateNumber, null);
899 			labelDFATarget = null;
900 		}
901         return labelDFATarget;
902 	}
903 
904 	/** Walk the configurations of this DFA state d looking for the
905 	 *  configuration, c, that has a transition on EOT.  State d should
906 	 *  be converted to an accept state predicting the c.alt.  Blast
907 	 *  d's current configuration set and make it just have config c.
908 	 *
909 	 *  TODO: can there be more than one config with EOT transition?
910 	 *  That would mean that two NFA configurations could reach the
911 	 *  end of the token with possibly different predicted alts.
912 	 *  Seems like that would be rare or impossible.  Perhaps convert
913 	 *  this routine to find all such configs and give error if &gt;1.
914 	 */
convertToEOTAcceptState(DFAState d)915 	protected void convertToEOTAcceptState(DFAState d) {
916 		Label eot = new Label(Label.EOT);
917 		int numConfigs = d.nfaConfigurations.size();
918 		for (int i = 0; i < numConfigs; i++) {
919 			NFAConfiguration c = d.nfaConfigurations.get(i);
920 			if ( c.resolved || c.resolveWithPredicate ) {
921 				continue; // the conflict resolver indicates we must leave alone
922 			}
923 			NFAState p = dfa.nfa.getState(c.state);
924 			Transition edge = p.transition[0];
925 			Label edgeLabel = edge.label;
926 			if ( edgeLabel.equals(eot) ) {
927 				//System.out.println("config with EOT: "+c);
928 				d.setAcceptState(true);
929 				//System.out.println("d goes from "+d);
930 				d.nfaConfigurations.clear();
931 				d.addNFAConfiguration(p,c.alt,c.context,c.semanticContext);
932 				//System.out.println("to "+d);
933 				return; // assume only one EOT transition
934 			}
935 		}
936 	}
937 
938 	/** Add a new DFA state to the DFA if not already present.
939      *  If the DFA state uniquely predicts a single alternative, it
940      *  becomes a stop state; don't add to work list.  Further, if
941      *  there exists an NFA state predicted by &gt; 1 different alternatives
942      *  and with the same syn and sem context, the DFA is nondeterministic for
943      *  at least one input sequence reaching that NFA state.
944      */
addDFAStateToWorkList(DFAState d)945     protected DFAState addDFAStateToWorkList(DFAState d) {
946         DFAState existingState = dfa.addState(d);
947 		if ( d != existingState ) {
948 			// already there...use/return the existing DFA state.
949 			// But also set the states[d.stateNumber] to the existing
950 			// DFA state because the closureIsBusy must report
951 			// infinite recursion on a state before it knows
952 			// whether or not the state will already be
953 			// found after closure on it finishes.  It could be
954 			// referring to a state that will ultimately not make it
955 			// into the reachable state space and the error
956 			// reporting must be able to compute the path from
957 			// start to the error state with infinite recursion
958 			dfa.setState(d.stateNumber, existingState);
959 			return existingState;
960 		}
961 
962 		// if not there, then examine new state.
963 
964 		// resolve syntactic conflicts by choosing a single alt or
965         // by using semantic predicates if present.
966         resolveNonDeterminisms(d);
967 
968         // If deterministic, don't add this state; it's an accept state
969         // Just return as a valid DFA state
970 		int alt = d.getUniquelyPredictedAlt();
971 		if ( alt!=NFA.INVALID_ALT_NUMBER ) { // uniquely predicts an alt?
972 			d = convertToAcceptState(d, alt);
973 			/*
974 			System.out.println("convert to accept; DFA "+d.dfa.decisionNumber+" state "+d.stateNumber+" uniquely predicts alt "+
975 				d.getUniquelyPredictedAlt());
976 				*/
977 		}
978 		else {
979             // unresolved, add to work list to continue NFA conversion
980             work.add(d);
981         }
982         return d;
983     }
984 
convertToAcceptState(DFAState d, int alt)985 	protected DFAState convertToAcceptState(DFAState d, int alt) {
986 		// only merge stop states if they are deterministic and no
987 		// recursion problems and only if they have the same gated pred
988 		// context!
989 		// Later, the error reporting may want to trace the path from
990 		// the start state to the nondet state
991 		if ( DFAOptimizer.MERGE_STOP_STATES &&
992 			d.getNonDeterministicAlts()==null &&
993 			!d.abortedDueToRecursionOverflow &&
994 			!d.abortedDueToMultipleRecursiveAlts )
995 		{
996 			// check to see if we already have an accept state for this alt
997 			// [must do this after we resolve nondeterminisms in general]
998 			DFAState acceptStateForAlt = dfa.getAcceptState(alt);
999 			if ( acceptStateForAlt!=null ) {
1000 				// we already have an accept state for alt;
1001 				// Are their gate sem pred contexts the same?
1002 				// For now we assume a braindead version: both must not
1003 				// have gated preds or share exactly same single gated pred.
1004 				// The equals() method is only defined on Predicate contexts not
1005 				// OR etc...
1006 				SemanticContext gatedPreds = d.getGatedPredicatesInNFAConfigurations();
1007 				SemanticContext existingStateGatedPreds =
1008 					acceptStateForAlt.getGatedPredicatesInNFAConfigurations();
1009 				if ( (gatedPreds==null && existingStateGatedPreds==null) ||
1010 				     ((gatedPreds!=null && existingStateGatedPreds!=null) &&
1011 					  gatedPreds.equals(existingStateGatedPreds)) )
1012 				{
1013 					// make this d.statenumber point at old DFA state
1014 					dfa.setState(d.stateNumber, acceptStateForAlt);
1015 					dfa.removeState(d);    // remove this state from unique DFA state set
1016 					d = acceptStateForAlt; // use old accept state; throw this one out
1017 					return d;
1018 				}
1019 				// else consider it a new accept state; fall through.
1020 			}
1021 		}
1022 		d.setAcceptState(true); // new accept state for alt
1023 		dfa.setAcceptState(alt, d);
1024 		return d;
1025 	}
1026 
1027 	/** If &gt; 1 NFA configurations within this DFA state have identical
1028 	 *  NFA state and context, but differ in their predicted
1029 	 *  TODO update for new context suffix stuff 3-9-2005
1030 	 *  alternative then a single input sequence predicts multiple alts.
1031 	 *  The NFA decision is therefore syntactically indistinguishable
1032 	 *  from the left edge upon at least one input sequence.  We may
1033 	 *  terminate the NFA to DFA conversion for these paths since no
1034 	 *  paths emanating from those NFA states can possibly separate
1035 	 *  these conjoined twins once interwined to make things
1036 	 *  deterministic (unless there are semantic predicates; see below).
1037 	 *
1038 	 *  Upon a nondeterministic set of NFA configurations, we should
1039 	 *  report a problem to the grammar designer and resolve the issue
1040 	 *  by aribitrarily picking the first alternative (this usually
1041 	 *  ends up producing the most natural behavior).  Pick the lowest
1042 	 *  alt number and just turn off all NFA configurations
1043 	 *  associated with the other alts. Rather than remove conflicting
1044 	 *  NFA configurations, I set the "resolved" bit so that future
1045 	 *  computations will ignore them.  In this way, we maintain the
1046 	 *  complete DFA state with all its configurations, but prevent
1047 	 *  future DFA conversion operations from pursuing undesirable
1048 	 *  paths.  Remember that we want to terminate DFA conversion as
1049 	 *  soon as we know the decision is deterministic *or*
1050 	 *  nondeterministic.
1051 	 *
1052 	 *  [BTW, I have convinced myself that there can be at most one
1053 	 *  set of nondeterministic configurations in a DFA state.  Only NFA
1054 	 *  configurations arising from the same input sequence can appear
1055 	 *  in a DFA state.  There is no way to have another complete set
1056 	 *  of nondeterministic NFA configurations without another input
1057 	 *  sequence, which would reach a different DFA state.  Therefore,
1058 	 *  the two nondeterministic NFA configuration sets cannot collide
1059 	 *  in the same DFA state.]
1060 	 *
1061 	 *  Consider DFA state {(s|1),(s|2),(s|3),(t|3),(v|4)} where (s|a)
1062 	 *  is state 's' and alternative 'a'.  Here, configuration set
1063 	 *  {(s|1),(s|2),(s|3)} predicts 3 different alts.  Configurations
1064 	 *  (s|2) and (s|3) are "resolved", leaving {(s|1),(t|3),(v|4)} as
1065 	 *  items that must still be considered by the DFA conversion
1066 	 *  algorithm in DFA.findNewDFAStatesAndAddDFATransitions().
1067 	 *
1068 	 *  Consider the following grammar where alts 1 and 2 are no
1069 	 *  problem because of the 2nd lookahead symbol.  Alts 3 and 4 are
1070 	 *  identical and will therefore reach the rule end NFA state but
1071 	 *  predicting 2 different alts (no amount of future lookahead
1072 	 *  will render them deterministic/separable):
1073 	 *
1074 	 *  a : A B
1075 	 *    | A C
1076 	 *    | A
1077 	 *    | A
1078 	 *    ;
1079 	 *
1080 	 *  Here is a (slightly reduced) NFA of this grammar:
1081 	 *
1082 	 *  (1)-A-&gt;(2)-B-&gt;(end)-EOF-&gt;(8)
1083 	 *   |              ^
1084 	 *  (2)-A-&gt;(3)-C----|
1085 	 *   |              ^
1086 	 *  (4)-A-&gt;(5)------|
1087 	 *   |              ^
1088 	 *  (6)-A-&gt;(7)------|
1089 	 *
1090 	 *  where (n) is NFA state n.  To begin DFA conversion, the start
1091 	 *  state is created:
1092 	 *
1093 	 *  {(1|1),(2|2),(4|3),(6|4)}
1094 	 *
1095 	 *  Upon A, all NFA configurations lead to new NFA states yielding
1096 	 *  new DFA state:
1097 	 *
1098 	 *  {(2|1),(3|2),(5|3),(7|4),(end|3),(end|4)}
1099 	 *
1100 	 *  where the configurations with state end in them are added
1101 	 *  during the epsilon closure operation.  State end predicts both
1102 	 *  alts 3 and 4.  An error is reported, the latter configuration is
1103 	 *  flagged as resolved leaving the DFA state as:
1104 	 *
1105 	 *  {(2|1),(3|2),(5|3),(7|4|resolved),(end|3),(end|4|resolved)}
1106 	 *
1107 	 *  As NFA configurations are added to a DFA state during its
1108 	 *  construction, the reachable set of labels is computed.  Here
1109 	 *  reachable is {B,C,EOF} because there is at least one NFA state
1110 	 *  in the DFA state that can transition upon those symbols.
1111 	 *
1112 	 *  The final DFA looks like:
1113 	 *
1114 	 *  {(1|1),(2|2),(4|3),(6|4)}
1115 	 *              |
1116 	 *              v
1117 	 *  {(2|1),(3|2),(5|3),(7|4),(end|3),(end|4)} -B-&gt; (end|1)
1118 	 *              |                        |
1119 	 *              C                        ----EOF-&gt; (8,3)
1120 	 *              |
1121 	 *              v
1122 	 *           (end|2)
1123 	 *
1124 	 *  Upon AB, alt 1 is predicted.  Upon AC, alt 2 is predicted.
1125 	 *  Upon A EOF, alt 3 is predicted.  Alt 4 is not a viable
1126 	 *  alternative.
1127 	 *
1128 	 *  The algorithm is essentially to walk all the configurations
1129 	 *  looking for a conflict of the form (s|i) and (s|j) for i!=j.
1130 	 *  Use a hash table to track state+context pairs for collisions
1131 	 *  so that we have O(n) to walk the n configurations looking for
1132 	 *  a conflict.  Upon every conflict, track the alt number so
1133 	 *  we have a list of all nondeterministically predicted alts. Also
1134 	 *  track the minimum alt.  Next go back over the configurations, setting
1135 	 *  the "resolved" bit for any that have an alt that is a member of
1136 	 *  the nondeterministic set.  This will effectively remove any alts
1137 	 *  but the one we want from future consideration.
1138 	 *
1139 	 *  See resolveWithSemanticPredicates()
1140 	 *
1141 	 *  AMBIGUOUS TOKENS
1142 	 *
1143 	 *  With keywords and ID tokens, there is an inherit ambiguity in that
1144 	 *  "int" can be matched by ID also.  Each lexer rule has an EOT
1145 	 *  transition emanating from it which is used whenever the end of
1146 	 *  a rule is reached and another token rule did not invoke it.  EOT
1147 	 *  is the only thing that can be seen next.  If two rules are identical
1148 	 *  like "int" and "int" then the 2nd def is unreachable and you'll get
1149 	 *  a warning.  We prevent a warning though for the keyword/ID issue as
1150 	 *  ID is still reachable.  This can be a bit weird.  '+' rule then a
1151 	 *  '+'|'+=' rule will fail to match '+' for the 2nd rule.
1152 	 *
1153 	 *  If all NFA states in this DFA state are targets of EOT transitions,
1154 	 *  (and there is more than one state plus no unique alt is predicted)
1155 	 *  then DFA conversion will leave this state as a dead state as nothing
1156 	 *  can be reached from this state.  To resolve the ambiguity, just do
1157 	 *  what flex and friends do: pick the first rule (alt in this case) to
1158 	 *  win.  This means you should put keywords before the ID rule.
1159 	 *  If the DFA state has only one NFA state then there is no issue:
1160 	 *  it uniquely predicts one alt. :)  Problem
1161 	 *  states will look like this during conversion:
1162 	 *
1163 	 *  DFA 1:{9|1, 19|2, 14|3, 20|2, 23|2, 24|2, ...}-&lt;EOT&gt;-&gt;5:{41|3, 42|2}
1164 	 *
1165 	 *  Worse, when you have two identical literal rules, you will see 3 alts
1166 	 *  in the EOT state (one for ID and one each for the identical rules).
1167 	 */
resolveNonDeterminisms(DFAState d)1168 	public void resolveNonDeterminisms(DFAState d) {
1169 		if ( debug ) {
1170 			System.out.println("resolveNonDeterminisms "+d.toString());
1171 		}
1172 		boolean conflictingLexerRules = false;
1173 		Set<Integer> nondeterministicAlts = d.getNonDeterministicAlts();
1174 		if ( debug && nondeterministicAlts!=null ) {
1175 			System.out.println("nondet alts="+nondeterministicAlts);
1176 		}
1177 
1178 		// CHECK FOR AMBIGUOUS EOT (if |allAlts|>1 and EOT state, resolve)
1179 		// grab any config to see if EOT state; any other configs must
1180 		// transition on EOT to get to this DFA state as well so all
1181 		// states in d must be targets of EOT.  These are the end states
1182 		// created in NFAFactory.build_EOFState
1183 		NFAConfiguration anyConfig = d.nfaConfigurations.get(0);
1184 		NFAState anyState = dfa.nfa.getState(anyConfig.state);
1185 
1186 		// if d is target of EOT and more than one predicted alt
1187 		// indicate that d is nondeterministic on all alts otherwise
1188 		// it looks like state has no problem
1189 		if ( anyState.isEOTTargetState() ) {
1190 			Set<Integer> allAlts = d.getAltSet();
1191 			// is more than 1 alt predicted?
1192 			if ( allAlts!=null && allAlts.size()>1 ) {
1193 				nondeterministicAlts = allAlts;
1194 				// track Tokens rule issues differently than other decisions
1195 				if ( d.dfa.isTokensRuleDecision() ) {
1196 					dfa.probe.reportLexerRuleNondeterminism(d,allAlts);
1197 					//System.out.println("Tokens rule DFA state "+d+" nondeterministic");
1198 					conflictingLexerRules = true;
1199 				}
1200 			}
1201 		}
1202 
1203 		// if no problems return unless we aborted work on d to avoid inf recursion
1204 		if ( !d.abortedDueToRecursionOverflow && nondeterministicAlts==null ) {
1205 			return; // no problems, return
1206 		}
1207 
1208 		// if we're not a conflicting lexer rule and we didn't abort, report ambig
1209 		// We should get a report for abort so don't give another
1210 		if ( !d.abortedDueToRecursionOverflow && !conflictingLexerRules ) {
1211 			// TODO: with k=x option set, this is called twice for same state
1212 			dfa.probe.reportNondeterminism(d, nondeterministicAlts);
1213 			// TODO: how to turn off when it's only the FOLLOW that is
1214 			// conflicting.  This used to shut off even alts i,j < n
1215 			// conflict warnings. :(
1216 		}
1217 
1218 		// ATTEMPT TO RESOLVE WITH SEMANTIC PREDICATES
1219 		boolean resolved =
1220 			tryToResolveWithSemanticPredicates(d, nondeterministicAlts);
1221 		if ( resolved ) {
1222 			if ( debug ) {
1223 				System.out.println("resolved DFA state "+d.stateNumber+" with pred");
1224 			}
1225 			d.resolvedWithPredicates = true;
1226 			dfa.probe.reportNondeterminismResolvedWithSemanticPredicate(d);
1227 			return;
1228 		}
1229 
1230 		// RESOLVE SYNTACTIC CONFLICT BY REMOVING ALL BUT ONE ALT
1231 		resolveByChoosingFirstAlt(d, nondeterministicAlts);
1232 
1233 		//System.out.println("state "+d.stateNumber+" resolved to alt "+winningAlt);
1234 	}
1235 
resolveByChoosingFirstAlt(DFAState d, Set<Integer> nondeterministicAlts)1236 	protected int resolveByChoosingFirstAlt(DFAState d, Set<Integer> nondeterministicAlts) {
1237 		int winningAlt;
1238 		if ( dfa.isGreedy() ) {
1239 			winningAlt = resolveByPickingMinAlt(d,nondeterministicAlts);
1240 		}
1241 		else {
1242 			// If nongreedy, the exit alt shout win, but only if it's
1243 			// involved in the nondeterminism!
1244 			/*
1245 			System.out.println("resolving exit alt for decision="+
1246 				dfa.decisionNumber+" state="+d);
1247 			System.out.println("nondet="+nondeterministicAlts);
1248 			System.out.println("exit alt "+exitAlt);
1249 			*/
1250 			int exitAlt = dfa.getNumberOfAlts();
1251 			if ( nondeterministicAlts.contains(Utils.integer(exitAlt)) ) {
1252 				// if nongreedy and exit alt is one of those nondeterministic alts
1253 				// predicted, resolve in favor of what follows block
1254 				winningAlt = resolveByPickingExitAlt(d,nondeterministicAlts);
1255 			}
1256 			else {
1257 				winningAlt = resolveByPickingMinAlt(d,nondeterministicAlts);
1258 			}
1259 		}
1260 		return winningAlt;
1261 	}
1262 
1263 	/** Turn off all configurations associated with the
1264 	 *  set of incoming nondeterministic alts except the min alt number.
1265 	 *  There may be many alts among the configurations but only turn off
1266 	 *  the ones with problems (other than the min alt of course).
1267 	 *
1268 	 *  If nondeterministicAlts is null then turn off all configs 'cept those
1269 	 *  associated with the minimum alt.
1270 	 *
1271 	 *  Return the min alt found.
1272 	 */
resolveByPickingMinAlt(DFAState d, Set<Integer> nondeterministicAlts)1273 	protected int resolveByPickingMinAlt(DFAState d, Set<Integer> nondeterministicAlts) {
1274 		int min;
1275 		if ( nondeterministicAlts!=null ) {
1276 			min = getMinAlt(nondeterministicAlts);
1277 		}
1278 		else {
1279 			min = d.minAltInConfigurations;
1280 		}
1281 
1282 		turnOffOtherAlts(d, min, nondeterministicAlts);
1283 
1284 		return min;
1285 	}
1286 
1287 	/** Resolve state d by choosing exit alt, which is same value as the
1288 	 *  number of alternatives.  Return that exit alt.
1289 	 */
resolveByPickingExitAlt(DFAState d, Set<Integer> nondeterministicAlts)1290 	protected int resolveByPickingExitAlt(DFAState d, Set<Integer> nondeterministicAlts) {
1291 		int exitAlt = dfa.getNumberOfAlts();
1292 		turnOffOtherAlts(d, exitAlt, nondeterministicAlts);
1293 		return exitAlt;
1294 	}
1295 
1296 	/** turn off all states associated with alts other than the good one
1297 	 *  (as long as they are one of the nondeterministic ones)
1298 	 */
turnOffOtherAlts(DFAState d, int min, Set<Integer> nondeterministicAlts)1299 	protected static void turnOffOtherAlts(DFAState d, int min, Set<Integer> nondeterministicAlts) {
1300 		int numConfigs = d.nfaConfigurations.size();
1301 		for (int i = 0; i < numConfigs; i++) {
1302 			NFAConfiguration configuration = d.nfaConfigurations.get(i);
1303 			if ( configuration.alt!=min ) {
1304 				if ( nondeterministicAlts==null ||
1305 					 nondeterministicAlts.contains(Utils.integer(configuration.alt)) )
1306 				{
1307 					configuration.resolved = true;
1308 				}
1309 			}
1310 		}
1311 	}
1312 
getMinAlt(Set<Integer> nondeterministicAlts)1313 	protected static int getMinAlt(Set<Integer> nondeterministicAlts) {
1314 		int min = Integer.MAX_VALUE;
1315 		for (Integer altI : nondeterministicAlts) {
1316 			int alt = altI;
1317 			if ( alt < min ) {
1318 				min = alt;
1319 			}
1320 		}
1321 		return min;
1322 	}
1323 
1324 	/** See if a set of nondeterministic alternatives can be disambiguated
1325 	 *  with the semantic predicate contexts of the alternatives.
1326 	 *
1327 	 *  Without semantic predicates, syntactic conflicts are resolved
1328 	 *  by simply choosing the first viable alternative.  In the
1329 	 *  presence of semantic predicates, you can resolve the issue by
1330 	 *  evaluating boolean expressions at run time.  During analysis,
1331 	 *  this amounts to suppressing grammar error messages to the
1332 	 *  developer.  NFA configurations are always marked as "to be
1333 	 *  resolved with predicates" so that
1334 	 *  DFA.findNewDFAStatesAndAddDFATransitions() will know to ignore
1335 	 *  these configurations and add predicate transitions to the DFA
1336 	 *  after adding token/char labels.
1337 	 *
1338 	 *  During analysis, we can simply make sure that for n
1339 	 *  ambiguously predicted alternatives there are at least n-1
1340 	 *  unique predicate sets.  The nth alternative can be predicted
1341 	 *  with "not" the "or" of all other predicates.  NFA configurations without
1342 	 *  predicates are assumed to have the default predicate of
1343 	 *  "true" from a user point of view.  When true is combined via || with
1344 	 *  another predicate, the predicate is a tautology and must be removed
1345 	 *  from consideration for disambiguation:
1346 	 *
1347 	 *  a : b | B ; // hoisting p1||true out of rule b, yields no predicate
1348 	 *  b : {p1}? B | B ;
1349 	 *
1350 	 *  This is done down in getPredicatesPerNonDeterministicAlt().
1351 	 */
tryToResolveWithSemanticPredicates(DFAState d, Set<Integer> nondeterministicAlts)1352 	protected boolean tryToResolveWithSemanticPredicates(DFAState d,
1353 														 Set<Integer> nondeterministicAlts)
1354 	{
1355 		Map<Integer, SemanticContext> altToPredMap =
1356 				getPredicatesPerNonDeterministicAlt(d, nondeterministicAlts);
1357 
1358 		if ( altToPredMap.isEmpty() ) {
1359 			return false;
1360 		}
1361 
1362 		//System.out.println("nondeterministic alts with predicates: "+altToPredMap);
1363 		dfa.probe.reportAltPredicateContext(d, altToPredMap);
1364 
1365 		if ( nondeterministicAlts.size()-altToPredMap.size()>1 ) {
1366 			// too few predicates to resolve; just return
1367 			return false;
1368 		}
1369 
1370 		// Handle case where 1 predicate is missing
1371 		// Case 1. Semantic predicates
1372 		// If the missing pred is on nth alt, !(union of other preds)==true
1373 		// so we can avoid that computation.  If naked alt is ith, then must
1374 		// test it with !(union) since semantic predicated alts are order
1375 		// independent
1376 		// Case 2: Syntactic predicates
1377 		// The naked alt is always assumed to be true as the order of
1378 		// alts is the order of precedence.  The naked alt will be a tautology
1379 		// anyway as it's !(union of other preds).  This implies
1380 		// that there is no such thing as noviable alt for synpred edges
1381 		// emanating from a DFA state.
1382 		if ( altToPredMap.size()==nondeterministicAlts.size()-1 ) {
1383 			// if there are n-1 predicates for n nondeterministic alts, can fix
1384 			org.antlr.misc.BitSet ndSet = org.antlr.misc.BitSet.of(nondeterministicAlts);
1385 			org.antlr.misc.BitSet predSet = org.antlr.misc.BitSet.of(altToPredMap);
1386 			int nakedAlt = ndSet.subtract(predSet).getSingleElement();
1387 			SemanticContext nakedAltPred;
1388 			if ( nakedAlt == max(nondeterministicAlts) ) {
1389 				// the naked alt is the last nondet alt and will be the default clause
1390 				nakedAltPred = new SemanticContext.TruePredicate();
1391 			}
1392 			else {
1393 				// pretend naked alternative is covered with !(union other preds)
1394 				// unless one of preds from other alts is a manually specified synpred
1395 				// since those have precedence same as alt order.  Missing synpred
1396 				// is true so that alt wins (or is at least attempted).
1397 				// Note: can't miss any preds on alts (can't be here) if auto backtrack
1398 				// since it prefixes all.
1399 				// In LL(*) paper, i'll just have algorithm emit warning about uncovered
1400 				// pred
1401 				SemanticContext unionOfPredicatesFromAllAlts =
1402 					getUnionOfPredicates(altToPredMap);
1403 				//System.out.println("all predicates "+unionOfPredicatesFromAllAlts);
1404 				if ( unionOfPredicatesFromAllAlts.isSyntacticPredicate() ) {
1405 					nakedAltPred = new SemanticContext.TruePredicate();
1406 				}
1407 				else {
1408 					nakedAltPred =
1409 						SemanticContext.not(unionOfPredicatesFromAllAlts);
1410 				}
1411 			}
1412 
1413 			//System.out.println("covering naked alt="+nakedAlt+" with "+nakedAltPred);
1414 
1415 			altToPredMap.put(Utils.integer(nakedAlt), nakedAltPred);
1416 			// set all config with alt=nakedAlt to have the computed predicate
1417 			int numConfigs = d.nfaConfigurations.size();
1418 			for (int i = 0; i < numConfigs; i++) { // TODO: I don't think we need to do this; altToPredMap has it
1419 			 //7/27/10  theok, I removed it and it still seems to work with everything; leave in anyway just in case
1420 				NFAConfiguration configuration = d.nfaConfigurations.get(i);
1421 				if ( configuration.alt == nakedAlt ) {
1422 					configuration.semanticContext = nakedAltPred;
1423 				}
1424 			}
1425 		}
1426 
1427 		if ( altToPredMap.size()==nondeterministicAlts.size() ) {
1428 			// RESOLVE CONFLICT by picking one NFA configuration for each alt
1429 			// and setting its resolvedWithPredicate flag
1430 			// First, prevent a recursion warning on this state due to
1431 			// pred resolution
1432 			if ( d.abortedDueToRecursionOverflow ) {
1433 				d.dfa.probe.removeRecursiveOverflowState(d);
1434 			}
1435 			int numConfigs = d.nfaConfigurations.size();
1436 			//System.out.println("pred map="+altToPredMap);
1437 			for (int i = 0; i < numConfigs; i++) {
1438 				NFAConfiguration configuration = d.nfaConfigurations.get(i);
1439 				SemanticContext semCtx = altToPredMap.get(Utils.integer(configuration.alt));
1440 				if ( semCtx!=null ) {
1441 					// resolve (first found) with pred
1442 					// and remove alt from problem list
1443 					//System.out.println("c="+configuration);
1444 					configuration.resolveWithPredicate = true;
1445 					// altToPredMap has preds from all alts; store into "annointed" config
1446 					configuration.semanticContext = semCtx; // reset to combined
1447 					altToPredMap.remove(Utils.integer(configuration.alt));
1448 
1449 					// notify grammar that we've used the preds contained in semCtx
1450 					if ( semCtx.isSyntacticPredicate() ) {
1451 						dfa.nfa.grammar.synPredUsedInDFA(dfa, semCtx);
1452 					}
1453 				}
1454 				else if ( nondeterministicAlts.contains(Utils.integer(configuration.alt)) ) {
1455 					// resolve all configurations for nondeterministic alts
1456 					// for which there is no predicate context by turning it off
1457 					configuration.resolved = true;
1458 				}
1459 			}
1460 			return true;
1461 		}
1462 
1463 		return false;  // couldn't fix the problem with predicates
1464 	}
1465 
1466 	/** Return a mapping from nondeterministc alt to combined list of predicates.
1467 	 *  If both (s|i|semCtx1) and (t|i|semCtx2) exist, then the proper predicate
1468 	 *  for alt i is semCtx1||semCtx2 because you have arrived at this single
1469 	 *  DFA state via two NFA paths, both of which have semantic predicates.
1470 	 *  We ignore deterministic alts because syntax alone is sufficient
1471 	 *  to predict those.  Do not include their predicates.
1472 	 *
1473 	 *  Alts with no predicate are assumed to have {true}? pred.
1474 	 *
1475 	 *  When combining via || with "true", all predicates are removed from
1476 	 *  consideration since the expression will always be true and hence
1477 	 *  not tell us how to resolve anything.  So, if any NFA configuration
1478 	 *  in this DFA state does not have a semantic context, the alt cannot
1479 	 *  be resolved with a predicate.
1480 	 *
1481 	 *  If nonnull, incidentEdgeLabel tells us what NFA transition label
1482 	 *  we did a reach on to compute state d.  d may have insufficient
1483 	 *  preds, so we really want this for the error message.
1484 	 */
getPredicatesPerNonDeterministicAlt(DFAState d, Set<Integer> nondeterministicAlts)1485 	protected Map<Integer, SemanticContext> getPredicatesPerNonDeterministicAlt(DFAState d,
1486 																				Set<Integer> nondeterministicAlts)
1487 	{
1488 		// map alt to combined SemanticContext
1489 		Map<Integer, SemanticContext> altToPredicateContextMap =
1490 			new HashMap<Integer, SemanticContext>();
1491 		// init the alt to predicate set map
1492 		Map<Integer, OrderedHashSet<SemanticContext>> altToSetOfContextsMap =
1493 			new HashMap<Integer, OrderedHashSet<SemanticContext>>();
1494 		for (Integer altI : nondeterministicAlts) {
1495 			altToSetOfContextsMap.put(altI, new OrderedHashSet<SemanticContext>());
1496 		}
1497 
1498 		/*
1499 		List<Label> sampleInputLabels = d.dfa.probe.getSampleNonDeterministicInputSequence(d);
1500 		String input = d.dfa.probe.getInputSequenceDisplay(sampleInputLabels);
1501 		System.out.println("sample input: "+input);
1502 		*/
1503 
1504 		// for each configuration, create a unique set of predicates
1505 		// Also, track the alts with at least one uncovered configuration
1506 		// (one w/o a predicate); tracks tautologies like p1||true
1507 		Map<Integer, Set<Token>> altToLocationsReachableWithoutPredicate = new HashMap<Integer, Set<Token>>();
1508 		Set<Integer> nondetAltsWithUncoveredConfiguration = new HashSet<Integer>();
1509 		//System.out.println("configs="+d.nfaConfigurations);
1510 		//System.out.println("configs with preds?"+d.atLeastOneConfigurationHasAPredicate);
1511 		//System.out.println("configs with preds="+d.configurationsWithPredicateEdges);
1512 		int numConfigs = d.nfaConfigurations.size();
1513 		for (int i = 0; i < numConfigs; i++) {
1514 			NFAConfiguration configuration = d.nfaConfigurations.get(i);
1515 			Integer altI = Utils.integer(configuration.alt);
1516 			// if alt is nondeterministic, combine its predicates
1517 			if ( nondeterministicAlts.contains(altI) ) {
1518 				// if there is a predicate for this NFA configuration, OR in
1519 				if ( configuration.semanticContext !=
1520 					 SemanticContext.EMPTY_SEMANTIC_CONTEXT )
1521 				{
1522 					Set<SemanticContext> predSet = altToSetOfContextsMap.get(altI);
1523 					predSet.add(configuration.semanticContext);
1524 				}
1525 				else {
1526 					// if no predicate, but it's part of nondeterministic alt
1527 					// then at least one path exists not covered by a predicate.
1528 					// must remove predicate for this alt; track incomplete alts
1529 					nondetAltsWithUncoveredConfiguration.add(altI);
1530 					/*
1531 					NFAState s = dfa.nfa.getState(configuration.state);
1532 					System.out.println("###\ndec "+dfa.decisionNumber+" alt "+configuration.alt+
1533 									   " enclosing rule for nfa state not covered "+
1534 									   s.enclosingRule);
1535 					if ( s.associatedASTNode!=null ) {
1536 						System.out.println("token="+s.associatedASTNode.token);
1537 					}
1538 					System.out.println("nfa state="+s);
1539 
1540 					if ( s.incidentEdgeLabel!=null && Label.intersect(incidentEdgeLabel, s.incidentEdgeLabel) ) {
1541 						Set<Token> locations = altToLocationsReachableWithoutPredicate.get(altI);
1542 						if ( locations==null ) {
1543 							locations = new HashSet<Token>();
1544 							altToLocationsReachableWithoutPredicate.put(altI, locations);
1545 						}
1546 						locations.add(s.associatedASTNode.token);
1547 					}
1548 					*/
1549 				}
1550 			}
1551 		}
1552 
1553 		// For each alt, OR together all unique predicates associated with
1554 		// all configurations
1555 		// Also, track the list of incompletely covered alts: those alts
1556 		// with at least 1 predicate and at least one configuration w/o a
1557 		// predicate. We want this in order to report to the decision probe.
1558 		List<Integer> incompletelyCoveredAlts = new ArrayList<Integer>();
1559 		for (Integer altI : nondeterministicAlts) {
1560 			Set<SemanticContext> contextsForThisAlt = altToSetOfContextsMap.get(altI);
1561 			if ( nondetAltsWithUncoveredConfiguration.contains(altI) ) { // >= 1 config has no ctx
1562 				if ( contextsForThisAlt.size()>0 ) {    // && at least one pred
1563 					incompletelyCoveredAlts.add(altI);  // this alt incompleted covered
1564 				}
1565 				continue; // don't include at least 1 config has no ctx
1566 			}
1567 			SemanticContext combinedContext = null;
1568 			for (SemanticContext ctx : contextsForThisAlt) {
1569 				combinedContext =
1570 						SemanticContext.or(combinedContext,ctx);
1571 			}
1572 			altToPredicateContextMap.put(altI, combinedContext);
1573 		}
1574 
1575 		if ( incompletelyCoveredAlts.size()>0 ) {
1576 			/*
1577 			System.out.println("prob in dec "+dfa.decisionNumber+" state="+d);
1578 			FASerializer serializer = new FASerializer(dfa.nfa.grammar);
1579 			String result = serializer.serialize(dfa.startState);
1580 			System.out.println("dfa: "+result);
1581 			System.out.println("incomplete alts: "+incompletelyCoveredAlts);
1582 			System.out.println("nondet="+nondeterministicAlts);
1583 			System.out.println("nondetAltsWithUncoveredConfiguration="+ nondetAltsWithUncoveredConfiguration);
1584 			System.out.println("altToCtxMap="+altToSetOfContextsMap);
1585 			System.out.println("altToPredicateContextMap="+altToPredicateContextMap);
1586 			*/
1587 			for (int i = 0; i < numConfigs; i++) {
1588 				NFAConfiguration configuration = d.nfaConfigurations.get(i);
1589 				Integer altI = Utils.integer(configuration.alt);
1590 				if ( incompletelyCoveredAlts.contains(altI) &&
1591 					 configuration.semanticContext == SemanticContext.EMPTY_SEMANTIC_CONTEXT )
1592 				{
1593 					NFAState s = dfa.nfa.getState(configuration.state);
1594 					/*
1595 					System.out.print("nondet config w/o context "+configuration+
1596 									 " incident "+(s.incidentEdgeLabel!=null?s.incidentEdgeLabel.toString(dfa.nfa.grammar):null));
1597 					if ( s.associatedASTNode!=null ) {
1598 						System.out.print(" token="+s.associatedASTNode.token);
1599 					}
1600 					else System.out.println();
1601 					*/
1602                     // We want to report getting to an NFA state with an
1603                     // incoming label, unless it's EOF, w/o a predicate.
1604                     if ( s.incidentEdgeLabel!=null && s.incidentEdgeLabel.label != Label.EOF ) {
1605                         if ( s.associatedASTNode==null || s.associatedASTNode.token==null ) {
1606 							ErrorManager.internalError("no AST/token for nonepsilon target w/o predicate");
1607 						}
1608 						else {
1609 							Set<Token> locations = altToLocationsReachableWithoutPredicate.get(altI);
1610 							if ( locations==null ) {
1611 								locations = new HashSet<Token>();
1612 								altToLocationsReachableWithoutPredicate.put(altI, locations);
1613 							}
1614 							locations.add(s.associatedASTNode.token);
1615 						}
1616 					}
1617 				}
1618 			}
1619 			dfa.probe.reportIncompletelyCoveredAlts(d,
1620 													altToLocationsReachableWithoutPredicate);
1621 		}
1622 
1623 		return altToPredicateContextMap;
1624 	}
1625 
1626 	/** OR together all predicates from the alts.  Note that the predicate
1627 	 *  for an alt could itself be a combination of predicates.
1628 	 */
getUnionOfPredicates(Map<?, SemanticContext> altToPredMap)1629 	protected static SemanticContext getUnionOfPredicates(Map<?, SemanticContext> altToPredMap) {
1630 		Iterator<SemanticContext> iter;
1631 		SemanticContext unionOfPredicatesFromAllAlts = null;
1632 		iter = altToPredMap.values().iterator();
1633 		while ( iter.hasNext() ) {
1634 			SemanticContext semCtx = iter.next();
1635 			if ( unionOfPredicatesFromAllAlts==null ) {
1636 				unionOfPredicatesFromAllAlts = semCtx;
1637 			}
1638 			else {
1639 				unionOfPredicatesFromAllAlts =
1640 						SemanticContext.or(unionOfPredicatesFromAllAlts,semCtx);
1641 			}
1642 		}
1643 		return unionOfPredicatesFromAllAlts;
1644 	}
1645 
1646 	/** for each NFA config in d, look for "predicate required" sign set
1647 	 *  during nondeterminism resolution.
1648 	 *
1649 	 *  Add the predicate edges sorted by the alternative number; I'm fairly
1650 	 *  sure that I could walk the configs backwards so they are added to
1651 	 *  the predDFATarget in the right order, but it's best to make sure.
1652 	 *  Predicates succeed in the order they are specifed.  Alt i wins
1653 	 *  over alt i+1 if both predicates are true.
1654 	 */
addPredicateTransitions(DFAState d)1655 	protected void addPredicateTransitions(DFAState d) {
1656 		List<NFAConfiguration> configsWithPreds = new ArrayList<NFAConfiguration>();
1657 		// get a list of all configs with predicates
1658 		int numConfigs = d.nfaConfigurations.size();
1659 		for (int i = 0; i < numConfigs; i++) {
1660 			NFAConfiguration c = d.nfaConfigurations.get(i);
1661 			if ( c.resolveWithPredicate ) {
1662 				configsWithPreds.add(c);
1663 			}
1664 		}
1665 		// Sort ascending according to alt; alt i has higher precedence than i+1
1666 		Collections.sort(configsWithPreds,
1667 			 new Comparator<NFAConfiguration>() {
1668 			@Override
1669 				 public int compare(NFAConfiguration a, NFAConfiguration b) {
1670 					 if ( a.alt < b.alt ) return -1;
1671 					 else if ( a.alt > b.alt ) return 1;
1672 					 return 0;
1673 				 }
1674 			 });
1675 		List<NFAConfiguration> predConfigsSortedByAlt = configsWithPreds;
1676 		// Now, we can add edges emanating from d for these preds in right order
1677 		for (int i = 0; i < predConfigsSortedByAlt.size(); i++) {
1678 			NFAConfiguration c = predConfigsSortedByAlt.get(i);
1679 			DFAState predDFATarget = d.dfa.getAcceptState(c.alt);
1680 			if ( predDFATarget==null ) {
1681 				predDFATarget = dfa.newState(); // create if not there.
1682 				// create a new DFA state that is a target of the predicate from d
1683 				predDFATarget.addNFAConfiguration(dfa.nfa.getState(c.state),
1684 												  c.alt,
1685 												  c.context,
1686 												  c.semanticContext);
1687 				predDFATarget.setAcceptState(true);
1688 				dfa.setAcceptState(c.alt, predDFATarget);
1689 				DFAState existingState = dfa.addState(predDFATarget);
1690 				if ( predDFATarget != existingState ) {
1691 					// already there...use/return the existing DFA state that
1692 					// is a target of this predicate.  Make this state number
1693 					// point at the existing state
1694 					dfa.setState(predDFATarget.stateNumber, existingState);
1695 					predDFATarget = existingState;
1696 				}
1697 			}
1698 			// add a transition to pred target from d
1699 			d.addTransition(predDFATarget, new PredicateLabel(c.semanticContext));
1700 		}
1701 	}
1702 
initContextTrees(int numberOfAlts)1703 	protected void initContextTrees(int numberOfAlts) {
1704         contextTrees = new NFAContext[numberOfAlts];
1705         for (int i = 0; i < contextTrees.length; i++) {
1706             int alt = i+1;
1707             // add a dummy root node so that an NFA configuration can
1708             // always point at an NFAContext.  If a context refers to this
1709             // node then it implies there is no call stack for
1710             // that configuration
1711             contextTrees[i] = new NFAContext(null, null);
1712         }
1713     }
1714 
max(Set<Integer> s)1715 	public static int max(Set<Integer> s) {
1716 		if ( s==null ) {
1717 			return Integer.MIN_VALUE;
1718 		}
1719 		int i = 0;
1720 		int m = 0;
1721 		for (Integer value : s) {
1722 			i++;
1723 			Integer I = value;
1724 			if ( i==1 ) { // init m with first value
1725 				m = I;
1726 				continue;
1727 			}
1728 			if ( I>m ) {
1729 				m = I;
1730 			}
1731 		}
1732 		return m;
1733 	}
1734 }
1735