1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.analysis;
29 
30 import org.antlr.codegen.CodeGenerator;
31 import org.antlr.misc.IntSet;
32 import org.antlr.misc.IntervalSet;
33 import org.antlr.misc.Utils;
34 import org.antlr.runtime.IntStream;
35 import org.stringtemplate.v4.ST;
36 import org.antlr.tool.*;
37 
38 import java.util.*;
39 
40 /** A DFA (converted from a grammar's NFA).
41  *  DFAs are used as prediction machine for alternative blocks in all kinds
42  *  of recognizers (lexers, parsers, tree walkers).
43  */
44 public class DFA {
45 	public static final int REACHABLE_UNKNOWN = -2;
46 	public static final int REACHABLE_BUSY = -1; // in process of computing
47 	public static final int REACHABLE_NO = 0;
48 	public static final int REACHABLE_YES = 1;
49 
50 	public static final int CYCLIC_UNKNOWN = -2;
51 	public static final int CYCLIC_BUSY = -1; // in process of computing
52 	public static final int CYCLIC_DONE = 0;
53 
54 	/** Prevent explosion of DFA states during conversion. The max number
55 	 *  of states per alt in a single decision's DFA.
56 	public static final int MAX_STATES_PER_ALT_IN_DFA = 450;
57 	 */
58 
59 	/** Set to 0 to not terminate early (time in ms) */
60 	public static int MAX_TIME_PER_DFA_CREATION = 1*1000;
61 
62 	/** How many edges can each DFA state have before a "special" state
63 	 *  is created that uses IF expressions instead of a table?
64 	 */
65 	public static int MAX_STATE_TRANSITIONS_FOR_TABLE = 65534;
66 
67 	/** What's the start state for this DFA? */
68     public DFAState startState;
69 
70 	/** This DFA is being built for which decision? */
71 	public int decisionNumber = 0;
72 
73     /** From what NFAState did we create the DFA? */
74     public NFAState decisionNFAStartState;
75 
76 	/** The printable grammar fragment associated with this DFA */
77 	public String description;
78 
79 	/** A set of all uniquely-numbered DFA states.  Maps hash of DFAState
80      *  to the actual DFAState object.  We use this to detect
81      *  existing DFA states.  Map<DFAState,DFAState>.  Use Map so
82 	 *  we can get old state back (Set only allows you to see if it's there).
83 	 *  Not used during fixed k lookahead as it's a waste to fill it with
84 	 *  a dup of states array.
85      */
86     protected Map<DFAState, DFAState> uniqueStates = new HashMap<DFAState, DFAState>();
87 
88 	/** Maps the state number to the actual DFAState.  Use a Vector as it
89 	 *  grows automatically when I set the ith element.  This contains all
90 	 *  states, but the states are not unique.  s3 might be same as s1 so
91 	 *  s3 -> s1 in this table.  This is how cycles occur.  If fixed k,
92 	 *  then these states will all be unique as states[i] always points
93 	 *  at state i when no cycles exist.
94 	 *
95 	 *  This is managed in parallel with uniqueStates and simply provides
96 	 *  a way to go from state number to DFAState rather than via a
97 	 *  hash lookup.
98 	 */
99 	protected Vector<DFAState> states = new Vector<DFAState>();
100 
101 	/** Unique state numbers per DFA */
102 	protected int stateCounter = 0;
103 
104 	/** count only new states not states that were rejected as already present */
105 	protected int numberOfStates = 0;
106 
107 	/** User specified max fixed lookahead.  If 0, nothing specified.  -1
108 	 *  implies we have not looked at the options table yet to set k.
109 	 */
110 	protected int user_k = -1;
111 
112 	/** While building the DFA, track max lookahead depth if not cyclic */
113 	protected int max_k = -1;
114 
115     /** Is this DFA reduced?  I.e., can all states lead to an accept state? */
116     protected boolean reduced = true;
117 
118     /** Are there any loops in this DFA?
119 	 *  Computed by doesStateReachAcceptState()
120 	 */
121     protected boolean cyclic = false;
122 
123 	/** Track whether this DFA has at least one sem/syn pred encountered
124 	 *  during a closure operation.  This is useful for deciding whether
125 	 *  to retry a non-LL(*) with k=1.  If no pred, it will not work w/o
126 	 *  a pred so don't bother.  It would just give another error message.
127 	 */
128 	public boolean predicateVisible = false;
129 
130 	public boolean hasPredicateBlockedByAction = false;
131 
132 	/** Each alt in an NFA derived from a grammar must have a DFA state that
133      *  predicts it lest the parser not know what to do.  Nondeterminisms can
134      *  lead to this situation (assuming no semantic predicates can resolve
135      *  the problem) and when for some reason, I cannot compute the lookahead
136      *  (which might arise from an error in the algorithm or from
137      *  left-recursion etc...).  This list starts out with all alts contained
138      *  and then in method doesStateReachAcceptState() I remove the alts I
139      *  know to be uniquely predicted.
140      */
141     protected List<Integer> unreachableAlts;
142 
143 	protected int nAlts = 0;
144 
145 	/** We only want one accept state per predicted alt; track here */
146 	protected DFAState[] altToAcceptState;
147 
148 	/** Track whether an alt discovers recursion for each alt during
149 	 *  NFA to DFA conversion; >1 alt with recursion implies nonregular.
150 	 */
151 	public IntSet recursiveAltSet = new IntervalSet();
152 
153 	/** Which NFA are we converting (well, which piece of the NFA)? */
154     public NFA nfa;
155 
156 	protected NFAToDFAConverter nfaConverter;
157 
158 	/** This probe tells you a lot about a decision and is useful even
159 	 *  when there is no error such as when a syntactic nondeterminism
160 	 *  is solved via semantic predicates.  Perhaps a GUI would want
161 	 *  the ability to show that.
162 	 */
163 	public DecisionProbe probe = new DecisionProbe(this);
164 
165 	/** Track absolute time of the conversion so we can have a failsafe:
166 	 *  if it takes too long, then terminate.  Assume bugs are in the
167 	 *  analysis engine.
168 	 */
169 	//protected long conversionStartTime;
170 
171 	/** Map an edge transition table to a unique set number; ordered so
172 	 *  we can push into the output template as an ordered list of sets
173 	 *  and then ref them from within the transition[][] table.  Like this
174 	 *  for C# target:
175 	 *     public static readonly DFA30_transition0 =
176 	 *     	new short[] { 46, 46, -1, 46, 46, -1, -1, -1, -1, -1, -1, -1,...};
177 	 *         public static readonly DFA30_transition1 =
178 	 *     	new short[] { 21 };
179 	 *      public static readonly short[][] DFA30_transition = {
180 	 *     	  DFA30_transition0,
181 	 *     	  DFA30_transition0,
182 	 *     	  DFA30_transition1,
183 	 *     	  ...
184 	 *      };
185 	 */
186 	public Map edgeTransitionClassMap = new LinkedHashMap();
187 
188 	/** The unique edge transition class number; every time we see a new
189 	 *  set of edges emanating from a state, we number it so we can reuse
190 	 *  if it's every seen again for another state.  For Java grammar,
191 	 *  some of the big edge transition tables are seen about 57 times.
192 	 */
193 	protected int edgeTransitionClass =0;
194 
195 	/* This DFA can be converted to a transition[state][char] table and
196 	 * the following tables are filled by createStateTables upon request.
197 	 * These are injected into the templates for code generation.
198 	 * See March 25, 2006 entry for description:
199 	 *   http://www.antlr.org/blog/antlr3/codegen.tml
200 	 * Often using Vector as can't set ith position in a List and have
201 	 * it extend list size; bizarre.
202 	 */
203 
204 	/** List of special DFAState objects */
205 	public List specialStates;
206 	/** List of ST for special states. */
207 	public List specialStateSTs;
208 	public Vector accept;
209 	public Vector eot;
210 	public Vector eof;
211 	public Vector min;
212 	public Vector max;
213 	public Vector special;
214 	public Vector transition;
215 	/** just the Vector<Integer> indicating which unique edge table is at
216 	 *  position i.
217 	 */
218 	public Vector transitionEdgeTables; // not used by java yet
219 	protected int uniqueCompressedSpecialStateNum = 0;
220 
221 	/** Which generator to use if we're building state tables */
222 	protected CodeGenerator generator = null;
223 
DFA()224 	protected DFA() {;}
225 
DFA(int decisionNumber, NFAState decisionStartState)226 	public DFA(int decisionNumber, NFAState decisionStartState) {
227 		this.decisionNumber = decisionNumber;
228         this.decisionNFAStartState = decisionStartState;
229         nfa = decisionStartState.nfa;
230         nAlts = nfa.grammar.getNumberOfAltsForDecisionNFA(decisionStartState);
231         //setOptions( nfa.grammar.getDecisionOptions(getDecisionNumber()) );
232         initAltRelatedInfo();
233 
234 		//long start = System.currentTimeMillis();
235         nfaConverter = new NFAToDFAConverter(this);
236 		try {
237 			nfaConverter.convert();
238 
239 			// figure out if there are problems with decision
240 			verify();
241 
242 			if ( !probe.isDeterministic() || probe.analysisOverflowed() ) {
243 				probe.issueWarnings();
244 			}
245 
246 			// must be after verify as it computes cyclic, needed by this routine
247 			// should be after warnings because early termination or something
248 			// will not allow the reset to operate properly in some cases.
249 			resetStateNumbersToBeContiguous();
250 
251 			//long stop = System.currentTimeMillis();
252 			//System.out.println("verify cost: "+(int)(stop-start)+" ms");
253 		}
254 //		catch (AnalysisTimeoutException at) {
255 //			probe.reportAnalysisTimeout();
256 //			if ( !okToRetryDFAWithK1() ) {
257 //				probe.issueWarnings();
258 //			}
259 //		}
260 		catch (NonLLStarDecisionException nonLL) {
261 			probe.reportNonLLStarDecision(this);
262 			// >1 alt recurses, k=* and no auto backtrack nor manual sem/syn
263 			if ( !okToRetryDFAWithK1() ) {
264 				probe.issueWarnings();
265 			}
266 		}
267     }
268 
269 	/** Walk all states and reset their numbers to be a contiguous sequence
270 	 *  of integers starting from 0.  Only cyclic DFA can have unused positions
271 	 *  in states list.  State i might be identical to a previous state j and
272 	 *  will result in states[i] == states[j].  We don't want to waste a state
273 	 *  number on this.  Useful mostly for code generation in tables.
274 	 *
275 	 *  At the start of this routine, states[i].stateNumber <= i by definition.
276 	 *  If states[50].stateNumber is 50 then a cycle during conversion may
277 	 *  try to add state 103, but we find that an identical DFA state, named
278 	 *  50, already exists, hence, states[103]==states[50] and both have
279 	 *  stateNumber 50 as they point at same object.  Afterwards, the set
280 	 *  of state numbers from all states should represent a contiguous range
281 	 *  from 0..n-1 where n is the number of unique states.
282 	 */
resetStateNumbersToBeContiguous()283 	public void resetStateNumbersToBeContiguous() {
284 		if ( getUserMaxLookahead()>0 ) {
285 			// all numbers are unique already; no states are thrown out.
286 			return;
287 		}
288 
289         // walk list of DFAState objects by state number,
290 		// setting state numbers to 0..n-1
291 		int snum=0;
292 		for (int i = 0; i <= getMaxStateNumber(); i++) {
293 			DFAState s = getState(i);
294             // some states are unused after creation most commonly due to cycles
295             // or conflict resolution.
296             if ( s==null ) {
297                 continue;
298             }
299 			// state i is mapped to DFAState with state number set to i originally
300 			// so if it's less than i, then we renumbered it already; that
301 			// happens when states have been merged or cycles occurred I think.
302 			// states[50] will point to DFAState with s50 in it but
303 			// states[103] might also point at this same DFAState.  Since
304 			// 50 < 103 then it's already been renumbered as it points downwards.
305 			boolean alreadyRenumbered = s.stateNumber<i;
306 			if ( !alreadyRenumbered ) {
307 				// state i is a valid state, reset it's state number
308 				s.stateNumber = snum; // rewrite state numbers to be 0..n-1
309 				snum++;
310 			}
311 		}
312         if ( snum!=getNumberOfStates() ) {
313 			ErrorManager.internalError("DFA "+decisionNumber+": "+
314 				decisionNFAStartState.getDescription()+" num unique states "+getNumberOfStates()+
315 				"!= num renumbered states "+snum);
316 		}
317 	}
318 
319 	// JAVA-SPECIFIC Accessors!!!!!  It is so impossible to get arrays
320 	// or even consistently formatted strings acceptable to java that
321 	// I am forced to build the individual char elements here
322 
323 	public List getJavaCompressedAccept() { return getRunLengthEncoding(accept); }
324 	public List getJavaCompressedEOT() { return getRunLengthEncoding(eot); }
325 	public List getJavaCompressedEOF() { return getRunLengthEncoding(eof); }
326 	public List getJavaCompressedMin() { return getRunLengthEncoding(min); }
327 	public List getJavaCompressedMax() { return getRunLengthEncoding(max); }
328 	public List getJavaCompressedSpecial() { return getRunLengthEncoding(special); }
329 	public List getJavaCompressedTransition() {
330 		if ( transition==null || transition.size()==0 ) {
331 			return null;
332 		}
333 		List encoded = new ArrayList(transition.size());
334 		// walk Vector<Vector<FormattedInteger>> which is the transition[][] table
335 		for (int i = 0; i < transition.size(); i++) {
336 			Vector transitionsForState = (Vector) transition.elementAt(i);
337 			encoded.add(getRunLengthEncoding(transitionsForState));
338 		}
339 		return encoded;
340 	}
341 
342 	/** Compress the incoming data list so that runs of same number are
343 	 *  encoded as number,value pair sequences.  3 -1 -1 -1 28 is encoded
344 	 *  as 1 3 3 -1 1 28.  I am pretty sure this is the lossless compression
345 	 *  that GIF files use.  Transition tables are heavily compressed by
346 	 *  this technique.  I got the idea from JFlex http://jflex.de/
347 	 *
348 	 *  Return List<String> where each string is either \xyz for 8bit char
349 	 *  and \uFFFF for 16bit.  Hideous and specific to Java, but it is the
350 	 *  only target bad enough to need it.
351 	 */
352 	public List getRunLengthEncoding(List data) {
353 		if ( data==null || data.size()==0 ) {
354 			// for states with no transitions we want an empty string ""
355 			// to hold its place in the transitions array.
356 			List empty = new ArrayList();
357 			empty.add("");
358 			return empty;
359 		}
360 		int size = Math.max(2,data.size()/2);
361 		List encoded = new ArrayList(size); // guess at size
362 		// scan values looking for runs
363 		int i = 0;
364 		Integer emptyValue = Utils.integer(-1);
365 		while ( i < data.size() ) {
366 			Integer I = (Integer)data.get(i);
367 			if ( I==null ) {
368 				I = emptyValue;
369 			}
370 			// count how many v there are?
371 			int n = 0;
372 			for (int j = i; j < data.size(); j++) {
373 				Integer v = (Integer)data.get(j);
374 				if ( v==null ) {
375 					v = emptyValue;
376 				}
377 				if ( I.equals(v) ) {
378 					n++;
379 				}
380 				else {
381 					break;
382 				}
383 			}
384 			encoded.add(generator.target.encodeIntAsCharEscape((char)n));
385 			encoded.add(generator.target.encodeIntAsCharEscape((char)I.intValue()));
386 			i+=n;
387 		}
388 		return encoded;
389 	}
390 
391 	public void createStateTables(CodeGenerator generator) {
392 		//System.out.println("createTables:\n"+this);
393 		this.generator = generator;
394 		description = getNFADecisionStartState().getDescription();
395 		description =
396 			generator.target.getTargetStringLiteralFromString(description);
397 
398 		// create all the tables
399 		special = new Vector(this.getNumberOfStates()); // Vector<short>
400 		special.setSize(this.getNumberOfStates());
401 		specialStates = new ArrayList();				// List<DFAState>
402 		specialStateSTs = new ArrayList();				// List<ST>
403 		accept = new Vector(this.getNumberOfStates()); // Vector<int>
404 		accept.setSize(this.getNumberOfStates());
405 		eot = new Vector(this.getNumberOfStates()); // Vector<int>
406 		eot.setSize(this.getNumberOfStates());
407 		eof = new Vector(this.getNumberOfStates()); // Vector<int>
408 		eof.setSize(this.getNumberOfStates());
409 		min = new Vector(this.getNumberOfStates()); // Vector<int>
410 		min.setSize(this.getNumberOfStates());
411 		max = new Vector(this.getNumberOfStates()); // Vector<int>
412 		max.setSize(this.getNumberOfStates());
413 		transition = new Vector(this.getNumberOfStates()); // Vector<Vector<int>>
414 		transition.setSize(this.getNumberOfStates());
415 		transitionEdgeTables = new Vector(this.getNumberOfStates()); // Vector<Vector<int>>
416 		transitionEdgeTables.setSize(this.getNumberOfStates());
417 
418 		// for each state in the DFA, fill relevant tables.
419 		Iterator it = null;
420 		if ( getUserMaxLookahead()>0 ) {
421 			it = states.iterator();
422 		}
423 		else {
424 			it = getUniqueStates().values().iterator();
425 		}
426 		while ( it.hasNext() ) {
427 			DFAState s = (DFAState)it.next();
428 			if ( s==null ) {
429 				// ignore null states; some acylic DFA see this condition
430 				// when inlining DFA (due to lacking of exit branch pruning?)
431 				continue;
432 			}
433 			if ( s.isAcceptState() ) {
434 				// can't compute min,max,special,transition on accepts
435 				accept.set(s.stateNumber,
436 						   Utils.integer(s.getUniquelyPredictedAlt()));
437 			}
438 			else {
439 				createMinMaxTables(s);
440 				createTransitionTableEntryForState(s);
441 				createSpecialTable(s);
442 				createEOTAndEOFTables(s);
443 			}
444 		}
445 
446 		// now that we have computed list of specialStates, gen code for 'em
447 		for (int i = 0; i < specialStates.size(); i++) {
448 			DFAState ss = (DFAState) specialStates.get(i);
449 			ST stateST =
450 				generator.generateSpecialState(ss);
451 			specialStateSTs.add(stateST);
452 		}
453 
454 		// check that the tables are not messed up by encode/decode
455 		/*
456 		testEncodeDecode(min);
457 		testEncodeDecode(max);
458 		testEncodeDecode(accept);
459 		testEncodeDecode(special);
460 		System.out.println("min="+min);
461 		System.out.println("max="+max);
462 		System.out.println("eot="+eot);
463 		System.out.println("eof="+eof);
464 		System.out.println("accept="+accept);
465 		System.out.println("special="+special);
466 		System.out.println("transition="+transition);
467 		*/
468 	}
469 
470 	/*
471 	private void testEncodeDecode(List data) {
472 		System.out.println("data="+data);
473 		List encoded = getRunLengthEncoding(data);
474 		StringBuffer buf = new StringBuffer();
475 		for (int i = 0; i < encoded.size(); i++) {
476 			String I = (String)encoded.get(i);
477 			int v = 0;
478 			if ( I.startsWith("\\u") ) {
479 				v = Integer.parseInt(I.substring(2,I.length()), 16);
480 			}
481 			else {
482 				v = Integer.parseInt(I.substring(1,I.length()), 8);
483 			}
484 			buf.append((char)v);
485 		}
486 		String encodedS = buf.toString();
487 		short[] decoded = org.antlr.runtime.DFA.unpackEncodedString(encodedS);
488 		//System.out.println("decoded:");
489 		for (int i = 0; i < decoded.length; i++) {
490 			short x = decoded[i];
491 			if ( x!=((Integer)data.get(i)).intValue() ) {
492 				System.err.println("problem with encoding");
493 			}
494 			//System.out.print(", "+x);
495 		}
496 		//System.out.println();
497 	}
498 	*/
499 
500 	protected void createMinMaxTables(DFAState s) {
501 		int smin = Label.MAX_CHAR_VALUE + 1;
502 		int smax = Label.MIN_ATOM_VALUE - 1;
503 		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
504 			Transition edge = (Transition) s.transition(j);
505 			Label label = edge.label;
506 			if ( label.isAtom() ) {
507 				if ( label.getAtom()>=Label.MIN_CHAR_VALUE ) {
508 					if ( label.getAtom()<smin ) {
509 						smin = label.getAtom();
510 					}
511 					if ( label.getAtom()>smax ) {
512 						smax = label.getAtom();
513 					}
514 				}
515 			}
516 			else if ( label.isSet() ) {
517 				IntervalSet labels = (IntervalSet)label.getSet();
518 				int lmin = labels.getMinElement();
519 				// if valid char (don't do EOF) and less than current min
520 				if ( lmin<smin && lmin>=Label.MIN_CHAR_VALUE ) {
521 					smin = labels.getMinElement();
522 				}
523 				if ( labels.getMaxElement()>smax ) {
524 					smax = labels.getMaxElement();
525 				}
526 			}
527 		}
528 
529 		if ( smax<0 ) {
530 			// must be predicates or pure EOT transition; just zero out min, max
531 			smin = Label.MIN_CHAR_VALUE;
532 			smax = Label.MIN_CHAR_VALUE;
533 		}
534 
535 		min.set(s.stateNumber, Utils.integer((char)smin));
536 		max.set(s.stateNumber, Utils.integer((char)smax));
537 
538 		if ( smax<0 || smin>Label.MAX_CHAR_VALUE || smin<0 ) {
539 			ErrorManager.internalError("messed up: min="+min+", max="+max);
540 		}
541 	}
542 
543 	protected void createTransitionTableEntryForState(DFAState s) {
544 		/*
545 		System.out.println("createTransitionTableEntryForState s"+s.stateNumber+
546 			" dec "+s.dfa.decisionNumber+" cyclic="+s.dfa.isCyclic());
547 			*/
548 		int smax = ((Integer)max.get(s.stateNumber)).intValue();
549 		int smin = ((Integer)min.get(s.stateNumber)).intValue();
550 
551 		Vector stateTransitions = new Vector(smax-smin+1);
552 		stateTransitions.setSize(smax-smin+1);
553 		transition.set(s.stateNumber, stateTransitions);
554 		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
555 			Transition edge = (Transition) s.transition(j);
556 			Label label = edge.label;
557 			if ( label.isAtom() && label.getAtom()>=Label.MIN_CHAR_VALUE ) {
558 				int labelIndex = label.getAtom()-smin; // offset from 0
559 				stateTransitions.set(labelIndex,
560 									 Utils.integer(edge.target.stateNumber));
561 			}
562 			else if ( label.isSet() ) {
563 				IntervalSet labels = (IntervalSet)label.getSet();
564 				int[] atoms = labels.toArray();
565 				for (int a = 0; a < atoms.length; a++) {
566 					// set the transition if the label is valid (don't do EOF)
567 					if ( atoms[a]>=Label.MIN_CHAR_VALUE ) {
568 						int labelIndex = atoms[a]-smin; // offset from 0
569 						stateTransitions.set(labelIndex,
570 											 Utils.integer(edge.target.stateNumber));
571 					}
572 				}
573 			}
574 		}
575 		// track unique state transition tables so we can reuse
576 		Integer edgeClass = (Integer)edgeTransitionClassMap.get(stateTransitions);
577 		if ( edgeClass!=null ) {
578 			//System.out.println("we've seen this array before; size="+stateTransitions.size());
579 			transitionEdgeTables.set(s.stateNumber, edgeClass);
580 		}
581 		else {
582 			edgeClass = Utils.integer(edgeTransitionClass);
583 			transitionEdgeTables.set(s.stateNumber, edgeClass);
584 			edgeTransitionClassMap.put(stateTransitions, edgeClass);
585 			edgeTransitionClass++;
586 		}
587 	}
588 
589 	/** Set up the EOT and EOF tables; we cannot put -1 min/max values so
590 	 *  we need another way to test that in the DFA transition function.
591 	 */
592 	protected void createEOTAndEOFTables(DFAState s) {
593 		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
594 			Transition edge = (Transition) s.transition(j);
595 			Label label = edge.label;
596 			if ( label.isAtom() ) {
597 				if ( label.getAtom()==Label.EOT ) {
598 					// eot[s] points to accept state
599 					eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
600 				}
601 				else if ( label.getAtom()==Label.EOF ) {
602 					// eof[s] points to accept state
603 					eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
604 				}
605 			}
606 			else if ( label.isSet() ) {
607 				IntervalSet labels = (IntervalSet)label.getSet();
608 				int[] atoms = labels.toArray();
609 				for (int a = 0; a < atoms.length; a++) {
610 					if ( atoms[a]==Label.EOT ) {
611 						// eot[s] points to accept state
612 						eot.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
613 					}
614 					else if ( atoms[a]==Label.EOF ) {
615 						eof.set(s.stateNumber, Utils.integer(edge.target.stateNumber));
616 					}
617 				}
618 			}
619 		}
620 	}
621 
622 	protected void createSpecialTable(DFAState s) {
623 		// number all special states from 0...n-1 instead of their usual numbers
624 		boolean hasSemPred = false;
625 
626 		// TODO this code is very similar to canGenerateSwitch.  Refactor to share
627 		for (int j = 0; j < s.getNumberOfTransitions(); j++) {
628 			Transition edge = (Transition) s.transition(j);
629 			Label label = edge.label;
630 			// can't do a switch if the edges have preds or are going to
631 			// require gated predicates
632 			if ( label.isSemanticPredicate() ||
633 				 ((DFAState)edge.target).getGatedPredicatesInNFAConfigurations()!=null)
634 			{
635 				hasSemPred = true;
636 				break;
637 			}
638 		}
639 		// if has pred or too big for table, make it special
640 		int smax = ((Integer)max.get(s.stateNumber)).intValue();
641 		int smin = ((Integer)min.get(s.stateNumber)).intValue();
642 		if ( hasSemPred || smax-smin>MAX_STATE_TRANSITIONS_FOR_TABLE ) {
643 			special.set(s.stateNumber,
644 						Utils.integer(uniqueCompressedSpecialStateNum));
645 			uniqueCompressedSpecialStateNum++;
646 			specialStates.add(s);
647 		}
648 		else {
649 			special.set(s.stateNumber, Utils.integer(-1)); // not special
650 		}
651 	}
652 
653 	public int predict(IntStream input) {
654 		Interpreter interp = new Interpreter(nfa.grammar, input);
655 		return interp.predict(this);
656 	}
657 
658 	/** Add a new DFA state to this DFA if not already present.
659      *  To force an acyclic, fixed maximum depth DFA, just always
660 	 *  return the incoming state.  By not reusing old states,
661 	 *  no cycles can be created.  If we're doing fixed k lookahead
662 	 *  don't updated uniqueStates, just return incoming state, which
663 	 *  indicates it's a new state.
664      */
665     protected DFAState addState(DFAState d) {
666 		if ( getUserMaxLookahead()>0 ) {
667 			return d;
668 		}
669 		// does a DFA state exist already with everything the same
670 		// except its state number?
671 		DFAState existing = (DFAState)uniqueStates.get(d);
672 		if ( existing != null ) {
673             /*
674             System.out.println("state "+d.stateNumber+" exists as state "+
675                 existing.stateNumber);
676                 */
677             // already there...get the existing DFA state
678 			return existing;
679 		}
680 
681 		// if not there, then add new state.
682 		uniqueStates.put(d,d);
683         numberOfStates++;
684 		return d;
685 	}
686 
687 	public void removeState(DFAState d) {
688 		DFAState it = (DFAState)uniqueStates.remove(d);
689 		if ( it!=null ) {
690 			numberOfStates--;
691 		}
692 	}
693 
694 	public Map<DFAState, DFAState> getUniqueStates() {
695 		return uniqueStates;
696 	}
697 
698 	/** What is the max state number ever created?  This may be beyond
699 	 *  getNumberOfStates().
700 	 */
701 	public int getMaxStateNumber() {
702 		return states.size()-1;
703 	}
704 
705 	public DFAState getState(int stateNumber) {
706 		return (DFAState)states.get(stateNumber);
707 	}
708 
709 	public void setState(int stateNumber, DFAState d) {
710 		states.set(stateNumber, d);
711 	}
712 
713 	/** Is the DFA reduced?  I.e., does every state have a path to an accept
714      *  state?  If not, don't delete as we need to generate an error indicating
715      *  which paths are "dead ends".  Also tracks list of alts with no accept
716      *  state in the DFA.  Must call verify() first before this makes sense.
717      */
718     public boolean isReduced() {
719         return reduced;
720     }
721 
722     /** Is this DFA cyclic?  That is, are there any loops?  If not, then
723      *  the DFA is essentially an LL(k) predictor for some fixed, max k value.
724      *  We can build a series of nested IF statements to match this.  In the
725      *  presence of cycles, we need to build a general DFA and interpret it
726      *  to distinguish between alternatives.
727      */
728     public boolean isCyclic() {
729         return cyclic && getUserMaxLookahead()==0;
730     }
731 
732 	public boolean isClassicDFA() {
733 		return !isCyclic() &&
734 			   !nfa.grammar.decisionsWhoseDFAsUsesSemPreds.contains(this) &&
735 			   !nfa.grammar.decisionsWhoseDFAsUsesSynPreds.contains(this);
736 	}
737 
738 	public boolean canInlineDecision() {
739 		return !isCyclic() &&
740 		    !probe.isNonLLStarDecision() &&
741 			getNumberOfStates() < CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE;
742 	}
743 
744 	/** Is this DFA derived from the NFA for the Tokens rule? */
745 	public boolean isTokensRuleDecision() {
746 		if ( nfa.grammar.type!=Grammar.LEXER ) {
747 			return false;
748 		}
749 		NFAState nfaStart = getNFADecisionStartState();
750 		Rule r = nfa.grammar.getLocallyDefinedRule(Grammar.ARTIFICIAL_TOKENS_RULENAME);
751 		NFAState TokensRuleStart = r.startState;
752 		NFAState TokensDecisionStart =
753 			(NFAState)TokensRuleStart.transition[0].target;
754 		return nfaStart == TokensDecisionStart;
755 	}
756 
757 	/** The user may specify a max, acyclic lookahead for any decision.  No
758 	 *  DFA cycles are created when this value, k, is greater than 0.
759 	 *  If this decision has no k lookahead specified, then try the grammar.
760 	 */
761 	public int getUserMaxLookahead() {
762 		if ( user_k>=0 ) { // cache for speed
763 			return user_k;
764 		}
765 		user_k = nfa.grammar.getUserMaxLookahead(decisionNumber);
766 		return user_k;
767 	}
768 
769 	public boolean getAutoBacktrackMode() {
770 		return nfa.grammar.getAutoBacktrackMode(decisionNumber);
771 	}
772 
773 	public void setUserMaxLookahead(int k) {
774 		this.user_k = k;
775 	}
776 
777 	/** Return k if decision is LL(k) for some k else return max int
778      */
779 	public int getMaxLookaheadDepth() {
780 		if ( hasCycle() ) return Integer.MAX_VALUE;
781 		// compute to be sure
782 		return _getMaxLookaheadDepth(startState, 0);
783 	}
784 
785 	int _getMaxLookaheadDepth(DFAState d, int depth) {
786 		// not cyclic; don't worry about termination
787 		// fail if pred edge.
788 		int max = depth;
789 		for (int i=0; i<d.getNumberOfTransitions(); i++) {
790 			Transition t = d.transition(i);
791 //			if ( t.isSemanticPredicate() ) return Integer.MAX_VALUE;
792 			if ( !t.isSemanticPredicate() ) {
793 				// if pure pred not gated, it must target stop state; don't count
794 				DFAState edgeTarget = (DFAState)t.target;
795 				int m = _getMaxLookaheadDepth(edgeTarget, depth+1);
796 				max = Math.max(max, m);
797 			}
798 		}
799 		return max;
800 	}
801 
802 	/** Count all disambiguating syn preds (ignore synpred tests
803 	 *  for gated edges, which occur for nonambig input sequences).
804 	 *  E.g.,
805 	 *  x  : (X)=> (X|Y)\n" +
806 	 *     | X\n" +
807 	 *     ;
808 	 *
809 	 *  gives
810 	 *
811 	 * .s0-X->.s1
812 	 * .s0-Y&&{synpred1_t}?->:s2=>1
813 	 * .s1-{synpred1_t}?->:s2=>1
814 	 * .s1-{true}?->:s3=>2
815 	 */
816 	public boolean hasSynPred() {
817 		boolean has = _hasSynPred(startState, new HashSet<DFAState>());
818 //		if ( !has ) {
819 //			System.out.println("no synpred in dec "+decisionNumber);
820 //			FASerializer serializer = new FASerializer(nfa.grammar);
821 //			String result = serializer.serialize(startState);
822 //			System.out.println(result);
823 //		}
824 		return has;
825 	}
826 
827 	public boolean getHasSynPred() { return hasSynPred(); } // for ST
828 
829 	boolean _hasSynPred(DFAState d, Set<DFAState> busy) {
830 		busy.add(d);
831 		for (int i=0; i<d.getNumberOfTransitions(); i++) {
832 			Transition t = d.transition(i);
833 			if ( t.isSemanticPredicate() ) {
834 				SemanticContext ctx = t.label.getSemanticContext();
835 //				if ( ctx.toString().indexOf("synpred")>=0 ) {
836 //					System.out.println("has pred "+ctx.toString()+" "+ctx.isSyntacticPredicate());
837 //					System.out.println(((SemanticContext.Predicate)ctx).predicateAST.token);
838 //				}
839 				if ( ctx.isSyntacticPredicate() ) return true;
840 			}
841 			DFAState edgeTarget = (DFAState)t.target;
842 			if ( !busy.contains(edgeTarget) && _hasSynPred(edgeTarget, busy) ) return true;
843 		}
844 
845 		return false;
846 	}
847 
848 	public boolean hasSemPred() { // has user-defined sempred
849 		boolean has = _hasSemPred(startState, new HashSet<DFAState>());
850 		return has;
851 	}
852 
853 	boolean _hasSemPred(DFAState d, Set<DFAState> busy) {
854 		busy.add(d);
855 		for (int i=0; i<d.getNumberOfTransitions(); i++) {
856 			Transition t = d.transition(i);
857 			if ( t.isSemanticPredicate() ) {
858 				SemanticContext ctx = t.label.getSemanticContext();
859 				if ( ctx.hasUserSemanticPredicate() ) return true;
860 			}
861 			DFAState edgeTarget = (DFAState)t.target;
862 			if ( !busy.contains(edgeTarget) && _hasSemPred(edgeTarget, busy) ) return true;
863 		}
864 
865 		return false;
866 	}
867 
868 	/** Compute cyclic w/o relying on state computed during analysis. just check. */
869 	public boolean hasCycle() {
870 		boolean cyclic = _hasCycle(startState, new HashMap<DFAState, Integer>());
871 		return cyclic;
872 	}
873 
874 	boolean _hasCycle(DFAState d, Map<DFAState, Integer> busy) {
875 		busy.put(d, CYCLIC_BUSY);
876 		for (int i=0; i<d.getNumberOfTransitions(); i++) {
877 			Transition t = d.transition(i);
878 			DFAState target = (DFAState)t.target;
879 			int cond = CYCLIC_UNKNOWN;
880 			if ( busy.get(target)!=null ) cond = busy.get(target);
881 			if ( cond==CYCLIC_BUSY ) return true;
882 			if ( cond!=CYCLIC_DONE && _hasCycle(target, busy) ) return true;
883 		}
884 		busy.put(d, CYCLIC_DONE);
885 		return false;
886 	}
887 
888 
889     /** Return a list of Integer alt numbers for which no lookahead could
890      *  be computed or for which no single DFA accept state predicts those
891      *  alts.  Must call verify() first before this makes sense.
892      */
893     public List<Integer> getUnreachableAlts() {
894         return unreachableAlts;
895     }
896 
897 	/** Once this DFA has been built, need to verify that:
898 	 *
899 	 *  1. it's reduced
900 	 *  2. all alts have an accept state
901 	 *
902 	 *  Elsewhere, in the NFA converter, we need to verify that:
903 	 *
904 	 *  3. alts i and j have disjoint lookahead if no sem preds
905 	 *  4. if sem preds, nondeterministic alts must be sufficiently covered
906 	 *
907 	 *  This is avoided if analysis bails out for any reason.
908 	 */
909 	public void verify() {
910 		doesStateReachAcceptState(startState);
911 	}
912 
913     /** figure out if this state eventually reaches an accept state and
914      *  modify the instance variable 'reduced' to indicate if we find
915      *  at least one state that cannot reach an accept state.  This implies
916      *  that the overall DFA is not reduced.  This algorithm should be
917      *  linear in the number of DFA states.
918      *
919      *  The algorithm also tracks which alternatives have no accept state,
920      *  indicating a nondeterminism.
921 	 *
922 	 *  Also computes whether the DFA is cyclic.
923 	 *
924      *  TODO: I call getUniquelyPredicatedAlt too much; cache predicted alt
925      */
926     protected boolean doesStateReachAcceptState(DFAState d) {
927 		if ( d.isAcceptState() ) {
928             // accept states have no edges emanating from them so we can return
929             d.setAcceptStateReachable(REACHABLE_YES);
930             // this alt is uniquely predicted, remove from nondeterministic list
931             int predicts = d.getUniquelyPredictedAlt();
932             unreachableAlts.remove(Utils.integer(predicts));
933             return true;
934         }
935 
936         // avoid infinite loops
937         d.setAcceptStateReachable(REACHABLE_BUSY);
938 
939         boolean anEdgeReachesAcceptState = false;
940         // Visit every transition, track if at least one edge reaches stop state
941 		// Cannot terminate when we know this state reaches stop state since
942 		// all transitions must be traversed to set status of each DFA state.
943 		for (int i=0; i<d.getNumberOfTransitions(); i++) {
944             Transition t = d.transition(i);
945             DFAState edgeTarget = (DFAState)t.target;
946             int targetStatus = edgeTarget.getAcceptStateReachable();
947             if ( targetStatus==REACHABLE_BUSY ) { // avoid cycles; they say nothing
948                 cyclic = true;
949                 continue;
950             }
951             if ( targetStatus==REACHABLE_YES ) { // avoid unnecessary work
952                 anEdgeReachesAcceptState = true;
953                 continue;
954             }
955             if ( targetStatus==REACHABLE_NO ) {  // avoid unnecessary work
956                 continue;
957             }
958 			// target must be REACHABLE_UNKNOWN (i.e., unvisited)
959             if ( doesStateReachAcceptState(edgeTarget) ) {
960                 anEdgeReachesAcceptState = true;
961                 // have to keep looking so don't break loop
962                 // must cover all states even if we find a path for this state
963             }
964         }
965         if ( anEdgeReachesAcceptState ) {
966             d.setAcceptStateReachable(REACHABLE_YES);
967         }
968         else {
969             d.setAcceptStateReachable(REACHABLE_NO);
970 			reduced = false;
971         }
972         return anEdgeReachesAcceptState;
973     }
974 
975 	/** Walk all accept states and find the manually-specified synpreds.
976 	 *  Gated preds are not always hoisted
977 	 *  I used to do this in the code generator, but that is too late.
978 	 *  This converter tries to avoid computing DFA for decisions in
979 	 *  syntactic predicates that are not ever used such as those
980 	 *  created by autobacktrack mode.
981 	 */
982 	public void findAllGatedSynPredsUsedInDFAAcceptStates() {
983 		int nAlts = getNumberOfAlts();
984 		for (int i=1; i<=nAlts; i++) {
985 			DFAState a = getAcceptState(i);
986 			//System.out.println("alt "+i+": "+a);
987 			if ( a!=null ) {
988 				Set synpreds = a.getGatedSyntacticPredicatesInNFAConfigurations();
989 				if ( synpreds!=null ) {
990 					// add all the predicates we find (should be just one, right?)
991 					for (Iterator it = synpreds.iterator(); it.hasNext();) {
992 						SemanticContext semctx = (SemanticContext) it.next();
993 						// System.out.println("synpreds: "+semctx);
994 						nfa.grammar.synPredUsedInDFA(this, semctx);
995 					}
996 				}
997 			}
998 		}
999 	}
1000 
1001 	public NFAState getNFADecisionStartState() {
1002         return decisionNFAStartState;
1003     }
1004 
1005 	public DFAState getAcceptState(int alt) {
1006 		return altToAcceptState[alt];
1007 	}
1008 
1009 	public void setAcceptState(int alt, DFAState acceptState) {
1010 		altToAcceptState[alt] = acceptState;
1011 	}
1012 
1013 	public String getDescription() {
1014 		return description;
1015 	}
1016 
1017 	public int getDecisionNumber() {
1018         return decisionNFAStartState.getDecisionNumber();
1019     }
1020 
1021 	/** If this DFA failed to finish during construction, we might be
1022 	 *  able to retry with k=1 but we need to know whether it will
1023 	 *  potentially succeed.  Can only succeed if there is a predicate
1024 	 *  to resolve the issue.  Don't try if k=1 already as it would
1025 	 *  cycle forever.  Timeout can retry with k=1 even if no predicate
1026 	 *  if k!=1.
1027 	 */
1028 	public boolean okToRetryDFAWithK1() {
1029 		boolean nonLLStarOrOverflowAndPredicateVisible =
1030 			(probe.isNonLLStarDecision()||probe.analysisOverflowed()) &&
1031 		    predicateVisible; // auto backtrack or manual sem/syn
1032 		return getUserMaxLookahead()!=1 &&
1033 			 nonLLStarOrOverflowAndPredicateVisible;
1034 	}
1035 
1036 	public String getReasonForFailure() {
1037 		StringBuffer buf = new StringBuffer();
1038 		if ( probe.isNonLLStarDecision() ) {
1039 			buf.append("non-LL(*)");
1040 			if ( predicateVisible ) {
1041 				buf.append(" && predicate visible");
1042 			}
1043 		}
1044 		if ( probe.analysisOverflowed() ) {
1045 			buf.append("recursion overflow");
1046 			if ( predicateVisible ) {
1047 				buf.append(" && predicate visible");
1048 			}
1049 		}
1050 		buf.append("\n");
1051 		return buf.toString();
1052 	}
1053 
1054 	/** What GrammarAST node (derived from the grammar) is this DFA
1055      *  associated with?  It will point to the start of a block or
1056      *  the loop back of a (...)+ block etc...
1057      */
1058     public GrammarAST getDecisionASTNode() {
1059         return decisionNFAStartState.associatedASTNode;
1060     }
1061 
1062     public boolean isGreedy() {
1063 		GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decisionNumber);
1064 		Object v = nfa.grammar.getBlockOption(blockAST,"greedy");
1065 		if ( v!=null && v.equals("false") ) {
1066 			return false;
1067 		}
1068         return true;
1069 
1070 	}
1071 
1072     public DFAState newState() {
1073         DFAState n = new DFAState(this);
1074         n.stateNumber = stateCounter;
1075         stateCounter++;
1076 		states.setSize(n.stateNumber+1);
1077 		states.set(n.stateNumber, n); // track state num to state
1078         return n;
1079     }
1080 
1081 	public int getNumberOfStates() {
1082 		if ( getUserMaxLookahead()>0 ) {
1083 			// if using fixed lookahead then uniqueSets not set
1084 			return states.size();
1085 		}
1086 		return numberOfStates;
1087 	}
1088 
1089 	public int getNumberOfAlts() {
1090 		return nAlts;
1091 	}
1092 
1093 //	public boolean analysisTimedOut() {
1094 //		return probe.analysisTimedOut();
1095 //	}
1096 
1097     protected void initAltRelatedInfo() {
1098         unreachableAlts = new LinkedList();
1099         for (int i = 1; i <= nAlts; i++) {
1100             unreachableAlts.add(Utils.integer(i));
1101         }
1102 		altToAcceptState = new DFAState[nAlts+1];
1103     }
1104 
1105 	public String toString() {
1106 		FASerializer serializer = new FASerializer(nfa.grammar);
1107 		if ( startState==null ) {
1108 			return "";
1109 		}
1110 		return serializer.serialize(startState, false);
1111 	}
1112 
1113 	/** EOT (end of token) is a label that indicates when the DFA conversion
1114 	 *  algorithm would "fall off the end of a lexer rule".  It normally
1115 	 *  means the default clause.  So for ('a'..'z')+ you would see a DFA
1116 	 *  with a state that has a..z and EOT emanating from it.  a..z would
1117 	 *  jump to a state predicting alt 1 and EOT would jump to a state
1118 	 *  predicting alt 2 (the exit loop branch).  EOT implies anything other
1119 	 *  than a..z.  If for some reason, the set is "all char" such as with
1120 	 *  the wildcard '.', then EOT cannot match anything.  For example,
1121 	 *
1122 	 *     BLOCK : '{' (.)* '}'
1123 	 *
1124 	 *  consumes all char until EOF when greedy=true.  When all edges are
1125 	 *  combined for the DFA state after matching '}', you will find that
1126 	 *  it is all char.  The EOT transition has nothing to match and is
1127 	 *  unreachable.  The findNewDFAStatesAndAddDFATransitions() method
1128 	 *  must know to ignore the EOT, so we simply remove it from the
1129 	 *  reachable labels.  Later analysis will find that the exit branch
1130 	 *  is not predicted by anything.  For greedy=false, we leave only
1131 	 *  the EOT label indicating that the DFA should stop immediately
1132 	 *  and predict the exit branch. The reachable labels are often a
1133 	 *  set of disjoint values like: [<EOT>, 42, {0..41, 43..65534}]
1134 	 *  due to DFA conversion so must construct a pure set to see if
1135 	 *  it is same as Label.ALLCHAR.
1136 	 *
1137 	 *  Only do this for Lexers.
1138 	 *
1139 	 *  If EOT coexists with ALLCHAR:
1140 	 *  1. If not greedy, modify the labels parameter to be EOT
1141 	 *  2. If greedy, remove EOT from the labels set
1142 	protected boolean reachableLabelsEOTCoexistsWithAllChar(OrderedHashSet labels)
1143 	{
1144 		Label eot = new Label(Label.EOT);
1145 		if ( !labels.containsKey(eot) ) {
1146 			return false;
1147 		}
1148 		System.out.println("### contains EOT");
1149 		boolean containsAllChar = false;
1150 		IntervalSet completeVocab = new IntervalSet();
1151 		int n = labels.size();
1152 		for (int i=0; i<n; i++) {
1153 			Label rl = (Label)labels.get(i);
1154 			if ( !rl.equals(eot) ) {
1155 				completeVocab.addAll(rl.getSet());
1156 			}
1157 		}
1158 		System.out.println("completeVocab="+completeVocab);
1159 		if ( completeVocab.equals(Label.ALLCHAR) ) {
1160 			System.out.println("all char");
1161 			containsAllChar = true;
1162 		}
1163 		return containsAllChar;
1164 	}
1165 	 */
1166 }
1167 
1168