1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.tool;
29 
30 import org.antlr.Tool;
31 import org.antlr.analysis.DFA;
32 import org.antlr.analysis.DFAState;
33 import org.antlr.analysis.LL1Analyzer;
34 import org.antlr.analysis.LL1DFA;
35 import org.antlr.analysis.Label;
36 import org.antlr.analysis.LookaheadSet;
37 import org.antlr.analysis.NFA;
38 import org.antlr.analysis.NFAConversionThread;
39 import org.antlr.analysis.NFAState;
40 import org.antlr.analysis.NFAToDFAConverter;
41 import org.antlr.analysis.SemanticContext;
42 import org.antlr.analysis.Transition;
43 import org.antlr.codegen.CodeGenerator;
44 import org.antlr.codegen.Target;
45 import org.antlr.grammar.v3.ANTLRLexer;
46 import org.antlr.grammar.v3.ANTLRParser;
47 import org.antlr.grammar.v3.ANTLRTreePrinter;
48 import org.antlr.grammar.v3.ActionAnalysis;
49 import org.antlr.grammar.v3.DefineGrammarItemsWalker;
50 import org.antlr.grammar.v3.TreeToNFAConverter;
51 import org.antlr.misc.Barrier;
52 import org.antlr.misc.IntSet;
53 import org.antlr.misc.IntervalSet;
54 import org.antlr.misc.MultiMap;
55 import org.antlr.misc.OrderedHashSet;
56 import org.antlr.misc.Utils;
57 import org.antlr.runtime.ANTLRReaderStream;
58 import org.antlr.runtime.ANTLRStringStream;
59 import org.antlr.runtime.CommonToken;
60 import org.antlr.runtime.CommonTokenStream;
61 import org.antlr.runtime.RecognitionException;
62 import org.antlr.runtime.Token;
63 import org.antlr.runtime.tree.CommonTreeNodeStream;
64 import org.stringtemplate.v4.ST;
65 import org.stringtemplate.v4.STGroup;
66 import org.stringtemplate.v4.STGroupString;
67 
68 import java.io.BufferedReader;
69 import java.io.File;
70 import java.io.FileNotFoundException;
71 import java.io.FileReader;
72 import java.io.IOException;
73 import java.io.PrintStream;
74 import java.io.Reader;
75 import java.io.StreamTokenizer;
76 import java.io.StringReader;
77 import java.util.ArrayList;
78 import java.util.Arrays;
79 import java.util.BitSet;
80 import java.util.Collection;
81 import java.util.HashMap;
82 import java.util.HashSet;
83 import java.util.Iterator;
84 import java.util.LinkedHashMap;
85 import java.util.List;
86 import java.util.Map;
87 import java.util.Set;
88 import java.util.Vector;
89 
90 /** Represents a grammar in memory. */
91 public class Grammar {
92 	public static final String SYNPRED_RULE_PREFIX = "synpred";
93 
94 	public static final String GRAMMAR_FILE_EXTENSION = ".g";
95 
96 	/** used for generating lexer temp files */
97 	public static final String LEXER_GRAMMAR_FILE_EXTENSION = ".g";
98 
99 	public static final int INITIAL_DECISION_LIST_SIZE = 300;
100 	public static final int INVALID_RULE_INDEX = -1;
101 
102 	// the various kinds of labels. t=type, id=ID, types+=type ids+=ID
103 	public static final int RULE_LABEL = 1;
104 	public static final int TOKEN_LABEL = 2;
105 	public static final int RULE_LIST_LABEL = 3;
106 	public static final int TOKEN_LIST_LABEL = 4;
107     public static final int CHAR_LABEL = 5; // used in lexer for x='a'
108     public static final int WILDCARD_TREE_LABEL = 6; // Used in tree grammar x=.
109     public static final int WILDCARD_TREE_LIST_LABEL = 7; // Used in tree grammar x+=.
110 
111 
112     public static String[] LabelTypeToString =
113 		{"<invalid>", "rule", "token", "rule-list", "token-list", "wildcard-tree", "wildcard-tree-list"};
114 
115 	public static final String ARTIFICIAL_TOKENS_RULENAME = "Tokens";
116 	public static final String FRAGMENT_RULE_MODIFIER = "fragment";
117 
118 	public static final String SYNPREDGATE_ACTION_NAME = "synpredgate";
119 
120 	/** When converting ANTLR char and string literals, here is the
121 	 *  value set of escape chars.
122 	 */
123 	public static int ANTLRLiteralEscapedCharValue[] = new int[255];
124 
125 	/** Given a char, we need to be able to show as an ANTLR literal.
126 	 */
127 	public static String ANTLRLiteralCharValueEscape[] = new String[255];
128 
129 	static {
130 		ANTLRLiteralEscapedCharValue['n'] = '\n';
131 		ANTLRLiteralEscapedCharValue['r'] = '\r';
132 		ANTLRLiteralEscapedCharValue['t'] = '\t';
133 		ANTLRLiteralEscapedCharValue['b'] = '\b';
134 		ANTLRLiteralEscapedCharValue['f'] = '\f';
135 		ANTLRLiteralEscapedCharValue['\\'] = '\\';
136 		ANTLRLiteralEscapedCharValue['\''] = '\'';
137 		ANTLRLiteralEscapedCharValue['"'] = '"';
138 		ANTLRLiteralCharValueEscape['\n'] = "\\n";
139 		ANTLRLiteralCharValueEscape['\r'] = "\\r";
140 		ANTLRLiteralCharValueEscape['\t'] = "\\t";
141 		ANTLRLiteralCharValueEscape['\b'] = "\\b";
142 		ANTLRLiteralCharValueEscape['\f'] = "\\f";
143 		ANTLRLiteralCharValueEscape['\\'] = "\\\\";
144 		ANTLRLiteralCharValueEscape['\''] = "\\'";
145 	}
146 
147 	public static final int LEXER = 1;
148 	public static final int PARSER = 2;
149 	public static final int TREE_PARSER = 3;
150 	public static final int COMBINED = 4;
151 	public static final String[] grammarTypeToString = new String[] {
152 		"<invalid>",
153 		"lexer",
154 		"parser",
155 		"tree",
156 		"combined"
157 	};
158 
159 	public static final String[] grammarTypeToFileNameSuffix = new String[] {
160 		"<invalid>",
161 		"Lexer",
162 		"Parser",
163 		"", // no suffix for tree grammars
164 		"Parser" // if combined grammar, gen Parser and Lexer will be done later
165 	};
166 
167 	/** Set of valid imports.  E.g., can only import a tree parser into
168 	 *  another tree parser.  Maps delegate to set of delegator grammar types.
169 	 *  validDelegations.get(LEXER) gives list of the kinds of delegators
170 	 *  that can import lexers.
171 	 */
172 	public static MultiMap<Integer,Integer> validDelegations =
173 		new MultiMap<Integer,Integer>() {
174 			{
175 				map(LEXER, LEXER);
176 				map(LEXER, PARSER);
177 				map(LEXER, COMBINED);
178 
179 				map(PARSER, PARSER);
180 				map(PARSER, COMBINED);
181 
182 				map(TREE_PARSER, TREE_PARSER);
183 
184 				// TODO: allow COMBINED
185 				// map(COMBINED, COMBINED);
186 			}
187 		};
188 
189 	/** This is the buffer of *all* tokens found in the grammar file
190 	 *  including whitespace tokens etc...  I use this to extract
191 	 *  lexer rules from combined grammars.
192 	 */
193 	public CommonTokenStream tokenBuffer;
194 	public static final String IGNORE_STRING_IN_GRAMMAR_FILE_NAME = "__";
195 	public static final String AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__";
196 
197 	public static class Decision {
198 		public Grammar grammar;
199 		public int decision;
200 		public NFAState startState;
201 		public GrammarAST blockAST;
202 		public DFA dfa;
203 	}
204 
205 	public class LabelElementPair {
206 		public Token label;
207 		public GrammarAST elementRef;
208 		public String referencedRuleName;
209 		/** Has an action referenced the label?  Set by ActionAnalysis.g
210 		 *  Currently only set for rule labels.
211 		 */
212 		public boolean actionReferencesLabel;
213 		public int type; // in {RULE_LABEL,TOKEN_LABEL,RULE_LIST_LABEL,TOKEN_LIST_LABEL}
LabelElementPair(Token label, GrammarAST elementRef)214 		public LabelElementPair(Token label, GrammarAST elementRef) {
215 			this.label = label;
216 			this.elementRef = elementRef;
217 			this.referencedRuleName = elementRef.getText();
218 		}
getReferencedRule()219 		public Rule getReferencedRule() {
220 			return getRule(referencedRuleName);
221 		}
222 		@Override
toString()223 		public String toString() {
224 			return elementRef.toString();
225 		}
226 	}
227 
228 	/** What name did the user provide for this grammar? */
229 	public String name;
230 
231 	/** What type of grammar is this: lexer, parser, tree walker */
232 	public int type;
233 
234 	/** A list of options specified at the grammar level such as language=Java.
235 	 *  The value can be an AST for complicated values such as character sets.
236 	 *  There may be code generator specific options in here.  I do no
237 	 *  interpretation of the key/value pairs...they are simply available for
238 	 *  who wants them.
239 	 */
240 	protected Map<String, Object> options;
241 
242 	public static final Set<String> legalLexerOptions =
243 			new HashSet<String>() {
244 				{
245 				add("language"); add("tokenVocab");
246 				add("TokenLabelType");
247 				add("superClass");
248 				add("filter");
249 				add("k");
250 				add("backtrack");
251 				add("memoize");
252 				}
253 			};
254 
255 	public static final Set<String> legalParserOptions =
256 			new HashSet<String>() {
257 				{
258 				add("language"); add("tokenVocab");
259 				add("output"); add("rewrite"); add("ASTLabelType");
260 				add("TokenLabelType");
261 				add("superClass");
262 				add("k");
263 				add("backtrack");
264 				add("memoize");
265 				}
266 			};
267 
268     public static final Set<String> legalTreeParserOptions =
269         new HashSet<String>() {
270             {
271                 add("language"); add("tokenVocab");
272                 add("output"); add("rewrite"); add("ASTLabelType");
273                 add("TokenLabelType");
274                 add("superClass");
275                 add("k");
276                 add("backtrack");
277                 add("memoize");
278                 add("filter");
279             }
280         };
281 
282 	public static final Set<String> doNotCopyOptionsToLexer =
283 		new HashSet<String>() {
284 			{
285 				add("output"); add("ASTLabelType"); add("superClass");
286 				add("k"); add("backtrack"); add("memoize"); add("rewrite");
287 			}
288 		};
289 
290 	public static final Map<String, String> defaultOptions =
291 			new HashMap<String, String>() {
292 				{
293 					put("language","Java");
294 				}
295 			};
296 
297 	public static final Set<String> legalBlockOptions =
298 			new HashSet<String>() {{add("k"); add("greedy"); add("backtrack"); add("memoize");}};
299 
300 	/** What are the default options for a subrule? */
301 	public static final Map<String, String> defaultBlockOptions =
302 			new HashMap<String, String>() {{put("greedy","true");}};
303 
304 	public static final Map<String, String> defaultLexerBlockOptions =
305 			new HashMap<String, String>() {{put("greedy","true");}};
306 
307 	// Token options are here to avoid contaminating Token object in runtime
308 
309 	/** Legal options for terminal refs like ID&lt;node=MyVarNode&gt; */
310 	public static final Set<String> legalTokenOptions =
311 		new HashSet<String>() {
312 			{
313 				add(defaultTokenOption);
314 				add("type");
315 				add("text");
316 				add("assoc");
317 			}
318 		};
319 
320 	public static final String defaultTokenOption = "node";
321 
322 	/** Is there a global fixed lookahead set for this grammar?
323 	 *  If 0, nothing specified.  -1 implies we have not looked at
324 	 *  the options table yet to set k.
325 	 */
326 	protected int global_k = -1;
327 
328 	/** Map a scope to a map of name:action pairs.
329 	 *  Map&lt;String, Map&lt;String,GrammarAST&gt;&gt;
330 	 *  The code generator will use this to fill holes in the output files.
331 	 *  I track the AST node for the action in case I need the line number
332 	 *  for errors.
333 	 */
334 	private Map<String, Map<String, Object>> actions =
335 		new HashMap<String, Map<String, Object>>();
336 
337 	/** The NFA that represents the grammar with edges labelled with tokens
338 	 *  or epsilon.  It is more suitable to analysis than an AST representation.
339 	 */
340 	public NFA nfa;
341 
342 	protected NFAFactory factory;
343 
344 	/** If this grammar is part of a larger composite grammar via delegate
345 	 *  statement, then this points at the composite.  The composite holds
346 	 *  a global list of rules, token types, decision numbers, etc...
347 	 */
348 	public CompositeGrammar composite;
349 
350 	/** A pointer back into grammar tree.  Needed so we can add delegates. */
351 	public CompositeGrammarTree compositeTreeNode;
352 
353 	/** If this is a delegate of another grammar, this is the label used
354 	 *  as an instance var by that grammar to point at this grammar. null
355 	 *  if no label was specified in the delegate statement.
356 	 */
357 	public String label;
358 
359 	/** TODO: hook this to the charVocabulary option */
360 	protected IntSet charVocabulary = null;
361 
362 	/** For ANTLRWorks, we want to be able to map a line:col to a specific
363 	 *  decision DFA so it can display DFA.
364 	 */
365 	Map<String, DFA> lineColumnToLookaheadDFAMap = new HashMap<String, DFA>();
366 
367 	public Tool tool;
368 
369 	/** The unique set of all rule references in any rule; set of tree node
370 	 *  objects so two refs to same rule can exist but at different line/position.
371 	 */
372 	protected Set<GrammarAST> ruleRefs = new HashSet<GrammarAST>();
373 
374 	protected Set<GrammarAST> scopedRuleRefs = new HashSet<GrammarAST>();
375 
376 	/** The unique set of all token ID references in any rule */
377 	protected Set<Token> tokenIDRefs = new HashSet<Token>();
378 
379 	/** Be able to assign a number to every decision in grammar;
380 	 *  decisions in 1..n
381 	 */
382 	protected int decisionCount = 0;
383 
384 	/** A list of all rules that are in any left-recursive cycle.  There
385 	 *  could be multiple cycles, but this is a flat list of all problematic
386 	 *  rules. This is stuff we couldn't refactor to precedence rule.
387 	 */
388 	protected Set<Rule> leftRecursiveRules;
389 
390 	/** An external tool requests that DFA analysis abort prematurely.  Stops
391 	 *  at DFA granularity, which are limited to a DFA size and time computation
392 	 *  as failsafe.
393 	 */
394 	protected boolean externalAnalysisAbort;
395 
396 	public int numNonLLStar = 0; // hack to track for -report
397 
398 	/** When we read in a grammar, we track the list of syntactic predicates
399 	 *  and build faux rules for them later.  See my blog entry Dec 2, 2005:
400 	 *  http://www.antlr.org/blog/antlr3/lookahead.tml
401 	 *  This maps the name (we make up) for a pred to the AST grammar fragment.
402 	 */
403 	protected LinkedHashMap<String, GrammarAST> nameToSynpredASTMap;
404 
405 	/** Each left-recursive precedence rule must define precedence array
406 	 *  for binary operators like:
407 	 *
408 	 *  	static int[] e_prec = new int[tokenNames.length];
409 	 *  	static {
410    	 *  		e_prec[75] = 1;
411 	 *  	}
412 	 *  Track and we push into parser later; this is computed
413 	 *  early when we look for prec rules.
414 	 */
415 	public List<String> precRuleInitCodeBlocks = new ArrayList<String>();
416 
417     /** At least one rule has memoize=true */
418     public boolean atLeastOneRuleMemoizes;
419 
420     /** At least one backtrack=true in rule or decision or grammar. */
421     public boolean atLeastOneBacktrackOption;
422 
423 	/** Was this created from a COMBINED grammar? */
424 	public boolean implicitLexer;
425 
426 	/** Map a rule to it's Rule object */
427 	protected LinkedHashMap<String,Rule> nameToRuleMap = new LinkedHashMap<String,Rule>();
428 
429 	/** If this rule is a delegate, some rules might be overridden; don't
430 	 *  want to gen code for them.
431 	 */
432 	public Set<String> overriddenRules = new HashSet<String>();
433 
434 	/** The list of all rules referenced in this grammar, not defined here,
435 	 *  and defined in a delegate grammar.  Not all of these will be generated
436 	 *  in the recognizer for this file; only those that are affected by rule
437 	 *  definitions in this grammar.  I am not sure the Java target will need
438 	 *  this but I'm leaving in case other targets need it.
439 	 *  see NameSpaceChecker.lookForReferencesToUndefinedSymbols()
440 	 */
441 	protected Set<Rule> delegatedRuleReferences = new HashSet<Rule>();
442 
443 	/** The ANTLRParser tracks lexer rules when reading combined grammars
444 	 *  so we can build the Tokens rule.
445 	 */
446 	public List<String> lexerRuleNamesInCombined = new ArrayList<String>();
447 
448 	/** Track the scopes defined outside of rules and the scopes associated
449 	 *  with all rules (even if empty).
450 	 */
451 	protected Map<String, AttributeScope> scopes = new HashMap<String, AttributeScope>();
452 
453 	/** An AST that records entire input grammar with all rules.  A simple
454 	 *  grammar with one rule, "grammar t; a : A | B ;", looks like:
455 	 * ( grammar t ( rule a ( BLOCK ( ALT A ) ( ALT B ) ) &lt;end-of-rule&gt; ) )
456 	 */
457 	protected GrammarAST grammarTree = null;
458 
459 	/** Each subrule/rule is a decision point and we must track them so we
460 	 *  can go back later and build DFA predictors for them.  This includes
461 	 *  all the rules, subrules, optional blocks, ()+, ()* etc...
462 	 */
463 	protected Vector<Decision> indexToDecision =
464 		new Vector<Decision>(INITIAL_DECISION_LIST_SIZE);
465 
466 	/** If non-null, this is the code generator we will use to generate
467 	 *  recognizers in the target language.
468 	 */
469 	protected CodeGenerator generator;
470 
471 	public NameSpaceChecker nameSpaceChecker = new NameSpaceChecker(this);
472 
473 	public LL1Analyzer ll1Analyzer = new LL1Analyzer(this);
474 
475 	/** For merged lexer/parsers, we must construct a separate lexer spec.
476 	 *  This is the template for lexer; put the literals first then the
477 	 *  regular rules.  We don't need to specify a token vocab import as
478 	 *  I make the new grammar import from the old all in memory; don't want
479 	 *  to force it to read from the disk.  Lexer grammar will have same
480 	 *  name as original grammar but will be in different filename.  Foo.g
481 	 *  with combined grammar will have FooParser.java generated and
482 	 *  Foo__.g with again Foo inside.  It will however generate FooLexer.java
483 	 *  as it's a lexer grammar.  A bit odd, but autogenerated.  Can tweak
484 	 *  later if we want.
485 	 */
486 	protected String lexerGrammarTemplate =
487 			"grammar(name, options, imports, actionNames, actions, literals, rules) ::= <<\n" +
488 			"lexer grammar <name>;\n" +
489 			"<if(options)>" +
490 			"options {\n" +
491 			"  <options:{it | <it.name>=<it.value>;<\\n>}>\n" +
492 			"}<\\n>\n" +
493 			"<endif>\n" +
494 			"<if(imports)>import <imports; separator=\", \">;<endif>\n" +
495 			"<actionNames,actions:{n,a|@<n> {<a>\\}\n}>\n" +
496 			"<literals:{it | <it.ruleName> : <it.literal> ;\n}>\n" +
497 			"<rules>\n" +
498 			">>\n";
499 	protected ST lexerGrammarST;
500 
501 	/** What file name holds this grammar? */
502 	protected String fileName;
503 
504 	/** How long in ms did it take to build DFAs for this grammar?
505 	 *  If this grammar is a combined grammar, it only records time for
506 	 *  the parser grammar component.  This only records the time to
507 	 *  do the LL(*) work; NFA&rarr;DFA conversion.
508 	 */
509 	public long DFACreationWallClockTimeInMS;
510 
511 	public int numberOfSemanticPredicates = 0;
512 	public int numberOfManualLookaheadOptions = 0;
513 	public Set<Integer> setOfNondeterministicDecisionNumbers = new HashSet<Integer>();
514 	public Set<Integer> setOfNondeterministicDecisionNumbersResolvedWithPredicates =
515 		new HashSet<Integer>();
516 
517 	/** Track decisions with syn preds specified for reporting.
518 	 *  This is the a set of BLOCK type AST nodes.
519 	 */
520 	public Set<GrammarAST> blocksWithSynPreds = new HashSet<GrammarAST>();
521 
522 	/** Track decisions that actually use the syn preds in the DFA.
523 	 *  Computed during NFA to DFA conversion.
524 	 */
525 	public Set<DFA> decisionsWhoseDFAsUsesSynPreds = new HashSet<DFA>();
526 
527 	/** Track names of preds so we can avoid generating preds that aren't used
528 	 *  Computed during NFA to DFA conversion.  Just walk accept states
529 	 *  and look for synpreds because that is the only state target whose
530 	 *  incident edges can have synpreds.  Same is try for
531 	 *  decisionsWhoseDFAsUsesSynPreds.
532 	 */
533 	public Set<String> synPredNamesUsedInDFA = new HashSet<String>();
534 
535 	/** Track decisions with syn preds specified for reporting.
536 	 *  This is the a set of BLOCK type AST nodes.
537 	 */
538 	public Set<GrammarAST> blocksWithSemPreds = new HashSet<GrammarAST>();
539 
540 	/** Track decisions that actually use the syn preds in the DFA. */
541 	public Set<DFA> decisionsWhoseDFAsUsesSemPreds = new HashSet<DFA>();
542 
543 	protected boolean allDecisionDFACreated = false;
544 
545 	/** We need a way to detect when a lexer grammar is autogenerated from
546 	 *  another grammar or we are just sending in a string representing a
547 	 *  grammar.  We don't want to generate a .tokens file, for example,
548 	 *  in such cases.
549 	 */
550 	protected boolean builtFromString = false;
551 
552 	/** Factored out the sanity checking code; delegate to it. */
553 	GrammarSanity sanity = new GrammarSanity(this);
554 
555 	/** Useful for asking questions about target during analysis */
556 	Target target;
557 
558 	/** Create a grammar from file name.  */
Grammar(Tool tool, String fileName, CompositeGrammar composite)559 	public Grammar(Tool tool, String fileName, CompositeGrammar composite) {
560 		this.composite = composite;
561 		setTool(tool);
562 		setFileName(fileName);
563 		// ensure we have the composite set to something
564 		if ( composite.delegateGrammarTreeRoot==null ) {
565 			composite.setDelegationRoot(this);
566 		}
567 		STGroup lexerGrammarSTG = new STGroupString(lexerGrammarTemplate);
568 		lexerGrammarST = lexerGrammarSTG.getInstanceOf("grammar");
569 		target = CodeGenerator.loadLanguageTarget((String) getOption("language"));
570 	}
571 
572 	/** Useful for when you are sure that you are not part of a composite
573 	 *  already.  Used in Interp/RandomPhrase and testing.
574 	 */
Grammar()575 	public Grammar() { this((Tool)null); }
576 
Grammar(Tool tool)577 	public Grammar(Tool tool) {
578 		setTool(tool);
579 		builtFromString = true;
580 		composite = new CompositeGrammar(this);
581 		STGroup lexerGrammarSTG = new STGroupString(lexerGrammarTemplate);
582 		lexerGrammarST = lexerGrammarSTG.getInstanceOf("grammar");
583 		target = CodeGenerator.loadLanguageTarget((String)getOption("language"));
584 	}
585 
586 	/** Used for testing; only useful on noncomposite grammars.*/
Grammar(String grammarString)587 	public Grammar(String grammarString)
588 			throws RecognitionException
589 	{
590 		this(null, grammarString);
591 	}
592 
593 	/** Used for testing and Interp/RandomPhrase.  Only useful on
594 	 *  noncomposite grammars.
595 	 */
Grammar(Tool tool, String grammarString)596 	public Grammar(Tool tool, String grammarString)
597 		throws RecognitionException
598 	{
599 		this(tool);
600 		setFileName("<string>");
601 		StringReader r = new StringReader(grammarString);
602 		parseAndBuildAST(r);
603 		composite.assignTokenTypes();
604 		//composite.translateLeftRecursiveRules();
605 		addRulesForSyntacticPredicates();
606 		composite.defineGrammarSymbols();
607 		//composite.createNFAs();
608 		checkNameSpaceAndActions();
609 	}
610 
setFileName(String fileName)611 	public void setFileName(String fileName) {
612 		this.fileName = fileName;
613 	}
614 
getFileName()615 	public String getFileName() {
616 		return fileName;
617 	}
618 
setName(String name)619 	public void setName(String name) {
620 		if ( name==null ) {
621 			return;
622 		}
623 		// don't error check autogenerated files (those with '__' in them)
624 		String saneFile = fileName.replace('\\', '/');
625 		int lastSlash = saneFile.lastIndexOf('/');
626 		String onlyFileName = saneFile.substring(lastSlash+1, fileName.length());
627 		if ( !builtFromString ) {
628 			int lastDot = onlyFileName.lastIndexOf('.');
629 			String onlyFileNameNoSuffix;
630 			if ( lastDot < 0 ) {
631 				ErrorManager.error(ErrorManager.MSG_FILENAME_EXTENSION_ERROR, fileName);
632 				onlyFileNameNoSuffix = onlyFileName+GRAMMAR_FILE_EXTENSION;
633 			}
634 			else {
635 				onlyFileNameNoSuffix = onlyFileName.substring(0,lastDot);
636 			}
637 			if ( !name.equals(onlyFileNameNoSuffix) ) {
638 				ErrorManager.error(ErrorManager.MSG_FILE_AND_GRAMMAR_NAME_DIFFER,
639 								   name,
640 								   fileName);
641 			}
642 		}
643 		this.name = name;
644 	}
645 
setGrammarContent(String grammarString)646 	public void setGrammarContent(String grammarString) throws RecognitionException {
647 		StringReader r = new StringReader(grammarString);
648 		parseAndBuildAST(r);
649 		composite.assignTokenTypes();
650 		composite.defineGrammarSymbols();
651 	}
652 
parseAndBuildAST()653 	public void parseAndBuildAST()
654 		throws IOException
655 	{
656 		FileReader fr;
657 		BufferedReader br = null;
658 		try {
659 			fr = new FileReader(fileName);
660 			br = new BufferedReader(fr);
661 			parseAndBuildAST(br);
662 			br.close();
663 			br = null;
664 		}
665 		finally {
666 			if ( br!=null ) {
667 				br.close();
668 			}
669 		}
670 	}
671 
parseAndBuildAST(Reader r)672 	public void parseAndBuildAST(Reader r) {
673 		// BUILD AST FROM GRAMMAR
674 		ANTLRLexer lexer;
675 		try {
676 			lexer = new ANTLRLexer(new ANTLRReaderStream(r));
677 		} catch (IOException e) {
678 			ErrorManager.internalError("unexpected stream error from parsing "+fileName, e);
679 			return;
680 		}
681 
682 		lexer.setFileName(this.getFileName());
683 		tokenBuffer = new CommonTokenStream(lexer);
684 		ANTLRParser parser = ANTLRParser.createParser(tokenBuffer);
685 		parser.setFileName(this.getFileName());
686 		ANTLRParser.grammar__return result = null;
687 		try {
688 			result = parser.grammar_(this);
689 		}
690 		catch (RecognitionException re) {
691 			ErrorManager.internalError("unexpected parser recognition error from "+fileName, re);
692 		}
693 
694         dealWithTreeFilterMode(); // tree grammar and filter=true?
695 
696         if ( lexer.hasASTOperator && !buildAST() ) {
697 			Object value = getOption("output");
698 			if ( value == null ) {
699 				ErrorManager.grammarWarning(ErrorManager.MSG_REWRITE_OR_OP_WITH_NO_OUTPUT_OPTION,
700 										    this, null);
701 				setOption("output", "AST", null);
702 			}
703 			else {
704 				ErrorManager.grammarError(ErrorManager.MSG_AST_OP_WITH_NON_AST_OUTPUT_OPTION,
705 										  this, null, value);
706 			}
707 		}
708 
709 		setGrammarTree(result.getTree());
710 
711 		//if ( grammarTree!=null ) System.out.println("grammar tree: "+grammarTree.toStringTree());
712 
713 		grammarTree.setUnknownTokenBoundaries();
714 
715 		setFileName(lexer.getFileName()); // the lexer #src might change name
716 		if ( grammarTree.findFirstType(ANTLRParser.RULE)==null ) {
717 			ErrorManager.error(ErrorManager.MSG_NO_RULES, getFileName());
718 		}
719 	}
720 
dealWithTreeFilterMode()721     protected void dealWithTreeFilterMode() {
722         Object filterMode = (String)getOption("filter");
723         if ( type==TREE_PARSER && filterMode!=null && filterMode.toString().equals("true") ) {
724             // check for conflicting options
725             // filter => backtrack=true
726             // filter&&output=AST => rewrite=true
727             // filter&&output!=AST => error
728             // any deviation from valid option set is an error
729             Object backtrack = (String)getOption("backtrack");
730             Object output = getOption("output");
731             Object rewrite = getOption("rewrite");
732             if ( backtrack!=null && !backtrack.toString().equals("true") ) {
733                 ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER,
734                                    "backtrack", backtrack);
735             }
736             if ( output!=null && !output.toString().equals("AST") ) {
737                 ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER,
738                                    "output", output);
739                 setOption("output", "", null);
740             }
741             if ( rewrite!=null && !rewrite.toString().equals("true") ) {
742                 ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER,
743                                    "rewrite", rewrite);
744             }
745             // set options properly
746             setOption("backtrack", "true", null);
747             if ( output!=null && output.toString().equals("AST") ) {
748                 setOption("rewrite", "true", null);
749             }
750             // @synpredgate set to state.backtracking==1 by code gen when filter=true
751             // superClass set in template target::treeParser
752         }
753     }
754 
translateLeftRecursiveRule(GrammarAST ruleAST)755 	public void translateLeftRecursiveRule(GrammarAST ruleAST) {
756 		//System.out.println(ruleAST.toStringTree());
757 		CommonTreeNodeStream input = new CommonTreeNodeStream(ruleAST);
758 		LeftRecursiveRuleAnalyzer leftRecursiveRuleWalker =
759 			new LeftRecursiveRuleAnalyzer(input, this, ruleAST.enclosingRuleName);
760 		boolean isLeftRec = false;
761 		try {
762 			//System.out.println("TESTING "+ruleAST.enclosingRuleName);
763 			isLeftRec = leftRecursiveRuleWalker.rec_rule(this);
764 		}
765 		catch (RecognitionException re) {
766 			ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re);
767 		}
768 		if ( !isLeftRec ) return;
769 		List<String> rules = new ArrayList<String>();
770 		rules.add( leftRecursiveRuleWalker.getArtificialPrecStartRule() ) ;
771 		rules.add( leftRecursiveRuleWalker.getArtificialOpPrecRule() );
772 		rules.add( leftRecursiveRuleWalker.getArtificialPrimaryRule() );
773 		for (String r : rules) {
774 			GrammarAST t = parseArtificialRule(r);
775 			addRule(grammarTree, t);
776 			//System.out.println(t.toStringTree());
777 		}
778 
779 		//precRuleInitCodeBlocks.add( precRuleWalker.getOpPrecJavaCode() );
780 	}
781 
defineGrammarSymbols()782 	public void defineGrammarSymbols() {
783 		if ( Tool.internalOption_PrintGrammarTree ) {
784 			System.out.println(grammarTree.toStringList());
785 		}
786 
787 		// DEFINE RULES
788 		//System.out.println("### define "+name+" rules");
789 		DefineGrammarItemsWalker defineItemsWalker = new DefineGrammarItemsWalker(new CommonTreeNodeStream(getGrammarTree()));
790 		try {
791 			defineItemsWalker.grammar_(this);
792 		}
793 		catch (RecognitionException re) {
794 			ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE,
795 							   re);
796 		}
797 	}
798 
799 	/** ANALYZE ACTIONS, LOOKING FOR LABEL AND ATTR REFS, sanity check */
checkNameSpaceAndActions()800 	public void checkNameSpaceAndActions() {
801 		examineAllExecutableActions();
802 		checkAllRulesForUselessLabels();
803 
804 		nameSpaceChecker.checkConflicts();
805 	}
806 
807 	/** Many imports are illegal such as lexer into a tree grammar */
validImport(Grammar delegate)808 	public boolean validImport(Grammar delegate) {
809 		List<Integer> validDelegators = validDelegations.get(delegate.type);
810 		return validDelegators!=null && validDelegators.contains(this.type);
811 	}
812 
813 	/** If the grammar is a combined grammar, return the text of the implicit
814 	 *  lexer grammar.
815 	 */
getLexerGrammar()816 	public String getLexerGrammar() {
817 		if ( lexerGrammarST.getAttribute("literals")==null &&
818 			 lexerGrammarST.getAttribute("rules")==null )
819 		{
820 			// if no rules, return nothing
821 			return null;
822 		}
823 		lexerGrammarST.add("name", name);
824 		// if there are any actions set for lexer, pass them in
825 		if ( getActions().get("lexer")!=null ) {
826 			lexerGrammarST.add("actionNames",
827 										getActions().get("lexer").keySet());
828 			lexerGrammarST.add("actions",
829 										getActions().get("lexer").values());
830 		}
831 		// make sure generated grammar has the same options
832 		if ( options!=null ) {
833 			for (String optionName : options.keySet()) {
834 				if ( !doNotCopyOptionsToLexer.contains(optionName) ) {
835 					Object value = options.get(optionName);
836 					lexerGrammarST.addAggr("options.{name,value}", optionName, value);
837 				}
838 			}
839 		}
840 		return lexerGrammarST.render();
841 	}
842 
getImplicitlyGeneratedLexerFileName()843 	public String getImplicitlyGeneratedLexerFileName() {
844 		return name+
845 			   IGNORE_STRING_IN_GRAMMAR_FILE_NAME +
846 			   LEXER_GRAMMAR_FILE_EXTENSION;
847 	}
848 
849 	/** Get the name of the generated recognizer; may or may not be same
850 	 *  as grammar name.
851 	 *  Recognizer is TParser and TLexer from T if combined, else
852 	 *  just use T regardless of grammar type.
853 	 */
getRecognizerName()854 	public String getRecognizerName() {
855 		String suffix = "";
856 		List<Grammar> grammarsFromRootToMe = composite.getDelegators(this);
857 		//System.out.println("grammarsFromRootToMe="+grammarsFromRootToMe);
858 		String qualifiedName = name;
859 		if ( grammarsFromRootToMe!=null ) {
860 			StringBuilder buf = new StringBuilder();
861 			for (Grammar g : grammarsFromRootToMe) {
862 				buf.append(g.name);
863 				buf.append('_');
864 			}
865 			buf.append(name);
866 			qualifiedName = buf.toString();
867 		}
868 		if ( type==Grammar.COMBINED ||
869 			 (type==Grammar.LEXER && implicitLexer) )
870 		{
871 			suffix = Grammar.grammarTypeToFileNameSuffix[type];
872 		}
873 		return qualifiedName+suffix;
874 	}
875 
876 	/** Parse a rule we add artificially that is a list of the other lexer
877 	 *  rules like this: "Tokens : ID | INT | SEMI ;"  nextToken() will invoke
878 	 *  this to set the current token.  Add char literals before
879 	 *  the rule references.
880 	 *
881 	 *  If in filter mode, we want every alt to backtrack and we need to
882 	 *  do k=1 to force the "first token def wins" rule.  Otherwise, the
883 	 *  longest-match rule comes into play with LL(*).
884 	 *
885 	 *  The ANTLRParser antlr.g file now invokes this when parsing a lexer
886 	 *  grammar, which I think is proper even though it peeks at the info
887 	 *  that later phases will (re)compute.  It gets a list of lexer rules
888 	 *  and builds a string representing the rule; then it creates a parser
889 	 *  and adds the resulting tree to the grammar's tree.
890 	 */
addArtificialMatchTokensRule(GrammarAST grammarAST, List<String> ruleNames, List<String> delegateNames, boolean filterMode)891 	public GrammarAST addArtificialMatchTokensRule(GrammarAST grammarAST,
892 												   List<String> ruleNames,
893 												   List<String> delegateNames,
894 												   boolean filterMode) {
895 		ST matchTokenRuleST;
896 		if ( filterMode ) {
897 			matchTokenRuleST = new ST(
898 					ARTIFICIAL_TOKENS_RULENAME+
899 					" options {k=1; backtrack=true;} : <rules; separator=\"|\">;");
900 		}
901 		else {
902 			matchTokenRuleST = new ST(
903 					ARTIFICIAL_TOKENS_RULENAME+" : <rules; separator=\"|\">;");
904 		}
905 
906 		// Now add token rule references
907 		for (int i = 0; i < ruleNames.size(); i++) {
908 			String rname = ruleNames.get(i);
909 			matchTokenRuleST.add("rules", rname);
910 		}
911 		for (int i = 0; i < delegateNames.size(); i++) {
912 			String dname = delegateNames.get(i);
913 			matchTokenRuleST.add("rules", dname+".Tokens");
914 		}
915 		//System.out.println("tokens rule: "+matchTokenRuleST.toString());
916 		GrammarAST r = parseArtificialRule(matchTokenRuleST.render());
917 		addRule(grammarAST, r);
918 		//addRule((GrammarAST)parser.getAST());
919 		//return (GrammarAST)parser.getAST();
920 		return r;
921 	}
922 
parseArtificialRule(String ruleText)923 	public GrammarAST parseArtificialRule(String ruleText) {
924 		ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(ruleText));
925 		ANTLRParser parser = ANTLRParser.createParser(new CommonTokenStream(lexer));
926 		parser.setGrammar(this);
927 		parser.setGrammarType(this.type);
928 		try {
929 			ANTLRParser.rule_return result = parser.rule();
930 			return result.getTree();
931 		}
932 		catch (Exception e) {
933 			ErrorManager.error(ErrorManager.MSG_ERROR_CREATING_ARTIFICIAL_RULE,
934 							   e);
935 			return null;
936 		}
937 	}
938 
addRule(GrammarAST grammarTree, GrammarAST t)939 	public void addRule(GrammarAST grammarTree, GrammarAST t) {
940 		GrammarAST p = null;
941 		for (int i = 0; i < grammarTree.getChildCount(); i++ ) {
942 			p = (GrammarAST)grammarTree.getChild(i);
943 			if (p == null || p.getType() == ANTLRParser.RULE || p.getType() == ANTLRParser.PREC_RULE) {
944 				break;
945 			}
946 		}
947 
948 		if (p != null) {
949 			grammarTree.addChild(t);
950 		}
951 	}
952 
953 	/** for any syntactic predicates, we need to define rules for them; they will get
954 	 *  defined automatically like any other rule. :)
955 	 */
getArtificialRulesForSyntacticPredicates(LinkedHashMap<String,GrammarAST> nameToSynpredASTMap)956 	protected List<? extends GrammarAST> getArtificialRulesForSyntacticPredicates(LinkedHashMap<String,GrammarAST> nameToSynpredASTMap)
957 	{
958 		List<GrammarAST> rules = new ArrayList<GrammarAST>();
959 		if ( nameToSynpredASTMap==null ) {
960 			return rules;
961 		}
962 		boolean isLexer = grammarTree.getType()==ANTLRParser.LEXER_GRAMMAR;
963 		for (Map.Entry<String, GrammarAST> entry : nameToSynpredASTMap.entrySet()) {
964 			String synpredName = entry.getKey();
965 			GrammarAST fragmentAST = entry.getValue();
966 			GrammarAST ruleAST =
967 				ANTLRParser.createSimpleRuleAST(synpredName,
968 												fragmentAST,
969 												isLexer);
970 			rules.add(ruleAST);
971 		}
972 		return rules;
973 	}
974 
addRulesForSyntacticPredicates()975 	public void addRulesForSyntacticPredicates() {
976 		// Get syn pred rules and add to existing tree
977 		List<? extends GrammarAST> synpredRules =
978 			getArtificialRulesForSyntacticPredicates(nameToSynpredASTMap);
979 		for (int i = 0; i < synpredRules.size(); i++) {
980 			GrammarAST rAST = (GrammarAST) synpredRules.get(i);
981 			grammarTree.addChild(rAST);
982 		}
983 	}
984 
985 	/** Walk the list of options, altering this Grammar object according
986 	 *  to any I recognize.
987 	protected void processOptions() {
988 		Iterator optionNames = options.keySet().iterator();
989 		while (optionNames.hasNext()) {
990 			String optionName = (String) optionNames.next();
991 			Object value = options.get(optionName);
992 			if ( optionName.equals("tokenVocab") ) {
993 
994 			}
995 		}
996 	}
997 	 */
998 
999 	/** Define all the rule begin/end NFAStates to solve forward reference
1000 	 *  issues.  Critical for composite grammars too.
1001 	 *  This is normally called on all root/delegates manually and then
1002 	 *  buildNFA() is called afterwards because the NFA construction needs
1003 	 *  to see rule start/stop states from potentially every grammar. Has
1004 	 *  to be have these created a priori.  Testing routines will often
1005 	 *  just call buildNFA(), which forces a call to this method if not
1006 	 *  done already. Works ONLY for single noncomposite grammars.
1007 	 */
createRuleStartAndStopNFAStates()1008 	public void createRuleStartAndStopNFAStates() {
1009 		//System.out.println("### createRuleStartAndStopNFAStates "+getGrammarTypeString()+" grammar "+name+" NFAs");
1010 		if ( nfa!=null ) {
1011 			return;
1012 		}
1013 		nfa = new NFA(this);
1014 		factory = new NFAFactory(nfa);
1015 
1016 		Collection<Rule> rules = getRules();
1017 		for (Rule r : rules) {
1018 			String ruleName = r.name;
1019 			NFAState ruleBeginState = factory.newState();
1020 			ruleBeginState.setDescription("rule "+ruleName+" start");
1021 			ruleBeginState.enclosingRule = r;
1022 			r.startState = ruleBeginState;
1023 			NFAState ruleEndState = factory.newState();
1024 			ruleEndState.setDescription("rule "+ruleName+" end");
1025 			ruleEndState.setAcceptState(true);
1026 			ruleEndState.enclosingRule = r;
1027 			r.stopState = ruleEndState;
1028 		}
1029 	}
1030 
buildNFA()1031 	public void buildNFA() {
1032 		if ( nfa==null ) {
1033 			createRuleStartAndStopNFAStates();
1034 		}
1035 		if ( nfa.complete ) {
1036 			// don't let it create more than once; has side-effects
1037 			return;
1038 		}
1039 		//System.out.println("### build "+getGrammarTypeString()+" grammar "+name+" NFAs");
1040 		if ( getRules().isEmpty() ) {
1041 			return;
1042 		}
1043 
1044 		CommonTreeNodeStream input = new CommonTreeNodeStream(getGrammarTree());
1045 		TreeToNFAConverter nfaBuilder = new TreeToNFAConverter(input, this, nfa, factory);
1046 		try {
1047 			nfaBuilder.grammar_();
1048 		}
1049 		catch (RecognitionException re) {
1050 			ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE,
1051 							   name,
1052 							   re);
1053 		}
1054 		nfa.complete = true;
1055 	}
1056 
1057 	/** For each decision in this grammar, compute a single DFA using the
1058 	 *  NFA states associated with the decision.  The DFA construction
1059 	 *  determines whether or not the alternatives in the decision are
1060 	 *  separable using a regular lookahead language.
1061 	 *
1062 	 *  Store the lookahead DFAs in the AST created from the user's grammar
1063 	 *  so the code generator or whoever can easily access it.
1064 	 *
1065 	 *  This is a separate method because you might want to create a
1066 	 *  Grammar without doing the expensive analysis.
1067 	 */
createLookaheadDFAs()1068 	public void createLookaheadDFAs() {
1069 		createLookaheadDFAs(true);
1070 	}
1071 
createLookaheadDFAs(boolean wackTempStructures)1072 	public void createLookaheadDFAs(boolean wackTempStructures) {
1073 		if ( nfa==null ) {
1074 			buildNFA();
1075 		}
1076 
1077 		// CHECK FOR LEFT RECURSION; Make sure we can actually do analysis
1078 		checkAllRulesForLeftRecursion();
1079 
1080 		/*
1081 		// was there a severe problem while sniffing the grammar?
1082 		if ( ErrorManager.doNotAttemptAnalysis() ) {
1083 			return;
1084 		}
1085 		*/
1086 
1087 		long start = System.currentTimeMillis();
1088 
1089 		//System.out.println("### create DFAs");
1090 		int numDecisions = getNumberOfDecisions();
1091 		if ( NFAToDFAConverter.SINGLE_THREADED_NFA_CONVERSION ) {
1092 			for (int decision=1; decision<=numDecisions; decision++) {
1093 				NFAState decisionStartState = getDecisionNFAStartState(decision);
1094 				if ( leftRecursiveRules.contains(decisionStartState.enclosingRule) ) {
1095 					// don't bother to process decisions within left recursive rules.
1096 					if ( composite.watchNFAConversion ) {
1097 						System.out.println("ignoring decision "+decision+
1098 										   " within left-recursive rule "+decisionStartState.enclosingRule.name);
1099 					}
1100 					continue;
1101 				}
1102 				if ( !externalAnalysisAbort && decisionStartState.getNumberOfTransitions()>1 ) {
1103 					Rule r = decisionStartState.enclosingRule;
1104 					if ( r.isSynPred && !synPredNamesUsedInDFA.contains(r.name) ) {
1105 						continue;
1106 					}
1107 					DFA dfa = null;
1108 					// if k=* or k=1, try LL(1)
1109 					if ( getUserMaxLookahead(decision)==0 ||
1110 						 getUserMaxLookahead(decision)==1 )
1111 					{
1112 						dfa = createLL_1_LookaheadDFA(decision);
1113 					}
1114 					if ( dfa==null ) {
1115 						if ( composite.watchNFAConversion ) {
1116 							System.out.println("decision "+decision+
1117 											   " not suitable for LL(1)-optimized DFA analysis");
1118 						}
1119 						dfa = createLookaheadDFA(decision, wackTempStructures);
1120 					}
1121 					if ( dfa.startState==null ) {
1122 						// something went wrong; wipe out DFA
1123 						setLookaheadDFA(decision, null);
1124 					}
1125 					if ( Tool.internalOption_PrintDFA ) {
1126 						System.out.println("DFA d="+decision);
1127 						FASerializer serializer = new FASerializer(nfa.grammar);
1128 						String result = serializer.serialize(dfa.startState);
1129 						System.out.println(result);
1130 					}
1131 				}
1132 			}
1133 		}
1134 		else {
1135 			ErrorManager.info("two-threaded DFA conversion");
1136 			// create a barrier expecting n DFA and this main creation thread
1137 			Barrier barrier = new Barrier(3);
1138 			// assume 2 CPU for now
1139 			int midpoint = numDecisions/2;
1140 			NFAConversionThread t1 =
1141 				new NFAConversionThread(this, barrier, 1, midpoint);
1142 			new Thread(t1).start();
1143 			if ( midpoint == (numDecisions/2) ) {
1144 				midpoint++;
1145 			}
1146 			NFAConversionThread t2 =
1147 				new NFAConversionThread(this, barrier, midpoint, numDecisions);
1148 			new Thread(t2).start();
1149 			// wait for these two threads to finish
1150 			try {
1151 				barrier.waitForRelease();
1152 			}
1153 			catch(InterruptedException e) {
1154 				ErrorManager.internalError("what the hell? DFA interruptus", e);
1155 			}
1156 		}
1157 
1158 		long stop = System.currentTimeMillis();
1159 		DFACreationWallClockTimeInMS = stop - start;
1160 
1161 		// indicate that we've finished building DFA (even if #decisions==0)
1162 		allDecisionDFACreated = true;
1163 	}
1164 
createLL_1_LookaheadDFA(int decision)1165 	public DFA createLL_1_LookaheadDFA(int decision) {
1166 		Decision d = getDecision(decision);
1167 		String enclosingRule = d.startState.enclosingRule.name;
1168 		Rule r = d.startState.enclosingRule;
1169 		NFAState decisionStartState = getDecisionNFAStartState(decision);
1170 
1171 		if ( composite.watchNFAConversion ) {
1172 			System.out.println("--------------------\nattempting LL(1) DFA (d="
1173 							   +decisionStartState.getDecisionNumber()+") for "+
1174 							   decisionStartState.getDescription());
1175 		}
1176 
1177 		if ( r.isSynPred && !synPredNamesUsedInDFA.contains(enclosingRule) ) {
1178 			return null;
1179 		}
1180 
1181 		// compute lookahead for each alt
1182 		int numAlts = getNumberOfAltsForDecisionNFA(decisionStartState);
1183 		LookaheadSet[] altLook = new LookaheadSet[numAlts+1];
1184 		for (int alt = 1; alt <= numAlts; alt++) {
1185 			int walkAlt =
1186 				decisionStartState.translateDisplayAltToWalkAlt(alt);
1187 			NFAState altLeftEdge = getNFAStateForAltOfDecision(decisionStartState, walkAlt);
1188 			NFAState altStartState = (NFAState)altLeftEdge.transition[0].target;
1189 			//System.out.println("alt "+alt+" start state = "+altStartState.stateNumber);
1190 			altLook[alt] = ll1Analyzer.LOOK(altStartState);
1191 			//System.out.println("alt "+alt+": "+altLook[alt].toString(this));
1192 		}
1193 
1194 		// compare alt i with alt j for disjointness
1195 		boolean decisionIsLL_1 = true;
1196 outer:
1197 		for (int i = 1; i <= numAlts; i++) {
1198 			for (int j = i+1; j <= numAlts; j++) {
1199 				/*
1200 				System.out.println("compare "+i+", "+j+": "+
1201 								   altLook[i].toString(this)+" with "+
1202 								   altLook[j].toString(this));
1203 				*/
1204 				LookaheadSet collision = altLook[i].intersection(altLook[j]);
1205 				if ( !collision.isNil() ) {
1206 					//System.out.println("collision (non-LL(1)): "+collision.toString(this));
1207 					decisionIsLL_1 = false;
1208 					break outer;
1209 				}
1210 			}
1211 		}
1212 
1213 		boolean foundConfoundingPredicate =
1214 			ll1Analyzer.detectConfoundingPredicates(decisionStartState);
1215 		if ( decisionIsLL_1 && !foundConfoundingPredicate ) {
1216 			// build an LL(1) optimized DFA with edge for each altLook[i]
1217 			if ( NFAToDFAConverter.debug ) {
1218 				System.out.println("decision "+decision+" is simple LL(1)");
1219 			}
1220 			DFA lookaheadDFA = new LL1DFA(decision, decisionStartState, altLook);
1221 			setLookaheadDFA(decision, lookaheadDFA);
1222 			updateLineColumnToLookaheadDFAMap(lookaheadDFA);
1223 			return lookaheadDFA;
1224 		}
1225 
1226 		// not LL(1) but perhaps we can solve with simplified predicate search
1227 		// even if k=1 set manually, only resolve here if we have preds; i.e.,
1228 		// don't resolve etc...
1229 
1230 		/*
1231 		SemanticContext visiblePredicates =
1232 			ll1Analyzer.getPredicates(decisionStartState);
1233 		boolean foundConfoundingPredicate =
1234 			ll1Analyzer.detectConfoundingPredicates(decisionStartState);
1235 			*/
1236 
1237 		// exit if not forced k=1 or we found a predicate situation we
1238 		// can't handle: predicates in rules invoked from this decision.
1239 		if ( getUserMaxLookahead(decision)!=1 || // not manually set to k=1
1240 			 !getAutoBacktrackMode(decision) ||
1241 			 foundConfoundingPredicate )
1242 		{
1243 			//System.out.println("trying LL(*)");
1244 			return null;
1245 		}
1246 
1247 		List<IntervalSet> edges = new ArrayList<IntervalSet>();
1248 		for (int i = 1; i < altLook.length; i++) {
1249 			LookaheadSet s = altLook[i];
1250 			edges.add(s.tokenTypeSet);
1251 		}
1252 		List<IntervalSet> disjoint = makeEdgeSetsDisjoint(edges);
1253 		//System.out.println("disjoint="+disjoint);
1254 
1255 		MultiMap<IntervalSet, Integer> edgeMap = new MultiMap<IntervalSet, Integer>();
1256 		for (int i = 0; i < disjoint.size(); i++) {
1257 			IntervalSet ds = disjoint.get(i);
1258 			for (int alt = 1; alt < altLook.length; alt++) {
1259 				LookaheadSet look = altLook[alt];
1260 				if ( !ds.and(look.tokenTypeSet).isNil() ) {
1261 					edgeMap.map(ds, alt);
1262 				}
1263 			}
1264 		}
1265 		//System.out.println("edge map: "+edgeMap);
1266 
1267 		// TODO: how do we know we covered stuff?
1268 
1269 		// build an LL(1) optimized DFA with edge for each altLook[i]
1270 		DFA lookaheadDFA = new LL1DFA(decision, decisionStartState, edgeMap);
1271 		setLookaheadDFA(decision, lookaheadDFA);
1272 
1273 		// create map from line:col to decision DFA (for ANTLRWorks)
1274 		updateLineColumnToLookaheadDFAMap(lookaheadDFA);
1275 
1276 		return lookaheadDFA;
1277 	}
1278 
updateLineColumnToLookaheadDFAMap(DFA lookaheadDFA)1279 	private void updateLineColumnToLookaheadDFAMap(DFA lookaheadDFA) {
1280 		GrammarAST decisionAST = nfa.grammar.getDecisionBlockAST(lookaheadDFA.decisionNumber);
1281 		int line = decisionAST.getLine();
1282 		int col = decisionAST.getCharPositionInLine();
1283 		lineColumnToLookaheadDFAMap.put(new StringBuffer().append(line).append(":")
1284 										.append(col).toString(), lookaheadDFA);
1285 	}
1286 
makeEdgeSetsDisjoint(List<IntervalSet> edges)1287 	protected List<IntervalSet> makeEdgeSetsDisjoint(List<IntervalSet> edges) {
1288 		OrderedHashSet<IntervalSet> disjointSets = new OrderedHashSet<IntervalSet>();
1289 		// walk each incoming edge label/set and add to disjoint set
1290 		int numEdges = edges.size();
1291 		for (int e = 0; e < numEdges; e++) {
1292 			IntervalSet t = edges.get(e);
1293 			if ( disjointSets.contains(t) ) { // exact set present
1294 				continue;
1295 			}
1296 
1297 			// compare t with set i for disjointness
1298 			IntervalSet remainder = t; // remainder starts out as whole set to add
1299 			int numDisjointElements = disjointSets.size();
1300 			for (int i = 0; i < numDisjointElements; i++) {
1301 				IntervalSet s_i = disjointSets.get(i);
1302 
1303 				if ( t.and(s_i).isNil() ) { // nothing in common
1304 					continue;
1305 				}
1306 				//System.out.println(label+" collides with "+rl);
1307 
1308 				// For any (s_i, t) with s_i&t!=nil replace with (s_i-t, s_i&t)
1309 				// (ignoring s_i-t if nil; don't put in list)
1310 
1311 				// Replace existing s_i with intersection since we
1312 				// know that will always be a non nil character class
1313 				IntervalSet intersection = s_i.and(t);
1314 				disjointSets.set(i, intersection);
1315 
1316 				// Compute s_i-t to see what is in current set and not in incoming
1317 				IntervalSet existingMinusNewElements = s_i.subtract(t);
1318 				//System.out.println(s_i+"-"+t+"="+existingMinusNewElements);
1319 				if ( !existingMinusNewElements.isNil() ) {
1320 					// found a new character class, add to the end (doesn't affect
1321 					// outer loop duration due to n computation a priori.
1322 					disjointSets.add(existingMinusNewElements);
1323 				}
1324 
1325 				// anything left to add to the reachableLabels?
1326 				remainder = t.subtract(s_i);
1327 				if ( remainder.isNil() ) {
1328 					break; // nothing left to add to set.  done!
1329 				}
1330 
1331 				t = remainder;
1332 			}
1333 			if ( !remainder.isNil() ) {
1334 				disjointSets.add(remainder);
1335 			}
1336 		}
1337 		return disjointSets.elements();
1338 	}
1339 
createLookaheadDFA(int decision, boolean wackTempStructures)1340 	public DFA createLookaheadDFA(int decision, boolean wackTempStructures) {
1341 		Decision d = getDecision(decision);
1342 		String enclosingRule = d.startState.enclosingRule.name;
1343 		Rule r = d.startState.enclosingRule;
1344 
1345 		//System.out.println("createLookaheadDFA(): "+enclosingRule+" dec "+decision+"; synprednames prev used "+synPredNamesUsedInDFA);
1346 		NFAState decisionStartState = getDecisionNFAStartState(decision);
1347 		long startDFA=0,stopDFA;
1348 		if ( composite.watchNFAConversion ) {
1349 			System.out.println("--------------------\nbuilding lookahead DFA (d="
1350 							   +decisionStartState.getDecisionNumber()+") for "+
1351 							   decisionStartState.getDescription());
1352 			startDFA = System.currentTimeMillis();
1353 		}
1354 
1355 		DFA lookaheadDFA = new DFA(decision, decisionStartState);
1356 		// Retry to create a simpler DFA if analysis failed (non-LL(*),
1357 		// recursion overflow, or time out).
1358 		boolean failed =
1359 			lookaheadDFA.probe.isNonLLStarDecision() ||
1360 			lookaheadDFA.probe.analysisOverflowed();
1361 		if ( failed && lookaheadDFA.okToRetryDFAWithK1() ) {
1362 			// set k=1 option and try again.
1363 			// First, clean up tracking stuff
1364 			decisionsWhoseDFAsUsesSynPreds.remove(lookaheadDFA);
1365 			// TODO: clean up synPredNamesUsedInDFA also (harder)
1366 			d.blockAST.setBlockOption(this, "k", Utils.integer(1));
1367 			if ( composite.watchNFAConversion ) {
1368 				System.out.print("trying decision "+decision+
1369 								 " again with k=1; reason: "+
1370 								 lookaheadDFA.getReasonForFailure());
1371 			}
1372 			lookaheadDFA = null; // make sure other memory is "free" before redoing
1373 			lookaheadDFA = new DFA(decision, decisionStartState);
1374 		}
1375 
1376 		setLookaheadDFA(decision, lookaheadDFA);
1377 
1378 		if ( wackTempStructures ) {
1379 			for (DFAState s : lookaheadDFA.getUniqueStates().values()) {
1380 				s.reset();
1381 			}
1382 		}
1383 
1384 		// create map from line:col to decision DFA (for ANTLRWorks)
1385 		updateLineColumnToLookaheadDFAMap(lookaheadDFA);
1386 
1387 		if ( composite.watchNFAConversion ) {
1388 			stopDFA = System.currentTimeMillis();
1389 			System.out.println("cost: "+lookaheadDFA.getNumberOfStates()+
1390 							   " states, "+(int)(stopDFA-startDFA)+" ms");
1391 		}
1392 		//System.out.println("after create DFA; synPredNamesUsedInDFA="+synPredNamesUsedInDFA);
1393 		return lookaheadDFA;
1394 	}
1395 
1396 	/** Terminate DFA creation (grammar analysis).
1397 	 */
externallyAbortNFAToDFAConversion()1398 	public void externallyAbortNFAToDFAConversion() {
1399 		externalAnalysisAbort = true;
1400 	}
1401 
NFAToDFAConversionExternallyAborted()1402 	public boolean NFAToDFAConversionExternallyAborted() {
1403 		return externalAnalysisAbort;
1404 	}
1405 
1406 	/** Return a new unique integer in the token type space */
getNewTokenType()1407 	public int getNewTokenType() {
1408 		composite.maxTokenType++;
1409 		return composite.maxTokenType;
1410 	}
1411 
1412 	/** Define a token at a particular token type value.  Blast an
1413 	 *  old value with a new one.  This is called normal grammar processsing
1414 	 *  and during import vocab operations to set tokens with specific values.
1415 	 */
defineToken(String text, int tokenType)1416 	public void defineToken(String text, int tokenType) {
1417 		//System.out.println("defineToken("+text+", "+tokenType+")");
1418 		if ( composite.tokenIDToTypeMap.get(text)!=null ) {
1419 			// already defined?  Must be predefined one like EOF;
1420 			// do nothing
1421 			return;
1422 		}
1423 		// the index in the typeToTokenList table is actually shifted to
1424 		// hold faux labels as you cannot have negative indices.
1425 		if ( text.charAt(0)=='\'' ) {
1426 			composite.stringLiteralToTypeMap.put(text, Utils.integer(tokenType));
1427 			// track in reverse index too
1428 			if ( tokenType>=composite.typeToStringLiteralList.size() ) {
1429 				composite.typeToStringLiteralList.setSize(tokenType+1);
1430 			}
1431 			composite.typeToStringLiteralList.set(tokenType, text);
1432 		}
1433 		else { // must be a label like ID
1434 			composite.tokenIDToTypeMap.put(text, Utils.integer(tokenType));
1435 		}
1436 		int index = Label.NUM_FAUX_LABELS+tokenType-1;
1437 		//System.out.println("defining "+name+" token "+text+" at type="+tokenType+", index="+index);
1438 		composite.maxTokenType = Math.max(composite.maxTokenType, tokenType);
1439 		if ( index>=composite.typeToTokenList.size() ) {
1440 			composite.typeToTokenList.setSize(index+1);
1441 		}
1442 		String prevToken = composite.typeToTokenList.get(index);
1443 		if ( prevToken==null || prevToken.charAt(0)=='\'' ) {
1444 			// only record if nothing there before or if thing before was a literal
1445 			composite.typeToTokenList.set(index, text);
1446 		}
1447 	}
1448 
1449 	/** Define a new rule.  A new rule index is created by incrementing
1450 	 *  ruleIndex.
1451 	 */
defineRule(Token ruleToken, String modifier, Map<String, Object> options, GrammarAST tree, GrammarAST argActionAST, int numAlts)1452 	public void defineRule(Token ruleToken,
1453 						   String modifier,
1454 						   Map<String, Object> options,
1455 						   GrammarAST tree,
1456 						   GrammarAST argActionAST,
1457 						   int numAlts)
1458 	{
1459 		String ruleName = ruleToken.getText();
1460 		if ( getLocallyDefinedRule(ruleName)!=null ) {
1461 			ErrorManager.grammarError(ErrorManager.MSG_RULE_REDEFINITION,
1462 									  this, ruleToken, ruleName);
1463 			return;
1464 		}
1465 
1466 		if ( (type==Grammar.PARSER||type==Grammar.TREE_PARSER) &&
1467 			 Character.isUpperCase(ruleName.charAt(0)) )
1468 		{
1469 			ErrorManager.grammarError(ErrorManager.MSG_LEXER_RULES_NOT_ALLOWED,
1470 									  this, ruleToken, ruleName);
1471 			return;
1472 		}
1473 
1474 		Rule r = new Rule(this, ruleName, composite.ruleIndex, numAlts);
1475 		/*
1476 		System.out.println("defineRule("+ruleName+",modifier="+modifier+
1477 						   "): index="+r.index+", nalts="+numAlts);
1478 		*/
1479 		r.modifier = modifier;
1480 		nameToRuleMap.put(ruleName, r);
1481 		setRuleAST(ruleName, tree);
1482 		r.setOptions(options, ruleToken);
1483 		r.argActionAST = argActionAST;
1484 		composite.ruleIndexToRuleList.setSize(composite.ruleIndex+1);
1485 		composite.ruleIndexToRuleList.set(composite.ruleIndex, r);
1486 		composite.ruleIndex++;
1487 		if ( ruleName.startsWith(SYNPRED_RULE_PREFIX) ) {
1488 			r.isSynPred = true;
1489 		}
1490 	}
1491 
1492 	/** Define a new predicate and get back its name for use in building
1493 	 *  a semantic predicate reference to the syn pred.
1494 	 */
defineSyntacticPredicate(GrammarAST blockAST, String currentRuleName)1495 	public String defineSyntacticPredicate(GrammarAST blockAST,
1496 										   String currentRuleName)
1497 	{
1498 		if ( nameToSynpredASTMap==null ) {
1499 			nameToSynpredASTMap = new LinkedHashMap<String, GrammarAST>();
1500 		}
1501 		String predName =
1502 			SYNPRED_RULE_PREFIX+(nameToSynpredASTMap.size() + 1)+"_"+name;
1503 		blockAST.setTreeEnclosingRuleNameDeeply(predName);
1504 		nameToSynpredASTMap.put(predName, blockAST);
1505 		return predName;
1506 	}
1507 
getSyntacticPredicates()1508 	public LinkedHashMap<String, GrammarAST> getSyntacticPredicates() {
1509 		return nameToSynpredASTMap;
1510 	}
1511 
getSyntacticPredicate(String name)1512 	public GrammarAST getSyntacticPredicate(String name) {
1513 		if ( nameToSynpredASTMap==null ) {
1514 			return null;
1515 		}
1516 		return nameToSynpredASTMap.get(name);
1517 	}
1518 
synPredUsedInDFA(DFA dfa, SemanticContext semCtx)1519 	public void synPredUsedInDFA(DFA dfa, SemanticContext semCtx) {
1520 		decisionsWhoseDFAsUsesSynPreds.add(dfa);
1521 		semCtx.trackUseOfSyntacticPredicates(this); // walk ctx looking for preds
1522 	}
1523 
1524 	/*
1525 	public Set<Rule> getRuleNamesVisitedDuringLOOK() {
1526 		return rulesSensitiveToOtherRules;
1527 	}
1528 	*/
1529 
1530 	/** Given @scope::name {action} define it for this grammar.  Later,
1531 	 *  the code generator will ask for the actions table.  For composite
1532      *  grammars, make sure header action propogates down to all delegates.
1533 	 */
defineNamedAction(GrammarAST ampersandAST, String scope, GrammarAST nameAST, GrammarAST actionAST)1534 	public void defineNamedAction(GrammarAST ampersandAST,
1535 								  String scope,
1536 								  GrammarAST nameAST,
1537 								  GrammarAST actionAST)
1538 	{
1539 		if ( scope==null ) {
1540 			scope = getDefaultActionScope(type);
1541 		}
1542 		//System.out.println("Grammar "+name+" define @"+scope+"::"+nameAST.getText()+"{"+actionAST.getText()+"}");
1543 		String actionName = nameAST.getText();
1544 		Map<String, Object> scopeActions = getActions().get(scope);
1545 		if ( scopeActions==null ) {
1546 			scopeActions = new HashMap<String, Object>();
1547 			getActions().put(scope, scopeActions);
1548 		}
1549 		Object a = scopeActions.get(actionName);
1550 		if ( a!=null ) {
1551 			ErrorManager.grammarError(
1552 				ErrorManager.MSG_ACTION_REDEFINITION,this,
1553 				nameAST.getToken(),nameAST.getText());
1554 		}
1555 		else {
1556 			scopeActions.put(actionName,actionAST);
1557 		}
1558         // propogate header (regardless of scope (lexer, parser, ...) ?
1559         if ( this==composite.getRootGrammar() && actionName.equals("header") ) {
1560             List<Grammar> allgrammars = composite.getRootGrammar().getDelegates();
1561             for (Grammar delegate : allgrammars) {
1562 				if ( target.isValidActionScope(delegate.type, scope) ) {
1563 					//System.out.println("propogate to "+delegate.name);
1564                 	delegate.defineNamedAction(ampersandAST, scope, nameAST, actionAST);
1565 				}
1566             }
1567         }
1568     }
1569 
setSynPredGateIfNotAlready(ST gateST)1570     public void setSynPredGateIfNotAlready(ST gateST) {
1571         String scope = getDefaultActionScope(type);
1572         Map<String, Object> actionsForGrammarScope = getActions().get(scope);
1573         // if no synpredgate action set by user then set
1574         if ( (actionsForGrammarScope==null ||
1575              !actionsForGrammarScope.containsKey(Grammar.SYNPREDGATE_ACTION_NAME)) )
1576         {
1577             if ( actionsForGrammarScope==null ) {
1578                 actionsForGrammarScope=new HashMap<String, Object>();
1579                 getActions().put(scope, actionsForGrammarScope);
1580             }
1581             actionsForGrammarScope.put(Grammar.SYNPREDGATE_ACTION_NAME,
1582                                        gateST);
1583         }
1584     }
1585 
getActions()1586 	public Map<String, Map<String, Object>> getActions() {
1587 		return actions;
1588 	}
1589 
1590 	/** Given a grammar type, what should be the default action scope?
1591 	 *  If I say @members in a COMBINED grammar, for example, the
1592 	 *  default scope should be "parser".
1593 	 */
getDefaultActionScope(int grammarType)1594 	public String getDefaultActionScope(int grammarType) {
1595 		switch (grammarType) {
1596 			case Grammar.LEXER :
1597 				return "lexer";
1598 			case Grammar.PARSER :
1599 			case Grammar.COMBINED :
1600 				return "parser";
1601 			case Grammar.TREE_PARSER :
1602 				return "treeparser";
1603 		}
1604 		return null;
1605 	}
1606 
defineLexerRuleFoundInParser(Token ruleToken, GrammarAST ruleAST)1607 	public void defineLexerRuleFoundInParser(Token ruleToken,
1608 											 GrammarAST ruleAST)
1609 	{
1610 //		System.out.println("rule tree is:\n"+ruleAST.toStringTree());
1611 		/*
1612 		String ruleText = tokenBuffer.toOriginalString(ruleAST.ruleStartTokenIndex,
1613 											   ruleAST.ruleStopTokenIndex);
1614 		*/
1615 		// first, create the text of the rule
1616 		StringBuilder buf = new StringBuilder();
1617 		buf.append("// $ANTLR src \"");
1618 		buf.append(getFileName());
1619 		buf.append("\" ");
1620 		buf.append(ruleAST.getLine());
1621 		buf.append("\n");
1622 		for (int i=ruleAST.getTokenStartIndex();
1623 			 i<=ruleAST.getTokenStopIndex() && i<tokenBuffer.size();
1624 			 i++)
1625 		{
1626 			CommonToken t = (CommonToken)tokenBuffer.get(i);
1627 			// undo the text deletions done by the lexer (ugh)
1628 			if ( t.getType()==ANTLRParser.BLOCK ) {
1629 				buf.append("(");
1630 			}
1631 			else if ( t.getType()==ANTLRParser.ACTION ) {
1632 				buf.append("{");
1633 				buf.append(t.getText());
1634 				buf.append("}");
1635 			}
1636 			else if ( t.getType()==ANTLRParser.SEMPRED ||
1637 					  t.getType()==ANTLRParser.SYN_SEMPRED ||
1638 					  t.getType()==ANTLRParser.GATED_SEMPRED ||
1639 					  t.getType()==ANTLRParser.BACKTRACK_SEMPRED )
1640 			{
1641 				buf.append("{");
1642 				buf.append(t.getText());
1643 				buf.append("}?");
1644 			}
1645 			else if ( t.getType()==ANTLRParser.ARG_ACTION ) {
1646 				buf.append("[");
1647 				buf.append(t.getText());
1648 				buf.append("]");
1649 			}
1650 			else {
1651 				buf.append(t.getText());
1652 			}
1653 		}
1654 		String ruleText = buf.toString();
1655 		//System.out.println("[["+ruleText+"]]");
1656 		// now put the rule into the lexer grammar template
1657 		if ( getGrammarIsRoot() ) { // don't build lexers for delegates
1658 			lexerGrammarST.add("rules", ruleText);
1659 		}
1660 		// track this lexer rule's name
1661 		composite.lexerRules.add(ruleToken.getText());
1662 	}
1663 
1664 	/** If someone does PLUS='+' in the parser, must make sure we get
1665 	 *  "PLUS : '+' ;" in lexer not "T73 : '+';"
1666 	 */
defineLexerRuleForAliasedStringLiteral(String tokenID, String literal, int tokenType)1667 	public void defineLexerRuleForAliasedStringLiteral(String tokenID,
1668 													   String literal,
1669 													   int tokenType)
1670 	{
1671 		if ( getGrammarIsRoot() ) { // don't build lexers for delegates
1672 			//System.out.println("defineLexerRuleForAliasedStringLiteral: "+literal+" "+tokenType);
1673 			lexerGrammarST.addAggr("literals.{ruleName,type,literal}",
1674 										tokenID,
1675 										Utils.integer(tokenType),
1676 										literal);
1677 		}
1678 		// track this lexer rule's name
1679 		composite.lexerRules.add(tokenID);
1680 	}
1681 
defineLexerRuleForStringLiteral(String literal, int tokenType)1682 	public void defineLexerRuleForStringLiteral(String literal, int tokenType) {
1683 		//System.out.println("defineLexerRuleForStringLiteral: "+literal+" "+tokenType);
1684 		// compute new token name like T237 and define it as having tokenType
1685 		String tokenID = computeTokenNameFromLiteral(tokenType,literal);
1686 		defineToken(tokenID, tokenType);
1687 		// tell implicit lexer to define a rule to match the literal
1688 		if ( getGrammarIsRoot() ) { // don't build lexers for delegates
1689 			lexerGrammarST.addAggr("literals.{ruleName,type,literal}",
1690 										tokenID,
1691 										Utils.integer(tokenType),
1692 										literal);
1693 		}
1694 	}
1695 
getLocallyDefinedRule(String ruleName)1696 	public Rule getLocallyDefinedRule(String ruleName) {
1697 		Rule r = nameToRuleMap.get(ruleName);
1698 		return r;
1699 	}
1700 
getRule(String ruleName)1701 	public Rule getRule(String ruleName) {
1702 		Rule r = composite.getRule(ruleName);
1703 		/*
1704 		if ( r!=null && r.grammar != this ) {
1705 			System.out.println(name+".getRule("+ruleName+")="+r);
1706 		}
1707 		*/
1708 		return r;
1709 	}
1710 
getRule(String scopeName, String ruleName)1711 	public Rule getRule(String scopeName, String ruleName) {
1712 		if ( scopeName!=null ) { // scope override
1713 			Grammar scope = composite.getGrammar(scopeName);
1714 			if ( scope==null ) {
1715 				return null;
1716 			}
1717 			return scope.getLocallyDefinedRule(ruleName);
1718 		}
1719 		return getRule(ruleName);
1720 	}
1721 
getRuleIndex(String scopeName, String ruleName)1722 	public int getRuleIndex(String scopeName, String ruleName) {
1723 		Rule r = getRule(scopeName, ruleName);
1724 		if ( r!=null ) {
1725 			return r.index;
1726 		}
1727 		return INVALID_RULE_INDEX;
1728 	}
1729 
getRuleIndex(String ruleName)1730 	public int getRuleIndex(String ruleName) {
1731 		return getRuleIndex(null, ruleName);
1732 	}
1733 
getRuleName(int ruleIndex)1734 	public String getRuleName(int ruleIndex) {
1735 		Rule r = composite.ruleIndexToRuleList.get(ruleIndex);
1736 		if ( r!=null ) {
1737 			return r.name;
1738 		}
1739 		return null;
1740 	}
1741 
1742 	/** Should codegen.g gen rule for ruleName?
1743 	 * 	If synpred, only gen if used in a DFA.
1744 	 *  If regular rule, only gen if not overridden in delegator
1745 	 *  Always gen Tokens rule though.
1746 	 */
generateMethodForRule(String ruleName)1747 	public boolean generateMethodForRule(String ruleName) {
1748 		if ( ruleName.equals(ARTIFICIAL_TOKENS_RULENAME) ) {
1749 			// always generate Tokens rule to satisfy lexer interface
1750 			// but it may have no alternatives.
1751 			return true;
1752 		}
1753 		if ( overriddenRules.contains(ruleName) ) {
1754 			// don't generate any overridden rules
1755 			return false;
1756 		}
1757 		// generate if non-synpred or synpred used in a DFA
1758 		Rule r = getLocallyDefinedRule(ruleName);
1759 		return !r.isSynPred ||
1760 			   (r.isSynPred&&synPredNamesUsedInDFA.contains(ruleName));
1761 	}
1762 
defineGlobalScope(String name, Token scopeAction)1763 	public AttributeScope defineGlobalScope(String name, Token scopeAction) {
1764 		AttributeScope scope = new AttributeScope(this, name, scopeAction);
1765 		scopes.put(name,scope);
1766 		return scope;
1767 	}
1768 
createReturnScope(String ruleName, Token retAction)1769 	public AttributeScope createReturnScope(String ruleName, Token retAction) {
1770 		AttributeScope scope = new AttributeScope(this, ruleName, retAction);
1771 		scope.isReturnScope = true;
1772 		return scope;
1773 	}
1774 
createRuleScope(String ruleName, Token scopeAction)1775 	public AttributeScope createRuleScope(String ruleName, Token scopeAction) {
1776 		AttributeScope scope = new AttributeScope(this, ruleName, scopeAction);
1777 		scope.isDynamicRuleScope = true;
1778 		return scope;
1779 	}
1780 
createParameterScope(String ruleName, Token argAction)1781 	public AttributeScope createParameterScope(String ruleName, Token argAction) {
1782 		AttributeScope scope = new AttributeScope(this, ruleName, argAction);
1783 		scope.isParameterScope = true;
1784 		return scope;
1785 	}
1786 
1787 	/** Get a global scope */
getGlobalScope(String name)1788 	public AttributeScope getGlobalScope(String name) {
1789 		return scopes.get(name);
1790 	}
1791 
getGlobalScopes()1792 	public Map<String, AttributeScope> getGlobalScopes() {
1793 		return scopes;
1794 	}
1795 
1796 	/** Define a label defined in a rule r; check the validity then ask the
1797 	 *  Rule object to actually define it.
1798 	 */
defineLabel(Rule r, Token label, GrammarAST element, int type)1799 	protected void defineLabel(Rule r, Token label, GrammarAST element, int type) {
1800 		boolean err = nameSpaceChecker.checkForLabelTypeMismatch(r, label, type);
1801 		if ( err ) {
1802 			return;
1803 		}
1804 		r.defineLabel(label, element, type);
1805 	}
1806 
defineTokenRefLabel(String ruleName, Token label, GrammarAST tokenRef)1807 	public void defineTokenRefLabel(String ruleName,
1808 									Token label,
1809 									GrammarAST tokenRef)
1810 	{
1811 		Rule r = getLocallyDefinedRule(ruleName);
1812 		if ( r!=null ) {
1813 			if ( type==LEXER &&
1814 				 (tokenRef.getType()==ANTLRParser.CHAR_LITERAL||
1815 				  tokenRef.getType()==ANTLRParser.BLOCK||
1816 				  tokenRef.getType()==ANTLRParser.NOT||
1817 				  tokenRef.getType()==ANTLRParser.CHAR_RANGE||
1818 				  tokenRef.getType()==ANTLRParser.WILDCARD))
1819 			{
1820 				defineLabel(r, label, tokenRef, CHAR_LABEL);
1821 			}
1822             else {
1823 				defineLabel(r, label, tokenRef, TOKEN_LABEL);
1824 			}
1825 		}
1826 	}
1827 
defineWildcardTreeLabel(String ruleName, Token label, GrammarAST tokenRef)1828     public void defineWildcardTreeLabel(String ruleName,
1829                                            Token label,
1830                                            GrammarAST tokenRef)
1831     {
1832         Rule r = getLocallyDefinedRule(ruleName);
1833         if ( r!=null ) {
1834             defineLabel(r, label, tokenRef, WILDCARD_TREE_LABEL);
1835         }
1836     }
1837 
defineWildcardTreeListLabel(String ruleName, Token label, GrammarAST tokenRef)1838     public void defineWildcardTreeListLabel(String ruleName,
1839                                            Token label,
1840                                            GrammarAST tokenRef)
1841     {
1842         Rule r = getLocallyDefinedRule(ruleName);
1843         if ( r!=null ) {
1844             defineLabel(r, label, tokenRef, WILDCARD_TREE_LIST_LABEL);
1845         }
1846     }
1847 
defineRuleRefLabel(String ruleName, Token label, GrammarAST ruleRef)1848     public void defineRuleRefLabel(String ruleName,
1849 								   Token label,
1850 								   GrammarAST ruleRef)
1851 	{
1852 		Rule r = getLocallyDefinedRule(ruleName);
1853 		if ( r!=null ) {
1854 			defineLabel(r, label, ruleRef, RULE_LABEL);
1855 		}
1856 	}
1857 
defineTokenListLabel(String ruleName, Token label, GrammarAST element)1858 	public void defineTokenListLabel(String ruleName,
1859 									 Token label,
1860 									 GrammarAST element)
1861 	{
1862 		Rule r = getLocallyDefinedRule(ruleName);
1863 		if ( r!=null ) {
1864 			defineLabel(r, label, element, TOKEN_LIST_LABEL);
1865 		}
1866 	}
1867 
defineRuleListLabel(String ruleName, Token label, GrammarAST element)1868 	public void defineRuleListLabel(String ruleName,
1869 									Token label,
1870 									GrammarAST element)
1871 	{
1872 		Rule r = getLocallyDefinedRule(ruleName);
1873 		if ( r!=null ) {
1874 			if ( !r.getHasMultipleReturnValues() ) {
1875 				ErrorManager.grammarError(
1876 					ErrorManager.MSG_LIST_LABEL_INVALID_UNLESS_RETVAL_STRUCT,this,
1877 					label,label.getText());
1878 			}
1879 			defineLabel(r, label, element, RULE_LIST_LABEL);
1880 		}
1881 	}
1882 
1883 	/** Given a set of all rewrite elements on right of -&gt;, filter for
1884 	 *  label types such as Grammar.TOKEN_LABEL, Grammar.TOKEN_LIST_LABEL, ...
1885 	 *  Return a displayable token type name computed from the GrammarAST.
1886 	 */
getLabels(Set<GrammarAST> rewriteElements, int labelType)1887 	public Set<String> getLabels(Set<GrammarAST> rewriteElements, int labelType) {
1888 		Set<String> labels = new HashSet<String>();
1889 		for (GrammarAST el : rewriteElements) {
1890 			if ( el.getType()==ANTLRParser.LABEL ) {
1891 				String labelName = el.getText();
1892 				Rule enclosingRule = getLocallyDefinedRule(el.enclosingRuleName);
1893 				if ( enclosingRule==null ) continue;
1894 				LabelElementPair pair = enclosingRule.getLabel(labelName);
1895                 /*
1896                 // if tree grammar and we have a wildcard, only notice it
1897                 // when looking for rule labels not token label. x=. should
1898                 // look like a rule ref since could be subtree.
1899                 if ( type==TREE_PARSER && pair!=null &&
1900                      pair.elementRef.getType()==ANTLRParser.WILDCARD )
1901                 {
1902                     if ( labelType==WILDCARD_TREE_LABEL ) {
1903                         labels.add(labelName);
1904                         continue;
1905                     }
1906                     else continue;
1907                 }
1908                  */
1909                 // if valid label and type is what we're looking for
1910 				// and not ref to old value val $rule, add to list
1911 				if ( pair!=null && pair.type==labelType &&
1912 					 !labelName.equals(el.enclosingRuleName) )
1913 				{
1914 					labels.add(labelName);
1915 				}
1916 			}
1917 		}
1918 		return labels;
1919 	}
1920 
1921 	/** Before generating code, we examine all actions that can have
1922 	 *  $x.y and $y stuff in them because some code generation depends on
1923 	 *  Rule.referencedPredefinedRuleAttributes.  I need to remove unused
1924 	 *  rule labels for example.
1925 	 */
examineAllExecutableActions()1926 	protected void examineAllExecutableActions() {
1927 		Collection<Rule> rules = getRules();
1928 		for (Rule r : rules) {
1929 			// walk all actions within the rule elements, args, and exceptions
1930 			List<GrammarAST> actions = r.getInlineActions();
1931 			for (int i = 0; i < actions.size(); i++) {
1932 				GrammarAST actionAST = actions.get(i);
1933 				ActionAnalysis sniffer =
1934 					new ActionAnalysis(this, r.name, actionAST);
1935 				sniffer.analyze();
1936 			}
1937 			// walk any named actions like @init, @after
1938 			Collection<? extends Object> namedActions = r.getActions().values();
1939 			for (Object namedAction : namedActions) {
1940 				GrammarAST actionAST = (GrammarAST)namedAction;
1941 				ActionAnalysis sniffer =
1942 					new ActionAnalysis(this, r.name, actionAST);
1943 				sniffer.analyze();
1944 			}
1945 		}
1946 	}
1947 
1948 	/** Remove all labels on rule refs whose target rules have no return value.
1949 	 *  Do this for all rules in grammar.
1950 	 */
checkAllRulesForUselessLabels()1951 	public void checkAllRulesForUselessLabels() {
1952 		if ( type==LEXER ) {
1953 			return;
1954 		}
1955 		Set<String> rules = nameToRuleMap.keySet();
1956 		for (String ruleName : rules) {
1957 			Rule r = getRule(ruleName);
1958 			removeUselessLabels(r.getRuleLabels());
1959 			removeUselessLabels(r.getRuleListLabels());
1960 		}
1961 	}
1962 
1963 	/** A label on a rule is useless if the rule has no return value, no
1964 	 *  tree or template output, and it is not referenced in an action.
1965 	 */
removeUselessLabels(Map<String, LabelElementPair> ruleToElementLabelPairMap)1966 	protected void removeUselessLabels(Map<String, LabelElementPair> ruleToElementLabelPairMap) {
1967 		if ( ruleToElementLabelPairMap==null ) {
1968 			return;
1969 		}
1970 		Collection<LabelElementPair> labels = ruleToElementLabelPairMap.values();
1971 		List<String> kill = new ArrayList<String>();
1972 		for (LabelElementPair pair : labels) {
1973 			Rule refdRule = getRule(pair.elementRef.getText());
1974 			if ( refdRule!=null && !refdRule.getHasReturnValue() && !pair.actionReferencesLabel ) {
1975 				//System.out.println(pair.label.getText()+" is useless");
1976 				kill.add(pair.label.getText());
1977 			}
1978 		}
1979 		for (int i = 0; i < kill.size(); i++) {
1980 			String labelToKill = kill.get(i);
1981 			// System.out.println("kill "+labelToKill);
1982 			ruleToElementLabelPairMap.remove(labelToKill);
1983 		}
1984 	}
1985 
1986 	/** Track a rule reference within an outermost alt of a rule.  Used
1987 	 *  at the moment to decide if $ruleref refers to a unique rule ref in
1988 	 *  the alt.  Rewrite rules force tracking of all rule AST results.
1989 	 *
1990 	 *  This data is also used to verify that all rules have been defined.
1991 	 */
altReferencesRule(String enclosingRuleName, GrammarAST refScopeAST, GrammarAST refAST, int outerAltNum)1992 	public void altReferencesRule(String enclosingRuleName,
1993 								  GrammarAST refScopeAST,
1994 								  GrammarAST refAST,
1995 								  int outerAltNum)
1996 	{
1997 		/* Do nothing for now; not sure need; track S.x as x
1998 		String scope = null;
1999 		Grammar scopeG = null;
2000 		if ( refScopeAST!=null ) {
2001 			if ( !scopedRuleRefs.contains(refScopeAST) ) {
2002 				scopedRuleRefs.add(refScopeAST);
2003 			}
2004 			scope = refScopeAST.getText();
2005 		}
2006 		*/
2007 		Rule r = getRule(enclosingRuleName);
2008 		if ( r==null ) {
2009 			return; // no error here; see NameSpaceChecker
2010 		}
2011 		r.trackRuleReferenceInAlt(refAST, outerAltNum);
2012 		Token refToken = refAST.getToken();
2013 		if ( !ruleRefs.contains(refAST) ) {
2014 			ruleRefs.add(refAST);
2015 		}
2016 	}
2017 
2018 	/** Track a token reference within an outermost alt of a rule.  Used
2019 	 *  to decide if $tokenref refers to a unique token ref in
2020 	 *  the alt. Does not track literals!
2021 	 *
2022 	 *  Rewrite rules force tracking of all tokens.
2023 	 */
altReferencesTokenID(String ruleName, GrammarAST refAST, int outerAltNum)2024 	public void altReferencesTokenID(String ruleName, GrammarAST refAST, int outerAltNum) {
2025 		Rule r = getLocallyDefinedRule(ruleName);
2026 		if ( r==null ) {
2027 			return;
2028 		}
2029 		r.trackTokenReferenceInAlt(refAST, outerAltNum);
2030 		if ( !tokenIDRefs.contains(refAST.getToken()) ) {
2031 			tokenIDRefs.add(refAST.getToken());
2032 		}
2033 	}
2034 
2035 	/** To yield smaller, more readable code, track which rules have their
2036 	 *  predefined attributes accessed.  If the rule has no user-defined
2037 	 *  return values, then don't generate the return value scope classes
2038 	 *  etc...  Make the rule have void return value.  Don't track for lexer
2039 	 *  rules.
2040 	 */
referenceRuleLabelPredefinedAttribute(String ruleName)2041 	public void referenceRuleLabelPredefinedAttribute(String ruleName) {
2042 		Rule r = getRule(ruleName);
2043 		if ( r!=null && type!=LEXER ) {
2044 			// indicate that an action ref'd an attr unless it's in a lexer
2045 			// so that $ID.text refs don't force lexer rules to define
2046 			// return values...Token objects are created by the caller instead.
2047 			r.referencedPredefinedRuleAttributes = true;
2048 		}
2049 	}
2050 
checkAllRulesForLeftRecursion()2051 	public List<? extends Collection<? extends Rule>> checkAllRulesForLeftRecursion() {
2052 		return sanity.checkAllRulesForLeftRecursion();
2053 	}
2054 
2055 	/** Return a list of left-recursive rules; no analysis can be done
2056 	 *  successfully on these.  Useful to skip these rules then and also
2057 	 *  for ANTLRWorks to highlight them.
2058 	 */
getLeftRecursiveRules()2059 	public Set<Rule> getLeftRecursiveRules() {
2060 		if ( nfa==null ) {
2061 			buildNFA();
2062 		}
2063 		if ( leftRecursiveRules!=null ) {
2064 			return leftRecursiveRules;
2065 		}
2066 		sanity.checkAllRulesForLeftRecursion();
2067 		return leftRecursiveRules;
2068 	}
2069 
checkRuleReference(GrammarAST scopeAST, GrammarAST refAST, GrammarAST argsAST, String currentRuleName)2070 	public void checkRuleReference(GrammarAST scopeAST,
2071 								   GrammarAST refAST,
2072 								   GrammarAST argsAST,
2073 								   String currentRuleName)
2074 	{
2075 		sanity.checkRuleReference(scopeAST, refAST, argsAST, currentRuleName);
2076 	}
2077 
2078 	/** Rules like "a : ;" and "a : {...} ;" should not generate
2079 	 *  try/catch blocks for RecognitionException.  To detect this
2080 	 *  it's probably ok to just look for any reference to an atom
2081 	 *  that can match some input.  W/o that, the rule is unlikey to have
2082 	 *  any else.
2083 	 */
isEmptyRule(GrammarAST block)2084 	public boolean isEmptyRule(GrammarAST block) {
2085 		BitSet nonEmptyTerminals = new BitSet();
2086 		nonEmptyTerminals.set(ANTLRParser.TOKEN_REF);
2087 		nonEmptyTerminals.set(ANTLRParser.STRING_LITERAL);
2088 		nonEmptyTerminals.set(ANTLRParser.CHAR_LITERAL);
2089 		nonEmptyTerminals.set(ANTLRParser.WILDCARD);
2090 		nonEmptyTerminals.set(ANTLRParser.RULE_REF);
2091 		return findFirstTypeOutsideRewrite(block, nonEmptyTerminals) == null;
2092 	}
2093 
findFirstTypeOutsideRewrite(GrammarAST block, BitSet types)2094 	protected GrammarAST findFirstTypeOutsideRewrite(GrammarAST block, BitSet types) {
2095 		ArrayList<GrammarAST> worklist = new ArrayList<GrammarAST>();
2096 		worklist.add(block);
2097 		while (!worklist.isEmpty()) {
2098 			GrammarAST current = worklist.remove(worklist.size() - 1);
2099 			if (current.getType() == ANTLRParser.REWRITE) {
2100 				continue;
2101 			}
2102 
2103 			if (current.getType() >= 0 && types.get(current.getType())) {
2104 				return current;
2105 			}
2106 
2107 			worklist.addAll(Arrays.asList(current.getChildrenAsArray()));
2108 		}
2109 
2110 		return null;
2111 	}
2112 
isAtomTokenType(int ttype)2113 	public boolean isAtomTokenType(int ttype) {
2114 		return ttype == ANTLRParser.WILDCARD||
2115 			   ttype == ANTLRParser.CHAR_LITERAL||
2116 			   ttype == ANTLRParser.CHAR_RANGE||
2117 			   ttype == ANTLRParser.STRING_LITERAL||
2118 			   ttype == ANTLRParser.NOT||
2119 			   (type != LEXER && ttype == ANTLRParser.TOKEN_REF);
2120 	}
2121 
getTokenType(String tokenName)2122 	public int getTokenType(String tokenName) {
2123 		Integer I;
2124 		if ( tokenName.charAt(0)=='\'') {
2125 			I = composite.stringLiteralToTypeMap.get(tokenName);
2126 		}
2127 		else { // must be a label like ID
2128 			I = composite.tokenIDToTypeMap.get(tokenName);
2129 		}
2130 		int i = (I!=null)? I :Label.INVALID;
2131 		//System.out.println("grammar type "+type+" "+tokenName+"->"+i);
2132 		return i;
2133 	}
2134 
2135 	/** Get the list of tokens that are IDs like BLOCK and LPAREN */
getTokenIDs()2136 	public Set<String> getTokenIDs() {
2137 		return composite.tokenIDToTypeMap.keySet();
2138 	}
2139 
2140 	/** Return an ordered integer list of token types that have no
2141 	 *  corresponding token ID like INT or KEYWORD_BEGIN; for stuff
2142 	 *  like 'begin'.
2143 	 */
getTokenTypesWithoutID()2144 	public Collection<Integer> getTokenTypesWithoutID() {
2145 		List<Integer> types = new ArrayList<Integer>();
2146 		for (int t =Label.MIN_TOKEN_TYPE; t<=getMaxTokenType(); t++) {
2147 			String name = getTokenDisplayName(t);
2148 			if ( name.charAt(0)=='\'' ) {
2149 				types.add(Utils.integer(t));
2150 			}
2151 		}
2152 		return types;
2153 	}
2154 
2155 	/** Get a list of all token IDs and literals that have an associated
2156 	 *  token type.
2157 	 */
getTokenDisplayNames()2158 	public Set<String> getTokenDisplayNames() {
2159 		Set<String> names = new HashSet<String>();
2160 		for (int t =Label.MIN_TOKEN_TYPE; t <=getMaxTokenType(); t++) {
2161 			names.add(getTokenDisplayName(t));
2162 		}
2163 		return names;
2164 	}
2165 
2166 	/** Given a literal like (the 3 char sequence with single quotes) 'a',
2167 	 *  return the int value of 'a'. Convert escape sequences here also.
2168 	 *  ANTLR's antlr.g parser does not convert escape sequences.
2169 	 *
2170 	 *  11/26/2005: I changed literals to always be '...' even for strings.
2171 	 *  This routine still works though.
2172 	 */
getCharValueFromGrammarCharLiteral(String literal)2173 	public static int getCharValueFromGrammarCharLiteral(String literal) {
2174 		switch ( literal.length() ) {
2175 			case 3 :
2176 				// 'x'
2177 				return literal.charAt(1); // no escape char
2178 			case 4 :
2179 				// '\x'  (antlr lexer will catch invalid char)
2180 				if ( Character.isDigit(literal.charAt(2)) ) {
2181 					ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
2182 									   "invalid char literal: "+literal);
2183 					return -1;
2184 				}
2185 				int escChar = literal.charAt(2);
2186 				int charVal = ANTLRLiteralEscapedCharValue[escChar];
2187 				if ( charVal==0 ) {
2188 					// Unnecessary escapes like '\{' should just yield {
2189 					return escChar;
2190 				}
2191 				return charVal;
2192 			case 8 :
2193 				// '\u1234'
2194 				String unicodeChars = literal.substring(3,literal.length()-1);
2195 				return Integer.parseInt(unicodeChars, 16);
2196 			default :
2197 				ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
2198 								   "invalid char literal: "+literal);
2199 				return -1;
2200 		}
2201 	}
2202 
2203 	/** ANTLR does not convert escape sequences during the parse phase because
2204 	 *  it could not know how to print String/char literals back out when
2205 	 *  printing grammars etc...  Someone in China might use the real unicode
2206 	 *  char in a literal as it will display on their screen; when printing
2207 	 *  back out, I could not know whether to display or use a unicode escape.
2208 	 *
2209 	 *  This routine converts a string literal with possible escape sequences
2210 	 *  into a pure string of 16-bit char values.  Escapes and unicode \u0000
2211 	 *  specs are converted to pure chars.  return in a buffer; people may
2212 	 *  want to walk/manipulate further.
2213 	 *
2214 	 *  The NFA construction routine must know the actual char values.
2215 	 */
getUnescapedStringFromGrammarStringLiteral(String literal)2216 	public static StringBuffer getUnescapedStringFromGrammarStringLiteral(String literal) {
2217 		//System.out.println("escape: ["+literal+"]");
2218 		StringBuffer buf = new StringBuffer();
2219 		int last = literal.length()-1; // skip quotes on outside
2220 		for (int i=1; i<last; i++) {
2221 			char c = literal.charAt(i);
2222 			if ( c=='\\' ) {
2223 				i++;
2224 				c = literal.charAt(i);
2225 				if ( Character.toUpperCase(c)=='U' ) {
2226 					// \u0000
2227 					i++;
2228 					String unicodeChars = literal.substring(i,i+4);
2229 					// parse the unicode 16 bit hex value
2230 					int val = Integer.parseInt(unicodeChars, 16);
2231 					i+=4-1; // loop will inc by 1; only jump 3 then
2232 					buf.append((char)val);
2233 				}
2234 				else if ( Character.isDigit(c) ) {
2235 					ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
2236 									   "invalid char literal: "+literal);
2237 					buf.append("\\").append(c);
2238 				}
2239 				else {
2240 					buf.append((char)ANTLRLiteralEscapedCharValue[c]); // normal \x escape
2241 				}
2242 			}
2243 			else {
2244 				buf.append(c); // simple char x
2245 			}
2246 		}
2247 		//System.out.println("string: ["+buf.toString()+"]");
2248 		return buf;
2249 	}
2250 
2251 	/** Pull your token definitions from an existing grammar in memory.
2252 	 *  You must use Grammar() ctor then this method then setGrammarContent()
2253 	 *  to make this work.  This was useful primarily for testing and
2254 	 *  interpreting grammars until I added import grammar functionality.
2255 	 *  When you import a grammar you implicitly import its vocabulary as well
2256 	 *  and keep the same token type values.
2257 	 *
2258 	 *  Returns the max token type found.
2259 	 */
importTokenVocabulary(Grammar importFromGr)2260 	public int importTokenVocabulary(Grammar importFromGr) {
2261 		Set<String> importedTokenIDs = importFromGr.getTokenIDs();
2262 		for (String tokenID : importedTokenIDs) {
2263 			int tokenType = importFromGr.getTokenType(tokenID);
2264 			composite.maxTokenType = Math.max(composite.maxTokenType,tokenType);
2265 			if ( tokenType>=Label.MIN_TOKEN_TYPE ) {
2266 				//System.out.println("import token from grammar "+tokenID+"="+tokenType);
2267 				defineToken(tokenID, tokenType);
2268 			}
2269 		}
2270 		return composite.maxTokenType; // return max found
2271 	}
2272 
2273 	/** Import the rules/tokens of a delegate grammar. All delegate grammars are
2274 	 *  read during the ctor of first Grammar created.
2275 	 *
2276 	 *  Do not create NFA here because NFA construction needs to hook up with
2277 	 *  overridden rules in delegation root grammar.
2278 	 */
importGrammar(GrammarAST grammarNameAST, String label)2279 	public void importGrammar(GrammarAST grammarNameAST, String label) {
2280 		String grammarName = grammarNameAST.getText();
2281 		//System.out.println("import "+gfile.getName());
2282 		String gname = grammarName + GRAMMAR_FILE_EXTENSION;
2283 		BufferedReader br = null;
2284 		try {
2285 			String fullName = tool.getLibraryFile(gname);
2286 			FileReader fr = new FileReader(fullName);
2287 			br = new BufferedReader(fr);
2288 			Grammar delegateGrammar;
2289 			delegateGrammar = new Grammar(tool, gname, composite);
2290 			delegateGrammar.label = label;
2291 
2292 			addDelegateGrammar(delegateGrammar);
2293 
2294 			delegateGrammar.parseAndBuildAST(br);
2295 			delegateGrammar.addRulesForSyntacticPredicates();
2296 			if ( !validImport(delegateGrammar) ) {
2297 				ErrorManager.grammarError(ErrorManager.MSG_INVALID_IMPORT,
2298 										  this,
2299 										  grammarNameAST.token,
2300 										  this,
2301 										  delegateGrammar);
2302 				return;
2303 			}
2304 			if ( this.type==COMBINED &&
2305 				 (delegateGrammar.name.equals(this.name+grammarTypeToFileNameSuffix[LEXER])||
2306 				  delegateGrammar.name.equals(this.name+grammarTypeToFileNameSuffix[PARSER])) )
2307 			{
2308 				ErrorManager.grammarError(ErrorManager.MSG_IMPORT_NAME_CLASH,
2309 										  this,
2310 										  grammarNameAST.token,
2311 										  this,
2312 										  delegateGrammar);
2313 				return;
2314 			}
2315 			if ( delegateGrammar.grammarTree!=null ) {
2316 				// we have a valid grammar
2317 				// deal with combined grammars
2318 				if ( delegateGrammar.type == LEXER && this.type == COMBINED ) {
2319 					// ooops, we wasted some effort; tell lexer to read it in
2320 					// later
2321 					lexerGrammarST.add("imports", grammarName);
2322 					// but, this parser grammar will need the vocab
2323 					// so add to composite anyway so we suck in the tokens later
2324 				}
2325 			}
2326 			//System.out.println("Got grammar:\n"+delegateGrammar);
2327 		}
2328 		catch (IOException ioe) {
2329 			ErrorManager.error(ErrorManager.MSG_CANNOT_OPEN_FILE,
2330 							   gname,
2331 							   ioe);
2332 		}
2333 		finally {
2334 			if ( br!=null ) {
2335 				try {
2336 					br.close();
2337 				}
2338 				catch (IOException ioe) {
2339 					ErrorManager.error(ErrorManager.MSG_CANNOT_CLOSE_FILE,
2340 									   gname,
2341 									   ioe);
2342 				}
2343 			}
2344 		}
2345 	}
2346 
2347 	/** add new delegate to composite tree */
addDelegateGrammar(Grammar delegateGrammar)2348 	protected void addDelegateGrammar(Grammar delegateGrammar) {
2349 		CompositeGrammarTree t = composite.delegateGrammarTreeRoot.findNode(this);
2350 		t.addChild(new CompositeGrammarTree(delegateGrammar));
2351 		// make sure new grammar shares this composite
2352 		delegateGrammar.composite = this.composite;
2353 	}
2354 
2355 	/** Load a vocab file &lt;vocabName&gt;.tokens and return max token type found. */
importTokenVocabulary(GrammarAST tokenVocabOptionAST, String vocabName)2356 	public int importTokenVocabulary(GrammarAST tokenVocabOptionAST,
2357 									 String vocabName)
2358 	{
2359 		if ( !getGrammarIsRoot() ) {
2360 			ErrorManager.grammarWarning(ErrorManager.MSG_TOKEN_VOCAB_IN_DELEGATE,
2361 										this,
2362 										tokenVocabOptionAST.token,
2363 										name);
2364 			return composite.maxTokenType;
2365 		}
2366 
2367 		File fullFile = tool.getImportedVocabFile(vocabName);
2368 		try {
2369 			FileReader fr = new FileReader(fullFile);
2370 			BufferedReader br = new BufferedReader(fr);
2371 			StreamTokenizer tokenizer = new StreamTokenizer(br);
2372 			tokenizer.parseNumbers();
2373 			tokenizer.wordChars('_', '_');
2374 			tokenizer.eolIsSignificant(true);
2375 			tokenizer.slashSlashComments(true);
2376 			tokenizer.slashStarComments(true);
2377 			tokenizer.ordinaryChar('=');
2378 			tokenizer.quoteChar('\'');
2379 			tokenizer.whitespaceChars(' ',' ');
2380 			tokenizer.whitespaceChars('\t','\t');
2381 			int lineNum = 1;
2382 			int token = tokenizer.nextToken();
2383 			while (token != StreamTokenizer.TT_EOF) {
2384 				String tokenID;
2385 				if ( token == StreamTokenizer.TT_WORD ) {
2386 					tokenID = tokenizer.sval;
2387 				}
2388 				else if ( token == '\'' ) {
2389 					tokenID = "'"+tokenizer.sval+"'";
2390 				}
2391 				else {
2392 					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
2393 									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
2394 									   Utils.integer(lineNum));
2395 					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
2396 					token = tokenizer.nextToken();
2397 					continue;
2398 				}
2399 				token = tokenizer.nextToken();
2400 				if ( token != '=' ) {
2401 					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
2402 									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
2403 									   Utils.integer(lineNum));
2404 					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
2405 					token = tokenizer.nextToken();
2406 					continue;
2407 				}
2408 				token = tokenizer.nextToken(); // skip '='
2409 				if ( token != StreamTokenizer.TT_NUMBER ) {
2410 					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
2411 									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
2412 									   Utils.integer(lineNum));
2413 					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
2414 					token = tokenizer.nextToken();
2415 					continue;
2416 				}
2417 				int tokenType = (int)tokenizer.nval;
2418 				token = tokenizer.nextToken();
2419 				//System.out.println("import "+tokenID+"="+tokenType);
2420 				composite.maxTokenType = Math.max(composite.maxTokenType,tokenType);
2421 				defineToken(tokenID, tokenType);
2422 				lineNum++;
2423 				if ( token != StreamTokenizer.TT_EOL ) {
2424 					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
2425 									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
2426 									   Utils.integer(lineNum));
2427 					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
2428 					token = tokenizer.nextToken();
2429 					continue;
2430 				}
2431 				token = tokenizer.nextToken(); // skip newline
2432 			}
2433 			br.close();
2434 		}
2435 		catch (FileNotFoundException fnfe) {
2436 			ErrorManager.error(ErrorManager.MSG_CANNOT_FIND_TOKENS_FILE,
2437 							   fullFile);
2438 		}
2439 		catch (IOException ioe) {
2440 			ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
2441 							   fullFile,
2442 							   ioe);
2443 		}
2444 		catch (Exception e) {
2445 			ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
2446 							   fullFile,
2447 							   e);
2448 		}
2449 		return composite.maxTokenType;
2450 	}
2451 
2452 	/** Given a token type, get a meaningful name for it such as the ID
2453 	 *  or string literal.  If this is a lexer and the ttype is in the
2454 	 *  char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.
2455 	 */
getTokenDisplayName(int ttype)2456 	public String getTokenDisplayName(int ttype) {
2457 		String tokenName;
2458 		int index;
2459 		// inside any target's char range and is lexer grammar?
2460 		if ( this.type==LEXER &&
2461 			 ttype >= Label.MIN_CHAR_VALUE && ttype <= Label.MAX_CHAR_VALUE )
2462 		{
2463 			return getANTLRCharLiteralForChar(ttype);
2464 		}
2465 		// faux label?
2466 		else if ( ttype<0 ) {
2467 			tokenName = composite.typeToTokenList.get(Label.NUM_FAUX_LABELS+ttype);
2468 		}
2469 		else {
2470 			// compute index in typeToTokenList for ttype
2471 			index = ttype-1; // normalize to 0..n-1
2472 			index += Label.NUM_FAUX_LABELS;     // jump over faux tokens
2473 
2474 			if ( index<composite.typeToTokenList.size() ) {
2475 				tokenName = composite.typeToTokenList.get(index);
2476 				if ( tokenName!=null &&
2477 					 tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) )
2478 				{
2479 					tokenName = composite.typeToStringLiteralList.get(ttype);
2480 				}
2481 			}
2482 			else {
2483 				tokenName = String.valueOf(ttype);
2484 			}
2485 		}
2486 		//System.out.println("getTokenDisplayName ttype="+ttype+", index="+index+", name="+tokenName);
2487 		return tokenName;
2488 	}
2489 
2490 	/** Get the list of ANTLR String literals */
getStringLiterals()2491 	public Set<String> getStringLiterals() {
2492 		return composite.stringLiteralToTypeMap.keySet();
2493 	}
2494 
getGrammarTypeString()2495 	public String getGrammarTypeString() {
2496 		return grammarTypeToString[type];
2497 	}
2498 
getGrammarMaxLookahead()2499 	public int getGrammarMaxLookahead() {
2500 		if ( global_k>=0 ) {
2501 			return global_k;
2502 		}
2503 		Object k = getOption("k");
2504 		if ( k==null ) {
2505 			global_k = 0;
2506 		}
2507 		else if (k instanceof Integer) {
2508 			Integer kI = (Integer)k;
2509 			global_k = kI;
2510 		}
2511 		else {
2512 			// must be String "*"
2513 			if ( k.equals("*") ) {  // this the default anyway
2514 				global_k = 0;
2515 			}
2516 		}
2517 		return global_k;
2518 	}
2519 
2520 	/** Save the option key/value pair and process it; return the key
2521 	 *  or null if invalid option.
2522 	 */
setOption(String key, Object value, Token optionsStartToken)2523 	public String setOption(String key, Object value, Token optionsStartToken) {
2524 		if ( legalOption(key) ) {
2525 			ErrorManager.grammarError(ErrorManager.MSG_ILLEGAL_OPTION,
2526 									  this,
2527 									  optionsStartToken,
2528 									  key);
2529 			return null;
2530 		}
2531 		if ( !optionIsValid(key, value) ) {
2532 			return null;
2533 		}
2534         if ( key.equals("backtrack") && value.toString().equals("true") ) {
2535             composite.getRootGrammar().atLeastOneBacktrackOption = true;
2536         }
2537         if ( options==null ) {
2538 			options = new HashMap<String, Object>();
2539 		}
2540 		options.put(key, value);
2541 		return key;
2542 	}
2543 
legalOption(String key)2544 	public boolean legalOption(String key) {
2545 		switch ( type ) {
2546 			case LEXER :
2547 				return !legalLexerOptions.contains(key);
2548 			case PARSER :
2549 				return !legalParserOptions.contains(key);
2550 			case TREE_PARSER :
2551 				return !legalTreeParserOptions.contains(key);
2552 			default :
2553 				return !legalParserOptions.contains(key);
2554 		}
2555 	}
2556 
setOptions(Map<String, Object> options, Token optionsStartToken)2557 	public void setOptions(Map<String, Object> options, Token optionsStartToken) {
2558 		if ( options==null ) {
2559 			this.options = null;
2560 			return;
2561 		}
2562 		Set<String> keys = options.keySet();
2563 		for (Iterator<String> it = keys.iterator(); it.hasNext();) {
2564 			String optionName = it.next();
2565 			Object optionValue = options.get(optionName);
2566 			String stored=setOption(optionName, optionValue, optionsStartToken);
2567 			if ( stored==null ) {
2568 				it.remove();
2569 			}
2570 		}
2571 	}
2572 
getOption(String key)2573 	public Object getOption(String key) {
2574 		return composite.getOption(key);
2575 	}
2576 
getLocallyDefinedOption(String key)2577 	public Object getLocallyDefinedOption(String key) {
2578 		Object value = null;
2579 		if ( options!=null ) {
2580 			value = options.get(key);
2581 		}
2582 		if ( value==null ) {
2583 			value = defaultOptions.get(key);
2584 		}
2585 		return value;
2586 	}
2587 
getBlockOption(GrammarAST blockAST, String key)2588 	public Object getBlockOption(GrammarAST blockAST, String key) {
2589 		String v = (String)blockAST.getBlockOption(key);
2590 		if ( v!=null ) {
2591 			return v;
2592 		}
2593 		if ( type==Grammar.LEXER ) {
2594 			return defaultLexerBlockOptions.get(key);
2595 		}
2596 		return defaultBlockOptions.get(key);
2597 	}
2598 
getUserMaxLookahead(int decision)2599 	public int getUserMaxLookahead(int decision) {
2600 		int user_k = 0;
2601 		GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decision);
2602 		Object k = blockAST.getBlockOption("k");
2603 		if ( k==null ) {
2604 			user_k = nfa.grammar.getGrammarMaxLookahead();
2605 			return user_k;
2606 		}
2607 		if (k instanceof Integer) {
2608 			Integer kI = (Integer)k;
2609 			user_k = kI;
2610 		}
2611 		else {
2612 			// must be String "*"
2613 			if ( k.equals("*") ) {
2614 				user_k = 0;
2615 			}
2616 		}
2617 		return user_k;
2618 	}
2619 
getAutoBacktrackMode(int decision)2620 	public boolean getAutoBacktrackMode(int decision) {
2621 		NFAState decisionNFAStartState = getDecisionNFAStartState(decision);
2622 		String autoBacktrack =
2623 			(String)getBlockOption(decisionNFAStartState.associatedASTNode, "backtrack");
2624 
2625 		if ( autoBacktrack==null ) {
2626 			autoBacktrack = (String)nfa.grammar.getOption("backtrack");
2627 		}
2628 		return autoBacktrack!=null&&autoBacktrack.equals("true");
2629 	}
2630 
optionIsValid(String key, Object value)2631 	public boolean optionIsValid(String key, Object value) {
2632 		return true;
2633 	}
2634 
buildAST()2635 	public boolean buildAST() {
2636 		String outputType = (String)getOption("output");
2637 		if ( outputType!=null ) {
2638 			return outputType.toString().equals("AST");
2639 		}
2640 		return false;
2641 	}
2642 
rewriteMode()2643 	public boolean rewriteMode() {
2644 		Object outputType = getOption("rewrite");
2645 		if ( outputType!=null ) {
2646 			return outputType.toString().equals("true");
2647 		}
2648 		return false;
2649 	}
2650 
isBuiltFromString()2651 	public boolean isBuiltFromString() {
2652 		return builtFromString;
2653 	}
2654 
buildTemplate()2655 	public boolean buildTemplate() {
2656 		String outputType = (String)getOption("output");
2657 		if ( outputType!=null ) {
2658 			return outputType.toString().equals("template");
2659 		}
2660 		return false;
2661 	}
2662 
getRules()2663 	public Collection<Rule> getRules() {
2664 		return nameToRuleMap.values();
2665 	}
2666 
2667 	/** Get the set of Rules that need to have manual delegations
2668 	 *  like "void rule() { importedGrammar.rule(); }"
2669 	 *
2670 	 *  If this grammar is master, get list of all rule definitions from all
2671 	 *  delegate grammars.  Only master has complete interface from combined
2672 	 *  grammars...we will generated delegates as helper objects.
2673 	 *
2674 	 *  Composite grammars that are not the root/master do not have complete
2675 	 *  interfaces.  It is not my intention that people use subcomposites.
2676 	 *  Only the outermost grammar should be used from outside code.  The
2677 	 *  other grammar components are specifically generated to work only
2678 	 *  with the master/root.
2679 	 *
2680 	 *  delegatedRules = imported - overridden
2681 	 */
getDelegatedRules()2682 	public Set<? extends Rule> getDelegatedRules() {
2683 		return composite.getDelegatedRules(this);
2684 	}
2685 
2686 	/** Get set of all rules imported from all delegate grammars even if
2687 	 *  indirectly delegated.
2688 	 */
getAllImportedRules()2689 	public Set<? extends Rule> getAllImportedRules() {
2690 		return composite.getAllImportedRules(this);
2691 	}
2692 
2693 	/** Get list of all delegates from all grammars directly or indirectly
2694 	 *  imported into this grammar.
2695 	 */
getDelegates()2696 	public List<Grammar> getDelegates() {
2697 		return composite.getDelegates(this);
2698 	}
2699 
getHasDelegates()2700 	public boolean getHasDelegates() {
2701 	   return !getDelegates().isEmpty();
2702 	}
2703 
getDelegateNames()2704 	public List<String> getDelegateNames() {
2705 		// compute delegates:{Grammar g | return g.name;}
2706 		List<String> names = new ArrayList<String>();
2707 		List<Grammar> delegates = composite.getDelegates(this);
2708 		if ( delegates!=null ) {
2709 			for (Grammar g : delegates) {
2710 				names.add(g.name);
2711 			}
2712 		}
2713 		return names;
2714 	}
2715 
getDirectDelegates()2716 	public List<Grammar> getDirectDelegates() {
2717 		return composite.getDirectDelegates(this);
2718 	}
2719 
2720 	/** Get delegates below direct delegates */
getIndirectDelegates()2721 	public List<Grammar> getIndirectDelegates() {
2722 		return composite.getIndirectDelegates(this);
2723 	}
2724 
2725 	/** Get list of all delegators.  This amounts to the grammars on the path
2726 	 *  to the root of the delegation tree.
2727 	 */
getDelegators()2728 	public List<Grammar> getDelegators() {
2729 		return composite.getDelegators(this);
2730 	}
2731 
2732 	/** Who's my direct parent grammar? */
getDelegator()2733 	public Grammar getDelegator() {
2734 		return composite.getDelegator(this);
2735 	}
2736 
getDelegatedRuleReferences()2737 	public Set<Rule> getDelegatedRuleReferences() {
2738 		return delegatedRuleReferences;
2739 	}
2740 
getGrammarIsRoot()2741 	public boolean getGrammarIsRoot() {
2742 		return composite.delegateGrammarTreeRoot.grammar == this;
2743 	}
2744 
setRuleAST(String ruleName, GrammarAST t)2745 	public void setRuleAST(String ruleName, GrammarAST t) {
2746 		Rule r = getLocallyDefinedRule(ruleName);
2747 		if ( r!=null ) {
2748 			r.tree = t;
2749 			r.EORNode = t.getLastChild();
2750 		}
2751 	}
2752 
getRuleStartState(String ruleName)2753 	public NFAState getRuleStartState(String ruleName) {
2754 		return getRuleStartState(null, ruleName);
2755 	}
2756 
getRuleStartState(String scopeName, String ruleName)2757 	public NFAState getRuleStartState(String scopeName, String ruleName) {
2758 		Rule r = getRule(scopeName, ruleName);
2759 		if ( r!=null ) {
2760 			//System.out.println("getRuleStartState("+scopeName+", "+ruleName+")="+r.startState);
2761 			return r.startState;
2762 		}
2763 		//System.out.println("getRuleStartState("+scopeName+", "+ruleName+")=null");
2764 		return null;
2765 	}
2766 
getRuleModifier(String ruleName)2767 	public String getRuleModifier(String ruleName) {
2768 		Rule r = getRule(ruleName);
2769 		if ( r!=null ) {
2770 			return r.modifier;
2771 		}
2772 		return null;
2773 	}
2774 
getRuleStopState(String ruleName)2775 	public NFAState getRuleStopState(String ruleName) {
2776 		Rule r = getRule(ruleName);
2777 		if ( r!=null ) {
2778 			return r.stopState;
2779 		}
2780 		return null;
2781 	}
2782 
assignDecisionNumber(NFAState state)2783 	public int assignDecisionNumber(NFAState state) {
2784 		decisionCount++;
2785 		state.setDecisionNumber(decisionCount);
2786 		return decisionCount;
2787 	}
2788 
getDecision(int decision)2789 	protected Decision getDecision(int decision) {
2790 		int index = decision-1;
2791 		if ( index >= indexToDecision.size() ) {
2792 			return null;
2793 		}
2794 		Decision d = indexToDecision.get(index);
2795 		return d;
2796 	}
2797 
getDecisions()2798 	public List<Decision> getDecisions() {
2799 		return indexToDecision;
2800 	}
2801 
createDecision(int decision)2802 	protected Decision createDecision(int decision) {
2803 		int index = decision-1;
2804 		if ( index < indexToDecision.size() ) {
2805 			return getDecision(decision); // don't recreate
2806 		}
2807 		Decision d = new Decision();
2808 		d.decision = decision;
2809 		d.grammar = this;
2810 		indexToDecision.setSize(getNumberOfDecisions());
2811 		indexToDecision.set(index, d);
2812 		return d;
2813 	}
2814 
getDecisionNFAStartStateList()2815 	public List<NFAState> getDecisionNFAStartStateList() {
2816 		List<NFAState> states = new ArrayList<NFAState>(100);
2817 		for (int d = 0; d < indexToDecision.size(); d++) {
2818 			Decision dec = indexToDecision.get(d);
2819 			states.add(dec.startState);
2820 		}
2821 		return states;
2822 	}
2823 
getDecisionNFAStartState(int decision)2824 	public NFAState getDecisionNFAStartState(int decision) {
2825 		Decision d = getDecision(decision);
2826 		if ( d==null ) {
2827 			return null;
2828 		}
2829 		return d.startState;
2830 	}
2831 
getLookaheadDFA(int decision)2832 	public DFA getLookaheadDFA(int decision) {
2833 		Decision d = getDecision(decision);
2834 		if ( d==null ) {
2835 			return null;
2836 		}
2837 		return d.dfa;
2838 	}
2839 
getDecisionBlockAST(int decision)2840 	public GrammarAST getDecisionBlockAST(int decision) {
2841 		Decision d = getDecision(decision);
2842 		if ( d==null ) {
2843 			return null;
2844 		}
2845 		return d.blockAST;
2846 	}
2847 
2848 	/** returns a list of column numbers for all decisions
2849 	 *  on a particular line so ANTLRWorks choose the decision
2850 	 *  depending on the location of the cursor (otherwise,
2851 	 *  ANTLRWorks has to give the *exact* location which
2852 	 *  is not easy from the user point of view).
2853 	 *
2854 	 *  This is not particularly fast as it walks entire line:col&rarr;DFA map
2855 	 *  looking for a prefix of "line:".
2856 	 */
getLookaheadDFAColumnsForLineInFile(int line)2857 	public List<Integer> getLookaheadDFAColumnsForLineInFile(int line) {
2858 		String prefix = line+":";
2859 		List<Integer> columns = new ArrayList<Integer>();
2860 		for (String key : lineColumnToLookaheadDFAMap.keySet()) {
2861 			if(key.startsWith(prefix)) {
2862 				columns.add(Integer.valueOf(key.substring(prefix.length())));
2863 			}
2864 		}
2865 		return columns;
2866 	}
2867 
2868 	/** Useful for ANTLRWorks to map position in file to the DFA for display */
getLookaheadDFAFromPositionInFile(int line, int col)2869 	public DFA getLookaheadDFAFromPositionInFile(int line, int col) {
2870 		return lineColumnToLookaheadDFAMap.get(
2871 			new StringBuffer().append(line).append(":").append(col).toString());
2872 	}
2873 
getLineColumnToLookaheadDFAMap()2874 	public Map<String, DFA> getLineColumnToLookaheadDFAMap() {
2875 		return lineColumnToLookaheadDFAMap;
2876 	}
2877 
2878 	/*
2879 	public void setDecisionOptions(int decision, Map options) {
2880 		Decision d = createDecision(decision);
2881 		d.options = options;
2882 	}
2883 
2884 	public void setDecisionOption(int decision, String name, Object value) {
2885 		Decision d = getDecision(decision);
2886 		if ( d!=null ) {
2887 			if ( d.options==null ) {
2888 				d.options = new HashMap();
2889 			}
2890 			d.options.put(name,value);
2891 		}
2892 	}
2893 
2894 	public Map getDecisionOptions(int decision) {
2895 		Decision d = getDecision(decision);
2896 		if ( d==null ) {
2897 			return null;
2898 		}
2899 		return d.options;
2900     }
2901     */
2902 
getNumberOfDecisions()2903 	public int getNumberOfDecisions() {
2904 		return decisionCount;
2905 	}
2906 
getNumberOfCyclicDecisions()2907 	public int getNumberOfCyclicDecisions() {
2908 		int n = 0;
2909 		for (int i=1; i<=getNumberOfDecisions(); i++) {
2910 			Decision d = getDecision(i);
2911 			if ( d.dfa!=null && d.dfa.isCyclic() ) {
2912 				n++;
2913 			}
2914 		}
2915 		return n;
2916 	}
2917 
2918 	/** Set the lookahead DFA for a particular decision.  This means
2919 	 *  that the appropriate AST node must updated to have the new lookahead
2920 	 *  DFA.  This method could be used to properly set the DFAs without
2921 	 *  using the createLookaheadDFAs() method.  You could do this
2922 	 *
2923 	 *    Grammar g = new Grammar("...");
2924 	 *    g.setLookahead(1, dfa1);
2925 	 *    g.setLookahead(2, dfa2);
2926 	 *    ...
2927 	 */
setLookaheadDFA(int decision, DFA lookaheadDFA)2928 	public void setLookaheadDFA(int decision, DFA lookaheadDFA) {
2929 		Decision d = createDecision(decision);
2930 		d.dfa = lookaheadDFA;
2931 		GrammarAST ast = d.startState.associatedASTNode;
2932 		ast.setLookaheadDFA(lookaheadDFA);
2933 	}
2934 
setDecisionNFA(int decision, NFAState state)2935 	public void setDecisionNFA(int decision, NFAState state) {
2936 		Decision d = createDecision(decision);
2937 		d.startState = state;
2938 	}
2939 
setDecisionBlockAST(int decision, GrammarAST blockAST)2940 	public void setDecisionBlockAST(int decision, GrammarAST blockAST) {
2941 		//System.out.println("setDecisionBlockAST("+decision+", "+blockAST.token);
2942 		Decision d = createDecision(decision);
2943 		d.blockAST = blockAST;
2944 	}
2945 
allDecisionDFAHaveBeenCreated()2946 	public boolean allDecisionDFAHaveBeenCreated() {
2947 		return allDecisionDFACreated;
2948 	}
2949 
2950 	/** How many token types have been allocated so far? */
getMaxTokenType()2951 	public int getMaxTokenType() {
2952 		return composite.maxTokenType;
2953 	}
2954 
2955 	/** What is the max char value possible for this grammar's target?  Use
2956 	 *  unicode max if no target defined.
2957 	 */
getMaxCharValue()2958 	public int getMaxCharValue() {
2959 		if ( generator!=null ) {
2960 			return generator.target.getMaxCharValue(generator);
2961 		}
2962 		else {
2963 			return Label.MAX_CHAR_VALUE;
2964 		}
2965 	}
2966 
2967 	/** Return a set of all possible token or char types for this grammar */
getTokenTypes()2968 	public IntSet getTokenTypes() {
2969 		if ( type==LEXER ) {
2970 			return getAllCharValues();
2971 		}
2972 		return IntervalSet.of(Label.MIN_TOKEN_TYPE, getMaxTokenType());
2973 	}
2974 
2975 	/** If there is a char vocabulary, use it; else return min to max char
2976 	 *  as defined by the target.  If no target, use max unicode char value.
2977 	 */
getAllCharValues()2978 	public IntSet getAllCharValues() {
2979 		if ( charVocabulary!=null ) {
2980 			return charVocabulary;
2981 		}
2982 		IntSet allChar = IntervalSet.of(Label.MIN_CHAR_VALUE, getMaxCharValue());
2983 		return allChar;
2984 	}
2985 
2986 	/** Return a string representing the escaped char for code c.  E.g., If c
2987 	 *  has value 0x100, you will get "\u0100".  ASCII gets the usual
2988 	 *  char (non-hex) representation.  Control characters are spit out
2989 	 *  as unicode.  While this is specially set up for returning Java strings,
2990 	 *  it can be used by any language target that has the same syntax. :)
2991 	 *
2992 	 *  11/26/2005: I changed this to use double quotes, consistent with antlr.g
2993 	 *  12/09/2005: I changed so everything is single quotes
2994 	 */
getANTLRCharLiteralForChar(int c)2995 	public static String getANTLRCharLiteralForChar(int c) {
2996 		if ( c<Label.MIN_CHAR_VALUE ) {
2997 			ErrorManager.internalError("invalid char value "+c);
2998 			return "'<INVALID>'";
2999 		}
3000 		if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
3001 			return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
3002 		}
3003 		if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
3004 			 !Character.isISOControl((char)c) ) {
3005 			if ( c=='\\' ) {
3006 				return "'\\\\'";
3007 			}
3008 			if ( c=='\'') {
3009 				return "'\\''";
3010 			}
3011 			return '\''+Character.toString((char)c)+'\'';
3012 		}
3013 		// turn on the bit above max "\uFFFF" value so that we pad with zeros
3014 		// then only take last 4 digits
3015 		String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
3016 		String unicodeStr = "'\\u"+hex+"'";
3017 		return unicodeStr;
3018 	}
3019 
3020 	/** For lexer grammars, return everything in unicode not in set.
3021 	 *  For parser and tree grammars, return everything in token space
3022 	 *  from MIN_TOKEN_TYPE to last valid token type or char value.
3023 	 */
complement(IntSet set)3024 	public IntSet complement(IntSet set) {
3025 		//System.out.println("complement "+set.toString(this));
3026 		//System.out.println("vocabulary "+getTokenTypes().toString(this));
3027 		IntSet c = set.complement(getTokenTypes());
3028 		//System.out.println("result="+c.toString(this));
3029 		return c;
3030 	}
3031 
complement(int atom)3032 	public IntSet complement(int atom) {
3033 		return complement(IntervalSet.of(atom));
3034 	}
3035 
3036 	/** Given set tree like ( SET A B ), check that A and B
3037 	 *  are both valid sets themselves, else we must tree like a BLOCK
3038 	 */
isValidSet(TreeToNFAConverter nfabuilder, GrammarAST t)3039 	public boolean isValidSet(TreeToNFAConverter nfabuilder, GrammarAST t) {
3040 		boolean valid;
3041 		try {
3042 			//System.out.println("parse BLOCK as set tree: "+t.toStringTree());
3043 			int alts = nfabuilder.testBlockAsSet(t);
3044 			valid = alts > 1;
3045 		}
3046 		catch (RecognitionException re) {
3047 			// The rule did not parse as a set, return null; ignore exception
3048 			valid = false;
3049 		}
3050 		//System.out.println("valid? "+valid);
3051 		return valid;
3052 	}
3053 
3054 	/** Get the set equivalent (if any) of the indicated rule from this
3055 	 *  grammar.  Mostly used in the lexer to do ~T for some fragment rule
3056 	 *  T.  If the rule AST has a SET use that.  If the rule is a single char
3057 	 *  convert it to a set and return.  If rule is not a simple set (w/o actions)
3058 	 *  then return null.
3059 	 *  Rules have AST form:
3060 	 *
3061 	 *		^( RULE ID modifier ARG RET SCOPE block EOR )
3062 	 */
getSetFromRule(TreeToNFAConverter nfabuilder, String ruleName)3063 	public IntSet getSetFromRule(TreeToNFAConverter nfabuilder, String ruleName)
3064 		throws RecognitionException
3065 	{
3066 		Rule r = getRule(ruleName);
3067 		if ( r==null ) {
3068 			return null;
3069 		}
3070 		IntSet elements;
3071 		//System.out.println("parsed tree: "+r.tree.toStringTree());
3072 		elements = nfabuilder.setRule(r.tree);
3073 		//System.out.println("elements="+elements);
3074 		return elements;
3075 	}
3076 
3077 	/** Decisions are linked together with transition(1).  Count how
3078 	 *  many there are.  This is here rather than in NFAState because
3079 	 *  a grammar decides how NFAs are put together to form a decision.
3080 	 */
getNumberOfAltsForDecisionNFA(NFAState decisionState)3081 	public int getNumberOfAltsForDecisionNFA(NFAState decisionState) {
3082 		if ( decisionState==null ) {
3083 			return 0;
3084 		}
3085 		int n = 1;
3086 		NFAState p = decisionState;
3087 		while ( p.transition[1] !=null ) {
3088 			n++;
3089 			p = (NFAState)p.transition[1].target;
3090 		}
3091 		return n;
3092 	}
3093 
3094 	/** Get the ith alternative (1..n) from a decision; return null when
3095 	 *  an invalid alt is requested.  I must count in to find the right
3096 	 *  alternative number.  For (A|B), you get NFA structure (roughly):
3097 	 *
3098 	 *  o-&gt;o-A-&gt;o
3099 	 *  |
3100 	 *  o-&gt;o-B-&gt;o
3101 	 *
3102 	 *  This routine returns the leftmost state for each alt.  So alt=1, returns
3103 	 *  the upperleft most state in this structure.
3104 	 */
getNFAStateForAltOfDecision(NFAState decisionState, int alt)3105 	public NFAState getNFAStateForAltOfDecision(NFAState decisionState, int alt) {
3106 		if ( decisionState==null || alt<=0 ) {
3107 			return null;
3108 		}
3109 		int n = 1;
3110 		NFAState p = decisionState;
3111 		while ( p!=null ) {
3112 			if ( n==alt ) {
3113 				return p;
3114 			}
3115 			n++;
3116 			Transition next = p.transition[1];
3117 			p = null;
3118 			if ( next!=null ) {
3119 				p = (NFAState)next.target;
3120 			}
3121 		}
3122 		return null;
3123 	}
3124 
3125 	/*
3126 	public void computeRuleFOLLOWSets() {
3127 		if ( getNumberOfDecisions()==0 ) {
3128 			createNFAs();
3129 		}
3130 		for (Iterator it = getRules().iterator(); it.hasNext();) {
3131 			Rule r = (Rule)it.next();
3132 			if ( r.isSynPred ) {
3133 				continue;
3134 			}
3135 			LookaheadSet s = ll1Analyzer.FOLLOW(r);
3136 			System.out.println("FOLLOW("+r.name+")="+s);
3137 		}
3138 	}
3139 	*/
3140 
FIRST(NFAState s)3141 	public LookaheadSet FIRST(NFAState s) {
3142 		return ll1Analyzer.FIRST(s);
3143 	}
3144 
LOOK(NFAState s)3145 	public LookaheadSet LOOK(NFAState s) {
3146 		return ll1Analyzer.LOOK(s);
3147 	}
3148 
setCodeGenerator(CodeGenerator generator)3149 	public void setCodeGenerator(CodeGenerator generator) {
3150 		this.generator = generator;
3151 	}
3152 
getCodeGenerator()3153 	public CodeGenerator getCodeGenerator() {
3154 		return generator;
3155 	}
3156 
getGrammarTree()3157 	public GrammarAST getGrammarTree() {
3158 		return grammarTree;
3159 	}
3160 
setGrammarTree(GrammarAST value)3161 	public void setGrammarTree(GrammarAST value) {
3162 		grammarTree = value;
3163 	}
3164 
getTool()3165 	public Tool getTool() {
3166 		return tool;
3167 	}
3168 
setTool(Tool tool)3169 	public void setTool(Tool tool) {
3170 		this.tool = tool;
3171 	}
3172 
3173 	/** given a token type and the text of the literal, come up with a
3174 	 *  decent token type label.  For now it's just T&lt;type&gt;.  Actually,
3175 	 *  if there is an aliased name from tokens like PLUS='+', use it.
3176 	 */
computeTokenNameFromLiteral(int tokenType, String literal)3177 	public String computeTokenNameFromLiteral(int tokenType, String literal) {
3178 		return AUTO_GENERATED_TOKEN_NAME_PREFIX +tokenType;
3179 	}
3180 
3181 	@Override
toString()3182 	public String toString() {
3183 	//	return "FFFFFFFFFFFFFF";
3184 		return grammarTreeToString(grammarTree);
3185 	}
3186 
grammarTreeToString(GrammarAST t)3187 	public String grammarTreeToString(GrammarAST t) {
3188 		return grammarTreeToString(t, true);
3189 	}
3190 
grammarTreeToString(GrammarAST t, boolean showActions)3191 	public String grammarTreeToString(GrammarAST t, boolean showActions) {
3192 		String s;
3193 		try {
3194 			s = t.getLine()+":"+(t.getCharPositionInLine()+1)+": ";
3195 			s += new ANTLRTreePrinter(new CommonTreeNodeStream(t)).toString(this, showActions);
3196 		}
3197 		catch (Exception e) {
3198 			s = "<invalid or missing tree structure>";
3199 		}
3200 		return s;
3201 	}
3202 
printGrammar(PrintStream output)3203 	public void printGrammar(PrintStream output) {
3204 		ANTLRTreePrinter printer = new ANTLRTreePrinter(new CommonTreeNodeStream(getGrammarTree()));
3205 		try {
3206 			String g = printer.toString(this, false);
3207 			output.println(g);
3208 		}
3209 		catch (RecognitionException re) {
3210 			ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,re);
3211 		}
3212 	}
3213 
3214 }
3215