1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.analysis;
29 
30 import org.antlr.misc.Utils;
31 import org.antlr.tool.Grammar;
32 
33 import java.util.HashSet;
34 import java.util.Set;
35 
36 /** A module to perform optimizations on DFAs.
37  *
38  *  I could more easily (and more quickly) do some optimizations (such as
39  *  PRUNE_EBNF_EXIT_BRANCHES) during DFA construction, but then it
40  *  messes up the determinism checking.  For example, it looks like
41  *  loop exit branches are unreachable if you prune exit branches
42  *  during DFA construction and before determinism checks.
43  *
44  *  In general, ANTLR's NFA→DFA→codegen pipeline seems very robust
45  *  to me which I attribute to a uniform and consistent set of data
46  *  structures.  Regardless of what I want to "say"/implement, I do so
47  *  within the confines of, for example, a DFA.  The code generator
48  *  can then just generate code--it doesn't have to do much thinking.
49  *  Putting optimizations in the code gen code really starts to make
50  *  it a spagetti factory (uh oh, now I'm hungry!).  The pipeline is
51  *  very testable; each stage has well defined input/output pairs.
52  *
53  *  ### Optimization: PRUNE_EBNF_EXIT_BRANCHES
54  *
55  *  There is no need to test EBNF block exit branches.  Not only is it
56  *  an unneeded computation, but counter-intuitively, you actually get
57  *  better errors. You can report an error at the missing or extra
58  *  token rather than as soon as you've figured out you will fail.
59  *
60  *  Imagine optional block "( DOT CLASS )? SEMI".  ANTLR generates:
61  *
62  *  int alt=0;
63  *  if ( input.LA(1)==DOT ) {
64  *      alt=1;
65  *  }
66  *  else if ( input.LA(1)==SEMI ) {
67  *      alt=2;
68  *  }
69  *
70  *  Clearly, since Parser.match() will ultimately find the error, we
71  *  do not want to report an error nor do we want to bother testing
72  *  lookahead against what follows the (...)?  We want to generate
73  *  simply "should I enter the subrule?":
74  *
75  *  int alt=2;
76  *  if ( input.LA(1)==DOT ) {
77  *      alt=1;
78  *  }
79  *
80  *  NOTE 1. Greedy loops cannot be optimized in this way.  For example,
81  *  "(greedy=false:'x'|.)* '\n'".  You specifically need the exit branch
82  *  to tell you when to terminate the loop as the same input actually
83  *  predicts one of the alts (i.e., staying in the loop).
84  *
85  *  NOTE 2.  I do not optimize cyclic DFAs at the moment as it doesn't
86  *  seem to work. ;)  I'll have to investigate later to see what work I
87  *  can do on cyclic DFAs to make them have fewer edges.  Might have
88  *  something to do with the EOT token.
89  *
90  *  ### PRUNE_SUPERFLUOUS_EOT_EDGES
91  *
92  *  When a token is a subset of another such as the following rules, ANTLR
93  *  quietly assumes the first token to resolve the ambiguity.
94  *
95  *  EQ			: '=' ;
96  *  ASSIGNOP	: '=' | '+=' ;
97  *
98  *  It can yield states that have only a single edge on EOT to an accept
99  *  state.  This is a waste and messes up my code generation. ;)  If
100  *  Tokens rule DFA goes
101  *
102  * 		s0 -'='-> s3 -EOT-> s5 (accept)
103  *
104  *  then s5 should be pruned and s3 should be made an accept.  Do NOT do this
105  *  for keyword versus ID as the state with EOT edge emanating from it will
106  *  also have another edge.
107  *
108  *  ### Optimization: COLLAPSE_ALL_INCIDENT_EDGES
109  *
110  *  Done during DFA construction.  See method addTransition() in
111  *  NFAToDFAConverter.
112  *
113  *  ### Optimization: MERGE_STOP_STATES
114  *
115  *  Done during DFA construction.  See addDFAState() in NFAToDFAConverter.
116  */
117 public class DFAOptimizer {
118 	public static boolean PRUNE_EBNF_EXIT_BRANCHES = true;
119 	public static boolean PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES = true;
120 	public static boolean COLLAPSE_ALL_PARALLEL_EDGES = true;
121 	public static boolean MERGE_STOP_STATES = true;
122 
123 	/** Used by DFA state machine generator to avoid infinite recursion
124 	 *  resulting from cycles int the DFA.  This is a set of int state #s.
125 	 *  This is a side-effect of calling optimize; can't clear after use
126 	 *  because code gen needs it.
127 	 */
128 	protected Set<Integer> visited = new HashSet<Integer>();
129 
130     protected Grammar grammar;
131 
DFAOptimizer(Grammar grammar)132     public DFAOptimizer(Grammar grammar) {
133 		this.grammar = grammar;
134     }
135 
optimize()136 	public void optimize() {
137 		// optimize each DFA in this grammar
138 		for (int decisionNumber=1;
139 			 decisionNumber<=grammar.getNumberOfDecisions();
140 			 decisionNumber++)
141 		{
142 			DFA dfa = grammar.getLookaheadDFA(decisionNumber);
143 			optimize(dfa);
144 		}
145 	}
146 
optimize(DFA dfa)147 	protected void optimize(DFA dfa) {
148 		if ( dfa==null ) {
149 			return; // nothing to do
150 		}
151 		/*
152 		System.out.println("Optimize DFA "+dfa.decisionNFAStartState.decisionNumber+
153 						   " num states="+dfa.getNumberOfStates());
154 		*/
155 		//long start = System.currentTimeMillis();
156 		if ( PRUNE_EBNF_EXIT_BRANCHES && dfa.canInlineDecision() ) {
157 			visited.clear();
158 			int decisionType =
159 				dfa.getNFADecisionStartState().decisionStateType;
160 			if ( dfa.isGreedy() &&
161 				 (decisionType==NFAState.OPTIONAL_BLOCK_START ||
162 				 decisionType==NFAState.LOOPBACK) )
163 			{
164 				optimizeExitBranches(dfa.startState);
165 			}
166 		}
167 		// If the Tokens rule has syntactically ambiguous rules, try to prune
168 		if ( PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES &&
169 			 dfa.isTokensRuleDecision() &&
170 			 dfa.probe.stateToSyntacticallyAmbiguousTokensRuleAltsMap.size()>0 )
171 		{
172 			visited.clear();
173 			optimizeEOTBranches(dfa.startState);
174 		}
175 
176 		/* ack...code gen needs this, cannot optimize
177 		visited.clear();
178 		unlinkUnneededStateData(dfa.startState);
179 		*/
180 		//long stop = System.currentTimeMillis();
181 		//System.out.println("minimized in "+(int)(stop-start)+" ms");
182     }
183 
optimizeExitBranches(DFAState d)184 	protected void optimizeExitBranches(DFAState d) {
185 		Integer sI = Utils.integer(d.stateNumber);
186 		if ( visited.contains(sI) ) {
187 			return; // already visited
188 		}
189 		visited.add(sI);
190 		int nAlts = d.dfa.getNumberOfAlts();
191 		for (int i = 0; i < d.getNumberOfTransitions(); i++) {
192 			Transition edge = d.transition(i);
193 			DFAState edgeTarget = ((DFAState)edge.target);
194 			/*
195 			System.out.println(d.stateNumber+"-"+
196 							   edge.label.toString(d.dfa.nfa.grammar)+"->"+
197 							   edgeTarget.stateNumber);
198 			*/
199 			// if target is an accept state and that alt is the exit alt
200 			if ( edgeTarget.isAcceptState() &&
201 				edgeTarget.getUniquelyPredictedAlt()==nAlts)
202 			{
203 				/*
204 				System.out.println("ignoring transition "+i+" to max alt "+
205 					d.dfa.getNumberOfAlts());
206 				*/
207 				d.removeTransition(i);
208 				i--; // back up one so that i++ of loop iteration stays within bounds
209 			}
210 			optimizeExitBranches(edgeTarget);
211 		}
212 	}
213 
optimizeEOTBranches(DFAState d)214 	protected void optimizeEOTBranches(DFAState d) {
215 		Integer sI = Utils.integer(d.stateNumber);
216 		if ( visited.contains(sI) ) {
217 			return; // already visited
218 		}
219 		visited.add(sI);
220 		for (int i = 0; i < d.getNumberOfTransitions(); i++) {
221 			Transition edge = d.transition(i);
222 			DFAState edgeTarget = ((DFAState)edge.target);
223 			/*
224 			System.out.println(d.stateNumber+"-"+
225 							   edge.label.toString(d.dfa.nfa.grammar)+"->"+
226 							   edgeTarget.stateNumber);
227 			*/
228 			// if only one edge coming out, it is EOT, and target is accept prune
229 			if ( PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES &&
230 				edgeTarget.isAcceptState() &&
231 				d.getNumberOfTransitions()==1 &&
232 				edge.label.isAtom() &&
233 				edge.label.getAtom()==Label.EOT )
234 			{
235 				//System.out.println("state "+d+" can be pruned");
236 				// remove the superfluous EOT edge
237 				d.removeTransition(i);
238 				d.setAcceptState(true); // make it an accept state
239 				// force it to uniquely predict the originally predicted state
240 				d.cachedUniquelyPredicatedAlt =
241 					edgeTarget.getUniquelyPredictedAlt();
242 				i--; // back up one so that i++ of loop iteration stays within bounds
243 			}
244 			optimizeEOTBranches(edgeTarget);
245 		}
246 	}
247 
248 	/** Walk DFA states, unlinking the nfa configs and whatever else I
249 	 *  can to reduce memory footprint.
250 	protected void unlinkUnneededStateData(DFAState d) {
251 		Integer sI = Utils.integer(d.stateNumber);
252 		if ( visited.contains(sI) ) {
253 			return; // already visited
254 		}
255 		visited.add(sI);
256 		d.nfaConfigurations = null;
257 		for (int i = 0; i < d.getNumberOfTransitions(); i++) {
258 			Transition edge = (Transition) d.transition(i);
259 			DFAState edgeTarget = ((DFAState)edge.target);
260 			unlinkUnneededStateData(edgeTarget);
261 		}
262 	}
263 	 */
264 
265 }
266