1 /*
2  [The "BSD license"]
3  Copyright (c) 2005-2009 Terence Parr
4  All rights reserved.
5 
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions
8  are met:
9  1. Redistributions of source code must retain the above copyright
10      notice, this list of conditions and the following disclaimer.
11  2. Redistributions in binary form must reproduce the above copyright
12      notice, this list of conditions and the following disclaimer in the
13      documentation and/or other materials provided with the distribution.
14  3. The name of the author may not be used to endorse or promote products
15      derived from this software without specific prior written permission.
16 
17  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.runtime;
29 
30 /** A DFA implemented as a set of transition tables.
31  *
32  *  Any state that has a semantic predicate edge is special; those states
33  *  are generated with if-then-else structures in a specialStateTransition()
34  *  which is generated by cyclicDFA template.
35  *
36  *  There are at most 32767 states (16-bit signed short).
37  *  Could get away with byte sometimes but would have to generate different
38  *  types and the simulation code too.  For a point of reference, the Java
39  *  lexer's Tokens rule DFA has 326 states roughly.
40  */
41 public class DFA {
42 	protected short[] eot;
43 	protected short[] eof;
44 	protected char[] min;
45     protected char[] max;
46     protected short[] accept;
47     protected short[] special;
48     protected short[][] transition;
49 
50 	protected int decisionNumber;
51 
52 	/** Which recognizer encloses this DFA?  Needed to check backtracking */
53 	protected BaseRecognizer recognizer;
54 
55 	public static final boolean debug = false;
56 
57 	/** From the input stream, predict what alternative will succeed
58 	 *  using this DFA (representing the covering regular approximation
59 	 *  to the underlying CFL).  Return an alternative number 1..n.  Throw
60 	 *  an exception upon error.
61 	 */
predict(IntStream input)62 	public int predict(IntStream input)
63 		throws RecognitionException
64 	{
65 		if ( debug ) {
66 			System.err.println("Enter DFA.predict for decision "+decisionNumber);
67 		}
68 		int mark = input.mark(); // remember where decision started in input
69 		int s = 0; // we always start at s0
70 		try {
71 			while ( true ) {
72 				if ( debug ) System.err.println("DFA "+decisionNumber+" state "+s+" LA(1)="+(char)input.LA(1)+"("+input.LA(1)+
73 												"), index="+input.index());
74 				int specialState = special[s];
75 				if ( specialState>=0 ) {
76 					if ( debug ) {
77 						System.err.println("DFA "+decisionNumber+
78 							" state "+s+" is special state "+specialState);
79 					}
80 					s = specialStateTransition(specialState,input);
81 					if ( debug ) {
82 						System.err.println("DFA "+decisionNumber+
83 							" returns from special state "+specialState+" to "+s);
84 					}
85 					if ( s==-1 ) {
86 						noViableAlt(s,input);
87 						return 0;
88 					}
89 					input.consume();
90 					continue;
91 				}
92 				if ( accept[s] >= 1 ) {
93 					if ( debug ) System.err.println("accept; predict "+accept[s]+" from state "+s);
94 					return accept[s];
95 				}
96 				// look for a normal char transition
97 				char c = (char)input.LA(1); // -1 == \uFFFF, all tokens fit in 65000 space
98 				if (c>=min[s] && c<=max[s]) {
99 					int snext = transition[s][c-min[s]]; // move to next state
100 					if ( snext < 0 ) {
101 						// was in range but not a normal transition
102 						// must check EOT, which is like the else clause.
103 						// eot[s]>=0 indicates that an EOT edge goes to another
104 						// state.
105 						if ( eot[s]>=0 ) {  // EOT Transition to accept state?
106 							if ( debug ) System.err.println("EOT transition");
107 							s = eot[s];
108 							input.consume();
109 							// TODO: I had this as return accept[eot[s]]
110 							// which assumed here that the EOT edge always
111 							// went to an accept...faster to do this, but
112 							// what about predicated edges coming from EOT
113 							// target?
114 							continue;
115 						}
116 						noViableAlt(s,input);
117 						return 0;
118 					}
119 					s = snext;
120 					input.consume();
121 					continue;
122 				}
123 				if ( eot[s]>=0 ) {  // EOT Transition?
124 					if ( debug ) System.err.println("EOT transition");
125 					s = eot[s];
126 					input.consume();
127 					continue;
128 				}
129 				if ( c==(char)Token.EOF && eof[s]>=0 ) {  // EOF Transition to accept state?
130 					if ( debug ) System.err.println("accept via EOF; predict "+accept[eof[s]]+" from "+eof[s]);
131 					return accept[eof[s]];
132 				}
133 				// not in range and not EOF/EOT, must be invalid symbol
134 				if ( debug ) {
135 					System.err.println("min["+s+"]="+min[s]);
136 					System.err.println("max["+s+"]="+max[s]);
137 					System.err.println("eot["+s+"]="+eot[s]);
138 					System.err.println("eof["+s+"]="+eof[s]);
139 					for (int p=0; p<transition[s].length; p++) {
140 						System.err.print(transition[s][p]+" ");
141 					}
142 					System.err.println();
143 				}
144 				noViableAlt(s,input);
145 				return 0;
146 			}
147 		}
148 		finally {
149 			input.rewind(mark);
150 		}
151 	}
152 
noViableAlt(int s, IntStream input)153 	protected void noViableAlt(int s, IntStream input) throws NoViableAltException {
154 		if (recognizer.state.backtracking>0) {
155 			recognizer.state.failed=true;
156 			return;
157 		}
158 		NoViableAltException nvae =
159 			new NoViableAltException(getDescription(),
160 									 decisionNumber,
161 									 s,
162 									 input);
163 		error(nvae);
164 		throw nvae;
165 	}
166 
167 	/** A hook for debugging interface */
error(NoViableAltException nvae)168 	protected void error(NoViableAltException nvae) { ; }
169 
specialStateTransition(int s, IntStream input)170 	public int specialStateTransition(int s, IntStream input)
171 		throws NoViableAltException
172 	{
173 		return -1;
174 	}
175 
getDescription()176 	public String getDescription() {
177 		return "n/a";
178 	}
179 
180 	/** Given a String that has a run-length-encoding of some unsigned shorts
181 	 *  like "\1\2\3\9", convert to short[] {2,9,9,9}.  We do this to avoid
182 	 *  static short[] which generates so much init code that the class won't
183 	 *  compile. :(
184 	 */
unpackEncodedString(String encodedString)185 	public static short[] unpackEncodedString(String encodedString) {
186 		// walk first to find how big it is.
187 		int size = 0;
188 		for (int i=0; i<encodedString.length(); i+=2) {
189 			size += encodedString.charAt(i);
190 		}
191 		short[] data = new short[size];
192 		int di = 0;
193 		for (int i=0; i<encodedString.length(); i+=2) {
194 			char n = encodedString.charAt(i);
195 			char v = encodedString.charAt(i+1);
196 			// add v n times to data
197 			for (int j=1; j<=n; j++) {
198 				data[di++] = (short)v;
199 			}
200 		}
201 		return data;
202 	}
203 
204 	/** Hideous duplication of code, but I need different typed arrays out :( */
unpackEncodedStringToUnsignedChars(String encodedString)205 	public static char[] unpackEncodedStringToUnsignedChars(String encodedString) {
206 		// walk first to find how big it is.
207 		int size = 0;
208 		for (int i=0; i<encodedString.length(); i+=2) {
209 			size += encodedString.charAt(i);
210 		}
211 		char[] data = new char[size];
212 		int di = 0;
213 		for (int i=0; i<encodedString.length(); i+=2) {
214 			char n = encodedString.charAt(i);
215 			char v = encodedString.charAt(i+1);
216 			// add v n times to data
217 			for (int j=1; j<=n; j++) {
218 				data[di++] = v;
219 			}
220 		}
221 		return data;
222 	}
223 
224 	/*
225 	public int specialTransition(int state, int symbol) {
226 		return 0;
227 	}
228 	*/
229 }
230