1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.tool;
29 
30 import org.antlr.Tool;
31 import org.antlr.analysis.Label;
32 import org.antlr.analysis.NFAState;
33 import org.antlr.analysis.RuleClosureTransition;
34 import org.antlr.analysis.Transition;
35 import org.antlr.misc.IntervalSet;
36 import org.antlr.misc.Utils;
37 
38 import java.io.BufferedReader;
39 import java.io.FileReader;
40 import java.util.ArrayList;
41 import java.util.Collection;
42 import java.util.List;
43 import java.util.Random;
44 import java.util.Stack;
45 
46 /** Generate a random phrase given a grammar.
47  *  Usage:
48  *     java org.antlr.tool.RandomPhrase grammarFile.g startRule [seed]
49  *
50  *  For example:
51  *     java org.antlr.tool.RandomPhrase simple.g program 342
52  *
53  *  The seed acts like a unique identifier so you can get the same random
54  *  phrase back during unit testing, for example.
55  *
56  *  If you do not specify a seed then the current time in milliseconds is used
57  *  guaranteeing that you'll never see that seed again.
58  *
59  *  NOTE: this does not work well for large grammars...it tends to recurse
60  *  too much and build really long strings.  I need throttle control; later.
61  */
62 public class RandomPhrase {
63 	public static final boolean debug = false;
64 
65 	protected static Random random;
66 
67 	/** an experimental method to generate random phrases for a given
68 	 *  grammar given a start rule.  Return a list of token types.
69 	 */
randomPhrase(Grammar g, List<Integer> tokenTypes, String startRule)70 	protected static void randomPhrase(Grammar g, List<Integer> tokenTypes, String startRule) {
71 		NFAState state = g.getRuleStartState(startRule);
72 		NFAState stopState = g.getRuleStopState(startRule);
73 
74 		Stack<NFAState> ruleInvocationStack = new Stack<NFAState>();
75 		while ( true ) {
76 			if ( state==stopState && ruleInvocationStack.isEmpty() ) {
77 				break;
78 			}
79 			if ( debug ) System.out.println("state "+state);
80 			if ( state.getNumberOfTransitions()==0 ) {
81 				if ( debug ) System.out.println("dangling state: "+state);
82 				return;
83 			}
84 			// end of rule node
85 			if ( state.isAcceptState() ) {
86 				NFAState invokingState = ruleInvocationStack.pop();
87 				if ( debug ) System.out.println("pop invoking state "+invokingState);
88 				//System.out.println("leave "+state.enclosingRule.name);
89 				RuleClosureTransition invokingTransition =
90 					(RuleClosureTransition)invokingState.transition[0];
91 				// move to node after state that invoked this rule
92 				state = invokingTransition.followState;
93 				continue;
94 			}
95 			if ( state.getNumberOfTransitions()==1 ) {
96 				// no branching, just take this path
97 				Transition t0 = state.transition[0];
98 				if ( t0 instanceof RuleClosureTransition ) {
99 					ruleInvocationStack.push(state);
100 					if ( debug ) System.out.println("push state "+state);
101 					//System.out.println("call "+((RuleClosureTransition)t0).rule.name);
102 					//System.out.println("stack depth="+ruleInvocationStack.size());
103 				}
104 				else if ( t0.label.isSet() || t0.label.isAtom() ) {
105 					tokenTypes.add( getTokenType(t0.label) );
106 				}
107 				state = (NFAState)t0.target;
108 				continue;
109 			}
110 
111 			int decisionNumber = state.getDecisionNumber();
112 			if ( decisionNumber==0 ) {
113 				System.out.println("weird: no decision number but a choice node");
114 				continue;
115 			}
116 			// decision point, pick ith alternative randomly
117 			int n = g.getNumberOfAltsForDecisionNFA(state);
118 			int randomAlt = random.nextInt(n) + 1;
119 			if ( debug ) System.out.println("randomAlt="+randomAlt);
120 			NFAState altStartState =
121 				g.getNFAStateForAltOfDecision(state, randomAlt);
122 			Transition t = altStartState.transition[0];
123 			state = (NFAState)t.target;
124 		}
125 	}
126 
getTokenType(Label label)127 	protected static Integer getTokenType(Label label) {
128 		if ( label.isSet() ) {
129 			// pick random element of set
130 			IntervalSet typeSet = (IntervalSet)label.getSet();
131 			int randomIndex = random.nextInt(typeSet.size());
132 			return typeSet.get(randomIndex);
133 		}
134 		else {
135 			return Utils.integer(label.getAtom());
136 		}
137 		//System.out.println(t0.label.toString(g));
138 	}
139 
140 	/** Used to generate random strings */
main(String[] args)141 	public static void main(String[] args) {
142 		if ( args.length < 2 ) {
143 			System.err.println("usage: java org.antlr.tool.RandomPhrase grammarfile startrule");
144 			return;
145 		}
146 		String grammarFileName = args[0];
147 		String startRule = args[1];
148 		long seed = System.currentTimeMillis(); // use random seed unless spec.
149 		if ( args.length==3 ) {
150 			String seedStr = args[2];
151 			seed = Long.parseLong(seedStr);
152 		}
153 		try {
154 			random = new Random(seed);
155 
156 			CompositeGrammar composite = new CompositeGrammar();
157 			Tool tool = new Tool();
158 			Grammar parser = new Grammar(tool, grammarFileName, composite);
159 			composite.setDelegationRoot(parser);
160 
161 			FileReader fr = new FileReader(grammarFileName);
162 			BufferedReader br = new BufferedReader(fr);
163 			parser.parseAndBuildAST(br);
164 			br.close();
165 
166 			parser.composite.assignTokenTypes();
167 			parser.composite.defineGrammarSymbols();
168 			parser.composite.createNFAs();
169 
170 			List<? extends Collection<? extends Rule>> leftRecursiveRules = parser.checkAllRulesForLeftRecursion();
171 			if ( leftRecursiveRules.size()>0 ) {
172 				return;
173 			}
174 
175 			if ( parser.getRule(startRule)==null ) {
176 				System.out.println("undefined start rule "+startRule);
177 				return;
178 			}
179 
180 			String lexerGrammarText = parser.getLexerGrammar();
181 			Grammar lexer = new Grammar(tool);
182 			lexer.importTokenVocabulary(parser);
183 			lexer.fileName = grammarFileName;
184 			if ( lexerGrammarText!=null ) {
185 				lexer.setGrammarContent(lexerGrammarText);
186 			}
187 			else {
188 				System.err.println("no lexer grammar found in "+grammarFileName);
189 			}
190 			lexer.buildNFA();
191 			leftRecursiveRules = lexer.checkAllRulesForLeftRecursion();
192 			if ( leftRecursiveRules.size()>0 ) {
193 				return;
194 			}
195 			//System.out.println("lexer:\n"+lexer);
196 
197 			List<Integer> tokenTypes = new ArrayList<Integer>(100);
198 			randomPhrase(parser, tokenTypes, startRule);
199 			System.out.println("token types="+tokenTypes);
200 			for (int i = 0; i < tokenTypes.size(); i++) {
201 				Integer ttypeI = tokenTypes.get(i);
202 				int ttype = ttypeI;
203 				String ttypeDisplayName = parser.getTokenDisplayName(ttype);
204 				if ( Character.isUpperCase(ttypeDisplayName.charAt(0)) ) {
205 					List<Integer> charsInToken = new ArrayList<Integer>(10);
206 					randomPhrase(lexer, charsInToken, ttypeDisplayName);
207 					System.out.print(" ");
208 					for (int j = 0; j < charsInToken.size(); j++) {
209 						Integer cI = charsInToken.get(j);
210 						System.out.print((char)cI.intValue());
211 					}
212 				}
213 				else { // it's a literal
214 					String literal =
215 						ttypeDisplayName.substring(1,ttypeDisplayName.length()-1);
216 					System.out.print(" "+literal);
217 				}
218 			}
219 			System.out.println();
220 		}
221 		catch (Error er) {
222 			System.err.println("Error walking "+grammarFileName+" rule "+startRule+" seed "+seed);
223 			er.printStackTrace(System.err);
224 		}
225 		catch (Exception e) {
226 			System.err.println("Exception walking "+grammarFileName+" rule "+startRule+" seed "+seed);
227 			e.printStackTrace(System.err);
228 		}
229 	}
230 }
231