1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.codegen;
29 
30 import org.antlr.Tool;
31 import org.stringtemplate.v4.ST;
32 import org.antlr.tool.Grammar;
33 
34 import java.io.IOException;
35 import java.util.ArrayList;
36 import java.util.List;
37 
38 public class CTarget extends Target {
39 
40     List<String> strings = new ArrayList<String>();
41 
42     @Override
genRecognizerFile(Tool tool, CodeGenerator generator, Grammar grammar, ST outputFileST)43     protected void genRecognizerFile(Tool tool,
44             CodeGenerator generator,
45             Grammar grammar,
46             ST outputFileST)
47             throws IOException {
48 
49         // Before we write this, and cause it to generate its string,
50         // we need to add all the string literals that we are going to match
51         //
52         outputFileST.add("literals", strings);
53         String fileName = generator.getRecognizerFileName(grammar.name, grammar.type);
54         generator.write(outputFileST, fileName);
55     }
56 
57     @Override
genRecognizerHeaderFile(Tool tool, CodeGenerator generator, Grammar grammar, ST headerFileST, String extName)58     protected void genRecognizerHeaderFile(Tool tool,
59             CodeGenerator generator,
60             Grammar grammar,
61             ST headerFileST,
62             String extName)
63             throws IOException {
64         // Pick up the file name we are generating. This method will return a
65         // a file suffixed with .c, so we must substring and add the extName
66         // to it as we cannot assign into strings in Java.
67         ///
68         String fileName = generator.getRecognizerFileName(grammar.name, grammar.type);
69         fileName = fileName.substring(0, fileName.length() - 2) + extName;
70 
71         generator.write(headerFileST, fileName);
72     }
73 
chooseWhereCyclicDFAsGo(Tool tool, CodeGenerator generator, Grammar grammar, ST recognizerST, ST cyclicDFAST)74     protected ST chooseWhereCyclicDFAsGo(Tool tool,
75             CodeGenerator generator,
76             Grammar grammar,
77             ST recognizerST,
78             ST cyclicDFAST) {
79         return recognizerST;
80     }
81 
82     /** Is scope in @scope::name {action} valid for this kind of grammar?
83      *  Targets like C++ may want to allow new scopes like headerfile or
84      *  some such.  The action names themselves are not policed at the
85      *  moment so targets can add template actions w/o having to recompile
86      *  ANTLR.
87      */
88     @Override
isValidActionScope(int grammarType, String scope)89     public boolean isValidActionScope(int grammarType, String scope) {
90         switch (grammarType) {
91             case Grammar.LEXER:
92                 if (scope.equals("lexer")) {
93                     return true;
94                 }
95                 if (scope.equals("header")) {
96                     return true;
97                 }
98                 if (scope.equals("includes")) {
99                     return true;
100                 }
101                 if (scope.equals("preincludes")) {
102                     return true;
103                 }
104                 if (scope.equals("overrides")) {
105                     return true;
106                 }
107                 break;
108             case Grammar.PARSER:
109                 if (scope.equals("parser")) {
110                     return true;
111                 }
112                 if (scope.equals("header")) {
113                     return true;
114                 }
115                 if (scope.equals("includes")) {
116                     return true;
117                 }
118                 if (scope.equals("preincludes")) {
119                     return true;
120                 }
121                 if (scope.equals("overrides")) {
122                     return true;
123                 }
124                 break;
125             case Grammar.COMBINED:
126                 if (scope.equals("parser")) {
127                     return true;
128                 }
129                 if (scope.equals("lexer")) {
130                     return true;
131                 }
132                 if (scope.equals("header")) {
133                     return true;
134                 }
135                 if (scope.equals("includes")) {
136                     return true;
137                 }
138                 if (scope.equals("preincludes")) {
139                     return true;
140                 }
141                 if (scope.equals("overrides")) {
142                     return true;
143                 }
144                 break;
145             case Grammar.TREE_PARSER:
146                 if (scope.equals("treeparser")) {
147                     return true;
148                 }
149                 if (scope.equals("header")) {
150                     return true;
151                 }
152                 if (scope.equals("includes")) {
153                     return true;
154                 }
155                 if (scope.equals("preincludes")) {
156                     return true;
157                 }
158                 if (scope.equals("overrides")) {
159                     return true;
160                 }
161                 break;
162         }
163         return false;
164     }
165 
166     @Override
getTargetCharLiteralFromANTLRCharLiteral( CodeGenerator generator, String literal)167     public String getTargetCharLiteralFromANTLRCharLiteral(
168             CodeGenerator generator,
169             String literal) {
170 
171         if (literal.startsWith("'\\u")) {
172             literal = "0x" + literal.substring(3, 7);
173         } else {
174             int c = literal.charAt(1);
175 
176             if (c < 32 || c > 127) {
177                 literal = "0x" + Integer.toHexString(c);
178             }
179         }
180 
181         return literal;
182     }
183 
184     /** Convert from an ANTLR string literal found in a grammar file to
185      *  an equivalent string literal in the C target.
186      *  Because we must support Unicode character sets and have chosen
187      *  to have the lexer match UTF32 characters, then we must encode
188      *  string matches to use 32 bit character arrays. Here then we
189      *  must produce the C array and cater for the case where the
190      *  lexer has been encoded with a string such as 'xyz\n',
191      */
192     @Override
getTargetStringLiteralFromANTLRStringLiteral( CodeGenerator generator, String literal)193     public String getTargetStringLiteralFromANTLRStringLiteral(
194             CodeGenerator generator,
195             String literal) {
196         int index;
197         String bytes;
198         StringBuilder buf = new StringBuilder();
199 
200         buf.append("{ ");
201 
202         // We need ot lose any escaped characters of the form \x and just
203         // replace them with their actual values as well as lose the surrounding
204         // quote marks.
205         //
206         for (int i = 1; i < literal.length() - 1; i++) {
207             buf.append("0x");
208 
209             if (literal.charAt(i) == '\\') {
210                 i++; // Assume that there is a next character, this will just yield
211                 // invalid strings if not, which is what the input would be of course - invalid
212                 switch (literal.charAt(i)) {
213                     case 'u':
214                     case 'U':
215                         buf.append(literal.substring(i + 1, i + 5));  // Already a hex string
216                         i = i + 5;                                // Move to next string/char/escape
217                         break;
218 
219                     case 'n':
220                     case 'N':
221 
222                         buf.append("0A");
223                         break;
224 
225                     case 'r':
226                     case 'R':
227 
228                         buf.append("0D");
229                         break;
230 
231                     case 't':
232                     case 'T':
233 
234                         buf.append("09");
235                         break;
236 
237                     case 'b':
238                     case 'B':
239 
240                         buf.append("08");
241                         break;
242 
243                     case 'f':
244                     case 'F':
245 
246                         buf.append("0C");
247                         break;
248 
249                     default:
250 
251                         // Anything else is what it is!
252                         //
253                         buf.append(Integer.toHexString((int) literal.charAt(i)).toUpperCase());
254                         break;
255                 }
256             } else {
257                 buf.append(Integer.toHexString((int) literal.charAt(i)).toUpperCase());
258             }
259             buf.append(", ");
260         }
261         buf.append(" ANTLR3_STRING_TERMINATOR}");
262 
263         bytes = buf.toString();
264         index = strings.indexOf(bytes);
265 
266         if (index == -1) {
267             strings.add(bytes);
268             index = strings.indexOf(bytes);
269         }
270 
271         String strref = "lit_" + String.valueOf(index + 1);
272 
273         return strref;
274     }
275 
276     /**
277      * Overrides the standard grammar analysis so we can prepare the analyser
278      * a little differently from the other targets.
279      *
280      * In particular we want to influence the way the code generator makes assumptions about
281      * switchs vs ifs, vs table driven DFAs. In general, C code should be generated that
282      * has the minimum use of tables, and tha meximum use of large switch statements. This
283      * allows the optimizers to generate very efficient code, it can reduce object code size
284      * by about 30% and give about a 20% performance improvement over not doing this. Hence,
285      * for the C target only, we change the defaults here, but only if they are still set to the
286      * defaults.
287      *
288      * @param generator An instance of the generic code generator class.
289      * @param grammar The grammar that we are currently analyzing
290      */
291     @Override
performGrammarAnalysis(CodeGenerator generator, Grammar grammar)292     protected void performGrammarAnalysis(CodeGenerator generator, Grammar grammar) {
293 
294         // Check to see if the maximum inline DFA states is still set to
295         // the default size. If it is then whack it all the way up to the maximum that
296         // we can sensibly get away with.
297         //
298         if (CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE == CodeGenerator.MADSI_DEFAULT ) {
299 
300             CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE = 65535;
301         }
302 
303         // Check to see if the maximum switch size is still set to the default
304         // and bring it up much higher if it is. Modern C compilers can handle
305         // much bigger switch statements than say Java can and if anyone finds a compiler
306         // that cannot deal with such big switches, all the need do is generate the
307         // code with a reduced -Xmaxswitchcaselabels nnn
308         //
309         if  (CodeGenerator.MAX_SWITCH_CASE_LABELS == CodeGenerator.MSCL_DEFAULT) {
310 
311             CodeGenerator.MAX_SWITCH_CASE_LABELS = 3000;
312         }
313 
314         // Check to see if the number of transitions considered a miminum for using
315         // a switch is still at the default. Because a switch is still generally faster than
316         // an if even with small sets, and given that the optimizer will do the best thing with it
317         // anyway, then we simply want to generate a switch for any number of states.
318         //
319         if (CodeGenerator.MIN_SWITCH_ALTS == CodeGenerator.MSA_DEFAULT) {
320 
321             CodeGenerator.MIN_SWITCH_ALTS = 1;
322         }
323 
324         // Now we allow the superclass implementation to do whatever it feels it
325         // must do.
326         //
327         super.performGrammarAnalysis(generator, grammar);
328     }
329 }
330 
331