/* * [The "BSD license"] * Copyright (c) 2010 Terence Parr * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.antlr.codegen; import org.antlr.Tool; import org.stringtemplate.v4.ST; import org.antlr.tool.Grammar; import java.io.IOException; import java.util.ArrayList; import java.util.List; public class CTarget extends Target { List strings = new ArrayList(); @Override protected void genRecognizerFile(Tool tool, CodeGenerator generator, Grammar grammar, ST outputFileST) throws IOException { // Before we write this, and cause it to generate its string, // we need to add all the string literals that we are going to match // outputFileST.add("literals", strings); String fileName = generator.getRecognizerFileName(grammar.name, grammar.type); generator.write(outputFileST, fileName); } @Override protected void genRecognizerHeaderFile(Tool tool, CodeGenerator generator, Grammar grammar, ST headerFileST, String extName) throws IOException { // Pick up the file name we are generating. This method will return a // a file suffixed with .c, so we must substring and add the extName // to it as we cannot assign into strings in Java. /// String fileName = generator.getRecognizerFileName(grammar.name, grammar.type); fileName = fileName.substring(0, fileName.length() - 2) + extName; generator.write(headerFileST, fileName); } protected ST chooseWhereCyclicDFAsGo(Tool tool, CodeGenerator generator, Grammar grammar, ST recognizerST, ST cyclicDFAST) { return recognizerST; } /** Is scope in @scope::name {action} valid for this kind of grammar? * Targets like C++ may want to allow new scopes like headerfile or * some such. The action names themselves are not policed at the * moment so targets can add template actions w/o having to recompile * ANTLR. */ @Override public boolean isValidActionScope(int grammarType, String scope) { switch (grammarType) { case Grammar.LEXER: if (scope.equals("lexer")) { return true; } if (scope.equals("header")) { return true; } if (scope.equals("includes")) { return true; } if (scope.equals("preincludes")) { return true; } if (scope.equals("overrides")) { return true; } break; case Grammar.PARSER: if (scope.equals("parser")) { return true; } if (scope.equals("header")) { return true; } if (scope.equals("includes")) { return true; } if (scope.equals("preincludes")) { return true; } if (scope.equals("overrides")) { return true; } break; case Grammar.COMBINED: if (scope.equals("parser")) { return true; } if (scope.equals("lexer")) { return true; } if (scope.equals("header")) { return true; } if (scope.equals("includes")) { return true; } if (scope.equals("preincludes")) { return true; } if (scope.equals("overrides")) { return true; } break; case Grammar.TREE_PARSER: if (scope.equals("treeparser")) { return true; } if (scope.equals("header")) { return true; } if (scope.equals("includes")) { return true; } if (scope.equals("preincludes")) { return true; } if (scope.equals("overrides")) { return true; } break; } return false; } @Override public String getTargetCharLiteralFromANTLRCharLiteral( CodeGenerator generator, String literal) { if (literal.startsWith("'\\u")) { literal = "0x" + literal.substring(3, 7); } else { int c = literal.charAt(1); if (c < 32 || c > 127) { literal = "0x" + Integer.toHexString(c); } } return literal; } /** Convert from an ANTLR string literal found in a grammar file to * an equivalent string literal in the C target. * Because we must support Unicode character sets and have chosen * to have the lexer match UTF32 characters, then we must encode * string matches to use 32 bit character arrays. Here then we * must produce the C array and cater for the case where the * lexer has been encoded with a string such as 'xyz\n', */ @Override public String getTargetStringLiteralFromANTLRStringLiteral( CodeGenerator generator, String literal) { int index; String bytes; StringBuilder buf = new StringBuilder(); buf.append("{ "); // We need ot lose any escaped characters of the form \x and just // replace them with their actual values as well as lose the surrounding // quote marks. // for (int i = 1; i < literal.length() - 1; i++) { buf.append("0x"); if (literal.charAt(i) == '\\') { i++; // Assume that there is a next character, this will just yield // invalid strings if not, which is what the input would be of course - invalid switch (literal.charAt(i)) { case 'u': case 'U': buf.append(literal.substring(i + 1, i + 5)); // Already a hex string i = i + 5; // Move to next string/char/escape break; case 'n': case 'N': buf.append("0A"); break; case 'r': case 'R': buf.append("0D"); break; case 't': case 'T': buf.append("09"); break; case 'b': case 'B': buf.append("08"); break; case 'f': case 'F': buf.append("0C"); break; default: // Anything else is what it is! // buf.append(Integer.toHexString((int) literal.charAt(i)).toUpperCase()); break; } } else { buf.append(Integer.toHexString((int) literal.charAt(i)).toUpperCase()); } buf.append(", "); } buf.append(" ANTLR3_STRING_TERMINATOR}"); bytes = buf.toString(); index = strings.indexOf(bytes); if (index == -1) { strings.add(bytes); index = strings.indexOf(bytes); } String strref = "lit_" + String.valueOf(index + 1); return strref; } /** * Overrides the standard grammar analysis so we can prepare the analyser * a little differently from the other targets. * * In particular we want to influence the way the code generator makes assumptions about * switchs vs ifs, vs table driven DFAs. In general, C code should be generated that * has the minimum use of tables, and tha meximum use of large switch statements. This * allows the optimizers to generate very efficient code, it can reduce object code size * by about 30% and give about a 20% performance improvement over not doing this. Hence, * for the C target only, we change the defaults here, but only if they are still set to the * defaults. * * @param generator An instance of the generic code generator class. * @param grammar The grammar that we are currently analyzing */ @Override protected void performGrammarAnalysis(CodeGenerator generator, Grammar grammar) { // Check to see if the maximum inline DFA states is still set to // the default size. If it is then whack it all the way up to the maximum that // we can sensibly get away with. // if (CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE == CodeGenerator.MADSI_DEFAULT ) { CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE = 65535; } // Check to see if the maximum switch size is still set to the default // and bring it up much higher if it is. Modern C compilers can handle // much bigger switch statements than say Java can and if anyone finds a compiler // that cannot deal with such big switches, all the need do is generate the // code with a reduced -Xmaxswitchcaselabels nnn // if (CodeGenerator.MAX_SWITCH_CASE_LABELS == CodeGenerator.MSCL_DEFAULT) { CodeGenerator.MAX_SWITCH_CASE_LABELS = 3000; } // Check to see if the number of transitions considered a miminum for using // a switch is still at the default. Because a switch is still generally faster than // an if even with small sets, and given that the optimizer will do the best thing with it // anyway, then we simply want to generate a switch for any number of states. // if (CodeGenerator.MIN_SWITCH_ALTS == CodeGenerator.MSA_DEFAULT) { CodeGenerator.MIN_SWITCH_ALTS = 1; } // Now we allow the superclass implementation to do whatever it feels it // must do. // super.performGrammarAnalysis(generator, grammar); } }