1 /* 2 * Copyright 2017 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #include "NFAtoDFA.h" 9 #include "RegexParser.h" 10 11 #include <fstream> 12 #include <sstream> 13 #include <string> 14 15 /** 16 * Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex 17 * file is a text file with one token definition per line. Each line is of the form: 18 * <TOKEN_NAME> = <pattern> 19 * where <pattern> is either a regular expression (e.g [0-9]) or a double-quoted literal string. 20 */ 21 22 static constexpr const char* HEADER = 23 "/*\n" 24 " * Copyright 2017 Google Inc.\n" 25 " *\n" 26 " * Use of this source code is governed by a BSD-style license that can be\n" 27 " * found in the LICENSE file.\n" 28 " */\n" 29 "/*****************************************************************************************\n" 30 " ******************** This file was generated by sksllex. Do not edit. *******************\n" 31 " *****************************************************************************************/\n"; 32 33 void writeH(const DFA& dfa, const char* lexer, const char* token, 34 const std::vector<std::string>& tokens, const char* hPath) { 35 std::ofstream out(hPath); 36 SkASSERT(out.good()); 37 out << HEADER; 38 out << "#ifndef SKSL_" << lexer << "\n"; 39 out << "#define SKSL_" << lexer << "\n"; 40 out << "#include <cstddef>\n"; 41 out << "#include <cstdint>\n"; 42 out << "namespace SkSL {\n"; 43 out << "\n"; 44 out << "struct " << token << " {\n"; 45 out << " enum Kind {\n"; 46 for (const std::string& t : tokens) { 47 out << " #undef " << t << "\n"; 48 out << " " << t << ",\n"; 49 } 50 out << " };\n"; 51 out << "\n"; 52 out << " " << token << "()\n"; 53 out << " : fKind(Kind::INVALID)\n"; 54 out << " , fOffset(-1)\n"; 55 out << " , fLength(-1) {}\n"; 56 out << "\n"; 57 out << " " << token << "(Kind kind, int32_t offset, int32_t length)\n"; 58 out << " : fKind(kind)\n"; 59 out << " , fOffset(offset)\n"; 60 out << " , fLength(length) {}\n"; 61 out << "\n"; 62 out << " Kind fKind;\n"; 63 out << " int fOffset;\n"; 64 out << " int fLength;\n"; 65 out << "};\n"; 66 out << "\n"; 67 out << "class " << lexer << " {\n"; 68 out << "public:\n"; 69 out << " void start(const char* text, int32_t length) {\n"; 70 out << " fText = text;\n"; 71 out << " fLength = length;\n"; 72 out << " fOffset = 0;\n"; 73 out << " }\n"; 74 out << "\n"; 75 out << " " << token << " next();\n"; 76 out << "\n"; 77 out << "private:\n"; 78 out << " const char* fText;\n"; 79 out << " int32_t fLength;\n"; 80 out << " int32_t fOffset;\n"; 81 out << "};\n"; 82 out << "\n"; 83 out << "} // namespace\n"; 84 out << "#endif\n"; 85 } 86 87 void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include, 88 const char* cppPath) { 89 std::ofstream out(cppPath); 90 SkASSERT(out.good()); 91 out << HEADER; 92 out << "#include \"" << include << "\"\n"; 93 out << "\n"; 94 out << "namespace SkSL {\n"; 95 out << "\n"; 96 97 size_t states = 0; 98 for (const auto& row : dfa.fTransitions) { 99 states = std::max(states, row.size()); 100 } 101 out << "static int8_t mappings[" << dfa.fCharMappings.size() << "] = {\n "; 102 const char* separator = ""; 103 for (int m : dfa.fCharMappings) { 104 out << separator << std::to_string(m); 105 separator = ", "; 106 } 107 out << "\n};\n"; 108 out << "static int16_t transitions[" << dfa.fTransitions.size() << "][" << states << "] = {\n"; 109 for (size_t c = 0; c < dfa.fTransitions.size(); ++c) { 110 out << " {"; 111 for (size_t j = 0; j < states; ++j) { 112 if ((size_t) c < dfa.fTransitions.size() && j < dfa.fTransitions[c].size()) { 113 out << " " << dfa.fTransitions[c][j] << ","; 114 } else { 115 out << " 0,"; 116 } 117 } 118 out << " },\n"; 119 } 120 out << "};\n"; 121 out << "\n"; 122 123 out << "static int8_t accepts[" << states << "] = {"; 124 for (size_t i = 0; i < states; ++i) { 125 if (i < dfa.fAccepts.size()) { 126 out << " " << dfa.fAccepts[i] << ","; 127 } else { 128 out << " " << INVALID << ","; 129 } 130 } 131 out << " };\n"; 132 out << "\n"; 133 134 out << token << " " << lexer << "::next() {\n"; 135 out << " // note that we cheat here: normally a lexer needs to worry about the case\n"; 136 out << " // where a token has a prefix which is not itself a valid token - for instance, \n"; 137 out << " // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid\n"; 138 out << " // tokens. Our grammar doesn't have this property, so we can simplify the logic\n"; 139 out << " // a bit.\n"; 140 out << " int32_t startOffset = fOffset;\n"; 141 out << " if (startOffset == fLength) {\n"; 142 out << " return " << token << "(" << token << "::END_OF_FILE, startOffset, 0);\n"; 143 out << " }\n"; 144 out << " int16_t state = 1;\n"; 145 out << " while (fOffset < fLength) {\n"; 146 out << " if ((uint8_t) fText[fOffset] >= " << dfa.fCharMappings.size() << ") {"; 147 out << " ++fOffset;\n"; 148 out << " break;"; 149 out << " }"; 150 out << " int16_t newState = transitions[mappings[(int) fText[fOffset]]][state];\n"; 151 out << " if (!newState) {\n"; 152 out << " break;\n"; 153 out << " }\n"; 154 out << " state = newState;"; 155 out << " ++fOffset;\n"; 156 out << " }\n"; 157 out << " Token::Kind kind = (" << token << "::Kind) accepts[state];\n"; 158 out << " return " << token << "(kind, startOffset, fOffset - startOffset);\n"; 159 out << "}\n"; 160 out << "\n"; 161 out << "} // namespace\n"; 162 } 163 164 void process(const char* inPath, const char* lexer, const char* token, const char* hPath, 165 const char* cppPath) { 166 NFA nfa; 167 std::vector<std::string> tokens; 168 tokens.push_back("END_OF_FILE"); 169 std::string line; 170 std::ifstream in(inPath); 171 while (std::getline(in, line)) { 172 std::istringstream split(line); 173 std::string name, delimiter, pattern; 174 if (split >> name >> delimiter >> pattern) { 175 SkASSERT(split.eof()); 176 SkASSERT(name != ""); 177 SkASSERT(delimiter == "="); 178 SkASSERT(pattern != ""); 179 tokens.push_back(name); 180 if (pattern[0] == '"') { 181 SkASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"'); 182 RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]); 183 for (size_t i = 2; i < pattern.size() - 1; ++i) { 184 node = RegexNode(RegexNode::kConcat_Kind, node, 185 RegexNode(RegexNode::kChar_Kind, pattern[i])); 186 } 187 nfa.addRegex(node); 188 } 189 else { 190 nfa.addRegex(RegexParser().parse(pattern)); 191 } 192 } 193 } 194 NFAtoDFA converter(&nfa); 195 DFA dfa = converter.convert(); 196 writeH(dfa, lexer, token, tokens, hPath); 197 writeCPP(dfa, lexer, token, (std::string("SkSL") + lexer + ".h").c_str(), cppPath); 198 } 199 200 int main(int argc, const char** argv) { 201 if (argc != 6) { 202 printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n"); 203 exit(1); 204 } 205 process(argv[1], argv[2], argv[3], argv[4], argv[5]); 206 return 0; 207 } 208