1 #include <cctype>
2 #include <cstdio>
3 #include <cstdlib>
4 #include <map>
5 #include <string>
6 #include <vector>
7 
8 //===----------------------------------------------------------------------===//
9 // Lexer
10 //===----------------------------------------------------------------------===//
11 
12 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
13 // of these for known things.
14 enum Token {
15   tok_eof = -1,
16 
17   // commands
18   tok_def = -2, tok_extern = -3,
19 
20   // primary
21   tok_identifier = -4, tok_number = -5
22 };
23 
24 static std::string IdentifierStr;  // Filled in if tok_identifier
25 static double NumVal;              // Filled in if tok_number
26 
27 /// gettok - Return the next token from standard input.
gettok()28 static int gettok() {
29   static int LastChar = ' ';
30 
31   // Skip any whitespace.
32   while (isspace(LastChar))
33     LastChar = getchar();
34 
35   if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
36     IdentifierStr = LastChar;
37     while (isalnum((LastChar = getchar())))
38       IdentifierStr += LastChar;
39 
40     if (IdentifierStr == "def") return tok_def;
41     if (IdentifierStr == "extern") return tok_extern;
42     return tok_identifier;
43   }
44 
45   if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
46     std::string NumStr;
47     do {
48       NumStr += LastChar;
49       LastChar = getchar();
50     } while (isdigit(LastChar) || LastChar == '.');
51 
52     NumVal = strtod(NumStr.c_str(), 0);
53     return tok_number;
54   }
55 
56   if (LastChar == '#') {
57     // Comment until end of line.
58     do LastChar = getchar();
59     while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
60 
61     if (LastChar != EOF)
62       return gettok();
63   }
64 
65   // Check for end of file.  Don't eat the EOF.
66   if (LastChar == EOF)
67     return tok_eof;
68 
69   // Otherwise, just return the character as its ascii value.
70   int ThisChar = LastChar;
71   LastChar = getchar();
72   return ThisChar;
73 }
74 
75 //===----------------------------------------------------------------------===//
76 // Abstract Syntax Tree (aka Parse Tree)
77 //===----------------------------------------------------------------------===//
78 namespace {
79 /// ExprAST - Base class for all expression nodes.
80 class ExprAST {
81 public:
~ExprAST()82   virtual ~ExprAST() {}
83 };
84 
85 /// NumberExprAST - Expression class for numeric literals like "1.0".
86 class NumberExprAST : public ExprAST {
87 public:
NumberExprAST(double val)88   NumberExprAST(double val) {}
89 };
90 
91 /// VariableExprAST - Expression class for referencing a variable, like "a".
92 class VariableExprAST : public ExprAST {
93   std::string Name;
94 public:
VariableExprAST(const std::string & name)95   VariableExprAST(const std::string &name) : Name(name) {}
96 };
97 
98 /// BinaryExprAST - Expression class for a binary operator.
99 class BinaryExprAST : public ExprAST {
100 public:
BinaryExprAST(char op,ExprAST * lhs,ExprAST * rhs)101   BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) {}
102 };
103 
104 /// CallExprAST - Expression class for function calls.
105 class CallExprAST : public ExprAST {
106   std::string Callee;
107   std::vector<ExprAST*> Args;
108 public:
CallExprAST(const std::string & callee,std::vector<ExprAST * > & args)109   CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
110     : Callee(callee), Args(args) {}
111 };
112 
113 /// PrototypeAST - This class represents the "prototype" for a function,
114 /// which captures its name, and its argument names (thus implicitly the number
115 /// of arguments the function takes).
116 class PrototypeAST {
117   std::string Name;
118   std::vector<std::string> Args;
119 public:
PrototypeAST(const std::string & name,const std::vector<std::string> & args)120   PrototypeAST(const std::string &name, const std::vector<std::string> &args)
121     : Name(name), Args(args) {}
122 
123 };
124 
125 /// FunctionAST - This class represents a function definition itself.
126 class FunctionAST {
127 public:
FunctionAST(PrototypeAST * proto,ExprAST * body)128   FunctionAST(PrototypeAST *proto, ExprAST *body) {}
129 };
130 } // end anonymous namespace
131 
132 //===----------------------------------------------------------------------===//
133 // Parser
134 //===----------------------------------------------------------------------===//
135 
136 /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
137 /// token the parser is looking at.  getNextToken reads another token from the
138 /// lexer and updates CurTok with its results.
139 static int CurTok;
getNextToken()140 static int getNextToken() {
141   return CurTok = gettok();
142 }
143 
144 /// BinopPrecedence - This holds the precedence for each binary operator that is
145 /// defined.
146 static std::map<char, int> BinopPrecedence;
147 
148 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
GetTokPrecedence()149 static int GetTokPrecedence() {
150   if (!isascii(CurTok))
151     return -1;
152 
153   // Make sure it's a declared binop.
154   int TokPrec = BinopPrecedence[CurTok];
155   if (TokPrec <= 0) return -1;
156   return TokPrec;
157 }
158 
159 /// Error* - These are little helper functions for error handling.
Error(const char * Str)160 ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
ErrorP(const char * Str)161 PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
162 
163 static ExprAST *ParseExpression();
164 
165 /// identifierexpr
166 ///   ::= identifier
167 ///   ::= identifier '(' expression* ')'
ParseIdentifierExpr()168 static ExprAST *ParseIdentifierExpr() {
169   std::string IdName = IdentifierStr;
170 
171   getNextToken();  // eat identifier.
172 
173   if (CurTok != '(') // Simple variable ref.
174     return new VariableExprAST(IdName);
175 
176   // Call.
177   getNextToken();  // eat (
178   std::vector<ExprAST*> Args;
179   if (CurTok != ')') {
180     while (1) {
181       ExprAST *Arg = ParseExpression();
182       if (!Arg) return 0;
183       Args.push_back(Arg);
184 
185       if (CurTok == ')') break;
186 
187       if (CurTok != ',')
188         return Error("Expected ')' or ',' in argument list");
189       getNextToken();
190     }
191   }
192 
193   // Eat the ')'.
194   getNextToken();
195 
196   return new CallExprAST(IdName, Args);
197 }
198 
199 /// numberexpr ::= number
ParseNumberExpr()200 static ExprAST *ParseNumberExpr() {
201   ExprAST *Result = new NumberExprAST(NumVal);
202   getNextToken(); // consume the number
203   return Result;
204 }
205 
206 /// parenexpr ::= '(' expression ')'
ParseParenExpr()207 static ExprAST *ParseParenExpr() {
208   getNextToken();  // eat (.
209   ExprAST *V = ParseExpression();
210   if (!V) return 0;
211 
212   if (CurTok != ')')
213     return Error("expected ')'");
214   getNextToken();  // eat ).
215   return V;
216 }
217 
218 /// primary
219 ///   ::= identifierexpr
220 ///   ::= numberexpr
221 ///   ::= parenexpr
ParsePrimary()222 static ExprAST *ParsePrimary() {
223   switch (CurTok) {
224   default: return Error("unknown token when expecting an expression");
225   case tok_identifier: return ParseIdentifierExpr();
226   case tok_number:     return ParseNumberExpr();
227   case '(':            return ParseParenExpr();
228   }
229 }
230 
231 /// binoprhs
232 ///   ::= ('+' primary)*
ParseBinOpRHS(int ExprPrec,ExprAST * LHS)233 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
234   // If this is a binop, find its precedence.
235   while (1) {
236     int TokPrec = GetTokPrecedence();
237 
238     // If this is a binop that binds at least as tightly as the current binop,
239     // consume it, otherwise we are done.
240     if (TokPrec < ExprPrec)
241       return LHS;
242 
243     // Okay, we know this is a binop.
244     int BinOp = CurTok;
245     getNextToken();  // eat binop
246 
247     // Parse the primary expression after the binary operator.
248     ExprAST *RHS = ParsePrimary();
249     if (!RHS) return 0;
250 
251     // If BinOp binds less tightly with RHS than the operator after RHS, let
252     // the pending operator take RHS as its LHS.
253     int NextPrec = GetTokPrecedence();
254     if (TokPrec < NextPrec) {
255       RHS = ParseBinOpRHS(TokPrec+1, RHS);
256       if (RHS == 0) return 0;
257     }
258 
259     // Merge LHS/RHS.
260     LHS = new BinaryExprAST(BinOp, LHS, RHS);
261   }
262 }
263 
264 /// expression
265 ///   ::= primary binoprhs
266 ///
ParseExpression()267 static ExprAST *ParseExpression() {
268   ExprAST *LHS = ParsePrimary();
269   if (!LHS) return 0;
270 
271   return ParseBinOpRHS(0, LHS);
272 }
273 
274 /// prototype
275 ///   ::= id '(' id* ')'
ParsePrototype()276 static PrototypeAST *ParsePrototype() {
277   if (CurTok != tok_identifier)
278     return ErrorP("Expected function name in prototype");
279 
280   std::string FnName = IdentifierStr;
281   getNextToken();
282 
283   if (CurTok != '(')
284     return ErrorP("Expected '(' in prototype");
285 
286   std::vector<std::string> ArgNames;
287   while (getNextToken() == tok_identifier)
288     ArgNames.push_back(IdentifierStr);
289   if (CurTok != ')')
290     return ErrorP("Expected ')' in prototype");
291 
292   // success.
293   getNextToken();  // eat ')'.
294 
295   return new PrototypeAST(FnName, ArgNames);
296 }
297 
298 /// definition ::= 'def' prototype expression
ParseDefinition()299 static FunctionAST *ParseDefinition() {
300   getNextToken();  // eat def.
301   PrototypeAST *Proto = ParsePrototype();
302   if (Proto == 0) return 0;
303 
304   if (ExprAST *E = ParseExpression())
305     return new FunctionAST(Proto, E);
306   return 0;
307 }
308 
309 /// toplevelexpr ::= expression
ParseTopLevelExpr()310 static FunctionAST *ParseTopLevelExpr() {
311   if (ExprAST *E = ParseExpression()) {
312     // Make an anonymous proto.
313     PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
314     return new FunctionAST(Proto, E);
315   }
316   return 0;
317 }
318 
319 /// external ::= 'extern' prototype
ParseExtern()320 static PrototypeAST *ParseExtern() {
321   getNextToken();  // eat extern.
322   return ParsePrototype();
323 }
324 
325 //===----------------------------------------------------------------------===//
326 // Top-Level parsing
327 //===----------------------------------------------------------------------===//
328 
HandleDefinition()329 static void HandleDefinition() {
330   if (ParseDefinition()) {
331     fprintf(stderr, "Parsed a function definition.\n");
332   } else {
333     // Skip token for error recovery.
334     getNextToken();
335   }
336 }
337 
HandleExtern()338 static void HandleExtern() {
339   if (ParseExtern()) {
340     fprintf(stderr, "Parsed an extern\n");
341   } else {
342     // Skip token for error recovery.
343     getNextToken();
344   }
345 }
346 
HandleTopLevelExpression()347 static void HandleTopLevelExpression() {
348   // Evaluate a top-level expression into an anonymous function.
349   if (ParseTopLevelExpr()) {
350     fprintf(stderr, "Parsed a top-level expr\n");
351   } else {
352     // Skip token for error recovery.
353     getNextToken();
354   }
355 }
356 
357 /// top ::= definition | external | expression | ';'
MainLoop()358 static void MainLoop() {
359   while (1) {
360     fprintf(stderr, "ready> ");
361     switch (CurTok) {
362     case tok_eof:    return;
363     case ';':        getNextToken(); break;  // ignore top-level semicolons.
364     case tok_def:    HandleDefinition(); break;
365     case tok_extern: HandleExtern(); break;
366     default:         HandleTopLevelExpression(); break;
367     }
368   }
369 }
370 
371 //===----------------------------------------------------------------------===//
372 // Main driver code.
373 //===----------------------------------------------------------------------===//
374 
main()375 int main() {
376   // Install standard binary operators.
377   // 1 is lowest precedence.
378   BinopPrecedence['<'] = 10;
379   BinopPrecedence['+'] = 20;
380   BinopPrecedence['-'] = 20;
381   BinopPrecedence['*'] = 40;  // highest.
382 
383   // Prime the first token.
384   fprintf(stderr, "ready> ");
385   getNextToken();
386 
387   // Run the main "interpreter loop" now.
388   MainLoop();
389 
390   return 0;
391 }
392