1 #define MINIMAL_STDERR_OUTPUT
2 
3 #include "llvm/Analysis/Passes.h"
4 #include "llvm/ExecutionEngine/ExecutionEngine.h"
5 #include "llvm/IR/DataLayout.h"
6 #include "llvm/IR/DerivedTypes.h"
7 #include "llvm/IR/IRBuilder.h"
8 #include "llvm/IR/LLVMContext.h"
9 #include "llvm/IR/LegacyPassManager.h"
10 #include "llvm/IR/Module.h"
11 #include "llvm/IR/Verifier.h"
12 #include "llvm/Support/TargetSelect.h"
13 #include "llvm/Transforms/Scalar.h"
14 #include <cctype>
15 #include <cstdio>
16 #include <map>
17 #include <string>
18 #include <vector>
19 
20 using namespace llvm;
21 
22 //===----------------------------------------------------------------------===//
23 // Lexer
24 //===----------------------------------------------------------------------===//
25 
26 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
27 // of these for known things.
28 enum Token {
29   tok_eof = -1,
30 
31   // commands
32   tok_def = -2, tok_extern = -3,
33 
34   // primary
35   tok_identifier = -4, tok_number = -5,
36 
37   // control
38   tok_if = -6, tok_then = -7, tok_else = -8,
39   tok_for = -9, tok_in = -10,
40 
41   // operators
42   tok_binary = -11, tok_unary = -12,
43 
44   // var definition
45   tok_var = -13
46 };
47 
48 static std::string IdentifierStr;  // Filled in if tok_identifier
49 static double NumVal;              // Filled in if tok_number
50 
51 /// gettok - Return the next token from standard input.
gettok()52 static int gettok() {
53   static int LastChar = ' ';
54 
55   // Skip any whitespace.
56   while (isspace(LastChar))
57     LastChar = getchar();
58 
59   if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
60     IdentifierStr = LastChar;
61     while (isalnum((LastChar = getchar())))
62       IdentifierStr += LastChar;
63 
64     if (IdentifierStr == "def") return tok_def;
65     if (IdentifierStr == "extern") return tok_extern;
66     if (IdentifierStr == "if") return tok_if;
67     if (IdentifierStr == "then") return tok_then;
68     if (IdentifierStr == "else") return tok_else;
69     if (IdentifierStr == "for") return tok_for;
70     if (IdentifierStr == "in") return tok_in;
71     if (IdentifierStr == "binary") return tok_binary;
72     if (IdentifierStr == "unary") return tok_unary;
73     if (IdentifierStr == "var") return tok_var;
74     return tok_identifier;
75   }
76 
77   if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
78     std::string NumStr;
79     do {
80       NumStr += LastChar;
81       LastChar = getchar();
82     } while (isdigit(LastChar) || LastChar == '.');
83 
84     NumVal = strtod(NumStr.c_str(), 0);
85     return tok_number;
86   }
87 
88   if (LastChar == '#') {
89     // Comment until end of line.
90     do LastChar = getchar();
91     while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
92 
93     if (LastChar != EOF)
94       return gettok();
95   }
96 
97   // Check for end of file.  Don't eat the EOF.
98   if (LastChar == EOF)
99     return tok_eof;
100 
101   // Otherwise, just return the character as its ascii value.
102   int ThisChar = LastChar;
103   LastChar = getchar();
104   return ThisChar;
105 }
106 
107 //===----------------------------------------------------------------------===//
108 // Abstract Syntax Tree (aka Parse Tree)
109 //===----------------------------------------------------------------------===//
110 
111 /// ExprAST - Base class for all expression nodes.
112 class ExprAST {
113 public:
~ExprAST()114   virtual ~ExprAST() {}
115   virtual Value *Codegen() = 0;
116 };
117 
118 /// NumberExprAST - Expression class for numeric literals like "1.0".
119 class NumberExprAST : public ExprAST {
120   double Val;
121 public:
NumberExprAST(double val)122   NumberExprAST(double val) : Val(val) {}
123   virtual Value *Codegen();
124 };
125 
126 /// VariableExprAST - Expression class for referencing a variable, like "a".
127 class VariableExprAST : public ExprAST {
128   std::string Name;
129 public:
VariableExprAST(const std::string & name)130   VariableExprAST(const std::string &name) : Name(name) {}
getName() const131   const std::string &getName() const { return Name; }
132   virtual Value *Codegen();
133 };
134 
135 /// UnaryExprAST - Expression class for a unary operator.
136 class UnaryExprAST : public ExprAST {
137   char Opcode;
138   ExprAST *Operand;
139 public:
UnaryExprAST(char opcode,ExprAST * operand)140   UnaryExprAST(char opcode, ExprAST *operand)
141     : Opcode(opcode), Operand(operand) {}
142   virtual Value *Codegen();
143 };
144 
145 /// BinaryExprAST - Expression class for a binary operator.
146 class BinaryExprAST : public ExprAST {
147   char Op;
148   ExprAST *LHS, *RHS;
149 public:
BinaryExprAST(char op,ExprAST * lhs,ExprAST * rhs)150   BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
151     : Op(op), LHS(lhs), RHS(rhs) {}
152   virtual Value *Codegen();
153 };
154 
155 /// CallExprAST - Expression class for function calls.
156 class CallExprAST : public ExprAST {
157   std::string Callee;
158   std::vector<ExprAST*> Args;
159 public:
CallExprAST(const std::string & callee,std::vector<ExprAST * > & args)160   CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
161     : Callee(callee), Args(args) {}
162   virtual Value *Codegen();
163 };
164 
165 /// IfExprAST - Expression class for if/then/else.
166 class IfExprAST : public ExprAST {
167   ExprAST *Cond, *Then, *Else;
168 public:
IfExprAST(ExprAST * cond,ExprAST * then,ExprAST * _else)169   IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
170   : Cond(cond), Then(then), Else(_else) {}
171   virtual Value *Codegen();
172 };
173 
174 /// ForExprAST - Expression class for for/in.
175 class ForExprAST : public ExprAST {
176   std::string VarName;
177   ExprAST *Start, *End, *Step, *Body;
178 public:
ForExprAST(const std::string & varname,ExprAST * start,ExprAST * end,ExprAST * step,ExprAST * body)179   ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
180              ExprAST *step, ExprAST *body)
181     : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
182   virtual Value *Codegen();
183 };
184 
185 /// VarExprAST - Expression class for var/in
186 class VarExprAST : public ExprAST {
187   std::vector<std::pair<std::string, ExprAST*> > VarNames;
188   ExprAST *Body;
189 public:
VarExprAST(const std::vector<std::pair<std::string,ExprAST * >> & varnames,ExprAST * body)190   VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames,
191              ExprAST *body)
192   : VarNames(varnames), Body(body) {}
193 
194   virtual Value *Codegen();
195 };
196 
197 /// PrototypeAST - This class represents the "prototype" for a function,
198 /// which captures its argument names as well as if it is an operator.
199 class PrototypeAST {
200   std::string Name;
201   std::vector<std::string> Args;
202   bool isOperator;
203   unsigned Precedence;  // Precedence if a binary op.
204 public:
PrototypeAST(const std::string & name,const std::vector<std::string> & args,bool isoperator=false,unsigned prec=0)205   PrototypeAST(const std::string &name, const std::vector<std::string> &args,
206                bool isoperator = false, unsigned prec = 0)
207   : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
208 
isUnaryOp() const209   bool isUnaryOp() const { return isOperator && Args.size() == 1; }
isBinaryOp() const210   bool isBinaryOp() const { return isOperator && Args.size() == 2; }
211 
getOperatorName() const212   char getOperatorName() const {
213     assert(isUnaryOp() || isBinaryOp());
214     return Name[Name.size()-1];
215   }
216 
getBinaryPrecedence() const217   unsigned getBinaryPrecedence() const { return Precedence; }
218 
219   Function *Codegen();
220 
221   void CreateArgumentAllocas(Function *F);
222 };
223 
224 /// FunctionAST - This class represents a function definition itself.
225 class FunctionAST {
226   PrototypeAST *Proto;
227   ExprAST *Body;
228 public:
FunctionAST(PrototypeAST * proto,ExprAST * body)229   FunctionAST(PrototypeAST *proto, ExprAST *body)
230     : Proto(proto), Body(body) {}
231 
232   Function *Codegen();
233 };
234 
235 //===----------------------------------------------------------------------===//
236 // Parser
237 //===----------------------------------------------------------------------===//
238 
239 /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
240 /// token the parser is looking at.  getNextToken reads another token from the
241 /// lexer and updates CurTok with its results.
242 static int CurTok;
getNextToken()243 static int getNextToken() {
244   return CurTok = gettok();
245 }
246 
247 /// BinopPrecedence - This holds the precedence for each binary operator that is
248 /// defined.
249 static std::map<char, int> BinopPrecedence;
250 
251 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
GetTokPrecedence()252 static int GetTokPrecedence() {
253   if (!isascii(CurTok))
254     return -1;
255 
256   // Make sure it's a declared binop.
257   int TokPrec = BinopPrecedence[CurTok];
258   if (TokPrec <= 0) return -1;
259   return TokPrec;
260 }
261 
262 /// Error* - These are little helper functions for error handling.
Error(const char * Str)263 ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
ErrorP(const char * Str)264 PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
ErrorF(const char * Str)265 FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
266 
267 static ExprAST *ParseExpression();
268 
269 /// identifierexpr
270 ///   ::= identifier
271 ///   ::= identifier '(' expression* ')'
ParseIdentifierExpr()272 static ExprAST *ParseIdentifierExpr() {
273   std::string IdName = IdentifierStr;
274 
275   getNextToken();  // eat identifier.
276 
277   if (CurTok != '(') // Simple variable ref.
278     return new VariableExprAST(IdName);
279 
280   // Call.
281   getNextToken();  // eat (
282   std::vector<ExprAST*> Args;
283   if (CurTok != ')') {
284     while (1) {
285       ExprAST *Arg = ParseExpression();
286       if (!Arg) return 0;
287       Args.push_back(Arg);
288 
289       if (CurTok == ')') break;
290 
291       if (CurTok != ',')
292         return Error("Expected ')' or ',' in argument list");
293       getNextToken();
294     }
295   }
296 
297   // Eat the ')'.
298   getNextToken();
299 
300   return new CallExprAST(IdName, Args);
301 }
302 
303 /// numberexpr ::= number
ParseNumberExpr()304 static ExprAST *ParseNumberExpr() {
305   ExprAST *Result = new NumberExprAST(NumVal);
306   getNextToken(); // consume the number
307   return Result;
308 }
309 
310 /// parenexpr ::= '(' expression ')'
ParseParenExpr()311 static ExprAST *ParseParenExpr() {
312   getNextToken();  // eat (.
313   ExprAST *V = ParseExpression();
314   if (!V) return 0;
315 
316   if (CurTok != ')')
317     return Error("expected ')'");
318   getNextToken();  // eat ).
319   return V;
320 }
321 
322 /// ifexpr ::= 'if' expression 'then' expression 'else' expression
ParseIfExpr()323 static ExprAST *ParseIfExpr() {
324   getNextToken();  // eat the if.
325 
326   // condition.
327   ExprAST *Cond = ParseExpression();
328   if (!Cond) return 0;
329 
330   if (CurTok != tok_then)
331     return Error("expected then");
332   getNextToken();  // eat the then
333 
334   ExprAST *Then = ParseExpression();
335   if (Then == 0) return 0;
336 
337   if (CurTok != tok_else)
338     return Error("expected else");
339 
340   getNextToken();
341 
342   ExprAST *Else = ParseExpression();
343   if (!Else) return 0;
344 
345   return new IfExprAST(Cond, Then, Else);
346 }
347 
348 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
ParseForExpr()349 static ExprAST *ParseForExpr() {
350   getNextToken();  // eat the for.
351 
352   if (CurTok != tok_identifier)
353     return Error("expected identifier after for");
354 
355   std::string IdName = IdentifierStr;
356   getNextToken();  // eat identifier.
357 
358   if (CurTok != '=')
359     return Error("expected '=' after for");
360   getNextToken();  // eat '='.
361 
362 
363   ExprAST *Start = ParseExpression();
364   if (Start == 0) return 0;
365   if (CurTok != ',')
366     return Error("expected ',' after for start value");
367   getNextToken();
368 
369   ExprAST *End = ParseExpression();
370   if (End == 0) return 0;
371 
372   // The step value is optional.
373   ExprAST *Step = 0;
374   if (CurTok == ',') {
375     getNextToken();
376     Step = ParseExpression();
377     if (Step == 0) return 0;
378   }
379 
380   if (CurTok != tok_in)
381     return Error("expected 'in' after for");
382   getNextToken();  // eat 'in'.
383 
384   ExprAST *Body = ParseExpression();
385   if (Body == 0) return 0;
386 
387   return new ForExprAST(IdName, Start, End, Step, Body);
388 }
389 
390 /// varexpr ::= 'var' identifier ('=' expression)?
391 //                    (',' identifier ('=' expression)?)* 'in' expression
ParseVarExpr()392 static ExprAST *ParseVarExpr() {
393   getNextToken();  // eat the var.
394 
395   std::vector<std::pair<std::string, ExprAST*> > VarNames;
396 
397   // At least one variable name is required.
398   if (CurTok != tok_identifier)
399     return Error("expected identifier after var");
400 
401   while (1) {
402     std::string Name = IdentifierStr;
403     getNextToken();  // eat identifier.
404 
405     // Read the optional initializer.
406     ExprAST *Init = 0;
407     if (CurTok == '=') {
408       getNextToken(); // eat the '='.
409 
410       Init = ParseExpression();
411       if (Init == 0) return 0;
412     }
413 
414     VarNames.push_back(std::make_pair(Name, Init));
415 
416     // End of var list, exit loop.
417     if (CurTok != ',') break;
418     getNextToken(); // eat the ','.
419 
420     if (CurTok != tok_identifier)
421       return Error("expected identifier list after var");
422   }
423 
424   // At this point, we have to have 'in'.
425   if (CurTok != tok_in)
426     return Error("expected 'in' keyword after 'var'");
427   getNextToken();  // eat 'in'.
428 
429   ExprAST *Body = ParseExpression();
430   if (Body == 0) return 0;
431 
432   return new VarExprAST(VarNames, Body);
433 }
434 
435 /// primary
436 ///   ::= identifierexpr
437 ///   ::= numberexpr
438 ///   ::= parenexpr
439 ///   ::= ifexpr
440 ///   ::= forexpr
441 ///   ::= varexpr
ParsePrimary()442 static ExprAST *ParsePrimary() {
443   switch (CurTok) {
444   default: return Error("unknown token when expecting an expression");
445   case tok_identifier: return ParseIdentifierExpr();
446   case tok_number:     return ParseNumberExpr();
447   case '(':            return ParseParenExpr();
448   case tok_if:         return ParseIfExpr();
449   case tok_for:        return ParseForExpr();
450   case tok_var:        return ParseVarExpr();
451   }
452 }
453 
454 /// unary
455 ///   ::= primary
456 ///   ::= '!' unary
ParseUnary()457 static ExprAST *ParseUnary() {
458   // If the current token is not an operator, it must be a primary expr.
459   if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
460     return ParsePrimary();
461 
462   // If this is a unary operator, read it.
463   int Opc = CurTok;
464   getNextToken();
465   if (ExprAST *Operand = ParseUnary())
466     return new UnaryExprAST(Opc, Operand);
467   return 0;
468 }
469 
470 /// binoprhs
471 ///   ::= ('+' unary)*
ParseBinOpRHS(int ExprPrec,ExprAST * LHS)472 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
473   // If this is a binop, find its precedence.
474   while (1) {
475     int TokPrec = GetTokPrecedence();
476 
477     // If this is a binop that binds at least as tightly as the current binop,
478     // consume it, otherwise we are done.
479     if (TokPrec < ExprPrec)
480       return LHS;
481 
482     // Okay, we know this is a binop.
483     int BinOp = CurTok;
484     getNextToken();  // eat binop
485 
486     // Parse the unary expression after the binary operator.
487     ExprAST *RHS = ParseUnary();
488     if (!RHS) return 0;
489 
490     // If BinOp binds less tightly with RHS than the operator after RHS, let
491     // the pending operator take RHS as its LHS.
492     int NextPrec = GetTokPrecedence();
493     if (TokPrec < NextPrec) {
494       RHS = ParseBinOpRHS(TokPrec+1, RHS);
495       if (RHS == 0) return 0;
496     }
497 
498     // Merge LHS/RHS.
499     LHS = new BinaryExprAST(BinOp, LHS, RHS);
500   }
501 }
502 
503 /// expression
504 ///   ::= unary binoprhs
505 ///
ParseExpression()506 static ExprAST *ParseExpression() {
507   ExprAST *LHS = ParseUnary();
508   if (!LHS) return 0;
509 
510   return ParseBinOpRHS(0, LHS);
511 }
512 
513 /// prototype
514 ///   ::= id '(' id* ')'
515 ///   ::= binary LETTER number? (id, id)
516 ///   ::= unary LETTER (id)
ParsePrototype()517 static PrototypeAST *ParsePrototype() {
518   std::string FnName;
519 
520   unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
521   unsigned BinaryPrecedence = 30;
522 
523   switch (CurTok) {
524   default:
525     return ErrorP("Expected function name in prototype");
526   case tok_identifier:
527     FnName = IdentifierStr;
528     Kind = 0;
529     getNextToken();
530     break;
531   case tok_unary:
532     getNextToken();
533     if (!isascii(CurTok))
534       return ErrorP("Expected unary operator");
535     FnName = "unary";
536     FnName += (char)CurTok;
537     Kind = 1;
538     getNextToken();
539     break;
540   case tok_binary:
541     getNextToken();
542     if (!isascii(CurTok))
543       return ErrorP("Expected binary operator");
544     FnName = "binary";
545     FnName += (char)CurTok;
546     Kind = 2;
547     getNextToken();
548 
549     // Read the precedence if present.
550     if (CurTok == tok_number) {
551       if (NumVal < 1 || NumVal > 100)
552         return ErrorP("Invalid precedecnce: must be 1..100");
553       BinaryPrecedence = (unsigned)NumVal;
554       getNextToken();
555     }
556     break;
557   }
558 
559   if (CurTok != '(')
560     return ErrorP("Expected '(' in prototype");
561 
562   std::vector<std::string> ArgNames;
563   while (getNextToken() == tok_identifier)
564     ArgNames.push_back(IdentifierStr);
565   if (CurTok != ')')
566     return ErrorP("Expected ')' in prototype");
567 
568   // success.
569   getNextToken();  // eat ')'.
570 
571   // Verify right number of names for operator.
572   if (Kind && ArgNames.size() != Kind)
573     return ErrorP("Invalid number of operands for operator");
574 
575   return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
576 }
577 
578 /// definition ::= 'def' prototype expression
ParseDefinition()579 static FunctionAST *ParseDefinition() {
580   getNextToken();  // eat def.
581   PrototypeAST *Proto = ParsePrototype();
582   if (Proto == 0) return 0;
583 
584   if (ExprAST *E = ParseExpression())
585     return new FunctionAST(Proto, E);
586   return 0;
587 }
588 
589 /// toplevelexpr ::= expression
ParseTopLevelExpr()590 static FunctionAST *ParseTopLevelExpr() {
591   if (ExprAST *E = ParseExpression()) {
592     // Make an anonymous proto.
593     PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
594     return new FunctionAST(Proto, E);
595   }
596   return 0;
597 }
598 
599 /// external ::= 'extern' prototype
ParseExtern()600 static PrototypeAST *ParseExtern() {
601   getNextToken();  // eat extern.
602   return ParsePrototype();
603 }
604 
605 //===----------------------------------------------------------------------===//
606 // Code Generation
607 //===----------------------------------------------------------------------===//
608 
609 static Module *TheModule;
610 static FunctionPassManager *TheFPM;
611 static IRBuilder<> Builder(getGlobalContext());
612 static std::map<std::string, AllocaInst*> NamedValues;
613 
ErrorV(const char * Str)614 Value *ErrorV(const char *Str) { Error(Str); return 0; }
615 
616 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
617 /// the function.  This is used for mutable variables etc.
CreateEntryBlockAlloca(Function * TheFunction,const std::string & VarName)618 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
619                                           const std::string &VarName) {
620   IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
621                  TheFunction->getEntryBlock().begin());
622   return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
623                            VarName.c_str());
624 }
625 
Codegen()626 Value *NumberExprAST::Codegen() {
627   return ConstantFP::get(getGlobalContext(), APFloat(Val));
628 }
629 
Codegen()630 Value *VariableExprAST::Codegen() {
631   // Look this variable up in the function.
632   Value *V = NamedValues[Name];
633   if (V == 0) return ErrorV("Unknown variable name");
634 
635   // Load the value.
636   return Builder.CreateLoad(V, Name.c_str());
637 }
638 
Codegen()639 Value *UnaryExprAST::Codegen() {
640   Value *OperandV = Operand->Codegen();
641   if (OperandV == 0) return 0;
642 #ifdef USE_MCJIT
643   Function *F = TheHelper->getFunction(MakeLegalFunctionName(std::string("unary")+Opcode));
644 #else
645   Function *F = TheModule->getFunction(std::string("unary")+Opcode);
646 #endif
647   if (F == 0)
648     return ErrorV("Unknown unary operator");
649 
650   return Builder.CreateCall(F, OperandV, "unop");
651 }
652 
Codegen()653 Value *BinaryExprAST::Codegen() {
654   // Special case '=' because we don't want to emit the LHS as an expression.
655   if (Op == '=') {
656     // Assignment requires the LHS to be an identifier.
657     VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS);
658     if (!LHSE)
659       return ErrorV("destination of '=' must be a variable");
660     // Codegen the RHS.
661     Value *Val = RHS->Codegen();
662     if (Val == 0) return 0;
663 
664     // Look up the name.
665     Value *Variable = NamedValues[LHSE->getName()];
666     if (Variable == 0) return ErrorV("Unknown variable name");
667 
668     Builder.CreateStore(Val, Variable);
669     return Val;
670   }
671 
672   Value *L = LHS->Codegen();
673   Value *R = RHS->Codegen();
674   if (L == 0 || R == 0) return 0;
675 
676   switch (Op) {
677   case '+': return Builder.CreateFAdd(L, R, "addtmp");
678   case '-': return Builder.CreateFSub(L, R, "subtmp");
679   case '*': return Builder.CreateFMul(L, R, "multmp");
680   case '/': return Builder.CreateFDiv(L, R, "divtmp");
681   case '<':
682     L = Builder.CreateFCmpULT(L, R, "cmptmp");
683     // Convert bool 0/1 to double 0.0 or 1.0
684     return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
685                                 "booltmp");
686   default: break;
687   }
688 
689   // If it wasn't a builtin binary operator, it must be a user defined one. Emit
690   // a call to it.
691   Function *F = TheModule->getFunction(std::string("binary")+Op);
692   assert(F && "binary operator not found!");
693 
694   Value *Ops[] = { L, R };
695   return Builder.CreateCall(F, Ops, "binop");
696 }
697 
Codegen()698 Value *CallExprAST::Codegen() {
699   // Look up the name in the global module table.
700   Function *CalleeF = TheModule->getFunction(Callee);
701   if (CalleeF == 0) {
702     char error_str[64];
703     sprintf(error_str, "Unknown function referenced %s", Callee.c_str());
704     return ErrorV(error_str);
705   }
706 
707   // If argument mismatch error.
708   if (CalleeF->arg_size() != Args.size())
709     return ErrorV("Incorrect # arguments passed");
710 
711   std::vector<Value*> ArgsV;
712   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
713     ArgsV.push_back(Args[i]->Codegen());
714     if (ArgsV.back() == 0) return 0;
715   }
716 
717   return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
718 }
719 
Codegen()720 Value *IfExprAST::Codegen() {
721   Value *CondV = Cond->Codegen();
722   if (CondV == 0) return 0;
723 
724   // Convert condition to a bool by comparing equal to 0.0.
725   CondV = Builder.CreateFCmpONE(CondV,
726                               ConstantFP::get(getGlobalContext(), APFloat(0.0)),
727                                 "ifcond");
728 
729   Function *TheFunction = Builder.GetInsertBlock()->getParent();
730 
731   // Create blocks for the then and else cases.  Insert the 'then' block at the
732   // end of the function.
733   BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
734   BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
735   BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
736 
737   Builder.CreateCondBr(CondV, ThenBB, ElseBB);
738 
739   // Emit then value.
740   Builder.SetInsertPoint(ThenBB);
741 
742   Value *ThenV = Then->Codegen();
743   if (ThenV == 0) return 0;
744 
745   Builder.CreateBr(MergeBB);
746   // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
747   ThenBB = Builder.GetInsertBlock();
748 
749   // Emit else block.
750   TheFunction->getBasicBlockList().push_back(ElseBB);
751   Builder.SetInsertPoint(ElseBB);
752 
753   Value *ElseV = Else->Codegen();
754   if (ElseV == 0) return 0;
755 
756   Builder.CreateBr(MergeBB);
757   // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
758   ElseBB = Builder.GetInsertBlock();
759 
760   // Emit merge block.
761   TheFunction->getBasicBlockList().push_back(MergeBB);
762   Builder.SetInsertPoint(MergeBB);
763   PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
764                                   "iftmp");
765 
766   PN->addIncoming(ThenV, ThenBB);
767   PN->addIncoming(ElseV, ElseBB);
768   return PN;
769 }
770 
Codegen()771 Value *ForExprAST::Codegen() {
772   // Output this as:
773   //   var = alloca double
774   //   ...
775   //   start = startexpr
776   //   store start -> var
777   //   goto loop
778   // loop:
779   //   ...
780   //   bodyexpr
781   //   ...
782   // loopend:
783   //   step = stepexpr
784   //   endcond = endexpr
785   //
786   //   curvar = load var
787   //   nextvar = curvar + step
788   //   store nextvar -> var
789   //   br endcond, loop, endloop
790   // outloop:
791 
792   Function *TheFunction = Builder.GetInsertBlock()->getParent();
793 
794   // Create an alloca for the variable in the entry block.
795   AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
796 
797   // Emit the start code first, without 'variable' in scope.
798   Value *StartVal = Start->Codegen();
799   if (StartVal == 0) return 0;
800 
801   // Store the value into the alloca.
802   Builder.CreateStore(StartVal, Alloca);
803 
804   // Make the new basic block for the loop header, inserting after current
805   // block.
806   BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
807 
808   // Insert an explicit fall through from the current block to the LoopBB.
809   Builder.CreateBr(LoopBB);
810 
811   // Start insertion in LoopBB.
812   Builder.SetInsertPoint(LoopBB);
813 
814   // Within the loop, the variable is defined equal to the PHI node.  If it
815   // shadows an existing variable, we have to restore it, so save it now.
816   AllocaInst *OldVal = NamedValues[VarName];
817   NamedValues[VarName] = Alloca;
818 
819   // Emit the body of the loop.  This, like any other expr, can change the
820   // current BB.  Note that we ignore the value computed by the body, but don't
821   // allow an error.
822   if (Body->Codegen() == 0)
823     return 0;
824 
825   // Emit the step value.
826   Value *StepVal;
827   if (Step) {
828     StepVal = Step->Codegen();
829     if (StepVal == 0) return 0;
830   } else {
831     // If not specified, use 1.0.
832     StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
833   }
834 
835   // Compute the end condition.
836   Value *EndCond = End->Codegen();
837   if (EndCond == 0) return EndCond;
838 
839   // Reload, increment, and restore the alloca.  This handles the case where
840   // the body of the loop mutates the variable.
841   Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
842   Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
843   Builder.CreateStore(NextVar, Alloca);
844 
845   // Convert condition to a bool by comparing equal to 0.0.
846   EndCond = Builder.CreateFCmpONE(EndCond,
847                               ConstantFP::get(getGlobalContext(), APFloat(0.0)),
848                                   "loopcond");
849 
850   // Create the "after loop" block and insert it.
851   BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
852 
853   // Insert the conditional branch into the end of LoopEndBB.
854   Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
855 
856   // Any new code will be inserted in AfterBB.
857   Builder.SetInsertPoint(AfterBB);
858 
859   // Restore the unshadowed variable.
860   if (OldVal)
861     NamedValues[VarName] = OldVal;
862   else
863     NamedValues.erase(VarName);
864 
865 
866   // for expr always returns 0.0.
867   return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
868 }
869 
Codegen()870 Value *VarExprAST::Codegen() {
871   std::vector<AllocaInst *> OldBindings;
872 
873   Function *TheFunction = Builder.GetInsertBlock()->getParent();
874 
875   // Register all variables and emit their initializer.
876   for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
877     const std::string &VarName = VarNames[i].first;
878     ExprAST *Init = VarNames[i].second;
879 
880     // Emit the initializer before adding the variable to scope, this prevents
881     // the initializer from referencing the variable itself, and permits stuff
882     // like this:
883     //  var a = 1 in
884     //    var a = a in ...   # refers to outer 'a'.
885     Value *InitVal;
886     if (Init) {
887       InitVal = Init->Codegen();
888       if (InitVal == 0) return 0;
889     } else { // If not specified, use 0.0.
890       InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
891     }
892 
893     AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
894     Builder.CreateStore(InitVal, Alloca);
895 
896     // Remember the old variable binding so that we can restore the binding when
897     // we unrecurse.
898     OldBindings.push_back(NamedValues[VarName]);
899 
900     // Remember this binding.
901     NamedValues[VarName] = Alloca;
902   }
903 
904   // Codegen the body, now that all vars are in scope.
905   Value *BodyVal = Body->Codegen();
906   if (BodyVal == 0) return 0;
907 
908   // Pop all our variables from scope.
909   for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
910     NamedValues[VarNames[i].first] = OldBindings[i];
911 
912   // Return the body computation.
913   return BodyVal;
914 }
915 
Codegen()916 Function *PrototypeAST::Codegen() {
917   // Make the function type:  double(double,double) etc.
918   std::vector<Type*> Doubles(Args.size(),
919                              Type::getDoubleTy(getGlobalContext()));
920   FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
921                                        Doubles, false);
922 
923   Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
924   // If F conflicted, there was already something named 'Name'.  If it has a
925   // body, don't allow redefinition or reextern.
926   if (F->getName() != Name) {
927     // Delete the one we just made and get the existing one.
928     F->eraseFromParent();
929     F = TheModule->getFunction(Name);
930     // If F already has a body, reject this.
931     if (!F->empty()) {
932       ErrorF("redefinition of function");
933       return 0;
934     }
935     // If F took a different number of args, reject.
936     if (F->arg_size() != Args.size()) {
937       ErrorF("redefinition of function with different # args");
938       return 0;
939     }
940   }
941 
942   // Set names for all arguments.
943   unsigned Idx = 0;
944   for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
945        ++AI, ++Idx)
946     AI->setName(Args[Idx]);
947 
948   return F;
949 }
950 
951 /// CreateArgumentAllocas - Create an alloca for each argument and register the
952 /// argument in the symbol table so that references to it will succeed.
CreateArgumentAllocas(Function * F)953 void PrototypeAST::CreateArgumentAllocas(Function *F) {
954   Function::arg_iterator AI = F->arg_begin();
955   for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
956     // Create an alloca for this variable.
957     AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
958 
959     // Store the initial value into the alloca.
960     Builder.CreateStore(AI, Alloca);
961 
962     // Add arguments to variable symbol table.
963     NamedValues[Args[Idx]] = Alloca;
964   }
965 }
966 
Codegen()967 Function *FunctionAST::Codegen() {
968   NamedValues.clear();
969 
970   Function *TheFunction = Proto->Codegen();
971   if (TheFunction == 0)
972     return 0;
973 
974   // If this is an operator, install it.
975   if (Proto->isBinaryOp())
976     BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
977 
978   // Create a new basic block to start insertion into.
979   BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
980   Builder.SetInsertPoint(BB);
981 
982   // Add all arguments to the symbol table and create their allocas.
983   Proto->CreateArgumentAllocas(TheFunction);
984 
985   if (Value *RetVal = Body->Codegen()) {
986     // Finish off the function.
987     Builder.CreateRet(RetVal);
988 
989     // Validate the generated code, checking for consistency.
990     verifyFunction(*TheFunction);
991 
992     // Optimize the function.
993     TheFPM->run(*TheFunction);
994 
995     return TheFunction;
996   }
997 
998   // Error reading body, remove function.
999   TheFunction->eraseFromParent();
1000 
1001   if (Proto->isBinaryOp())
1002     BinopPrecedence.erase(Proto->getOperatorName());
1003   return 0;
1004 }
1005 
1006 //===----------------------------------------------------------------------===//
1007 // Top-Level parsing and JIT Driver
1008 //===----------------------------------------------------------------------===//
1009 
1010 static ExecutionEngine *TheExecutionEngine;
1011 
HandleDefinition()1012 static void HandleDefinition() {
1013   if (FunctionAST *F = ParseDefinition()) {
1014     if (Function *LF = F->Codegen()) {
1015 #ifndef MINIMAL_STDERR_OUTPUT
1016       fprintf(stderr, "Read function definition:");
1017       LF->dump();
1018 #endif
1019     }
1020   } else {
1021     // Skip token for error recovery.
1022     getNextToken();
1023   }
1024 }
1025 
HandleExtern()1026 static void HandleExtern() {
1027   if (PrototypeAST *P = ParseExtern()) {
1028     if (Function *F = P->Codegen()) {
1029 #ifndef MINIMAL_STDERR_OUTPUT
1030       fprintf(stderr, "Read extern: ");
1031       F->dump();
1032 #endif
1033     }
1034   } else {
1035     // Skip token for error recovery.
1036     getNextToken();
1037   }
1038 }
1039 
HandleTopLevelExpression()1040 static void HandleTopLevelExpression() {
1041   // Evaluate a top-level expression into an anonymous function.
1042   if (FunctionAST *F = ParseTopLevelExpr()) {
1043     if (Function *LF = F->Codegen()) {
1044       // JIT the function, returning a function pointer.
1045       void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
1046       // Cast it to the right type (takes no arguments, returns a double) so we
1047       // can call it as a native function.
1048       double (*FP)() = (double (*)())(intptr_t)FPtr;
1049 #ifdef MINIMAL_STDERR_OUTPUT
1050       FP();
1051 #else
1052       fprintf(stderr, "Evaluated to %f\n", FP());
1053 #endif
1054     }
1055   } else {
1056     // Skip token for error recovery.
1057     getNextToken();
1058   }
1059 }
1060 
1061 /// top ::= definition | external | expression | ';'
MainLoop()1062 static void MainLoop() {
1063   while (1) {
1064 #ifndef MINIMAL_STDERR_OUTPUT
1065     fprintf(stderr, "ready> ");
1066 #endif
1067     switch (CurTok) {
1068     case tok_eof:    return;
1069     case ';':        getNextToken(); break;  // ignore top-level semicolons.
1070     case tok_def:    HandleDefinition(); break;
1071     case tok_extern: HandleExtern(); break;
1072     default:         HandleTopLevelExpression(); break;
1073     }
1074   }
1075 }
1076 
1077 //===----------------------------------------------------------------------===//
1078 // "Library" functions that can be "extern'd" from user code.
1079 //===----------------------------------------------------------------------===//
1080 
1081 /// putchard - putchar that takes a double and returns 0.
1082 extern "C"
putchard(double X)1083 double putchard(double X) {
1084   putchar((char)X);
1085   return 0;
1086 }
1087 
1088 /// printd - printf that takes a double prints it as "%f\n", returning 0.
1089 extern "C"
printd(double X)1090 double printd(double X) {
1091   printf("%f", X);
1092   return 0;
1093 }
1094 
1095 extern "C"
printlf()1096 double printlf() {
1097   printf("\n");
1098   return 0;
1099 }
1100 
1101 //===----------------------------------------------------------------------===//
1102 // Main driver code.
1103 //===----------------------------------------------------------------------===//
1104 
main(int argc,char ** argv)1105 int main(int argc, char **argv) {
1106   InitializeNativeTarget();
1107   LLVMContext &Context = getGlobalContext();
1108 
1109   // Install standard binary operators.
1110   // 1 is lowest precedence.
1111   BinopPrecedence['='] = 2;
1112   BinopPrecedence['<'] = 10;
1113   BinopPrecedence['+'] = 20;
1114   BinopPrecedence['-'] = 20;
1115   BinopPrecedence['/'] = 40;
1116   BinopPrecedence['*'] = 40;  // highest.
1117 
1118   // Make the module, which holds all the code.
1119   TheModule = new Module("my cool jit", Context);
1120 
1121   // Create the JIT.  This takes ownership of the module.
1122   std::string ErrStr;
1123   TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
1124   if (!TheExecutionEngine) {
1125     fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
1126     exit(1);
1127   }
1128 
1129   FunctionPassManager OurFPM(TheModule);
1130 
1131   // Set up the optimizer pipeline.  Start with registering info about how the
1132   // target lays out data structures.
1133   OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
1134   // Provide basic AliasAnalysis support for GVN.
1135   OurFPM.add(createBasicAliasAnalysisPass());
1136   // Promote allocas to registers.
1137   OurFPM.add(createPromoteMemoryToRegisterPass());
1138   // Do simple "peephole" optimizations and bit-twiddling optzns.
1139   OurFPM.add(createInstructionCombiningPass());
1140   // Reassociate expressions.
1141   OurFPM.add(createReassociatePass());
1142   // Eliminate Common SubExpressions.
1143   OurFPM.add(createGVNPass());
1144   // Simplify the control flow graph (deleting unreachable blocks, etc).
1145   OurFPM.add(createCFGSimplificationPass());
1146 
1147   OurFPM.doInitialization();
1148 
1149   // Set the global so the code gen can use this.
1150   TheFPM = &OurFPM;
1151 
1152   // Prime the first token.
1153 #ifndef MINIMAL_STDERR_OUTPUT
1154   fprintf(stderr, "ready> ");
1155 #endif
1156   getNextToken();
1157 
1158   // Run the main "interpreter loop" now.
1159   MainLoop();
1160 
1161   // Print out all of the generated code.
1162   TheFPM = 0;
1163 #ifndef MINIMAL_STDERR_OUTPUT
1164   TheModule->dump();
1165 #endif
1166   return 0;
1167 }
1168