1 #include "llvm/ADT/STLExtras.h"
2 #include "llvm/Analysis/Passes.h"
3 #include "llvm/ExecutionEngine/ExecutionEngine.h"
4 #include "llvm/ExecutionEngine/MCJIT.h"
5 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
6 #include "llvm/IR/DataLayout.h"
7 #include "llvm/IR/DerivedTypes.h"
8 #include "llvm/IR/IRBuilder.h"
9 #include "llvm/IR/LLVMContext.h"
10 #include "llvm/IR/LegacyPassManager.h"
11 #include "llvm/IR/Module.h"
12 #include "llvm/IR/Verifier.h"
13 #include "llvm/Support/TargetSelect.h"
14 #include "llvm/Transforms/Scalar.h"
15 #include <cctype>
16 #include <cstdio>
17 #include <map>
18 #include <string>
19 #include <vector>
20 using namespace llvm;
21 
22 //===----------------------------------------------------------------------===//
23 // Lexer
24 //===----------------------------------------------------------------------===//
25 
// Token codes produced by the lexer.  A character the lexer does not recognise
// is returned as its own value in [0-255]; every recognised token kind gets
// one of the negative codes below.
enum Token {
  // End of input.
  tok_eof = -1,

  // Top-level commands.
  tok_def = -2,
  tok_extern = -3,

  // Primary expressions.
  tok_identifier = -4,
  tok_number = -5,

  // Control-flow keywords.
  tok_if = -6,
  tok_then = -7,
  tok_else = -8,
  tok_for = -9,
  tok_in = -10,

  // Keywords that introduce user-defined operators.
  tok_binary = -11,
  tok_unary = -12
};
50 
51 static std::string IdentifierStr; // Filled in if tok_identifier
52 static double NumVal;             // Filled in if tok_number
53 
54 /// gettok - Return the next token from standard input.
gettok()55 static int gettok() {
56   static int LastChar = ' ';
57 
58   // Skip any whitespace.
59   while (isspace(LastChar))
60     LastChar = getchar();
61 
62   if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
63     IdentifierStr = LastChar;
64     while (isalnum((LastChar = getchar())))
65       IdentifierStr += LastChar;
66 
67     if (IdentifierStr == "def")
68       return tok_def;
69     if (IdentifierStr == "extern")
70       return tok_extern;
71     if (IdentifierStr == "if")
72       return tok_if;
73     if (IdentifierStr == "then")
74       return tok_then;
75     if (IdentifierStr == "else")
76       return tok_else;
77     if (IdentifierStr == "for")
78       return tok_for;
79     if (IdentifierStr == "in")
80       return tok_in;
81     if (IdentifierStr == "binary")
82       return tok_binary;
83     if (IdentifierStr == "unary")
84       return tok_unary;
85     return tok_identifier;
86   }
87 
88   if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
89     std::string NumStr;
90     do {
91       NumStr += LastChar;
92       LastChar = getchar();
93     } while (isdigit(LastChar) || LastChar == '.');
94 
95     NumVal = strtod(NumStr.c_str(), 0);
96     return tok_number;
97   }
98 
99   if (LastChar == '#') {
100     // Comment until end of line.
101     do
102       LastChar = getchar();
103     while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
104 
105     if (LastChar != EOF)
106       return gettok();
107   }
108 
109   // Check for end of file.  Don't eat the EOF.
110   if (LastChar == EOF)
111     return tok_eof;
112 
113   // Otherwise, just return the character as its ascii value.
114   int ThisChar = LastChar;
115   LastChar = getchar();
116   return ThisChar;
117 }
118 
119 //===----------------------------------------------------------------------===//
120 // Abstract Syntax Tree (aka Parse Tree)
121 //===----------------------------------------------------------------------===//
122 namespace {
123 /// ExprAST - Base class for all expression nodes.
124 class ExprAST {
125 public:
~ExprAST()126   virtual ~ExprAST() {}
127   virtual Value *Codegen() = 0;
128 };
129 
130 /// NumberExprAST - Expression class for numeric literals like "1.0".
131 class NumberExprAST : public ExprAST {
132   double Val;
133 
134 public:
NumberExprAST(double val)135   NumberExprAST(double val) : Val(val) {}
136   Value *Codegen() override;
137 };
138 
139 /// VariableExprAST - Expression class for referencing a variable, like "a".
140 class VariableExprAST : public ExprAST {
141   std::string Name;
142 
143 public:
VariableExprAST(const std::string & name)144   VariableExprAST(const std::string &name) : Name(name) {}
145   Value *Codegen() override;
146 };
147 
148 /// UnaryExprAST - Expression class for a unary operator.
149 class UnaryExprAST : public ExprAST {
150   char Opcode;
151   ExprAST *Operand;
152 
153 public:
UnaryExprAST(char opcode,ExprAST * operand)154   UnaryExprAST(char opcode, ExprAST *operand)
155       : Opcode(opcode), Operand(operand) {}
156   Value *Codegen() override;
157 };
158 
159 /// BinaryExprAST - Expression class for a binary operator.
160 class BinaryExprAST : public ExprAST {
161   char Op;
162   ExprAST *LHS, *RHS;
163 
164 public:
BinaryExprAST(char op,ExprAST * lhs,ExprAST * rhs)165   BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
166       : Op(op), LHS(lhs), RHS(rhs) {}
167   Value *Codegen() override;
168 };
169 
170 /// CallExprAST - Expression class for function calls.
171 class CallExprAST : public ExprAST {
172   std::string Callee;
173   std::vector<ExprAST *> Args;
174 
175 public:
CallExprAST(const std::string & callee,std::vector<ExprAST * > & args)176   CallExprAST(const std::string &callee, std::vector<ExprAST *> &args)
177       : Callee(callee), Args(args) {}
178   Value *Codegen() override;
179 };
180 
181 /// IfExprAST - Expression class for if/then/else.
182 class IfExprAST : public ExprAST {
183   ExprAST *Cond, *Then, *Else;
184 
185 public:
IfExprAST(ExprAST * cond,ExprAST * then,ExprAST * _else)186   IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
187       : Cond(cond), Then(then), Else(_else) {}
188   Value *Codegen() override;
189 };
190 
191 /// ForExprAST - Expression class for for/in.
192 class ForExprAST : public ExprAST {
193   std::string VarName;
194   ExprAST *Start, *End, *Step, *Body;
195 
196 public:
ForExprAST(const std::string & varname,ExprAST * start,ExprAST * end,ExprAST * step,ExprAST * body)197   ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
198              ExprAST *step, ExprAST *body)
199       : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
200   Value *Codegen() override;
201 };
202 
203 /// PrototypeAST - This class represents the "prototype" for a function,
204 /// which captures its name, and its argument names (thus implicitly the number
205 /// of arguments the function takes), as well as if it is an operator.
206 class PrototypeAST {
207   std::string Name;
208   std::vector<std::string> Args;
209   bool isOperator;
210   unsigned Precedence; // Precedence if a binary op.
211 public:
PrototypeAST(const std::string & name,const std::vector<std::string> & args,bool isoperator=false,unsigned prec=0)212   PrototypeAST(const std::string &name, const std::vector<std::string> &args,
213                bool isoperator = false, unsigned prec = 0)
214       : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
215 
isUnaryOp() const216   bool isUnaryOp() const { return isOperator && Args.size() == 1; }
isBinaryOp() const217   bool isBinaryOp() const { return isOperator && Args.size() == 2; }
218 
getOperatorName() const219   char getOperatorName() const {
220     assert(isUnaryOp() || isBinaryOp());
221     return Name[Name.size() - 1];
222   }
223 
getBinaryPrecedence() const224   unsigned getBinaryPrecedence() const { return Precedence; }
225 
226   Function *Codegen();
227 };
228 
229 /// FunctionAST - This class represents a function definition itself.
230 class FunctionAST {
231   PrototypeAST *Proto;
232   ExprAST *Body;
233 
234 public:
FunctionAST(PrototypeAST * proto,ExprAST * body)235   FunctionAST(PrototypeAST *proto, ExprAST *body) : Proto(proto), Body(body) {}
236 
237   Function *Codegen();
238 };
239 } // end anonymous namespace
240 
241 //===----------------------------------------------------------------------===//
242 // Parser
243 //===----------------------------------------------------------------------===//
244 
245 /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
246 /// token the parser is looking at.  getNextToken reads another token from the
247 /// lexer and updates CurTok with its results.
248 static int CurTok;
getNextToken()249 static int getNextToken() { return CurTok = gettok(); }
250 
251 /// BinopPrecedence - This holds the precedence for each binary operator that is
252 /// defined.
253 static std::map<char, int> BinopPrecedence;
254 
255 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
GetTokPrecedence()256 static int GetTokPrecedence() {
257   if (!isascii(CurTok))
258     return -1;
259 
260   // Make sure it's a declared binop.
261   int TokPrec = BinopPrecedence[CurTok];
262   if (TokPrec <= 0)
263     return -1;
264   return TokPrec;
265 }
266 
267 /// Error* - These are little helper functions for error handling.
Error(const char * Str)268 ExprAST *Error(const char *Str) {
269   fprintf(stderr, "Error: %s\n", Str);
270   return 0;
271 }
ErrorP(const char * Str)272 PrototypeAST *ErrorP(const char *Str) {
273   Error(Str);
274   return 0;
275 }
ErrorF(const char * Str)276 FunctionAST *ErrorF(const char *Str) {
277   Error(Str);
278   return 0;
279 }
280 
281 static ExprAST *ParseExpression();
282 
283 /// identifierexpr
284 ///   ::= identifier
285 ///   ::= identifier '(' expression* ')'
ParseIdentifierExpr()286 static ExprAST *ParseIdentifierExpr() {
287   std::string IdName = IdentifierStr;
288 
289   getNextToken(); // eat identifier.
290 
291   if (CurTok != '(') // Simple variable ref.
292     return new VariableExprAST(IdName);
293 
294   // Call.
295   getNextToken(); // eat (
296   std::vector<ExprAST *> Args;
297   if (CurTok != ')') {
298     while (1) {
299       ExprAST *Arg = ParseExpression();
300       if (!Arg)
301         return 0;
302       Args.push_back(Arg);
303 
304       if (CurTok == ')')
305         break;
306 
307       if (CurTok != ',')
308         return Error("Expected ')' or ',' in argument list");
309       getNextToken();
310     }
311   }
312 
313   // Eat the ')'.
314   getNextToken();
315 
316   return new CallExprAST(IdName, Args);
317 }
318 
319 /// numberexpr ::= number
ParseNumberExpr()320 static ExprAST *ParseNumberExpr() {
321   ExprAST *Result = new NumberExprAST(NumVal);
322   getNextToken(); // consume the number
323   return Result;
324 }
325 
326 /// parenexpr ::= '(' expression ')'
ParseParenExpr()327 static ExprAST *ParseParenExpr() {
328   getNextToken(); // eat (.
329   ExprAST *V = ParseExpression();
330   if (!V)
331     return 0;
332 
333   if (CurTok != ')')
334     return Error("expected ')'");
335   getNextToken(); // eat ).
336   return V;
337 }
338 
339 /// ifexpr ::= 'if' expression 'then' expression 'else' expression
ParseIfExpr()340 static ExprAST *ParseIfExpr() {
341   getNextToken(); // eat the if.
342 
343   // condition.
344   ExprAST *Cond = ParseExpression();
345   if (!Cond)
346     return 0;
347 
348   if (CurTok != tok_then)
349     return Error("expected then");
350   getNextToken(); // eat the then
351 
352   ExprAST *Then = ParseExpression();
353   if (Then == 0)
354     return 0;
355 
356   if (CurTok != tok_else)
357     return Error("expected else");
358 
359   getNextToken();
360 
361   ExprAST *Else = ParseExpression();
362   if (!Else)
363     return 0;
364 
365   return new IfExprAST(Cond, Then, Else);
366 }
367 
368 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
ParseForExpr()369 static ExprAST *ParseForExpr() {
370   getNextToken(); // eat the for.
371 
372   if (CurTok != tok_identifier)
373     return Error("expected identifier after for");
374 
375   std::string IdName = IdentifierStr;
376   getNextToken(); // eat identifier.
377 
378   if (CurTok != '=')
379     return Error("expected '=' after for");
380   getNextToken(); // eat '='.
381 
382   ExprAST *Start = ParseExpression();
383   if (Start == 0)
384     return 0;
385   if (CurTok != ',')
386     return Error("expected ',' after for start value");
387   getNextToken();
388 
389   ExprAST *End = ParseExpression();
390   if (End == 0)
391     return 0;
392 
393   // The step value is optional.
394   ExprAST *Step = 0;
395   if (CurTok == ',') {
396     getNextToken();
397     Step = ParseExpression();
398     if (Step == 0)
399       return 0;
400   }
401 
402   if (CurTok != tok_in)
403     return Error("expected 'in' after for");
404   getNextToken(); // eat 'in'.
405 
406   ExprAST *Body = ParseExpression();
407   if (Body == 0)
408     return 0;
409 
410   return new ForExprAST(IdName, Start, End, Step, Body);
411 }
412 
413 /// primary
414 ///   ::= identifierexpr
415 ///   ::= numberexpr
416 ///   ::= parenexpr
417 ///   ::= ifexpr
418 ///   ::= forexpr
ParsePrimary()419 static ExprAST *ParsePrimary() {
420   switch (CurTok) {
421   default:
422     return Error("unknown token when expecting an expression");
423   case tok_identifier:
424     return ParseIdentifierExpr();
425   case tok_number:
426     return ParseNumberExpr();
427   case '(':
428     return ParseParenExpr();
429   case tok_if:
430     return ParseIfExpr();
431   case tok_for:
432     return ParseForExpr();
433   }
434 }
435 
436 /// unary
437 ///   ::= primary
438 ///   ::= '!' unary
ParseUnary()439 static ExprAST *ParseUnary() {
440   // If the current token is not an operator, it must be a primary expr.
441   if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
442     return ParsePrimary();
443 
444   // If this is a unary operator, read it.
445   int Opc = CurTok;
446   getNextToken();
447   if (ExprAST *Operand = ParseUnary())
448     return new UnaryExprAST(Opc, Operand);
449   return 0;
450 }
451 
452 /// binoprhs
453 ///   ::= ('+' unary)*
ParseBinOpRHS(int ExprPrec,ExprAST * LHS)454 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
455   // If this is a binop, find its precedence.
456   while (1) {
457     int TokPrec = GetTokPrecedence();
458 
459     // If this is a binop that binds at least as tightly as the current binop,
460     // consume it, otherwise we are done.
461     if (TokPrec < ExprPrec)
462       return LHS;
463 
464     // Okay, we know this is a binop.
465     int BinOp = CurTok;
466     getNextToken(); // eat binop
467 
468     // Parse the unary expression after the binary operator.
469     ExprAST *RHS = ParseUnary();
470     if (!RHS)
471       return 0;
472 
473     // If BinOp binds less tightly with RHS than the operator after RHS, let
474     // the pending operator take RHS as its LHS.
475     int NextPrec = GetTokPrecedence();
476     if (TokPrec < NextPrec) {
477       RHS = ParseBinOpRHS(TokPrec + 1, RHS);
478       if (RHS == 0)
479         return 0;
480     }
481 
482     // Merge LHS/RHS.
483     LHS = new BinaryExprAST(BinOp, LHS, RHS);
484   }
485 }
486 
487 /// expression
488 ///   ::= unary binoprhs
489 ///
ParseExpression()490 static ExprAST *ParseExpression() {
491   ExprAST *LHS = ParseUnary();
492   if (!LHS)
493     return 0;
494 
495   return ParseBinOpRHS(0, LHS);
496 }
497 
498 /// prototype
499 ///   ::= id '(' id* ')'
500 ///   ::= binary LETTER number? (id, id)
501 ///   ::= unary LETTER (id)
ParsePrototype()502 static PrototypeAST *ParsePrototype() {
503   std::string FnName;
504 
505   unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
506   unsigned BinaryPrecedence = 30;
507 
508   switch (CurTok) {
509   default:
510     return ErrorP("Expected function name in prototype");
511   case tok_identifier:
512     FnName = IdentifierStr;
513     Kind = 0;
514     getNextToken();
515     break;
516   case tok_unary:
517     getNextToken();
518     if (!isascii(CurTok))
519       return ErrorP("Expected unary operator");
520     FnName = "unary";
521     FnName += (char)CurTok;
522     Kind = 1;
523     getNextToken();
524     break;
525   case tok_binary:
526     getNextToken();
527     if (!isascii(CurTok))
528       return ErrorP("Expected binary operator");
529     FnName = "binary";
530     FnName += (char)CurTok;
531     Kind = 2;
532     getNextToken();
533 
534     // Read the precedence if present.
535     if (CurTok == tok_number) {
536       if (NumVal < 1 || NumVal > 100)
537         return ErrorP("Invalid precedecnce: must be 1..100");
538       BinaryPrecedence = (unsigned)NumVal;
539       getNextToken();
540     }
541     break;
542   }
543 
544   if (CurTok != '(')
545     return ErrorP("Expected '(' in prototype");
546 
547   std::vector<std::string> ArgNames;
548   while (getNextToken() == tok_identifier)
549     ArgNames.push_back(IdentifierStr);
550   if (CurTok != ')')
551     return ErrorP("Expected ')' in prototype");
552 
553   // success.
554   getNextToken(); // eat ')'.
555 
556   // Verify right number of names for operator.
557   if (Kind && ArgNames.size() != Kind)
558     return ErrorP("Invalid number of operands for operator");
559 
560   return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
561 }
562 
563 /// definition ::= 'def' prototype expression
ParseDefinition()564 static FunctionAST *ParseDefinition() {
565   getNextToken(); // eat def.
566   PrototypeAST *Proto = ParsePrototype();
567   if (Proto == 0)
568     return 0;
569 
570   if (ExprAST *E = ParseExpression())
571     return new FunctionAST(Proto, E);
572   return 0;
573 }
574 
575 /// toplevelexpr ::= expression
ParseTopLevelExpr()576 static FunctionAST *ParseTopLevelExpr() {
577   if (ExprAST *E = ParseExpression()) {
578     // Make an anonymous proto.
579     PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
580     return new FunctionAST(Proto, E);
581   }
582   return 0;
583 }
584 
585 /// external ::= 'extern' prototype
ParseExtern()586 static PrototypeAST *ParseExtern() {
587   getNextToken(); // eat extern.
588   return ParsePrototype();
589 }
590 
591 //===----------------------------------------------------------------------===//
592 // Code Generation
593 //===----------------------------------------------------------------------===//
594 
595 static Module *TheModule;
596 static IRBuilder<> Builder(getGlobalContext());
597 static std::map<std::string, Value *> NamedValues;
598 static legacy::FunctionPassManager *TheFPM;
599 
ErrorV(const char * Str)600 Value *ErrorV(const char *Str) {
601   Error(Str);
602   return 0;
603 }
604 
Codegen()605 Value *NumberExprAST::Codegen() {
606   return ConstantFP::get(getGlobalContext(), APFloat(Val));
607 }
608 
Codegen()609 Value *VariableExprAST::Codegen() {
610   // Look this variable up in the function.
611   Value *V = NamedValues[Name];
612   return V ? V : ErrorV("Unknown variable name");
613 }
614 
Codegen()615 Value *UnaryExprAST::Codegen() {
616   Value *OperandV = Operand->Codegen();
617   if (OperandV == 0)
618     return 0;
619 
620   Function *F = TheModule->getFunction(std::string("unary") + Opcode);
621   if (F == 0)
622     return ErrorV("Unknown unary operator");
623 
624   return Builder.CreateCall(F, OperandV, "unop");
625 }
626 
Codegen()627 Value *BinaryExprAST::Codegen() {
628   Value *L = LHS->Codegen();
629   Value *R = RHS->Codegen();
630   if (L == 0 || R == 0)
631     return 0;
632 
633   switch (Op) {
634   case '+':
635     return Builder.CreateFAdd(L, R, "addtmp");
636   case '-':
637     return Builder.CreateFSub(L, R, "subtmp");
638   case '*':
639     return Builder.CreateFMul(L, R, "multmp");
640   case '<':
641     L = Builder.CreateFCmpULT(L, R, "cmptmp");
642     // Convert bool 0/1 to double 0.0 or 1.0
643     return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
644                                 "booltmp");
645   default:
646     break;
647   }
648 
649   // If it wasn't a builtin binary operator, it must be a user defined one. Emit
650   // a call to it.
651   Function *F = TheModule->getFunction(std::string("binary") + Op);
652   assert(F && "binary operator not found!");
653 
654   Value *Ops[] = { L, R };
655   return Builder.CreateCall(F, Ops, "binop");
656 }
657 
Codegen()658 Value *CallExprAST::Codegen() {
659   // Look up the name in the global module table.
660   Function *CalleeF = TheModule->getFunction(Callee);
661   if (CalleeF == 0)
662     return ErrorV("Unknown function referenced");
663 
664   // If argument mismatch error.
665   if (CalleeF->arg_size() != Args.size())
666     return ErrorV("Incorrect # arguments passed");
667 
668   std::vector<Value *> ArgsV;
669   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
670     ArgsV.push_back(Args[i]->Codegen());
671     if (ArgsV.back() == 0)
672       return 0;
673   }
674 
675   return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
676 }
677 
Codegen()678 Value *IfExprAST::Codegen() {
679   Value *CondV = Cond->Codegen();
680   if (CondV == 0)
681     return 0;
682 
683   // Convert condition to a bool by comparing equal to 0.0.
684   CondV = Builder.CreateFCmpONE(
685       CondV, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "ifcond");
686 
687   Function *TheFunction = Builder.GetInsertBlock()->getParent();
688 
689   // Create blocks for the then and else cases.  Insert the 'then' block at the
690   // end of the function.
691   BasicBlock *ThenBB =
692       BasicBlock::Create(getGlobalContext(), "then", TheFunction);
693   BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
694   BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
695 
696   Builder.CreateCondBr(CondV, ThenBB, ElseBB);
697 
698   // Emit then value.
699   Builder.SetInsertPoint(ThenBB);
700 
701   Value *ThenV = Then->Codegen();
702   if (ThenV == 0)
703     return 0;
704 
705   Builder.CreateBr(MergeBB);
706   // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
707   ThenBB = Builder.GetInsertBlock();
708 
709   // Emit else block.
710   TheFunction->getBasicBlockList().push_back(ElseBB);
711   Builder.SetInsertPoint(ElseBB);
712 
713   Value *ElseV = Else->Codegen();
714   if (ElseV == 0)
715     return 0;
716 
717   Builder.CreateBr(MergeBB);
718   // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
719   ElseBB = Builder.GetInsertBlock();
720 
721   // Emit merge block.
722   TheFunction->getBasicBlockList().push_back(MergeBB);
723   Builder.SetInsertPoint(MergeBB);
724   PHINode *PN =
725       Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp");
726 
727   PN->addIncoming(ThenV, ThenBB);
728   PN->addIncoming(ElseV, ElseBB);
729   return PN;
730 }
731 
Codegen()732 Value *ForExprAST::Codegen() {
733   // Output this as:
734   //   ...
735   //   start = startexpr
736   //   goto loop
737   // loop:
738   //   variable = phi [start, loopheader], [nextvariable, loopend]
739   //   ...
740   //   bodyexpr
741   //   ...
742   // loopend:
743   //   step = stepexpr
744   //   nextvariable = variable + step
745   //   endcond = endexpr
746   //   br endcond, loop, endloop
747   // outloop:
748 
749   // Emit the start code first, without 'variable' in scope.
750   Value *StartVal = Start->Codegen();
751   if (StartVal == 0)
752     return 0;
753 
754   // Make the new basic block for the loop header, inserting after current
755   // block.
756   Function *TheFunction = Builder.GetInsertBlock()->getParent();
757   BasicBlock *PreheaderBB = Builder.GetInsertBlock();
758   BasicBlock *LoopBB =
759       BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
760 
761   // Insert an explicit fall through from the current block to the LoopBB.
762   Builder.CreateBr(LoopBB);
763 
764   // Start insertion in LoopBB.
765   Builder.SetInsertPoint(LoopBB);
766 
767   // Start the PHI node with an entry for Start.
768   PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
769                                         2, VarName.c_str());
770   Variable->addIncoming(StartVal, PreheaderBB);
771 
772   // Within the loop, the variable is defined equal to the PHI node.  If it
773   // shadows an existing variable, we have to restore it, so save it now.
774   Value *OldVal = NamedValues[VarName];
775   NamedValues[VarName] = Variable;
776 
777   // Emit the body of the loop.  This, like any other expr, can change the
778   // current BB.  Note that we ignore the value computed by the body, but don't
779   // allow an error.
780   if (Body->Codegen() == 0)
781     return 0;
782 
783   // Emit the step value.
784   Value *StepVal;
785   if (Step) {
786     StepVal = Step->Codegen();
787     if (StepVal == 0)
788       return 0;
789   } else {
790     // If not specified, use 1.0.
791     StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
792   }
793 
794   Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
795 
796   // Compute the end condition.
797   Value *EndCond = End->Codegen();
798   if (EndCond == 0)
799     return EndCond;
800 
801   // Convert condition to a bool by comparing equal to 0.0.
802   EndCond = Builder.CreateFCmpONE(
803       EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond");
804 
805   // Create the "after loop" block and insert it.
806   BasicBlock *LoopEndBB = Builder.GetInsertBlock();
807   BasicBlock *AfterBB =
808       BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
809 
810   // Insert the conditional branch into the end of LoopEndBB.
811   Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
812 
813   // Any new code will be inserted in AfterBB.
814   Builder.SetInsertPoint(AfterBB);
815 
816   // Add a new entry to the PHI node for the backedge.
817   Variable->addIncoming(NextVar, LoopEndBB);
818 
819   // Restore the unshadowed variable.
820   if (OldVal)
821     NamedValues[VarName] = OldVal;
822   else
823     NamedValues.erase(VarName);
824 
825   // for expr always returns 0.0.
826   return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
827 }
828 
Codegen()829 Function *PrototypeAST::Codegen() {
830   // Make the function type:  double(double,double) etc.
831   std::vector<Type *> Doubles(Args.size(),
832                               Type::getDoubleTy(getGlobalContext()));
833   FunctionType *FT =
834       FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
835 
836   Function *F =
837       Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
838 
839   // If F conflicted, there was already something named 'Name'.  If it has a
840   // body, don't allow redefinition or reextern.
841   if (F->getName() != Name) {
842     // Delete the one we just made and get the existing one.
843     F->eraseFromParent();
844     F = TheModule->getFunction(Name);
845 
846     // If F already has a body, reject this.
847     if (!F->empty()) {
848       ErrorF("redefinition of function");
849       return 0;
850     }
851 
852     // If F took a different number of args, reject.
853     if (F->arg_size() != Args.size()) {
854       ErrorF("redefinition of function with different # args");
855       return 0;
856     }
857   }
858 
859   // Set names for all arguments.
860   unsigned Idx = 0;
861   for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
862        ++AI, ++Idx) {
863     AI->setName(Args[Idx]);
864 
865     // Add arguments to variable symbol table.
866     NamedValues[Args[Idx]] = AI;
867   }
868 
869   return F;
870 }
871 
Codegen()872 Function *FunctionAST::Codegen() {
873   NamedValues.clear();
874 
875   Function *TheFunction = Proto->Codegen();
876   if (TheFunction == 0)
877     return 0;
878 
879   // If this is an operator, install it.
880   if (Proto->isBinaryOp())
881     BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
882 
883   // Create a new basic block to start insertion into.
884   BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
885   Builder.SetInsertPoint(BB);
886 
887   if (Value *RetVal = Body->Codegen()) {
888     // Finish off the function.
889     Builder.CreateRet(RetVal);
890 
891     // Validate the generated code, checking for consistency.
892     verifyFunction(*TheFunction);
893 
894     // Optimize the function.
895     TheFPM->run(*TheFunction);
896 
897     return TheFunction;
898   }
899 
900   // Error reading body, remove function.
901   TheFunction->eraseFromParent();
902 
903   if (Proto->isBinaryOp())
904     BinopPrecedence.erase(Proto->getOperatorName());
905   return 0;
906 }
907 
908 //===----------------------------------------------------------------------===//
909 // Top-Level parsing and JIT Driver
910 //===----------------------------------------------------------------------===//
911 
912 static ExecutionEngine *TheExecutionEngine;
913 
HandleDefinition()914 static void HandleDefinition() {
915   if (FunctionAST *F = ParseDefinition()) {
916     if (Function *LF = F->Codegen()) {
917       fprintf(stderr, "Read function definition:");
918       LF->dump();
919     }
920   } else {
921     // Skip token for error recovery.
922     getNextToken();
923   }
924 }
925 
HandleExtern()926 static void HandleExtern() {
927   if (PrototypeAST *P = ParseExtern()) {
928     if (Function *F = P->Codegen()) {
929       fprintf(stderr, "Read extern: ");
930       F->dump();
931     }
932   } else {
933     // Skip token for error recovery.
934     getNextToken();
935   }
936 }
937 
HandleTopLevelExpression()938 static void HandleTopLevelExpression() {
939   // Evaluate a top-level expression into an anonymous function.
940   if (FunctionAST *F = ParseTopLevelExpr()) {
941     if (Function *LF = F->Codegen()) {
942       TheExecutionEngine->finalizeObject();
943       // JIT the function, returning a function pointer.
944       void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
945 
946       // Cast it to the right type (takes no arguments, returns a double) so we
947       // can call it as a native function.
948       double (*FP)() = (double (*)())(intptr_t)FPtr;
949       fprintf(stderr, "Evaluated to %f\n", FP());
950     }
951   } else {
952     // Skip token for error recovery.
953     getNextToken();
954   }
955 }
956 
957 /// top ::= definition | external | expression | ';'
MainLoop()958 static void MainLoop() {
959   while (1) {
960     fprintf(stderr, "ready> ");
961     switch (CurTok) {
962     case tok_eof:
963       return;
964     case ';':
965       getNextToken();
966       break; // ignore top-level semicolons.
967     case tok_def:
968       HandleDefinition();
969       break;
970     case tok_extern:
971       HandleExtern();
972       break;
973     default:
974       HandleTopLevelExpression();
975       break;
976     }
977   }
978 }
979 
980 //===----------------------------------------------------------------------===//
981 // "Library" functions that can be "extern'd" from user code.
982 //===----------------------------------------------------------------------===//
983 
/// putchard - Write X, converted to a char, with putchar(); always returns 0.
extern "C" double putchard(double X) {
  const char Ch = static_cast<char>(X);
  putchar(Ch);
  return 0.0;
}
989 
/// printd - Print X to standard output using the format "%f\n"; always
/// returns 0.
extern "C" double printd(double X) {
  fprintf(stdout, "%f\n", X);
  return 0.0;
}
995 
996 //===----------------------------------------------------------------------===//
997 // Main driver code.
998 //===----------------------------------------------------------------------===//
999 
main()1000 int main() {
1001   InitializeNativeTarget();
1002   InitializeNativeTargetAsmPrinter();
1003   InitializeNativeTargetAsmParser();
1004   LLVMContext &Context = getGlobalContext();
1005 
1006   // Install standard binary operators.
1007   // 1 is lowest precedence.
1008   BinopPrecedence['<'] = 10;
1009   BinopPrecedence['+'] = 20;
1010   BinopPrecedence['-'] = 20;
1011   BinopPrecedence['*'] = 40; // highest.
1012 
1013   // Prime the first token.
1014   fprintf(stderr, "ready> ");
1015   getNextToken();
1016 
1017   // Make the module, which holds all the code.
1018   std::unique_ptr<Module> Owner = make_unique<Module>("my cool jit", Context);
1019   TheModule = Owner.get();
1020 
1021   // Create the JIT.  This takes ownership of the module.
1022   std::string ErrStr;
1023   TheExecutionEngine =
1024       EngineBuilder(std::move(Owner))
1025           .setErrorStr(&ErrStr)
1026           .setMCJITMemoryManager(llvm::make_unique<SectionMemoryManager>())
1027           .create();
1028   if (!TheExecutionEngine) {
1029     fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
1030     exit(1);
1031   }
1032 
1033   legacy::FunctionPassManager OurFPM(TheModule);
1034 
1035   // Set up the optimizer pipeline.  Start with registering info about how the
1036   // target lays out data structures.
1037   TheModule->setDataLayout(*TheExecutionEngine->getDataLayout());
1038   // Provide basic AliasAnalysis support for GVN.
1039   OurFPM.add(createBasicAliasAnalysisPass());
1040   // Do simple "peephole" optimizations and bit-twiddling optzns.
1041   OurFPM.add(createInstructionCombiningPass());
1042   // Reassociate expressions.
1043   OurFPM.add(createReassociatePass());
1044   // Eliminate Common SubExpressions.
1045   OurFPM.add(createGVNPass());
1046   // Simplify the control flow graph (deleting unreachable blocks, etc).
1047   OurFPM.add(createCFGSimplificationPass());
1048 
1049   OurFPM.doInitialization();
1050 
1051   // Set the global so the code gen can use this.
1052   TheFPM = &OurFPM;
1053 
1054   // Run the main "interpreter loop" now.
1055   MainLoop();
1056 
1057   TheFPM = 0;
1058 
1059   // Print out all of the generated code.
1060   TheModule->dump();
1061 
1062   return 0;
1063 }
1064