1 #include "llvm/ADT/STLExtras.h"
2 #include "llvm/Analysis/Passes.h"
3 #include "llvm/IR/IRBuilder.h"
4 #include "llvm/IR/LLVMContext.h"
5 #include "llvm/IR/LegacyPassManager.h"
6 #include "llvm/IR/Module.h"
7 #include "llvm/IR/Verifier.h"
8 #include "llvm/Support/TargetSelect.h"
9 #include "llvm/Transforms/Scalar.h"
10 #include <cctype>
11 #include <cstdio>
12 #include <map>
13 #include <string>
14 #include <vector>
15 #include "../include/KaleidoscopeJIT.h"
16
17 using namespace llvm;
18 using namespace llvm::orc;
19
20 //===----------------------------------------------------------------------===//
21 // Lexer
22 //===----------------------------------------------------------------------===//
23
24 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
25 // of these for known things.
26 enum Token {
27 tok_eof = -1,
28
29 // commands
30 tok_def = -2,
31 tok_extern = -3,
32
33 // primary
34 tok_identifier = -4,
35 tok_number = -5,
36
37 // control
38 tok_if = -6,
39 tok_then = -7,
40 tok_else = -8,
41 tok_for = -9,
42 tok_in = -10,
43
44 // operators
45 tok_binary = -11,
46 tok_unary = -12,
47
48 // var definition
49 tok_var = -13
50 };
51
52 static std::string IdentifierStr; // Filled in if tok_identifier
53 static double NumVal; // Filled in if tok_number
54
55 /// gettok - Return the next token from standard input.
gettok()56 static int gettok() {
57 static int LastChar = ' ';
58
59 // Skip any whitespace.
60 while (isspace(LastChar))
61 LastChar = getchar();
62
63 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
64 IdentifierStr = LastChar;
65 while (isalnum((LastChar = getchar())))
66 IdentifierStr += LastChar;
67
68 if (IdentifierStr == "def")
69 return tok_def;
70 if (IdentifierStr == "extern")
71 return tok_extern;
72 if (IdentifierStr == "if")
73 return tok_if;
74 if (IdentifierStr == "then")
75 return tok_then;
76 if (IdentifierStr == "else")
77 return tok_else;
78 if (IdentifierStr == "for")
79 return tok_for;
80 if (IdentifierStr == "in")
81 return tok_in;
82 if (IdentifierStr == "binary")
83 return tok_binary;
84 if (IdentifierStr == "unary")
85 return tok_unary;
86 if (IdentifierStr == "var")
87 return tok_var;
88 return tok_identifier;
89 }
90
91 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
92 std::string NumStr;
93 do {
94 NumStr += LastChar;
95 LastChar = getchar();
96 } while (isdigit(LastChar) || LastChar == '.');
97
98 NumVal = strtod(NumStr.c_str(), nullptr);
99 return tok_number;
100 }
101
102 if (LastChar == '#') {
103 // Comment until end of line.
104 do
105 LastChar = getchar();
106 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
107
108 if (LastChar != EOF)
109 return gettok();
110 }
111
112 // Check for end of file. Don't eat the EOF.
113 if (LastChar == EOF)
114 return tok_eof;
115
116 // Otherwise, just return the character as its ascii value.
117 int ThisChar = LastChar;
118 LastChar = getchar();
119 return ThisChar;
120 }
121
122 //===----------------------------------------------------------------------===//
123 // Abstract Syntax Tree (aka Parse Tree)
124 //===----------------------------------------------------------------------===//
125 namespace {
126 /// ExprAST - Base class for all expression nodes.
127 class ExprAST {
128 public:
~ExprAST()129 virtual ~ExprAST() {}
130 virtual Value *codegen() = 0;
131 };
132
133 /// NumberExprAST - Expression class for numeric literals like "1.0".
134 class NumberExprAST : public ExprAST {
135 double Val;
136
137 public:
NumberExprAST(double Val)138 NumberExprAST(double Val) : Val(Val) {}
139 Value *codegen() override;
140 };
141
142 /// VariableExprAST - Expression class for referencing a variable, like "a".
143 class VariableExprAST : public ExprAST {
144 std::string Name;
145
146 public:
VariableExprAST(const std::string & Name)147 VariableExprAST(const std::string &Name) : Name(Name) {}
getName() const148 const std::string &getName() const { return Name; }
149 Value *codegen() override;
150 };
151
152 /// UnaryExprAST - Expression class for a unary operator.
153 class UnaryExprAST : public ExprAST {
154 char Opcode;
155 std::unique_ptr<ExprAST> Operand;
156
157 public:
UnaryExprAST(char Opcode,std::unique_ptr<ExprAST> Operand)158 UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
159 : Opcode(Opcode), Operand(std::move(Operand)) {}
160 Value *codegen() override;
161 };
162
163 /// BinaryExprAST - Expression class for a binary operator.
164 class BinaryExprAST : public ExprAST {
165 char Op;
166 std::unique_ptr<ExprAST> LHS, RHS;
167
168 public:
BinaryExprAST(char Op,std::unique_ptr<ExprAST> LHS,std::unique_ptr<ExprAST> RHS)169 BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
170 std::unique_ptr<ExprAST> RHS)
171 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
172 Value *codegen() override;
173 };
174
175 /// CallExprAST - Expression class for function calls.
176 class CallExprAST : public ExprAST {
177 std::string Callee;
178 std::vector<std::unique_ptr<ExprAST>> Args;
179
180 public:
CallExprAST(const std::string & Callee,std::vector<std::unique_ptr<ExprAST>> Args)181 CallExprAST(const std::string &Callee,
182 std::vector<std::unique_ptr<ExprAST>> Args)
183 : Callee(Callee), Args(std::move(Args)) {}
184 Value *codegen() override;
185 };
186
187 /// IfExprAST - Expression class for if/then/else.
188 class IfExprAST : public ExprAST {
189 std::unique_ptr<ExprAST> Cond, Then, Else;
190
191 public:
IfExprAST(std::unique_ptr<ExprAST> Cond,std::unique_ptr<ExprAST> Then,std::unique_ptr<ExprAST> Else)192 IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
193 std::unique_ptr<ExprAST> Else)
194 : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
195 Value *codegen() override;
196 };
197
198 /// ForExprAST - Expression class for for/in.
199 class ForExprAST : public ExprAST {
200 std::string VarName;
201 std::unique_ptr<ExprAST> Start, End, Step, Body;
202
203 public:
ForExprAST(const std::string & VarName,std::unique_ptr<ExprAST> Start,std::unique_ptr<ExprAST> End,std::unique_ptr<ExprAST> Step,std::unique_ptr<ExprAST> Body)204 ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
205 std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
206 std::unique_ptr<ExprAST> Body)
207 : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
208 Step(std::move(Step)), Body(std::move(Body)) {}
209 Value *codegen() override;
210 };
211
212 /// VarExprAST - Expression class for var/in
213 class VarExprAST : public ExprAST {
214 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
215 std::unique_ptr<ExprAST> Body;
216
217 public:
VarExprAST(std::vector<std::pair<std::string,std::unique_ptr<ExprAST>>> VarNames,std::unique_ptr<ExprAST> Body)218 VarExprAST(
219 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
220 std::unique_ptr<ExprAST> Body)
221 : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
222 Value *codegen() override;
223 };
224
225 /// PrototypeAST - This class represents the "prototype" for a function,
226 /// which captures its name, and its argument names (thus implicitly the number
227 /// of arguments the function takes), as well as if it is an operator.
228 class PrototypeAST {
229 std::string Name;
230 std::vector<std::string> Args;
231 bool IsOperator;
232 unsigned Precedence; // Precedence if a binary op.
233
234 public:
PrototypeAST(const std::string & Name,std::vector<std::string> Args,bool IsOperator=false,unsigned Prec=0)235 PrototypeAST(const std::string &Name, std::vector<std::string> Args,
236 bool IsOperator = false, unsigned Prec = 0)
237 : Name(Name), Args(std::move(Args)), IsOperator(IsOperator),
238 Precedence(Prec) {}
239 Function *codegen();
getName() const240 const std::string &getName() const { return Name; }
241
isUnaryOp() const242 bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
isBinaryOp() const243 bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
244
getOperatorName() const245 char getOperatorName() const {
246 assert(isUnaryOp() || isBinaryOp());
247 return Name[Name.size() - 1];
248 }
249
getBinaryPrecedence() const250 unsigned getBinaryPrecedence() const { return Precedence; }
251 };
252
253 /// FunctionAST - This class represents a function definition itself.
254 class FunctionAST {
255 std::unique_ptr<PrototypeAST> Proto;
256 std::unique_ptr<ExprAST> Body;
257
258 public:
FunctionAST(std::unique_ptr<PrototypeAST> Proto,std::unique_ptr<ExprAST> Body)259 FunctionAST(std::unique_ptr<PrototypeAST> Proto,
260 std::unique_ptr<ExprAST> Body)
261 : Proto(std::move(Proto)), Body(std::move(Body)) {}
262 Function *codegen();
263 };
264 } // end anonymous namespace
265
266 //===----------------------------------------------------------------------===//
267 // Parser
268 //===----------------------------------------------------------------------===//
269
270 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
271 /// token the parser is looking at. getNextToken reads another token from the
272 /// lexer and updates CurTok with its results.
273 static int CurTok;
getNextToken()274 static int getNextToken() { return CurTok = gettok(); }
275
276 /// BinopPrecedence - This holds the precedence for each binary operator that is
277 /// defined.
278 static std::map<char, int> BinopPrecedence;
279
280 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
GetTokPrecedence()281 static int GetTokPrecedence() {
282 if (!isascii(CurTok))
283 return -1;
284
285 // Make sure it's a declared binop.
286 int TokPrec = BinopPrecedence[CurTok];
287 if (TokPrec <= 0)
288 return -1;
289 return TokPrec;
290 }
291
292 /// Error* - These are little helper functions for error handling.
Error(const char * Str)293 std::unique_ptr<ExprAST> Error(const char *Str) {
294 fprintf(stderr, "Error: %s\n", Str);
295 return nullptr;
296 }
297
ErrorP(const char * Str)298 std::unique_ptr<PrototypeAST> ErrorP(const char *Str) {
299 Error(Str);
300 return nullptr;
301 }
302
303 static std::unique_ptr<ExprAST> ParseExpression();
304
305 /// numberexpr ::= number
ParseNumberExpr()306 static std::unique_ptr<ExprAST> ParseNumberExpr() {
307 auto Result = llvm::make_unique<NumberExprAST>(NumVal);
308 getNextToken(); // consume the number
309 return std::move(Result);
310 }
311
312 /// parenexpr ::= '(' expression ')'
ParseParenExpr()313 static std::unique_ptr<ExprAST> ParseParenExpr() {
314 getNextToken(); // eat (.
315 auto V = ParseExpression();
316 if (!V)
317 return nullptr;
318
319 if (CurTok != ')')
320 return Error("expected ')'");
321 getNextToken(); // eat ).
322 return V;
323 }
324
325 /// identifierexpr
326 /// ::= identifier
327 /// ::= identifier '(' expression* ')'
ParseIdentifierExpr()328 static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
329 std::string IdName = IdentifierStr;
330
331 getNextToken(); // eat identifier.
332
333 if (CurTok != '(') // Simple variable ref.
334 return llvm::make_unique<VariableExprAST>(IdName);
335
336 // Call.
337 getNextToken(); // eat (
338 std::vector<std::unique_ptr<ExprAST>> Args;
339 if (CurTok != ')') {
340 while (1) {
341 if (auto Arg = ParseExpression())
342 Args.push_back(std::move(Arg));
343 else
344 return nullptr;
345
346 if (CurTok == ')')
347 break;
348
349 if (CurTok != ',')
350 return Error("Expected ')' or ',' in argument list");
351 getNextToken();
352 }
353 }
354
355 // Eat the ')'.
356 getNextToken();
357
358 return llvm::make_unique<CallExprAST>(IdName, std::move(Args));
359 }
360
361 /// ifexpr ::= 'if' expression 'then' expression 'else' expression
ParseIfExpr()362 static std::unique_ptr<ExprAST> ParseIfExpr() {
363 getNextToken(); // eat the if.
364
365 // condition.
366 auto Cond = ParseExpression();
367 if (!Cond)
368 return nullptr;
369
370 if (CurTok != tok_then)
371 return Error("expected then");
372 getNextToken(); // eat the then
373
374 auto Then = ParseExpression();
375 if (!Then)
376 return nullptr;
377
378 if (CurTok != tok_else)
379 return Error("expected else");
380
381 getNextToken();
382
383 auto Else = ParseExpression();
384 if (!Else)
385 return nullptr;
386
387 return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
388 std::move(Else));
389 }
390
391 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
ParseForExpr()392 static std::unique_ptr<ExprAST> ParseForExpr() {
393 getNextToken(); // eat the for.
394
395 if (CurTok != tok_identifier)
396 return Error("expected identifier after for");
397
398 std::string IdName = IdentifierStr;
399 getNextToken(); // eat identifier.
400
401 if (CurTok != '=')
402 return Error("expected '=' after for");
403 getNextToken(); // eat '='.
404
405 auto Start = ParseExpression();
406 if (!Start)
407 return nullptr;
408 if (CurTok != ',')
409 return Error("expected ',' after for start value");
410 getNextToken();
411
412 auto End = ParseExpression();
413 if (!End)
414 return nullptr;
415
416 // The step value is optional.
417 std::unique_ptr<ExprAST> Step;
418 if (CurTok == ',') {
419 getNextToken();
420 Step = ParseExpression();
421 if (!Step)
422 return nullptr;
423 }
424
425 if (CurTok != tok_in)
426 return Error("expected 'in' after for");
427 getNextToken(); // eat 'in'.
428
429 auto Body = ParseExpression();
430 if (!Body)
431 return nullptr;
432
433 return llvm::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End),
434 std::move(Step), std::move(Body));
435 }
436
437 /// varexpr ::= 'var' identifier ('=' expression)?
438 // (',' identifier ('=' expression)?)* 'in' expression
ParseVarExpr()439 static std::unique_ptr<ExprAST> ParseVarExpr() {
440 getNextToken(); // eat the var.
441
442 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
443
444 // At least one variable name is required.
445 if (CurTok != tok_identifier)
446 return Error("expected identifier after var");
447
448 while (1) {
449 std::string Name = IdentifierStr;
450 getNextToken(); // eat identifier.
451
452 // Read the optional initializer.
453 std::unique_ptr<ExprAST> Init = nullptr;
454 if (CurTok == '=') {
455 getNextToken(); // eat the '='.
456
457 Init = ParseExpression();
458 if (!Init)
459 return nullptr;
460 }
461
462 VarNames.push_back(std::make_pair(Name, std::move(Init)));
463
464 // End of var list, exit loop.
465 if (CurTok != ',')
466 break;
467 getNextToken(); // eat the ','.
468
469 if (CurTok != tok_identifier)
470 return Error("expected identifier list after var");
471 }
472
473 // At this point, we have to have 'in'.
474 if (CurTok != tok_in)
475 return Error("expected 'in' keyword after 'var'");
476 getNextToken(); // eat 'in'.
477
478 auto Body = ParseExpression();
479 if (!Body)
480 return nullptr;
481
482 return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body));
483 }
484
485 /// primary
486 /// ::= identifierexpr
487 /// ::= numberexpr
488 /// ::= parenexpr
489 /// ::= ifexpr
490 /// ::= forexpr
491 /// ::= varexpr
ParsePrimary()492 static std::unique_ptr<ExprAST> ParsePrimary() {
493 switch (CurTok) {
494 default:
495 return Error("unknown token when expecting an expression");
496 case tok_identifier:
497 return ParseIdentifierExpr();
498 case tok_number:
499 return ParseNumberExpr();
500 case '(':
501 return ParseParenExpr();
502 case tok_if:
503 return ParseIfExpr();
504 case tok_for:
505 return ParseForExpr();
506 case tok_var:
507 return ParseVarExpr();
508 }
509 }
510
511 /// unary
512 /// ::= primary
513 /// ::= '!' unary
ParseUnary()514 static std::unique_ptr<ExprAST> ParseUnary() {
515 // If the current token is not an operator, it must be a primary expr.
516 if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
517 return ParsePrimary();
518
519 // If this is a unary operator, read it.
520 int Opc = CurTok;
521 getNextToken();
522 if (auto Operand = ParseUnary())
523 return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand));
524 return nullptr;
525 }
526
527 /// binoprhs
528 /// ::= ('+' unary)*
ParseBinOpRHS(int ExprPrec,std::unique_ptr<ExprAST> LHS)529 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
530 std::unique_ptr<ExprAST> LHS) {
531 // If this is a binop, find its precedence.
532 while (1) {
533 int TokPrec = GetTokPrecedence();
534
535 // If this is a binop that binds at least as tightly as the current binop,
536 // consume it, otherwise we are done.
537 if (TokPrec < ExprPrec)
538 return LHS;
539
540 // Okay, we know this is a binop.
541 int BinOp = CurTok;
542 getNextToken(); // eat binop
543
544 // Parse the unary expression after the binary operator.
545 auto RHS = ParseUnary();
546 if (!RHS)
547 return nullptr;
548
549 // If BinOp binds less tightly with RHS than the operator after RHS, let
550 // the pending operator take RHS as its LHS.
551 int NextPrec = GetTokPrecedence();
552 if (TokPrec < NextPrec) {
553 RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
554 if (!RHS)
555 return nullptr;
556 }
557
558 // Merge LHS/RHS.
559 LHS =
560 llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS));
561 }
562 }
563
564 /// expression
565 /// ::= unary binoprhs
566 ///
ParseExpression()567 static std::unique_ptr<ExprAST> ParseExpression() {
568 auto LHS = ParseUnary();
569 if (!LHS)
570 return nullptr;
571
572 return ParseBinOpRHS(0, std::move(LHS));
573 }
574
575 /// prototype
576 /// ::= id '(' id* ')'
577 /// ::= binary LETTER number? (id, id)
578 /// ::= unary LETTER (id)
ParsePrototype()579 static std::unique_ptr<PrototypeAST> ParsePrototype() {
580 std::string FnName;
581
582 unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
583 unsigned BinaryPrecedence = 30;
584
585 switch (CurTok) {
586 default:
587 return ErrorP("Expected function name in prototype");
588 case tok_identifier:
589 FnName = IdentifierStr;
590 Kind = 0;
591 getNextToken();
592 break;
593 case tok_unary:
594 getNextToken();
595 if (!isascii(CurTok))
596 return ErrorP("Expected unary operator");
597 FnName = "unary";
598 FnName += (char)CurTok;
599 Kind = 1;
600 getNextToken();
601 break;
602 case tok_binary:
603 getNextToken();
604 if (!isascii(CurTok))
605 return ErrorP("Expected binary operator");
606 FnName = "binary";
607 FnName += (char)CurTok;
608 Kind = 2;
609 getNextToken();
610
611 // Read the precedence if present.
612 if (CurTok == tok_number) {
613 if (NumVal < 1 || NumVal > 100)
614 return ErrorP("Invalid precedecnce: must be 1..100");
615 BinaryPrecedence = (unsigned)NumVal;
616 getNextToken();
617 }
618 break;
619 }
620
621 if (CurTok != '(')
622 return ErrorP("Expected '(' in prototype");
623
624 std::vector<std::string> ArgNames;
625 while (getNextToken() == tok_identifier)
626 ArgNames.push_back(IdentifierStr);
627 if (CurTok != ')')
628 return ErrorP("Expected ')' in prototype");
629
630 // success.
631 getNextToken(); // eat ')'.
632
633 // Verify right number of names for operator.
634 if (Kind && ArgNames.size() != Kind)
635 return ErrorP("Invalid number of operands for operator");
636
637 return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0,
638 BinaryPrecedence);
639 }
640
641 /// definition ::= 'def' prototype expression
ParseDefinition()642 static std::unique_ptr<FunctionAST> ParseDefinition() {
643 getNextToken(); // eat def.
644 auto Proto = ParsePrototype();
645 if (!Proto)
646 return nullptr;
647
648 if (auto E = ParseExpression())
649 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
650 return nullptr;
651 }
652
653 /// toplevelexpr ::= expression
ParseTopLevelExpr()654 static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
655 if (auto E = ParseExpression()) {
656 // Make an anonymous proto.
657 auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr",
658 std::vector<std::string>());
659 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
660 }
661 return nullptr;
662 }
663
664 /// external ::= 'extern' prototype
ParseExtern()665 static std::unique_ptr<PrototypeAST> ParseExtern() {
666 getNextToken(); // eat extern.
667 return ParsePrototype();
668 }
669
670 //===----------------------------------------------------------------------===//
671 // Code Generation
672 //===----------------------------------------------------------------------===//
673
674 static std::unique_ptr<Module> TheModule;
675 static IRBuilder<> Builder(getGlobalContext());
676 static std::map<std::string, AllocaInst *> NamedValues;
677 static std::unique_ptr<legacy::FunctionPassManager> TheFPM;
678 static std::unique_ptr<KaleidoscopeJIT> TheJIT;
679 static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
680
ErrorV(const char * Str)681 Value *ErrorV(const char *Str) {
682 Error(Str);
683 return nullptr;
684 }
685
getFunction(std::string Name)686 Function *getFunction(std::string Name) {
687 // First, see if the function has already been added to the current module.
688 if (auto *F = TheModule->getFunction(Name))
689 return F;
690
691 // If not, check whether we can codegen the declaration from some existing
692 // prototype.
693 auto FI = FunctionProtos.find(Name);
694 if (FI != FunctionProtos.end())
695 return FI->second->codegen();
696
697 // If no existing prototype exists, return null.
698 return nullptr;
699 }
700
701 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
702 /// the function. This is used for mutable variables etc.
CreateEntryBlockAlloca(Function * TheFunction,const std::string & VarName)703 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
704 const std::string &VarName) {
705 IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
706 TheFunction->getEntryBlock().begin());
707 return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr,
708 VarName.c_str());
709 }
710
codegen()711 Value *NumberExprAST::codegen() {
712 return ConstantFP::get(getGlobalContext(), APFloat(Val));
713 }
714
codegen()715 Value *VariableExprAST::codegen() {
716 // Look this variable up in the function.
717 Value *V = NamedValues[Name];
718 if (!V)
719 return ErrorV("Unknown variable name");
720
721 // Load the value.
722 return Builder.CreateLoad(V, Name.c_str());
723 }
724
codegen()725 Value *UnaryExprAST::codegen() {
726 Value *OperandV = Operand->codegen();
727 if (!OperandV)
728 return nullptr;
729
730 Function *F = getFunction(std::string("unary") + Opcode);
731 if (!F)
732 return ErrorV("Unknown unary operator");
733
734 return Builder.CreateCall(F, OperandV, "unop");
735 }
736
codegen()737 Value *BinaryExprAST::codegen() {
738 // Special case '=' because we don't want to emit the LHS as an expression.
739 if (Op == '=') {
740 // Assignment requires the LHS to be an identifier.
741 // This assume we're building without RTTI because LLVM builds that way by
742 // default. If you build LLVM with RTTI this can be changed to a
743 // dynamic_cast for automatic error checking.
744 VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get());
745 if (!LHSE)
746 return ErrorV("destination of '=' must be a variable");
747 // Codegen the RHS.
748 Value *Val = RHS->codegen();
749 if (!Val)
750 return nullptr;
751
752 // Look up the name.
753 Value *Variable = NamedValues[LHSE->getName()];
754 if (!Variable)
755 return ErrorV("Unknown variable name");
756
757 Builder.CreateStore(Val, Variable);
758 return Val;
759 }
760
761 Value *L = LHS->codegen();
762 Value *R = RHS->codegen();
763 if (!L || !R)
764 return nullptr;
765
766 switch (Op) {
767 case '+':
768 return Builder.CreateFAdd(L, R, "addtmp");
769 case '-':
770 return Builder.CreateFSub(L, R, "subtmp");
771 case '*':
772 return Builder.CreateFMul(L, R, "multmp");
773 case '<':
774 L = Builder.CreateFCmpULT(L, R, "cmptmp");
775 // Convert bool 0/1 to double 0.0 or 1.0
776 return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
777 "booltmp");
778 default:
779 break;
780 }
781
782 // If it wasn't a builtin binary operator, it must be a user defined one. Emit
783 // a call to it.
784 Function *F = getFunction(std::string("binary") + Op);
785 assert(F && "binary operator not found!");
786
787 Value *Ops[] = {L, R};
788 return Builder.CreateCall(F, Ops, "binop");
789 }
790
codegen()791 Value *CallExprAST::codegen() {
792 // Look up the name in the global module table.
793 Function *CalleeF = getFunction(Callee);
794 if (!CalleeF)
795 return ErrorV("Unknown function referenced");
796
797 // If argument mismatch error.
798 if (CalleeF->arg_size() != Args.size())
799 return ErrorV("Incorrect # arguments passed");
800
801 std::vector<Value *> ArgsV;
802 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
803 ArgsV.push_back(Args[i]->codegen());
804 if (!ArgsV.back())
805 return nullptr;
806 }
807
808 return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
809 }
810
codegen()811 Value *IfExprAST::codegen() {
812 Value *CondV = Cond->codegen();
813 if (!CondV)
814 return nullptr;
815
816 // Convert condition to a bool by comparing equal to 0.0.
817 CondV = Builder.CreateFCmpONE(
818 CondV, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "ifcond");
819
820 Function *TheFunction = Builder.GetInsertBlock()->getParent();
821
822 // Create blocks for the then and else cases. Insert the 'then' block at the
823 // end of the function.
824 BasicBlock *ThenBB =
825 BasicBlock::Create(getGlobalContext(), "then", TheFunction);
826 BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
827 BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
828
829 Builder.CreateCondBr(CondV, ThenBB, ElseBB);
830
831 // Emit then value.
832 Builder.SetInsertPoint(ThenBB);
833
834 Value *ThenV = Then->codegen();
835 if (!ThenV)
836 return nullptr;
837
838 Builder.CreateBr(MergeBB);
839 // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
840 ThenBB = Builder.GetInsertBlock();
841
842 // Emit else block.
843 TheFunction->getBasicBlockList().push_back(ElseBB);
844 Builder.SetInsertPoint(ElseBB);
845
846 Value *ElseV = Else->codegen();
847 if (!ElseV)
848 return nullptr;
849
850 Builder.CreateBr(MergeBB);
851 // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
852 ElseBB = Builder.GetInsertBlock();
853
854 // Emit merge block.
855 TheFunction->getBasicBlockList().push_back(MergeBB);
856 Builder.SetInsertPoint(MergeBB);
857 PHINode *PN =
858 Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp");
859
860 PN->addIncoming(ThenV, ThenBB);
861 PN->addIncoming(ElseV, ElseBB);
862 return PN;
863 }
864
865 // Output for-loop as:
866 // var = alloca double
867 // ...
868 // start = startexpr
869 // store start -> var
870 // goto loop
871 // loop:
872 // ...
873 // bodyexpr
874 // ...
875 // loopend:
876 // step = stepexpr
877 // endcond = endexpr
878 //
879 // curvar = load var
880 // nextvar = curvar + step
881 // store nextvar -> var
882 // br endcond, loop, endloop
883 // outloop:
codegen()884 Value *ForExprAST::codegen() {
885 Function *TheFunction = Builder.GetInsertBlock()->getParent();
886
887 // Create an alloca for the variable in the entry block.
888 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
889
890 // Emit the start code first, without 'variable' in scope.
891 Value *StartVal = Start->codegen();
892 if (!StartVal)
893 return nullptr;
894
895 // Store the value into the alloca.
896 Builder.CreateStore(StartVal, Alloca);
897
898 // Make the new basic block for the loop header, inserting after current
899 // block.
900 BasicBlock *LoopBB =
901 BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
902
903 // Insert an explicit fall through from the current block to the LoopBB.
904 Builder.CreateBr(LoopBB);
905
906 // Start insertion in LoopBB.
907 Builder.SetInsertPoint(LoopBB);
908
909 // Within the loop, the variable is defined equal to the PHI node. If it
910 // shadows an existing variable, we have to restore it, so save it now.
911 AllocaInst *OldVal = NamedValues[VarName];
912 NamedValues[VarName] = Alloca;
913
914 // Emit the body of the loop. This, like any other expr, can change the
915 // current BB. Note that we ignore the value computed by the body, but don't
916 // allow an error.
917 if (!Body->codegen())
918 return nullptr;
919
920 // Emit the step value.
921 Value *StepVal = nullptr;
922 if (Step) {
923 StepVal = Step->codegen();
924 if (!StepVal)
925 return nullptr;
926 } else {
927 // If not specified, use 1.0.
928 StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
929 }
930
931 // Compute the end condition.
932 Value *EndCond = End->codegen();
933 if (!EndCond)
934 return nullptr;
935
936 // Reload, increment, and restore the alloca. This handles the case where
937 // the body of the loop mutates the variable.
938 Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
939 Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
940 Builder.CreateStore(NextVar, Alloca);
941
942 // Convert condition to a bool by comparing equal to 0.0.
943 EndCond = Builder.CreateFCmpONE(
944 EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond");
945
946 // Create the "after loop" block and insert it.
947 BasicBlock *AfterBB =
948 BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
949
950 // Insert the conditional branch into the end of LoopEndBB.
951 Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
952
953 // Any new code will be inserted in AfterBB.
954 Builder.SetInsertPoint(AfterBB);
955
956 // Restore the unshadowed variable.
957 if (OldVal)
958 NamedValues[VarName] = OldVal;
959 else
960 NamedValues.erase(VarName);
961
962 // for expr always returns 0.0.
963 return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
964 }
965
codegen()966 Value *VarExprAST::codegen() {
967 std::vector<AllocaInst *> OldBindings;
968
969 Function *TheFunction = Builder.GetInsertBlock()->getParent();
970
971 // Register all variables and emit their initializer.
972 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
973 const std::string &VarName = VarNames[i].first;
974 ExprAST *Init = VarNames[i].second.get();
975
976 // Emit the initializer before adding the variable to scope, this prevents
977 // the initializer from referencing the variable itself, and permits stuff
978 // like this:
979 // var a = 1 in
980 // var a = a in ... # refers to outer 'a'.
981 Value *InitVal;
982 if (Init) {
983 InitVal = Init->codegen();
984 if (!InitVal)
985 return nullptr;
986 } else { // If not specified, use 0.0.
987 InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
988 }
989
990 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
991 Builder.CreateStore(InitVal, Alloca);
992
993 // Remember the old variable binding so that we can restore the binding when
994 // we unrecurse.
995 OldBindings.push_back(NamedValues[VarName]);
996
997 // Remember this binding.
998 NamedValues[VarName] = Alloca;
999 }
1000
1001 // Codegen the body, now that all vars are in scope.
1002 Value *BodyVal = Body->codegen();
1003 if (!BodyVal)
1004 return nullptr;
1005
1006 // Pop all our variables from scope.
1007 for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
1008 NamedValues[VarNames[i].first] = OldBindings[i];
1009
1010 // Return the body computation.
1011 return BodyVal;
1012 }
1013
codegen()1014 Function *PrototypeAST::codegen() {
1015 // Make the function type: double(double,double) etc.
1016 std::vector<Type *> Doubles(Args.size(),
1017 Type::getDoubleTy(getGlobalContext()));
1018 FunctionType *FT =
1019 FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
1020
1021 Function *F =
1022 Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
1023
1024 // Set names for all arguments.
1025 unsigned Idx = 0;
1026 for (auto &Arg : F->args())
1027 Arg.setName(Args[Idx++]);
1028
1029 return F;
1030 }
1031
codegen()1032 Function *FunctionAST::codegen() {
1033 // Transfer ownership of the prototype to the FunctionProtos map, but keep a
1034 // reference to it for use below.
1035 auto &P = *Proto;
1036 FunctionProtos[Proto->getName()] = std::move(Proto);
1037 Function *TheFunction = getFunction(P.getName());
1038 if (!TheFunction)
1039 return nullptr;
1040
1041 // If this is an operator, install it.
1042 if (P.isBinaryOp())
1043 BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
1044
1045 // Create a new basic block to start insertion into.
1046 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
1047 Builder.SetInsertPoint(BB);
1048
1049 // Record the function arguments in the NamedValues map.
1050 NamedValues.clear();
1051 for (auto &Arg : TheFunction->args()) {
1052 // Create an alloca for this variable.
1053 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
1054
1055 // Store the initial value into the alloca.
1056 Builder.CreateStore(&Arg, Alloca);
1057
1058 // Add arguments to variable symbol table.
1059 NamedValues[Arg.getName()] = Alloca;
1060 }
1061
1062 if (Value *RetVal = Body->codegen()) {
1063 // Finish off the function.
1064 Builder.CreateRet(RetVal);
1065
1066 // Validate the generated code, checking for consistency.
1067 verifyFunction(*TheFunction);
1068
1069 // Run the optimizer on the function.
1070 TheFPM->run(*TheFunction);
1071
1072 return TheFunction;
1073 }
1074
1075 // Error reading body, remove function.
1076 TheFunction->eraseFromParent();
1077
1078 if (P.isBinaryOp())
1079 BinopPrecedence.erase(Proto->getOperatorName());
1080 return nullptr;
1081 }
1082
1083 //===----------------------------------------------------------------------===//
1084 // Top-Level parsing and JIT Driver
1085 //===----------------------------------------------------------------------===//
1086
InitializeModuleAndPassManager()1087 static void InitializeModuleAndPassManager() {
1088 // Open a new module.
1089 TheModule = llvm::make_unique<Module>("my cool jit", getGlobalContext());
1090 TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
1091
1092 // Create a new pass manager attached to it.
1093 TheFPM = llvm::make_unique<legacy::FunctionPassManager>(TheModule.get());
1094
1095 // Do simple "peephole" optimizations and bit-twiddling optzns.
1096 TheFPM->add(createInstructionCombiningPass());
1097 // Reassociate expressions.
1098 TheFPM->add(createReassociatePass());
1099 // Eliminate Common SubExpressions.
1100 TheFPM->add(createGVNPass());
1101 // Simplify the control flow graph (deleting unreachable blocks, etc).
1102 TheFPM->add(createCFGSimplificationPass());
1103
1104 TheFPM->doInitialization();
1105 }
1106
HandleDefinition()1107 static void HandleDefinition() {
1108 if (auto FnAST = ParseDefinition()) {
1109 if (auto *FnIR = FnAST->codegen()) {
1110 fprintf(stderr, "Read function definition:");
1111 FnIR->dump();
1112 TheJIT->addModule(std::move(TheModule));
1113 InitializeModuleAndPassManager();
1114 }
1115 } else {
1116 // Skip token for error recovery.
1117 getNextToken();
1118 }
1119 }
1120
HandleExtern()1121 static void HandleExtern() {
1122 if (auto ProtoAST = ParseExtern()) {
1123 if (auto *FnIR = ProtoAST->codegen()) {
1124 fprintf(stderr, "Read extern: ");
1125 FnIR->dump();
1126 FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
1127 }
1128 } else {
1129 // Skip token for error recovery.
1130 getNextToken();
1131 }
1132 }
1133
HandleTopLevelExpression()1134 static void HandleTopLevelExpression() {
1135 // Evaluate a top-level expression into an anonymous function.
1136 if (auto FnAST = ParseTopLevelExpr()) {
1137 if (FnAST->codegen()) {
1138
1139 // JIT the module containing the anonymous expression, keeping a handle so
1140 // we can free it later.
1141 auto H = TheJIT->addModule(std::move(TheModule));
1142 InitializeModuleAndPassManager();
1143
1144 // Search the JIT for the __anon_expr symbol.
1145 auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
1146 assert(ExprSymbol && "Function not found");
1147
1148 // Get the symbol's address and cast it to the right type (takes no
1149 // arguments, returns a double) so we can call it as a native function.
1150 double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
1151 fprintf(stderr, "Evaluated to %f\n", FP());
1152
1153 // Delete the anonymous expression module from the JIT.
1154 TheJIT->removeModule(H);
1155 }
1156 } else {
1157 // Skip token for error recovery.
1158 getNextToken();
1159 }
1160 }
1161
1162 /// top ::= definition | external | expression | ';'
MainLoop()1163 static void MainLoop() {
1164 while (1) {
1165 fprintf(stderr, "ready> ");
1166 switch (CurTok) {
1167 case tok_eof:
1168 return;
1169 case ';': // ignore top-level semicolons.
1170 getNextToken();
1171 break;
1172 case tok_def:
1173 HandleDefinition();
1174 break;
1175 case tok_extern:
1176 HandleExtern();
1177 break;
1178 default:
1179 HandleTopLevelExpression();
1180 break;
1181 }
1182 }
1183 }
1184
1185 //===----------------------------------------------------------------------===//
1186 // "Library" functions that can be "extern'd" from user code.
1187 //===----------------------------------------------------------------------===//
1188
1189 /// putchard - putchar that takes a double and returns 0.
putchard(double X)1190 extern "C" double putchard(double X) {
1191 fputc((char)X, stderr);
1192 return 0;
1193 }
1194
1195 /// printd - printf that takes a double prints it as "%f\n", returning 0.
printd(double X)1196 extern "C" double printd(double X) {
1197 fprintf(stderr, "%f\n", X);
1198 return 0;
1199 }
1200
1201 //===----------------------------------------------------------------------===//
1202 // Main driver code.
1203 //===----------------------------------------------------------------------===//
1204
main()1205 int main() {
1206 InitializeNativeTarget();
1207 InitializeNativeTargetAsmPrinter();
1208 InitializeNativeTargetAsmParser();
1209
1210 // Install standard binary operators.
1211 // 1 is lowest precedence.
1212 BinopPrecedence['='] = 2;
1213 BinopPrecedence['<'] = 10;
1214 BinopPrecedence['+'] = 20;
1215 BinopPrecedence['-'] = 20;
1216 BinopPrecedence['*'] = 40; // highest.
1217
1218 // Prime the first token.
1219 fprintf(stderr, "ready> ");
1220 getNextToken();
1221
1222 TheJIT = llvm::make_unique<KaleidoscopeJIT>();
1223
1224 InitializeModuleAndPassManager();
1225
1226 // Run the main "interpreter loop" now.
1227 MainLoop();
1228
1229 return 0;
1230 }
1231