1 #include "llvm/ADT/APFloat.h"
2 #include "llvm/ADT/STLExtras.h"
3 #include "llvm/IR/BasicBlock.h"
4 #include "llvm/IR/Constants.h"
5 #include "llvm/IR/DerivedTypes.h"
6 #include "llvm/IR/Function.h"
7 #include "llvm/IR/Instructions.h"
8 #include "llvm/IR/IRBuilder.h"
9 #include "llvm/IR/LLVMContext.h"
10 #include "llvm/IR/LegacyPassManager.h"
11 #include "llvm/IR/Module.h"
12 #include "llvm/IR/Type.h"
13 #include "llvm/IR/Verifier.h"
14 #include "llvm/Support/Error.h"
15 #include "llvm/Support/TargetSelect.h"
16 #include "llvm/Target/TargetMachine.h"
17 #include "llvm/Transforms/Scalar.h"
18 #include "llvm/Transforms/Scalar/GVN.h"
19 #include "KaleidoscopeJIT.h"
20 #include <cassert>
21 #include <cctype>
22 #include <cstdint>
23 #include <cstdio>
24 #include <cstdlib>
25 #include <map>
26 #include <memory>
27 #include <string>
28 #include <utility>
29 #include <vector>
30 
31 #include <netdb.h>
32 #include <unistd.h>
33 #include <netinet/in.h>
34 #include <sys/socket.h>
35 
36 using namespace llvm;
37 using namespace llvm::orc;
38 
// Command line argument for TCP hostname.
// Registered under the option name "hostname"; initialised to "localhost".
cl::opt<std::string> HostName("hostname",
                              cl::desc("TCP hostname to connect to"),
                              cl::init("localhost"));

// Command line argument for TCP port.
// Registered under the option name "port"; initialised to 20000.
cl::opt<uint32_t> Port("port",
                       cl::desc("TCP port to connect to"),
                       cl::init(20000));
48 
49 //===----------------------------------------------------------------------===//
50 // Lexer
51 //===----------------------------------------------------------------------===//
52 
// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
// of these for known things.  Every named token is negative, so it cannot be
// confused with a character code.
enum Token {
  tok_eof = -1, // end of input

  // commands
  tok_def = -2,    // keyword "def"
  tok_extern = -3, // keyword "extern"

  // primary
  tok_identifier = -4, // any other identifier; spelling is in IdentifierStr
  tok_number = -5,     // numeric literal; value is in NumVal

  // control
  tok_if = -6,   // keyword "if"
  tok_then = -7, // keyword "then"
  tok_else = -8, // keyword "else"
  tok_for = -9,  // keyword "for"
  tok_in = -10,  // keyword "in"

  // operators
  tok_binary = -11, // keyword "binary" (introduces a binary operator prototype)
  tok_unary = -12,  // keyword "unary" (introduces a unary operator prototype)

  // var definition
  tok_var = -13 // keyword "var"
};
80 
// Payload of the most recent token produced by gettok().  IdentifierStr is
// also overwritten when a keyword is lexed, because keywords are recognised
// only after the whole word has been collected.
static std::string IdentifierStr; // Filled in if tok_identifier
static double NumVal;             // Filled in if tok_number
83 
84 /// gettok - Return the next token from standard input.
gettok()85 static int gettok() {
86   static int LastChar = ' ';
87 
88   // Skip any whitespace.
89   while (isspace(LastChar))
90     LastChar = getchar();
91 
92   if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
93     IdentifierStr = LastChar;
94     while (isalnum((LastChar = getchar())))
95       IdentifierStr += LastChar;
96 
97     if (IdentifierStr == "def")
98       return tok_def;
99     if (IdentifierStr == "extern")
100       return tok_extern;
101     if (IdentifierStr == "if")
102       return tok_if;
103     if (IdentifierStr == "then")
104       return tok_then;
105     if (IdentifierStr == "else")
106       return tok_else;
107     if (IdentifierStr == "for")
108       return tok_for;
109     if (IdentifierStr == "in")
110       return tok_in;
111     if (IdentifierStr == "binary")
112       return tok_binary;
113     if (IdentifierStr == "unary")
114       return tok_unary;
115     if (IdentifierStr == "var")
116       return tok_var;
117     return tok_identifier;
118   }
119 
120   if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
121     std::string NumStr;
122     do {
123       NumStr += LastChar;
124       LastChar = getchar();
125     } while (isdigit(LastChar) || LastChar == '.');
126 
127     NumVal = strtod(NumStr.c_str(), nullptr);
128     return tok_number;
129   }
130 
131   if (LastChar == '#') {
132     // Comment until end of line.
133     do
134       LastChar = getchar();
135     while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
136 
137     if (LastChar != EOF)
138       return gettok();
139   }
140 
141   // Check for end of file.  Don't eat the EOF.
142   if (LastChar == EOF)
143     return tok_eof;
144 
145   // Otherwise, just return the character as its ascii value.
146   int ThisChar = LastChar;
147   LastChar = getchar();
148   return ThisChar;
149 }
150 
151 //===----------------------------------------------------------------------===//
152 // Abstract Syntax Tree (aka Parse Tree)
153 //===----------------------------------------------------------------------===//
154 
155 /// ExprAST - Base class for all expression nodes.
156 class ExprAST {
157 public:
~ExprAST()158   virtual ~ExprAST() {}
159   virtual Value *codegen() = 0;
160 };
161 
162 /// NumberExprAST - Expression class for numeric literals like "1.0".
163 class NumberExprAST : public ExprAST {
164   double Val;
165 
166 public:
NumberExprAST(double Val)167   NumberExprAST(double Val) : Val(Val) {}
168   Value *codegen() override;
169 };
170 
171 /// VariableExprAST - Expression class for referencing a variable, like "a".
172 class VariableExprAST : public ExprAST {
173   std::string Name;
174 
175 public:
VariableExprAST(const std::string & Name)176   VariableExprAST(const std::string &Name) : Name(Name) {}
getName() const177   const std::string &getName() const { return Name; }
178   Value *codegen() override;
179 };
180 
181 /// UnaryExprAST - Expression class for a unary operator.
182 class UnaryExprAST : public ExprAST {
183   char Opcode;
184   std::unique_ptr<ExprAST> Operand;
185 
186 public:
UnaryExprAST(char Opcode,std::unique_ptr<ExprAST> Operand)187   UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
188       : Opcode(Opcode), Operand(std::move(Operand)) {}
189   Value *codegen() override;
190 };
191 
192 /// BinaryExprAST - Expression class for a binary operator.
193 class BinaryExprAST : public ExprAST {
194   char Op;
195   std::unique_ptr<ExprAST> LHS, RHS;
196 
197 public:
BinaryExprAST(char Op,std::unique_ptr<ExprAST> LHS,std::unique_ptr<ExprAST> RHS)198   BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
199                 std::unique_ptr<ExprAST> RHS)
200       : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
201   Value *codegen() override;
202 };
203 
204 /// CallExprAST - Expression class for function calls.
205 class CallExprAST : public ExprAST {
206   std::string Callee;
207   std::vector<std::unique_ptr<ExprAST>> Args;
208 
209 public:
CallExprAST(const std::string & Callee,std::vector<std::unique_ptr<ExprAST>> Args)210   CallExprAST(const std::string &Callee,
211               std::vector<std::unique_ptr<ExprAST>> Args)
212       : Callee(Callee), Args(std::move(Args)) {}
213   Value *codegen() override;
214 };
215 
216 /// IfExprAST - Expression class for if/then/else.
217 class IfExprAST : public ExprAST {
218   std::unique_ptr<ExprAST> Cond, Then, Else;
219 
220 public:
IfExprAST(std::unique_ptr<ExprAST> Cond,std::unique_ptr<ExprAST> Then,std::unique_ptr<ExprAST> Else)221   IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
222             std::unique_ptr<ExprAST> Else)
223       : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
224   Value *codegen() override;
225 };
226 
227 /// ForExprAST - Expression class for for/in.
228 class ForExprAST : public ExprAST {
229   std::string VarName;
230   std::unique_ptr<ExprAST> Start, End, Step, Body;
231 
232 public:
ForExprAST(const std::string & VarName,std::unique_ptr<ExprAST> Start,std::unique_ptr<ExprAST> End,std::unique_ptr<ExprAST> Step,std::unique_ptr<ExprAST> Body)233   ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
234              std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
235              std::unique_ptr<ExprAST> Body)
236       : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
237         Step(std::move(Step)), Body(std::move(Body)) {}
238   Value *codegen() override;
239 };
240 
241 /// VarExprAST - Expression class for var/in
242 class VarExprAST : public ExprAST {
243   std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
244   std::unique_ptr<ExprAST> Body;
245 
246 public:
VarExprAST(std::vector<std::pair<std::string,std::unique_ptr<ExprAST>>> VarNames,std::unique_ptr<ExprAST> Body)247   VarExprAST(
248       std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
249       std::unique_ptr<ExprAST> Body)
250       : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
251   Value *codegen() override;
252 };
253 
254 /// PrototypeAST - This class represents the "prototype" for a function,
255 /// which captures its name, and its argument names (thus implicitly the number
256 /// of arguments the function takes), as well as if it is an operator.
257 class PrototypeAST {
258   std::string Name;
259   std::vector<std::string> Args;
260   bool IsOperator;
261   unsigned Precedence; // Precedence if a binary op.
262 
263 public:
PrototypeAST(const std::string & Name,std::vector<std::string> Args,bool IsOperator=false,unsigned Prec=0)264   PrototypeAST(const std::string &Name, std::vector<std::string> Args,
265                bool IsOperator = false, unsigned Prec = 0)
266       : Name(Name), Args(std::move(Args)), IsOperator(IsOperator),
267         Precedence(Prec) {}
268   Function *codegen();
getName() const269   const std::string &getName() const { return Name; }
270 
isUnaryOp() const271   bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
isBinaryOp() const272   bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
273 
getOperatorName() const274   char getOperatorName() const {
275     assert(isUnaryOp() || isBinaryOp());
276     return Name[Name.size() - 1];
277   }
278 
getBinaryPrecedence() const279   unsigned getBinaryPrecedence() const { return Precedence; }
280 };
281 
282 //===----------------------------------------------------------------------===//
283 // Parser
284 //===----------------------------------------------------------------------===//
285 
286 /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
287 /// token the parser is looking at.  getNextToken reads another token from the
288 /// lexer and updates CurTok with its results.
289 static int CurTok;
getNextToken()290 static int getNextToken() { return CurTok = gettok(); }
291 
/// BinopPrecedence - This holds the precedence for each binary operator that is
/// defined, keyed by the operator character.  GetTokPrecedence() treats an
/// entry whose value is <= 0 as "not a binary operator".
static std::map<char, int> BinopPrecedence;
295 
296 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
GetTokPrecedence()297 static int GetTokPrecedence() {
298   if (!isascii(CurTok))
299     return -1;
300 
301   // Make sure it's a declared binop.
302   int TokPrec = BinopPrecedence[CurTok];
303   if (TokPrec <= 0)
304     return -1;
305   return TokPrec;
306 }
307 
308 /// LogError* - These are little helper functions for error handling.
LogError(const char * Str)309 std::unique_ptr<ExprAST> LogError(const char *Str) {
310   fprintf(stderr, "Error: %s\n", Str);
311   return nullptr;
312 }
313 
LogErrorP(const char * Str)314 std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) {
315   LogError(Str);
316   return nullptr;
317 }
318 
// Forward declaration: the primary-expression parsers below call
// ParseExpression() before its definition appears.
static std::unique_ptr<ExprAST> ParseExpression();
320 
321 /// numberexpr ::= number
ParseNumberExpr()322 static std::unique_ptr<ExprAST> ParseNumberExpr() {
323   auto Result = llvm::make_unique<NumberExprAST>(NumVal);
324   getNextToken(); // consume the number
325   return std::move(Result);
326 }
327 
328 /// parenexpr ::= '(' expression ')'
ParseParenExpr()329 static std::unique_ptr<ExprAST> ParseParenExpr() {
330   getNextToken(); // eat (.
331   auto V = ParseExpression();
332   if (!V)
333     return nullptr;
334 
335   if (CurTok != ')')
336     return LogError("expected ')'");
337   getNextToken(); // eat ).
338   return V;
339 }
340 
341 /// identifierexpr
342 ///   ::= identifier
343 ///   ::= identifier '(' expression* ')'
ParseIdentifierExpr()344 static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
345   std::string IdName = IdentifierStr;
346 
347   getNextToken(); // eat identifier.
348 
349   if (CurTok != '(') // Simple variable ref.
350     return llvm::make_unique<VariableExprAST>(IdName);
351 
352   // Call.
353   getNextToken(); // eat (
354   std::vector<std::unique_ptr<ExprAST>> Args;
355   if (CurTok != ')') {
356     while (true) {
357       if (auto Arg = ParseExpression())
358         Args.push_back(std::move(Arg));
359       else
360         return nullptr;
361 
362       if (CurTok == ')')
363         break;
364 
365       if (CurTok != ',')
366         return LogError("Expected ')' or ',' in argument list");
367       getNextToken();
368     }
369   }
370 
371   // Eat the ')'.
372   getNextToken();
373 
374   return llvm::make_unique<CallExprAST>(IdName, std::move(Args));
375 }
376 
377 /// ifexpr ::= 'if' expression 'then' expression 'else' expression
ParseIfExpr()378 static std::unique_ptr<ExprAST> ParseIfExpr() {
379   getNextToken(); // eat the if.
380 
381   // condition.
382   auto Cond = ParseExpression();
383   if (!Cond)
384     return nullptr;
385 
386   if (CurTok != tok_then)
387     return LogError("expected then");
388   getNextToken(); // eat the then
389 
390   auto Then = ParseExpression();
391   if (!Then)
392     return nullptr;
393 
394   if (CurTok != tok_else)
395     return LogError("expected else");
396 
397   getNextToken();
398 
399   auto Else = ParseExpression();
400   if (!Else)
401     return nullptr;
402 
403   return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
404                                       std::move(Else));
405 }
406 
407 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
ParseForExpr()408 static std::unique_ptr<ExprAST> ParseForExpr() {
409   getNextToken(); // eat the for.
410 
411   if (CurTok != tok_identifier)
412     return LogError("expected identifier after for");
413 
414   std::string IdName = IdentifierStr;
415   getNextToken(); // eat identifier.
416 
417   if (CurTok != '=')
418     return LogError("expected '=' after for");
419   getNextToken(); // eat '='.
420 
421   auto Start = ParseExpression();
422   if (!Start)
423     return nullptr;
424   if (CurTok != ',')
425     return LogError("expected ',' after for start value");
426   getNextToken();
427 
428   auto End = ParseExpression();
429   if (!End)
430     return nullptr;
431 
432   // The step value is optional.
433   std::unique_ptr<ExprAST> Step;
434   if (CurTok == ',') {
435     getNextToken();
436     Step = ParseExpression();
437     if (!Step)
438       return nullptr;
439   }
440 
441   if (CurTok != tok_in)
442     return LogError("expected 'in' after for");
443   getNextToken(); // eat 'in'.
444 
445   auto Body = ParseExpression();
446   if (!Body)
447     return nullptr;
448 
449   return llvm::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End),
450                                        std::move(Step), std::move(Body));
451 }
452 
453 /// varexpr ::= 'var' identifier ('=' expression)?
454 //                    (',' identifier ('=' expression)?)* 'in' expression
ParseVarExpr()455 static std::unique_ptr<ExprAST> ParseVarExpr() {
456   getNextToken(); // eat the var.
457 
458   std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
459 
460   // At least one variable name is required.
461   if (CurTok != tok_identifier)
462     return LogError("expected identifier after var");
463 
464   while (true) {
465     std::string Name = IdentifierStr;
466     getNextToken(); // eat identifier.
467 
468     // Read the optional initializer.
469     std::unique_ptr<ExprAST> Init = nullptr;
470     if (CurTok == '=') {
471       getNextToken(); // eat the '='.
472 
473       Init = ParseExpression();
474       if (!Init)
475         return nullptr;
476     }
477 
478     VarNames.push_back(std::make_pair(Name, std::move(Init)));
479 
480     // End of var list, exit loop.
481     if (CurTok != ',')
482       break;
483     getNextToken(); // eat the ','.
484 
485     if (CurTok != tok_identifier)
486       return LogError("expected identifier list after var");
487   }
488 
489   // At this point, we have to have 'in'.
490   if (CurTok != tok_in)
491     return LogError("expected 'in' keyword after 'var'");
492   getNextToken(); // eat 'in'.
493 
494   auto Body = ParseExpression();
495   if (!Body)
496     return nullptr;
497 
498   return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body));
499 }
500 
501 /// primary
502 ///   ::= identifierexpr
503 ///   ::= numberexpr
504 ///   ::= parenexpr
505 ///   ::= ifexpr
506 ///   ::= forexpr
507 ///   ::= varexpr
ParsePrimary()508 static std::unique_ptr<ExprAST> ParsePrimary() {
509   switch (CurTok) {
510   default:
511     return LogError("unknown token when expecting an expression");
512   case tok_identifier:
513     return ParseIdentifierExpr();
514   case tok_number:
515     return ParseNumberExpr();
516   case '(':
517     return ParseParenExpr();
518   case tok_if:
519     return ParseIfExpr();
520   case tok_for:
521     return ParseForExpr();
522   case tok_var:
523     return ParseVarExpr();
524   }
525 }
526 
527 /// unary
528 ///   ::= primary
529 ///   ::= '!' unary
ParseUnary()530 static std::unique_ptr<ExprAST> ParseUnary() {
531   // If the current token is not an operator, it must be a primary expr.
532   if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
533     return ParsePrimary();
534 
535   // If this is a unary operator, read it.
536   int Opc = CurTok;
537   getNextToken();
538   if (auto Operand = ParseUnary())
539     return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand));
540   return nullptr;
541 }
542 
543 /// binoprhs
544 ///   ::= ('+' unary)*
ParseBinOpRHS(int ExprPrec,std::unique_ptr<ExprAST> LHS)545 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
546                                               std::unique_ptr<ExprAST> LHS) {
547   // If this is a binop, find its precedence.
548   while (true) {
549     int TokPrec = GetTokPrecedence();
550 
551     // If this is a binop that binds at least as tightly as the current binop,
552     // consume it, otherwise we are done.
553     if (TokPrec < ExprPrec)
554       return LHS;
555 
556     // Okay, we know this is a binop.
557     int BinOp = CurTok;
558     getNextToken(); // eat binop
559 
560     // Parse the unary expression after the binary operator.
561     auto RHS = ParseUnary();
562     if (!RHS)
563       return nullptr;
564 
565     // If BinOp binds less tightly with RHS than the operator after RHS, let
566     // the pending operator take RHS as its LHS.
567     int NextPrec = GetTokPrecedence();
568     if (TokPrec < NextPrec) {
569       RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
570       if (!RHS)
571         return nullptr;
572     }
573 
574     // Merge LHS/RHS.
575     LHS =
576         llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS));
577   }
578 }
579 
580 /// expression
581 ///   ::= unary binoprhs
582 ///
ParseExpression()583 static std::unique_ptr<ExprAST> ParseExpression() {
584   auto LHS = ParseUnary();
585   if (!LHS)
586     return nullptr;
587 
588   return ParseBinOpRHS(0, std::move(LHS));
589 }
590 
591 /// prototype
592 ///   ::= id '(' id* ')'
593 ///   ::= binary LETTER number? (id, id)
594 ///   ::= unary LETTER (id)
ParsePrototype()595 static std::unique_ptr<PrototypeAST> ParsePrototype() {
596   std::string FnName;
597 
598   unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
599   unsigned BinaryPrecedence = 30;
600 
601   switch (CurTok) {
602   default:
603     return LogErrorP("Expected function name in prototype");
604   case tok_identifier:
605     FnName = IdentifierStr;
606     Kind = 0;
607     getNextToken();
608     break;
609   case tok_unary:
610     getNextToken();
611     if (!isascii(CurTok))
612       return LogErrorP("Expected unary operator");
613     FnName = "unary";
614     FnName += (char)CurTok;
615     Kind = 1;
616     getNextToken();
617     break;
618   case tok_binary:
619     getNextToken();
620     if (!isascii(CurTok))
621       return LogErrorP("Expected binary operator");
622     FnName = "binary";
623     FnName += (char)CurTok;
624     Kind = 2;
625     getNextToken();
626 
627     // Read the precedence if present.
628     if (CurTok == tok_number) {
629       if (NumVal < 1 || NumVal > 100)
630         return LogErrorP("Invalid precedecnce: must be 1..100");
631       BinaryPrecedence = (unsigned)NumVal;
632       getNextToken();
633     }
634     break;
635   }
636 
637   if (CurTok != '(')
638     return LogErrorP("Expected '(' in prototype");
639 
640   std::vector<std::string> ArgNames;
641   while (getNextToken() == tok_identifier)
642     ArgNames.push_back(IdentifierStr);
643   if (CurTok != ')')
644     return LogErrorP("Expected ')' in prototype");
645 
646   // success.
647   getNextToken(); // eat ')'.
648 
649   // Verify right number of names for operator.
650   if (Kind && ArgNames.size() != Kind)
651     return LogErrorP("Invalid number of operands for operator");
652 
653   return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0,
654                                          BinaryPrecedence);
655 }
656 
657 /// definition ::= 'def' prototype expression
ParseDefinition()658 static std::unique_ptr<FunctionAST> ParseDefinition() {
659   getNextToken(); // eat def.
660   auto Proto = ParsePrototype();
661   if (!Proto)
662     return nullptr;
663 
664   if (auto E = ParseExpression())
665     return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
666   return nullptr;
667 }
668 
669 /// toplevelexpr ::= expression
ParseTopLevelExpr()670 static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
671   if (auto E = ParseExpression()) {
672 
673     auto PEArgs = std::vector<std::unique_ptr<ExprAST>>();
674     PEArgs.push_back(std::move(E));
675     auto PrintExpr =
676       llvm::make_unique<CallExprAST>("printExprResult", std::move(PEArgs));
677 
678     // Make an anonymous proto.
679     auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr",
680                                                  std::vector<std::string>());
681     return llvm::make_unique<FunctionAST>(std::move(Proto),
682                                           std::move(PrintExpr));
683   }
684   return nullptr;
685 }
686 
687 /// external ::= 'extern' prototype
ParseExtern()688 static std::unique_ptr<PrototypeAST> ParseExtern() {
689   getNextToken(); // eat extern.
690   return ParsePrototype();
691 }
692 
693 //===----------------------------------------------------------------------===//
694 // Code Generation
695 //===----------------------------------------------------------------------===//
696 
// Global state shared by the codegen() methods below.
static LLVMContext TheContext;          // context passed to every IR object created here
static IRBuilder<> Builder(TheContext); // emits instructions at the current insert point
static std::unique_ptr<Module> TheModule; // module getFunction() searches first
// Maps a variable name to the alloca that holds it; entries are added and
// restored by the expression codegen as scopes are entered and left.
static std::map<std::string, AllocaInst *> NamedValues;
static std::unique_ptr<KaleidoscopeJIT> TheJIT; // NOTE(review): not used in the code visible here
// Prototypes by function name; getFunction() uses them to emit a declaration
// when the function is not yet in TheModule.
static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
static ExitOnError ExitOnErr; // NOTE(review): not used in the code visible here
704 
LogErrorV(const char * Str)705 Value *LogErrorV(const char *Str) {
706   LogError(Str);
707   return nullptr;
708 }
709 
getFunction(std::string Name)710 Function *getFunction(std::string Name) {
711   // First, see if the function has already been added to the current module.
712   if (auto *F = TheModule->getFunction(Name))
713     return F;
714 
715   // If not, check whether we can codegen the declaration from some existing
716   // prototype.
717   auto FI = FunctionProtos.find(Name);
718   if (FI != FunctionProtos.end())
719     return FI->second->codegen();
720 
721   // If no existing prototype exists, return null.
722   return nullptr;
723 }
724 
725 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
726 /// the function.  This is used for mutable variables etc.
CreateEntryBlockAlloca(Function * TheFunction,const std::string & VarName)727 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
728                                           const std::string &VarName) {
729   IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
730                    TheFunction->getEntryBlock().begin());
731   return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), nullptr, VarName);
732 }
733 
codegen()734 Value *NumberExprAST::codegen() {
735   return ConstantFP::get(TheContext, APFloat(Val));
736 }
737 
codegen()738 Value *VariableExprAST::codegen() {
739   // Look this variable up in the function.
740   Value *V = NamedValues[Name];
741   if (!V)
742     return LogErrorV("Unknown variable name");
743 
744   // Load the value.
745   return Builder.CreateLoad(V, Name.c_str());
746 }
747 
codegen()748 Value *UnaryExprAST::codegen() {
749   Value *OperandV = Operand->codegen();
750   if (!OperandV)
751     return nullptr;
752 
753   Function *F = getFunction(std::string("unary") + Opcode);
754   if (!F)
755     return LogErrorV("Unknown unary operator");
756 
757   return Builder.CreateCall(F, OperandV, "unop");
758 }
759 
codegen()760 Value *BinaryExprAST::codegen() {
761   // Special case '=' because we don't want to emit the LHS as an expression.
762   if (Op == '=') {
763     // Assignment requires the LHS to be an identifier.
764     // This assume we're building without RTTI because LLVM builds that way by
765     // default.  If you build LLVM with RTTI this can be changed to a
766     // dynamic_cast for automatic error checking.
767     VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get());
768     if (!LHSE)
769       return LogErrorV("destination of '=' must be a variable");
770     // Codegen the RHS.
771     Value *Val = RHS->codegen();
772     if (!Val)
773       return nullptr;
774 
775     // Look up the name.
776     Value *Variable = NamedValues[LHSE->getName()];
777     if (!Variable)
778       return LogErrorV("Unknown variable name");
779 
780     Builder.CreateStore(Val, Variable);
781     return Val;
782   }
783 
784   Value *L = LHS->codegen();
785   Value *R = RHS->codegen();
786   if (!L || !R)
787     return nullptr;
788 
789   switch (Op) {
790   case '+':
791     return Builder.CreateFAdd(L, R, "addtmp");
792   case '-':
793     return Builder.CreateFSub(L, R, "subtmp");
794   case '*':
795     return Builder.CreateFMul(L, R, "multmp");
796   case '<':
797     L = Builder.CreateFCmpULT(L, R, "cmptmp");
798     // Convert bool 0/1 to double 0.0 or 1.0
799     return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp");
800   default:
801     break;
802   }
803 
804   // If it wasn't a builtin binary operator, it must be a user defined one. Emit
805   // a call to it.
806   Function *F = getFunction(std::string("binary") + Op);
807   assert(F && "binary operator not found!");
808 
809   Value *Ops[] = {L, R};
810   return Builder.CreateCall(F, Ops, "binop");
811 }
812 
codegen()813 Value *CallExprAST::codegen() {
814   // Look up the name in the global module table.
815   Function *CalleeF = getFunction(Callee);
816   if (!CalleeF)
817     return LogErrorV("Unknown function referenced");
818 
819   // If argument mismatch error.
820   if (CalleeF->arg_size() != Args.size())
821     return LogErrorV("Incorrect # arguments passed");
822 
823   std::vector<Value *> ArgsV;
824   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
825     ArgsV.push_back(Args[i]->codegen());
826     if (!ArgsV.back())
827       return nullptr;
828   }
829 
830   return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
831 }
832 
/// Emit an if/then/else expression as a conditional branch to a "then" and an
/// "else" block that both branch to an "ifcont" block, where a PHI node
/// selects the value of whichever branch ran.  Returns the PHI, or null if
/// the condition or either branch fails to generate.
Value *IfExprAST::codegen() {
  Value *CondV = Cond->codegen();
  if (!CondV)
    return nullptr;

  // Convert condition to a bool by comparing non-equal to 0.0 (FCmpONE is an
  // ordered not-equal compare).
  CondV = Builder.CreateFCmpONE(
      CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");

  Function *TheFunction = Builder.GetInsertBlock()->getParent();

  // Create blocks for the then and else cases.  Insert the 'then' block at the
  // end of the function.  The 'else' and 'ifcont' blocks are created without a
  // parent and are appended to the function further down.
  // NOTE(review): on the early error returns below, any block not yet appended
  // stays unattached and nothing in this function deletes it.
  BasicBlock *ThenBB = BasicBlock::Create(TheContext, "then", TheFunction);
  BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else");
  BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont");

  Builder.CreateCondBr(CondV, ThenBB, ElseBB);

  // Emit then value.
  Builder.SetInsertPoint(ThenBB);

  Value *ThenV = Then->codegen();
  if (!ThenV)
    return nullptr;

  Builder.CreateBr(MergeBB);
  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
  ThenBB = Builder.GetInsertBlock();

  // Emit else block.
  TheFunction->getBasicBlockList().push_back(ElseBB);
  Builder.SetInsertPoint(ElseBB);

  Value *ElseV = Else->codegen();
  if (!ElseV)
    return nullptr;

  Builder.CreateBr(MergeBB);
  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
  ElseBB = Builder.GetInsertBlock();

  // Emit merge block.
  TheFunction->getBasicBlockList().push_back(MergeBB);
  Builder.SetInsertPoint(MergeBB);
  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp");

  // One incoming value per predecessor: the block each branch ended in.
  PN->addIncoming(ThenV, ThenBB);
  PN->addIncoming(ElseV, ElseBB);
  return PN;
}
884 
// Output for-loop as:
//   var = alloca double        (placed in the function's entry block)
//   ...
//   start = startexpr
//   store start -> var
//   goto loop
// loop:
//   ...
//   bodyexpr
//   ...
//   step = stepexpr            (1.0 when no step was given)
//   endcond = endexpr
//
//   curvar = load var
//   nextvar = curvar + step
//   store nextvar -> var
//   br endcond, loop, afterloop
// afterloop:
//
// The body is emitted before the end condition is tested, so it always runs
// at least once.  The expression's own value is always 0.0.  Returns null if
// the start, body, step or end expression fails to generate.
Value *ForExprAST::codegen() {
  Function *TheFunction = Builder.GetInsertBlock()->getParent();

  // Create an alloca for the variable in the entry block.
  AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);

  // Emit the start code first, without 'variable' in scope.
  Value *StartVal = Start->codegen();
  if (!StartVal)
    return nullptr;

  // Store the value into the alloca.
  Builder.CreateStore(StartVal, Alloca);

  // Make the new basic block for the loop header, inserting after current
  // block.
  BasicBlock *LoopBB = BasicBlock::Create(TheContext, "loop", TheFunction);

  // Insert an explicit fall through from the current block to the LoopBB.
  Builder.CreateBr(LoopBB);

  // Start insertion in LoopBB.
  Builder.SetInsertPoint(LoopBB);

  // Within the loop, the variable name is bound to the alloca.  If it
  // shadows an existing variable, we have to restore it, so save it now.
  AllocaInst *OldVal = NamedValues[VarName];
  NamedValues[VarName] = Alloca;

  // Emit the body of the loop.  This, like any other expr, can change the
  // current BB.  Note that we ignore the value computed by the body, but don't
  // allow an error.
  // NOTE(review): the error returns from here on leave VarName bound to this
  // loop's alloca; the saved binding is only restored on the success path.
  if (!Body->codegen())
    return nullptr;

  // Emit the step value.
  Value *StepVal = nullptr;
  if (Step) {
    StepVal = Step->codegen();
    if (!StepVal)
      return nullptr;
  } else {
    // If not specified, use 1.0.
    StepVal = ConstantFP::get(TheContext, APFloat(1.0));
  }

  // Compute the end condition.  This is emitted before the increment below,
  // so it sees the variable's value from this iteration.
  Value *EndCond = End->codegen();
  if (!EndCond)
    return nullptr;

  // Reload, increment, and restore the alloca.  This handles the case where
  // the body of the loop mutates the variable.
  Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
  Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
  Builder.CreateStore(NextVar, Alloca);

  // Convert condition to a bool by comparing non-equal to 0.0 (FCmpONE is an
  // ordered not-equal compare).
  EndCond = Builder.CreateFCmpONE(
      EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");

  // Create the "after loop" block and insert it.
  BasicBlock *AfterBB =
      BasicBlock::Create(TheContext, "afterloop", TheFunction);

  // Insert the conditional branch at the end of the current block: back to
  // LoopBB while the condition holds, otherwise on to AfterBB.
  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);

  // Any new code will be inserted in AfterBB.
  Builder.SetInsertPoint(AfterBB);

  // Restore the unshadowed variable.
  if (OldVal)
    NamedValues[VarName] = OldVal;
  else
    NamedValues.erase(VarName);

  // for expr always returns 0.0.
  return Constant::getNullValue(Type::getDoubleTy(TheContext));
}
984 
codegen()985 Value *VarExprAST::codegen() {
986   std::vector<AllocaInst *> OldBindings;
987 
988   Function *TheFunction = Builder.GetInsertBlock()->getParent();
989 
990   // Register all variables and emit their initializer.
991   for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
992     const std::string &VarName = VarNames[i].first;
993     ExprAST *Init = VarNames[i].second.get();
994 
995     // Emit the initializer before adding the variable to scope, this prevents
996     // the initializer from referencing the variable itself, and permits stuff
997     // like this:
998     //  var a = 1 in
999     //    var a = a in ...   # refers to outer 'a'.
1000     Value *InitVal;
1001     if (Init) {
1002       InitVal = Init->codegen();
1003       if (!InitVal)
1004         return nullptr;
1005     } else { // If not specified, use 0.0.
1006       InitVal = ConstantFP::get(TheContext, APFloat(0.0));
1007     }
1008 
1009     AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
1010     Builder.CreateStore(InitVal, Alloca);
1011 
1012     // Remember the old variable binding so that we can restore the binding when
1013     // we unrecurse.
1014     OldBindings.push_back(NamedValues[VarName]);
1015 
1016     // Remember this binding.
1017     NamedValues[VarName] = Alloca;
1018   }
1019 
1020   // Codegen the body, now that all vars are in scope.
1021   Value *BodyVal = Body->codegen();
1022   if (!BodyVal)
1023     return nullptr;
1024 
1025   // Pop all our variables from scope.
1026   for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
1027     NamedValues[VarNames[i].first] = OldBindings[i];
1028 
1029   // Return the body computation.
1030   return BodyVal;
1031 }
1032 
codegen()1033 Function *PrototypeAST::codegen() {
1034   // Make the function type:  double(double,double) etc.
1035   std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(TheContext));
1036   FunctionType *FT =
1037       FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false);
1038 
1039   Function *F =
1040       Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
1041 
1042   // Set names for all arguments.
1043   unsigned Idx = 0;
1044   for (auto &Arg : F->args())
1045     Arg.setName(Args[Idx++]);
1046 
1047   return F;
1048 }
1049 
getProto() const1050 const PrototypeAST& FunctionAST::getProto() const {
1051   return *Proto;
1052 }
1053 
getName() const1054 const std::string& FunctionAST::getName() const {
1055   return Proto->getName();
1056 }
1057 
codegen()1058 Function *FunctionAST::codegen() {
1059   // Transfer ownership of the prototype to the FunctionProtos map, but keep a
1060   // reference to it for use below.
1061   auto &P = *Proto;
1062   Function *TheFunction = getFunction(P.getName());
1063   if (!TheFunction)
1064     return nullptr;
1065 
1066   // If this is an operator, install it.
1067   if (P.isBinaryOp())
1068     BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
1069 
1070   // Create a new basic block to start insertion into.
1071   BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction);
1072   Builder.SetInsertPoint(BB);
1073 
1074   // Record the function arguments in the NamedValues map.
1075   NamedValues.clear();
1076   for (auto &Arg : TheFunction->args()) {
1077     // Create an alloca for this variable.
1078     AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
1079 
1080     // Store the initial value into the alloca.
1081     Builder.CreateStore(&Arg, Alloca);
1082 
1083     // Add arguments to variable symbol table.
1084     NamedValues[Arg.getName()] = Alloca;
1085   }
1086 
1087   if (Value *RetVal = Body->codegen()) {
1088     // Finish off the function.
1089     Builder.CreateRet(RetVal);
1090 
1091     // Validate the generated code, checking for consistency.
1092     verifyFunction(*TheFunction);
1093 
1094     return TheFunction;
1095   }
1096 
1097   // Error reading body, remove function.
1098   TheFunction->eraseFromParent();
1099 
1100   if (P.isBinaryOp())
1101     BinopPrecedence.erase(Proto->getOperatorName());
1102   return nullptr;
1103 }
1104 
1105 //===----------------------------------------------------------------------===//
1106 // Top-Level parsing and JIT Driver
1107 //===----------------------------------------------------------------------===//
1108 
InitializeModule()1109 static void InitializeModule() {
1110   // Open a new module.
1111   TheModule = llvm::make_unique<Module>("my cool jit", TheContext);
1112   TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
1113 }
1114 
1115 std::unique_ptr<llvm::Module>
irgenAndTakeOwnership(FunctionAST & FnAST,const std::string & Suffix)1116 irgenAndTakeOwnership(FunctionAST &FnAST, const std::string &Suffix) {
1117   if (auto *F = FnAST.codegen()) {
1118     F->setName(F->getName() + Suffix);
1119     auto M = std::move(TheModule);
1120     // Start a new module.
1121     InitializeModule();
1122     return M;
1123   } else
1124     report_fatal_error("Couldn't compile lazily JIT'd function");
1125 }
1126 
HandleDefinition()1127 static void HandleDefinition() {
1128   if (auto FnAST = ParseDefinition()) {
1129     FunctionProtos[FnAST->getProto().getName()] =
1130       llvm::make_unique<PrototypeAST>(FnAST->getProto());
1131     ExitOnErr(TheJIT->addFunctionAST(std::move(FnAST)));
1132   } else {
1133     // Skip token for error recovery.
1134     getNextToken();
1135   }
1136 }
1137 
HandleExtern()1138 static void HandleExtern() {
1139   if (auto ProtoAST = ParseExtern()) {
1140     if (auto *FnIR = ProtoAST->codegen()) {
1141       fprintf(stderr, "Read extern: ");
1142       FnIR->dump();
1143       FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
1144     }
1145   } else {
1146     // Skip token for error recovery.
1147     getNextToken();
1148   }
1149 }
1150 
HandleTopLevelExpression()1151 static void HandleTopLevelExpression() {
1152   // Evaluate a top-level expression into an anonymous function.
1153   if (auto FnAST = ParseTopLevelExpr()) {
1154     FunctionProtos[FnAST->getName()] =
1155       llvm::make_unique<PrototypeAST>(FnAST->getProto());
1156     if (FnAST->codegen()) {
1157       // JIT the module containing the anonymous expression, keeping a handle so
1158       // we can free it later.
1159       auto H = TheJIT->addModule(std::move(TheModule));
1160       InitializeModule();
1161 
1162       // Search the JIT for the __anon_expr symbol.
1163       auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
1164       assert(ExprSymbol && "Function not found");
1165 
1166       // Get the symbol's address and cast it to the right type (takes no
1167       // arguments, returns a double) so we can call it as a native function.
1168       ExitOnErr(TheJIT->executeRemoteExpr(ExprSymbol.getAddress()));
1169 
1170       // Delete the anonymous expression module from the JIT.
1171       TheJIT->removeModule(H);
1172     }
1173   } else {
1174     // Skip token for error recovery.
1175     getNextToken();
1176   }
1177 }
1178 
1179 /// top ::= definition | external | expression | ';'
MainLoop()1180 static void MainLoop() {
1181   while (true) {
1182     fprintf(stderr, "ready> ");
1183     switch (CurTok) {
1184     case tok_eof:
1185       return;
1186     case ';': // ignore top-level semicolons.
1187       getNextToken();
1188       break;
1189     case tok_def:
1190       HandleDefinition();
1191       break;
1192     case tok_extern:
1193       HandleExtern();
1194       break;
1195     default:
1196       HandleTopLevelExpression();
1197       break;
1198     }
1199   }
1200 }
1201 
1202 //===----------------------------------------------------------------------===//
1203 // "Library" functions that can be "extern'd" from user code.
1204 //===----------------------------------------------------------------------===//
1205 
/// putchard - putchar that takes a double and returns 0.
///
/// X is converted to char and that character is written to stderr.
extern "C" double putchard(double X) {
  const char Ch = static_cast<char>(X);
  fputc(Ch, stderr);
  return 0.0;
}
1211 
/// printd - printf that takes a double prints it as "%f\n", returning 0.
///
/// Output goes to stderr.
extern "C" double printd(double X) {
  static const char Format[] = "%f\n";
  fprintf(stderr, Format, X);
  return 0.0;
}
1217 
1218 //===----------------------------------------------------------------------===//
1219 // TCP / Connection setup code.
1220 //===----------------------------------------------------------------------===//
1221 
connect()1222 std::unique_ptr<FDRPCChannel> connect() {
1223   int sockfd = socket(PF_INET, SOCK_STREAM, 0);
1224   hostent *server = gethostbyname(HostName.c_str());
1225 
1226   if (!server) {
1227     errs() << "Could not find host " << HostName << "\n";
1228     exit(1);
1229   }
1230 
1231   sockaddr_in servAddr;
1232   bzero(&servAddr, sizeof(servAddr));
1233   servAddr.sin_family = PF_INET;
1234   bcopy(server->h_addr, &servAddr.sin_addr.s_addr, server->h_length);
1235   servAddr.sin_port = htons(Port);
1236   if (connect(sockfd, reinterpret_cast<sockaddr*>(&servAddr),
1237               sizeof(servAddr)) < 0) {
1238     errs() << "Failure to connect.\n";
1239     exit(1);
1240   }
1241 
1242   return llvm::make_unique<FDRPCChannel>(sockfd, sockfd);
1243 }
1244 
1245 //===----------------------------------------------------------------------===//
1246 // Main driver code.
1247 //===----------------------------------------------------------------------===//
1248 
main(int argc,char * argv[])1249 int main(int argc, char *argv[]) {
1250   // Parse the command line options.
1251   cl::ParseCommandLineOptions(argc, argv, "Building A JIT - Client.\n");
1252 
1253   InitializeNativeTarget();
1254   InitializeNativeTargetAsmPrinter();
1255   InitializeNativeTargetAsmParser();
1256 
1257   ExitOnErr.setBanner("Kaleidoscope: ");
1258 
1259   // Install standard binary operators.
1260   // 1 is lowest precedence.
1261   BinopPrecedence['='] = 2;
1262   BinopPrecedence['<'] = 10;
1263   BinopPrecedence['+'] = 20;
1264   BinopPrecedence['-'] = 20;
1265   BinopPrecedence['*'] = 40; // highest.
1266 
1267   auto TCPChannel = connect();
1268   MyRemote Remote = ExitOnErr(MyRemote::Create(*TCPChannel));
1269   TheJIT = llvm::make_unique<KaleidoscopeJIT>(Remote);
1270 
1271   // Automatically inject a definition for 'printExprResult'.
1272   FunctionProtos["printExprResult"] =
1273     llvm::make_unique<PrototypeAST>("printExprResult",
1274                                     std::vector<std::string>({"Val"}));
1275 
1276   // Prime the first token.
1277   fprintf(stderr, "ready> ");
1278   getNextToken();
1279 
1280   InitializeModule();
1281 
1282   // Run the main "interpreter loop" now.
1283   MainLoop();
1284 
1285   // Delete the JIT before the Remote and Channel go out of scope, otherwise
1286   // we'll crash in the JIT destructor when it tries to release remote
1287   // resources over a channel that no longer exists.
1288   TheJIT = nullptr;
1289 
1290   // Send a terminate message to the remote to tell it to exit cleanly.
1291   ExitOnErr(Remote.terminateSession());
1292 
1293   return 0;
1294 }
1295