1// Package compile defines the Starlark bytecode compiler. 2// It is an internal package of the Starlark interpreter and is not directly accessible to clients. 3// 4// The compiler generates byte code with optional uint32 operands for a 5// virtual machine with the following components: 6// - a program counter, which is an index into the byte code array. 7// - an operand stack, whose maximum size is computed for each function by the compiler. 8// - an stack of active iterators. 9// - an array of local variables. 10// The number of local variables and their indices are computed by the resolver. 11// Locals (possibly including parameters) that are shared with nested functions 12// are 'cells': their locals array slot will contain a value of type 'cell', 13// an indirect value in a box that is explicitly read/updated by instructions. 14// - an array of free variables, for nested functions. 15// Free variables are a subset of the ancestors' cell variables. 16// As with locals and cells, these are computed by the resolver. 17// - an array of global variables, shared among all functions in the same module. 18// All elements are initially nil. 19// - two maps of predeclared and universal identifiers. 20// 21// Each function has a line number table that maps each program counter 22// offset to a source position, including the column number. 23// 24// Operands, logically uint32s, are encoded using little-endian 7-bit 25// varints, the top bit indicating that more bytes follow. 26// 27package compile // import "go.starlark.net/internal/compile" 28 29import ( 30 "bytes" 31 "fmt" 32 "log" 33 "os" 34 "path/filepath" 35 "strconv" 36 "strings" 37 "sync" 38 39 "go.starlark.net/resolve" 40 "go.starlark.net/syntax" 41) 42 43// Disassemble causes the assembly code for each function 44// to be printed to stderr as it is generated. 45var Disassemble = false 46 47const debug = false // make code generation verbose, for debugging the compiler 48 49// Increment this to force recompilation of saved bytecode files. 50const Version = 12 51 52type Opcode uint8 53 54// "x DUP x x" is a "stack picture" that describes the state of the 55// stack before and after execution of the instruction. 56// 57// OP<index> indicates an immediate operand that is an index into the 58// specified table: locals, names, freevars, constants. 59const ( 60 NOP Opcode = iota // - NOP - 61 62 // stack operations 63 DUP // x DUP x x 64 DUP2 // x y DUP2 x y x y 65 POP // x POP - 66 EXCH // x y EXCH y x 67 68 // binary comparisons 69 // (order must match Token) 70 LT 71 GT 72 GE 73 LE 74 EQL 75 NEQ 76 77 // binary arithmetic 78 // (order must match Token) 79 PLUS 80 MINUS 81 STAR 82 SLASH 83 SLASHSLASH 84 PERCENT 85 AMP 86 PIPE 87 CIRCUMFLEX 88 LTLT 89 GTGT 90 91 IN 92 93 // unary operators 94 UPLUS // x UPLUS x 95 UMINUS // x UMINUS -x 96 TILDE // x TILDE ~x 97 98 NONE // - NONE None 99 TRUE // - TRUE True 100 FALSE // - FALSE False 101 MANDATORY // - MANDATORY Mandatory [sentinel value for required kwonly args] 102 103 ITERPUSH // iterable ITERPUSH - [pushes the iterator stack] 104 ITERPOP // - ITERPOP - [pops the iterator stack] 105 NOT // value NOT bool 106 RETURN // value RETURN - 107 SETINDEX // a i new SETINDEX - 108 INDEX // a i INDEX elem 109 SETDICT // dict key value SETDICT - 110 SETDICTUNIQ // dict key value SETDICTUNIQ - 111 APPEND // list elem APPEND - 112 SLICE // x lo hi step SLICE slice 113 INPLACE_ADD // x y INPLACE_ADD z where z is x+y or x.extend(y) 114 MAKEDICT // - MAKEDICT dict 115 116 // --- opcodes with an argument must go below this line --- 117 118 // control flow 119 JMP // - JMP<addr> - 120 CJMP // cond CJMP<addr> - 121 ITERJMP // - ITERJMP<addr> elem (and fall through) [acts on topmost iterator] 122 // or: - ITERJMP<addr> - (and jump) 123 124 CONSTANT // - CONSTANT<constant> value 125 MAKETUPLE // x1 ... xn MAKETUPLE<n> tuple 126 MAKELIST // x1 ... xn MAKELIST<n> list 127 MAKEFUNC // defaults+freevars MAKEFUNC<func> fn 128 LOAD // from1 ... fromN module LOAD<n> v1 ... vN 129 SETLOCAL // value SETLOCAL<local> - 130 SETGLOBAL // value SETGLOBAL<global> - 131 LOCAL // - LOCAL<local> value 132 FREE // - FREE<freevar> cell 133 FREECELL // - FREECELL<freevar> value (content of FREE cell) 134 LOCALCELL // - LOCALCELL<local> value (content of LOCAL cell) 135 SETLOCALCELL // value SETLOCALCELL<local> - (set content of LOCAL cell) 136 GLOBAL // - GLOBAL<global> value 137 PREDECLARED // - PREDECLARED<name> value 138 UNIVERSAL // - UNIVERSAL<name> value 139 ATTR // x ATTR<name> y y = x.name 140 SETFIELD // x y SETFIELD<name> - x.name = y 141 UNPACK // iterable UNPACK<n> vn ... v1 142 143 // n>>8 is #positional args and n&0xff is #named args (pairs). 144 CALL // fn positional named CALL<n> result 145 CALL_VAR // fn positional named *args CALL_VAR<n> result 146 CALL_KW // fn positional named **kwargs CALL_KW<n> result 147 CALL_VAR_KW // fn positional named *args **kwargs CALL_VAR_KW<n> result 148 149 OpcodeArgMin = JMP 150 OpcodeMax = CALL_VAR_KW 151) 152 153// TODO(adonovan): add dynamic checks for missing opcodes in the tables below. 154 155var opcodeNames = [...]string{ 156 AMP: "amp", 157 APPEND: "append", 158 ATTR: "attr", 159 CALL: "call", 160 CALL_KW: "call_kw ", 161 CALL_VAR: "call_var", 162 CALL_VAR_KW: "call_var_kw", 163 CIRCUMFLEX: "circumflex", 164 CJMP: "cjmp", 165 CONSTANT: "constant", 166 DUP2: "dup2", 167 DUP: "dup", 168 EQL: "eql", 169 EXCH: "exch", 170 FALSE: "false", 171 FREE: "free", 172 FREECELL: "freecell", 173 GE: "ge", 174 GLOBAL: "global", 175 GT: "gt", 176 GTGT: "gtgt", 177 IN: "in", 178 INDEX: "index", 179 INPLACE_ADD: "inplace_add", 180 ITERJMP: "iterjmp", 181 ITERPOP: "iterpop", 182 ITERPUSH: "iterpush", 183 JMP: "jmp", 184 LE: "le", 185 LOAD: "load", 186 LOCAL: "local", 187 LOCALCELL: "localcell", 188 LT: "lt", 189 LTLT: "ltlt", 190 MAKEDICT: "makedict", 191 MAKEFUNC: "makefunc", 192 MAKELIST: "makelist", 193 MAKETUPLE: "maketuple", 194 MANDATORY: "mandatory", 195 MINUS: "minus", 196 NEQ: "neq", 197 NONE: "none", 198 NOP: "nop", 199 NOT: "not", 200 PERCENT: "percent", 201 PIPE: "pipe", 202 PLUS: "plus", 203 POP: "pop", 204 PREDECLARED: "predeclared", 205 RETURN: "return", 206 SETDICT: "setdict", 207 SETDICTUNIQ: "setdictuniq", 208 SETFIELD: "setfield", 209 SETGLOBAL: "setglobal", 210 SETINDEX: "setindex", 211 SETLOCAL: "setlocal", 212 SETLOCALCELL: "setlocalcell", 213 SLASH: "slash", 214 SLASHSLASH: "slashslash", 215 SLICE: "slice", 216 STAR: "star", 217 TILDE: "tilde", 218 TRUE: "true", 219 UMINUS: "uminus", 220 UNIVERSAL: "universal", 221 UNPACK: "unpack", 222 UPLUS: "uplus", 223} 224 225const variableStackEffect = 0x7f 226 227// stackEffect records the effect on the size of the operand stack of 228// each kind of instruction. For some instructions this requires computation. 229var stackEffect = [...]int8{ 230 AMP: -1, 231 APPEND: -2, 232 ATTR: 0, 233 CALL: variableStackEffect, 234 CALL_KW: variableStackEffect, 235 CALL_VAR: variableStackEffect, 236 CALL_VAR_KW: variableStackEffect, 237 CIRCUMFLEX: -1, 238 CJMP: -1, 239 CONSTANT: +1, 240 DUP2: +2, 241 DUP: +1, 242 EQL: -1, 243 FALSE: +1, 244 FREE: +1, 245 FREECELL: +1, 246 GE: -1, 247 GLOBAL: +1, 248 GT: -1, 249 GTGT: -1, 250 IN: -1, 251 INDEX: -1, 252 INPLACE_ADD: -1, 253 ITERJMP: variableStackEffect, 254 ITERPOP: 0, 255 ITERPUSH: -1, 256 JMP: 0, 257 LE: -1, 258 LOAD: -1, 259 LOCAL: +1, 260 LOCALCELL: +1, 261 LT: -1, 262 LTLT: -1, 263 MAKEDICT: +1, 264 MAKEFUNC: 0, 265 MAKELIST: variableStackEffect, 266 MAKETUPLE: variableStackEffect, 267 MANDATORY: +1, 268 MINUS: -1, 269 NEQ: -1, 270 NONE: +1, 271 NOP: 0, 272 NOT: 0, 273 PERCENT: -1, 274 PIPE: -1, 275 PLUS: -1, 276 POP: -1, 277 PREDECLARED: +1, 278 RETURN: -1, 279 SETLOCALCELL: -1, 280 SETDICT: -3, 281 SETDICTUNIQ: -3, 282 SETFIELD: -2, 283 SETGLOBAL: -1, 284 SETINDEX: -3, 285 SETLOCAL: -1, 286 SLASH: -1, 287 SLASHSLASH: -1, 288 SLICE: -3, 289 STAR: -1, 290 TRUE: +1, 291 UMINUS: 0, 292 UNIVERSAL: +1, 293 UNPACK: variableStackEffect, 294 UPLUS: 0, 295} 296 297func (op Opcode) String() string { 298 if op < OpcodeMax { 299 if name := opcodeNames[op]; name != "" { 300 return name 301 } 302 } 303 return fmt.Sprintf("illegal op (%d)", op) 304} 305 306// A Program is a Starlark file in executable form. 307// 308// Programs are serialized by the Program.Encode method, 309// which must be updated whenever this declaration is changed. 310type Program struct { 311 Loads []Binding // name (really, string) and position of each load stmt 312 Names []string // names of attributes and predeclared variables 313 Constants []interface{} // = string | int64 | float64 | *big.Int | Bytes 314 Functions []*Funcode 315 Globals []Binding // for error messages and tracing 316 Toplevel *Funcode // module initialization function 317} 318 319// The type of a bytes literal value, to distinguish from text string. 320type Bytes string 321 322// A Funcode is the code of a compiled Starlark function. 323// 324// Funcodes are serialized by the encoder.function method, 325// which must be updated whenever this declaration is changed. 326type Funcode struct { 327 Prog *Program 328 Pos syntax.Position // position of def or lambda token 329 Name string // name of this function 330 Doc string // docstring of this function 331 Code []byte // the byte code 332 pclinetab []uint16 // mapping from pc to linenum 333 Locals []Binding // locals, parameters first 334 Cells []int // indices of Locals that require cells 335 Freevars []Binding // for tracing 336 MaxStack int 337 NumParams int 338 NumKwonlyParams int 339 HasVarargs, HasKwargs bool 340 341 // -- transient state -- 342 343 lntOnce sync.Once 344 lnt []pclinecol // decoded line number table 345} 346 347type pclinecol struct { 348 pc uint32 349 line, col int32 350} 351 352// A Binding is the name and position of a binding identifier. 353type Binding struct { 354 Name string 355 Pos syntax.Position 356} 357 358// A pcomp holds the compiler state for a Program. 359type pcomp struct { 360 prog *Program // what we're building 361 362 names map[string]uint32 363 constants map[interface{}]uint32 364 functions map[*Funcode]uint32 365} 366 367// An fcomp holds the compiler state for a Funcode. 368type fcomp struct { 369 fn *Funcode // what we're building 370 371 pcomp *pcomp 372 pos syntax.Position // current position of generated code 373 loops []loop 374 block *block 375} 376 377type loop struct { 378 break_, continue_ *block 379} 380 381type block struct { 382 insns []insn 383 384 // If the last insn is a RETURN, jmp and cjmp are nil. 385 // If the last insn is a CJMP or ITERJMP, 386 // cjmp and jmp are the "true" and "false" successors. 387 // Otherwise, jmp is the sole successor. 388 jmp, cjmp *block 389 390 initialstack int // for stack depth computation 391 392 // Used during encoding 393 index int // -1 => not encoded yet 394 addr uint32 395} 396 397type insn struct { 398 op Opcode 399 arg uint32 400 line, col int32 401} 402 403// Position returns the source position for program counter pc. 404func (fn *Funcode) Position(pc uint32) syntax.Position { 405 fn.lntOnce.Do(fn.decodeLNT) 406 407 // Binary search to find last LNT entry not greater than pc. 408 // To avoid dynamic dispatch, this is a specialization of 409 // sort.Search using this predicate: 410 // !(i < len(fn.lnt)-1 && fn.lnt[i+1].pc <= pc) 411 n := len(fn.lnt) 412 i, j := 0, n 413 for i < j { 414 h := int(uint(i+j) >> 1) 415 if !(h >= n-1 || fn.lnt[h+1].pc > pc) { 416 i = h + 1 417 } else { 418 j = h 419 } 420 } 421 422 var line, col int32 423 if i < n { 424 line = fn.lnt[i].line 425 col = fn.lnt[i].col 426 } 427 428 pos := fn.Pos // copy the (annoyingly inaccessible) filename 429 pos.Col = col 430 pos.Line = line 431 return pos 432} 433 434// decodeLNT decodes the line number table and populates fn.lnt. 435// It is called at most once. 436func (fn *Funcode) decodeLNT() { 437 // Conceptually the table contains rows of the form 438 // (pc uint32, line int32, col int32), sorted by pc. 439 // We use a delta encoding, since the differences 440 // between successive pc, line, and column values 441 // are typically small and positive (though line and 442 // especially column differences may be negative). 443 // The delta encoding starts from 444 // {pc: 0, line: fn.Pos.Line, col: fn.Pos.Col}. 445 // 446 // Each entry is packed into one or more 16-bit values: 447 // Δpc uint4 448 // Δline int5 449 // Δcol int6 450 // incomplete uint1 451 // The top 4 bits are the unsigned delta pc. 452 // The next 5 bits are the signed line number delta. 453 // The next 6 bits are the signed column number delta. 454 // The bottom bit indicates that more rows follow because 455 // one of the deltas was maxed out. 456 // These field widths were chosen from a sample of real programs, 457 // and allow >97% of rows to be encoded in a single uint16. 458 459 fn.lnt = make([]pclinecol, 0, len(fn.pclinetab)) // a minor overapproximation 460 entry := pclinecol{ 461 pc: 0, 462 line: fn.Pos.Line, 463 col: fn.Pos.Col, 464 } 465 for _, x := range fn.pclinetab { 466 entry.pc += uint32(x) >> 12 467 entry.line += int32((int16(x) << 4) >> (16 - 5)) // sign extend Δline 468 entry.col += int32((int16(x) << 9) >> (16 - 6)) // sign extend Δcol 469 if (x & 1) == 0 { 470 fn.lnt = append(fn.lnt, entry) 471 } 472 } 473} 474 475// bindings converts resolve.Bindings to compiled form. 476func bindings(bindings []*resolve.Binding) []Binding { 477 res := make([]Binding, len(bindings)) 478 for i, bind := range bindings { 479 res[i].Name = bind.First.Name 480 res[i].Pos = bind.First.NamePos 481 } 482 return res 483} 484 485// Expr compiles an expression to a program whose toplevel function evaluates it. 486func Expr(expr syntax.Expr, name string, locals []*resolve.Binding) *Program { 487 pos := syntax.Start(expr) 488 stmts := []syntax.Stmt{&syntax.ReturnStmt{Result: expr}} 489 return File(stmts, pos, name, locals, nil) 490} 491 492// File compiles the statements of a file into a program. 493func File(stmts []syntax.Stmt, pos syntax.Position, name string, locals, globals []*resolve.Binding) *Program { 494 pcomp := &pcomp{ 495 prog: &Program{ 496 Globals: bindings(globals), 497 }, 498 names: make(map[string]uint32), 499 constants: make(map[interface{}]uint32), 500 functions: make(map[*Funcode]uint32), 501 } 502 pcomp.prog.Toplevel = pcomp.function(name, pos, stmts, locals, nil) 503 504 return pcomp.prog 505} 506 507func (pcomp *pcomp) function(name string, pos syntax.Position, stmts []syntax.Stmt, locals, freevars []*resolve.Binding) *Funcode { 508 fcomp := &fcomp{ 509 pcomp: pcomp, 510 pos: pos, 511 fn: &Funcode{ 512 Prog: pcomp.prog, 513 Pos: pos, 514 Name: name, 515 Doc: docStringFromBody(stmts), 516 Locals: bindings(locals), 517 Freevars: bindings(freevars), 518 }, 519 } 520 521 // Record indices of locals that require cells. 522 for i, local := range locals { 523 if local.Scope == resolve.Cell { 524 fcomp.fn.Cells = append(fcomp.fn.Cells, i) 525 } 526 } 527 528 if debug { 529 fmt.Fprintf(os.Stderr, "start function(%s @ %s)\n", name, pos) 530 } 531 532 // Convert AST to a CFG of instructions. 533 entry := fcomp.newBlock() 534 fcomp.block = entry 535 fcomp.stmts(stmts) 536 if fcomp.block != nil { 537 fcomp.emit(NONE) 538 fcomp.emit(RETURN) 539 } 540 541 var oops bool // something bad happened 542 543 setinitialstack := func(b *block, depth int) { 544 if b.initialstack == -1 { 545 b.initialstack = depth 546 } else if b.initialstack != depth { 547 fmt.Fprintf(os.Stderr, "%d: setinitialstack: depth mismatch: %d vs %d\n", 548 b.index, b.initialstack, depth) 549 oops = true 550 } 551 } 552 553 // Linearize the CFG: 554 // compute order, address, and initial 555 // stack depth of each reachable block. 556 var pc uint32 557 var blocks []*block 558 var maxstack int 559 var visit func(b *block) 560 visit = func(b *block) { 561 if b.index >= 0 { 562 return // already visited 563 } 564 b.index = len(blocks) 565 b.addr = pc 566 blocks = append(blocks, b) 567 568 stack := b.initialstack 569 if debug { 570 fmt.Fprintf(os.Stderr, "%s block %d: (stack = %d)\n", name, b.index, stack) 571 } 572 var cjmpAddr *uint32 573 var isiterjmp int 574 for i, insn := range b.insns { 575 pc++ 576 577 // Compute size of argument. 578 if insn.op >= OpcodeArgMin { 579 switch insn.op { 580 case ITERJMP: 581 isiterjmp = 1 582 fallthrough 583 case CJMP: 584 cjmpAddr = &b.insns[i].arg 585 pc += 4 586 default: 587 pc += uint32(argLen(insn.arg)) 588 } 589 } 590 591 // Compute effect on stack. 592 se := insn.stackeffect() 593 if debug { 594 fmt.Fprintln(os.Stderr, "\t", insn.op, stack, stack+se) 595 } 596 stack += se 597 if stack < 0 { 598 fmt.Fprintf(os.Stderr, "After pc=%d: stack underflow\n", pc) 599 oops = true 600 } 601 if stack+isiterjmp > maxstack { 602 maxstack = stack + isiterjmp 603 } 604 } 605 606 if debug { 607 fmt.Fprintf(os.Stderr, "successors of block %d (start=%d):\n", 608 b.addr, b.index) 609 if b.jmp != nil { 610 fmt.Fprintf(os.Stderr, "jmp to %d\n", b.jmp.index) 611 } 612 if b.cjmp != nil { 613 fmt.Fprintf(os.Stderr, "cjmp to %d\n", b.cjmp.index) 614 } 615 } 616 617 // Place the jmp block next. 618 if b.jmp != nil { 619 // jump threading (empty cycles are impossible) 620 for b.jmp.insns == nil { 621 b.jmp = b.jmp.jmp 622 } 623 624 setinitialstack(b.jmp, stack+isiterjmp) 625 if b.jmp.index < 0 { 626 // Successor is not yet visited: 627 // place it next and fall through. 628 visit(b.jmp) 629 } else { 630 // Successor already visited; 631 // explicit backward jump required. 632 pc += 5 633 } 634 } 635 636 // Then the cjmp block. 637 if b.cjmp != nil { 638 // jump threading (empty cycles are impossible) 639 for b.cjmp.insns == nil { 640 b.cjmp = b.cjmp.jmp 641 } 642 643 setinitialstack(b.cjmp, stack) 644 visit(b.cjmp) 645 646 // Patch the CJMP/ITERJMP, if present. 647 if cjmpAddr != nil { 648 *cjmpAddr = b.cjmp.addr 649 } 650 } 651 } 652 setinitialstack(entry, 0) 653 visit(entry) 654 655 fn := fcomp.fn 656 fn.MaxStack = maxstack 657 658 // Emit bytecode (and position table). 659 if Disassemble { 660 fmt.Fprintf(os.Stderr, "Function %s: (%d blocks, %d bytes)\n", name, len(blocks), pc) 661 } 662 fcomp.generate(blocks, pc) 663 664 if debug { 665 fmt.Fprintf(os.Stderr, "code=%d maxstack=%d\n", fn.Code, fn.MaxStack) 666 } 667 668 // Don't panic until we've completed printing of the function. 669 if oops { 670 panic("internal error") 671 } 672 673 if debug { 674 fmt.Fprintf(os.Stderr, "end function(%s @ %s)\n", name, pos) 675 } 676 677 return fn 678} 679 680func docStringFromBody(body []syntax.Stmt) string { 681 if len(body) == 0 { 682 return "" 683 } 684 expr, ok := body[0].(*syntax.ExprStmt) 685 if !ok { 686 return "" 687 } 688 lit, ok := expr.X.(*syntax.Literal) 689 if !ok { 690 return "" 691 } 692 if lit.Token != syntax.STRING { 693 return "" 694 } 695 return lit.Value.(string) 696} 697 698func (insn *insn) stackeffect() int { 699 se := int(stackEffect[insn.op]) 700 if se == variableStackEffect { 701 arg := int(insn.arg) 702 switch insn.op { 703 case CALL, CALL_KW, CALL_VAR, CALL_VAR_KW: 704 se = -int(2*(insn.arg&0xff) + insn.arg>>8) 705 if insn.op != CALL { 706 se-- 707 } 708 if insn.op == CALL_VAR_KW { 709 se-- 710 } 711 case ITERJMP: 712 // Stack effect differs by successor: 713 // +1 for jmp/false/ok 714 // 0 for cjmp/true/exhausted 715 // Handled specially in caller. 716 se = 0 717 case MAKELIST, MAKETUPLE: 718 se = 1 - arg 719 case UNPACK: 720 se = arg - 1 721 default: 722 panic(insn.op) 723 } 724 } 725 return se 726} 727 728// generate emits the linear instruction stream from the CFG, 729// and builds the PC-to-line number table. 730func (fcomp *fcomp) generate(blocks []*block, codelen uint32) { 731 code := make([]byte, 0, codelen) 732 var pclinetab []uint16 733 prev := pclinecol{ 734 pc: 0, 735 line: fcomp.fn.Pos.Line, 736 col: fcomp.fn.Pos.Col, 737 } 738 739 for _, b := range blocks { 740 if Disassemble { 741 fmt.Fprintf(os.Stderr, "%d:\n", b.index) 742 } 743 pc := b.addr 744 for _, insn := range b.insns { 745 if insn.line != 0 { 746 // Instruction has a source position. Delta-encode it. 747 // See Funcode.Position for the encoding. 748 for { 749 var incomplete uint16 750 751 // Δpc, uint4 752 deltapc := pc - prev.pc 753 if deltapc > 0x0f { 754 deltapc = 0x0f 755 incomplete = 1 756 } 757 prev.pc += deltapc 758 759 // Δline, int5 760 deltaline, ok := clip(insn.line-prev.line, -0x10, 0x0f) 761 if !ok { 762 incomplete = 1 763 } 764 prev.line += deltaline 765 766 // Δcol, int6 767 deltacol, ok := clip(insn.col-prev.col, -0x20, 0x1f) 768 if !ok { 769 incomplete = 1 770 } 771 prev.col += deltacol 772 773 entry := uint16(deltapc<<12) | uint16(deltaline&0x1f)<<7 | uint16(deltacol&0x3f)<<1 | incomplete 774 pclinetab = append(pclinetab, entry) 775 if incomplete == 0 { 776 break 777 } 778 } 779 780 if Disassemble { 781 fmt.Fprintf(os.Stderr, "\t\t\t\t\t; %s:%d:%d\n", 782 filepath.Base(fcomp.fn.Pos.Filename()), insn.line, insn.col) 783 } 784 } 785 if Disassemble { 786 PrintOp(fcomp.fn, pc, insn.op, insn.arg) 787 } 788 code = append(code, byte(insn.op)) 789 pc++ 790 if insn.op >= OpcodeArgMin { 791 if insn.op == CJMP || insn.op == ITERJMP { 792 code = addUint32(code, insn.arg, 4) // pad arg to 4 bytes 793 } else { 794 code = addUint32(code, insn.arg, 0) 795 } 796 pc = uint32(len(code)) 797 } 798 } 799 800 if b.jmp != nil && b.jmp.index != b.index+1 { 801 addr := b.jmp.addr 802 if Disassemble { 803 fmt.Fprintf(os.Stderr, "\t%d\tjmp\t\t%d\t; block %d\n", 804 pc, addr, b.jmp.index) 805 } 806 code = append(code, byte(JMP)) 807 code = addUint32(code, addr, 4) 808 } 809 } 810 if len(code) != int(codelen) { 811 panic("internal error: wrong code length") 812 } 813 814 fcomp.fn.pclinetab = pclinetab 815 fcomp.fn.Code = code 816} 817 818// clip returns the value nearest x in the range [min...max], 819// and whether it equals x. 820func clip(x, min, max int32) (int32, bool) { 821 if x > max { 822 return max, false 823 } else if x < min { 824 return min, false 825 } else { 826 return x, true 827 } 828} 829 830// addUint32 encodes x as 7-bit little-endian varint. 831// TODO(adonovan): opt: steal top two bits of opcode 832// to encode the number of complete bytes that follow. 833func addUint32(code []byte, x uint32, min int) []byte { 834 end := len(code) + min 835 for x >= 0x80 { 836 code = append(code, byte(x)|0x80) 837 x >>= 7 838 } 839 code = append(code, byte(x)) 840 // Pad the operand with NOPs to exactly min bytes. 841 for len(code) < end { 842 code = append(code, byte(NOP)) 843 } 844 return code 845} 846 847func argLen(x uint32) int { 848 n := 0 849 for x >= 0x80 { 850 n++ 851 x >>= 7 852 } 853 return n + 1 854} 855 856// PrintOp prints an instruction. 857// It is provided for debugging. 858func PrintOp(fn *Funcode, pc uint32, op Opcode, arg uint32) { 859 if op < OpcodeArgMin { 860 fmt.Fprintf(os.Stderr, "\t%d\t%s\n", pc, op) 861 return 862 } 863 864 var comment string 865 switch op { 866 case CONSTANT: 867 switch x := fn.Prog.Constants[arg].(type) { 868 case string: 869 comment = strconv.Quote(x) 870 case Bytes: 871 comment = "b" + strconv.Quote(string(x)) 872 default: 873 comment = fmt.Sprint(x) 874 } 875 case MAKEFUNC: 876 comment = fn.Prog.Functions[arg].Name 877 case SETLOCAL, LOCAL: 878 comment = fn.Locals[arg].Name 879 case SETGLOBAL, GLOBAL: 880 comment = fn.Prog.Globals[arg].Name 881 case ATTR, SETFIELD, PREDECLARED, UNIVERSAL: 882 comment = fn.Prog.Names[arg] 883 case FREE: 884 comment = fn.Freevars[arg].Name 885 case CALL, CALL_VAR, CALL_KW, CALL_VAR_KW: 886 comment = fmt.Sprintf("%d pos, %d named", arg>>8, arg&0xff) 887 default: 888 // JMP, CJMP, ITERJMP, MAKETUPLE, MAKELIST, LOAD, UNPACK: 889 // arg is just a number 890 } 891 var buf bytes.Buffer 892 fmt.Fprintf(&buf, "\t%d\t%-10s\t%d", pc, op, arg) 893 if comment != "" { 894 fmt.Fprint(&buf, "\t; ", comment) 895 } 896 fmt.Fprintln(&buf) 897 os.Stderr.Write(buf.Bytes()) 898} 899 900// newBlock returns a new block. 901func (fcomp) newBlock() *block { 902 return &block{index: -1, initialstack: -1} 903} 904 905// emit emits an instruction to the current block. 906func (fcomp *fcomp) emit(op Opcode) { 907 if op >= OpcodeArgMin { 908 panic("missing arg: " + op.String()) 909 } 910 insn := insn{op: op, line: fcomp.pos.Line, col: fcomp.pos.Col} 911 fcomp.block.insns = append(fcomp.block.insns, insn) 912 fcomp.pos.Line = 0 913 fcomp.pos.Col = 0 914} 915 916// emit1 emits an instruction with an immediate operand. 917func (fcomp *fcomp) emit1(op Opcode, arg uint32) { 918 if op < OpcodeArgMin { 919 panic("unwanted arg: " + op.String()) 920 } 921 insn := insn{op: op, arg: arg, line: fcomp.pos.Line, col: fcomp.pos.Col} 922 fcomp.block.insns = append(fcomp.block.insns, insn) 923 fcomp.pos.Line = 0 924 fcomp.pos.Col = 0 925} 926 927// jump emits a jump to the specified block. 928// On return, the current block is unset. 929func (fcomp *fcomp) jump(b *block) { 930 if b == fcomp.block { 931 panic("self-jump") // unreachable: Starlark has no arbitrary looping constructs 932 } 933 fcomp.block.jmp = b 934 fcomp.block = nil 935} 936 937// condjump emits a conditional jump (CJMP or ITERJMP) 938// to the specified true/false blocks. 939// (For ITERJMP, the cases are jmp/f/ok and cjmp/t/exhausted.) 940// On return, the current block is unset. 941func (fcomp *fcomp) condjump(op Opcode, t, f *block) { 942 if !(op == CJMP || op == ITERJMP) { 943 panic("not a conditional jump: " + op.String()) 944 } 945 fcomp.emit1(op, 0) // fill in address later 946 fcomp.block.cjmp = t 947 fcomp.jump(f) 948} 949 950// nameIndex returns the index of the specified name 951// within the name pool, adding it if necessary. 952func (pcomp *pcomp) nameIndex(name string) uint32 { 953 index, ok := pcomp.names[name] 954 if !ok { 955 index = uint32(len(pcomp.prog.Names)) 956 pcomp.names[name] = index 957 pcomp.prog.Names = append(pcomp.prog.Names, name) 958 } 959 return index 960} 961 962// constantIndex returns the index of the specified constant 963// within the constant pool, adding it if necessary. 964func (pcomp *pcomp) constantIndex(v interface{}) uint32 { 965 index, ok := pcomp.constants[v] 966 if !ok { 967 index = uint32(len(pcomp.prog.Constants)) 968 pcomp.constants[v] = index 969 pcomp.prog.Constants = append(pcomp.prog.Constants, v) 970 } 971 return index 972} 973 974// functionIndex returns the index of the specified function 975// AST the nestedfun pool, adding it if necessary. 976func (pcomp *pcomp) functionIndex(fn *Funcode) uint32 { 977 index, ok := pcomp.functions[fn] 978 if !ok { 979 index = uint32(len(pcomp.prog.Functions)) 980 pcomp.functions[fn] = index 981 pcomp.prog.Functions = append(pcomp.prog.Functions, fn) 982 } 983 return index 984} 985 986// string emits code to push the specified string. 987func (fcomp *fcomp) string(s string) { 988 fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(s)) 989} 990 991// setPos sets the current source position. 992// It should be called prior to any operation that can fail dynamically. 993// All positions are assumed to belong to the same file. 994func (fcomp *fcomp) setPos(pos syntax.Position) { 995 fcomp.pos = pos 996} 997 998// set emits code to store the top-of-stack value 999// to the specified local, cell, or global variable. 1000func (fcomp *fcomp) set(id *syntax.Ident) { 1001 bind := id.Binding.(*resolve.Binding) 1002 switch bind.Scope { 1003 case resolve.Local: 1004 fcomp.emit1(SETLOCAL, uint32(bind.Index)) 1005 case resolve.Cell: 1006 fcomp.emit1(SETLOCALCELL, uint32(bind.Index)) 1007 case resolve.Global: 1008 fcomp.emit1(SETGLOBAL, uint32(bind.Index)) 1009 default: 1010 log.Panicf("%s: set(%s): not global/local/cell (%d)", id.NamePos, id.Name, bind.Scope) 1011 } 1012} 1013 1014// lookup emits code to push the value of the specified variable. 1015func (fcomp *fcomp) lookup(id *syntax.Ident) { 1016 bind := id.Binding.(*resolve.Binding) 1017 if bind.Scope != resolve.Universal { // (universal lookup can't fail) 1018 fcomp.setPos(id.NamePos) 1019 } 1020 switch bind.Scope { 1021 case resolve.Local: 1022 fcomp.emit1(LOCAL, uint32(bind.Index)) 1023 case resolve.Free: 1024 fcomp.emit1(FREECELL, uint32(bind.Index)) 1025 case resolve.Cell: 1026 fcomp.emit1(LOCALCELL, uint32(bind.Index)) 1027 case resolve.Global: 1028 fcomp.emit1(GLOBAL, uint32(bind.Index)) 1029 case resolve.Predeclared: 1030 fcomp.emit1(PREDECLARED, fcomp.pcomp.nameIndex(id.Name)) 1031 case resolve.Universal: 1032 fcomp.emit1(UNIVERSAL, fcomp.pcomp.nameIndex(id.Name)) 1033 default: 1034 log.Panicf("%s: compiler.lookup(%s): scope = %d", id.NamePos, id.Name, bind.Scope) 1035 } 1036} 1037 1038func (fcomp *fcomp) stmts(stmts []syntax.Stmt) { 1039 for _, stmt := range stmts { 1040 fcomp.stmt(stmt) 1041 } 1042} 1043 1044func (fcomp *fcomp) stmt(stmt syntax.Stmt) { 1045 switch stmt := stmt.(type) { 1046 case *syntax.ExprStmt: 1047 if _, ok := stmt.X.(*syntax.Literal); ok { 1048 // Opt: don't compile doc comments only to pop them. 1049 return 1050 } 1051 fcomp.expr(stmt.X) 1052 fcomp.emit(POP) 1053 1054 case *syntax.BranchStmt: 1055 // Resolver invariant: break/continue appear only within loops. 1056 switch stmt.Token { 1057 case syntax.PASS: 1058 // no-op 1059 case syntax.BREAK: 1060 b := fcomp.loops[len(fcomp.loops)-1].break_ 1061 fcomp.jump(b) 1062 fcomp.block = fcomp.newBlock() // dead code 1063 case syntax.CONTINUE: 1064 b := fcomp.loops[len(fcomp.loops)-1].continue_ 1065 fcomp.jump(b) 1066 fcomp.block = fcomp.newBlock() // dead code 1067 } 1068 1069 case *syntax.IfStmt: 1070 // Keep consistent with CondExpr. 1071 t := fcomp.newBlock() 1072 f := fcomp.newBlock() 1073 done := fcomp.newBlock() 1074 1075 fcomp.ifelse(stmt.Cond, t, f) 1076 1077 fcomp.block = t 1078 fcomp.stmts(stmt.True) 1079 fcomp.jump(done) 1080 1081 fcomp.block = f 1082 fcomp.stmts(stmt.False) 1083 fcomp.jump(done) 1084 1085 fcomp.block = done 1086 1087 case *syntax.AssignStmt: 1088 switch stmt.Op { 1089 case syntax.EQ: 1090 // simple assignment: x = y 1091 fcomp.expr(stmt.RHS) 1092 fcomp.assign(stmt.OpPos, stmt.LHS) 1093 1094 case syntax.PLUS_EQ, 1095 syntax.MINUS_EQ, 1096 syntax.STAR_EQ, 1097 syntax.SLASH_EQ, 1098 syntax.SLASHSLASH_EQ, 1099 syntax.PERCENT_EQ, 1100 syntax.AMP_EQ, 1101 syntax.PIPE_EQ, 1102 syntax.CIRCUMFLEX_EQ, 1103 syntax.LTLT_EQ, 1104 syntax.GTGT_EQ: 1105 // augmented assignment: x += y 1106 1107 var set func() 1108 1109 // Evaluate "address" of x exactly once to avoid duplicate side-effects. 1110 switch lhs := unparen(stmt.LHS).(type) { 1111 case *syntax.Ident: 1112 // x = ... 1113 fcomp.lookup(lhs) 1114 set = func() { 1115 fcomp.set(lhs) 1116 } 1117 1118 case *syntax.IndexExpr: 1119 // x[y] = ... 1120 fcomp.expr(lhs.X) 1121 fcomp.expr(lhs.Y) 1122 fcomp.emit(DUP2) 1123 fcomp.setPos(lhs.Lbrack) 1124 fcomp.emit(INDEX) 1125 set = func() { 1126 fcomp.setPos(lhs.Lbrack) 1127 fcomp.emit(SETINDEX) 1128 } 1129 1130 case *syntax.DotExpr: 1131 // x.f = ... 1132 fcomp.expr(lhs.X) 1133 fcomp.emit(DUP) 1134 name := fcomp.pcomp.nameIndex(lhs.Name.Name) 1135 fcomp.setPos(lhs.Dot) 1136 fcomp.emit1(ATTR, name) 1137 set = func() { 1138 fcomp.setPos(lhs.Dot) 1139 fcomp.emit1(SETFIELD, name) 1140 } 1141 1142 default: 1143 panic(lhs) 1144 } 1145 1146 fcomp.expr(stmt.RHS) 1147 1148 if stmt.Op == syntax.PLUS_EQ { 1149 // Allow the runtime to optimize list += iterable. 1150 fcomp.setPos(stmt.OpPos) 1151 fcomp.emit(INPLACE_ADD) 1152 } else { 1153 fcomp.binop(stmt.OpPos, stmt.Op-syntax.PLUS_EQ+syntax.PLUS) 1154 } 1155 set() 1156 } 1157 1158 case *syntax.DefStmt: 1159 fcomp.function(stmt.Function.(*resolve.Function)) 1160 fcomp.set(stmt.Name) 1161 1162 case *syntax.ForStmt: 1163 // Keep consistent with ForClause. 1164 head := fcomp.newBlock() 1165 body := fcomp.newBlock() 1166 tail := fcomp.newBlock() 1167 1168 fcomp.expr(stmt.X) 1169 fcomp.setPos(stmt.For) 1170 fcomp.emit(ITERPUSH) 1171 fcomp.jump(head) 1172 1173 fcomp.block = head 1174 fcomp.condjump(ITERJMP, tail, body) 1175 1176 fcomp.block = body 1177 fcomp.assign(stmt.For, stmt.Vars) 1178 fcomp.loops = append(fcomp.loops, loop{break_: tail, continue_: head}) 1179 fcomp.stmts(stmt.Body) 1180 fcomp.loops = fcomp.loops[:len(fcomp.loops)-1] 1181 fcomp.jump(head) 1182 1183 fcomp.block = tail 1184 fcomp.emit(ITERPOP) 1185 1186 case *syntax.WhileStmt: 1187 head := fcomp.newBlock() 1188 body := fcomp.newBlock() 1189 done := fcomp.newBlock() 1190 1191 fcomp.jump(head) 1192 fcomp.block = head 1193 fcomp.ifelse(stmt.Cond, body, done) 1194 1195 fcomp.block = body 1196 fcomp.loops = append(fcomp.loops, loop{break_: done, continue_: head}) 1197 fcomp.stmts(stmt.Body) 1198 fcomp.loops = fcomp.loops[:len(fcomp.loops)-1] 1199 fcomp.jump(head) 1200 1201 fcomp.block = done 1202 1203 case *syntax.ReturnStmt: 1204 if stmt.Result != nil { 1205 fcomp.expr(stmt.Result) 1206 } else { 1207 fcomp.emit(NONE) 1208 } 1209 fcomp.emit(RETURN) 1210 fcomp.block = fcomp.newBlock() // dead code 1211 1212 case *syntax.LoadStmt: 1213 for i := range stmt.From { 1214 fcomp.string(stmt.From[i].Name) 1215 } 1216 module := stmt.Module.Value.(string) 1217 fcomp.pcomp.prog.Loads = append(fcomp.pcomp.prog.Loads, Binding{ 1218 Name: module, 1219 Pos: stmt.Module.TokenPos, 1220 }) 1221 fcomp.string(module) 1222 fcomp.setPos(stmt.Load) 1223 fcomp.emit1(LOAD, uint32(len(stmt.From))) 1224 for i := range stmt.To { 1225 fcomp.set(stmt.To[len(stmt.To)-1-i]) 1226 } 1227 1228 default: 1229 start, _ := stmt.Span() 1230 log.Panicf("%s: exec: unexpected statement %T", start, stmt) 1231 } 1232} 1233 1234// assign implements lhs = rhs for arbitrary expressions lhs. 1235// RHS is on top of stack, consumed. 1236func (fcomp *fcomp) assign(pos syntax.Position, lhs syntax.Expr) { 1237 switch lhs := lhs.(type) { 1238 case *syntax.ParenExpr: 1239 // (lhs) = rhs 1240 fcomp.assign(pos, lhs.X) 1241 1242 case *syntax.Ident: 1243 // x = rhs 1244 fcomp.set(lhs) 1245 1246 case *syntax.TupleExpr: 1247 // x, y = rhs 1248 fcomp.assignSequence(pos, lhs.List) 1249 1250 case *syntax.ListExpr: 1251 // [x, y] = rhs 1252 fcomp.assignSequence(pos, lhs.List) 1253 1254 case *syntax.IndexExpr: 1255 // x[y] = rhs 1256 fcomp.expr(lhs.X) 1257 fcomp.emit(EXCH) 1258 fcomp.expr(lhs.Y) 1259 fcomp.emit(EXCH) 1260 fcomp.setPos(lhs.Lbrack) 1261 fcomp.emit(SETINDEX) 1262 1263 case *syntax.DotExpr: 1264 // x.f = rhs 1265 fcomp.expr(lhs.X) 1266 fcomp.emit(EXCH) 1267 fcomp.setPos(lhs.Dot) 1268 fcomp.emit1(SETFIELD, fcomp.pcomp.nameIndex(lhs.Name.Name)) 1269 1270 default: 1271 panic(lhs) 1272 } 1273} 1274 1275func (fcomp *fcomp) assignSequence(pos syntax.Position, lhs []syntax.Expr) { 1276 fcomp.setPos(pos) 1277 fcomp.emit1(UNPACK, uint32(len(lhs))) 1278 for i := range lhs { 1279 fcomp.assign(pos, lhs[i]) 1280 } 1281} 1282 1283func (fcomp *fcomp) expr(e syntax.Expr) { 1284 switch e := e.(type) { 1285 case *syntax.ParenExpr: 1286 fcomp.expr(e.X) 1287 1288 case *syntax.Ident: 1289 fcomp.lookup(e) 1290 1291 case *syntax.Literal: 1292 // e.Value is int64, float64, *bigInt, string 1293 v := e.Value 1294 if e.Token == syntax.BYTES { 1295 v = Bytes(v.(string)) 1296 } 1297 fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(v)) 1298 1299 case *syntax.ListExpr: 1300 for _, x := range e.List { 1301 fcomp.expr(x) 1302 } 1303 fcomp.emit1(MAKELIST, uint32(len(e.List))) 1304 1305 case *syntax.CondExpr: 1306 // Keep consistent with IfStmt. 1307 t := fcomp.newBlock() 1308 f := fcomp.newBlock() 1309 done := fcomp.newBlock() 1310 1311 fcomp.ifelse(e.Cond, t, f) 1312 1313 fcomp.block = t 1314 fcomp.expr(e.True) 1315 fcomp.jump(done) 1316 1317 fcomp.block = f 1318 fcomp.expr(e.False) 1319 fcomp.jump(done) 1320 1321 fcomp.block = done 1322 1323 case *syntax.IndexExpr: 1324 fcomp.expr(e.X) 1325 fcomp.expr(e.Y) 1326 fcomp.setPos(e.Lbrack) 1327 fcomp.emit(INDEX) 1328 1329 case *syntax.SliceExpr: 1330 fcomp.setPos(e.Lbrack) 1331 fcomp.expr(e.X) 1332 if e.Lo != nil { 1333 fcomp.expr(e.Lo) 1334 } else { 1335 fcomp.emit(NONE) 1336 } 1337 if e.Hi != nil { 1338 fcomp.expr(e.Hi) 1339 } else { 1340 fcomp.emit(NONE) 1341 } 1342 if e.Step != nil { 1343 fcomp.expr(e.Step) 1344 } else { 1345 fcomp.emit(NONE) 1346 } 1347 fcomp.emit(SLICE) 1348 1349 case *syntax.Comprehension: 1350 if e.Curly { 1351 fcomp.emit(MAKEDICT) 1352 } else { 1353 fcomp.emit1(MAKELIST, 0) 1354 } 1355 fcomp.comprehension(e, 0) 1356 1357 case *syntax.TupleExpr: 1358 fcomp.tuple(e.List) 1359 1360 case *syntax.DictExpr: 1361 fcomp.emit(MAKEDICT) 1362 for _, entry := range e.List { 1363 entry := entry.(*syntax.DictEntry) 1364 fcomp.emit(DUP) 1365 fcomp.expr(entry.Key) 1366 fcomp.expr(entry.Value) 1367 fcomp.setPos(entry.Colon) 1368 fcomp.emit(SETDICTUNIQ) 1369 } 1370 1371 case *syntax.UnaryExpr: 1372 fcomp.expr(e.X) 1373 fcomp.setPos(e.OpPos) 1374 switch e.Op { 1375 case syntax.MINUS: 1376 fcomp.emit(UMINUS) 1377 case syntax.PLUS: 1378 fcomp.emit(UPLUS) 1379 case syntax.NOT: 1380 fcomp.emit(NOT) 1381 case syntax.TILDE: 1382 fcomp.emit(TILDE) 1383 default: 1384 log.Panicf("%s: unexpected unary op: %s", e.OpPos, e.Op) 1385 } 1386 1387 case *syntax.BinaryExpr: 1388 switch e.Op { 1389 // short-circuit operators 1390 // TODO(adonovan): use ifelse to simplify conditions. 1391 case syntax.OR: 1392 // x or y => if x then x else y 1393 done := fcomp.newBlock() 1394 y := fcomp.newBlock() 1395 1396 fcomp.expr(e.X) 1397 fcomp.emit(DUP) 1398 fcomp.condjump(CJMP, done, y) 1399 1400 fcomp.block = y 1401 fcomp.emit(POP) // discard X 1402 fcomp.expr(e.Y) 1403 fcomp.jump(done) 1404 1405 fcomp.block = done 1406 1407 case syntax.AND: 1408 // x and y => if x then y else x 1409 done := fcomp.newBlock() 1410 y := fcomp.newBlock() 1411 1412 fcomp.expr(e.X) 1413 fcomp.emit(DUP) 1414 fcomp.condjump(CJMP, y, done) 1415 1416 fcomp.block = y 1417 fcomp.emit(POP) // discard X 1418 fcomp.expr(e.Y) 1419 fcomp.jump(done) 1420 1421 fcomp.block = done 1422 1423 case syntax.PLUS: 1424 fcomp.plus(e) 1425 1426 default: 1427 // all other strict binary operator (includes comparisons) 1428 fcomp.expr(e.X) 1429 fcomp.expr(e.Y) 1430 fcomp.binop(e.OpPos, e.Op) 1431 } 1432 1433 case *syntax.DotExpr: 1434 fcomp.expr(e.X) 1435 fcomp.setPos(e.Dot) 1436 fcomp.emit1(ATTR, fcomp.pcomp.nameIndex(e.Name.Name)) 1437 1438 case *syntax.CallExpr: 1439 fcomp.call(e) 1440 1441 case *syntax.LambdaExpr: 1442 fcomp.function(e.Function.(*resolve.Function)) 1443 1444 default: 1445 start, _ := e.Span() 1446 log.Panicf("%s: unexpected expr %T", start, e) 1447 } 1448} 1449 1450type summand struct { 1451 x syntax.Expr 1452 plusPos syntax.Position 1453} 1454 1455// plus emits optimized code for ((a+b)+...)+z that avoids naive 1456// quadratic behavior for strings, tuples, and lists, 1457// and folds together adjacent literals of the same type. 1458func (fcomp *fcomp) plus(e *syntax.BinaryExpr) { 1459 // Gather all the right operands of the left tree of plusses. 1460 // A tree (((a+b)+c)+d) becomes args=[a +b +c +d]. 1461 args := make([]summand, 0, 2) // common case: 2 operands 1462 for plus := e; ; { 1463 args = append(args, summand{unparen(plus.Y), plus.OpPos}) 1464 left := unparen(plus.X) 1465 x, ok := left.(*syntax.BinaryExpr) 1466 if !ok || x.Op != syntax.PLUS { 1467 args = append(args, summand{x: left}) 1468 break 1469 } 1470 plus = x 1471 } 1472 // Reverse args to syntactic order. 1473 for i, n := 0, len(args)/2; i < n; i++ { 1474 j := len(args) - 1 - i 1475 args[i], args[j] = args[j], args[i] 1476 } 1477 1478 // Fold sums of adjacent literals of the same type: ""+"", []+[], ()+(). 1479 out := args[:0] // compact in situ 1480 for i := 0; i < len(args); { 1481 j := i + 1 1482 if code := addable(args[i].x); code != 0 { 1483 for j < len(args) && addable(args[j].x) == code { 1484 j++ 1485 } 1486 if j > i+1 { 1487 args[i].x = add(code, args[i:j]) 1488 } 1489 } 1490 out = append(out, args[i]) 1491 i = j 1492 } 1493 args = out 1494 1495 // Emit code for an n-ary sum (n > 0). 1496 fcomp.expr(args[0].x) 1497 for _, summand := range args[1:] { 1498 fcomp.expr(summand.x) 1499 fcomp.setPos(summand.plusPos) 1500 fcomp.emit(PLUS) 1501 } 1502 1503 // If len(args) > 2, use of an accumulator instead of a chain of 1504 // PLUS operations may be more efficient. 1505 // However, no gain was measured on a workload analogous to Bazel loading; 1506 // TODO(adonovan): opt: re-evaluate on a Bazel analysis-like workload. 1507 // 1508 // We cannot use a single n-ary SUM operation 1509 // a b c SUM<3> 1510 // because we need to report a distinct error for each 1511 // individual '+' operation, so three additional operations are 1512 // needed: 1513 // 1514 // ACCSTART => create buffer and append to it 1515 // ACCUM => append to buffer 1516 // ACCEND => get contents of buffer 1517 // 1518 // For string, list, and tuple values, the interpreter can 1519 // optimize these operations by using a mutable buffer. 1520 // For all other types, ACCSTART and ACCEND would behave like 1521 // the identity function and ACCUM behaves like PLUS. 1522 // ACCUM must correctly support user-defined operations 1523 // such as list+foo. 1524 // 1525 // fcomp.emit(ACCSTART) 1526 // for _, summand := range args[1:] { 1527 // fcomp.expr(summand.x) 1528 // fcomp.setPos(summand.plusPos) 1529 // fcomp.emit(ACCUM) 1530 // } 1531 // fcomp.emit(ACCEND) 1532} 1533 1534// addable reports whether e is a statically addable 1535// expression: a [s]tring, [b]ytes, [l]ist, or [t]uple. 1536func addable(e syntax.Expr) rune { 1537 switch e := e.(type) { 1538 case *syntax.Literal: 1539 // TODO(adonovan): opt: support INT/FLOAT/BIGINT constant folding. 1540 switch e.Token { 1541 case syntax.STRING: 1542 return 's' 1543 case syntax.BYTES: 1544 return 'b' 1545 } 1546 case *syntax.ListExpr: 1547 return 'l' 1548 case *syntax.TupleExpr: 1549 return 't' 1550 } 1551 return 0 1552} 1553 1554// add returns an expression denoting the sum of args, 1555// which are all addable values of the type indicated by code. 1556// The resulting syntax is degenerate, lacking position, etc. 1557func add(code rune, args []summand) syntax.Expr { 1558 switch code { 1559 case 's', 'b': 1560 var buf strings.Builder 1561 for _, arg := range args { 1562 buf.WriteString(arg.x.(*syntax.Literal).Value.(string)) 1563 } 1564 tok := syntax.STRING 1565 if code == 'b' { 1566 tok = syntax.BYTES 1567 } 1568 return &syntax.Literal{Token: tok, Value: buf.String()} 1569 case 'l': 1570 var elems []syntax.Expr 1571 for _, arg := range args { 1572 elems = append(elems, arg.x.(*syntax.ListExpr).List...) 1573 } 1574 return &syntax.ListExpr{List: elems} 1575 case 't': 1576 var elems []syntax.Expr 1577 for _, arg := range args { 1578 elems = append(elems, arg.x.(*syntax.TupleExpr).List...) 1579 } 1580 return &syntax.TupleExpr{List: elems} 1581 } 1582 panic(code) 1583} 1584 1585func unparen(e syntax.Expr) syntax.Expr { 1586 if p, ok := e.(*syntax.ParenExpr); ok { 1587 return unparen(p.X) 1588 } 1589 return e 1590} 1591 1592func (fcomp *fcomp) binop(pos syntax.Position, op syntax.Token) { 1593 // TODO(adonovan): simplify by assuming syntax and compiler constants align. 1594 fcomp.setPos(pos) 1595 switch op { 1596 // arithmetic 1597 case syntax.PLUS: 1598 fcomp.emit(PLUS) 1599 case syntax.MINUS: 1600 fcomp.emit(MINUS) 1601 case syntax.STAR: 1602 fcomp.emit(STAR) 1603 case syntax.SLASH: 1604 fcomp.emit(SLASH) 1605 case syntax.SLASHSLASH: 1606 fcomp.emit(SLASHSLASH) 1607 case syntax.PERCENT: 1608 fcomp.emit(PERCENT) 1609 case syntax.AMP: 1610 fcomp.emit(AMP) 1611 case syntax.PIPE: 1612 fcomp.emit(PIPE) 1613 case syntax.CIRCUMFLEX: 1614 fcomp.emit(CIRCUMFLEX) 1615 case syntax.LTLT: 1616 fcomp.emit(LTLT) 1617 case syntax.GTGT: 1618 fcomp.emit(GTGT) 1619 case syntax.IN: 1620 fcomp.emit(IN) 1621 case syntax.NOT_IN: 1622 fcomp.emit(IN) 1623 fcomp.emit(NOT) 1624 1625 // comparisons 1626 case syntax.EQL, 1627 syntax.NEQ, 1628 syntax.GT, 1629 syntax.LT, 1630 syntax.LE, 1631 syntax.GE: 1632 fcomp.emit(Opcode(op-syntax.EQL) + EQL) 1633 1634 default: 1635 log.Panicf("%s: unexpected binary op: %s", pos, op) 1636 } 1637} 1638 1639func (fcomp *fcomp) call(call *syntax.CallExpr) { 1640 // TODO(adonovan): opt: Use optimized path for calling methods 1641 // of built-ins: x.f(...) to avoid materializing a closure. 1642 // if dot, ok := call.Fcomp.(*syntax.DotExpr); ok { 1643 // fcomp.expr(dot.X) 1644 // fcomp.args(call) 1645 // fcomp.emit1(CALL_ATTR, fcomp.name(dot.Name.Name)) 1646 // return 1647 // } 1648 1649 // usual case 1650 fcomp.expr(call.Fn) 1651 op, arg := fcomp.args(call) 1652 fcomp.setPos(call.Lparen) 1653 fcomp.emit1(op, arg) 1654} 1655 1656// args emits code to push a tuple of positional arguments 1657// and a tuple of named arguments containing alternating keys and values. 1658// Either or both tuples may be empty (TODO(adonovan): optimize). 1659func (fcomp *fcomp) args(call *syntax.CallExpr) (op Opcode, arg uint32) { 1660 var callmode int 1661 // Compute the number of each kind of parameter. 1662 var p, n int // number of positional, named arguments 1663 var varargs, kwargs syntax.Expr 1664 for _, arg := range call.Args { 1665 if binary, ok := arg.(*syntax.BinaryExpr); ok && binary.Op == syntax.EQ { 1666 1667 // named argument (name, value) 1668 fcomp.string(binary.X.(*syntax.Ident).Name) 1669 fcomp.expr(binary.Y) 1670 n++ 1671 continue 1672 } 1673 if unary, ok := arg.(*syntax.UnaryExpr); ok { 1674 if unary.Op == syntax.STAR { 1675 callmode |= 1 1676 varargs = unary.X 1677 continue 1678 } else if unary.Op == syntax.STARSTAR { 1679 callmode |= 2 1680 kwargs = unary.X 1681 continue 1682 } 1683 } 1684 1685 // positional argument 1686 fcomp.expr(arg) 1687 p++ 1688 } 1689 1690 // Python2 and Python3 both permit named arguments 1691 // to appear both before and after a *args argument: 1692 // f(1, 2, x=3, *[4], y=5, **dict(z=6)) 1693 // 1694 // They also differ in their evaluation order: 1695 // Python2: 1 2 3 5 4 6 (*args and **kwargs evaluated last) 1696 // Python3: 1 2 4 3 5 6 (positional args evaluated before named args) 1697 // Starlark-in-Java historically used a third order: 1698 // Lexical: 1 2 3 4 5 6 (all args evaluated left-to-right) 1699 // 1700 // After discussion in github.com/bazelbuild/starlark#13, the 1701 // spec now requires Starlark to statically reject named 1702 // arguments after *args (e.g. y=5), and to use Python2-style 1703 // evaluation order. This is both easy to implement and 1704 // consistent with lexical order: 1705 // 1706 // f(1, 2, x=3, *[4], **dict(z=6)) # 1 2 3 4 6 1707 1708 // *args 1709 if varargs != nil { 1710 fcomp.expr(varargs) 1711 } 1712 1713 // **kwargs 1714 if kwargs != nil { 1715 fcomp.expr(kwargs) 1716 } 1717 1718 // TODO(adonovan): avoid this with a more flexible encoding. 1719 if p >= 256 || n >= 256 { 1720 // resolve already checked this; should be unreachable 1721 panic("too many arguments in call") 1722 } 1723 1724 return CALL + Opcode(callmode), uint32(p<<8 | n) 1725} 1726 1727func (fcomp *fcomp) tuple(elems []syntax.Expr) { 1728 for _, elem := range elems { 1729 fcomp.expr(elem) 1730 } 1731 fcomp.emit1(MAKETUPLE, uint32(len(elems))) 1732} 1733 1734func (fcomp *fcomp) comprehension(comp *syntax.Comprehension, clauseIndex int) { 1735 if clauseIndex == len(comp.Clauses) { 1736 fcomp.emit(DUP) // accumulator 1737 if comp.Curly { 1738 // dict: {k:v for ...} 1739 // Parser ensures that body is of form k:v. 1740 // Python-style set comprehensions {body for vars in x} 1741 // are not supported. 1742 entry := comp.Body.(*syntax.DictEntry) 1743 fcomp.expr(entry.Key) 1744 fcomp.expr(entry.Value) 1745 fcomp.setPos(entry.Colon) 1746 fcomp.emit(SETDICT) 1747 } else { 1748 // list: [body for vars in x] 1749 fcomp.expr(comp.Body) 1750 fcomp.emit(APPEND) 1751 } 1752 return 1753 } 1754 1755 clause := comp.Clauses[clauseIndex] 1756 switch clause := clause.(type) { 1757 case *syntax.IfClause: 1758 t := fcomp.newBlock() 1759 done := fcomp.newBlock() 1760 fcomp.ifelse(clause.Cond, t, done) 1761 1762 fcomp.block = t 1763 fcomp.comprehension(comp, clauseIndex+1) 1764 fcomp.jump(done) 1765 1766 fcomp.block = done 1767 return 1768 1769 case *syntax.ForClause: 1770 // Keep consistent with ForStmt. 1771 head := fcomp.newBlock() 1772 body := fcomp.newBlock() 1773 tail := fcomp.newBlock() 1774 1775 fcomp.expr(clause.X) 1776 fcomp.setPos(clause.For) 1777 fcomp.emit(ITERPUSH) 1778 fcomp.jump(head) 1779 1780 fcomp.block = head 1781 fcomp.condjump(ITERJMP, tail, body) 1782 1783 fcomp.block = body 1784 fcomp.assign(clause.For, clause.Vars) 1785 fcomp.comprehension(comp, clauseIndex+1) 1786 fcomp.jump(head) 1787 1788 fcomp.block = tail 1789 fcomp.emit(ITERPOP) 1790 return 1791 } 1792 1793 start, _ := clause.Span() 1794 log.Panicf("%s: unexpected comprehension clause %T", start, clause) 1795} 1796 1797func (fcomp *fcomp) function(f *resolve.Function) { 1798 // Evaluation of the defaults may fail, so record the position. 1799 fcomp.setPos(f.Pos) 1800 1801 // To reduce allocation, we emit a combined tuple 1802 // for the defaults and the freevars. 1803 // The function knows where to split it at run time. 1804 1805 // Generate tuple of parameter defaults. For: 1806 // def f(p1, p2=dp2, p3=dp3, *, k1, k2=dk2, k3, **kwargs) 1807 // the tuple is: 1808 // (dp2, dp3, MANDATORY, dk2, MANDATORY). 1809 ndefaults := 0 1810 seenStar := false 1811 for _, param := range f.Params { 1812 switch param := param.(type) { 1813 case *syntax.BinaryExpr: 1814 fcomp.expr(param.Y) 1815 ndefaults++ 1816 case *syntax.UnaryExpr: 1817 seenStar = true // * or *args (also **kwargs) 1818 case *syntax.Ident: 1819 if seenStar { 1820 fcomp.emit(MANDATORY) 1821 ndefaults++ 1822 } 1823 } 1824 } 1825 1826 // Capture the cells of the function's 1827 // free variables from the lexical environment. 1828 for _, freevar := range f.FreeVars { 1829 // Don't call fcomp.lookup because we want 1830 // the cell itself, not its content. 1831 switch freevar.Scope { 1832 case resolve.Free: 1833 fcomp.emit1(FREE, uint32(freevar.Index)) 1834 case resolve.Cell: 1835 fcomp.emit1(LOCAL, uint32(freevar.Index)) 1836 } 1837 } 1838 1839 fcomp.emit1(MAKETUPLE, uint32(ndefaults+len(f.FreeVars))) 1840 1841 funcode := fcomp.pcomp.function(f.Name, f.Pos, f.Body, f.Locals, f.FreeVars) 1842 1843 if debug { 1844 // TODO(adonovan): do compilations sequentially not as a tree, 1845 // to make the log easier to read. 1846 // Simplify by identifying Toplevel and functionIndex 0. 1847 fmt.Fprintf(os.Stderr, "resuming %s @ %s\n", fcomp.fn.Name, fcomp.pos) 1848 } 1849 1850 // def f(a, *, b=1) has only 2 parameters. 1851 numParams := len(f.Params) 1852 if f.NumKwonlyParams > 0 && !f.HasVarargs { 1853 numParams-- 1854 } 1855 1856 funcode.NumParams = numParams 1857 funcode.NumKwonlyParams = f.NumKwonlyParams 1858 funcode.HasVarargs = f.HasVarargs 1859 funcode.HasKwargs = f.HasKwargs 1860 fcomp.emit1(MAKEFUNC, fcomp.pcomp.functionIndex(funcode)) 1861} 1862 1863// ifelse emits a Boolean control flow decision. 1864// On return, the current block is unset. 1865func (fcomp *fcomp) ifelse(cond syntax.Expr, t, f *block) { 1866 switch cond := cond.(type) { 1867 case *syntax.UnaryExpr: 1868 if cond.Op == syntax.NOT { 1869 // if not x then goto t else goto f 1870 // => 1871 // if x then goto f else goto t 1872 fcomp.ifelse(cond.X, f, t) 1873 return 1874 } 1875 1876 case *syntax.BinaryExpr: 1877 switch cond.Op { 1878 case syntax.AND: 1879 // if x and y then goto t else goto f 1880 // => 1881 // if x then ifelse(y, t, f) else goto f 1882 fcomp.expr(cond.X) 1883 y := fcomp.newBlock() 1884 fcomp.condjump(CJMP, y, f) 1885 1886 fcomp.block = y 1887 fcomp.ifelse(cond.Y, t, f) 1888 return 1889 1890 case syntax.OR: 1891 // if x or y then goto t else goto f 1892 // => 1893 // if x then goto t else ifelse(y, t, f) 1894 fcomp.expr(cond.X) 1895 y := fcomp.newBlock() 1896 fcomp.condjump(CJMP, t, y) 1897 1898 fcomp.block = y 1899 fcomp.ifelse(cond.Y, t, f) 1900 return 1901 case syntax.NOT_IN: 1902 // if x not in y then goto t else goto f 1903 // => 1904 // if x in y then goto f else goto t 1905 copy := *cond 1906 copy.Op = syntax.IN 1907 fcomp.expr(©) 1908 fcomp.condjump(CJMP, f, t) 1909 return 1910 } 1911 } 1912 1913 // general case 1914 fcomp.expr(cond) 1915 fcomp.condjump(CJMP, t, f) 1916} 1917