1// Package compile defines the Starlark bytecode compiler.
2// It is an internal package of the Starlark interpreter and is not directly accessible to clients.
3//
4// The compiler generates byte code with optional uint32 operands for a
5// virtual machine with the following components:
6//   - a program counter, which is an index into the byte code array.
7//   - an operand stack, whose maximum size is computed for each function by the compiler.
8//   - an stack of active iterators.
9//   - an array of local variables.
10//     The number of local variables and their indices are computed by the resolver.
11//     Locals (possibly including parameters) that are shared with nested functions
12//     are 'cells': their locals array slot will contain a value of type 'cell',
13//     an indirect value in a box that is explicitly read/updated by instructions.
14//   - an array of free variables, for nested functions.
15//     Free variables are a subset of the ancestors' cell variables.
16//     As with locals and cells, these are computed by the resolver.
17//   - an array of global variables, shared among all functions in the same module.
18//     All elements are initially nil.
19//   - two maps of predeclared and universal identifiers.
20//
21// Each function has a line number table that maps each program counter
22// offset to a source position, including the column number.
23//
24// Operands, logically uint32s, are encoded using little-endian 7-bit
25// varints, the top bit indicating that more bytes follow.
26//
27package compile // import "go.starlark.net/internal/compile"
28
29import (
30	"bytes"
31	"fmt"
32	"log"
33	"os"
34	"path/filepath"
35	"strconv"
36	"strings"
37	"sync"
38
39	"go.starlark.net/resolve"
40	"go.starlark.net/syntax"
41)
42
43// Disassemble causes the assembly code for each function
44// to be printed to stderr as it is generated.
45var Disassemble = false
46
47const debug = false // make code generation verbose, for debugging the compiler
48
49// Increment this to force recompilation of saved bytecode files.
50const Version = 12
51
52type Opcode uint8
53
54// "x DUP x x" is a "stack picture" that describes the state of the
55// stack before and after execution of the instruction.
56//
57// OP<index> indicates an immediate operand that is an index into the
58// specified table: locals, names, freevars, constants.
59const (
60	NOP Opcode = iota // - NOP -
61
62	// stack operations
63	DUP  //   x DUP x x
64	DUP2 // x y DUP2 x y x y
65	POP  //   x POP -
66	EXCH // x y EXCH y x
67
68	// binary comparisons
69	// (order must match Token)
70	LT
71	GT
72	GE
73	LE
74	EQL
75	NEQ
76
77	// binary arithmetic
78	// (order must match Token)
79	PLUS
80	MINUS
81	STAR
82	SLASH
83	SLASHSLASH
84	PERCENT
85	AMP
86	PIPE
87	CIRCUMFLEX
88	LTLT
89	GTGT
90
91	IN
92
93	// unary operators
94	UPLUS  // x UPLUS x
95	UMINUS // x UMINUS -x
96	TILDE  // x TILDE ~x
97
98	NONE      // - NONE None
99	TRUE      // - TRUE True
100	FALSE     // - FALSE False
101	MANDATORY // - MANDATORY Mandatory	     [sentinel value for required kwonly args]
102
103	ITERPUSH    //       iterable ITERPUSH    -  [pushes the iterator stack]
104	ITERPOP     //              - ITERPOP     -    [pops the iterator stack]
105	NOT         //          value NOT         bool
106	RETURN      //          value RETURN      -
107	SETINDEX    //        a i new SETINDEX    -
108	INDEX       //            a i INDEX       elem
109	SETDICT     // dict key value SETDICT     -
110	SETDICTUNIQ // dict key value SETDICTUNIQ -
111	APPEND      //      list elem APPEND      -
112	SLICE       //   x lo hi step SLICE       slice
113	INPLACE_ADD //            x y INPLACE_ADD z      where z is x+y or x.extend(y)
114	MAKEDICT    //              - MAKEDICT    dict
115
116	// --- opcodes with an argument must go below this line ---
117
118	// control flow
119	JMP     //            - JMP<addr>     -
120	CJMP    //         cond CJMP<addr>    -
121	ITERJMP //            - ITERJMP<addr> elem   (and fall through) [acts on topmost iterator]
122	//       or:          - ITERJMP<addr> -      (and jump)
123
124	CONSTANT     //                 - CONSTANT<constant>  value
125	MAKETUPLE    //         x1 ... xn MAKETUPLE<n>        tuple
126	MAKELIST     //         x1 ... xn MAKELIST<n>         list
127	MAKEFUNC     // defaults+freevars MAKEFUNC<func>      fn
128	LOAD         //   from1 ... fromN module LOAD<n>      v1 ... vN
129	SETLOCAL     //             value SETLOCAL<local>     -
130	SETGLOBAL    //             value SETGLOBAL<global>   -
131	LOCAL        //                 - LOCAL<local>        value
132	FREE         //                 - FREE<freevar>       cell
133	FREECELL     //                 - FREECELL<freevar>   value       (content of FREE cell)
134	LOCALCELL    //                 - LOCALCELL<local>    value       (content of LOCAL cell)
135	SETLOCALCELL //             value SETLOCALCELL<local> -           (set content of LOCAL cell)
136	GLOBAL       //                 - GLOBAL<global>      value
137	PREDECLARED  //                 - PREDECLARED<name>   value
138	UNIVERSAL    //                 - UNIVERSAL<name>     value
139	ATTR         //                 x ATTR<name>          y           y = x.name
140	SETFIELD     //               x y SETFIELD<name>      -           x.name = y
141	UNPACK       //          iterable UNPACK<n>           vn ... v1
142
143	// n>>8 is #positional args and n&0xff is #named args (pairs).
144	CALL        // fn positional named                CALL<n>        result
145	CALL_VAR    // fn positional named *args          CALL_VAR<n>    result
146	CALL_KW     // fn positional named       **kwargs CALL_KW<n>     result
147	CALL_VAR_KW // fn positional named *args **kwargs CALL_VAR_KW<n> result
148
149	OpcodeArgMin = JMP
150	OpcodeMax    = CALL_VAR_KW
151)
152
153// TODO(adonovan): add dynamic checks for missing opcodes in the tables below.
154
155var opcodeNames = [...]string{
156	AMP:          "amp",
157	APPEND:       "append",
158	ATTR:         "attr",
159	CALL:         "call",
160	CALL_KW:      "call_kw ",
161	CALL_VAR:     "call_var",
162	CALL_VAR_KW:  "call_var_kw",
163	CIRCUMFLEX:   "circumflex",
164	CJMP:         "cjmp",
165	CONSTANT:     "constant",
166	DUP2:         "dup2",
167	DUP:          "dup",
168	EQL:          "eql",
169	EXCH:         "exch",
170	FALSE:        "false",
171	FREE:         "free",
172	FREECELL:     "freecell",
173	GE:           "ge",
174	GLOBAL:       "global",
175	GT:           "gt",
176	GTGT:         "gtgt",
177	IN:           "in",
178	INDEX:        "index",
179	INPLACE_ADD:  "inplace_add",
180	ITERJMP:      "iterjmp",
181	ITERPOP:      "iterpop",
182	ITERPUSH:     "iterpush",
183	JMP:          "jmp",
184	LE:           "le",
185	LOAD:         "load",
186	LOCAL:        "local",
187	LOCALCELL:    "localcell",
188	LT:           "lt",
189	LTLT:         "ltlt",
190	MAKEDICT:     "makedict",
191	MAKEFUNC:     "makefunc",
192	MAKELIST:     "makelist",
193	MAKETUPLE:    "maketuple",
194	MANDATORY:    "mandatory",
195	MINUS:        "minus",
196	NEQ:          "neq",
197	NONE:         "none",
198	NOP:          "nop",
199	NOT:          "not",
200	PERCENT:      "percent",
201	PIPE:         "pipe",
202	PLUS:         "plus",
203	POP:          "pop",
204	PREDECLARED:  "predeclared",
205	RETURN:       "return",
206	SETDICT:      "setdict",
207	SETDICTUNIQ:  "setdictuniq",
208	SETFIELD:     "setfield",
209	SETGLOBAL:    "setglobal",
210	SETINDEX:     "setindex",
211	SETLOCAL:     "setlocal",
212	SETLOCALCELL: "setlocalcell",
213	SLASH:        "slash",
214	SLASHSLASH:   "slashslash",
215	SLICE:        "slice",
216	STAR:         "star",
217	TILDE:        "tilde",
218	TRUE:         "true",
219	UMINUS:       "uminus",
220	UNIVERSAL:    "universal",
221	UNPACK:       "unpack",
222	UPLUS:        "uplus",
223}
224
225const variableStackEffect = 0x7f
226
227// stackEffect records the effect on the size of the operand stack of
228// each kind of instruction. For some instructions this requires computation.
229var stackEffect = [...]int8{
230	AMP:          -1,
231	APPEND:       -2,
232	ATTR:         0,
233	CALL:         variableStackEffect,
234	CALL_KW:      variableStackEffect,
235	CALL_VAR:     variableStackEffect,
236	CALL_VAR_KW:  variableStackEffect,
237	CIRCUMFLEX:   -1,
238	CJMP:         -1,
239	CONSTANT:     +1,
240	DUP2:         +2,
241	DUP:          +1,
242	EQL:          -1,
243	FALSE:        +1,
244	FREE:         +1,
245	FREECELL:     +1,
246	GE:           -1,
247	GLOBAL:       +1,
248	GT:           -1,
249	GTGT:         -1,
250	IN:           -1,
251	INDEX:        -1,
252	INPLACE_ADD:  -1,
253	ITERJMP:      variableStackEffect,
254	ITERPOP:      0,
255	ITERPUSH:     -1,
256	JMP:          0,
257	LE:           -1,
258	LOAD:         -1,
259	LOCAL:        +1,
260	LOCALCELL:    +1,
261	LT:           -1,
262	LTLT:         -1,
263	MAKEDICT:     +1,
264	MAKEFUNC:     0,
265	MAKELIST:     variableStackEffect,
266	MAKETUPLE:    variableStackEffect,
267	MANDATORY:    +1,
268	MINUS:        -1,
269	NEQ:          -1,
270	NONE:         +1,
271	NOP:          0,
272	NOT:          0,
273	PERCENT:      -1,
274	PIPE:         -1,
275	PLUS:         -1,
276	POP:          -1,
277	PREDECLARED:  +1,
278	RETURN:       -1,
279	SETLOCALCELL: -1,
280	SETDICT:      -3,
281	SETDICTUNIQ:  -3,
282	SETFIELD:     -2,
283	SETGLOBAL:    -1,
284	SETINDEX:     -3,
285	SETLOCAL:     -1,
286	SLASH:        -1,
287	SLASHSLASH:   -1,
288	SLICE:        -3,
289	STAR:         -1,
290	TRUE:         +1,
291	UMINUS:       0,
292	UNIVERSAL:    +1,
293	UNPACK:       variableStackEffect,
294	UPLUS:        0,
295}
296
297func (op Opcode) String() string {
298	if op < OpcodeMax {
299		if name := opcodeNames[op]; name != "" {
300			return name
301		}
302	}
303	return fmt.Sprintf("illegal op (%d)", op)
304}
305
306// A Program is a Starlark file in executable form.
307//
308// Programs are serialized by the Program.Encode method,
309// which must be updated whenever this declaration is changed.
310type Program struct {
311	Loads     []Binding     // name (really, string) and position of each load stmt
312	Names     []string      // names of attributes and predeclared variables
313	Constants []interface{} // = string | int64 | float64 | *big.Int | Bytes
314	Functions []*Funcode
315	Globals   []Binding // for error messages and tracing
316	Toplevel  *Funcode  // module initialization function
317}
318
319// The type of a bytes literal value, to distinguish from text string.
320type Bytes string
321
322// A Funcode is the code of a compiled Starlark function.
323//
324// Funcodes are serialized by the encoder.function method,
325// which must be updated whenever this declaration is changed.
326type Funcode struct {
327	Prog                  *Program
328	Pos                   syntax.Position // position of def or lambda token
329	Name                  string          // name of this function
330	Doc                   string          // docstring of this function
331	Code                  []byte          // the byte code
332	pclinetab             []uint16        // mapping from pc to linenum
333	Locals                []Binding       // locals, parameters first
334	Cells                 []int           // indices of Locals that require cells
335	Freevars              []Binding       // for tracing
336	MaxStack              int
337	NumParams             int
338	NumKwonlyParams       int
339	HasVarargs, HasKwargs bool
340
341	// -- transient state --
342
343	lntOnce sync.Once
344	lnt     []pclinecol // decoded line number table
345}
346
347type pclinecol struct {
348	pc        uint32
349	line, col int32
350}
351
352// A Binding is the name and position of a binding identifier.
353type Binding struct {
354	Name string
355	Pos  syntax.Position
356}
357
358// A pcomp holds the compiler state for a Program.
359type pcomp struct {
360	prog *Program // what we're building
361
362	names     map[string]uint32
363	constants map[interface{}]uint32
364	functions map[*Funcode]uint32
365}
366
367// An fcomp holds the compiler state for a Funcode.
368type fcomp struct {
369	fn *Funcode // what we're building
370
371	pcomp *pcomp
372	pos   syntax.Position // current position of generated code
373	loops []loop
374	block *block
375}
376
377type loop struct {
378	break_, continue_ *block
379}
380
381type block struct {
382	insns []insn
383
384	// If the last insn is a RETURN, jmp and cjmp are nil.
385	// If the last insn is a CJMP or ITERJMP,
386	//  cjmp and jmp are the "true" and "false" successors.
387	// Otherwise, jmp is the sole successor.
388	jmp, cjmp *block
389
390	initialstack int // for stack depth computation
391
392	// Used during encoding
393	index int // -1 => not encoded yet
394	addr  uint32
395}
396
397type insn struct {
398	op        Opcode
399	arg       uint32
400	line, col int32
401}
402
403// Position returns the source position for program counter pc.
404func (fn *Funcode) Position(pc uint32) syntax.Position {
405	fn.lntOnce.Do(fn.decodeLNT)
406
407	// Binary search to find last LNT entry not greater than pc.
408	// To avoid dynamic dispatch, this is a specialization of
409	// sort.Search using this predicate:
410	//   !(i < len(fn.lnt)-1 && fn.lnt[i+1].pc <= pc)
411	n := len(fn.lnt)
412	i, j := 0, n
413	for i < j {
414		h := int(uint(i+j) >> 1)
415		if !(h >= n-1 || fn.lnt[h+1].pc > pc) {
416			i = h + 1
417		} else {
418			j = h
419		}
420	}
421
422	var line, col int32
423	if i < n {
424		line = fn.lnt[i].line
425		col = fn.lnt[i].col
426	}
427
428	pos := fn.Pos // copy the (annoyingly inaccessible) filename
429	pos.Col = col
430	pos.Line = line
431	return pos
432}
433
434// decodeLNT decodes the line number table and populates fn.lnt.
435// It is called at most once.
436func (fn *Funcode) decodeLNT() {
437	// Conceptually the table contains rows of the form
438	// (pc uint32, line int32, col int32), sorted by pc.
439	// We use a delta encoding, since the differences
440	// between successive pc, line, and column values
441	// are typically small and positive (though line and
442	// especially column differences may be negative).
443	// The delta encoding starts from
444	// {pc: 0, line: fn.Pos.Line, col: fn.Pos.Col}.
445	//
446	// Each entry is packed into one or more 16-bit values:
447	//    Δpc        uint4
448	//    Δline      int5
449	//    Δcol       int6
450	//    incomplete uint1
451	// The top 4 bits are the unsigned delta pc.
452	// The next 5 bits are the signed line number delta.
453	// The next 6 bits are the signed column number delta.
454	// The bottom bit indicates that more rows follow because
455	// one of the deltas was maxed out.
456	// These field widths were chosen from a sample of real programs,
457	// and allow >97% of rows to be encoded in a single uint16.
458
459	fn.lnt = make([]pclinecol, 0, len(fn.pclinetab)) // a minor overapproximation
460	entry := pclinecol{
461		pc:   0,
462		line: fn.Pos.Line,
463		col:  fn.Pos.Col,
464	}
465	for _, x := range fn.pclinetab {
466		entry.pc += uint32(x) >> 12
467		entry.line += int32((int16(x) << 4) >> (16 - 5)) // sign extend Δline
468		entry.col += int32((int16(x) << 9) >> (16 - 6))  // sign extend Δcol
469		if (x & 1) == 0 {
470			fn.lnt = append(fn.lnt, entry)
471		}
472	}
473}
474
475// bindings converts resolve.Bindings to compiled form.
476func bindings(bindings []*resolve.Binding) []Binding {
477	res := make([]Binding, len(bindings))
478	for i, bind := range bindings {
479		res[i].Name = bind.First.Name
480		res[i].Pos = bind.First.NamePos
481	}
482	return res
483}
484
485// Expr compiles an expression to a program whose toplevel function evaluates it.
486func Expr(expr syntax.Expr, name string, locals []*resolve.Binding) *Program {
487	pos := syntax.Start(expr)
488	stmts := []syntax.Stmt{&syntax.ReturnStmt{Result: expr}}
489	return File(stmts, pos, name, locals, nil)
490}
491
492// File compiles the statements of a file into a program.
493func File(stmts []syntax.Stmt, pos syntax.Position, name string, locals, globals []*resolve.Binding) *Program {
494	pcomp := &pcomp{
495		prog: &Program{
496			Globals: bindings(globals),
497		},
498		names:     make(map[string]uint32),
499		constants: make(map[interface{}]uint32),
500		functions: make(map[*Funcode]uint32),
501	}
502	pcomp.prog.Toplevel = pcomp.function(name, pos, stmts, locals, nil)
503
504	return pcomp.prog
505}
506
507func (pcomp *pcomp) function(name string, pos syntax.Position, stmts []syntax.Stmt, locals, freevars []*resolve.Binding) *Funcode {
508	fcomp := &fcomp{
509		pcomp: pcomp,
510		pos:   pos,
511		fn: &Funcode{
512			Prog:     pcomp.prog,
513			Pos:      pos,
514			Name:     name,
515			Doc:      docStringFromBody(stmts),
516			Locals:   bindings(locals),
517			Freevars: bindings(freevars),
518		},
519	}
520
521	// Record indices of locals that require cells.
522	for i, local := range locals {
523		if local.Scope == resolve.Cell {
524			fcomp.fn.Cells = append(fcomp.fn.Cells, i)
525		}
526	}
527
528	if debug {
529		fmt.Fprintf(os.Stderr, "start function(%s @ %s)\n", name, pos)
530	}
531
532	// Convert AST to a CFG of instructions.
533	entry := fcomp.newBlock()
534	fcomp.block = entry
535	fcomp.stmts(stmts)
536	if fcomp.block != nil {
537		fcomp.emit(NONE)
538		fcomp.emit(RETURN)
539	}
540
541	var oops bool // something bad happened
542
543	setinitialstack := func(b *block, depth int) {
544		if b.initialstack == -1 {
545			b.initialstack = depth
546		} else if b.initialstack != depth {
547			fmt.Fprintf(os.Stderr, "%d: setinitialstack: depth mismatch: %d vs %d\n",
548				b.index, b.initialstack, depth)
549			oops = true
550		}
551	}
552
553	// Linearize the CFG:
554	// compute order, address, and initial
555	// stack depth of each reachable block.
556	var pc uint32
557	var blocks []*block
558	var maxstack int
559	var visit func(b *block)
560	visit = func(b *block) {
561		if b.index >= 0 {
562			return // already visited
563		}
564		b.index = len(blocks)
565		b.addr = pc
566		blocks = append(blocks, b)
567
568		stack := b.initialstack
569		if debug {
570			fmt.Fprintf(os.Stderr, "%s block %d: (stack = %d)\n", name, b.index, stack)
571		}
572		var cjmpAddr *uint32
573		var isiterjmp int
574		for i, insn := range b.insns {
575			pc++
576
577			// Compute size of argument.
578			if insn.op >= OpcodeArgMin {
579				switch insn.op {
580				case ITERJMP:
581					isiterjmp = 1
582					fallthrough
583				case CJMP:
584					cjmpAddr = &b.insns[i].arg
585					pc += 4
586				default:
587					pc += uint32(argLen(insn.arg))
588				}
589			}
590
591			// Compute effect on stack.
592			se := insn.stackeffect()
593			if debug {
594				fmt.Fprintln(os.Stderr, "\t", insn.op, stack, stack+se)
595			}
596			stack += se
597			if stack < 0 {
598				fmt.Fprintf(os.Stderr, "After pc=%d: stack underflow\n", pc)
599				oops = true
600			}
601			if stack+isiterjmp > maxstack {
602				maxstack = stack + isiterjmp
603			}
604		}
605
606		if debug {
607			fmt.Fprintf(os.Stderr, "successors of block %d (start=%d):\n",
608				b.addr, b.index)
609			if b.jmp != nil {
610				fmt.Fprintf(os.Stderr, "jmp to %d\n", b.jmp.index)
611			}
612			if b.cjmp != nil {
613				fmt.Fprintf(os.Stderr, "cjmp to %d\n", b.cjmp.index)
614			}
615		}
616
617		// Place the jmp block next.
618		if b.jmp != nil {
619			// jump threading (empty cycles are impossible)
620			for b.jmp.insns == nil {
621				b.jmp = b.jmp.jmp
622			}
623
624			setinitialstack(b.jmp, stack+isiterjmp)
625			if b.jmp.index < 0 {
626				// Successor is not yet visited:
627				// place it next and fall through.
628				visit(b.jmp)
629			} else {
630				// Successor already visited;
631				// explicit backward jump required.
632				pc += 5
633			}
634		}
635
636		// Then the cjmp block.
637		if b.cjmp != nil {
638			// jump threading (empty cycles are impossible)
639			for b.cjmp.insns == nil {
640				b.cjmp = b.cjmp.jmp
641			}
642
643			setinitialstack(b.cjmp, stack)
644			visit(b.cjmp)
645
646			// Patch the CJMP/ITERJMP, if present.
647			if cjmpAddr != nil {
648				*cjmpAddr = b.cjmp.addr
649			}
650		}
651	}
652	setinitialstack(entry, 0)
653	visit(entry)
654
655	fn := fcomp.fn
656	fn.MaxStack = maxstack
657
658	// Emit bytecode (and position table).
659	if Disassemble {
660		fmt.Fprintf(os.Stderr, "Function %s: (%d blocks, %d bytes)\n", name, len(blocks), pc)
661	}
662	fcomp.generate(blocks, pc)
663
664	if debug {
665		fmt.Fprintf(os.Stderr, "code=%d maxstack=%d\n", fn.Code, fn.MaxStack)
666	}
667
668	// Don't panic until we've completed printing of the function.
669	if oops {
670		panic("internal error")
671	}
672
673	if debug {
674		fmt.Fprintf(os.Stderr, "end function(%s @ %s)\n", name, pos)
675	}
676
677	return fn
678}
679
680func docStringFromBody(body []syntax.Stmt) string {
681	if len(body) == 0 {
682		return ""
683	}
684	expr, ok := body[0].(*syntax.ExprStmt)
685	if !ok {
686		return ""
687	}
688	lit, ok := expr.X.(*syntax.Literal)
689	if !ok {
690		return ""
691	}
692	if lit.Token != syntax.STRING {
693		return ""
694	}
695	return lit.Value.(string)
696}
697
698func (insn *insn) stackeffect() int {
699	se := int(stackEffect[insn.op])
700	if se == variableStackEffect {
701		arg := int(insn.arg)
702		switch insn.op {
703		case CALL, CALL_KW, CALL_VAR, CALL_VAR_KW:
704			se = -int(2*(insn.arg&0xff) + insn.arg>>8)
705			if insn.op != CALL {
706				se--
707			}
708			if insn.op == CALL_VAR_KW {
709				se--
710			}
711		case ITERJMP:
712			// Stack effect differs by successor:
713			// +1 for jmp/false/ok
714			//  0 for cjmp/true/exhausted
715			// Handled specially in caller.
716			se = 0
717		case MAKELIST, MAKETUPLE:
718			se = 1 - arg
719		case UNPACK:
720			se = arg - 1
721		default:
722			panic(insn.op)
723		}
724	}
725	return se
726}
727
728// generate emits the linear instruction stream from the CFG,
729// and builds the PC-to-line number table.
730func (fcomp *fcomp) generate(blocks []*block, codelen uint32) {
731	code := make([]byte, 0, codelen)
732	var pclinetab []uint16
733	prev := pclinecol{
734		pc:   0,
735		line: fcomp.fn.Pos.Line,
736		col:  fcomp.fn.Pos.Col,
737	}
738
739	for _, b := range blocks {
740		if Disassemble {
741			fmt.Fprintf(os.Stderr, "%d:\n", b.index)
742		}
743		pc := b.addr
744		for _, insn := range b.insns {
745			if insn.line != 0 {
746				// Instruction has a source position.  Delta-encode it.
747				// See Funcode.Position for the encoding.
748				for {
749					var incomplete uint16
750
751					// Δpc, uint4
752					deltapc := pc - prev.pc
753					if deltapc > 0x0f {
754						deltapc = 0x0f
755						incomplete = 1
756					}
757					prev.pc += deltapc
758
759					// Δline, int5
760					deltaline, ok := clip(insn.line-prev.line, -0x10, 0x0f)
761					if !ok {
762						incomplete = 1
763					}
764					prev.line += deltaline
765
766					// Δcol, int6
767					deltacol, ok := clip(insn.col-prev.col, -0x20, 0x1f)
768					if !ok {
769						incomplete = 1
770					}
771					prev.col += deltacol
772
773					entry := uint16(deltapc<<12) | uint16(deltaline&0x1f)<<7 | uint16(deltacol&0x3f)<<1 | incomplete
774					pclinetab = append(pclinetab, entry)
775					if incomplete == 0 {
776						break
777					}
778				}
779
780				if Disassemble {
781					fmt.Fprintf(os.Stderr, "\t\t\t\t\t; %s:%d:%d\n",
782						filepath.Base(fcomp.fn.Pos.Filename()), insn.line, insn.col)
783				}
784			}
785			if Disassemble {
786				PrintOp(fcomp.fn, pc, insn.op, insn.arg)
787			}
788			code = append(code, byte(insn.op))
789			pc++
790			if insn.op >= OpcodeArgMin {
791				if insn.op == CJMP || insn.op == ITERJMP {
792					code = addUint32(code, insn.arg, 4) // pad arg to 4 bytes
793				} else {
794					code = addUint32(code, insn.arg, 0)
795				}
796				pc = uint32(len(code))
797			}
798		}
799
800		if b.jmp != nil && b.jmp.index != b.index+1 {
801			addr := b.jmp.addr
802			if Disassemble {
803				fmt.Fprintf(os.Stderr, "\t%d\tjmp\t\t%d\t; block %d\n",
804					pc, addr, b.jmp.index)
805			}
806			code = append(code, byte(JMP))
807			code = addUint32(code, addr, 4)
808		}
809	}
810	if len(code) != int(codelen) {
811		panic("internal error: wrong code length")
812	}
813
814	fcomp.fn.pclinetab = pclinetab
815	fcomp.fn.Code = code
816}
817
818// clip returns the value nearest x in the range [min...max],
819// and whether it equals x.
820func clip(x, min, max int32) (int32, bool) {
821	if x > max {
822		return max, false
823	} else if x < min {
824		return min, false
825	} else {
826		return x, true
827	}
828}
829
830// addUint32 encodes x as 7-bit little-endian varint.
831// TODO(adonovan): opt: steal top two bits of opcode
832// to encode the number of complete bytes that follow.
833func addUint32(code []byte, x uint32, min int) []byte {
834	end := len(code) + min
835	for x >= 0x80 {
836		code = append(code, byte(x)|0x80)
837		x >>= 7
838	}
839	code = append(code, byte(x))
840	// Pad the operand with NOPs to exactly min bytes.
841	for len(code) < end {
842		code = append(code, byte(NOP))
843	}
844	return code
845}
846
847func argLen(x uint32) int {
848	n := 0
849	for x >= 0x80 {
850		n++
851		x >>= 7
852	}
853	return n + 1
854}
855
856// PrintOp prints an instruction.
857// It is provided for debugging.
858func PrintOp(fn *Funcode, pc uint32, op Opcode, arg uint32) {
859	if op < OpcodeArgMin {
860		fmt.Fprintf(os.Stderr, "\t%d\t%s\n", pc, op)
861		return
862	}
863
864	var comment string
865	switch op {
866	case CONSTANT:
867		switch x := fn.Prog.Constants[arg].(type) {
868		case string:
869			comment = strconv.Quote(x)
870		case Bytes:
871			comment = "b" + strconv.Quote(string(x))
872		default:
873			comment = fmt.Sprint(x)
874		}
875	case MAKEFUNC:
876		comment = fn.Prog.Functions[arg].Name
877	case SETLOCAL, LOCAL:
878		comment = fn.Locals[arg].Name
879	case SETGLOBAL, GLOBAL:
880		comment = fn.Prog.Globals[arg].Name
881	case ATTR, SETFIELD, PREDECLARED, UNIVERSAL:
882		comment = fn.Prog.Names[arg]
883	case FREE:
884		comment = fn.Freevars[arg].Name
885	case CALL, CALL_VAR, CALL_KW, CALL_VAR_KW:
886		comment = fmt.Sprintf("%d pos, %d named", arg>>8, arg&0xff)
887	default:
888		// JMP, CJMP, ITERJMP, MAKETUPLE, MAKELIST, LOAD, UNPACK:
889		// arg is just a number
890	}
891	var buf bytes.Buffer
892	fmt.Fprintf(&buf, "\t%d\t%-10s\t%d", pc, op, arg)
893	if comment != "" {
894		fmt.Fprint(&buf, "\t; ", comment)
895	}
896	fmt.Fprintln(&buf)
897	os.Stderr.Write(buf.Bytes())
898}
899
900// newBlock returns a new block.
901func (fcomp) newBlock() *block {
902	return &block{index: -1, initialstack: -1}
903}
904
905// emit emits an instruction to the current block.
906func (fcomp *fcomp) emit(op Opcode) {
907	if op >= OpcodeArgMin {
908		panic("missing arg: " + op.String())
909	}
910	insn := insn{op: op, line: fcomp.pos.Line, col: fcomp.pos.Col}
911	fcomp.block.insns = append(fcomp.block.insns, insn)
912	fcomp.pos.Line = 0
913	fcomp.pos.Col = 0
914}
915
916// emit1 emits an instruction with an immediate operand.
917func (fcomp *fcomp) emit1(op Opcode, arg uint32) {
918	if op < OpcodeArgMin {
919		panic("unwanted arg: " + op.String())
920	}
921	insn := insn{op: op, arg: arg, line: fcomp.pos.Line, col: fcomp.pos.Col}
922	fcomp.block.insns = append(fcomp.block.insns, insn)
923	fcomp.pos.Line = 0
924	fcomp.pos.Col = 0
925}
926
927// jump emits a jump to the specified block.
928// On return, the current block is unset.
929func (fcomp *fcomp) jump(b *block) {
930	if b == fcomp.block {
931		panic("self-jump") // unreachable: Starlark has no arbitrary looping constructs
932	}
933	fcomp.block.jmp = b
934	fcomp.block = nil
935}
936
937// condjump emits a conditional jump (CJMP or ITERJMP)
938// to the specified true/false blocks.
939// (For ITERJMP, the cases are jmp/f/ok and cjmp/t/exhausted.)
940// On return, the current block is unset.
941func (fcomp *fcomp) condjump(op Opcode, t, f *block) {
942	if !(op == CJMP || op == ITERJMP) {
943		panic("not a conditional jump: " + op.String())
944	}
945	fcomp.emit1(op, 0) // fill in address later
946	fcomp.block.cjmp = t
947	fcomp.jump(f)
948}
949
950// nameIndex returns the index of the specified name
951// within the name pool, adding it if necessary.
952func (pcomp *pcomp) nameIndex(name string) uint32 {
953	index, ok := pcomp.names[name]
954	if !ok {
955		index = uint32(len(pcomp.prog.Names))
956		pcomp.names[name] = index
957		pcomp.prog.Names = append(pcomp.prog.Names, name)
958	}
959	return index
960}
961
962// constantIndex returns the index of the specified constant
963// within the constant pool, adding it if necessary.
964func (pcomp *pcomp) constantIndex(v interface{}) uint32 {
965	index, ok := pcomp.constants[v]
966	if !ok {
967		index = uint32(len(pcomp.prog.Constants))
968		pcomp.constants[v] = index
969		pcomp.prog.Constants = append(pcomp.prog.Constants, v)
970	}
971	return index
972}
973
974// functionIndex returns the index of the specified function
975// AST the nestedfun pool, adding it if necessary.
976func (pcomp *pcomp) functionIndex(fn *Funcode) uint32 {
977	index, ok := pcomp.functions[fn]
978	if !ok {
979		index = uint32(len(pcomp.prog.Functions))
980		pcomp.functions[fn] = index
981		pcomp.prog.Functions = append(pcomp.prog.Functions, fn)
982	}
983	return index
984}
985
986// string emits code to push the specified string.
987func (fcomp *fcomp) string(s string) {
988	fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(s))
989}
990
991// setPos sets the current source position.
992// It should be called prior to any operation that can fail dynamically.
993// All positions are assumed to belong to the same file.
994func (fcomp *fcomp) setPos(pos syntax.Position) {
995	fcomp.pos = pos
996}
997
998// set emits code to store the top-of-stack value
999// to the specified local, cell, or global variable.
1000func (fcomp *fcomp) set(id *syntax.Ident) {
1001	bind := id.Binding.(*resolve.Binding)
1002	switch bind.Scope {
1003	case resolve.Local:
1004		fcomp.emit1(SETLOCAL, uint32(bind.Index))
1005	case resolve.Cell:
1006		fcomp.emit1(SETLOCALCELL, uint32(bind.Index))
1007	case resolve.Global:
1008		fcomp.emit1(SETGLOBAL, uint32(bind.Index))
1009	default:
1010		log.Panicf("%s: set(%s): not global/local/cell (%d)", id.NamePos, id.Name, bind.Scope)
1011	}
1012}
1013
1014// lookup emits code to push the value of the specified variable.
1015func (fcomp *fcomp) lookup(id *syntax.Ident) {
1016	bind := id.Binding.(*resolve.Binding)
1017	if bind.Scope != resolve.Universal { // (universal lookup can't fail)
1018		fcomp.setPos(id.NamePos)
1019	}
1020	switch bind.Scope {
1021	case resolve.Local:
1022		fcomp.emit1(LOCAL, uint32(bind.Index))
1023	case resolve.Free:
1024		fcomp.emit1(FREECELL, uint32(bind.Index))
1025	case resolve.Cell:
1026		fcomp.emit1(LOCALCELL, uint32(bind.Index))
1027	case resolve.Global:
1028		fcomp.emit1(GLOBAL, uint32(bind.Index))
1029	case resolve.Predeclared:
1030		fcomp.emit1(PREDECLARED, fcomp.pcomp.nameIndex(id.Name))
1031	case resolve.Universal:
1032		fcomp.emit1(UNIVERSAL, fcomp.pcomp.nameIndex(id.Name))
1033	default:
1034		log.Panicf("%s: compiler.lookup(%s): scope = %d", id.NamePos, id.Name, bind.Scope)
1035	}
1036}
1037
1038func (fcomp *fcomp) stmts(stmts []syntax.Stmt) {
1039	for _, stmt := range stmts {
1040		fcomp.stmt(stmt)
1041	}
1042}
1043
1044func (fcomp *fcomp) stmt(stmt syntax.Stmt) {
1045	switch stmt := stmt.(type) {
1046	case *syntax.ExprStmt:
1047		if _, ok := stmt.X.(*syntax.Literal); ok {
1048			// Opt: don't compile doc comments only to pop them.
1049			return
1050		}
1051		fcomp.expr(stmt.X)
1052		fcomp.emit(POP)
1053
1054	case *syntax.BranchStmt:
1055		// Resolver invariant: break/continue appear only within loops.
1056		switch stmt.Token {
1057		case syntax.PASS:
1058			// no-op
1059		case syntax.BREAK:
1060			b := fcomp.loops[len(fcomp.loops)-1].break_
1061			fcomp.jump(b)
1062			fcomp.block = fcomp.newBlock() // dead code
1063		case syntax.CONTINUE:
1064			b := fcomp.loops[len(fcomp.loops)-1].continue_
1065			fcomp.jump(b)
1066			fcomp.block = fcomp.newBlock() // dead code
1067		}
1068
1069	case *syntax.IfStmt:
1070		// Keep consistent with CondExpr.
1071		t := fcomp.newBlock()
1072		f := fcomp.newBlock()
1073		done := fcomp.newBlock()
1074
1075		fcomp.ifelse(stmt.Cond, t, f)
1076
1077		fcomp.block = t
1078		fcomp.stmts(stmt.True)
1079		fcomp.jump(done)
1080
1081		fcomp.block = f
1082		fcomp.stmts(stmt.False)
1083		fcomp.jump(done)
1084
1085		fcomp.block = done
1086
1087	case *syntax.AssignStmt:
1088		switch stmt.Op {
1089		case syntax.EQ:
1090			// simple assignment: x = y
1091			fcomp.expr(stmt.RHS)
1092			fcomp.assign(stmt.OpPos, stmt.LHS)
1093
1094		case syntax.PLUS_EQ,
1095			syntax.MINUS_EQ,
1096			syntax.STAR_EQ,
1097			syntax.SLASH_EQ,
1098			syntax.SLASHSLASH_EQ,
1099			syntax.PERCENT_EQ,
1100			syntax.AMP_EQ,
1101			syntax.PIPE_EQ,
1102			syntax.CIRCUMFLEX_EQ,
1103			syntax.LTLT_EQ,
1104			syntax.GTGT_EQ:
1105			// augmented assignment: x += y
1106
1107			var set func()
1108
1109			// Evaluate "address" of x exactly once to avoid duplicate side-effects.
1110			switch lhs := unparen(stmt.LHS).(type) {
1111			case *syntax.Ident:
1112				// x = ...
1113				fcomp.lookup(lhs)
1114				set = func() {
1115					fcomp.set(lhs)
1116				}
1117
1118			case *syntax.IndexExpr:
1119				// x[y] = ...
1120				fcomp.expr(lhs.X)
1121				fcomp.expr(lhs.Y)
1122				fcomp.emit(DUP2)
1123				fcomp.setPos(lhs.Lbrack)
1124				fcomp.emit(INDEX)
1125				set = func() {
1126					fcomp.setPos(lhs.Lbrack)
1127					fcomp.emit(SETINDEX)
1128				}
1129
1130			case *syntax.DotExpr:
1131				// x.f = ...
1132				fcomp.expr(lhs.X)
1133				fcomp.emit(DUP)
1134				name := fcomp.pcomp.nameIndex(lhs.Name.Name)
1135				fcomp.setPos(lhs.Dot)
1136				fcomp.emit1(ATTR, name)
1137				set = func() {
1138					fcomp.setPos(lhs.Dot)
1139					fcomp.emit1(SETFIELD, name)
1140				}
1141
1142			default:
1143				panic(lhs)
1144			}
1145
1146			fcomp.expr(stmt.RHS)
1147
1148			if stmt.Op == syntax.PLUS_EQ {
1149				// Allow the runtime to optimize list += iterable.
1150				fcomp.setPos(stmt.OpPos)
1151				fcomp.emit(INPLACE_ADD)
1152			} else {
1153				fcomp.binop(stmt.OpPos, stmt.Op-syntax.PLUS_EQ+syntax.PLUS)
1154			}
1155			set()
1156		}
1157
1158	case *syntax.DefStmt:
1159		fcomp.function(stmt.Function.(*resolve.Function))
1160		fcomp.set(stmt.Name)
1161
1162	case *syntax.ForStmt:
1163		// Keep consistent with ForClause.
1164		head := fcomp.newBlock()
1165		body := fcomp.newBlock()
1166		tail := fcomp.newBlock()
1167
1168		fcomp.expr(stmt.X)
1169		fcomp.setPos(stmt.For)
1170		fcomp.emit(ITERPUSH)
1171		fcomp.jump(head)
1172
1173		fcomp.block = head
1174		fcomp.condjump(ITERJMP, tail, body)
1175
1176		fcomp.block = body
1177		fcomp.assign(stmt.For, stmt.Vars)
1178		fcomp.loops = append(fcomp.loops, loop{break_: tail, continue_: head})
1179		fcomp.stmts(stmt.Body)
1180		fcomp.loops = fcomp.loops[:len(fcomp.loops)-1]
1181		fcomp.jump(head)
1182
1183		fcomp.block = tail
1184		fcomp.emit(ITERPOP)
1185
1186	case *syntax.WhileStmt:
1187		head := fcomp.newBlock()
1188		body := fcomp.newBlock()
1189		done := fcomp.newBlock()
1190
1191		fcomp.jump(head)
1192		fcomp.block = head
1193		fcomp.ifelse(stmt.Cond, body, done)
1194
1195		fcomp.block = body
1196		fcomp.loops = append(fcomp.loops, loop{break_: done, continue_: head})
1197		fcomp.stmts(stmt.Body)
1198		fcomp.loops = fcomp.loops[:len(fcomp.loops)-1]
1199		fcomp.jump(head)
1200
1201		fcomp.block = done
1202
1203	case *syntax.ReturnStmt:
1204		if stmt.Result != nil {
1205			fcomp.expr(stmt.Result)
1206		} else {
1207			fcomp.emit(NONE)
1208		}
1209		fcomp.emit(RETURN)
1210		fcomp.block = fcomp.newBlock() // dead code
1211
1212	case *syntax.LoadStmt:
1213		for i := range stmt.From {
1214			fcomp.string(stmt.From[i].Name)
1215		}
1216		module := stmt.Module.Value.(string)
1217		fcomp.pcomp.prog.Loads = append(fcomp.pcomp.prog.Loads, Binding{
1218			Name: module,
1219			Pos:  stmt.Module.TokenPos,
1220		})
1221		fcomp.string(module)
1222		fcomp.setPos(stmt.Load)
1223		fcomp.emit1(LOAD, uint32(len(stmt.From)))
1224		for i := range stmt.To {
1225			fcomp.set(stmt.To[len(stmt.To)-1-i])
1226		}
1227
1228	default:
1229		start, _ := stmt.Span()
1230		log.Panicf("%s: exec: unexpected statement %T", start, stmt)
1231	}
1232}
1233
1234// assign implements lhs = rhs for arbitrary expressions lhs.
1235// RHS is on top of stack, consumed.
1236func (fcomp *fcomp) assign(pos syntax.Position, lhs syntax.Expr) {
1237	switch lhs := lhs.(type) {
1238	case *syntax.ParenExpr:
1239		// (lhs) = rhs
1240		fcomp.assign(pos, lhs.X)
1241
1242	case *syntax.Ident:
1243		// x = rhs
1244		fcomp.set(lhs)
1245
1246	case *syntax.TupleExpr:
1247		// x, y = rhs
1248		fcomp.assignSequence(pos, lhs.List)
1249
1250	case *syntax.ListExpr:
1251		// [x, y] = rhs
1252		fcomp.assignSequence(pos, lhs.List)
1253
1254	case *syntax.IndexExpr:
1255		// x[y] = rhs
1256		fcomp.expr(lhs.X)
1257		fcomp.emit(EXCH)
1258		fcomp.expr(lhs.Y)
1259		fcomp.emit(EXCH)
1260		fcomp.setPos(lhs.Lbrack)
1261		fcomp.emit(SETINDEX)
1262
1263	case *syntax.DotExpr:
1264		// x.f = rhs
1265		fcomp.expr(lhs.X)
1266		fcomp.emit(EXCH)
1267		fcomp.setPos(lhs.Dot)
1268		fcomp.emit1(SETFIELD, fcomp.pcomp.nameIndex(lhs.Name.Name))
1269
1270	default:
1271		panic(lhs)
1272	}
1273}
1274
1275func (fcomp *fcomp) assignSequence(pos syntax.Position, lhs []syntax.Expr) {
1276	fcomp.setPos(pos)
1277	fcomp.emit1(UNPACK, uint32(len(lhs)))
1278	for i := range lhs {
1279		fcomp.assign(pos, lhs[i])
1280	}
1281}
1282
1283func (fcomp *fcomp) expr(e syntax.Expr) {
1284	switch e := e.(type) {
1285	case *syntax.ParenExpr:
1286		fcomp.expr(e.X)
1287
1288	case *syntax.Ident:
1289		fcomp.lookup(e)
1290
1291	case *syntax.Literal:
1292		// e.Value is int64, float64, *bigInt, string
1293		v := e.Value
1294		if e.Token == syntax.BYTES {
1295			v = Bytes(v.(string))
1296		}
1297		fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(v))
1298
1299	case *syntax.ListExpr:
1300		for _, x := range e.List {
1301			fcomp.expr(x)
1302		}
1303		fcomp.emit1(MAKELIST, uint32(len(e.List)))
1304
1305	case *syntax.CondExpr:
1306		// Keep consistent with IfStmt.
1307		t := fcomp.newBlock()
1308		f := fcomp.newBlock()
1309		done := fcomp.newBlock()
1310
1311		fcomp.ifelse(e.Cond, t, f)
1312
1313		fcomp.block = t
1314		fcomp.expr(e.True)
1315		fcomp.jump(done)
1316
1317		fcomp.block = f
1318		fcomp.expr(e.False)
1319		fcomp.jump(done)
1320
1321		fcomp.block = done
1322
1323	case *syntax.IndexExpr:
1324		fcomp.expr(e.X)
1325		fcomp.expr(e.Y)
1326		fcomp.setPos(e.Lbrack)
1327		fcomp.emit(INDEX)
1328
1329	case *syntax.SliceExpr:
1330		fcomp.setPos(e.Lbrack)
1331		fcomp.expr(e.X)
1332		if e.Lo != nil {
1333			fcomp.expr(e.Lo)
1334		} else {
1335			fcomp.emit(NONE)
1336		}
1337		if e.Hi != nil {
1338			fcomp.expr(e.Hi)
1339		} else {
1340			fcomp.emit(NONE)
1341		}
1342		if e.Step != nil {
1343			fcomp.expr(e.Step)
1344		} else {
1345			fcomp.emit(NONE)
1346		}
1347		fcomp.emit(SLICE)
1348
1349	case *syntax.Comprehension:
1350		if e.Curly {
1351			fcomp.emit(MAKEDICT)
1352		} else {
1353			fcomp.emit1(MAKELIST, 0)
1354		}
1355		fcomp.comprehension(e, 0)
1356
1357	case *syntax.TupleExpr:
1358		fcomp.tuple(e.List)
1359
1360	case *syntax.DictExpr:
1361		fcomp.emit(MAKEDICT)
1362		for _, entry := range e.List {
1363			entry := entry.(*syntax.DictEntry)
1364			fcomp.emit(DUP)
1365			fcomp.expr(entry.Key)
1366			fcomp.expr(entry.Value)
1367			fcomp.setPos(entry.Colon)
1368			fcomp.emit(SETDICTUNIQ)
1369		}
1370
1371	case *syntax.UnaryExpr:
1372		fcomp.expr(e.X)
1373		fcomp.setPos(e.OpPos)
1374		switch e.Op {
1375		case syntax.MINUS:
1376			fcomp.emit(UMINUS)
1377		case syntax.PLUS:
1378			fcomp.emit(UPLUS)
1379		case syntax.NOT:
1380			fcomp.emit(NOT)
1381		case syntax.TILDE:
1382			fcomp.emit(TILDE)
1383		default:
1384			log.Panicf("%s: unexpected unary op: %s", e.OpPos, e.Op)
1385		}
1386
1387	case *syntax.BinaryExpr:
1388		switch e.Op {
1389		// short-circuit operators
1390		// TODO(adonovan): use ifelse to simplify conditions.
1391		case syntax.OR:
1392			// x or y  =>  if x then x else y
1393			done := fcomp.newBlock()
1394			y := fcomp.newBlock()
1395
1396			fcomp.expr(e.X)
1397			fcomp.emit(DUP)
1398			fcomp.condjump(CJMP, done, y)
1399
1400			fcomp.block = y
1401			fcomp.emit(POP) // discard X
1402			fcomp.expr(e.Y)
1403			fcomp.jump(done)
1404
1405			fcomp.block = done
1406
1407		case syntax.AND:
1408			// x and y  =>  if x then y else x
1409			done := fcomp.newBlock()
1410			y := fcomp.newBlock()
1411
1412			fcomp.expr(e.X)
1413			fcomp.emit(DUP)
1414			fcomp.condjump(CJMP, y, done)
1415
1416			fcomp.block = y
1417			fcomp.emit(POP) // discard X
1418			fcomp.expr(e.Y)
1419			fcomp.jump(done)
1420
1421			fcomp.block = done
1422
1423		case syntax.PLUS:
1424			fcomp.plus(e)
1425
1426		default:
1427			// all other strict binary operator (includes comparisons)
1428			fcomp.expr(e.X)
1429			fcomp.expr(e.Y)
1430			fcomp.binop(e.OpPos, e.Op)
1431		}
1432
1433	case *syntax.DotExpr:
1434		fcomp.expr(e.X)
1435		fcomp.setPos(e.Dot)
1436		fcomp.emit1(ATTR, fcomp.pcomp.nameIndex(e.Name.Name))
1437
1438	case *syntax.CallExpr:
1439		fcomp.call(e)
1440
1441	case *syntax.LambdaExpr:
1442		fcomp.function(e.Function.(*resolve.Function))
1443
1444	default:
1445		start, _ := e.Span()
1446		log.Panicf("%s: unexpected expr %T", start, e)
1447	}
1448}
1449
1450type summand struct {
1451	x       syntax.Expr
1452	plusPos syntax.Position
1453}
1454
1455// plus emits optimized code for ((a+b)+...)+z that avoids naive
1456// quadratic behavior for strings, tuples, and lists,
1457// and folds together adjacent literals of the same type.
1458func (fcomp *fcomp) plus(e *syntax.BinaryExpr) {
1459	// Gather all the right operands of the left tree of plusses.
1460	// A tree (((a+b)+c)+d) becomes args=[a +b +c +d].
1461	args := make([]summand, 0, 2) // common case: 2 operands
1462	for plus := e; ; {
1463		args = append(args, summand{unparen(plus.Y), plus.OpPos})
1464		left := unparen(plus.X)
1465		x, ok := left.(*syntax.BinaryExpr)
1466		if !ok || x.Op != syntax.PLUS {
1467			args = append(args, summand{x: left})
1468			break
1469		}
1470		plus = x
1471	}
1472	// Reverse args to syntactic order.
1473	for i, n := 0, len(args)/2; i < n; i++ {
1474		j := len(args) - 1 - i
1475		args[i], args[j] = args[j], args[i]
1476	}
1477
1478	// Fold sums of adjacent literals of the same type: ""+"", []+[], ()+().
1479	out := args[:0] // compact in situ
1480	for i := 0; i < len(args); {
1481		j := i + 1
1482		if code := addable(args[i].x); code != 0 {
1483			for j < len(args) && addable(args[j].x) == code {
1484				j++
1485			}
1486			if j > i+1 {
1487				args[i].x = add(code, args[i:j])
1488			}
1489		}
1490		out = append(out, args[i])
1491		i = j
1492	}
1493	args = out
1494
1495	// Emit code for an n-ary sum (n > 0).
1496	fcomp.expr(args[0].x)
1497	for _, summand := range args[1:] {
1498		fcomp.expr(summand.x)
1499		fcomp.setPos(summand.plusPos)
1500		fcomp.emit(PLUS)
1501	}
1502
1503	// If len(args) > 2, use of an accumulator instead of a chain of
1504	// PLUS operations may be more efficient.
1505	// However, no gain was measured on a workload analogous to Bazel loading;
1506	// TODO(adonovan): opt: re-evaluate on a Bazel analysis-like workload.
1507	//
1508	// We cannot use a single n-ary SUM operation
1509	//    a b c SUM<3>
1510	// because we need to report a distinct error for each
1511	// individual '+' operation, so three additional operations are
1512	// needed:
1513	//
1514	//   ACCSTART => create buffer and append to it
1515	//   ACCUM    => append to buffer
1516	//   ACCEND   => get contents of buffer
1517	//
1518	// For string, list, and tuple values, the interpreter can
1519	// optimize these operations by using a mutable buffer.
1520	// For all other types, ACCSTART and ACCEND would behave like
1521	// the identity function and ACCUM behaves like PLUS.
1522	// ACCUM must correctly support user-defined operations
1523	// such as list+foo.
1524	//
1525	// fcomp.emit(ACCSTART)
1526	// for _, summand := range args[1:] {
1527	// 	fcomp.expr(summand.x)
1528	// 	fcomp.setPos(summand.plusPos)
1529	// 	fcomp.emit(ACCUM)
1530	// }
1531	// fcomp.emit(ACCEND)
1532}
1533
1534// addable reports whether e is a statically addable
1535// expression: a [s]tring, [b]ytes, [l]ist, or [t]uple.
1536func addable(e syntax.Expr) rune {
1537	switch e := e.(type) {
1538	case *syntax.Literal:
1539		// TODO(adonovan): opt: support INT/FLOAT/BIGINT constant folding.
1540		switch e.Token {
1541		case syntax.STRING:
1542			return 's'
1543		case syntax.BYTES:
1544			return 'b'
1545		}
1546	case *syntax.ListExpr:
1547		return 'l'
1548	case *syntax.TupleExpr:
1549		return 't'
1550	}
1551	return 0
1552}
1553
1554// add returns an expression denoting the sum of args,
1555// which are all addable values of the type indicated by code.
1556// The resulting syntax is degenerate, lacking position, etc.
1557func add(code rune, args []summand) syntax.Expr {
1558	switch code {
1559	case 's', 'b':
1560		var buf strings.Builder
1561		for _, arg := range args {
1562			buf.WriteString(arg.x.(*syntax.Literal).Value.(string))
1563		}
1564		tok := syntax.STRING
1565		if code == 'b' {
1566			tok = syntax.BYTES
1567		}
1568		return &syntax.Literal{Token: tok, Value: buf.String()}
1569	case 'l':
1570		var elems []syntax.Expr
1571		for _, arg := range args {
1572			elems = append(elems, arg.x.(*syntax.ListExpr).List...)
1573		}
1574		return &syntax.ListExpr{List: elems}
1575	case 't':
1576		var elems []syntax.Expr
1577		for _, arg := range args {
1578			elems = append(elems, arg.x.(*syntax.TupleExpr).List...)
1579		}
1580		return &syntax.TupleExpr{List: elems}
1581	}
1582	panic(code)
1583}
1584
1585func unparen(e syntax.Expr) syntax.Expr {
1586	if p, ok := e.(*syntax.ParenExpr); ok {
1587		return unparen(p.X)
1588	}
1589	return e
1590}
1591
1592func (fcomp *fcomp) binop(pos syntax.Position, op syntax.Token) {
1593	// TODO(adonovan): simplify by assuming syntax and compiler constants align.
1594	fcomp.setPos(pos)
1595	switch op {
1596	// arithmetic
1597	case syntax.PLUS:
1598		fcomp.emit(PLUS)
1599	case syntax.MINUS:
1600		fcomp.emit(MINUS)
1601	case syntax.STAR:
1602		fcomp.emit(STAR)
1603	case syntax.SLASH:
1604		fcomp.emit(SLASH)
1605	case syntax.SLASHSLASH:
1606		fcomp.emit(SLASHSLASH)
1607	case syntax.PERCENT:
1608		fcomp.emit(PERCENT)
1609	case syntax.AMP:
1610		fcomp.emit(AMP)
1611	case syntax.PIPE:
1612		fcomp.emit(PIPE)
1613	case syntax.CIRCUMFLEX:
1614		fcomp.emit(CIRCUMFLEX)
1615	case syntax.LTLT:
1616		fcomp.emit(LTLT)
1617	case syntax.GTGT:
1618		fcomp.emit(GTGT)
1619	case syntax.IN:
1620		fcomp.emit(IN)
1621	case syntax.NOT_IN:
1622		fcomp.emit(IN)
1623		fcomp.emit(NOT)
1624
1625		// comparisons
1626	case syntax.EQL,
1627		syntax.NEQ,
1628		syntax.GT,
1629		syntax.LT,
1630		syntax.LE,
1631		syntax.GE:
1632		fcomp.emit(Opcode(op-syntax.EQL) + EQL)
1633
1634	default:
1635		log.Panicf("%s: unexpected binary op: %s", pos, op)
1636	}
1637}
1638
1639func (fcomp *fcomp) call(call *syntax.CallExpr) {
1640	// TODO(adonovan): opt: Use optimized path for calling methods
1641	// of built-ins: x.f(...) to avoid materializing a closure.
1642	// if dot, ok := call.Fcomp.(*syntax.DotExpr); ok {
1643	// 	fcomp.expr(dot.X)
1644	// 	fcomp.args(call)
1645	// 	fcomp.emit1(CALL_ATTR, fcomp.name(dot.Name.Name))
1646	// 	return
1647	// }
1648
1649	// usual case
1650	fcomp.expr(call.Fn)
1651	op, arg := fcomp.args(call)
1652	fcomp.setPos(call.Lparen)
1653	fcomp.emit1(op, arg)
1654}
1655
1656// args emits code to push a tuple of positional arguments
1657// and a tuple of named arguments containing alternating keys and values.
1658// Either or both tuples may be empty (TODO(adonovan): optimize).
1659func (fcomp *fcomp) args(call *syntax.CallExpr) (op Opcode, arg uint32) {
1660	var callmode int
1661	// Compute the number of each kind of parameter.
1662	var p, n int // number of  positional, named arguments
1663	var varargs, kwargs syntax.Expr
1664	for _, arg := range call.Args {
1665		if binary, ok := arg.(*syntax.BinaryExpr); ok && binary.Op == syntax.EQ {
1666
1667			// named argument (name, value)
1668			fcomp.string(binary.X.(*syntax.Ident).Name)
1669			fcomp.expr(binary.Y)
1670			n++
1671			continue
1672		}
1673		if unary, ok := arg.(*syntax.UnaryExpr); ok {
1674			if unary.Op == syntax.STAR {
1675				callmode |= 1
1676				varargs = unary.X
1677				continue
1678			} else if unary.Op == syntax.STARSTAR {
1679				callmode |= 2
1680				kwargs = unary.X
1681				continue
1682			}
1683		}
1684
1685		// positional argument
1686		fcomp.expr(arg)
1687		p++
1688	}
1689
1690	// Python2 and Python3 both permit named arguments
1691	// to appear both before and after a *args argument:
1692	//   f(1, 2, x=3, *[4], y=5, **dict(z=6))
1693	//
1694	// They also differ in their evaluation order:
1695	//  Python2: 1 2 3 5 4 6 (*args and **kwargs evaluated last)
1696	//  Python3: 1 2 4 3 5 6 (positional args evaluated before named args)
1697	// Starlark-in-Java historically used a third order:
1698	//  Lexical: 1 2 3 4 5 6 (all args evaluated left-to-right)
1699	//
1700	// After discussion in github.com/bazelbuild/starlark#13, the
1701	// spec now requires Starlark to statically reject named
1702	// arguments after *args (e.g. y=5), and to use Python2-style
1703	// evaluation order. This is both easy to implement and
1704	// consistent with lexical order:
1705	//
1706	//   f(1, 2, x=3, *[4], **dict(z=6)) # 1 2 3 4 6
1707
1708	// *args
1709	if varargs != nil {
1710		fcomp.expr(varargs)
1711	}
1712
1713	// **kwargs
1714	if kwargs != nil {
1715		fcomp.expr(kwargs)
1716	}
1717
1718	// TODO(adonovan): avoid this with a more flexible encoding.
1719	if p >= 256 || n >= 256 {
1720		// resolve already checked this; should be unreachable
1721		panic("too many arguments in call")
1722	}
1723
1724	return CALL + Opcode(callmode), uint32(p<<8 | n)
1725}
1726
1727func (fcomp *fcomp) tuple(elems []syntax.Expr) {
1728	for _, elem := range elems {
1729		fcomp.expr(elem)
1730	}
1731	fcomp.emit1(MAKETUPLE, uint32(len(elems)))
1732}
1733
1734func (fcomp *fcomp) comprehension(comp *syntax.Comprehension, clauseIndex int) {
1735	if clauseIndex == len(comp.Clauses) {
1736		fcomp.emit(DUP) // accumulator
1737		if comp.Curly {
1738			// dict: {k:v for ...}
1739			// Parser ensures that body is of form k:v.
1740			// Python-style set comprehensions {body for vars in x}
1741			// are not supported.
1742			entry := comp.Body.(*syntax.DictEntry)
1743			fcomp.expr(entry.Key)
1744			fcomp.expr(entry.Value)
1745			fcomp.setPos(entry.Colon)
1746			fcomp.emit(SETDICT)
1747		} else {
1748			// list: [body for vars in x]
1749			fcomp.expr(comp.Body)
1750			fcomp.emit(APPEND)
1751		}
1752		return
1753	}
1754
1755	clause := comp.Clauses[clauseIndex]
1756	switch clause := clause.(type) {
1757	case *syntax.IfClause:
1758		t := fcomp.newBlock()
1759		done := fcomp.newBlock()
1760		fcomp.ifelse(clause.Cond, t, done)
1761
1762		fcomp.block = t
1763		fcomp.comprehension(comp, clauseIndex+1)
1764		fcomp.jump(done)
1765
1766		fcomp.block = done
1767		return
1768
1769	case *syntax.ForClause:
1770		// Keep consistent with ForStmt.
1771		head := fcomp.newBlock()
1772		body := fcomp.newBlock()
1773		tail := fcomp.newBlock()
1774
1775		fcomp.expr(clause.X)
1776		fcomp.setPos(clause.For)
1777		fcomp.emit(ITERPUSH)
1778		fcomp.jump(head)
1779
1780		fcomp.block = head
1781		fcomp.condjump(ITERJMP, tail, body)
1782
1783		fcomp.block = body
1784		fcomp.assign(clause.For, clause.Vars)
1785		fcomp.comprehension(comp, clauseIndex+1)
1786		fcomp.jump(head)
1787
1788		fcomp.block = tail
1789		fcomp.emit(ITERPOP)
1790		return
1791	}
1792
1793	start, _ := clause.Span()
1794	log.Panicf("%s: unexpected comprehension clause %T", start, clause)
1795}
1796
1797func (fcomp *fcomp) function(f *resolve.Function) {
1798	// Evaluation of the defaults may fail, so record the position.
1799	fcomp.setPos(f.Pos)
1800
1801	// To reduce allocation, we emit a combined tuple
1802	// for the defaults and the freevars.
1803	// The function knows where to split it at run time.
1804
1805	// Generate tuple of parameter defaults. For:
1806	//  def f(p1, p2=dp2, p3=dp3, *, k1, k2=dk2, k3, **kwargs)
1807	// the tuple is:
1808	//  (dp2, dp3, MANDATORY, dk2, MANDATORY).
1809	ndefaults := 0
1810	seenStar := false
1811	for _, param := range f.Params {
1812		switch param := param.(type) {
1813		case *syntax.BinaryExpr:
1814			fcomp.expr(param.Y)
1815			ndefaults++
1816		case *syntax.UnaryExpr:
1817			seenStar = true // * or *args (also **kwargs)
1818		case *syntax.Ident:
1819			if seenStar {
1820				fcomp.emit(MANDATORY)
1821				ndefaults++
1822			}
1823		}
1824	}
1825
1826	// Capture the cells of the function's
1827	// free variables from the lexical environment.
1828	for _, freevar := range f.FreeVars {
1829		// Don't call fcomp.lookup because we want
1830		// the cell itself, not its content.
1831		switch freevar.Scope {
1832		case resolve.Free:
1833			fcomp.emit1(FREE, uint32(freevar.Index))
1834		case resolve.Cell:
1835			fcomp.emit1(LOCAL, uint32(freevar.Index))
1836		}
1837	}
1838
1839	fcomp.emit1(MAKETUPLE, uint32(ndefaults+len(f.FreeVars)))
1840
1841	funcode := fcomp.pcomp.function(f.Name, f.Pos, f.Body, f.Locals, f.FreeVars)
1842
1843	if debug {
1844		// TODO(adonovan): do compilations sequentially not as a tree,
1845		// to make the log easier to read.
1846		// Simplify by identifying Toplevel and functionIndex 0.
1847		fmt.Fprintf(os.Stderr, "resuming %s @ %s\n", fcomp.fn.Name, fcomp.pos)
1848	}
1849
1850	// def f(a, *, b=1) has only 2 parameters.
1851	numParams := len(f.Params)
1852	if f.NumKwonlyParams > 0 && !f.HasVarargs {
1853		numParams--
1854	}
1855
1856	funcode.NumParams = numParams
1857	funcode.NumKwonlyParams = f.NumKwonlyParams
1858	funcode.HasVarargs = f.HasVarargs
1859	funcode.HasKwargs = f.HasKwargs
1860	fcomp.emit1(MAKEFUNC, fcomp.pcomp.functionIndex(funcode))
1861}
1862
1863// ifelse emits a Boolean control flow decision.
1864// On return, the current block is unset.
1865func (fcomp *fcomp) ifelse(cond syntax.Expr, t, f *block) {
1866	switch cond := cond.(type) {
1867	case *syntax.UnaryExpr:
1868		if cond.Op == syntax.NOT {
1869			// if not x then goto t else goto f
1870			//    =>
1871			// if x then goto f else goto t
1872			fcomp.ifelse(cond.X, f, t)
1873			return
1874		}
1875
1876	case *syntax.BinaryExpr:
1877		switch cond.Op {
1878		case syntax.AND:
1879			// if x and y then goto t else goto f
1880			//    =>
1881			// if x then ifelse(y, t, f) else goto f
1882			fcomp.expr(cond.X)
1883			y := fcomp.newBlock()
1884			fcomp.condjump(CJMP, y, f)
1885
1886			fcomp.block = y
1887			fcomp.ifelse(cond.Y, t, f)
1888			return
1889
1890		case syntax.OR:
1891			// if x or y then goto t else goto f
1892			//    =>
1893			// if x then goto t else ifelse(y, t, f)
1894			fcomp.expr(cond.X)
1895			y := fcomp.newBlock()
1896			fcomp.condjump(CJMP, t, y)
1897
1898			fcomp.block = y
1899			fcomp.ifelse(cond.Y, t, f)
1900			return
1901		case syntax.NOT_IN:
1902			// if x not in y then goto t else goto f
1903			//    =>
1904			// if x in y then goto f else goto t
1905			copy := *cond
1906			copy.Op = syntax.IN
1907			fcomp.expr(&copy)
1908			fcomp.condjump(CJMP, f, t)
1909			return
1910		}
1911	}
1912
1913	// general case
1914	fcomp.expr(cond)
1915	fcomp.condjump(CJMP, t, f)
1916}
1917