1package compile
2
3// This file defines functions to read and write a compile.Program to a file.
4//
5// It is the client's responsibility to avoid version skew between the
6// compiler used to produce a file and the interpreter that consumes it.
7// The version number is provided as a constant.
8// Incompatible protocol changes should also increment the version number.
9//
10// Encoding
11//
12// Program:
13//	"sky!"		[4]byte		# magic number
14//	str		uint32le	# offset of <strings> section
15//	version		varint		# must match Version
16//	filename	string
17//	numloads	varint
18//	loads		[]Ident
19//	numnames	varint
20//	names		[]string
21//	numconsts	varint
22//	consts		[]Constant
23//	numglobals	varint
24//	globals		[]Ident
25//	toplevel	Funcode
26//	numfuncs	varint
27//	funcs		[]Funcode
28//	<strings>	[]byte		# concatenation of all referenced strings
29//	EOF
30//
31// Funcode:
32//	id		Ident
33//	code		[]byte
34//	pclinetablen	varint
35//	pclinetab	[]varint
36//	numlocals	varint
37//	locals		[]Ident
38//	numcells	varint
39//	cells		[]int
40//	numfreevars	varint
41//	freevar		[]Ident
42//	maxstack	varint
43//	numparams	varint
44//	numkwonlyparams	varint
45//	hasvarargs	varint (0 or 1)
46//	haskwargs	varint (0 or 1)
47//
48// Ident:
49//	filename	string
50//	line, col	varint
51//
52// Constant:                            # type      data
53//      type            varint          # 0=string  string
54//      data            ...             # 1=bytes   string
55//                                      # 2=int     varint
56//                                      # 3=float   varint (bits as uint64)
57//                                      # 4=bigint  string (decimal ASCII text)
58//
59// The encoding starts with a four-byte magic number.
60// The next four bytes are a little-endian uint32
61// that provides the offset of the string section
62// at the end of the file, which contains the ordered
63// concatenation of all strings referenced by the
64// program. This design permits the decoder to read
65// the first and second parts of the file into different
66// memory allocations: the first (the encoded program)
67// is transient, but the second (the strings) persists
68// for the life of the Program.
69//
70// Within the encoded program, all strings are referred
71// to by their length. As the encoder and decoder process
72// the entire file sequentially, they are in lock step,
73// so the start offset of each string is implicit.
74//
75// Program.Code is represented as a []byte slice to permit
76// modification when breakpoints are set. All other strings
77// are represented as strings. They all (unsafely) share the
78// same backing byte slice.
79//
80// Aside from the str field, all integers are encoded as varints.
81
82import (
83	"encoding/binary"
84	"fmt"
85	"math"
86	"math/big"
87	debugpkg "runtime/debug"
88	"unsafe"
89
90	"go.starlark.net/syntax"
91)
92
93const magic = "!sky"
94
95// Encode encodes a compiled Starlark program.
96func (prog *Program) Encode() []byte {
97	var e encoder
98	e.p = append(e.p, magic...)
99	e.p = append(e.p, "????"...) // string data offset; filled in later
100	e.int(Version)
101	e.string(prog.Toplevel.Pos.Filename())
102	e.bindings(prog.Loads)
103	e.int(len(prog.Names))
104	for _, name := range prog.Names {
105		e.string(name)
106	}
107	e.int(len(prog.Constants))
108	for _, c := range prog.Constants {
109		switch c := c.(type) {
110		case string:
111			e.int(0)
112			e.string(c)
113		case Bytes:
114			e.int(1)
115			e.string(string(c))
116		case int64:
117			e.int(2)
118			e.int64(c)
119		case float64:
120			e.int(3)
121			e.uint64(math.Float64bits(c))
122		case *big.Int:
123			e.int(4)
124			e.string(c.Text(10))
125		}
126	}
127	e.bindings(prog.Globals)
128	e.function(prog.Toplevel)
129	e.int(len(prog.Functions))
130	for _, fn := range prog.Functions {
131		e.function(fn)
132	}
133
134	// Patch in the offset of the string data section.
135	binary.LittleEndian.PutUint32(e.p[4:8], uint32(len(e.p)))
136
137	return append(e.p, e.s...)
138}
139
140type encoder struct {
141	p   []byte // encoded program
142	s   []byte // strings
143	tmp [binary.MaxVarintLen64]byte
144}
145
146func (e *encoder) int(x int) {
147	e.int64(int64(x))
148}
149
150func (e *encoder) int64(x int64) {
151	n := binary.PutVarint(e.tmp[:], x)
152	e.p = append(e.p, e.tmp[:n]...)
153}
154
155func (e *encoder) uint64(x uint64) {
156	n := binary.PutUvarint(e.tmp[:], x)
157	e.p = append(e.p, e.tmp[:n]...)
158}
159
160func (e *encoder) string(s string) {
161	e.int(len(s))
162	e.s = append(e.s, s...)
163}
164
165func (e *encoder) bytes(b []byte) {
166	e.int(len(b))
167	e.s = append(e.s, b...)
168}
169
170func (e *encoder) binding(bind Binding) {
171	e.string(bind.Name)
172	e.int(int(bind.Pos.Line))
173	e.int(int(bind.Pos.Col))
174}
175
176func (e *encoder) bindings(binds []Binding) {
177	e.int(len(binds))
178	for _, bind := range binds {
179		e.binding(bind)
180	}
181}
182
183func (e *encoder) function(fn *Funcode) {
184	e.binding(Binding{fn.Name, fn.Pos})
185	e.string(fn.Doc)
186	e.bytes(fn.Code)
187	e.int(len(fn.pclinetab))
188	for _, x := range fn.pclinetab {
189		e.int64(int64(x))
190	}
191	e.bindings(fn.Locals)
192	e.int(len(fn.Cells))
193	for _, index := range fn.Cells {
194		e.int(index)
195	}
196	e.bindings(fn.Freevars)
197	e.int(fn.MaxStack)
198	e.int(fn.NumParams)
199	e.int(fn.NumKwonlyParams)
200	e.int(b2i(fn.HasVarargs))
201	e.int(b2i(fn.HasKwargs))
202}
203
204func b2i(b bool) int {
205	if b {
206		return 1
207	} else {
208		return 0
209	}
210}
211
212// DecodeProgram decodes a compiled Starlark program from data.
213func DecodeProgram(data []byte) (_ *Program, err error) {
214	if len(data) < len(magic) {
215		return nil, fmt.Errorf("not a compiled module: no magic number")
216	}
217	if got := string(data[:4]); got != magic {
218		return nil, fmt.Errorf("not a compiled module: got magic number %q, want %q",
219			got, magic)
220	}
221	defer func() {
222		if x := recover(); x != nil {
223			debugpkg.PrintStack()
224			err = fmt.Errorf("internal error while decoding program: %v", x)
225		}
226	}()
227
228	offset := binary.LittleEndian.Uint32(data[4:8])
229	d := decoder{
230		p: data[8:offset],
231		s: append([]byte(nil), data[offset:]...), // allocate a copy, which will persist
232	}
233
234	if v := d.int(); v != Version {
235		return nil, fmt.Errorf("version mismatch: read %d, want %d", v, Version)
236	}
237
238	filename := d.string()
239	d.filename = &filename
240
241	loads := d.bindings()
242
243	names := make([]string, d.int())
244	for i := range names {
245		names[i] = d.string()
246	}
247
248	// constants
249	constants := make([]interface{}, d.int())
250	for i := range constants {
251		var c interface{}
252		switch d.int() {
253		case 0:
254			c = d.string()
255		case 1:
256			c = Bytes(d.string())
257		case 2:
258			c = d.int64()
259		case 3:
260			c = math.Float64frombits(d.uint64())
261		case 4:
262			c, _ = new(big.Int).SetString(d.string(), 10)
263		}
264		constants[i] = c
265	}
266
267	globals := d.bindings()
268	toplevel := d.function()
269	funcs := make([]*Funcode, d.int())
270	for i := range funcs {
271		funcs[i] = d.function()
272	}
273
274	prog := &Program{
275		Loads:     loads,
276		Names:     names,
277		Constants: constants,
278		Globals:   globals,
279		Functions: funcs,
280		Toplevel:  toplevel,
281	}
282	toplevel.Prog = prog
283	for _, f := range funcs {
284		f.Prog = prog
285	}
286
287	if len(d.p)+len(d.s) > 0 {
288		return nil, fmt.Errorf("internal error: unconsumed data during decoding")
289	}
290
291	return prog, nil
292}
293
294type decoder struct {
295	p        []byte  // encoded program
296	s        []byte  // strings
297	filename *string // (indirect to avoid keeping decoder live)
298}
299
300func (d *decoder) int() int {
301	return int(d.int64())
302}
303
304func (d *decoder) int64() int64 {
305	x, len := binary.Varint(d.p[:])
306	d.p = d.p[len:]
307	return x
308}
309
310func (d *decoder) uint64() uint64 {
311	x, len := binary.Uvarint(d.p[:])
312	d.p = d.p[len:]
313	return x
314}
315
316func (d *decoder) string() (s string) {
317	if slice := d.bytes(); len(slice) > 0 {
318		// Avoid a memory allocation for each string
319		// by unsafely aliasing slice.
320		type string struct {
321			data *byte
322			len  int
323		}
324		ptr := (*string)(unsafe.Pointer(&s))
325		ptr.data = &slice[0]
326		ptr.len = len(slice)
327	}
328	return s
329}
330
331func (d *decoder) bytes() []byte {
332	len := d.int()
333	r := d.s[:len:len]
334	d.s = d.s[len:]
335	return r
336}
337
338func (d *decoder) binding() Binding {
339	name := d.string()
340	line := int32(d.int())
341	col := int32(d.int())
342	return Binding{Name: name, Pos: syntax.MakePosition(d.filename, line, col)}
343}
344
345func (d *decoder) bindings() []Binding {
346	bindings := make([]Binding, d.int())
347	for i := range bindings {
348		bindings[i] = d.binding()
349	}
350	return bindings
351}
352
353func (d *decoder) ints() []int {
354	ints := make([]int, d.int())
355	for i := range ints {
356		ints[i] = d.int()
357	}
358	return ints
359}
360
361func (d *decoder) bool() bool { return d.int() != 0 }
362
363func (d *decoder) function() *Funcode {
364	id := d.binding()
365	doc := d.string()
366	code := d.bytes()
367	pclinetab := make([]uint16, d.int())
368	for i := range pclinetab {
369		pclinetab[i] = uint16(d.int())
370	}
371	locals := d.bindings()
372	cells := d.ints()
373	freevars := d.bindings()
374	maxStack := d.int()
375	numParams := d.int()
376	numKwonlyParams := d.int()
377	hasVarargs := d.int() != 0
378	hasKwargs := d.int() != 0
379	return &Funcode{
380		// Prog is filled in later.
381		Pos:             id.Pos,
382		Name:            id.Name,
383		Doc:             doc,
384		Code:            code,
385		pclinetab:       pclinetab,
386		Locals:          locals,
387		Cells:           cells,
388		Freevars:        freevars,
389		MaxStack:        maxStack,
390		NumParams:       numParams,
391		NumKwonlyParams: numKwonlyParams,
392		HasVarargs:      hasVarargs,
393		HasKwargs:       hasKwargs,
394	}
395}
396