1// Copyright 2017 syzkaller project authors. All rights reserved.
2// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3
4// gen generates instruction tables (ifuzz/insns.go) from Intel XED tables.
5// Tables used to generate insns.go are checked in in all-enc-instructions.txt.
6package main
7
8import (
9	"bufio"
10	"fmt"
11	"os"
12	"reflect"
13	"strconv"
14	"strings"
15
16	"github.com/google/syzkaller/pkg/ifuzz"
17	"github.com/google/syzkaller/pkg/serializer"
18)
19
20// nolint: gocyclo
21func main() {
22	if len(os.Args) != 2 {
23		failf("usage: gen instructions.txt")
24	}
25	f, err := os.Open(os.Args[1])
26	if err != nil {
27		failf("failed to open input file: %v", err)
28	}
29	defer f.Close()
30
31	skipped := 0
32	saved := ""
33	var insns []*ifuzz.Insn
34	var insn, insn1 *ifuzz.Insn
35	s := bufio.NewScanner(f)
36	for i := 1; s.Scan(); i++ {
37		reportError := func(msg string, args ...interface{}) {
38			fmt.Fprintf(os.Stderr, "line %v: %v\n", i, s.Text())
39			failf(msg, args...)
40		}
41		line := s.Text()
42		if comment := strings.IndexByte(line, '#'); comment != -1 {
43			line = line[:comment]
44		}
45		line = strings.TrimSpace(line)
46		if line == "" {
47			continue
48		}
49		if line[len(line)-1] == '\\' {
50			saved += line[:len(line)-1]
51			continue
52		}
53		line = saved + line
54		saved = ""
55		if line == "{" {
56			insn = new(ifuzz.Insn)
57			continue
58		}
59		if line == "}" {
60			if insn1 != nil {
61				insns = append(insns, insn1)
62				insn1 = nil
63				insn = nil
64			}
65			continue
66		}
67		colon := strings.IndexByte(line, ':')
68		if colon == -1 {
69			reportError("no colon")
70		}
71		name := strings.TrimSpace(line[:colon])
72		if name == "" {
73			reportError("empty attribute name")
74		}
75		var vals []string
76		for _, v := range strings.Split(line[colon+1:], " ") {
77			v = strings.TrimSpace(v)
78			if v == "" {
79				continue
80			}
81			vals = append(vals, v)
82		}
83		switch name {
84		case "ICLASS":
85			if len(vals) != 1 {
86				reportError("ICLASS has more than one value")
87			}
88			insn.Name = vals[0]
89		case "CPL":
90			if len(vals) != 1 {
91				reportError("CPL has more than one value")
92			}
93			if vals[0] != "0" && vals[0] != "3" {
94				reportError("unknown CPL value: %v", vals[0])
95			}
96			insn.Priv = vals[0] == "0"
97		case "EXTENSION":
98			if len(vals) != 1 {
99				reportError("EXTENSION has more than one value")
100			}
101			insn.Extension = vals[0]
102			switch insn.Extension {
103			case "FMA", "AVX2", "AVX", "F16C", "BMI2", "BMI", "XOP", "FMA4", "AVXAES", "BMI1", "AVX2GATHER":
104				insn.Mode = 1<<ifuzz.ModeLong64 | 1<<ifuzz.ModeProt32
105			}
106			insn.Avx2Gather = insn.Extension == "AVX2GATHER"
107		case "PATTERN":
108			if insn1 != nil {
109				insns = append(insns, insn1)
110			}
111			insn1 = new(ifuzz.Insn)
112			*insn1 = *insn
113			if err := parsePattern(insn1, vals); err != nil {
114				if _, ok := err.(errSkip); !ok {
115					reportError(err.Error())
116				}
117				if err.Error() != "" {
118					fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err)
119				}
120				skipped++
121				insn1 = nil
122			}
123		case "OPERANDS":
124			if insn1 == nil {
125				break
126			}
127			if err := parseOperands(insn1, vals); err != nil {
128				if _, ok := err.(errSkip); !ok {
129					reportError(err.Error())
130				}
131				if err.Error() != "" {
132					fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err)
133				}
134				skipped++
135				insn1 = nil
136			}
137		}
138	}
139
140	var deduped []*ifuzz.Insn
141nextInsn:
142	for _, insn := range insns {
143		if insn.Extension == "AVX512VEX" || insn.Extension == "AVX512EVEX" {
144			skipped++
145			continue
146		}
147		mod0 := insn.Mod
148		for j := len(deduped) - 1; j >= 0; j-- {
149			insn1 := deduped[j]
150			if insn.Mod == 3 && insn1.Mod == -3 || insn.Mod == -3 && insn1.Mod == 3 || insn1.Mod == -1 {
151				insn.Mod = insn1.Mod
152			}
153			if reflect.DeepEqual(insn, insn1) {
154				if insn.Mod != mod0 {
155					insn1.Mod = -1
156				}
157				continue nextInsn
158			}
159			insn.Mod = mod0
160		}
161		deduped = append(deduped, insn)
162	}
163	fmt.Fprintf(os.Stderr, "deduped %v instructions\n", len(insns)-len(deduped))
164	insns = deduped
165
166	fmt.Printf("// AUTOGENERATED FILE\n\n")
167	fmt.Printf("package ifuzz\n\n")
168	fmt.Printf("import . \"github.com/google/syzkaller/pkg/ifuzz\"\n\n")
169	fmt.Printf("func init() { Insns = insns }\n\n")
170	fmt.Printf("var insns = ")
171	serializer.Write(os.Stdout, insns)
172
173	fmt.Fprintf(os.Stderr, "handled %v, skipped %v\n", len(insns), skipped)
174}
175
// errSkip is an error whose text is the reason an instruction is skipped.
// main checks for this type to tell "skip this instruction" apart from
// fatal parse errors; an empty reason is not printed.
type errSkip string

// Error implements the error interface and returns the skip reason as is.
func (e errSkip) Error() string {
	reason := string(e)
	return reason
}
181
182// nolint: gocyclo
183func parsePattern(insn *ifuzz.Insn, vals []string) error {
184	if insn.Opcode != nil {
185		return fmt.Errorf("PATTERN is already parsed for the instruction")
186	}
187	// As spelled these have incorrect format for 16-bit addressing mode and with 67 prefix.
188	if insn.Name == "NOP5" || insn.Name == "NOP6" || insn.Name == "NOP7" ||
189		insn.Name == "NOP8" || insn.Name == "NOP9" {
190		return errSkip("")
191	}
192	if insn.Mode == 0 {
193		insn.Mode = 1<<ifuzz.ModeLast - 1
194	}
195	insn.Mod = -100
196	insn.Reg = -100
197	insn.Rm = -100
198	insn.VexP = -1
199	for _, v := range vals {
200		switch {
201		case strings.HasPrefix(v, "0x"):
202			op, err := strconv.ParseUint(v, 0, 8)
203			if err != nil {
204				return fmt.Errorf("failed to parse hex pattern: %v", v)
205			}
206			if !insn.Modrm {
207				insn.Opcode = append(insn.Opcode, byte(op))
208			} else {
209				insn.Suffix = append(insn.Suffix, byte(op))
210			}
211		case strings.HasPrefix(v, "0b"):
212			if len(v) != 8 || v[6] != '_' {
213				return fmt.Errorf("failed to parse bin pattern: %v", v)
214			}
215			var op byte
216			if v[2] == '1' {
217				op |= 1 << 7
218			}
219			if v[3] == '1' {
220				op |= 1 << 6
221			}
222			if v[4] == '1' {
223				op |= 1 << 5
224			}
225			if v[5] == '1' {
226				op |= 1 << 4
227			}
228			if v[7] == '1' {
229				op |= 1 << 3
230			}
231			insn.Opcode = append(insn.Opcode, op)
232		case strings.HasPrefix(v, "MOD["):
233			insn.Modrm = true
234			vv, err := parseModrm(v[3:])
235			if err != nil {
236				return fmt.Errorf("failed to parse %v: %v", v, err)
237			}
238			insn.Mod = vv
239		case strings.HasPrefix(v, "REG["):
240			insn.Modrm = true
241			vv, err := parseModrm(v[3:])
242			if err != nil {
243				return fmt.Errorf("failed to parse %v: %v", v, err)
244			}
245			insn.Reg = vv
246		case strings.HasPrefix(v, "RM["):
247			insn.Modrm = true
248			vv, err := parseModrm(v[2:])
249			if err != nil {
250				return fmt.Errorf("failed to parse %v: %v", v, err)
251			}
252			insn.Rm = vv
253		case v == "RM=4":
254			insn.Rm = 4
255		case strings.HasPrefix(v, "SRM["):
256			vv, err := parseModrm(v[3:])
257			if err != nil {
258				return fmt.Errorf("failed to parse %v: %v", v, err)
259			}
260			insn.Rm = vv
261			insn.Srm = true
262		case v == "SRM=0", v == "SRM!=0":
263		case v == "MOD!=3":
264			if !insn.Modrm || insn.Mod != -1 {
265				return fmt.Errorf("MOD!=3 without MOD")
266			}
267			insn.Mod = -3
268		case v == "MOD=3":
269			// Most other instructions contain "MOD[0b11] MOD=3",
270			// but BNDCL contains "MOD[mm] MOD=3"
271			insn.Mod = 3
272		case v == "MOD=0":
273			insn.Mod = 0
274		case v == "MOD=1":
275			insn.Mod = 1
276		case v == "MOD=2":
277			insn.Mod = 2
278		case v == "lock_prefix":
279			insn.Prefix = append(insn.Prefix, 0xF0)
280
281		// Immediates.
282		case v == "UIMM8()", v == "SIMM8()":
283			addImm(insn, 1)
284		case v == "UIMM16()":
285			addImm(insn, 2)
286		case v == "UIMM32()":
287			addImm(insn, 4)
288		case v == "SIMMz()":
289			addImm(insn, -1)
290		case v == "UIMMv()":
291			addImm(insn, -3)
292		case v == "UIMM8_1()":
293			addImm(insn, 1)
294		case v == "BRDISP8()":
295			addImm(insn, 1)
296		case v == "BRDISP32()":
297			addImm(insn, 4)
298		case v == "BRDISPz()":
299			addImm(insn, -1)
300		case v == "MEMDISPv()":
301			addImm(insn, -2)
302
303		// VOP/VEX
304		case v == "XOPV":
305			insn.Vex = 0x8f
306			insn.Mode &^= 1 << ifuzz.ModeReal16
307		case v == "EVV":
308			insn.Vex = 0xc4
309		case v == "VV1":
310			insn.Vex = 0xc4
311		case v == "VMAP0":
312			insn.VexMap = 0
313		case v == "V0F":
314			insn.VexMap = 1
315		case v == "V0F38":
316			insn.VexMap = 2
317		case v == "V0F3A":
318			insn.VexMap = 3
319		case v == "XMAP8":
320			insn.VexMap = 8
321		case v == "XMAP9":
322			insn.VexMap = 9
323		case v == "XMAPA":
324			insn.VexMap = 10
325		case v == "VNP":
326			insn.VexP = 0
327		case v == "V66":
328			insn.VexP = 1
329		case v == "VF2":
330			insn.VexP = 3
331		case v == "VF3":
332			insn.VexP = 2
333		case v == "VL128", v == "VL=0":
334			insn.VexL = -1
335		case v == "VL256", v == "VL=1":
336			insn.VexL = 1
337		case v == "NOVSR":
338			insn.VexNoR = true
339		case v == "NOEVSR":
340			insn.VexNoR = true
341			// VEXDEST3=0b1 VEXDEST210=0b111 VEXDEST4=0b0
342		case v == "SE_IMM8()":
343			addImm(insn, 1)
344
345		// Modes
346		case v == "mode64":
347			insn.Mode &= 1 << ifuzz.ModeLong64
348		case v == "not64":
349			insn.Mode &^= 1 << ifuzz.ModeLong64
350		case v == "mode32":
351			insn.Mode &= 1 << ifuzz.ModeProt32
352		case v == "mode16":
353			insn.Mode &= 1<<ifuzz.ModeProt16 | 1<<ifuzz.ModeReal16
354		case v == "eamode64",
355			v == "eamode32",
356			v == "eamode16",
357			v == "eanot16":
358
359		case v == "no_refining_prefix":
360			insn.NoRepPrefix = true
361			insn.No66Prefix = true
362		case v == "no66_prefix", v == "eosz32", v == "eosz64":
363			insn.No66Prefix = true
364		case v == "f2_refining_prefix", v == "refining_f2", v == "repne", v == "REP=2":
365			insn.Prefix = append(insn.Prefix, 0xF2)
366			insn.NoRepPrefix = true
367		case v == "f3_refining_prefix", v == "refining_f3", v == "repe", v == "REP=3":
368			insn.Prefix = append(insn.Prefix, 0xF3)
369			insn.NoRepPrefix = true
370		case v == "norep", v == "not_refining", v == "REP=0":
371			insn.NoRepPrefix = true
372		case v == "osz_refining_prefix":
373			insn.Prefix = append(insn.Prefix, 0x66)
374			insn.NoRepPrefix = true
375		case v == "rexw_prefix", v == "W1":
376			insn.Rexw = 1
377		case v == "norexw_prefix", v == "W0":
378			insn.Rexw = -1
379		case v == "MPXMODE=1",
380			v == "MPXMODE=0",
381			v == "TZCNT=1",
382			v == "TZCNT=0",
383			v == "LZCNT=1",
384			v == "LZCNT=0",
385			v == "CR_WIDTH()",
386			v == "DF64()",
387			v == "IMMUNE_REXW()",
388			v == "FORCE64()",
389			v == "EOSZ=1",
390			v == "EOSZ!=1",
391			v == "EOSZ=2",
392			v == "EOSZ!=2",
393			v == "EOSZ=3",
394			v == "EOSZ!=3",
395			v == "BRANCH_HINT()",
396			v == "P4=1",
397			v == "P4=0",
398			v == "rexb_prefix",
399			v == "norexb_prefix",
400			v == "IMMUNE66()",
401			v == "REFINING66()",
402			v == "IGNORE66()",
403			v == "IMMUNE66_LOOP64()",
404			v == "OVERRIDE_SEG0()",
405			v == "OVERRIDE_SEG1()",
406			v == "REMOVE_SEGMENT()",
407			v == "ONE()",
408			v == "nolock_prefix",
409			v == "MODRM()",
410			v == "VMODRM_XMM()",
411			v == "VMODRM_YMM()",
412			v == "BCRC=0",
413			v == "BCRC=1",
414			v == "ESIZE_8_BITS()",
415			v == "ESIZE_16_BITS()",
416			v == "ESIZE_32_BITS()",
417			v == "ESIZE_64_BITS()",
418			v == "NELEM_GPR_WRITER_STORE()",
419			v == "NELEM_GPR_WRITER_STORE_BYTE()",
420			v == "NELEM_GPR_WRITER_STORE_WORD()",
421			v == "NELEM_GPR_WRITER_LDOP_Q()",
422			v == "NELEM_GPR_WRITER_LDOP_D()",
423			v == "NELEM_GPR_READER()",
424			v == "NELEM_GPR_READER_BYTE()",
425			v == "NELEM_GPR_READER_WORD()",
426			v == "NELEM_GSCAT()",
427			v == "NELEM_HALF()",
428			v == "NELEM_FULL()",
429			v == "NELEM_FULLMEM()",
430			v == "NELEM_QUARTERMEM()",
431			v == "NELEM_EIGHTHMEM()",
432			v == "NELEM_HALFMEM()",
433			v == "NELEM_QUARTERMEM()",
434			v == "NELEM_MEM128()",
435			v == "NELEM_SCALAR()",
436			v == "NELEM_TUPLE1()",
437			v == "NELEM_TUPLE2()",
438			v == "NELEM_TUPLE4()",
439			v == "NELEM_TUPLE8()",
440			v == "NELEM_TUPLE1_4X()",
441			v == "NELEM_TUPLE1_BYTE()",
442			v == "NELEM_TUPLE1_WORD()",
443			v == "NELEM_MOVDDUP()",
444			v == "UISA_VMODRM_XMM()",
445			v == "UISA_VMODRM_YMM()",
446			v == "UISA_VMODRM_ZMM()",
447			v == "MASK=0",
448			v == "FIX_ROUND_LEN128()",
449			v == "FIX_ROUND_LEN512()",
450			v == "AVX512_ROUND()",
451			v == "ZEROING=0",
452			v == "SAE()",
453			v == "VL512", // VL=2
454			v == "not_refining_f3",
455			strings.HasPrefix(v, "MODEP5="):
456		default:
457			return errSkip(fmt.Sprintf("unknown pattern %v", v))
458		}
459	}
460	if insn.Modrm {
461		switch insn.Mod {
462		case -3, -1, 0, 1, 2, 3:
463		default:
464			return fmt.Errorf("bad MOD value: %v", insn.Mod)
465		}
466		if insn.Reg < -1 || insn.Reg > 7 {
467			return fmt.Errorf("bad REG value: %v", insn.Mod)
468		}
469		if insn.Rm < -1 || insn.Rm > 7 {
470			return fmt.Errorf("bad RM value: %v", insn.Mod)
471		}
472	}
473	if insn.Imm != 0 && len(insn.Suffix) != 0 {
474		return fmt.Errorf("both immediate and suffix opcode")
475	}
476	if insn.Mode == 0 {
477		return errSkip("no modes for instruction")
478	}
479	return nil
480}
481
482func parseOperands(insn *ifuzz.Insn, vals []string) error {
483	for _, v := range vals {
484		switch v {
485		case "REG0=SEG():r", "REG1=SEG():r", "REG0=SEG():w":
486			if insn.Reg != -1 {
487				return fmt.Errorf("REG=SEG() operand, but fixed reg")
488			}
489			insn.Reg = -6
490		case "REG0=CR_R():w", "REG1=CR_R():r":
491			if insn.Reg != -1 {
492				return fmt.Errorf("REG=CR_R() operand, but fixed reg")
493			}
494			insn.Reg = -8
495			insn.NoSibDisp = true
496		case "REG0=DR_R():w", "REG1=DR_R():r":
497			insn.NoSibDisp = true
498		case "MEM0:r:mem16", "MEM0:w:mem16", "MEM0:r:mem16int", "MEM0:w:mem16int":
499			insn.Mem16 = true
500		case "MEM0:r:mem32real", "MEM0:r:mem32int", "MEM0:w:mem32real", "MEM0:w:mem32int":
501			insn.Mem32 = true
502		}
503	}
504	return nil
505}
506
// parseModrm parses the bracketed value of a MOD/REG/RM/SRM pattern token.
// The symbolic forms "[mm]", "[rrr]" and "[nnn]" yield -1; a binary form
// "[0bD]", "[0bDD]" or "[0bDDD]" yields the number encoded by its digits.
// Everything else, including "[0b]" with no digits, is reported as malformed.
func parseModrm(v string) (int8, error) {
	if len(v) < 4 || len(v) > 7 || v[0] != '[' || v[len(v)-1] != ']' {
		return 0, fmt.Errorf("malformed")
	}
	if v == "[mm]" || v == "[rrr]" || v == "[nnn]" {
		return -1, nil
	}
	if !strings.HasPrefix(v, "[0b") {
		return 0, fmt.Errorf("malformed")
	}
	digits := v[3 : len(v)-1]
	if digits == "" {
		// "[0b]" would otherwise be silently accepted as 0.
		return 0, fmt.Errorf("malformed")
	}
	var vv int8
	for i := 0; i < len(digits); i++ {
		if digits[i] != '0' && digits[i] != '1' {
			return 0, fmt.Errorf("malformed")
		}
		vv *= 2
		if digits[i] == '1' {
			vv++
		}
	}
	return vv, nil
}
529
530func addImm(insn *ifuzz.Insn, imm int8) {
531	if insn.Imm == 0 {
532		insn.Imm = imm
533		return
534	}
535	if insn.Imm2 == 0 {
536		insn.Imm2 = imm
537		return
538	}
539	panic("too many immediates")
540}
541
// failf formats msg with args, writes the result followed by a newline to
// stderr and terminates the process with exit status 1.
func failf(msg string, args ...interface{}) {
	const exitCode = 1
	text := fmt.Sprintf(msg+"\n", args...)
	fmt.Fprint(os.Stderr, text)
	os.Exit(exitCode)
}
546