1// Copyright 2017 syzkaller project authors. All rights reserved. 2// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4// gen generates instruction tables (ifuzz/insns.go) from Intel XED tables. 5// Tables used to generate insns.go are checked in in all-enc-instructions.txt. 6package main 7 8import ( 9 "bufio" 10 "fmt" 11 "os" 12 "reflect" 13 "strconv" 14 "strings" 15 16 "github.com/google/syzkaller/pkg/ifuzz" 17 "github.com/google/syzkaller/pkg/serializer" 18) 19 20// nolint: gocyclo 21func main() { 22 if len(os.Args) != 2 { 23 failf("usage: gen instructions.txt") 24 } 25 f, err := os.Open(os.Args[1]) 26 if err != nil { 27 failf("failed to open input file: %v", err) 28 } 29 defer f.Close() 30 31 skipped := 0 32 saved := "" 33 var insns []*ifuzz.Insn 34 var insn, insn1 *ifuzz.Insn 35 s := bufio.NewScanner(f) 36 for i := 1; s.Scan(); i++ { 37 reportError := func(msg string, args ...interface{}) { 38 fmt.Fprintf(os.Stderr, "line %v: %v\n", i, s.Text()) 39 failf(msg, args...) 40 } 41 line := s.Text() 42 if comment := strings.IndexByte(line, '#'); comment != -1 { 43 line = line[:comment] 44 } 45 line = strings.TrimSpace(line) 46 if line == "" { 47 continue 48 } 49 if line[len(line)-1] == '\\' { 50 saved += line[:len(line)-1] 51 continue 52 } 53 line = saved + line 54 saved = "" 55 if line == "{" { 56 insn = new(ifuzz.Insn) 57 continue 58 } 59 if line == "}" { 60 if insn1 != nil { 61 insns = append(insns, insn1) 62 insn1 = nil 63 insn = nil 64 } 65 continue 66 } 67 colon := strings.IndexByte(line, ':') 68 if colon == -1 { 69 reportError("no colon") 70 } 71 name := strings.TrimSpace(line[:colon]) 72 if name == "" { 73 reportError("empty attribute name") 74 } 75 var vals []string 76 for _, v := range strings.Split(line[colon+1:], " ") { 77 v = strings.TrimSpace(v) 78 if v == "" { 79 continue 80 } 81 vals = append(vals, v) 82 } 83 switch name { 84 case "ICLASS": 85 if len(vals) != 1 { 86 reportError("ICLASS has more than one value") 87 } 88 insn.Name = vals[0] 89 case "CPL": 90 if len(vals) != 1 { 91 reportError("CPL has more than one value") 92 } 93 if vals[0] != "0" && vals[0] != "3" { 94 reportError("unknown CPL value: %v", vals[0]) 95 } 96 insn.Priv = vals[0] == "0" 97 case "EXTENSION": 98 if len(vals) != 1 { 99 reportError("EXTENSION has more than one value") 100 } 101 insn.Extension = vals[0] 102 switch insn.Extension { 103 case "FMA", "AVX2", "AVX", "F16C", "BMI2", "BMI", "XOP", "FMA4", "AVXAES", "BMI1", "AVX2GATHER": 104 insn.Mode = 1<<ifuzz.ModeLong64 | 1<<ifuzz.ModeProt32 105 } 106 insn.Avx2Gather = insn.Extension == "AVX2GATHER" 107 case "PATTERN": 108 if insn1 != nil { 109 insns = append(insns, insn1) 110 } 111 insn1 = new(ifuzz.Insn) 112 *insn1 = *insn 113 if err := parsePattern(insn1, vals); err != nil { 114 if _, ok := err.(errSkip); !ok { 115 reportError(err.Error()) 116 } 117 if err.Error() != "" { 118 fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err) 119 } 120 skipped++ 121 insn1 = nil 122 } 123 case "OPERANDS": 124 if insn1 == nil { 125 break 126 } 127 if err := parseOperands(insn1, vals); err != nil { 128 if _, ok := err.(errSkip); !ok { 129 reportError(err.Error()) 130 } 131 if err.Error() != "" { 132 fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err) 133 } 134 skipped++ 135 insn1 = nil 136 } 137 } 138 } 139 140 var deduped []*ifuzz.Insn 141nextInsn: 142 for _, insn := range insns { 143 if insn.Extension == "AVX512VEX" || insn.Extension == "AVX512EVEX" { 144 skipped++ 145 continue 146 } 147 mod0 := insn.Mod 148 for j := len(deduped) - 1; j >= 0; j-- { 149 insn1 := deduped[j] 150 if insn.Mod == 3 && insn1.Mod == -3 || insn.Mod == -3 && insn1.Mod == 3 || insn1.Mod == -1 { 151 insn.Mod = insn1.Mod 152 } 153 if reflect.DeepEqual(insn, insn1) { 154 if insn.Mod != mod0 { 155 insn1.Mod = -1 156 } 157 continue nextInsn 158 } 159 insn.Mod = mod0 160 } 161 deduped = append(deduped, insn) 162 } 163 fmt.Fprintf(os.Stderr, "deduped %v instructions\n", len(insns)-len(deduped)) 164 insns = deduped 165 166 fmt.Printf("// AUTOGENERATED FILE\n\n") 167 fmt.Printf("package ifuzz\n\n") 168 fmt.Printf("import . \"github.com/google/syzkaller/pkg/ifuzz\"\n\n") 169 fmt.Printf("func init() { Insns = insns }\n\n") 170 fmt.Printf("var insns = ") 171 serializer.Write(os.Stdout, insns) 172 173 fmt.Fprintf(os.Stderr, "handled %v, skipped %v\n", len(insns), skipped) 174} 175 176type errSkip string 177 178func (err errSkip) Error() string { 179 return string(err) 180} 181 182// nolint: gocyclo 183func parsePattern(insn *ifuzz.Insn, vals []string) error { 184 if insn.Opcode != nil { 185 return fmt.Errorf("PATTERN is already parsed for the instruction") 186 } 187 // As spelled these have incorrect format for 16-bit addressing mode and with 67 prefix. 188 if insn.Name == "NOP5" || insn.Name == "NOP6" || insn.Name == "NOP7" || 189 insn.Name == "NOP8" || insn.Name == "NOP9" { 190 return errSkip("") 191 } 192 if insn.Mode == 0 { 193 insn.Mode = 1<<ifuzz.ModeLast - 1 194 } 195 insn.Mod = -100 196 insn.Reg = -100 197 insn.Rm = -100 198 insn.VexP = -1 199 for _, v := range vals { 200 switch { 201 case strings.HasPrefix(v, "0x"): 202 op, err := strconv.ParseUint(v, 0, 8) 203 if err != nil { 204 return fmt.Errorf("failed to parse hex pattern: %v", v) 205 } 206 if !insn.Modrm { 207 insn.Opcode = append(insn.Opcode, byte(op)) 208 } else { 209 insn.Suffix = append(insn.Suffix, byte(op)) 210 } 211 case strings.HasPrefix(v, "0b"): 212 if len(v) != 8 || v[6] != '_' { 213 return fmt.Errorf("failed to parse bin pattern: %v", v) 214 } 215 var op byte 216 if v[2] == '1' { 217 op |= 1 << 7 218 } 219 if v[3] == '1' { 220 op |= 1 << 6 221 } 222 if v[4] == '1' { 223 op |= 1 << 5 224 } 225 if v[5] == '1' { 226 op |= 1 << 4 227 } 228 if v[7] == '1' { 229 op |= 1 << 3 230 } 231 insn.Opcode = append(insn.Opcode, op) 232 case strings.HasPrefix(v, "MOD["): 233 insn.Modrm = true 234 vv, err := parseModrm(v[3:]) 235 if err != nil { 236 return fmt.Errorf("failed to parse %v: %v", v, err) 237 } 238 insn.Mod = vv 239 case strings.HasPrefix(v, "REG["): 240 insn.Modrm = true 241 vv, err := parseModrm(v[3:]) 242 if err != nil { 243 return fmt.Errorf("failed to parse %v: %v", v, err) 244 } 245 insn.Reg = vv 246 case strings.HasPrefix(v, "RM["): 247 insn.Modrm = true 248 vv, err := parseModrm(v[2:]) 249 if err != nil { 250 return fmt.Errorf("failed to parse %v: %v", v, err) 251 } 252 insn.Rm = vv 253 case v == "RM=4": 254 insn.Rm = 4 255 case strings.HasPrefix(v, "SRM["): 256 vv, err := parseModrm(v[3:]) 257 if err != nil { 258 return fmt.Errorf("failed to parse %v: %v", v, err) 259 } 260 insn.Rm = vv 261 insn.Srm = true 262 case v == "SRM=0", v == "SRM!=0": 263 case v == "MOD!=3": 264 if !insn.Modrm || insn.Mod != -1 { 265 return fmt.Errorf("MOD!=3 without MOD") 266 } 267 insn.Mod = -3 268 case v == "MOD=3": 269 // Most other instructions contain "MOD[0b11] MOD=3", 270 // but BNDCL contains "MOD[mm] MOD=3" 271 insn.Mod = 3 272 case v == "MOD=0": 273 insn.Mod = 0 274 case v == "MOD=1": 275 insn.Mod = 1 276 case v == "MOD=2": 277 insn.Mod = 2 278 case v == "lock_prefix": 279 insn.Prefix = append(insn.Prefix, 0xF0) 280 281 // Immediates. 282 case v == "UIMM8()", v == "SIMM8()": 283 addImm(insn, 1) 284 case v == "UIMM16()": 285 addImm(insn, 2) 286 case v == "UIMM32()": 287 addImm(insn, 4) 288 case v == "SIMMz()": 289 addImm(insn, -1) 290 case v == "UIMMv()": 291 addImm(insn, -3) 292 case v == "UIMM8_1()": 293 addImm(insn, 1) 294 case v == "BRDISP8()": 295 addImm(insn, 1) 296 case v == "BRDISP32()": 297 addImm(insn, 4) 298 case v == "BRDISPz()": 299 addImm(insn, -1) 300 case v == "MEMDISPv()": 301 addImm(insn, -2) 302 303 // VOP/VEX 304 case v == "XOPV": 305 insn.Vex = 0x8f 306 insn.Mode &^= 1 << ifuzz.ModeReal16 307 case v == "EVV": 308 insn.Vex = 0xc4 309 case v == "VV1": 310 insn.Vex = 0xc4 311 case v == "VMAP0": 312 insn.VexMap = 0 313 case v == "V0F": 314 insn.VexMap = 1 315 case v == "V0F38": 316 insn.VexMap = 2 317 case v == "V0F3A": 318 insn.VexMap = 3 319 case v == "XMAP8": 320 insn.VexMap = 8 321 case v == "XMAP9": 322 insn.VexMap = 9 323 case v == "XMAPA": 324 insn.VexMap = 10 325 case v == "VNP": 326 insn.VexP = 0 327 case v == "V66": 328 insn.VexP = 1 329 case v == "VF2": 330 insn.VexP = 3 331 case v == "VF3": 332 insn.VexP = 2 333 case v == "VL128", v == "VL=0": 334 insn.VexL = -1 335 case v == "VL256", v == "VL=1": 336 insn.VexL = 1 337 case v == "NOVSR": 338 insn.VexNoR = true 339 case v == "NOEVSR": 340 insn.VexNoR = true 341 // VEXDEST3=0b1 VEXDEST210=0b111 VEXDEST4=0b0 342 case v == "SE_IMM8()": 343 addImm(insn, 1) 344 345 // Modes 346 case v == "mode64": 347 insn.Mode &= 1 << ifuzz.ModeLong64 348 case v == "not64": 349 insn.Mode &^= 1 << ifuzz.ModeLong64 350 case v == "mode32": 351 insn.Mode &= 1 << ifuzz.ModeProt32 352 case v == "mode16": 353 insn.Mode &= 1<<ifuzz.ModeProt16 | 1<<ifuzz.ModeReal16 354 case v == "eamode64", 355 v == "eamode32", 356 v == "eamode16", 357 v == "eanot16": 358 359 case v == "no_refining_prefix": 360 insn.NoRepPrefix = true 361 insn.No66Prefix = true 362 case v == "no66_prefix", v == "eosz32", v == "eosz64": 363 insn.No66Prefix = true 364 case v == "f2_refining_prefix", v == "refining_f2", v == "repne", v == "REP=2": 365 insn.Prefix = append(insn.Prefix, 0xF2) 366 insn.NoRepPrefix = true 367 case v == "f3_refining_prefix", v == "refining_f3", v == "repe", v == "REP=3": 368 insn.Prefix = append(insn.Prefix, 0xF3) 369 insn.NoRepPrefix = true 370 case v == "norep", v == "not_refining", v == "REP=0": 371 insn.NoRepPrefix = true 372 case v == "osz_refining_prefix": 373 insn.Prefix = append(insn.Prefix, 0x66) 374 insn.NoRepPrefix = true 375 case v == "rexw_prefix", v == "W1": 376 insn.Rexw = 1 377 case v == "norexw_prefix", v == "W0": 378 insn.Rexw = -1 379 case v == "MPXMODE=1", 380 v == "MPXMODE=0", 381 v == "TZCNT=1", 382 v == "TZCNT=0", 383 v == "LZCNT=1", 384 v == "LZCNT=0", 385 v == "CR_WIDTH()", 386 v == "DF64()", 387 v == "IMMUNE_REXW()", 388 v == "FORCE64()", 389 v == "EOSZ=1", 390 v == "EOSZ!=1", 391 v == "EOSZ=2", 392 v == "EOSZ!=2", 393 v == "EOSZ=3", 394 v == "EOSZ!=3", 395 v == "BRANCH_HINT()", 396 v == "P4=1", 397 v == "P4=0", 398 v == "rexb_prefix", 399 v == "norexb_prefix", 400 v == "IMMUNE66()", 401 v == "REFINING66()", 402 v == "IGNORE66()", 403 v == "IMMUNE66_LOOP64()", 404 v == "OVERRIDE_SEG0()", 405 v == "OVERRIDE_SEG1()", 406 v == "REMOVE_SEGMENT()", 407 v == "ONE()", 408 v == "nolock_prefix", 409 v == "MODRM()", 410 v == "VMODRM_XMM()", 411 v == "VMODRM_YMM()", 412 v == "BCRC=0", 413 v == "BCRC=1", 414 v == "ESIZE_8_BITS()", 415 v == "ESIZE_16_BITS()", 416 v == "ESIZE_32_BITS()", 417 v == "ESIZE_64_BITS()", 418 v == "NELEM_GPR_WRITER_STORE()", 419 v == "NELEM_GPR_WRITER_STORE_BYTE()", 420 v == "NELEM_GPR_WRITER_STORE_WORD()", 421 v == "NELEM_GPR_WRITER_LDOP_Q()", 422 v == "NELEM_GPR_WRITER_LDOP_D()", 423 v == "NELEM_GPR_READER()", 424 v == "NELEM_GPR_READER_BYTE()", 425 v == "NELEM_GPR_READER_WORD()", 426 v == "NELEM_GSCAT()", 427 v == "NELEM_HALF()", 428 v == "NELEM_FULL()", 429 v == "NELEM_FULLMEM()", 430 v == "NELEM_QUARTERMEM()", 431 v == "NELEM_EIGHTHMEM()", 432 v == "NELEM_HALFMEM()", 433 v == "NELEM_QUARTERMEM()", 434 v == "NELEM_MEM128()", 435 v == "NELEM_SCALAR()", 436 v == "NELEM_TUPLE1()", 437 v == "NELEM_TUPLE2()", 438 v == "NELEM_TUPLE4()", 439 v == "NELEM_TUPLE8()", 440 v == "NELEM_TUPLE1_4X()", 441 v == "NELEM_TUPLE1_BYTE()", 442 v == "NELEM_TUPLE1_WORD()", 443 v == "NELEM_MOVDDUP()", 444 v == "UISA_VMODRM_XMM()", 445 v == "UISA_VMODRM_YMM()", 446 v == "UISA_VMODRM_ZMM()", 447 v == "MASK=0", 448 v == "FIX_ROUND_LEN128()", 449 v == "FIX_ROUND_LEN512()", 450 v == "AVX512_ROUND()", 451 v == "ZEROING=0", 452 v == "SAE()", 453 v == "VL512", // VL=2 454 v == "not_refining_f3", 455 strings.HasPrefix(v, "MODEP5="): 456 default: 457 return errSkip(fmt.Sprintf("unknown pattern %v", v)) 458 } 459 } 460 if insn.Modrm { 461 switch insn.Mod { 462 case -3, -1, 0, 1, 2, 3: 463 default: 464 return fmt.Errorf("bad MOD value: %v", insn.Mod) 465 } 466 if insn.Reg < -1 || insn.Reg > 7 { 467 return fmt.Errorf("bad REG value: %v", insn.Mod) 468 } 469 if insn.Rm < -1 || insn.Rm > 7 { 470 return fmt.Errorf("bad RM value: %v", insn.Mod) 471 } 472 } 473 if insn.Imm != 0 && len(insn.Suffix) != 0 { 474 return fmt.Errorf("both immediate and suffix opcode") 475 } 476 if insn.Mode == 0 { 477 return errSkip("no modes for instruction") 478 } 479 return nil 480} 481 482func parseOperands(insn *ifuzz.Insn, vals []string) error { 483 for _, v := range vals { 484 switch v { 485 case "REG0=SEG():r", "REG1=SEG():r", "REG0=SEG():w": 486 if insn.Reg != -1 { 487 return fmt.Errorf("REG=SEG() operand, but fixed reg") 488 } 489 insn.Reg = -6 490 case "REG0=CR_R():w", "REG1=CR_R():r": 491 if insn.Reg != -1 { 492 return fmt.Errorf("REG=CR_R() operand, but fixed reg") 493 } 494 insn.Reg = -8 495 insn.NoSibDisp = true 496 case "REG0=DR_R():w", "REG1=DR_R():r": 497 insn.NoSibDisp = true 498 case "MEM0:r:mem16", "MEM0:w:mem16", "MEM0:r:mem16int", "MEM0:w:mem16int": 499 insn.Mem16 = true 500 case "MEM0:r:mem32real", "MEM0:r:mem32int", "MEM0:w:mem32real", "MEM0:w:mem32int": 501 insn.Mem32 = true 502 } 503 } 504 return nil 505} 506 507func parseModrm(v string) (int8, error) { 508 if len(v) < 4 || len(v) > 7 || v[0] != '[' || v[len(v)-1] != ']' { 509 return 0, fmt.Errorf("malformed") 510 } 511 if v == "[mm]" || v == "[rrr]" || v == "[nnn]" { 512 return -1, nil 513 } 514 if !strings.HasPrefix(v, "[0b") { 515 return 0, fmt.Errorf("malformed") 516 } 517 var vv int8 518 for i := 3; i < len(v)-1; i++ { 519 if v[i] != '0' && v[i] != '1' { 520 return 0, fmt.Errorf("malformed") 521 } 522 vv *= 2 523 if v[i] == '1' { 524 vv++ 525 } 526 } 527 return vv, nil 528} 529 530func addImm(insn *ifuzz.Insn, imm int8) { 531 if insn.Imm == 0 { 532 insn.Imm = imm 533 return 534 } 535 if insn.Imm2 == 0 { 536 insn.Imm2 = imm 537 return 538 } 539 panic("too many immediates") 540} 541 542func failf(msg string, args ...interface{}) { 543 fmt.Fprintf(os.Stderr, msg+"\n", args...) 544 os.Exit(1) 545} 546