#!/usr/bin/python
# Test tool to compare Capstone output with llvm-mc. By Nguyen Anh Quynh, 2014
"""Turn MC test inputs into fuzzing corpus files.

Each input file starts with a header line of the form
``# <arch>, <mode>, <option>`` followed by lines such as
``0x90,0x91 = <text>``.  For every line whose hex bytes can be decoded, a
file ``fuzz/corpus/<input basename>_<hex digits>`` is written containing one
byte looked up from the (arch, mode) pair, followed by the decoded bytes.

Usage: pass one input file as the only argument, or feed a list of input
file names (one per line) on stdin.
"""
import binascii
import os
import sys

try:
    # Kept for backward compatibility only: nothing below uses capstone, so
    # a missing binding must not prevent corpus generation.
    from capstone import *  # noqa: F401,F403
except ImportError:
    pass

# Maps the (arch, mode) strings found in an input file's header to the value
# written as the first byte of every corpus file generated from that input.
_MC_MODES = {
    ("CS_ARCH_X86", "CS_MODE_32"): 0,
    ("CS_ARCH_X86", "CS_MODE_64"): 1,
    ("CS_ARCH_ARM", "CS_MODE_ARM"): 2,
    ("CS_ARCH_ARM", "CS_MODE_THUMB"): 3,
    ("CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8"): 4,
    ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8"): 5,
    ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS"): 6,
    ("CS_ARCH_ARM64", "0"): 7,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN"): 8,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO"): 9,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS64"): 10,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32"): 11,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN"): 12,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): 13,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO"): 13,
    ("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): 14,
    ("CS_ARCH_SPARC", "CS_MODE_BIG_ENDIAN"): 15,
    ("CS_ARCH_SPARC", "CS_MODE_BIG_ENDIAN+CS_MODE_V9"): 16,
    ("CS_ARCH_SYSZ", "0"): 17,
    ("CS_ARCH_XCORE", "0"): 18,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_BIG_ENDIAN"): 19,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): 20,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6"): 21,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_MICRO"): 22,
    ("CS_ARCH_M68K", "0"): 23,
    ("CS_ARCH_M680X", "CS_MODE_M680X_6809"): 24,
    ("CS_ARCH_EVM", "0"): 25,
}


def _extract_hex(line):
    """Return the hex digits encoded on *line*, or None if it has none.

    Lines starting with '#' are ignored, a leading '// ' is dropped, and only
    the text before the first ' = ' is considered.  '0x' prefixes and commas
    are removed and surrounding whitespace is stripped.
    """
    if line.startswith('#'):
        return None
    if line.startswith('// '):
        line = line[3:]
    code = line.split(' = ')[0]
    if len(code) < 2 or '//' in code:
        return None
    hex_text = code.replace('0x', '').replace(',', '').strip()
    return hex_text or None


def test_file(fname):
    """Write one corpus file per decodable line of the input file *fname*.

    The first line of *fname* must look like ``# <arch>, <mode>, <option>``.
    If it is missing or malformed, or the (arch, mode) pair is not in
    ``_MC_MODES``, a message is printed and nothing is written.  Lines whose
    hex text cannot be decoded are reported with "skipping" and ignored.

    Output goes to ``fuzz/corpus/`` relative to the current directory; that
    directory must already exist (``open`` raises otherwise).
    """
    print("Test %s" % fname)
    with open(fname) as f:
        lines = f.readlines()

    if not lines or not lines[0].startswith('# '):
        print("ERROR: decoding information is missing")
        return

    # Skip '# ' at the front, then split the header into its three fields.
    # Note: the option field can be '' or 'None'; it is not used here.
    fields = lines[0][2:].split(', ')
    if len(fields) != 3:
        print("ERROR: decoding information is missing")
        return
    arch = fields[0]
    mode = fields[1].replace(' ', '')

    # The pair is the same for every line, so validate it once up front
    # instead of failing halfway with an output file already open.
    key = (arch, mode)
    if key not in _MC_MODES:
        print("fail %s %s" % (arch, mode))
        return
    prefix = bytearray([_MC_MODES[key]])
    base = os.path.basename(fname)

    for line in lines[1:]:
        hex_text = _extract_hex(line)
        if hex_text is None:
            continue
        try:
            # Python 2 raises TypeError here, Python 3 a ValueError subclass.
            data = binascii.unhexlify(hex_text)
        except (TypeError, ValueError):
            print("skipping %s" % hex_text)
            continue
        # Binary mode: the payload is raw bytes, not text.
        with open("fuzz/corpus/%s_%s" % (base, hex_text), 'wb') as fout:
            fout.write(prefix + data)


# Not a pytest test despite its name; keep test collectors from picking it up.
test_file.__test__ = False


if __name__ == '__main__':
    if len(sys.argv) == 1:
        # No argument: read the list of input files from stdin, one per line.
        for entry in sys.stdin.readlines():
            name = entry.strip()
            if name:
                test_file(name)
    else:
        test_file(sys.argv[1])