1 /* Tang Yuhang <tyh000011112222@gmail.com> 2016 */
2 /* pancake <pancake@nopcode.org> 2017 */
3 
4 #include <string.h>
5 #include <ctype.h>
6 #include <errno.h>
7 #include "getopt.h"
8 
9 #include <capstone/capstone.h>
10 
11 static struct {
12 	const char *name;
13 	cs_arch arch;
14 	cs_mode mode;
15 } all_archs[] = {
16 	{ "arm", CS_ARCH_ARM, CS_MODE_ARM },
17 	{ "armb", CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_BIG_ENDIAN },
18 	{ "armbe", CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_BIG_ENDIAN },
19 	{ "arml", CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_LITTLE_ENDIAN },
20 	{ "armle", CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_LITTLE_ENDIAN },
21 	{ "cortexm", CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_THUMB | CS_MODE_MCLASS },
22 	{ "thumb", CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_THUMB },
23 	{ "thumbbe", CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_THUMB | CS_MODE_BIG_ENDIAN },
24 	{ "thumble", CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_THUMB | CS_MODE_LITTLE_ENDIAN },
25 	{ "arm64", CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN },
26 	{ "arm64be", CS_ARCH_ARM64, CS_MODE_BIG_ENDIAN },
27 	{ "mips", CS_ARCH_MIPS, CS_MODE_MIPS32 | CS_MODE_LITTLE_ENDIAN },
28 	{ "mipsbe", CS_ARCH_MIPS, CS_MODE_MIPS32 | CS_MODE_BIG_ENDIAN },
29 	{ "mips64", CS_ARCH_MIPS, CS_MODE_MIPS64 | CS_MODE_LITTLE_ENDIAN },
30 	{ "mips64be", CS_ARCH_MIPS, CS_MODE_MIPS64 | CS_MODE_BIG_ENDIAN },
31 	{ "x16", CS_ARCH_X86, CS_MODE_16 }, // CS_MODE_16
32 	{ "x16att", CS_ARCH_X86, CS_MODE_16 }, // CS_MODE_16 , CS_OPT_SYNTAX_ATT
33 	{ "x32", CS_ARCH_X86, CS_MODE_32 }, // CS_MODE_32
34 	{ "x32att", CS_ARCH_X86, CS_MODE_32 }, // CS_MODE_32, CS_OPT_SYNTAX_ATT
35 	{ "x64", CS_ARCH_X86, CS_MODE_64 }, // CS_MODE_64
36 	{ "x64att", CS_ARCH_X86, CS_MODE_64 }, // CS_MODE_64, CS_OPT_SYNTAX_ATT
37 	{ "ppc64", CS_ARCH_PPC, CS_MODE_64 | CS_MODE_LITTLE_ENDIAN },
38 	{ "ppc64be", CS_ARCH_PPC, CS_MODE_64 | CS_MODE_BIG_ENDIAN },
39 	{ "sparc", CS_ARCH_SPARC, CS_MODE_BIG_ENDIAN },
40 	{ "systemz", CS_ARCH_SYSZ, CS_MODE_BIG_ENDIAN },
41 	{ "sysz", CS_ARCH_SYSZ, CS_MODE_BIG_ENDIAN },
42 	{ "s390x", CS_ARCH_SYSZ, CS_MODE_BIG_ENDIAN },
43 	{ "xcore", CS_ARCH_XCORE, CS_MODE_BIG_ENDIAN },
44 	{ "m68k", CS_ARCH_M68K, CS_MODE_BIG_ENDIAN },
45 	{ "m68k40", CS_ARCH_M68K, CS_MODE_M68K_040 },
46 	{ "tms320c64x", CS_ARCH_TMS320C64X, CS_MODE_BIG_ENDIAN },
47 	{ "tms320c64x", CS_ARCH_TMS320C64X, CS_MODE_BIG_ENDIAN },
48 	{ "m6800", CS_ARCH_M680X, CS_MODE_M680X_6800 },
49 	{ "m6801", CS_ARCH_M680X, CS_MODE_M680X_6801 },
50 	{ "m6805", CS_ARCH_M680X, CS_MODE_M680X_6805 },
51 	{ "m6808", CS_ARCH_M680X, CS_MODE_M680X_6808 },
52 	{ "m6809", CS_ARCH_M680X, CS_MODE_M680X_6809 },
53 	{ "m6811", CS_ARCH_M680X, CS_MODE_M680X_6811 },
54 	{ "cpu12", CS_ARCH_M680X, CS_MODE_M680X_CPU12 },
55 	{ "hd6301", CS_ARCH_M680X, CS_MODE_M680X_6301 },
56 	{ "hd6309", CS_ARCH_M680X, CS_MODE_M680X_6309 },
57 	{ "hcs08", CS_ARCH_M680X, CS_MODE_M680X_HCS08 },
58 	{ "evm", CS_ARCH_EVM, 0 },
59 	{ "mos65xx", CS_ARCH_MOS65XX, 0 },
60 	{ NULL }
61 };
62 
/*
 * Per-architecture detail printers. They are only declared here; each one is
 * invoked by print_details() for the matching cs_arch value. The x86 variant
 * additionally receives the disassembly mode.
 */
void print_insn_detail_x86(csh ud, cs_mode mode, cs_insn *ins);
void print_insn_detail_arm(csh handle, cs_insn *ins);
void print_insn_detail_arm64(csh handle, cs_insn *ins);
void print_insn_detail_mips(csh handle, cs_insn *ins);
void print_insn_detail_ppc(csh handle, cs_insn *ins);
void print_insn_detail_sparc(csh handle, cs_insn *ins);
void print_insn_detail_sysz(csh handle, cs_insn *ins);
void print_insn_detail_xcore(csh handle, cs_insn *ins);
void print_insn_detail_m68k(csh handle, cs_insn *ins);
void print_insn_detail_tms320c64x(csh handle, cs_insn *ins);
void print_insn_detail_m680x(csh handle, cs_insn *ins);
void print_insn_detail_evm(csh handle, cs_insn *ins);
void print_insn_detail_mos65xx(csh handle, cs_insn *ins);

/* Forward declaration of the dispatcher defined further down in this file. */
static void print_details(csh handle, cs_arch arch, cs_mode md, cs_insn *ins);
78 
/*
 * Write @comment to stdout, followed by the @len bytes starting at @str.
 * Each byte is printed as "0x" plus two lowercase hex digits and a trailing
 * space; the line is finished with a newline.
 */
void print_string_hex(const char *comment, unsigned char *str, size_t len)
{
	size_t idx;

	printf("%s", comment);

	for (idx = 0; idx < len; idx++) {
		printf("0x%02x ", str[idx] & 0xff);
	}

	printf("\n");
}
90 
// Map one hexadecimal digit character to its numeric value (0..15).
// The caller must pass a valid hex digit: anything that is neither a decimal
// digit nor in 'a'..'f' is treated as if it were an uppercase 'A'..'F' digit.
static uint8_t char_to_hexnum(char c)
{
	int value;

	if ('0' <= c && c <= '9') {
		value = c - '0';
	} else if ('a' <= c && c <= 'f') {
		value = c - 'a' + 10;
	} else {
		// remaining case: 'A'..'F'
		value = c - 'A' + 10;
	}

	return (uint8_t)value;
}
105 
// Convert user input (char[]) to a newly allocated uint8_t[].
//
// The input is scanned left to right; every pair of adjacent hexadecimal
// digits becomes one output byte, and any other character (separators, a
// lone trailing digit, ...) is skipped. The number of bytes produced is
// stored in @size.
//
// Returns NULL if @code is empty or the allocation fails (in which case
// @size is left untouched). Otherwise the caller owns the returned buffer and
// must free() it; note that the buffer may hold zero bytes (*size == 0) when
// the input contained no hex pairs.
static uint8_t *preprocess(char *code, size_t *size)
{
	size_t i = 0, j = 0;
	size_t len = strlen(code);
	uint8_t high, low;
	uint8_t *result;

	if (len == 0)
		return NULL;

	// Each output byte consumes two input characters, so @len bytes are
	// always sufficient.
	result = (uint8_t *)malloc(len);
	if (result == NULL)
		return NULL;

	while (code[i] != '\0') {
		// <ctype.h> classifiers need a value representable as unsigned
		// char; passing a negative plain char is undefined behaviour.
		// code[i + 1] is at worst the terminating NUL, never out of bounds.
		if (isxdigit((unsigned char)code[i]) && isxdigit((unsigned char)code[i + 1])) {
			high = 16 * char_to_hexnum(code[i]);
			low = char_to_hexnum(code[i + 1]);
			result[j] = high + low;
			i++;	// consume the second digit of the pair
			j++;
		}
		i++;
	}
	*size = j;

	return result;
}
134 
usage(char * prog)135 static void usage(char *prog)
136 {
137 	printf("Cstool for Capstone Disassembler Engine v%u.%u.%u\n\n", CS_VERSION_MAJOR, CS_VERSION_MINOR, CS_VERSION_EXTRA);
138 	printf("Syntax: %s [-u|-d|-s|-v] <arch+mode> <assembly-hexstring> [start-address-in-hex-format]\n", prog);
139 	printf("\nThe following <arch+mode> options are supported:\n");
140 
141 	if (cs_support(CS_ARCH_X86)) {
142 		printf("        x16         16-bit mode (X86)\n");
143 		printf("        x32         32-bit mode (X86)\n");
144 		printf("        x64         64-bit mode (X86)\n");
145 		printf("        x16att      16-bit mode (X86), syntax AT&T\n");
146 		printf("        x32att      32-bit mode (X86), syntax AT&T\n");
147 		printf("        x64att      64-bit mode (X86), syntax AT&T\n");
148 	}
149 
150 	if (cs_support(CS_ARCH_ARM)) {
151 		printf("        arm         arm\n");
152 		printf("        armbe       arm + big endian\n");
153 		printf("        thumb       thumb mode\n");
154 		printf("        thumbbe     thumb + big endian\n");
155 		printf("        cortexm     thumb + cortex-m extensions\n");
156 	}
157 
158 	if (cs_support(CS_ARCH_ARM64)) {
159 		printf("        arm64       aarch64 mode\n");
160 		printf("        arm64be     aarch64 + big endian\n");
161 	}
162 
163 	if (cs_support(CS_ARCH_MIPS)) {
164 		printf("        mips        mips32 + little endian\n");
165 		printf("        mipsbe      mips32 + big endian\n");
166 		printf("        mips64      mips64 + little endian\n");
167 		printf("        mips64be    mips64 + big endian\n");
168 	}
169 
170 	if (cs_support(CS_ARCH_PPC)) {
171 		printf("        ppc64       ppc64 + little endian\n");
172 		printf("        ppc64be     ppc64 + big endian\n");
173 	}
174 
175 	if (cs_support(CS_ARCH_SPARC)) {
176 		printf("        sparc       sparc\n");
177 	}
178 
179 	if (cs_support(CS_ARCH_SYSZ)) {
180 		printf("        systemz     systemz (s390x)\n");
181 	}
182 
183 	if (cs_support(CS_ARCH_XCORE)) {
184 		printf("        xcore       xcore\n");
185 	}
186 
187 	if (cs_support(CS_ARCH_M68K)) {
188 		printf("        m68k        m68k + big endian\n");
189 		printf("        m68k40      m68k_040\n");
190 	}
191 
192 	if (cs_support(CS_ARCH_TMS320C64X)) {
193 		printf("        tms320c64x  TMS320C64x\n");
194 	}
195 
196 	if (cs_support(CS_ARCH_M680X)) {
197 		printf("        m6800       M6800/2\n");
198 		printf("        m6801       M6801/3\n");
199 		printf("        m6805       M6805\n");
200 		printf("        m6808       M68HC08\n");
201 		printf("        m6809       M6809\n");
202 		printf("        m6811       M68HC11\n");
203 		printf("        cpu12       M68HC12/HCS12\n");
204 		printf("        hd6301      HD6301/3\n");
205 		printf("        hd6309      HD6309\n");
206 		printf("        hcs08       HCS08\n");
207 	}
208 
209 	if (cs_support(CS_ARCH_EVM)) {
210 		printf("        evm         Ethereum Virtual Machine\n");
211 	}
212 
213 	if (cs_support(CS_ARCH_MOS65XX)) {
214 		printf("        mos65xx     MOS65XX family\n");
215 	}
216 
217 	printf("\nExtra options:\n");
218 	printf("        -d show detailed information of the instructions\n");
219 	printf("        -u show immediates as unsigned\n");
220 	printf("        -s decode in SKIPDATA mode\n");
221 	printf("        -v show version & Capstone core build info\n\n");
222 }
223 
/*
 * Print the detailed view of one disassembled instruction.
 *
 * Dispatches to the printer matching @arch (only the x86 printer also needs
 * the mode @md), then lists the names of the groups the instruction belongs
 * to, and finally emits a blank separator line.
 *
 * @ins->detail may be NULL: Capstone only provides it when CS_OPT_DETAIL is
 * on, and not for the "data" pseudo-instructions produced in SKIPDATA mode.
 * In that case the group list is simply skipped.
 */
static void print_details(csh handle, cs_arch arch, cs_mode md, cs_insn *ins)
{
	switch(arch) {
		case CS_ARCH_X86:
			print_insn_detail_x86(handle, md, ins);
			break;
		case CS_ARCH_ARM:
			print_insn_detail_arm(handle, ins);
			break;
		case CS_ARCH_ARM64:
			print_insn_detail_arm64(handle, ins);
			break;
		case CS_ARCH_MIPS:
			print_insn_detail_mips(handle, ins);
			break;
		case CS_ARCH_PPC:
			print_insn_detail_ppc(handle, ins);
			break;
		case CS_ARCH_SPARC:
			print_insn_detail_sparc(handle, ins);
			break;
		case CS_ARCH_SYSZ:
			print_insn_detail_sysz(handle, ins);
			break;
		case CS_ARCH_XCORE:
			print_insn_detail_xcore(handle, ins);
			break;
		case CS_ARCH_M68K:
			print_insn_detail_m68k(handle, ins);
			break;
		case CS_ARCH_TMS320C64X:
			print_insn_detail_tms320c64x(handle, ins);
			break;
		case CS_ARCH_M680X:
			print_insn_detail_m680x(handle, ins);
			break;
		case CS_ARCH_EVM:
			print_insn_detail_evm(handle, ins);
			break;
		case CS_ARCH_MOS65XX:
			print_insn_detail_mos65xx(handle, ins);
			break;
		default: break;
	}

	// Guard against a missing detail record (e.g. SKIPDATA "data" entries)
	// before touching the group list.
	if (ins->detail != NULL && ins->detail->groups_count) {
		int j;

		printf("\tGroups: ");
		for(j = 0; j < ins->detail->groups_count; j++) {
			printf("%s ", cs_group_name(handle, ins->detail->groups[j]));
		}
		printf("\n");
	}

	printf("\n");
}
281 
main(int argc,char ** argv)282 int main(int argc, char **argv)
283 {
284 	int i, c;
285 	csh handle;
286 	char *mode;
287 	uint8_t *assembly;
288 	size_t count, size;
289 	uint64_t address = 0LL;
290 	cs_insn *insn;
291 	cs_err err;
292 	cs_mode md;
293 	cs_arch arch = CS_ARCH_ALL;
294 	bool detail_flag = false;
295 	bool unsigned_flag = false;
296 	bool skipdata = false;
297 	int args_left;
298 
299 	while ((c = getopt (argc, argv, "sudhv")) != -1) {
300 		switch (c) {
301 			case 's':
302 				skipdata = true;
303 				break;
304 			case 'u':
305 				unsigned_flag = true;
306 				break;
307 			case 'd':
308 				detail_flag = true;
309 				break;
310 			case 'v':
311 				printf("cstool for Capstone Disassembler, v%u.%u.%u\n", CS_VERSION_MAJOR, CS_VERSION_MINOR, CS_VERSION_EXTRA);
312 
313 				printf("Capstone build: ");
314 				if (cs_support(CS_ARCH_X86)) {
315 					printf("x86=1 ");
316 				}
317 
318 				if (cs_support(CS_ARCH_ARM)) {
319 					printf("arm=1 ");
320 				}
321 
322 				if (cs_support(CS_ARCH_ARM64)) {
323 					printf("arm64=1 ");
324 				}
325 
326 				if (cs_support(CS_ARCH_MIPS)) {
327 					printf("mips=1 ");
328 				}
329 
330 				if (cs_support(CS_ARCH_PPC)) {
331 					printf("ppc=1 ");
332 				}
333 
334 				if (cs_support(CS_ARCH_SPARC)) {
335 					printf("sparc=1 ");
336 				}
337 
338 				if (cs_support(CS_ARCH_SYSZ)) {
339 					printf("sysz=1 ");
340 				}
341 
342 				if (cs_support(CS_ARCH_XCORE)) {
343 					printf("xcore=1 ");
344 				}
345 
346 				if (cs_support(CS_ARCH_M68K)) {
347 					printf("m68k=1 ");
348 				}
349 
350 				if (cs_support(CS_ARCH_TMS320C64X)) {
351 					printf("tms320c64x=1 ");
352 				}
353 
354 				if (cs_support(CS_ARCH_M680X)) {
355 					printf("m680x=1 ");
356 				}
357 
358 				if (cs_support(CS_ARCH_EVM)) {
359 					printf("evm=1 ");
360 				}
361 
362 				if (cs_support(CS_ARCH_MOS65XX)) {
363 					printf("mos65xx=1 ");
364 				}
365 
366 				if (cs_support(CS_SUPPORT_DIET)) {
367 					printf("diet=1 ");
368 				}
369 
370 				if (cs_support(CS_SUPPORT_X86_REDUCE)) {
371 					printf("x86_reduce=1 ");
372 				}
373 
374 				printf("\n");
375 				return 0;
376 			case 'h':
377 				usage(argv[0]);
378 				return 0;
379 			default:
380 				usage(argv[0]);
381 				return -1;
382 		}
383 	}
384 
385 	args_left = argc - optind;
386 	if (args_left < 2 || args_left > 3) {
387 		usage(argv[0]);
388 		return -1;
389 	}
390 
391 	mode = argv[optind];
392 	assembly = preprocess(argv[optind + 1], &size);
393 	if (!assembly) {
394 		usage(argv[0]);
395 		return -1;
396 	}
397 
398 	if (args_left == 3) {
399 		char *temp, *src = argv[optind + 2];
400 		address = strtoull(src, &temp, 16);
401 		if (temp == src || *temp != '\0' || errno == ERANGE) {
402 			printf("ERROR: invalid address argument, quit!\n");
403 			return -2;
404 		}
405 	}
406 
407 	for (i = 0; all_archs[i].name; i++) {
408 		if (!strcmp(all_archs[i].name, mode)) {
409 			arch = all_archs[i].arch;
410 			err = cs_open(all_archs[i].arch, all_archs[i].mode, &handle);
411 			if (!err) {
412 				md = all_archs[i].mode;
413 				if (strstr (mode, "att")) {
414 					cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
415 				}
416 
417 				// turn on SKIPDATA mode
418 				if (skipdata)
419 					cs_option(handle, CS_OPT_SKIPDATA, CS_OPT_ON);
420 			}
421 			break;
422 		}
423 	}
424 
425 	if (arch == CS_ARCH_ALL) {
426 		printf("ERROR: Invalid <arch+mode>: \"%s\", quit!\n", mode);
427 		usage(argv[0]);
428 		return -1;
429 	}
430 
431 	if (err) {
432 		printf("ERROR: Failed on cs_open(), quit!\n");
433 		usage(argv[0]);
434 		return -1;
435 	}
436 
437 	if (detail_flag) {
438 		cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON);
439 	}
440 
441 	if (unsigned_flag) {
442 		cs_option(handle, CS_OPT_UNSIGNED, CS_OPT_ON);
443 	}
444 
445 	count = cs_disasm(handle, assembly, size, address, 0, &insn);
446 	if (count > 0) {
447 		size_t i;
448 
449 		for (i = 0; i < count; i++) {
450 			int j;
451 
452 			printf("%2"PRIx64"  ", insn[i].address);
453 			for (j = 0; j < insn[i].size; j++) {
454 				if (j > 0)
455 					putchar(' ');
456 				printf("%02x", insn[i].bytes[j]);
457 			}
458 			// X86 and s390 instruction sizes are variable.
459 			// align assembly instruction after the opcode
460 			if (arch == CS_ARCH_X86) {
461 				for (; j < 16; j++) {
462 					printf("   ");
463 				}
464 			} else if (arch == CS_ARCH_SYSZ) {
465 				for (; j < 6; j++) {
466 					printf("   ");
467 				}
468 			}
469 
470 			printf("  %s\t%s\n", insn[i].mnemonic, insn[i].op_str);
471 
472 			if (detail_flag) {
473 				print_details(handle, arch, md, &insn[i]);
474 			}
475 		}
476 
477 		cs_free(insn, count);
478 	} else {
479 		printf("ERROR: invalid assembly code\n");
480 		return(-4);
481 	}
482 
483 	cs_close(&handle);
484 	free(assembly);
485 
486 	return 0;
487 }
488