1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file is part of the X86 Disassembler.
11 // It contains code to translate the data produced by the decoder into
12 //  MCInsts.
13 // Documentation for the disassembler can be found in X86Disassembler.h.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 /* Capstone Disassembly Engine */
18 /* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2014 */
19 
20 #ifdef CAPSTONE_HAS_X86
21 
22 #include <string.h>
23 
24 #include "../../cs_priv.h"
25 
26 #include "X86Disassembler.h"
27 #include "X86DisassemblerDecoderCommon.h"
28 #include "X86DisassemblerDecoder.h"
29 #include "../../MCInst.h"
30 #include "../../utils.h"
31 #include "X86Mapping.h"
32 
33 #define GET_REGINFO_ENUM
34 #define GET_REGINFO_MC_DESC
35 #include "X86GenRegisterInfo.inc"
36 
37 #define GET_INSTRINFO_ENUM
38 #ifdef CAPSTONE_X86_REDUCE
39 #include "X86GenInstrInfo_reduce.inc"
40 #else
41 #include "X86GenInstrInfo.inc"
42 #endif
43 
// Placeholder register ids.  The automatically generated switch statements
//   in this file paste X86_ in front of every effective-address base name,
//   so these identifiers have to exist to keep the compiler happy; the
//   constants themselves are never actually assigned.
enum {
	X86_BX_SI = 500,
	X86_BX_DI,	// 501
	X86_BP_SI,	// 502
	X86_BP_DI,	// 503
	X86_sib,	// 504
	X86_sib64	// 505
};
55 
56 //
57 // Private code that translates from struct InternalInstructions to MCInsts.
58 //
59 
60 /// translateRegister - Translates an internal register to the appropriate LLVM
61 ///   register, and appends it as an operand to an MCInst.
62 ///
63 /// @param mcInst     - The MCInst to append to.
64 /// @param reg        - The Reg to append.
translateRegister(MCInst * mcInst,Reg reg)65 static void translateRegister(MCInst *mcInst, Reg reg)
66 {
67 #define ENTRY(x) X86_##x,
68 	uint8_t llvmRegnums[] = {
69 		ALL_REGS
70 			0
71 	};
72 #undef ENTRY
73 
74 	uint8_t llvmRegnum = llvmRegnums[reg];
75 	MCOperand_CreateReg0(mcInst, llvmRegnum);
76 }
77 
// Maps a segment-override value (used as the array index) to the register id
// that is appended as the segment operand.  Index 0 yields register 0, i.e.
// no segment register.
// NOTE(review): the entry order presumably mirrors the segment-override enum
// that ends in SEG_OVERRIDE_max - confirm against its declaration.
static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
	0,        // SEG_OVERRIDE_NONE
	X86_CS,
	X86_SS,
	X86_DS,
	X86_ES,
	X86_FS,
	X86_GS
};
87 
88 /// translateSrcIndex   - Appends a source index operand to an MCInst.
89 ///
90 /// @param mcInst       - The MCInst to append to.
91 /// @param insn         - The internal instruction.
translateSrcIndex(MCInst * mcInst,InternalInstruction * insn)92 static bool translateSrcIndex(MCInst *mcInst, InternalInstruction *insn)
93 {
94 	unsigned baseRegNo;
95 
96 	if (insn->mode == MODE_64BIT)
97 		baseRegNo = insn->isPrefix67 ? X86_ESI : X86_RSI;
98 	else if (insn->mode == MODE_32BIT)
99 		baseRegNo = insn->isPrefix67 ? X86_SI : X86_ESI;
100 	else {
101 		// assert(insn->mode == MODE_16BIT);
102 		baseRegNo = insn->isPrefix67 ? X86_ESI : X86_SI;
103 	}
104 
105 	MCOperand_CreateReg0(mcInst, baseRegNo);
106 
107 	MCOperand_CreateReg0(mcInst, segmentRegnums[insn->segmentOverride]);
108 
109 	return false;
110 }
111 
112 /// translateDstIndex   - Appends a destination index operand to an MCInst.
113 ///
114 /// @param mcInst       - The MCInst to append to.
115 /// @param insn         - The internal instruction.
translateDstIndex(MCInst * mcInst,InternalInstruction * insn)116 static bool translateDstIndex(MCInst *mcInst, InternalInstruction *insn)
117 {
118 	unsigned baseRegNo;
119 
120 	if (insn->mode == MODE_64BIT)
121 		baseRegNo = insn->isPrefix67 ? X86_EDI : X86_RDI;
122 	else if (insn->mode == MODE_32BIT)
123 		baseRegNo = insn->isPrefix67 ? X86_DI : X86_EDI;
124 	else {
125 		// assert(insn->mode == MODE_16BIT);
126 		baseRegNo = insn->isPrefix67 ? X86_EDI : X86_DI;
127 	}
128 
129 	MCOperand_CreateReg0(mcInst, baseRegNo);
130 
131 	return false;
132 }
133 
134 /// translateImmediate  - Appends an immediate operand to an MCInst.
135 ///
136 /// @param mcInst       - The MCInst to append to.
137 /// @param immediate    - The immediate value to append.
138 /// @param operand      - The operand, as stored in the descriptor table.
139 /// @param insn         - The internal instruction.
translateImmediate(MCInst * mcInst,uint64_t immediate,const OperandSpecifier * operand,InternalInstruction * insn)140 static void translateImmediate(MCInst *mcInst, uint64_t immediate,
141 		const OperandSpecifier *operand, InternalInstruction *insn)
142 {
143 	OperandType type;
144 
145 	type = (OperandType)operand->type;
146 	if (type == TYPE_RELv) {
147 		//isBranch = true;
148 		//pcrel = insn->startLocation + insn->immediateOffset + insn->immediateSize;
149 		switch (insn->displacementSize) {
150 			case 1:
151 				if (immediate & 0x80)
152 					immediate |= ~(0xffull);
153 				break;
154 			case 2:
155 				if (immediate & 0x8000)
156 					immediate |= ~(0xffffull);
157 				break;
158 			case 4:
159 				if (immediate & 0x80000000)
160 					immediate |= ~(0xffffffffull);
161 				break;
162 			case 8:
163 				break;
164 			default:
165 				break;
166 		}
167 	} // By default sign-extend all X86 immediates based on their encoding.
168 	else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
169 			type == TYPE_IMM64 || type == TYPE_IMMv) {
170 
171 		uint32_t Opcode = MCInst_getOpcode(mcInst);
172 		bool check_opcode;
173 
174 		switch (operand->encoding) {
175 			default:
176 				break;
177 			case ENCODING_IB:
178 				// Special case those X86 instructions that use the imm8 as a set of
179 				// bits, bit count, etc. and are not sign-extend.
180 				check_opcode = (Opcode != X86_INT);
181 #ifndef CAPSTONE_X86_REDUCE
182         check_opcode = ((Opcode != X86_BLENDPSrri &&
183 						            Opcode != X86_BLENDPDrri &&
184 						            Opcode != X86_PBLENDWrri &&
185 						            Opcode != X86_MPSADBWrri &&
186 						            Opcode != X86_DPPSrri &&
187 						            Opcode != X86_DPPDrri &&
188 						            Opcode != X86_INSERTPSrr &&
189 						            Opcode != X86_VBLENDPSYrri &&
190 						            Opcode != X86_VBLENDPSYrmi &&
191 						            Opcode != X86_VBLENDPDYrri &&
192 						            Opcode != X86_VBLENDPDYrmi &&
193 						            Opcode != X86_VPBLENDWrri &&
194 						            Opcode != X86_VMPSADBWrri &&
195 						            Opcode != X86_VDPPSYrri &&
196 						            Opcode != X86_VDPPSYrmi &&
197 						            Opcode != X86_VDPPDrri &&
198 						            Opcode != X86_VINSERTPSrr) && check_opcode);
199 #endif
200 				if (check_opcode)
201 						if(immediate & 0x80)
202 							immediate |= ~(0xffull);
203 				break;
204 			case ENCODING_IW:
205 				if(immediate & 0x8000)
206 					immediate |= ~(0xffffull);
207 				break;
208 			case ENCODING_ID:
209 				if(immediate & 0x80000000)
210 					immediate |= ~(0xffffffffull);
211 				break;
212 			case ENCODING_IO:
213 				break;
214 		}
215 	} else if (type == TYPE_IMM3) {
216 #ifndef CAPSTONE_X86_REDUCE
217 		// Check for immediates that printSSECC can't handle.
218 		if (immediate >= 8) {
219 			unsigned NewOpc = 0;
220 
221 			switch (MCInst_getOpcode(mcInst)) {
222 				default: break;	// never reach
223 				case X86_CMPPDrmi: NewOpc = X86_CMPPDrmi_alt; break;
224 				case X86_CMPPDrri: NewOpc = X86_CMPPDrri_alt; break;
225 				case X86_CMPPSrmi: NewOpc = X86_CMPPSrmi_alt; break;
226 				case X86_CMPPSrri: NewOpc = X86_CMPPSrri_alt; break;
227 				case X86_CMPSDrm:  NewOpc = X86_CMPSDrm_alt;  break;
228 				case X86_CMPSDrr:  NewOpc = X86_CMPSDrr_alt;  break;
229 				case X86_CMPSSrm:  NewOpc = X86_CMPSSrm_alt;  break;
230 				case X86_CMPSSrr:  NewOpc = X86_CMPSSrr_alt;  break;
231 			}
232 			// Switch opcode to the one that doesn't get special printing.
233 			if (NewOpc != 0) {
234 				MCInst_setOpcode(mcInst, NewOpc);
235 			}
236 		}
237 #endif
238 	} else if (type == TYPE_IMM5) {
239 #ifndef CAPSTONE_X86_REDUCE
240 		// Check for immediates that printAVXCC can't handle.
241 		if (immediate >= 32) {
242 			unsigned NewOpc = 0;
243 
244 			switch (MCInst_getOpcode(mcInst)) {
245 				default: break; // unexpected opcode
246 				case X86_VCMPPDrmi:  NewOpc = X86_VCMPPDrmi_alt;  break;
247 				case X86_VCMPPDrri:  NewOpc = X86_VCMPPDrri_alt;  break;
248 				case X86_VCMPPSrmi:  NewOpc = X86_VCMPPSrmi_alt;  break;
249 				case X86_VCMPPSrri:  NewOpc = X86_VCMPPSrri_alt;  break;
250 				case X86_VCMPSDrm:   NewOpc = X86_VCMPSDrm_alt;   break;
251 				case X86_VCMPSDrr:   NewOpc = X86_VCMPSDrr_alt;   break;
252 				case X86_VCMPSSrm:   NewOpc = X86_VCMPSSrm_alt;   break;
253 				case X86_VCMPSSrr:   NewOpc = X86_VCMPSSrr_alt;   break;
254 				case X86_VCMPPDYrmi: NewOpc = X86_VCMPPDYrmi_alt; break;
255 				case X86_VCMPPDYrri: NewOpc = X86_VCMPPDYrri_alt; break;
256 				case X86_VCMPPSYrmi: NewOpc = X86_VCMPPSYrmi_alt; break;
257 				case X86_VCMPPSYrri: NewOpc = X86_VCMPPSYrri_alt; break;
258 				case X86_VCMPPDZrmi: NewOpc = X86_VCMPPDZrmi_alt; break;
259 				case X86_VCMPPDZrri: NewOpc = X86_VCMPPDZrri_alt; break;
260 				case X86_VCMPPSZrmi: NewOpc = X86_VCMPPSZrmi_alt; break;
261 				case X86_VCMPPSZrri: NewOpc = X86_VCMPPSZrri_alt; break;
262 				case X86_VCMPSDZrm:  NewOpc = X86_VCMPSDZrmi_alt; break;
263 				case X86_VCMPSDZrr:  NewOpc = X86_VCMPSDZrri_alt; break;
264 				case X86_VCMPSSZrm:  NewOpc = X86_VCMPSSZrmi_alt; break;
265 				case X86_VCMPSSZrr:  NewOpc = X86_VCMPSSZrri_alt; break;
266 			}
267 			// Switch opcode to the one that doesn't get special printing.
268 			if (NewOpc != 0) {
269 				MCInst_setOpcode(mcInst, NewOpc);
270 			}
271 		}
272 #endif
273 	}
274 
275 	switch (type) {
276 		case TYPE_XMM32:
277 		case TYPE_XMM64:
278 		case TYPE_XMM128:
279 			MCOperand_CreateReg0(mcInst, X86_XMM0 + ((uint32_t)immediate >> 4));
280 			return;
281 		case TYPE_XMM256:
282 			MCOperand_CreateReg0(mcInst, X86_YMM0 + ((uint32_t)immediate >> 4));
283 			return;
284 		case TYPE_XMM512:
285 			MCOperand_CreateReg0(mcInst, X86_ZMM0 + ((uint32_t)immediate >> 4));
286 			return;
287 		case TYPE_REL8:
288 			if(immediate & 0x80)
289 				immediate |= ~(0xffull);
290 			break;
291 		case TYPE_REL32:
292 		case TYPE_REL64:
293 			if(immediate & 0x80000000)
294 				immediate |= ~(0xffffffffull);
295 			break;
296 		default:
297 			// operand is 64 bits wide.  Do nothing.
298 			break;
299 	}
300 
301 	MCOperand_CreateImm0(mcInst, immediate);
302 
303 	if (type == TYPE_MOFFS8 || type == TYPE_MOFFS16 ||
304 			type == TYPE_MOFFS32 || type == TYPE_MOFFS64) {
305 		MCOperand_CreateReg0(mcInst, segmentRegnums[insn->segmentOverride]);
306 	}
307 }
308 
/// translateRMRegister - Translates a register stored in the R/M field of the
///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
///
/// The decoded value is read from insn->eaBase.  Only the EA_REG_* values are
/// accepted; a SIB marker, EA_BASE_NONE, or any EA_BASE_* value is rejected
/// without appending an operand.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The internal instruction to extract the R/M field
///                       from.
/// @return             - false on success; true otherwise.
static bool translateRMRegister(MCInst *mcInst, InternalInstruction *insn)
{
	if (insn->eaBase == EA_BASE_sib || insn->eaBase == EA_BASE_sib64) {
		//debug("A R/M register operand may not have a SIB byte");
		return true;
	}

	switch (insn->eaBase) {
		case EA_BASE_NONE:
			//debug("EA_BASE_NONE for ModR/M base");
			return true;
			// One case label per EA_BASE_* entry, all sharing the
			// "return true" below: a base is not a register operand.
#define ENTRY(x) case EA_BASE_##x:
			ALL_EA_BASES
#undef ENTRY
				//debug("A R/M register operand may not have a base; "
				//      "the operand must be a register.");
				return true;
			// One case per register: append the matching X86_* register
			// and leave the switch.
#define ENTRY(x)                                                      \
		case EA_REG_##x:                                                    \
			MCOperand_CreateReg0(mcInst, X86_##x); break;
			ALL_REGS
#undef ENTRY
		default:
				//debug("Unexpected EA base register");
				return true;
	}

	return false;
}
344 
/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
///   fields of an internal instruction (and possibly its SIB byte) to a memory
///   operand in LLVM's format, and appends it to an MCInst.
///
/// Note that for the gather opcodes listed below this function rewrites
/// insn->sibIndex in place to the matching vector index register.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
///                       from.
/// @return             - false on success; true otherwise.  On failure some
///                       operands may already have been appended.
static bool translateRMMemory(MCInst *mcInst, InternalInstruction *insn)
{
	// Addresses in an MCInst are represented as five operands:
	//   1. basereg       (register)  The R/M base, or (if there is a SIB) the
	//                                SIB base
	//   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified
	//                                scale amount
	//   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB)
	//                                the index (which is multiplied by the
	//                                scale amount)
	//   4. displacement  (immediate) 0, or the displacement if there is one
	//   5. segmentreg    (register)  x86_registerNONE for now, but could be set
	//                                if we have segment overrides

	bool IndexIs512, IndexIs128, IndexIs256;
	int scaleAmount, indexReg;
#ifndef CAPSTONE_X86_REDUCE
	uint32_t Opcode;
#endif

	if (insn->eaBase == EA_BASE_sib || insn->eaBase == EA_BASE_sib64) {
		// SIB form: operand 1 is the SIB base, or register 0 if there is none.
		if (insn->sibBase != SIB_BASE_NONE) {
			switch (insn->sibBase) {
#define ENTRY(x)                                          \
				case SIB_BASE_##x:                                  \
				MCOperand_CreateReg0(mcInst, X86_##x); break;
				ALL_SIB_BASES
#undef ENTRY
				default:
					//debug("Unexpected sibBase");
					return true;
			}
		} else {
			MCOperand_CreateReg0(mcInst, 0);
		}

		// Check whether we are handling VSIB addressing mode for GATHER.
		// If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and
		// we should use SIB_INDEX_XMM4|YMM4 for VSIB.
		// I don't see a way to get the correct IndexReg in readSIB:
		//   We can tell whether it is VSIB or SIB after instruction ID is decoded,
		//   but instruction ID may not be decoded yet when calling readSIB.
#ifndef CAPSTONE_X86_REDUCE
		Opcode = MCInst_getOpcode(mcInst);
#endif
		// In a CAPSTONE_X86_REDUCE build the opcode lists are compiled out,
		// so all three flags below are simply false.
		IndexIs128 = (
#ifndef CAPSTONE_X86_REDUCE
				Opcode == X86_VGATHERDPDrm ||
				Opcode == X86_VGATHERDPDYrm ||
				Opcode == X86_VGATHERQPDrm ||
				Opcode == X86_VGATHERDPSrm ||
				Opcode == X86_VGATHERQPSrm ||
				Opcode == X86_VPGATHERDQrm ||
				Opcode == X86_VPGATHERDQYrm ||
				Opcode == X86_VPGATHERQQrm ||
				Opcode == X86_VPGATHERDDrm ||
				Opcode == X86_VPGATHERQDrm ||
#endif
				false
				);
		IndexIs256 = (
#ifndef CAPSTONE_X86_REDUCE
				Opcode == X86_VGATHERQPDYrm ||
				Opcode == X86_VGATHERDPSYrm ||
				Opcode == X86_VGATHERQPSYrm ||
				Opcode == X86_VGATHERDPDZrm ||
				Opcode == X86_VPGATHERDQZrm ||
				Opcode == X86_VPGATHERQQYrm ||
				Opcode == X86_VPGATHERDDYrm ||
				Opcode == X86_VPGATHERQDYrm ||
#endif
				false
				);
		IndexIs512 = (
#ifndef CAPSTONE_X86_REDUCE
				Opcode == X86_VGATHERQPDZrm ||
				Opcode == X86_VGATHERDPSZrm ||
				Opcode == X86_VGATHERQPSZrm ||
				Opcode == X86_VPGATHERQQZrm ||
				Opcode == X86_VPGATHERDDZrm ||
				Opcode == X86_VPGATHERQDZrm ||
#endif
				false
				);

		if (IndexIs128 || IndexIs256 || IndexIs512) {
			// Distance of the decoded index from the first general-purpose
			// index of the current address size.  The value is only used
			// when sibIndex is not SIB_INDEX_NONE (see below).
			unsigned IndexOffset = insn->sibIndex -
				(insn->addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
			SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 :
				IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;

			// Rebase the index onto the vector register file; a missing
			// index means vector register number 4.
			insn->sibIndex = (SIBIndex)(IndexBase + (insn->sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
		}

		if (insn->sibIndex != SIB_INDEX_NONE) {
			switch (insn->sibIndex) {
				default:
					//debug("Unexpected sibIndex");
					return true;
#define ENTRY(x)                                          \
				case SIB_INDEX_##x:                                 \
					indexReg = X86_##x; break;
					EA_BASES_32BIT
						EA_BASES_64BIT
						REGS_XMM
						REGS_YMM
						REGS_ZMM
#undef ENTRY
			}
		} else {
			indexReg = 0;
		}

		scaleAmount = insn->sibScale;
	} else {
		// No SIB byte: the base (and, for the 16-bit pairs, the index) comes
		// straight from eaBase and the scale is fixed at 1.
		switch (insn->eaBase) {
			case EA_BASE_NONE:
				if (insn->eaDisplacement == EA_DISP_NONE) {
					//debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
					return true;
				}
				if (insn->mode == MODE_64BIT) {
					if (insn->prefix3 == 0x67)	// address-size prefix overrides RIP relative addressing
						MCOperand_CreateReg0(mcInst, X86_EIP);
					else
						MCOperand_CreateReg0(mcInst, X86_RIP); // Section 2.2.1.6
				} else {
					MCOperand_CreateReg0(mcInst, 0);
				}

				indexReg = 0;
				break;
			case EA_BASE_BX_SI:
				MCOperand_CreateReg0(mcInst, X86_BX);
				indexReg = X86_SI;
				break;
			case EA_BASE_BX_DI:
				MCOperand_CreateReg0(mcInst, X86_BX);
				indexReg = X86_DI;
				break;
			case EA_BASE_BP_SI:
				MCOperand_CreateReg0(mcInst, X86_BP);
				indexReg = X86_SI;
				break;
			case EA_BASE_BP_DI:
				MCOperand_CreateReg0(mcInst, X86_BP);
				indexReg = X86_DI;
				break;
			default:
				indexReg = 0;
				switch (insn->eaBase) {
					default:
						//debug("Unexpected eaBase");
						return true;
						// Here, we will use the fill-ins defined above.  However,
						//   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
						//   sib and sib64 were handled in the top-level if, so they're only
						//   placeholders to keep the compiler happy.
#define ENTRY(x)                                        \
					case EA_BASE_##x:                                 \
						  MCOperand_CreateReg0(mcInst, X86_##x); break;
						ALL_EA_BASES
#undef ENTRY
						// Every EA_REG_* value shares the "return true" below.
#define ENTRY(x) case EA_REG_##x:
							ALL_REGS
#undef ENTRY
							//debug("A R/M memory operand may not be a register; "
							//      "the base field must be a base.");
							return true;
				}
		}

		scaleAmount = 1;
	}

	// Operands 2-5 of the address; the base register was appended above.
	MCOperand_CreateImm0(mcInst, scaleAmount);
	MCOperand_CreateReg0(mcInst, indexReg);
	MCOperand_CreateImm0(mcInst, insn->displacement);

	MCOperand_CreateReg0(mcInst, segmentRegnums[insn->segmentOverride]);

	return false;
}
536 
537 /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
538 ///   byte of an instruction to LLVM form, and appends it to an MCInst.
539 ///
540 /// @param mcInst       - The MCInst to append to.
541 /// @param operand      - The operand, as stored in the descriptor table.
542 /// @param insn         - The instruction to extract Mod, R/M, and SIB fields
543 ///                       from.
544 /// @return             - 0 on success; nonzero otherwise
translateRM(MCInst * mcInst,const OperandSpecifier * operand,InternalInstruction * insn)545 static bool translateRM(MCInst *mcInst, const OperandSpecifier *operand,
546 		InternalInstruction *insn)
547 {
548 	switch (operand->type) {
549 		case TYPE_R8:
550 		case TYPE_R16:
551 		case TYPE_R32:
552 		case TYPE_R64:
553 		case TYPE_Rv:
554 		case TYPE_MM:
555 		case TYPE_MM32:
556 		case TYPE_MM64:
557 		case TYPE_XMM:
558 		case TYPE_XMM32:
559 		case TYPE_XMM64:
560 		case TYPE_XMM128:
561 		case TYPE_XMM256:
562 		case TYPE_XMM512:
563 		case TYPE_VK1:
564 		case TYPE_VK8:
565 		case TYPE_VK16:
566 		case TYPE_DEBUGREG:
567 		case TYPE_CONTROLREG:
568 			return translateRMRegister(mcInst, insn);
569 		case TYPE_M:
570 		case TYPE_M8:
571 		case TYPE_M16:
572 		case TYPE_M32:
573 		case TYPE_M64:
574 		case TYPE_M128:
575 		case TYPE_M256:
576 		case TYPE_M512:
577 		case TYPE_Mv:
578 		case TYPE_M32FP:
579 		case TYPE_M64FP:
580 		case TYPE_M80FP:
581 		case TYPE_M16INT:
582 		case TYPE_M32INT:
583 		case TYPE_M64INT:
584 		case TYPE_M1616:
585 		case TYPE_M1632:
586 		case TYPE_M1664:
587 		case TYPE_LEA:
588 			return translateRMMemory(mcInst, insn);
589 		default:
590 			//debug("Unexpected type for a R/M operand");
591 			return true;
592 	}
593 }
594 
595 /// translateFPRegister - Translates a stack position on the FPU stack to its
596 ///   LLVM form, and appends it to an MCInst.
597 ///
598 /// @param mcInst       - The MCInst to append to.
599 /// @param stackPos     - The stack position to translate.
translateFPRegister(MCInst * mcInst,uint8_t stackPos)600 static void translateFPRegister(MCInst *mcInst, uint8_t stackPos)
601 {
602 	MCOperand_CreateReg0(mcInst, X86_ST0 + stackPos);
603 }
604 
605 /// translateMaskRegister - Translates a 3-bit mask register number to
606 ///   LLVM form, and appends it to an MCInst.
607 ///
608 /// @param mcInst       - The MCInst to append to.
609 /// @param maskRegNum   - Number of mask register from 0 to 7.
610 /// @return             - false on success; true otherwise.
translateMaskRegister(MCInst * mcInst,uint8_t maskRegNum)611 static bool translateMaskRegister(MCInst *mcInst, uint8_t maskRegNum)
612 {
613 	if (maskRegNum >= 8) {
614 		// debug("Invalid mask register number");
615 		return true;
616 	}
617 
618 	MCOperand_CreateReg0(mcInst, X86_K0 + maskRegNum);
619 
620 	return false;
621 }
622 
623 /// translateOperand - Translates an operand stored in an internal instruction
624 ///   to LLVM's format and appends it to an MCInst.
625 ///
626 /// @param mcInst       - The MCInst to append to.
627 /// @param operand      - The operand, as stored in the descriptor table.
628 /// @param insn         - The internal instruction.
629 /// @return             - false on success; true otherwise.
translateOperand(MCInst * mcInst,const OperandSpecifier * operand,InternalInstruction * insn)630 static bool translateOperand(MCInst *mcInst, const OperandSpecifier *operand, InternalInstruction *insn)
631 {
632 	switch (operand->encoding) {
633 		case ENCODING_REG:
634 			translateRegister(mcInst, insn->reg);
635 			return false;
636 		case ENCODING_WRITEMASK:
637 			return translateMaskRegister(mcInst, insn->writemask);
638 		CASE_ENCODING_RM:
639 			return translateRM(mcInst, operand, insn);
640 		case ENCODING_CB:
641 		case ENCODING_CW:
642 		case ENCODING_CD:
643 		case ENCODING_CP:
644 		case ENCODING_CO:
645 		case ENCODING_CT:
646 			//debug("Translation of code offsets isn't supported.");
647 			return true;
648 		case ENCODING_IB:
649 		case ENCODING_IW:
650 		case ENCODING_ID:
651 		case ENCODING_IO:
652 		case ENCODING_Iv:
653 		case ENCODING_Ia:
654 			translateImmediate(mcInst, insn->immediates[insn->numImmediatesTranslated++], operand, insn);
655 			return false;
656 		case ENCODING_SI:
657 			return translateSrcIndex(mcInst, insn);
658 		case ENCODING_DI:
659 			return translateDstIndex(mcInst, insn);
660 		case ENCODING_RB:
661 		case ENCODING_RW:
662 		case ENCODING_RD:
663 		case ENCODING_RO:
664 		case ENCODING_Rv:
665 			translateRegister(mcInst, insn->opcodeRegister);
666 			return false;
667 		case ENCODING_FP:
668 			translateFPRegister(mcInst, insn->modRM & 7);
669 			return false;
670 		case ENCODING_VVVV:
671 			translateRegister(mcInst, insn->vvvv);
672 			return false;
673 		case ENCODING_DUP:
674 			return translateOperand(mcInst, &insn->operands[operand->type - TYPE_DUP0], insn);
675 		default:
676 			//debug("Unhandled operand encoding during translation");
677 			return true;
678 	}
679 }
680 
/// translateInstruction - Translates a decoded internal instruction into an
///   MCInst: sets the opcode, then appends every operand in order.
///
/// @param mcInst       - The MCInst to populate.
/// @param insn         - The internal instruction; its
///                       numImmediatesTranslated counter is reset here.
/// @return             - false on success; true if the instruction has no
///                       specification or an operand fails to translate.
static bool translateInstruction(MCInst *mcInst, InternalInstruction *insn)
{
	int slot;

	if (!insn->spec) {
		//debug("Instruction has no specification");
		return true;
	}

	MCInst_setOpcode(mcInst, insn->instructionID);

#ifndef CAPSTONE_X86_REDUCE
	// If when reading the prefix bytes we determined the overlapping 0xf2 or
	// 0xf3 prefix bytes should be disassembled as xrelease and xacquire then
	// set the opcode to those instead of the rep and repne opcodes.
	if (insn->xAcquireRelease) {
		switch (MCInst_getOpcode(mcInst)) {
			case X86_REP_PREFIX:
				MCInst_setOpcode(mcInst, X86_XRELEASE_PREFIX);
				break;
			case X86_REPNE_PREFIX:
				MCInst_setOpcode(mcInst, X86_XACQUIRE_PREFIX);
				break;
			default:
				break;
		}
	}
#endif

	insn->numImmediatesTranslated = 0;

	for (slot = 0; slot < X86_MAX_OPERANDS; ++slot) {
		// unused operand slots are skipped
		if (insn->operands[slot].encoding == ENCODING_NONE)
			continue;

		if (translateOperand(mcInst, &insn->operands[slot], insn))
			return true;
	}

	return false;
}
716 
reader(const struct reader_info * info,uint8_t * byte,uint64_t address)717 static int reader(const struct reader_info *info, uint8_t *byte, uint64_t address)
718 {
719 	if (address - info->offset >= info->size)
720 		// out of buffer range
721 		return -1;
722 
723 	*byte = info->code[address - info->offset];
724 
725 	return 0;
726 }
727 
728 // copy x86 detail information from internal structure to public structure
update_pub_insn(cs_insn * pub,InternalInstruction * inter,uint8_t * prefixes)729 static void update_pub_insn(cs_insn *pub, InternalInstruction *inter, uint8_t *prefixes)
730 {
731 	prefixes[0] = inter->prefix0;
732 	prefixes[1] = inter->prefix1;
733 	prefixes[2] = inter->prefix2;
734 	prefixes[3] = inter->prefix3;
735 
736 	if (inter->vectorExtensionType != 0)
737 		memcpy(pub->detail->x86.opcode, inter->vectorExtensionPrefix, sizeof(pub->detail->x86.opcode));
738 	else {
739 		if (inter->twoByteEscape) {
740 			if (inter->threeByteEscape) {
741 				pub->detail->x86.opcode[0] = inter->twoByteEscape;
742 				pub->detail->x86.opcode[1] = inter->threeByteEscape;
743 				pub->detail->x86.opcode[2] = inter->opcode;
744 			} else {
745 				pub->detail->x86.opcode[0] = inter->twoByteEscape;
746 				pub->detail->x86.opcode[1] = inter->opcode;
747 			}
748 		} else {
749 				pub->detail->x86.opcode[0] = inter->opcode;
750 		}
751 	}
752 
753 	pub->detail->x86.rex = inter->rexPrefix;
754 
755 	pub->detail->x86.addr_size = inter->addressSize;
756 
757 	pub->detail->x86.modrm = inter->orgModRM;
758 	pub->detail->x86.sib = inter->sib;
759 	pub->detail->x86.disp = inter->displacement;
760 
761 	pub->detail->x86.sib_index = x86_map_sib_index(inter->sibIndex);
762 	pub->detail->x86.sib_scale = inter->sibScale;
763 	pub->detail->x86.sib_base = x86_map_sib_base(inter->sibBase);
764 }
765 
// Initialize the X86 register information in MRI from the generated tables.
void X86_init(MCRegisterInfo *MRI)
{
	/*
	   Reference call this one is derived from:

	   InitMCRegisterInfo(X86RegDesc, 234,
	   RA, PC,
	   X86MCRegisterClasses, 79,
	   X86RegUnitRoots, 119, X86RegDiffLists, X86RegStrings,
	   X86SubRegIdxLists, 7,
	   X86SubRegIdxRanges, X86RegEncodingTable);

	   Arguments passed as 0 below are the ones not supplied here.
	*/

	// Counts that accompany the tables (presumably the table sizes emitted
	// by the generator - keep in sync with the .inc file).
	enum {
		regDescCount   = 234,
		regClassCount  = 79,
		subRegIdxCount = 7
	};

	MCRegisterInfo_InitMCRegisterInfo(MRI,
			X86RegDesc, regDescCount,
			0, 0,
			X86MCRegisterClasses, regClassCount,
			0, 0,
			X86RegDiffLists,
			0,
			X86SubRegIdxLists, subRegIdxCount,
			0);
}
784 
785 // Public interface for the disassembler
X86_getInstruction(csh ud,const uint8_t * code,size_t code_len,MCInst * instr,uint16_t * size,uint64_t address,void * _info)786 bool X86_getInstruction(csh ud, const uint8_t *code, size_t code_len,
787 		MCInst *instr, uint16_t *size, uint64_t address, void *_info)
788 {
789 	cs_struct *handle = (cs_struct *)(uintptr_t)ud;
790 	InternalInstruction insn;
791 	struct reader_info info;
792 	int ret;
793 	bool result;
794 
795 	info.code = code;
796 	info.size = code_len;
797 	info.offset = address;
798 
799 	memset(&insn, 0, offsetof(InternalInstruction, reader));
800 
801 	if (instr->flat_insn->detail) {
802 		instr->flat_insn->detail->x86.op_count = 0;
803 		instr->flat_insn->detail->x86.sse_cc = X86_SSE_CC_INVALID;
804 		instr->flat_insn->detail->x86.avx_cc = X86_AVX_CC_INVALID;
805 		instr->flat_insn->detail->x86.avx_sae = false;
806 		instr->flat_insn->detail->x86.avx_rm = X86_AVX_RM_INVALID;
807 
808 		memset(instr->flat_insn->detail->x86.prefix, 0, sizeof(instr->flat_insn->detail->x86.prefix));
809 		memset(instr->flat_insn->detail->x86.opcode, 0, sizeof(instr->flat_insn->detail->x86.opcode));
810 		memset(instr->flat_insn->detail->x86.operands, 0, sizeof(instr->flat_insn->detail->x86.operands));
811 	}
812 
813 	if (handle->mode & CS_MODE_16)
814 		ret = decodeInstruction(&insn,
815 				reader, &info,
816 				address,
817 				MODE_16BIT);
818 	else if (handle->mode & CS_MODE_32)
819 		ret = decodeInstruction(&insn,
820 				reader, &info,
821 				address,
822 				MODE_32BIT);
823 	else
824 		ret = decodeInstruction(&insn,
825 				reader, &info,
826 				address,
827 				MODE_64BIT);
828 
829 	if (ret) {
830 		*size = (uint16_t)(insn.readerCursor - address);
831 
832 		return false;
833 	} else {
834 		*size = (uint16_t)insn.length;
835 
836 		result = (!translateInstruction(instr, &insn)) ?  true : false;
837 		if (result) {
838 			// quick fix for #904. TODO: fix this properly in the next update
839 			if (handle->mode & CS_MODE_64) {
840 				if (instr->Opcode == X86_LES16rm || instr->Opcode == X86_LES32rm)
841 					// LES is invalid in x64
842 					return false;
843 				if (instr->Opcode == X86_LDS16rm || instr->Opcode == X86_LDS32rm)
844 					// LDS is invalid in x64
845 					return false;
846 			}
847 
848 			instr->imm_size = insn.immSize;
849 			if (handle->detail) {
850 				update_pub_insn(instr->flat_insn, &insn, instr->x86_prefix);
851 			} else {
852 				// still copy all prefixes
853 				instr->x86_prefix[0] = insn.prefix0;
854 				instr->x86_prefix[1] = insn.prefix1;
855 				instr->x86_prefix[2] = insn.prefix2;
856 				instr->x86_prefix[3] = insn.prefix3;
857 			}
858 		}
859 
860 		return result;
861 	}
862 }
863 
864 #endif
865