1 /*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===*
2  *
3  *                     The LLVM Compiler Infrastructure
4  *
5  * This file is distributed under the University of Illinois Open Source
6  * License. See LICENSE.TXT for details.
7  *
8  *===----------------------------------------------------------------------===*
9  *
10  * This file is part of the X86 Disassembler.
11  * It contains the public interface of the instruction decoder.
12  * Documentation for the disassembler can be found in X86Disassembler.h.
13  *
14  *===----------------------------------------------------------------------===*/
15 
16 /* Capstone Disassembly Engine */
17 /* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2015 */
18 
19 #ifndef CS_X86_DISASSEMBLERDECODER_H
20 #define CS_X86_DISASSEMBLERDECODER_H
21 
22 #if defined(CAPSTONE_HAS_OSXKERNEL)
23 #include <libkern/libkern.h>
24 #else
25 #include <stdio.h>
26 #endif
27 
28 #include "X86DisassemblerDecoderCommon.h"
29 
/*
 * Bit-field extractors for the ModR/M, SIB and REX bytes of an Intel
 * instruction.  Each macro moves the requested field down to bit 0 and
 * masks away everything else:
 *
 *   ModR/M : mod   = bits 7..6, reg   = bits 5..3, rm   = bits 2..0
 *   SIB    : scale = bits 7..6, index = bits 5..3, base = bits 2..0
 *   REX    : W = bit 3, R = bit 2, X = bit 1, B = bit 0
 *
 * Arguments are expected to be unsigned byte values.
 */
#define modFromModRM(modRM)  (((modRM) >> 6) & 0x3)
#define regFromModRM(modRM)  (((modRM) >> 3) & 0x7)
#define rmFromModRM(modRM)   ((modRM) & 0x7)

#define scaleFromSIB(sib)    (((sib) >> 6) & 0x3)
#define indexFromSIB(sib)    (((sib) >> 3) & 0x7)
#define baseFromSIB(sib)     ((sib) & 0x7)

#define wFromREX(rex)        (((rex) >> 3) & 0x1)
#define rFromREX(rex)        (((rex) >> 2) & 0x1)
#define xFromREX(rex)        (((rex) >> 1) & 0x1)
#define bFromREX(rex)        ((rex) & 0x1)
43 
/*
 * Bit-field extractors for the EVEX prefix bytes.  The NofM suffix names the
 * byte the macro is meant to be applied to (presumably byte N of the M-byte
 * prefix -- confirm against the decoder).
 *
 *   2of4 : r = bit 7, x = bit 6, b = bit 5, r2 = bit 4   (all complemented)
 *          mm = bits 1..0
 *   3of4 : w = bit 7, vvvv = bits 6..3 (complemented), pp = bits 1..0
 *   4of4 : z = bit 7, l2 = bit 6, l = bit 5, b = bit 4,
 *          v2 = bit 3 (complemented), aaa = bits 2..0
 *
 * "Complemented" means the macro yields the bitwise inverse of the raw
 * bit(s) stored in the byte.  Every use of the argument is parenthesized so
 * that expression arguments (e.g. a | b) are complemented as a whole.
 */
#define rFromEVEX2of4(evex)     (((~(evex)) & 0x80) >> 7)
#define xFromEVEX2of4(evex)     (((~(evex)) & 0x40) >> 6)
#define bFromEVEX2of4(evex)     (((~(evex)) & 0x20) >> 5)
#define r2FromEVEX2of4(evex)    (((~(evex)) & 0x10) >> 4)
#define mmFromEVEX2of4(evex)    ((evex) & 0x3)
#define wFromEVEX3of4(evex)     (((evex) & 0x80) >> 7)
#define vvvvFromEVEX3of4(evex)  (((~(evex)) & 0x78) >> 3)
#define ppFromEVEX3of4(evex)    ((evex) & 0x3)
#define zFromEVEX4of4(evex)     (((evex) & 0x80) >> 7)
#define l2FromEVEX4of4(evex)    (((evex) & 0x40) >> 6)
#define lFromEVEX4of4(evex)     (((evex) & 0x20) >> 5)
#define bFromEVEX4of4(evex)     (((evex) & 0x10) >> 4)
#define v2FromEVEX4of4(evex)    (((~(evex)) & 0x8) >> 3)
#define aaaFromEVEX4of4(evex)   ((evex) & 0x7)
58 
/*
 * Bit-field extractors for the three-byte VEX prefix.
 *
 *   2of3 : r = bit 7, x = bit 6, b = bit 5 (all complemented),
 *          mmmmm = bits 4..0
 *   3of3 : w = bit 7, vvvv = bits 6..3 (complemented), l = bit 2,
 *          pp = bits 1..0
 *
 * Complemented fields are computed as (field ^ mask), which is the same value
 * as (~byte & mask).
 */
#define rFromVEX2of3(vex)       ((((vex) & 0x80) ^ 0x80) >> 7)
#define xFromVEX2of3(vex)       ((((vex) & 0x40) ^ 0x40) >> 6)
#define bFromVEX2of3(vex)       ((((vex) & 0x20) ^ 0x20) >> 5)
#define mmmmmFromVEX2of3(vex)   ((vex) & 0x1f)

#define wFromVEX3of3(vex)       (((vex) >> 7) & 0x1)
#define vvvvFromVEX3of3(vex)    ((((vex) & 0x78) ^ 0x78) >> 3)
#define lFromVEX3of3(vex)       (((vex) >> 2) & 0x1)
#define ppFromVEX3of3(vex)      ((vex) & 0x3)
67 
/*
 * Bit-field extractors for the second byte of the two-byte VEX prefix:
 *   r = bit 7 (complemented), vvvv = bits 6..3 (complemented),
 *   l = bit 2, pp = bits 1..0.
 */
#define rFromVEX2of2(vex)       ((((vex) & 0x80) ^ 0x80) >> 7)
#define vvvvFromVEX2of2(vex)    ((((vex) & 0x78) ^ 0x78) >> 3)
#define lFromVEX2of2(vex)       (((vex) >> 2) & 0x1)
#define ppFromVEX2of2(vex)      ((vex) & 0x3)
72 
/*
 * Bit-field extractors for the three-byte XOP prefix.  The layout handled
 * here mirrors the three-byte VEX accessors:
 *
 *   2of3 : r = bit 7, x = bit 6, b = bit 5 (all complemented),
 *          mmmmm = bits 4..0
 *   3of3 : w = bit 7, vvvv = bits 6..3 (complemented), l = bit 2,
 *          pp = bits 1..0
 */
#define rFromXOP2of3(xop)       ((((xop) & 0x80) ^ 0x80) >> 7)
#define xFromXOP2of3(xop)       ((((xop) & 0x40) ^ 0x40) >> 6)
#define bFromXOP2of3(xop)       ((((xop) & 0x20) ^ 0x20) >> 5)
#define mmmmmFromXOP2of3(xop)   ((xop) & 0x1f)

#define wFromXOP3of3(xop)       (((xop) >> 7) & 0x1)
#define vvvvFromXOP3of3(xop)    ((((xop) & 0x78) ^ 0x78) >> 3)
#define lFromXOP3of3(xop)       (((xop) >> 2) & 0x1)
#define ppFromXOP3of3(xop)      ((xop) & 0x3)
81 
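/*
 * The macros below list Intel registers for use by the decoder.  Each list is
 * a sequence of ENTRY(name) items; the enums further down define ENTRY() and
 * expand these lists to generate their enumerators.
 */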
85 
/* 8-bit register names; the order of the entries is significant. */
#define REGS_8BIT \
  ENTRY(AL)   ENTRY(CL)   ENTRY(DL)   ENTRY(BL)   \
  ENTRY(AH)   ENTRY(CH)   ENTRY(DH)   ENTRY(BH)   \
  ENTRY(R8B)  ENTRY(R9B)  ENTRY(R10B) ENTRY(R11B) \
  ENTRY(R12B) ENTRY(R13B) ENTRY(R14B) ENTRY(R15B) \
  ENTRY(SPL)  ENTRY(BPL)  ENTRY(SIL)  ENTRY(DIL)
107 
/*
 * Effective-address bases for 16-bit addressing.  The first four entries are
 * register pairs (BX_SI, BX_DI, BP_SI, BP_DI); the order is significant.
 */
#define EA_BASES_16BIT \
  ENTRY(BX_SI) ENTRY(BX_DI) ENTRY(BP_SI) ENTRY(BP_DI) \
  ENTRY(SI)    ENTRY(DI)    ENTRY(BP)    ENTRY(BX)    \
  ENTRY(R8W)   ENTRY(R9W)   ENTRY(R10W)  ENTRY(R11W)  \
  ENTRY(R12W)  ENTRY(R13W)  ENTRY(R14W)  ENTRY(R15W)
125 
/* 16-bit register names; the order of the entries is significant. */
#define REGS_16BIT \
  ENTRY(AX)   ENTRY(CX)   ENTRY(DX)   ENTRY(BX)   \
  ENTRY(SP)   ENTRY(BP)   ENTRY(SI)   ENTRY(DI)   \
  ENTRY(R8W)  ENTRY(R9W)  ENTRY(R10W) ENTRY(R11W) \
  ENTRY(R12W) ENTRY(R13W) ENTRY(R14W) ENTRY(R15W)
143 
/*
 * Effective-address bases for 32-bit addressing.  Same sequence as REGS_32BIT
 * except that the fifth entry is the placeholder `sib` where the register
 * list has ESP.  The order is significant.
 */
#define EA_BASES_32BIT \
  ENTRY(EAX)  ENTRY(ECX)  ENTRY(EDX)  ENTRY(EBX)  \
  ENTRY(sib)  ENTRY(EBP)  ENTRY(ESI)  ENTRY(EDI)  \
  ENTRY(R8D)  ENTRY(R9D)  ENTRY(R10D) ENTRY(R11D) \
  ENTRY(R12D) ENTRY(R13D) ENTRY(R14D) ENTRY(R15D)
161 
/* 32-bit register names; the order of the entries is significant. */
#define REGS_32BIT \
  ENTRY(EAX)  ENTRY(ECX)  ENTRY(EDX)  ENTRY(EBX)  \
  ENTRY(ESP)  ENTRY(EBP)  ENTRY(ESI)  ENTRY(EDI)  \
  ENTRY(R8D)  ENTRY(R9D)  ENTRY(R10D) ENTRY(R11D) \
  ENTRY(R12D) ENTRY(R13D) ENTRY(R14D) ENTRY(R15D)
179 
/*
 * Effective-address bases for 64-bit addressing.  Same sequence as REGS_64BIT
 * except that the fifth entry is the placeholder `sib64` where the register
 * list has RSP.  The order is significant.
 */
#define EA_BASES_64BIT \
  ENTRY(RAX)   ENTRY(RCX) ENTRY(RDX) ENTRY(RBX) \
  ENTRY(sib64) ENTRY(RBP) ENTRY(RSI) ENTRY(RDI) \
  ENTRY(R8)    ENTRY(R9)  ENTRY(R10) ENTRY(R11) \
  ENTRY(R12)   ENTRY(R13) ENTRY(R14) ENTRY(R15)
197 
/* 64-bit register names; the order of the entries is significant. */
#define REGS_64BIT \
  ENTRY(RAX) ENTRY(RCX) ENTRY(RDX) ENTRY(RBX) \
  ENTRY(RSP) ENTRY(RBP) ENTRY(RSI) ENTRY(RDI) \
  ENTRY(R8)  ENTRY(R9)  ENTRY(R10) ENTRY(R11) \
  ENTRY(R12) ENTRY(R13) ENTRY(R14) ENTRY(R15)
215 
/* MMX register names MM0..MM7, in ascending order. */
#define REGS_MMX \
  ENTRY(MM0) ENTRY(MM1) ENTRY(MM2) ENTRY(MM3) \
  ENTRY(MM4) ENTRY(MM5) ENTRY(MM6) ENTRY(MM7)
225 
/* XMM register names XMM0..XMM31, in ascending order. */
#define REGS_XMM \
  ENTRY(XMM0)  ENTRY(XMM1)  ENTRY(XMM2)  ENTRY(XMM3)  \
  ENTRY(XMM4)  ENTRY(XMM5)  ENTRY(XMM6)  ENTRY(XMM7)  \
  ENTRY(XMM8)  ENTRY(XMM9)  ENTRY(XMM10) ENTRY(XMM11) \
  ENTRY(XMM12) ENTRY(XMM13) ENTRY(XMM14) ENTRY(XMM15) \
  ENTRY(XMM16) ENTRY(XMM17) ENTRY(XMM18) ENTRY(XMM19) \
  ENTRY(XMM20) ENTRY(XMM21) ENTRY(XMM22) ENTRY(XMM23) \
  ENTRY(XMM24) ENTRY(XMM25) ENTRY(XMM26) ENTRY(XMM27) \
  ENTRY(XMM28) ENTRY(XMM29) ENTRY(XMM30) ENTRY(XMM31)
259 
260 
/* YMM register names YMM0..YMM31, in ascending order. */
#define REGS_YMM \
  ENTRY(YMM0)  ENTRY(YMM1)  ENTRY(YMM2)  ENTRY(YMM3)  \
  ENTRY(YMM4)  ENTRY(YMM5)  ENTRY(YMM6)  ENTRY(YMM7)  \
  ENTRY(YMM8)  ENTRY(YMM9)  ENTRY(YMM10) ENTRY(YMM11) \
  ENTRY(YMM12) ENTRY(YMM13) ENTRY(YMM14) ENTRY(YMM15) \
  ENTRY(YMM16) ENTRY(YMM17) ENTRY(YMM18) ENTRY(YMM19) \
  ENTRY(YMM20) ENTRY(YMM21) ENTRY(YMM22) ENTRY(YMM23) \
  ENTRY(YMM24) ENTRY(YMM25) ENTRY(YMM26) ENTRY(YMM27) \
  ENTRY(YMM28) ENTRY(YMM29) ENTRY(YMM30) ENTRY(YMM31)
294 
/* ZMM register names ZMM0..ZMM31, in ascending order. */
#define REGS_ZMM \
  ENTRY(ZMM0)  ENTRY(ZMM1)  ENTRY(ZMM2)  ENTRY(ZMM3)  \
  ENTRY(ZMM4)  ENTRY(ZMM5)  ENTRY(ZMM6)  ENTRY(ZMM7)  \
  ENTRY(ZMM8)  ENTRY(ZMM9)  ENTRY(ZMM10) ENTRY(ZMM11) \
  ENTRY(ZMM12) ENTRY(ZMM13) ENTRY(ZMM14) ENTRY(ZMM15) \
  ENTRY(ZMM16) ENTRY(ZMM17) ENTRY(ZMM18) ENTRY(ZMM19) \
  ENTRY(ZMM20) ENTRY(ZMM21) ENTRY(ZMM22) ENTRY(ZMM23) \
  ENTRY(ZMM24) ENTRY(ZMM25) ENTRY(ZMM26) ENTRY(ZMM27) \
  ENTRY(ZMM28) ENTRY(ZMM29) ENTRY(ZMM30) ENTRY(ZMM31)
328 
/* Mask register names K0..K7, in ascending order. */
#define REGS_MASKS \
  ENTRY(K0) ENTRY(K1) ENTRY(K2) ENTRY(K3) \
  ENTRY(K4) ENTRY(K5) ENTRY(K6) ENTRY(K7)
338 
/* Segment register names; the order of the entries is significant. */
#define REGS_SEGMENT \
  ENTRY(ES) ENTRY(CS) ENTRY(SS) \
  ENTRY(DS) ENTRY(FS) ENTRY(GS)
346 
/* Debug register names DR0..DR15, in ascending order. */
#define REGS_DEBUG \
  ENTRY(DR0)  ENTRY(DR1)  ENTRY(DR2)  ENTRY(DR3)  \
  ENTRY(DR4)  ENTRY(DR5)  ENTRY(DR6)  ENTRY(DR7)  \
  ENTRY(DR8)  ENTRY(DR9)  ENTRY(DR10) ENTRY(DR11) \
  ENTRY(DR12) ENTRY(DR13) ENTRY(DR14) ENTRY(DR15)
364 
/* Control register names CR0..CR15, in ascending order. */
#define REGS_CONTROL \
  ENTRY(CR0)  ENTRY(CR1)  ENTRY(CR2)  ENTRY(CR3)  \
  ENTRY(CR4)  ENTRY(CR5)  ENTRY(CR6)  ENTRY(CR7)  \
  ENTRY(CR8)  ENTRY(CR9)  ENTRY(CR10) ENTRY(CR11) \
  ENTRY(CR12) ENTRY(CR13) ENTRY(CR14) ENTRY(CR15)
382 
/* Every effective-address base: the 16-, 32- and 64-bit lists, in that order. */
#define ALL_EA_BASES  EA_BASES_16BIT EA_BASES_32BIT EA_BASES_64BIT
387 
/* Every SIB base: the 32-bit registers followed by the 64-bit registers. */
#define ALL_SIB_BASES  REGS_32BIT REGS_64BIT
391 
/*
 * Every register known to the decoder: all per-class register lists in a
 * fixed order, followed by a single RIP entry.
 */
#define ALL_REGS \
  REGS_8BIT  REGS_16BIT REGS_32BIT   REGS_64BIT \
  REGS_MMX   REGS_XMM   REGS_YMM     REGS_ZMM   \
  REGS_MASKS REGS_SEGMENT REGS_DEBUG REGS_CONTROL \
  ENTRY(RIP)
406 
407 /*
408  * EABase - All possible values of the base field for effective-address
409  *   computations, a.k.a. the Mod and R/M fields of the ModR/M byte.  We
410  *   distinguish between bases (EA_BASE_*) and registers that just happen to be
411  *   referred to when Mod == 0b11 (EA_REG_*).
412  */
413 typedef enum {
414   EA_BASE_NONE,
415 #define ENTRY(x) EA_BASE_##x,
416   ALL_EA_BASES
417 #undef ENTRY
418 #define ENTRY(x) EA_REG_##x,
419   ALL_REGS
420 #undef ENTRY
421   EA_max
422 } EABase;
423 
424 /*
425  * SIBIndex - All possible values of the SIB index field.
426  *   Borrows entries from ALL_EA_BASES with the special case that
427  *   sib is synonymous with NONE.
428  * Vector SIB: index can be XMM or YMM.
429  */
430 typedef enum {
431   SIB_INDEX_NONE,
432 #define ENTRY(x) SIB_INDEX_##x,
433   ALL_EA_BASES
434   REGS_XMM
435   REGS_YMM
436   REGS_ZMM
437 #undef ENTRY
438   SIB_INDEX_max
439 } SIBIndex;
440 
441 /*
442  * SIBBase - All possible values of the SIB base field.
443  */
444 typedef enum {
445   SIB_BASE_NONE,
446 #define ENTRY(x) SIB_BASE_##x,
447   ALL_SIB_BASES
448 #undef ENTRY
449   SIB_BASE_max
450 } SIBBase;
451 
/*
 * EADisplacement - the displacement types an effective-address computation
 *   can use.  Values are consecutive, starting at 0 for EA_DISP_NONE.
 */
typedef enum {
  EA_DISP_NONE = 0,
  EA_DISP_8    = 1,
  EA_DISP_16   = 2,
  EA_DISP_32   = 3
} EADisplacement;
462 
463 /*
464  * Reg - All possible values of the reg field in the ModR/M byte.
465  */
466 typedef enum {
467 #define ENTRY(x) MODRM_REG_##x,
468   ALL_REGS
469 #undef ENTRY
470   MODRM_REG_max
471 } Reg;
472 
/*
 * SegmentOverride - every possible segment override.  Values are consecutive,
 *   starting at 0 for SEG_OVERRIDE_NONE; SEG_OVERRIDE_max is the last
 *   enumerator.  Note that the order here (CS, SS, DS, ES, FS, GS) differs
 *   from the order of REGS_SEGMENT.
 */
typedef enum {
  SEG_OVERRIDE_NONE = 0,
  SEG_OVERRIDE_CS   = 1,
  SEG_OVERRIDE_SS   = 2,
  SEG_OVERRIDE_DS   = 3,
  SEG_OVERRIDE_ES   = 4,
  SEG_OVERRIDE_FS   = 5,
  SEG_OVERRIDE_GS   = 6,
  SEG_OVERRIDE_max  = 7
} SegmentOverride;
486 
/*
 * VEXLeadingOpcodeByte - the values the VEX.m-mmmm field can take.
 */
typedef enum {
  VEX_LOB_0F   = 0x01,
  VEX_LOB_0F38 = 0x02,
  VEX_LOB_0F3A = 0x03
} VEXLeadingOpcodeByte;
495 
/*
 * XOPMapSelect - map-select values 8, 9 and 0xA.
 *   NOTE(review): presumably compared against the mmmmm field of an XOP
 *   prefix -- confirm against the decoder.
 */
typedef enum {
  XOP_MAP_SELECT_8 = 0x08,
  XOP_MAP_SELECT_9 = 0x09,
  XOP_MAP_SELECT_A = 0x0A
} XOPMapSelect;
501 
/*
 * VEXPrefixCode - the values the VEX.pp / EVEX.pp field can take.
 */
typedef enum {
  VEX_PREFIX_NONE = 0x00,
  VEX_PREFIX_66   = 0x01,
  VEX_PREFIX_F3   = 0x02,
  VEX_PREFIX_F2   = 0x03
} VEXPrefixCode;
511 
/*
 * VectorExtensionType - the kind of vector extension prefix an instruction
 *   carries; TYPE_NO_VEX_XOP (0) is the value for none.
 */
typedef enum {
  TYPE_NO_VEX_XOP = 0,
  TYPE_VEX_2B     = 1,
  TYPE_VEX_3B     = 2,
  TYPE_EVEX       = 3,
  TYPE_XOP        = 4
} VectorExtensionType;
519 
/*
 * reader_info - the argument type handed to a byteReader_t callback.
 *   NOTE(review): field meanings below are inferred from the names only --
 *   confirm against the reader implementation.
 */
struct reader_info {
  const uint8_t *code;  /* presumably the start of the byte buffer */
  uint64_t size;        /* presumably the number of bytes in the buffer */
  uint64_t offset;      /* presumably the address/offset of the buffer start */
};
525 
/*
 * byteReader_t - Callback type the consumer must supply to the decoder; it
 *   reads one byte from the instruction's address space.
 * @param arg     - The reader_info baton the consumer uses to carry whatever
 *                  internal state the reader needs.
 * @param byte    - Out-parameter: pointer to a single byte that should be set
 *                  to the value found at address.
 * @param address - Address, in the instruction's address space, to read from.
 * @return        - -1 if the byte cannot be read for any reason; 0 otherwise.
 */
typedef int (*byteReader_t)(const struct reader_info *arg,
                            uint8_t *byte,
                            uint64_t address);
538 
/*
 * dlog_t - Callback type for an optional logging function through which the
 *   consumer can receive debugging output from the decoder.
 * @param arg - A baton that the consumer can associate with any internal
 *              state that it needs.
 * @param log - The message string.  The storage will be reused after the
 *              logger returns, so copy it if it must outlive the call.
 */
typedef void (*dlog_t)(void *arg, const char *log);
548 
/*
 * InstructionSpecifier - the specification for how to extract and interpret
 *   a full instruction and its operands.
 */
struct InstructionSpecifier {
  uint16_t operands;
};
554 
555 /*
556  * The x86 internal instruction, which is produced by the decoder.
557  */
558 typedef struct InternalInstruction {
559   // from here, all members must be initialized to ZERO to work properly
560   uint8_t operandSize;
561   uint8_t prefix0, prefix1, prefix2, prefix3;
562   /* true if the prefix byte corresponding to the entry is present; false if not */
563   bool isPrefix26;
564   bool isPrefix2e;
565   bool isPrefix36;
566   bool isPrefix3e;
567   bool isPrefix64;
568   bool isPrefix65;
569   bool isPrefix66;
570   bool isPrefix67;
571   bool isPrefixf0;
572   bool isPrefixf2;
573   bool isPrefixf3;
574   /* contains the location (for use with the reader) of the prefix byte */
575   uint64_t prefix26;
576   uint64_t prefix2e;
577   uint64_t prefix36;
578   uint64_t prefix3e;
579   uint64_t prefix64;
580   uint64_t prefix65;
581   uint64_t prefix66;
582   uint64_t prefix67;
583   uint64_t prefixf0;
584   uint64_t prefixf2;
585   uint64_t prefixf3;
586   /* The value of the REX prefix, if present */
587   uint8_t rexPrefix;
588   /* The segment override type */
589   SegmentOverride segmentOverride;
590   bool                          consumedModRM;
591   uint8_t                       orgModRM;  // save original modRM because we will modify modRM
592   /* The SIB byte, used for more complex 32- or 64-bit memory operands */
593   bool                          consumedSIB;
594   uint8_t                       sib;
595   /* The displacement, used for memory operands */
596   bool                          consumedDisplacement;
597   int64_t                       displacement;
598   /* The value of the two-byte escape prefix (usually 0x0f) */
599   uint8_t twoByteEscape;
600   /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
601   uint8_t threeByteEscape;
602   /* SIB state */
603   SIBIndex                      sibIndex;
604   uint8_t                       sibScale;
605   SIBBase                       sibBase;
606   uint8_t                       numImmediatesConsumed;
607   /* true if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease */
608   bool xAcquireRelease;
609 
610   /* The value of the vector extension prefix(EVEX/VEX/XOP), if present */
611   uint8_t vectorExtensionPrefix[4];
612 
613   /* Offsets from the start of the instruction to the pieces of data, which is
614      needed to find relocation entries for adding symbolic operands */
615   uint8_t displacementOffset;
616   uint8_t immediateOffset;
617   uint8_t modRMOffset;
618 
619   // end-of-zero-members
620 
621   /* Reader interface (C) */
622   byteReader_t reader;
623 
624   /* Opaque value passed to the reader */
625   const void* readerArg;
626   /* The address of the next byte to read via the reader */
627   uint64_t readerCursor;
628 
629   /* Logger interface (C) */
630   dlog_t dlog;
631   /* Opaque value passed to the logger */
632   void* dlogArg;
633 
634   /* General instruction information */
635 
636   /* The mode to disassemble for (64-bit, protected, real) */
637   DisassemblerMode mode;
638   /* The start of the instruction, usable with the reader */
639   uint64_t startLocation;
640   /* The length of the instruction, in bytes */
641   size_t length;
642 
643   /* Prefix state */
644 
645   /* The type of the vector extension prefix */
646   VectorExtensionType vectorExtensionType;
647 
648   /* The location where a mandatory prefix would have to be (i.e., right before
649 	 the opcode, or right before the REX prefix if one is present) */
650   uint64_t necessaryPrefixLocation;
651 
652   /* Sizes of various critical pieces of data, in bytes */
653   uint8_t registerSize;
654   uint8_t addressSize;
655   uint8_t displacementSize;
656   uint8_t immediateSize;
657 
658   uint8_t immSize;	// immediate size for X86_OP_IMM operand
659 
660   /* opcode state */
661 
662   /* The last byte of the opcode, not counting any ModR/M extension */
663   uint8_t opcode;
664 
665   /* decode state */
666 
667   /* The type of opcode, used for indexing into the array of decode tables */
668   OpcodeType opcodeType;
669   /* The instruction ID, extracted from the decode table */
670   uint16_t instructionID;
671   /* The specifier for the instruction, from the instruction info table */
672   const struct InstructionSpecifier *spec;
673 
674   /* state for additional bytes, consumed during operand decode.  Pattern:
675      consumed___ indicates that the byte was already consumed and does not
676      need to be consumed again */
677 
678   /* The VEX.vvvv field, which contains a third register operand for some AVX
679      instructions */
680   Reg                           vvvv;
681 
682   /* The writemask for AVX-512 instructions which is contained in EVEX.aaa */
683   Reg                           writemask;
684 
685   /* The ModR/M byte, which contains most register operands and some portion of
686      all memory operands */
687   uint8_t                       modRM;
688 
689   // special data to handle MOVcr, MOVdr, MOVrc, MOVrd
690   uint8_t                       firstByte;     // save the first byte in stream
691 
692   /* Immediates.  There can be two in some cases */
693   uint8_t                       numImmediatesTranslated;
694   uint64_t                      immediates[2];
695 
696   /* A register or immediate operand encoded into the opcode */
697   Reg                           opcodeRegister;
698 
699   /* Portions of the ModR/M byte */
700 
701   /* These fields determine the allowable values for the ModR/M fields, which
702      depend on operand and address widths */
703   EABase                        eaBaseBase;
704   EABase                        eaRegBase;
705   Reg                           regBase;
706 
707   /* The Mod and R/M fields can encode a base for an effective address, or a
708      register.  These are separated into two fields here */
709   EABase                        eaBase;
710   EADisplacement                eaDisplacement;
711   /* The reg field always encodes a register */
712   Reg                           reg;
713 
714   const struct OperandSpecifier *operands;
715 } InternalInstruction;
716 
717 /* decodeInstruction - Decode one instruction and store the decoding results in
718  *   a buffer provided by the consumer.
719  * @param insn      - The buffer to store the instruction in.  Allocated by the
720  *                    consumer.
721  * @param reader    - The byteReader_t for the bytes to be read.
722  * @param readerArg - An argument to pass to the reader for storing context
723  *                    specific to the consumer.  May be NULL.
724  * @param logger    - The dlog_t to be used in printing status messages from the
725  *                    disassembler.  May be NULL.
726  * @param loggerArg - An argument to pass to the logger for storing context
727  *                    specific to the logger.  May be NULL.
728  * @param startLoc  - The address (in the reader's address space) of the first
729  *                    byte in the instruction.
730  * @param mode      - The mode (16-bit, 32-bit, 64-bit) to decode in.
731  * @return          - Nonzero if there was an error during decode, 0 otherwise.
732  */
733 int decodeInstruction(struct InternalInstruction* insn,
734                       byteReader_t reader,
735                       const void* readerArg,
736                       uint64_t startLoc,
737                       DisassemblerMode mode);
738 
739 //const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii);
740 
741 #endif
742