1 // Capstone Java binding
2 // By Nguyen Anh Quynh & Dang Hoang Vu,  2013
3 
4 package capstone;
5 
6 import com.sun.jna.Library;
7 import com.sun.jna.Native;
8 import com.sun.jna.NativeLong;
9 import com.sun.jna.ptr.NativeLongByReference;
10 import com.sun.jna.Structure;
11 import com.sun.jna.Union;
12 import com.sun.jna.Pointer;
13 import com.sun.jna.ptr.PointerByReference;
14 import com.sun.jna.ptr.IntByReference;
15 
16 import java.util.List;
17 import java.util.Arrays;
18 import java.lang.RuntimeException;
19 
20 public class Capstone {
21 
22   protected static abstract class OpInfo {};
23   protected static abstract class UnionOpInfo extends Structure {};
24 
25   public static class UnionArch extends Union {
26     public static class ByValue extends UnionArch implements Union.ByValue {};
27 
28     public Arm.UnionOpInfo arm;
29     public Arm64.UnionOpInfo arm64;
30     public X86.UnionOpInfo x86;
31     public Mips.UnionOpInfo mips;
32     public Ppc.UnionOpInfo ppc;
33     public Sparc.UnionOpInfo sparc;
34     public Systemz.UnionOpInfo sysz;
35     public Xcore.UnionOpInfo xcore;
36   }
37 
38   protected static class _cs_insn extends Structure {
39     // instruction ID.
40     public int id;
41     // instruction address.
42     public long address;
43     // instruction size.
44     public short size;
45     // machine bytes of instruction.
46     public byte[] bytes;
47     // instruction mnemonic. NOTE: irrelevant for diet engine.
48     public byte[] mnemonic;
49     // instruction operands. NOTE: irrelevant for diet engine.
50     public byte[] op_str;
51     // detail information of instruction.
52     public _cs_detail.ByReference cs_detail;
53 
_cs_insn()54     public _cs_insn() {
55       bytes = new byte[16];
56       mnemonic = new byte[32];
57       op_str = new byte[160];
58       java.util.Arrays.fill(mnemonic, (byte) 0);
59       java.util.Arrays.fill(op_str, (byte) 0);
60     }
61 
_cs_insn(Pointer p)62     public _cs_insn(Pointer p) {
63       this();
64       useMemory(p);
65       read();
66     }
67 
68     @Override
getFieldOrder()69     public List getFieldOrder() {
70       return Arrays.asList("id", "address", "size", "bytes", "mnemonic", "op_str", "cs_detail");
71     }
72   }
73 
74   protected static class _cs_detail extends Structure {
75     public static class ByReference extends _cs_detail implements Structure.ByReference {};
76 
77     // list of all implicit registers being read.
78     public byte[] regs_read = new byte[12];
79     public byte regs_read_count;
80     // list of all implicit registers being written.
81     public byte[] regs_write = new byte[20];
82     public byte regs_write_count;
83     // list of semantic groups this instruction belongs to.
84     public byte[] groups = new byte[8];
85     public byte groups_count;
86 
87     public UnionArch arch;
88 
89     @Override
getFieldOrder()90     public List getFieldOrder() {
91       return Arrays.asList("regs_read", "regs_read_count", "regs_write", "regs_write_count", "groups", "groups_count", "arch");
92     }
93   }
94 
95   public static class CsInsn {
96     private NativeLong csh;
97     private CS cs;
98     private _cs_insn raw;
99     private int arch;
100 
101     // instruction ID.
102     public int id;
103     // instruction address.
104     public long address;
105     // instruction size.
106     public short size;
107     // instruction mnemonic. NOTE: irrelevant for diet engine.
108     public String mnemonic;
109     // instruction operands. NOTE: irrelevant for diet engine.
110     public String opStr;
111     // list of all implicit registers being read.
112     public byte[] regsRead;
113     // list of all implicit registers being written.
114     public byte[] regsWrite;
115     // list of semantic groups this instruction belongs to.
116     public byte[] groups;
117     public OpInfo operands;
118 
CsInsn(_cs_insn insn, int _arch, NativeLong _csh, CS _cs, boolean diet)119     public CsInsn (_cs_insn insn, int _arch, NativeLong _csh, CS _cs, boolean diet) {
120       id = insn.id;
121       address = insn.address;
122       size = insn.size;
123 
124       if (!diet) {
125         int lm = 0;
126         while (insn.mnemonic[lm++] != 0);
127         int lo = 0;
128         while (insn.op_str[lo++] != 0);
129         mnemonic = new String(insn.mnemonic, 0, lm-1);
130         opStr = new String(insn.op_str, 0, lo-1);
131       }
132 
133       cs = _cs;
134       arch = _arch;
135       raw = insn;
136       csh = _csh;
137 
138       if (insn.cs_detail != null) {
139         if (!diet) {
140           regsRead = new byte[insn.cs_detail.regs_read_count];
141           for (int i=0; i<regsRead.length; i++)
142             regsRead[i] = insn.cs_detail.regs_read[i];
143           regsWrite = new byte[insn.cs_detail.regs_write_count];
144           for (int i=0; i<regsWrite.length; i++)
145             regsWrite[i] = insn.cs_detail.regs_write[i];
146           groups = new byte[insn.cs_detail.groups_count];
147           for (int i=0; i<groups.length; i++)
148             groups[i] = insn.cs_detail.groups[i];
149         }
150 
151         operands = getOptInfo(insn.cs_detail);
152       }
153     }
154 
getOptInfo(_cs_detail detail)155     private OpInfo getOptInfo(_cs_detail detail) {
156       OpInfo op_info = null;
157 
158       switch (this.arch) {
159         case CS_ARCH_ARM:
160           detail.arch.setType(Arm.UnionOpInfo.class);
161           detail.arch.read();
162           op_info = new Arm.OpInfo((Arm.UnionOpInfo) detail.arch.arm);
163           break;
164         case CS_ARCH_ARM64:
165           detail.arch.setType(Arm64.UnionOpInfo.class);
166           detail.arch.read();
167           op_info = new Arm64.OpInfo((Arm64.UnionOpInfo) detail.arch.arm64);
168           break;
169         case CS_ARCH_MIPS:
170           detail.arch.setType(Mips.UnionOpInfo.class);
171           detail.arch.read();
172           op_info = new Mips.OpInfo((Mips.UnionOpInfo) detail.arch.mips);
173           break;
174         case CS_ARCH_X86:
175           detail.arch.setType(X86.UnionOpInfo.class);
176           detail.arch.read();
177           op_info = new X86.OpInfo((X86.UnionOpInfo) detail.arch.x86);
178           break;
179         case CS_ARCH_SPARC:
180           detail.arch.setType(Sparc.UnionOpInfo.class);
181           detail.arch.read();
182           op_info = new Sparc.OpInfo((Sparc.UnionOpInfo) detail.arch.sparc);
183           break;
184         case CS_ARCH_SYSZ:
185           detail.arch.setType(Systemz.UnionOpInfo.class);
186           detail.arch.read();
187           op_info = new Systemz.OpInfo((Systemz.UnionOpInfo) detail.arch.sysz);
188           break;
189         case CS_ARCH_PPC:
190           detail.arch.setType(Ppc.UnionOpInfo.class);
191           detail.arch.read();
192           op_info = new Ppc.OpInfo((Ppc.UnionOpInfo) detail.arch.ppc);
193           break;
194         case CS_ARCH_XCORE:
195           detail.arch.setType(Xcore.UnionOpInfo.class);
196           detail.arch.read();
197           op_info = new Xcore.OpInfo((Xcore.UnionOpInfo) detail.arch.xcore);
198           break;
199         default:
200       }
201 
202       return op_info;
203     }
204 
opCount(int type)205     public int opCount(int type) {
206       return cs.cs_op_count(csh, raw.getPointer(), type);
207     }
208 
opIndex(int type, int index)209     public int opIndex(int type, int index) {
210       return cs.cs_op_index(csh, raw.getPointer(), type, index);
211     }
212 
regRead(int reg_id)213     public boolean regRead(int reg_id) {
214       return cs.cs_reg_read(csh, raw.getPointer(), reg_id) != 0;
215     }
216 
regWrite(int reg_id)217     public boolean regWrite(int reg_id) {
218       return cs.cs_reg_write(csh, raw.getPointer(), reg_id) != 0;
219     }
220 
errno()221     public int errno() {
222       return cs.cs_errno(csh);
223     }
224 
regName(int reg_id)225     public String regName(int reg_id) {
226       return cs.cs_reg_name(csh, reg_id);
227     }
228 
insnName()229     public String insnName() {
230       return cs.cs_insn_name(csh, id);
231     }
232 
groupName(int id)233     public String groupName(int id) {
234       return cs.cs_group_name(csh, id);
235     }
236 
group(int gid)237     public boolean group(int gid) {
238       return cs.cs_insn_group(csh, raw.getPointer(), gid) != 0;
239     }
240 
241   }
242 
fromArrayRaw(_cs_insn[] arr_raw)243   private CsInsn[] fromArrayRaw(_cs_insn[] arr_raw) {
244     CsInsn[] arr = new CsInsn[arr_raw.length];
245 
246     for (int i = 0; i < arr_raw.length; i++) {
247       arr[i] = new CsInsn(arr_raw[i], this.arch, ns.csh, cs, this.diet);
248     }
249 
250     return arr;
251   }
252 
253   private interface CS extends Library {
cs_open(int arch, int mode, NativeLongByReference handle)254     public int cs_open(int arch, int mode, NativeLongByReference handle);
cs_disasm(NativeLong handle, byte[] code, NativeLong code_len, long addr, NativeLong count, PointerByReference insn)255     public NativeLong cs_disasm(NativeLong handle, byte[] code, NativeLong code_len,
256         long addr, NativeLong count, PointerByReference insn);
cs_free(Pointer p, NativeLong count)257     public void cs_free(Pointer p, NativeLong count);
cs_close(NativeLongByReference handle)258     public int cs_close(NativeLongByReference handle);
cs_option(NativeLong handle, int option, NativeLong optionValue)259     public int cs_option(NativeLong handle, int option, NativeLong optionValue);
260 
cs_reg_name(NativeLong csh, int id)261     public String cs_reg_name(NativeLong csh, int id);
cs_op_count(NativeLong csh, Pointer insn, int type)262     public int cs_op_count(NativeLong csh, Pointer insn, int type);
cs_op_index(NativeLong csh, Pointer insn, int type, int index)263     public int cs_op_index(NativeLong csh, Pointer insn, int type, int index);
264 
cs_insn_name(NativeLong csh, int id)265     public String cs_insn_name(NativeLong csh, int id);
cs_group_name(NativeLong csh, int id)266     public String cs_group_name(NativeLong csh, int id);
cs_insn_group(NativeLong csh, Pointer insn, int id)267     public byte cs_insn_group(NativeLong csh, Pointer insn, int id);
cs_reg_read(NativeLong csh, Pointer insn, int id)268     public byte cs_reg_read(NativeLong csh, Pointer insn, int id);
cs_reg_write(NativeLong csh, Pointer insn, int id)269     public byte cs_reg_write(NativeLong csh, Pointer insn, int id);
cs_errno(NativeLong csh)270     public int cs_errno(NativeLong csh);
cs_version(IntByReference major, IntByReference minor)271     public int cs_version(IntByReference major, IntByReference minor);
cs_support(int query)272     public boolean cs_support(int query);
273   }
274 
275   // Capstone API version
276   public static final int CS_API_MAJOR = 3;
277   public static final int CS_API_MINOR = 0;
278 
279   // architectures
280   public static final int CS_ARCH_ARM = 0;
281   public static final int CS_ARCH_ARM64 = 1;
282   public static final int CS_ARCH_MIPS = 2;
283   public static final int CS_ARCH_X86 = 3;
284   public static final int CS_ARCH_PPC = 4;
285   public static final int CS_ARCH_SPARC = 5;
286   public static final int CS_ARCH_SYSZ = 6;
287   public static final int CS_ARCH_XCORE = 7;
288   public static final int CS_ARCH_MAX = 8;
289   public static final int CS_ARCH_ALL = 0xFFFF; // query id for cs_support()
290 
291   // disasm mode
292   public static final int CS_MODE_LITTLE_ENDIAN = 0;  // little-endian mode (default mode)
293   public static final int CS_MODE_ARM = 0;	          // 32-bit ARM
294   public static final int CS_MODE_16 = 1 << 1;		// 16-bit mode for X86
295   public static final int CS_MODE_32 = 1 << 2;		// 32-bit mode for X86
296   public static final int CS_MODE_64 = 1 << 3;		// 64-bit mode for X86, PPC
297   public static final int CS_MODE_THUMB = 1 << 4;	  // ARM's Thumb mode, including Thumb-2
298   public static final int CS_MODE_MCLASS = 1 << 5;	  // ARM's Cortex-M series
299   public static final int CS_MODE_V8 = 1 << 6;	      // ARMv8 A32 encodings for ARM
300   public static final int CS_MODE_MICRO = 1 << 4;	  // MicroMips mode (Mips arch)
301   public static final int CS_MODE_MIPS3 = 1 << 5;     // Mips III ISA
302   public static final int CS_MODE_MIPS32R6 = 1 << 6;  // Mips32r6 ISA
303   public static final int CS_MODE_MIPSGP64 = 1 << 7;  // General Purpose Registers are 64-bit wide (MIPS arch)
304   public static final int CS_MODE_BIG_ENDIAN = 1 << 31; // big-endian mode
305   public static final int CS_MODE_V9 = 1 << 4;	      // SparcV9 mode (Sparc arch)
306   public static final int CS_MODE_MIPS32 = CS_MODE_32; // Mips32 ISA
307   public static final int CS_MODE_MIPS64 = CS_MODE_64; // Mips64 ISA
308 
309   // Capstone error
310   public static final int CS_ERR_OK = 0;
311   public static final int CS_ERR_MEM = 1;	    // Out-Of-Memory error
312   public static final int CS_ERR_ARCH = 2;	  // Unsupported architecture
313   public static final int CS_ERR_HANDLE = 3;	// Invalid handle
314   public static final int CS_ERR_CSH = 4;	    // Invalid csh argument
315   public static final int CS_ERR_MODE = 5;	  // Invalid/unsupported mode
316   public static final int CS_ERR_OPTION = 6;  // Invalid/unsupported option: cs_option()
317   public static final int CS_ERR_DETAIL = 7;  // Invalid/unsupported option: cs_option()
318   public static final int CS_ERR_MEMSETUP = 8;
319   public static final int CS_ERR_VERSION = 9;  //Unsupported version (bindings)
320   public static final int CS_ERR_DIET = 10;  //Information irrelevant in diet engine
321   public static final int CS_ERR_SKIPDATA = 11;  //Access irrelevant data for "data" instruction in SKIPDATA mode
322   public static final int CS_ERR_X86_ATT = 12;  //X86 AT&T syntax is unsupported (opt-out at compile time)
323   public static final int CS_ERR_X86_INTEL = 13;  //X86 Intel syntax is unsupported (opt-out at compile time)
324 
325   // Capstone option type
326   public static final int CS_OPT_SYNTAX = 1;  // Intel X86 asm syntax (CS_ARCH_X86 arch)
327   public static final int CS_OPT_DETAIL = 2;  // Break down instruction structure into details
328   public static final int CS_OPT_MODE = 3;  // Change engine's mode at run-time
329 
330   // Capstone option value
331   public static final int CS_OPT_OFF = 0;  // Turn OFF an option - default option of CS_OPT_DETAIL
332   public static final int CS_OPT_SYNTAX_INTEL = 1;  // Intel X86 asm syntax - default syntax on X86 (CS_OPT_SYNTAX,  CS_ARCH_X86)
333   public static final int CS_OPT_SYNTAX_ATT = 2;    // ATT asm syntax (CS_OPT_SYNTAX, CS_ARCH_X86)
334   public static final int CS_OPT_ON = 3;  // Turn ON an option (CS_OPT_DETAIL)
335   public static final int CS_OPT_SYNTAX_NOREGNAME = 3; // PPC asm syntax: Prints register name with only number (CS_OPT_SYNTAX)
336 
337   // Common instruction operand types - to be consistent across all architectures.
338   public static final int CS_OP_INVALID = 0;
339   public static final int CS_OP_REG = 1;
340   public static final int CS_OP_IMM = 2;
341   public static final int CS_OP_MEM = 3;
342   public static final int CS_OP_FP  = 4;
343 
344   // Common instruction groups - to be consistent across all architectures.
345   public static final int CS_GRP_INVALID = 0;  // uninitialized/invalid group.
346   public static final int CS_GRP_JUMP    = 1;  // all jump instructions (conditional+direct+indirect jumps)
347   public static final int CS_GRP_CALL    = 2;  // all call instructions
348   public static final int CS_GRP_RET     = 3;  // all return instructions
349   public static final int CS_GRP_INT     = 4;  // all interrupt instructions (int+syscall)
350   public static final int CS_GRP_IRET    = 5;  // all interrupt return instructions
351 
352   // Query id for cs_support()
353   public static final int CS_SUPPORT_DIET = CS_ARCH_ALL+1;	  // diet mode
354   public static final int CS_SUPPORT_X86_REDUCE = CS_ARCH_ALL+2;  // X86 reduce mode
355 
356   protected class NativeStruct {
357       private NativeLong csh;
358       private NativeLongByReference handleRef;
359   }
360 
361   private static final CsInsn[] EMPTY_INSN = new CsInsn[0];
362 
363   protected NativeStruct ns; // for memory retention
364   private CS cs;
365   public int arch;
366   public int mode;
367   private int syntax;
368   private int detail;
369   private boolean diet;
370 
Capstone(int arch, int mode)371   public Capstone(int arch, int mode) {
372     cs = (CS)Native.loadLibrary("capstone", CS.class);
373     int version = cs.cs_version(null, null);
374     if (version != (CS_API_MAJOR << 8) + CS_API_MINOR) {
375       throw new RuntimeException("Different API version between core & binding (CS_ERR_VERSION)");
376     }
377 
378     this.arch = arch;
379     this.mode = mode;
380     ns = new NativeStruct();
381     ns.handleRef = new NativeLongByReference();
382     if (cs.cs_open(arch, mode, ns.handleRef) != CS_ERR_OK) {
383       throw new RuntimeException("ERROR: Wrong arch or mode");
384     }
385     ns.csh = ns.handleRef.getValue();
386     this.detail = CS_OPT_OFF;
387 	this.diet = cs.cs_support(CS_SUPPORT_DIET);
388   }
389 
390   // return combined API version
version()391   public int version() {
392     return cs.cs_version(null, null);
393   }
394 
395   // set Assembly syntax
setSyntax(int syntax)396   public void setSyntax(int syntax) {
397     if (cs.cs_option(ns.csh, CS_OPT_SYNTAX, new NativeLong(syntax)) == CS_ERR_OK) {
398       this.syntax = syntax;
399     } else {
400       throw new RuntimeException("ERROR: Failed to set assembly syntax");
401     }
402   }
403 
404   // set detail option at run-time
setDetail(int opt)405   public void setDetail(int opt) {
406     if (cs.cs_option(ns.csh, CS_OPT_DETAIL, new NativeLong(opt)) == CS_ERR_OK) {
407       this.detail = opt;
408     } else {
409       throw new RuntimeException("ERROR: Failed to set detail option");
410     }
411   }
412 
413   // set mode option at run-time
setMode(int opt)414   public void setMode(int opt) {
415     if (cs.cs_option(ns.csh, CS_OPT_MODE, new NativeLong(opt)) == CS_ERR_OK) {
416       this.mode = opt;
417     } else {
418       throw new RuntimeException("ERROR: Failed to set mode option");
419     }
420   }
421 
422   // destructor automatically caled at destroyed time.
finalize()423   protected void finalize() {
424     // FIXME: crashed on Ubuntu 14.04 64bit, OpenJDK java 1.6.0_33
425     // cs.cs_close(ns.handleRef);
426   }
427 
428   // destructor automatically caled at destroyed time.
close()429   public int close() {
430     return cs.cs_close(ns.handleRef);
431   }
432 
433   /**
434    * Disassemble instructions from @code assumed to be located at @address,
435    * stop when encountering first broken instruction.
436    *
437    * @param code The source machine code bytes.
438    * @param address The address of the first machine code byte.
439    * @return the array of successfully disassembled instructions, empty if no instruction could be disassembled.
440    */
disasm(byte[] code, long address)441   public CsInsn[] disasm(byte[] code, long address) {
442     return disasm(code, address, 0);
443   }
444 
445   /**
446    * Disassemble up to @count instructions from @code assumed to be located at @address,
447    * stop when encountering first broken instruction.
448    *
449    * @param code The source machine code bytes.
450    * @param address The address of the first machine code byte.
451    * @param count The maximum number of instructions to disassemble, 0 for no maximum.
452    * @return the array of successfully disassembled instructions, empty if no instruction could be disassembled.
453    */
disasm(byte[] code, long address, long count)454   public CsInsn[] disasm(byte[] code, long address, long count) {
455     PointerByReference insnRef = new PointerByReference();
456 
457     NativeLong c = cs.cs_disasm(ns.csh, code, new NativeLong(code.length), address, new NativeLong(count), insnRef);
458 
459     if (0 == c.intValue()) {
460     	return EMPTY_INSN;
461     }
462 
463     Pointer p = insnRef.getValue();
464     _cs_insn byref = new _cs_insn(p);
465 
466     CsInsn[] allInsn = fromArrayRaw((_cs_insn[]) byref.toArray(c.intValue()));
467 
468     // free allocated memory
469     // cs.cs_free(p, c);
470     // FIXME(danghvu): Can't free because memory is still inside CsInsn
471 
472     return allInsn;
473   }
474 }
475