1 /*===-- llvm-c/EnhancedDisassembly.h - Disassembler C Interface ---*- C -*-===*\
2 |*                                                                            *|
3 |*                     The LLVM Compiler Infrastructure                       *|
4 |*                                                                            *|
5 |* This file is distributed under the University of Illinois Open Source      *|
6 |* License. See LICENSE.TXT for details.                                      *|
7 |*                                                                            *|
8 |*===----------------------------------------------------------------------===*|
9 |*                                                                            *|
10 |* This header declares the C interface to EnhancedDisassembly.so, which      *|
11 |* implements a disassembler with the ability to extract operand values and   *|
12 |* individual tokens from assembly instructions.                              *|
13 |*                                                                            *|
14 |* The header declares additional interfaces if the host compiler supports    *|
15 |* the blocks API.                                                            *|
16 |*                                                                            *|
17 \*===----------------------------------------------------------------------===*/
18 
19 #ifndef LLVM_C_ENHANCEDDISASSEMBLY_H
20 #define LLVM_C_ENHANCEDDISASSEMBLY_H
21 
22 #include "llvm/Support/DataTypes.h"
23 
24 #ifdef __cplusplus
25 extern "C" {
26 #endif
27 
28 /*!
29  @typedef EDByteReaderCallback
30  Callback through which instruction bytes are read from memory.
31  @param byte A pointer whose target should be filled in with the byte read.
32  @param address The address of the byte to be read.
33  @param arg An anonymous argument for client use (see EDCreateInsts).
34  @result 0 on success; -1 otherwise.
35  */
36 typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
37 
38 /*!
39  @typedef EDRegisterReaderCallback
40  Callback through which register values are read from a register state.
41  @param value A pointer whose target should be filled in with the value of the
42    register.
43  @param regID The LLVM register identifier for the register to read.
44  @param arg An anonymous argument for client use.
45  @result 0 if the register could be read; -1 otherwise.
46  */
47 typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID,
48                                         void* arg);
49 
50 /*!
51  @typedef EDAssemblySyntax_t
52  The assembly syntax to use when tokenizing; a kEDAssemblySyntax* value.
53  */
54 enum {
55 /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
56   kEDAssemblySyntaxX86Intel  = 0,
57 /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
58   kEDAssemblySyntaxX86ATT    = 1,
59   kEDAssemblySyntaxARMUAL    = 2 /* NOTE(review): undocumented; presumably ARM UAL syntax. */
60 };
61 typedef unsigned EDAssemblySyntax_t;
62 
63 /*!
64  @typedef EDDisassemblerRef
65  Opaque handle to a disassembler for one CPU architecture (see EDGetDisassembler).
66  */
67 typedef void *EDDisassemblerRef;
68 
69 /*!
70  @typedef EDInstRef
71  Opaque handle to a single disassembled instruction in one assembly syntax.
72  */
73 typedef void *EDInstRef;
74 
75 /*!
76  @typedef EDTokenRef
77  Opaque handle to a token from the disassembly of an instruction.
78  */
79 typedef void *EDTokenRef;
80 
81 /*!
82  @typedef EDOperandRef
83  Opaque handle to an operand of an instruction.
84  */
85 typedef void *EDOperandRef;
86 
87 /*!
88  @functiongroup Getting a disassembler
89  */
90 
91 /*!
92  @function EDGetDisassembler
93  Gets the disassembler for a given target and assembly syntax.
94  @param disassembler A pointer whose target will be filled in with the
95    disassembler.
96  @param triple Identifies the target.  Example: "x86_64-apple-darwin10"
97  @param syntax The assembly syntax to use when decoding instructions.
98  @result 0 on success; -1 otherwise.
99  */
100 int EDGetDisassembler(EDDisassemblerRef *disassembler,
101                       const char *triple,
102                       EDAssemblySyntax_t syntax);
103 
104 /*!
105  @functiongroup Generic architectural queries
106  */
107 
108 /*!
109  @function EDGetRegisterName
110  Gets the human-readable name for a given register.
111  @param regName A pointer whose target will be pointed at the name of the
112    register.  The name does not need to be deallocated by the caller.
113  @param disassembler The disassembler to query for the name.
114  @param regID The register identifier, as returned by EDRegisterTokenValue.
115  @result 0 on success; -1 otherwise.
116  */
117 int EDGetRegisterName(const char** regName,
118                       EDDisassemblerRef disassembler,
119                       unsigned regID);
120 
121 /*!
122  @function EDRegisterIsStackPointer
123  Determines if a register is one of the platform's stack-pointer registers.
124  @param disassembler The disassembler to query.
125  @param regID The register identifier, as returned by EDRegisterTokenValue.
126  @result 1 if the register is a stack pointer; 0 otherwise.
127  */
128 int EDRegisterIsStackPointer(EDDisassemblerRef disassembler,
129                              unsigned regID);
130 
131 /*!
132  @function EDRegisterIsProgramCounter
133  Determines if a register is one of the platform's program-counter registers.
134  @param disassembler The disassembler to query.
135  @param regID The register identifier, as returned by EDRegisterTokenValue.
136  @result 1 if the register is a program counter; 0 otherwise.
137  */
138 int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler,
139                                unsigned regID);
140 
141 /*!
142  @functiongroup Creating and querying instructions
143  */
144 
145 /*!
146  @function EDCreateInsts
147  Gets a set of contiguous instructions from a disassembler.
148  @param insts A pointer to an array that will be filled in with the
149    instructions.  Must have at least count entries.  Entries not filled in will
150    be set to NULL.
151  @param count The maximum number of instructions to fill in.
152  @param disassembler The disassembler to use when decoding the instructions.
153  @param byteReader The function to use when reading the instructions' machine
154    code.
155  @param address The address of the first byte of the first instruction.
156  @param arg An anonymous argument to be passed to byteReader.
157  @result The number of instructions read on success; 0 otherwise.
158  */
159 unsigned int EDCreateInsts(EDInstRef *insts,
160                            unsigned int count,
161                            EDDisassemblerRef disassembler,
162                            EDByteReaderCallback byteReader,
163                            uint64_t address,
164                            void *arg);
165 
166 /*!
167  @function EDReleaseInst
168  Frees the memory for an instruction.  The instruction can no longer be accessed
169  after this call.
170  @param inst The instruction to be freed, along with its tokens and operands.
171  */
172 void EDReleaseInst(EDInstRef inst);
173 
174 /*!
175  @function EDInstByteSize
176  @param inst The instruction to be queried (e.g. from EDCreateInsts).
177  @result The size in bytes of the instruction's machine-code representation.
178  */
179 int EDInstByteSize(EDInstRef inst);
180 
181 /*!
182  @function EDGetInstString
183  Gets the disassembled text equivalent of the instruction.
184  @param buf A pointer whose target will be filled in with a pointer to the
185    string.  (The string becomes invalid once EDReleaseInst is called on inst.)
186  @param inst The instruction to be queried.
187  @result 0 on success; -1 otherwise.
188  */
189 int EDGetInstString(const char **buf,
190                     EDInstRef inst);
191 
192 /*!
193  @function EDInstID
194  @param instID A pointer whose target will be filled in with the LLVM identifier
195    for the instruction.
196  @param inst The instruction to be queried (e.g. from EDCreateInsts).
197  @result 0 on success; -1 otherwise.
198  */
199 int EDInstID(unsigned *instID, EDInstRef inst);
200 
201 /*!
202  @function EDInstIsBranch
203  @param inst The instruction to be queried.
204  @result 1 if the instruction is a branch instruction; 0 if it is some other
205    type of instruction; -1 if there was an error.  See also EDBranchTargetID.
206  */
207 int EDInstIsBranch(EDInstRef inst);
208 
209 /*!
210  @function EDInstIsMove
211  @param inst The instruction to be queried.
212  @result 1 if the instruction is a move instruction; 0 if it is some other
213    type of instruction; -1 on error.  See also EDMoveSourceID, EDMoveTargetID.
214  */
215 int EDInstIsMove(EDInstRef inst);
216 
217 /*!
218  @function EDBranchTargetID
219  @param inst The instruction to be queried.
220  @result The ID of the branch target operand, suitable for use with
221    EDGetOperand.  -1 if no such operand exists.
222  */
223 int EDBranchTargetID(EDInstRef inst);
224 
225 /*!
226  @function EDMoveSourceID
227  @param inst The instruction to be queried.
228  @result The ID of the move source operand, suitable for use with
229    EDGetOperand.  -1 if no such operand exists.
230  */
231 int EDMoveSourceID(EDInstRef inst);
232 
233 /*!
234  @function EDMoveTargetID
235  @param inst The instruction to be queried.
236  @result The ID of the move target operand, suitable for use with
237    EDGetOperand.  -1 if no such operand exists.
238  */
239 int EDMoveTargetID(EDInstRef inst);
240 
241 /*!
242  @functiongroup Creating and querying tokens
243  */
244 
245 /*!
246  @function EDNumTokens
247  @param inst The instruction to be queried.
248  @result The number of tokens in the instruction (see EDGetToken); -1 on error.
249  */
250 int EDNumTokens(EDInstRef inst);
251 
252 /*!
253  @function EDGetToken
254  Retrieves a token from an instruction.  The token is valid until the
255  instruction is released.
256  @param token A pointer to be filled in with the token.
257  @param inst The instruction to be queried.
258  @param index The index of the token in the instruction (see EDNumTokens).
259  @result 0 on success; -1 otherwise.
260  */
261 int EDGetToken(EDTokenRef *token,
262                EDInstRef inst,
263                int index);
264 
265 /*!
266  @function EDGetTokenString
267  Gets the disassembled text for a token.
268  @param buf A pointer whose target will be filled in with a pointer to the
269    string.  (The string becomes invalid when the token's instruction is released.)
270  @param token The token to be queried.
271  @result 0 on success; -1 otherwise.
272  */
273 int EDGetTokenString(const char **buf,
274                      EDTokenRef token);
275 
276 /*!
277  @function EDOperandIndexForToken
278  Returns the index of the operand to which a token belongs (see EDGetOperand).
279  @param token The token to be queried.
280  @result The operand index on success; -1 otherwise.
281  */
282 int EDOperandIndexForToken(EDTokenRef token);
283 
284 /*!
285  @function EDTokenIsWhitespace
286  @param token The token to be queried (e.g. from EDGetToken).
287  @result 1 if the token is whitespace; 0 if not; -1 on error.
288  */
289 int EDTokenIsWhitespace(EDTokenRef token);
290 
291 /*!
292  @function EDTokenIsPunctuation
293  @param token The token to be queried (e.g. from EDGetToken).
294  @result 1 if the token is punctuation; 0 if not; -1 on error.
295  */
296 int EDTokenIsPunctuation(EDTokenRef token);
297 
298 /*!
299  @function EDTokenIsOpcode
300  @param token The token to be queried.
301  @result 1 if the token is an opcode; 0 if not; -1 on error.
302  */
303 int EDTokenIsOpcode(EDTokenRef token);
304 
305 /*!
306  @function EDTokenIsLiteral
307  @param token The token to be queried (see EDLiteralTokenAbsoluteValue).
308  @result 1 if the token is a numeric literal; 0 if not; -1 on error.
309  */
310 int EDTokenIsLiteral(EDTokenRef token);
311 
312 /*!
313  @function EDTokenIsRegister
314  @param token The token to be queried (see EDRegisterTokenValue).
315  @result 1 if the token identifies a register; 0 if not; -1 on error.
316  */
317 int EDTokenIsRegister(EDTokenRef token);
318 
319 /*!
320  @function EDTokenIsNegativeLiteral
321  @param token The token to be queried (see EDLiteralTokenAbsoluteValue).
322  @result 1 if the token is a negative signed literal; 0 if not; -1 on error.
323  */
324 int EDTokenIsNegativeLiteral(EDTokenRef token);
325 
326 /*!
327  @function EDLiteralTokenAbsoluteValue
328  @param value A pointer whose target will be filled in with the absolute value
329    of the literal (see EDTokenIsNegativeLiteral for its sign).
330  @param token The token to be queried.
331  @result 0 on success; -1 otherwise.
332  */
333 int EDLiteralTokenAbsoluteValue(uint64_t *value,
334                                 EDTokenRef token);
335 
336 /*!
337  @function EDRegisterTokenValue
338  @param registerID A pointer whose target will be filled in with the LLVM
339    register identifier for the token (usable with EDGetRegisterName).
340  @param token The token to be queried.
341  @result 0 on success; -1 otherwise.
342  */
343 int EDRegisterTokenValue(unsigned *registerID,
344                          EDTokenRef token);
345 
346 /*!
347  @functiongroup Creating and querying operands
348  */
349 
350 /*!
351  @function EDNumOperands
352  @param inst The instruction to be queried.
353  @result The number of operands in the instruction (see EDGetOperand); -1 on error.
354  */
355 int EDNumOperands(EDInstRef inst);
356 
357 /*!
358  @function EDGetOperand
359  Retrieves an operand from an instruction.  The operand is valid until the
360  instruction is released.
361  @param operand A pointer to be filled in with the operand.
362  @param inst The instruction to be queried.
363  @param index The index of the operand in the instruction (see EDNumOperands).
364  @result 0 on success; -1 otherwise.
365  */
366 int EDGetOperand(EDOperandRef *operand,
367                  EDInstRef inst,
368                  int index);
369 
370 /*!
371  @function EDOperandIsRegister
372  @param operand The operand to be queried (see EDRegisterOperandValue).
373  @result 1 if the operand names a register; 0 if not; -1 on error.
374  */
375 int EDOperandIsRegister(EDOperandRef operand);
376 
377 /*!
378  @function EDOperandIsImmediate
379  @param operand The operand to be queried (see EDImmediateOperandValue).
380  @result 1 if the operand specifies an immediate value; 0 if not; -1 on error.
381  */
382 int EDOperandIsImmediate(EDOperandRef operand);
383 
384 /*!
385  @function EDOperandIsMemory
386  @param operand The operand to be queried (e.g. from EDGetOperand).
387  @result 1 if the operand specifies a location in memory; 0 if not; -1 on error.
388  */
389 int EDOperandIsMemory(EDOperandRef operand);
390 
391 /*!
392  @function EDRegisterOperandValue
393  @param value A pointer whose target will be filled in with the LLVM register ID
394    of the register named by the operand.
395  @param operand The operand to be queried (see EDOperandIsRegister).
396  @result 0 on success; -1 otherwise.
397  */
398 int EDRegisterOperandValue(unsigned *value,
399                            EDOperandRef operand);
400 
401 /*!
402  @function EDImmediateOperandValue
403  @param value A pointer whose target will be filled in with the value of the
404    immediate.
405  @param operand The operand to be queried (see EDOperandIsImmediate).
406  @result 0 on success; -1 otherwise.
407  */
408 int EDImmediateOperandValue(uint64_t *value,
409                             EDOperandRef operand);
410 
411 /*!
412  @function EDEvaluateOperand
413  Evaluates an operand using a client-supplied register state accessor.  Register
414  operands are evaluated by reading the value of the register; immediate operands
415  are evaluated by reporting the immediate value; memory operands are evaluated
416  by computing the target address (with only those relocations applied that were
417  already applied to the original bytes).
418  @param result A pointer whose target is to be filled with the result of
419    evaluating the operand.
420  @param operand The operand to be evaluated.
421  @param regReader The function to use when reading registers from the register
422    state.
423  @param arg An anonymous argument for client use (presumably given to regReader).
424  @result 0 if the operand could be evaluated; -1 otherwise.
425  */
426 int EDEvaluateOperand(uint64_t *result,
427                       EDOperandRef operand,
428                       EDRegisterReaderCallback regReader,
429                       void *arg);
430 
431 #ifdef __BLOCKS__
432 
433 /*!
434  @typedef EDByteBlock_t
435  Block-based interface to memory from which instructions may be read.
436  @param byte A pointer whose target should be filled in with the byte read.
437  @param address The address of the byte to be read.
438  @result 0 on success; -1 otherwise.
439  */
440 typedef int (^EDByteBlock_t)(uint8_t *byte, uint64_t address);
441 
442 /*!
443  @typedef EDRegisterBlock_t
444  Block-based interface to a register state from which values may be read.
445  @param value A pointer whose target should be filled in with the value of the
446    register.
447  @param regID The LLVM register identifier for the register to read.
448  @result 0 if the register could be read; -1 otherwise.
449  */
450 typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID);
451 
452 /*!
453  @typedef EDTokenVisitor_t
454  Block-based handler for individual tokens (see EDBlockVisitTokens).
455  @param token The current token being visited.
456  @result 0 to continue; 1 to stop normally; -1 on error.
457  */
458 typedef int (^EDTokenVisitor_t)(EDTokenRef token);
459 
460 /*! @functiongroup Block-based interfaces */
461 
462 /*!
463  @function EDBlockCreateInsts
464  Gets a set of contiguous instructions from a disassembler, using a block to
465  read memory.
466  @param insts A pointer to an array that will be filled in with the
467    instructions.  Must have at least count entries.  Entries not filled in will
468    be set to NULL.
469  @param count The maximum number of instructions to fill in (a signed int here).
470  @param disassembler The disassembler to use when decoding the instructions.
471  @param byteBlock The block to use when reading the instructions' machine
472    code.
473  @param address The address of the first byte of the first instruction.
474  @result The number of instructions read on success; 0 otherwise.
475  */
476 unsigned int EDBlockCreateInsts(EDInstRef *insts,
477                                 int count,
478                                 EDDisassemblerRef disassembler,
479                                 EDByteBlock_t byteBlock,
480                                 uint64_t address);
481 
482 /*!
483  @function EDBlockEvaluateOperand
484  Evaluates an operand using a block to read registers (cf. EDEvaluateOperand).
485  @param result A pointer whose target is to be filled with the result of
486    evaluating the operand.
487  @param operand The operand to be evaluated.
488  @param regBlock The block to use when reading registers from the register
489    state.
490  @result 0 if the operand could be evaluated; -1 otherwise.
491  */
492 int EDBlockEvaluateOperand(uint64_t *result,
493                            EDOperandRef operand,
494                            EDRegisterBlock_t regBlock);
495 
496 /*!
497  @function EDBlockVisitTokens
498  Passes the tokens of an instruction to a visitor block.
499  @param inst The instruction with the tokens to be visited.
500  @param visitor The visitor block; see EDTokenVisitor_t for its return values.
501  @result 0 if the visit ended normally; -1 if the visitor encountered an error
502    or there was some other error.
503  */
504 int EDBlockVisitTokens(EDInstRef inst,
505                        EDTokenVisitor_t visitor);
506 
507 #endif
508 
509 #ifdef __cplusplus
510 }
511 #endif
512 
513 #endif
514