1 //===-- EDDisassembler.h - LLVM Enhanced Disassembler -----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interface for the Enhanced Disassembly library's
11 // disassembler class.  The disassembler is responsible for vending individual
12 // instructions according to a given architecture and disassembly syntax.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_EDDISASSEMBLER_H
17 #define LLVM_EDDISASSEMBLER_H
18 
19 #include "EDInfo.h"
20 
21 #include "llvm/ADT/OwningPtr.h"
22 #include "llvm/ADT/Triple.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include "llvm/Support/Mutex.h"
25 
26 #include <map>
27 #include <set>
28 #include <vector>
29 
30 namespace llvm {
// Forward declarations of LLVM classes, so that this header does not need to
// pull in their full definitions.

// Assembly lexing and parsing.
class AsmLexer;
class AsmParser;
class AsmToken;
class MCAsmLexer;
class MCParsedAsmOperand;
class MCTargetAsmLexer;
class MCTargetAsmParser;
class SourceMgr;

// MC layer.
class MCAsmInfo;
class MCContext;
class MCDisassembler;
class MCInst;
class MCInstPrinter;
class MCRegisterInfo;
class MCStreamer;
class MCSubtargetInfo;
class Target;

// ADT.
template <typename T> class SmallVectorImpl;

// Enhanced Disassembly library types.
struct EDInst;
struct EDInstInfo;
struct EDOperand;
struct EDToken;

/// EDByteReaderCallback - Signature of the callback that
///   EDDisassembler::createInst() uses to obtain machine code bytes.
///
/// @arg byte    - Presumably receives the byte that was read (TODO confirm
///                against the implementation)
/// @arg address - The address of the byte to be provided
/// @arg arg     - An opaque argument forwarded unchanged from the caller
///
/// NOTE(review): the meaning of the int result is not visible in this header;
///   confirm the success/failure convention before relying on it.
typedef int (*EDByteReaderCallback)(uint8_t *byte,
                                    uint64_t address,
                                    void *arg);
56 
57 /// EDDisassembler - Encapsulates a disassembler for a single architecture and
58 ///   disassembly syntax.  Also manages the static disassembler registry.
59 struct EDDisassembler {
60   typedef enum {
61     /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
62     kEDAssemblySyntaxX86Intel  = 0,
63     /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
64     kEDAssemblySyntaxX86ATT    = 1,
65     kEDAssemblySyntaxARMUAL    = 2
66   } AssemblySyntax;
67 
68 
69   ////////////////////
70   // Static members //
71   ////////////////////
72 
73   /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax
74   ///   pair
75   struct CPUKey {
76     /// The architecture type
77     llvm::Triple::ArchType Arch;
78 
79     /// The assembly syntax
80     AssemblySyntax Syntax;
81 
82     /// operator== - Equality operator
83     bool operator==(const CPUKey &key) const {
84       return (Arch == key.Arch &&
85               Syntax == key.Syntax);
86     }
87 
88     /// operator< - Less-than operator
89     bool operator<(const CPUKey &key) const {
90       return ((Arch < key.Arch) ||
91               ((Arch == key.Arch) && Syntax < (key.Syntax)));
92     }
93   };
94 
95   typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
96 
97   /// True if the disassembler registry has been initialized; false if not
98   static bool sInitialized;
99   /// A map from disassembler specifications to disassemblers.  Populated
100   ///   lazily.
101   static DisassemblerMap_t sDisassemblers;
102 
103   /// getDisassembler - Returns the specified disassemble, or NULL on failure
104   ///
105   /// @arg arch   - The desired architecture
106   /// @arg syntax - The desired disassembly syntax
107   static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch,
108                                          AssemblySyntax syntax);
109 
110   /// getDisassembler - Returns the disassembler for a given combination of
111   ///   CPU type, CPU subtype, and assembly syntax, or NULL on failure
112   ///
113   /// @arg str    - The string representation of the architecture triple, e.g.,
114   ///               "x86_64-apple-darwin"
115   /// @arg syntax - The disassembly syntax for the required disassembler
116   static EDDisassembler *getDisassembler(llvm::StringRef str,
117                                          AssemblySyntax syntax);
118 
119   /// initialize - Initializes the disassembler registry and the LLVM backend
120   static void initialize();
121 
122   ////////////////////////
123   // Per-object members //
124   ////////////////////////
125 
126   /// True only if the object has been successfully initialized
127   bool Valid;
128   /// True if the disassembler can provide semantic information
129   bool HasSemantics;
130 
131   /// The stream to write errors to
132   llvm::raw_ostream &ErrorStream;
133 
134   /// The architecture/syntax pair for the current architecture
135   CPUKey Key;
136   /// The LLVM target corresponding to the disassembler
137   const llvm::Target *Tgt;
138   /// The assembly information for the target architecture
139   llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
140   /// The subtarget information for the target architecture
141   llvm::OwningPtr<const llvm::MCSubtargetInfo> STI;
142   // The register information for the target architecture.
143   llvm::OwningPtr<const llvm::MCRegisterInfo> MRI;
144   /// The disassembler for the target architecture
145   llvm::OwningPtr<const llvm::MCDisassembler> Disassembler;
146   /// The output string for the instruction printer; must be guarded with
147   ///   PrinterMutex
148   llvm::OwningPtr<std::string> InstString;
149   /// The output stream for the disassembler; must be guarded with
150   ///   PrinterMutex
151   llvm::OwningPtr<llvm::raw_string_ostream> InstStream;
152   /// The instruction printer for the target architecture; must be guarded with
153   ///   PrinterMutex when printing
154   llvm::OwningPtr<llvm::MCInstPrinter> InstPrinter;
155   /// The mutex that guards the instruction printer's printing functions, which
156   ///   use a shared stream
157   llvm::sys::Mutex PrinterMutex;
158   /// The array of instruction information provided by the TableGen backend for
159   ///   the target architecture
160   const llvm::EDInstInfo *InstInfos;
161   /// The target-specific lexer for use in tokenizing strings, in
162   ///   target-independent and target-specific portions
163   llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer;
164   llvm::OwningPtr<llvm::MCTargetAsmLexer> SpecificAsmLexer;
165   /// The guard for the above
166   llvm::sys::Mutex ParserMutex;
167   /// The LLVM number used for the target disassembly syntax variant
168   int LLVMSyntaxVariant;
169 
170   typedef std::vector<std::string> regvec_t;
171   typedef std::map<std::string, unsigned> regrmap_t;
172 
173   /// A vector of registers for quick mapping from LLVM register IDs to names
174   regvec_t RegVec;
175   /// A map of registers for quick mapping from register names to LLVM IDs
176   regrmap_t RegRMap;
177 
178   /// A set of register IDs for aliases of the stack pointer for the current
179   ///   architecture
180   std::set<unsigned> stackPointers;
181   /// A set of register IDs for aliases of the program counter for the current
182   ///   architecture
183   std::set<unsigned> programCounters;
184 
185   /// Constructor - initializes a disassembler with all the necessary objects,
186   ///   which come pre-allocated from the registry accessor function
187   ///
188   /// @arg key                - the architecture and disassembly syntax for the
189   ///                           disassembler
190   EDDisassembler(CPUKey& key);
191 
192   /// valid - reports whether there was a failure in the constructor.
validEDDisassembler193   bool valid() {
194     return Valid;
195   }
196 
197   /// hasSemantics - reports whether the disassembler can provide operands and
198   ///   tokens.
hasSemanticsEDDisassembler199   bool hasSemantics() {
200     return HasSemantics;
201   }
202 
203   ~EDDisassembler();
204 
205   /// createInst - creates and returns an instruction given a callback and
206   ///   memory address, or NULL on failure
207   ///
208   /// @arg byteReader - A callback function that provides machine code bytes
209   /// @arg address    - The address of the first byte of the instruction,
210   ///                   suitable for passing to byteReader
211   /// @arg arg        - An opaque argument for byteReader
212   EDInst *createInst(EDByteReaderCallback byteReader,
213                      uint64_t address,
214                      void *arg);
215 
216   /// initMaps - initializes regVec and regRMap using the provided register
217   ///   info
218   ///
219   /// @arg registerInfo - the register information to use as a source
220   void initMaps(const llvm::MCRegisterInfo &registerInfo);
221   /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a
222   ///   register for a given register ID, or NULL on failure
223   ///
224   /// @arg registerID - the ID of the register to be queried
225   const char *nameWithRegisterID(unsigned registerID) const;
226   /// registerIDWithName - Returns the ID of a register for a given register
227   ///   name, or (unsigned)-1 on failure
228   ///
229   /// @arg name - The name of the register
230   unsigned registerIDWithName(const char *name) const;
231 
232   /// registerIsStackPointer - reports whether a register ID is an alias for the
233   ///   stack pointer register
234   ///
235   /// @arg registerID - The LLVM register ID
236   bool registerIsStackPointer(unsigned registerID);
237   /// registerIsStackPointer - reports whether a register ID is an alias for the
238   ///   stack pointer register
239   ///
240   /// @arg registerID - The LLVM register ID
241   bool registerIsProgramCounter(unsigned registerID);
242 
243   /// printInst - prints an MCInst to a string, returning 0 on success, or -1
244   ///   otherwise
245   ///
246   /// @arg str  - A reference to a string which is filled in with the string
247   ///             representation of the instruction
248   /// @arg inst - A reference to the MCInst to be printed
249   int printInst(std::string& str,
250                 llvm::MCInst& inst);
251 
252   /// parseInst - extracts operands and tokens from a string for use in
253   ///   tokenizing the string.  Returns 0 on success, or -1 otherwise.
254   ///
255   /// @arg operands - A reference to a vector that will be filled in with the
256   ///                 parsed operands
257   /// @arg tokens   - A reference to a vector that will be filled in with the
258   ///                 tokens
259   /// @arg str      - The string representation of the instruction
260   int parseInst(llvm::SmallVectorImpl<llvm::MCParsedAsmOperand*> &operands,
261                 llvm::SmallVectorImpl<llvm::AsmToken> &tokens,
262                 const std::string &str);
263 
264   /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler
265   int llvmSyntaxVariant() const;
266 };
267 
268 } // end namespace llvm
269 
270 #endif
271