1 //===-- DisassemblerLLVMC.cpp -----------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "DisassemblerLLVMC.h"
11 
12 #include "llvm-c/Disassembler.h"
13 #include "llvm/ADT/OwningPtr.h"
14 #include "llvm/MC/MCAsmInfo.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/MC/MCDisassembler.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCInstPrinter.h"
19 #include "llvm/MC/MCInstrInfo.h"
20 #include "llvm/MC/MCRegisterInfo.h"
21 #include "llvm/MC/MCRelocationInfo.h"
22 #include "llvm/MC/MCSubtargetInfo.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/MemoryObject.h"
25 #include "llvm/Support/TargetRegistry.h"
26 #include "llvm/Support/TargetSelect.h"
27 #include "llvm/ADT/SmallString.h"
28 
29 
30 #include "lldb/Core/Address.h"
31 #include "lldb/Core/DataExtractor.h"
32 #include "lldb/Core/Module.h"
33 #include "lldb/Core/Stream.h"
34 #include "lldb/Symbol/SymbolContext.h"
35 #include "lldb/Target/ExecutionContext.h"
36 #include "lldb/Target/Process.h"
37 #include "lldb/Target/RegisterContext.h"
38 #include "lldb/Target/Target.h"
39 #include "lldb/Target/StackFrame.h"
40 
41 #include <regex.h>
42 
43 using namespace lldb;
44 using namespace lldb_private;
45 
46 class InstructionLLVMC : public lldb_private::Instruction
47 {
48 public:
InstructionLLVMC(DisassemblerLLVMC & disasm,const lldb_private::Address & address,AddressClass addr_class)49     InstructionLLVMC (DisassemblerLLVMC &disasm,
50                       const lldb_private::Address &address,
51                       AddressClass addr_class) :
52         Instruction (address, addr_class),
53         m_disasm_sp (disasm.shared_from_this()),
54         m_does_branch (eLazyBoolCalculate),
55         m_is_valid (false),
56         m_using_file_addr (false)
57     {
58     }
59 
60     virtual
~InstructionLLVMC()61     ~InstructionLLVMC ()
62     {
63     }
64 
65     virtual bool
DoesBranch()66     DoesBranch ()
67     {
68         if (m_does_branch == eLazyBoolCalculate)
69         {
70             GetDisassemblerLLVMC().Lock(this, NULL);
71             DataExtractor data;
72             if (m_opcode.GetData(data))
73             {
74                 bool is_alternate_isa;
75                 lldb::addr_t pc = m_address.GetFileAddress();
76 
77                 DisassemblerLLVMC::LLVMCDisassembler *mc_disasm_ptr = GetDisasmToUse (is_alternate_isa);
78                 const uint8_t *opcode_data = data.GetDataStart();
79                 const size_t opcode_data_len = data.GetByteSize();
80                 llvm::MCInst inst;
81                 const size_t inst_size = mc_disasm_ptr->GetMCInst (opcode_data,
82                                                                    opcode_data_len,
83                                                                    pc,
84                                                                    inst);
85                 // Be conservative, if we didn't understand the instruction, say it might branch...
86                 if (inst_size == 0)
87                     m_does_branch = eLazyBoolYes;
88                 else
89                 {
90                     const bool can_branch = mc_disasm_ptr->CanBranch(inst);
91                     if (can_branch)
92                         m_does_branch = eLazyBoolYes;
93                     else
94                         m_does_branch = eLazyBoolNo;
95                 }
96             }
97             GetDisassemblerLLVMC().Unlock();
98         }
99         return m_does_branch == eLazyBoolYes;
100     }
101 
102     DisassemblerLLVMC::LLVMCDisassembler *
GetDisasmToUse(bool & is_alternate_isa)103     GetDisasmToUse (bool &is_alternate_isa)
104     {
105         is_alternate_isa = false;
106         DisassemblerLLVMC &llvm_disasm = GetDisassemblerLLVMC();
107         if (llvm_disasm.m_alternate_disasm_ap.get() != NULL)
108         {
109             const AddressClass address_class = GetAddressClass ();
110 
111             if (address_class == eAddressClassCodeAlternateISA)
112             {
113                 is_alternate_isa = true;
114                 return llvm_disasm.m_alternate_disasm_ap.get();
115             }
116         }
117         return llvm_disasm.m_disasm_ap.get();
118     }
119 
120     virtual size_t
Decode(const lldb_private::Disassembler & disassembler,const lldb_private::DataExtractor & data,lldb::offset_t data_offset)121     Decode (const lldb_private::Disassembler &disassembler,
122             const lldb_private::DataExtractor &data,
123             lldb::offset_t data_offset)
124     {
125         // All we have to do is read the opcode which can be easy for some
126         // architectures
127         bool got_op = false;
128         DisassemblerLLVMC &llvm_disasm = GetDisassemblerLLVMC();
129         const ArchSpec &arch = llvm_disasm.GetArchitecture();
130 
131         const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize();
132         const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize();
133         if (min_op_byte_size == max_op_byte_size)
134         {
135             // Fixed size instructions, just read that amount of data.
136             if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size))
137                 return false;
138 
139             switch (min_op_byte_size)
140             {
141                 case 1:
142                     m_opcode.SetOpcode8  (data.GetU8  (&data_offset));
143                     got_op = true;
144                     break;
145 
146                 case 2:
147                     m_opcode.SetOpcode16 (data.GetU16 (&data_offset));
148                     got_op = true;
149                     break;
150 
151                 case 4:
152                     m_opcode.SetOpcode32 (data.GetU32 (&data_offset));
153                     got_op = true;
154                     break;
155 
156                 case 8:
157                     m_opcode.SetOpcode64 (data.GetU64 (&data_offset));
158                     got_op = true;
159                     break;
160 
161                 default:
162                     m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size), min_op_byte_size);
163                     got_op = true;
164                     break;
165             }
166         }
167         if (!got_op)
168         {
169             bool is_alternate_isa = false;
170             DisassemblerLLVMC::LLVMCDisassembler *mc_disasm_ptr = GetDisasmToUse (is_alternate_isa);
171 
172             const llvm::Triple::ArchType machine = arch.GetMachine();
173             if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb)
174             {
175                 if (machine == llvm::Triple::thumb || is_alternate_isa)
176                 {
177                     uint32_t thumb_opcode = data.GetU16(&data_offset);
178                     if ((thumb_opcode & 0xe000) != 0xe000 || ((thumb_opcode & 0x1800u) == 0))
179                     {
180                         m_opcode.SetOpcode16 (thumb_opcode);
181                         m_is_valid = true;
182                     }
183                     else
184                     {
185                         thumb_opcode <<= 16;
186                         thumb_opcode |= data.GetU16(&data_offset);
187                         m_opcode.SetOpcode16_2 (thumb_opcode);
188                         m_is_valid = true;
189                     }
190                 }
191                 else
192                 {
193                     m_opcode.SetOpcode32 (data.GetU32(&data_offset));
194                     m_is_valid = true;
195                 }
196             }
197             else
198             {
199                 // The opcode isn't evenly sized, so we need to actually use the llvm
200                 // disassembler to parse it and get the size.
201                 uint8_t *opcode_data = const_cast<uint8_t *>(data.PeekData (data_offset, 1));
202                 const size_t opcode_data_len = data.BytesLeft(data_offset);
203                 const addr_t pc = m_address.GetFileAddress();
204                 llvm::MCInst inst;
205 
206                 llvm_disasm.Lock(this, NULL);
207                 const size_t inst_size = mc_disasm_ptr->GetMCInst(opcode_data,
208                                                                   opcode_data_len,
209                                                                   pc,
210                                                                   inst);
211                 llvm_disasm.Unlock();
212                 if (inst_size == 0)
213                     m_opcode.Clear();
214                 else
215                 {
216                     m_opcode.SetOpcodeBytes(opcode_data, inst_size);
217                     m_is_valid = true;
218                 }
219             }
220         }
221         return m_opcode.GetByteSize();
222     }
223 
224     void
AppendComment(std::string & description)225     AppendComment (std::string &description)
226     {
227         if (m_comment.empty())
228             m_comment.swap (description);
229         else
230         {
231             m_comment.append(", ");
232             m_comment.append(description);
233         }
234     }
235 
236     virtual void
CalculateMnemonicOperandsAndComment(const lldb_private::ExecutionContext * exe_ctx)237     CalculateMnemonicOperandsAndComment (const lldb_private::ExecutionContext *exe_ctx)
238     {
239         DataExtractor data;
240         const AddressClass address_class = GetAddressClass ();
241 
242         if (m_opcode.GetData(data))
243         {
244             char out_string[512];
245 
246             DisassemblerLLVMC &llvm_disasm = GetDisassemblerLLVMC();
247 
248             DisassemblerLLVMC::LLVMCDisassembler *mc_disasm_ptr;
249 
250             if (address_class == eAddressClassCodeAlternateISA)
251                 mc_disasm_ptr = llvm_disasm.m_alternate_disasm_ap.get();
252             else
253                 mc_disasm_ptr = llvm_disasm.m_disasm_ap.get();
254 
255             lldb::addr_t pc = m_address.GetFileAddress();
256             m_using_file_addr = true;
257 
258             const bool data_from_file = GetDisassemblerLLVMC().m_data_from_file;
259             bool use_hex_immediates = true;
260             Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC;
261 
262             if (exe_ctx)
263             {
264                 Target *target = exe_ctx->GetTargetPtr();
265                 if (target)
266                 {
267                     use_hex_immediates = target->GetUseHexImmediates();
268                     hex_style = target->GetHexImmediateStyle();
269 
270                     if (!data_from_file)
271                     {
272                         const lldb::addr_t load_addr = m_address.GetLoadAddress(target);
273                         if (load_addr != LLDB_INVALID_ADDRESS)
274                         {
275                             pc = load_addr;
276                             m_using_file_addr = false;
277                         }
278                     }
279                 }
280             }
281 
282             llvm_disasm.Lock(this, exe_ctx);
283 
284             const uint8_t *opcode_data = data.GetDataStart();
285             const size_t opcode_data_len = data.GetByteSize();
286             llvm::MCInst inst;
287             size_t inst_size = mc_disasm_ptr->GetMCInst (opcode_data,
288                                                          opcode_data_len,
289                                                          pc,
290                                                          inst);
291 
292             if (inst_size > 0)
293             {
294                 mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style);
295                 mc_disasm_ptr->PrintMCInst(inst, out_string, sizeof(out_string));
296             }
297 
298             llvm_disasm.Unlock();
299 
300             if (inst_size == 0)
301             {
302                 m_comment.assign ("unknown opcode");
303                 inst_size = m_opcode.GetByteSize();
304                 StreamString mnemonic_strm;
305                 lldb::offset_t offset = 0;
306                 switch (inst_size)
307                 {
308                     case 1:
309                         {
310                             const uint8_t uval8 = data.GetU8 (&offset);
311                             m_opcode.SetOpcode8 (uval8);
312                             m_opcode_name.assign (".byte");
313                             mnemonic_strm.Printf("0x%2.2x", uval8);
314                         }
315                         break;
316                     case 2:
317                         {
318                             const uint16_t uval16 = data.GetU16(&offset);
319                             m_opcode.SetOpcode16(uval16);
320                             m_opcode_name.assign (".short");
321                             mnemonic_strm.Printf("0x%4.4x", uval16);
322                         }
323                         break;
324                     case 4:
325                         {
326                             const uint32_t uval32 = data.GetU32(&offset);
327                             m_opcode.SetOpcode32(uval32);
328                             m_opcode_name.assign (".long");
329                             mnemonic_strm.Printf("0x%8.8x", uval32);
330                         }
331                         break;
332                     case 8:
333                         {
334                             const uint64_t uval64 = data.GetU64(&offset);
335                             m_opcode.SetOpcode64(uval64);
336                             m_opcode_name.assign (".quad");
337                             mnemonic_strm.Printf("0x%16.16" PRIx64, uval64);
338                         }
339                         break;
340                     default:
341                         if (inst_size == 0)
342                             return;
343                         else
344                         {
345                             const uint8_t *bytes = data.PeekData(offset, inst_size);
346                             if (bytes == NULL)
347                                 return;
348                             m_opcode_name.assign (".byte");
349                             m_opcode.SetOpcodeBytes(bytes, inst_size);
350                             mnemonic_strm.Printf("0x%2.2x", bytes[0]);
351                             for (uint32_t i=1; i<inst_size; ++i)
352                                 mnemonic_strm.Printf(" 0x%2.2x", bytes[i]);
353                         }
354                         break;
355                 }
356                 m_mnemonics.swap(mnemonic_strm.GetString());
357                 return;
358             }
359             else
360             {
361                 if (m_does_branch == eLazyBoolCalculate)
362                 {
363                     const bool can_branch = mc_disasm_ptr->CanBranch(inst);
364                     if (can_branch)
365                         m_does_branch = eLazyBoolYes;
366                     else
367                         m_does_branch = eLazyBoolNo;
368 
369                 }
370             }
371 
372             if (!s_regex_compiled)
373             {
374                 ::regcomp(&s_regex, "[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?", REG_EXTENDED);
375                 s_regex_compiled = true;
376             }
377 
378             ::regmatch_t matches[3];
379 
380             if (!::regexec(&s_regex, out_string, sizeof(matches) / sizeof(::regmatch_t), matches, 0))
381             {
382                 if (matches[1].rm_so != -1)
383                     m_opcode_name.assign(out_string + matches[1].rm_so, matches[1].rm_eo - matches[1].rm_so);
384                 if (matches[2].rm_so != -1)
385                     m_mnemonics.assign(out_string + matches[2].rm_so, matches[2].rm_eo - matches[2].rm_so);
386             }
387         }
388     }
389 
390     bool
IsValid() const391     IsValid () const
392     {
393         return m_is_valid;
394     }
395 
396     bool
UsingFileAddress() const397     UsingFileAddress() const
398     {
399         return m_using_file_addr;
400     }
401     size_t
GetByteSize() const402     GetByteSize () const
403     {
404         return m_opcode.GetByteSize();
405     }
406 
407     DisassemblerLLVMC &
GetDisassemblerLLVMC()408     GetDisassemblerLLVMC ()
409     {
410         return *(DisassemblerLLVMC *)m_disasm_sp.get();
411     }
412 protected:
413 
414     DisassemblerSP          m_disasm_sp; // for ownership
415     LazyBool                m_does_branch;
416     bool                    m_is_valid;
417     bool                    m_using_file_addr;
418 
419     static bool             s_regex_compiled;
420     static ::regex_t        s_regex;
421 };
422 
423 bool InstructionLLVMC::s_regex_compiled = false;
424 ::regex_t InstructionLLVMC::s_regex;
425 
LLVMCDisassembler(const char * triple,unsigned flavor,DisassemblerLLVMC & owner)426 DisassemblerLLVMC::LLVMCDisassembler::LLVMCDisassembler (const char *triple, unsigned flavor, DisassemblerLLVMC &owner):
427     m_is_valid(true)
428 {
429     std::string Error;
430     const llvm::Target *curr_target = llvm::TargetRegistry::lookupTarget(triple, Error);
431     if (!curr_target)
432     {
433         m_is_valid = false;
434         return;
435     }
436 
437     m_instr_info_ap.reset(curr_target->createMCInstrInfo());
438     m_reg_info_ap.reset (curr_target->createMCRegInfo(triple));
439 
440     std::string features_str;
441 
442     m_subtarget_info_ap.reset(curr_target->createMCSubtargetInfo(triple, "",
443                                                                 features_str));
444 
445     m_asm_info_ap.reset(curr_target->createMCAsmInfo(*curr_target->createMCRegInfo(triple), triple));
446 
447     if (m_instr_info_ap.get() == NULL || m_reg_info_ap.get() == NULL || m_subtarget_info_ap.get() == NULL || m_asm_info_ap.get() == NULL)
448     {
449         m_is_valid = false;
450         return;
451     }
452 
453     m_context_ap.reset(new llvm::MCContext(m_asm_info_ap.get(), m_reg_info_ap.get(), 0));
454 
455     m_disasm_ap.reset(curr_target->createMCDisassembler(*m_subtarget_info_ap.get()));
456     if (m_disasm_ap.get() && m_context_ap.get())
457     {
458         llvm::OwningPtr<llvm::MCRelocationInfo> RelInfo(curr_target->createMCRelocationInfo(triple, *m_context_ap.get()));
459         if (!RelInfo)
460         {
461             m_is_valid = false;
462             return;
463         }
464         m_disasm_ap->setupForSymbolicDisassembly(NULL,
465                                                  DisassemblerLLVMC::SymbolLookupCallback,
466                                                  (void *) &owner,
467                                                  m_context_ap.get(),
468                                                  RelInfo);
469 
470         unsigned asm_printer_variant;
471         if (flavor == ~0U)
472             asm_printer_variant = m_asm_info_ap->getAssemblerDialect();
473         else
474         {
475             asm_printer_variant = flavor;
476         }
477 
478         m_instr_printer_ap.reset(curr_target->createMCInstPrinter(asm_printer_variant,
479                                                                   *m_asm_info_ap.get(),
480                                                                   *m_instr_info_ap.get(),
481                                                                   *m_reg_info_ap.get(),
482                                                                   *m_subtarget_info_ap.get()));
483         if (m_instr_printer_ap.get() == NULL)
484         {
485             m_disasm_ap.reset();
486             m_is_valid = false;
487         }
488     }
489     else
490         m_is_valid = false;
491 }
492 
~LLVMCDisassembler()493 DisassemblerLLVMC::LLVMCDisassembler::~LLVMCDisassembler()
494 {
495 }
496 
497 namespace {
498     // This is the memory object we use in GetInstruction.
499     class LLDBDisasmMemoryObject : public llvm::MemoryObject {
500       const uint8_t *m_bytes;
501       uint64_t m_size;
502       uint64_t m_base_PC;
503     public:
LLDBDisasmMemoryObject(const uint8_t * bytes,uint64_t size,uint64_t basePC)504       LLDBDisasmMemoryObject(const uint8_t *bytes, uint64_t size, uint64_t basePC) :
505                          m_bytes(bytes), m_size(size), m_base_PC(basePC) {}
506 
getBase() const507       uint64_t getBase() const { return m_base_PC; }
getExtent() const508       uint64_t getExtent() const { return m_size; }
509 
readByte(uint64_t addr,uint8_t * byte) const510       int readByte(uint64_t addr, uint8_t *byte) const {
511         if (addr - m_base_PC >= m_size)
512           return -1;
513         *byte = m_bytes[addr - m_base_PC];
514         return 0;
515       }
516     };
517 } // End Anonymous Namespace
518 
519 uint64_t
GetMCInst(const uint8_t * opcode_data,size_t opcode_data_len,lldb::addr_t pc,llvm::MCInst & mc_inst)520 DisassemblerLLVMC::LLVMCDisassembler::GetMCInst (const uint8_t *opcode_data,
521                                                  size_t opcode_data_len,
522                                                  lldb::addr_t pc,
523                                                  llvm::MCInst &mc_inst)
524 {
525     LLDBDisasmMemoryObject memory_object (opcode_data, opcode_data_len, pc);
526     llvm::MCDisassembler::DecodeStatus status;
527 
528     uint64_t new_inst_size;
529     status = m_disasm_ap->getInstruction(mc_inst,
530                                          new_inst_size,
531                                          memory_object,
532                                          pc,
533                                          llvm::nulls(),
534                                          llvm::nulls());
535     if (status == llvm::MCDisassembler::Success)
536         return new_inst_size;
537     else
538         return 0;
539 }
540 
541 uint64_t
PrintMCInst(llvm::MCInst & mc_inst,char * dst,size_t dst_len)542 DisassemblerLLVMC::LLVMCDisassembler::PrintMCInst (llvm::MCInst &mc_inst,
543                                                    char *dst,
544                                                    size_t dst_len)
545 {
546     llvm::StringRef unused_annotations;
547     llvm::SmallString<64> inst_string;
548     llvm::raw_svector_ostream inst_stream(inst_string);
549     m_instr_printer_ap->printInst (&mc_inst, inst_stream, unused_annotations);
550     inst_stream.flush();
551     const size_t output_size = std::min(dst_len - 1, inst_string.size());
552     std::memcpy(dst, inst_string.data(), output_size);
553     dst[output_size] = '\0';
554 
555     return output_size;
556 }
557 
558 void
SetStyle(bool use_hex_immed,HexImmediateStyle hex_style)559 DisassemblerLLVMC::LLVMCDisassembler::SetStyle (bool use_hex_immed, HexImmediateStyle hex_style)
560 {
561     m_instr_printer_ap->setPrintImmHex(use_hex_immed);
562     switch(hex_style)
563     {
564     case eHexStyleC:      m_instr_printer_ap->setPrintImmHex(llvm::HexStyle::C); break;
565     case eHexStyleAsm:    m_instr_printer_ap->setPrintImmHex(llvm::HexStyle::Asm); break;
566     }
567 }
568 
569 bool
CanBranch(llvm::MCInst & mc_inst)570 DisassemblerLLVMC::LLVMCDisassembler::CanBranch (llvm::MCInst &mc_inst)
571 {
572     return m_instr_info_ap->get(mc_inst.getOpcode()).mayAffectControlFlow(mc_inst, *m_reg_info_ap.get());
573 }
574 
575 bool
FlavorValidForArchSpec(const lldb_private::ArchSpec & arch,const char * flavor)576 DisassemblerLLVMC::FlavorValidForArchSpec (const lldb_private::ArchSpec &arch, const char *flavor)
577 {
578     llvm::Triple triple = arch.GetTriple();
579     if (flavor == NULL || strcmp (flavor, "default") == 0)
580         return true;
581 
582     if (triple.getArch() == llvm::Triple::x86 || triple.getArch() == llvm::Triple::x86_64)
583     {
584         if (strcmp (flavor, "intel") == 0 || strcmp (flavor, "att") == 0)
585             return true;
586         else
587             return false;
588     }
589     else
590         return false;
591 }
592 
593 
594 Disassembler *
CreateInstance(const ArchSpec & arch,const char * flavor)595 DisassemblerLLVMC::CreateInstance (const ArchSpec &arch, const char *flavor)
596 {
597     if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch)
598     {
599         std::unique_ptr<DisassemblerLLVMC> disasm_ap (new DisassemblerLLVMC(arch, flavor));
600 
601         if (disasm_ap.get() && disasm_ap->IsValid())
602             return disasm_ap.release();
603     }
604     return NULL;
605 }
606 
DisassemblerLLVMC(const ArchSpec & arch,const char * flavor_string)607 DisassemblerLLVMC::DisassemblerLLVMC (const ArchSpec &arch, const char *flavor_string) :
608     Disassembler(arch, flavor_string),
609     m_exe_ctx (NULL),
610     m_inst (NULL),
611     m_data_from_file (false)
612 {
613     if (!FlavorValidForArchSpec (arch, m_flavor.c_str()))
614     {
615         m_flavor.assign("default");
616     }
617 
618     const char *triple = arch.GetTriple().getTriple().c_str();
619     unsigned flavor = ~0U;
620 
621     // So far the only supported flavor is "intel" on x86.  The base class will set this
622     // correctly coming in.
623     if (arch.GetTriple().getArch() == llvm::Triple::x86
624         || arch.GetTriple().getArch() == llvm::Triple::x86_64)
625     {
626         if (m_flavor == "intel")
627         {
628             flavor = 1;
629         }
630         else if (m_flavor == "att")
631         {
632             flavor = 0;
633         }
634     }
635 
636     ArchSpec thumb_arch(arch);
637     if (arch.GetTriple().getArch() == llvm::Triple::arm)
638     {
639         std::string thumb_arch_name (thumb_arch.GetTriple().getArchName().str());
640         // Replace "arm" with "thumb" so we get all thumb variants correct
641         if (thumb_arch_name.size() > 3)
642         {
643             thumb_arch_name.erase(0,3);
644             thumb_arch_name.insert(0, "thumb");
645         }
646         else
647         {
648             thumb_arch_name = "thumbv7";
649         }
650         thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name.c_str()));
651     }
652 
653     // Cortex-M3 devices (e.g. armv7m) can only execute thumb (T2) instructions,
654     // so hardcode the primary disassembler to thumb mode.
655     if (arch.GetTriple().getArch() == llvm::Triple::arm
656         && (arch.GetCore() == ArchSpec::Core::eCore_arm_armv7m || arch.GetCore() == ArchSpec::Core::eCore_arm_armv7em))
657     {
658         triple = thumb_arch.GetTriple().getTriple().c_str();
659     }
660 
661     m_disasm_ap.reset (new LLVMCDisassembler(triple, flavor, *this));
662     if (!m_disasm_ap->IsValid())
663     {
664         // We use m_disasm_ap.get() to tell whether we are valid or not, so if this isn't good for some reason,
665         // we reset it, and then we won't be valid and FindPlugin will fail and we won't get used.
666         m_disasm_ap.reset();
667     }
668 
669     // For arm CPUs that can execute arm or thumb instructions, also create a thumb instruction disassembler.
670     if (arch.GetTriple().getArch() == llvm::Triple::arm)
671     {
672         std::string thumb_triple(thumb_arch.GetTriple().getTriple());
673         m_alternate_disasm_ap.reset(new LLVMCDisassembler(thumb_triple.c_str(), flavor, *this));
674         if (!m_alternate_disasm_ap->IsValid())
675         {
676             m_disasm_ap.reset();
677             m_alternate_disasm_ap.reset();
678         }
679     }
680 }
681 
~DisassemblerLLVMC()682 DisassemblerLLVMC::~DisassemblerLLVMC()
683 {
684 }
685 
686 size_t
DecodeInstructions(const Address & base_addr,const DataExtractor & data,lldb::offset_t data_offset,size_t num_instructions,bool append,bool data_from_file)687 DisassemblerLLVMC::DecodeInstructions (const Address &base_addr,
688                                        const DataExtractor& data,
689                                        lldb::offset_t data_offset,
690                                        size_t num_instructions,
691                                        bool append,
692                                        bool data_from_file)
693 {
694     if (!append)
695         m_instruction_list.Clear();
696 
697     if (!IsValid())
698         return 0;
699 
700     m_data_from_file = data_from_file;
701     uint32_t data_cursor = data_offset;
702     const size_t data_byte_size = data.GetByteSize();
703     uint32_t instructions_parsed = 0;
704     Address inst_addr(base_addr);
705 
706     while (data_cursor < data_byte_size && instructions_parsed < num_instructions)
707     {
708 
709         AddressClass address_class = eAddressClassCode;
710 
711         if (m_alternate_disasm_ap.get() != NULL)
712             address_class = inst_addr.GetAddressClass ();
713 
714         InstructionSP inst_sp(new InstructionLLVMC(*this,
715                                                    inst_addr,
716                                                    address_class));
717 
718         if (!inst_sp)
719             break;
720 
721         uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);
722 
723         if (inst_size == 0)
724             break;
725 
726         m_instruction_list.Append(inst_sp);
727         data_cursor += inst_size;
728         inst_addr.Slide(inst_size);
729         instructions_parsed++;
730     }
731 
732     return data_cursor - data_offset;
733 }
734 
735 void
Initialize()736 DisassemblerLLVMC::Initialize()
737 {
738     PluginManager::RegisterPlugin (GetPluginNameStatic(),
739                                    "Disassembler that uses LLVM MC to disassemble i386, x86_64 and ARM.",
740                                    CreateInstance);
741 
742     llvm::InitializeAllTargetInfos();
743     llvm::InitializeAllTargetMCs();
744     llvm::InitializeAllAsmParsers();
745     llvm::InitializeAllDisassemblers();
746 }
747 
748 void
Terminate()749 DisassemblerLLVMC::Terminate()
750 {
751     PluginManager::UnregisterPlugin (CreateInstance);
752 }
753 
754 
755 ConstString
GetPluginNameStatic()756 DisassemblerLLVMC::GetPluginNameStatic()
757 {
758     static ConstString g_name("llvm-mc");
759     return g_name;
760 }
761 
OpInfoCallback(void * disassembler,uint64_t pc,uint64_t offset,uint64_t size,int tag_type,void * tag_bug)762 int DisassemblerLLVMC::OpInfoCallback (void *disassembler,
763                                        uint64_t pc,
764                                        uint64_t offset,
765                                        uint64_t size,
766                                        int tag_type,
767                                        void *tag_bug)
768 {
769     return static_cast<DisassemblerLLVMC*>(disassembler)->OpInfo (pc,
770                                                                   offset,
771                                                                   size,
772                                                                   tag_type,
773                                                                   tag_bug);
774 }
775 
SymbolLookupCallback(void * disassembler,uint64_t value,uint64_t * type,uint64_t pc,const char ** name)776 const char *DisassemblerLLVMC::SymbolLookupCallback (void *disassembler,
777                                                      uint64_t value,
778                                                      uint64_t *type,
779                                                      uint64_t pc,
780                                                      const char **name)
781 {
782     return static_cast<DisassemblerLLVMC*>(disassembler)->SymbolLookup(value,
783                                                                        type,
784                                                                        pc,
785                                                                        name);
786 }
787 
OpInfo(uint64_t PC,uint64_t Offset,uint64_t Size,int tag_type,void * tag_bug)788 int DisassemblerLLVMC::OpInfo (uint64_t PC,
789                                uint64_t Offset,
790                                uint64_t Size,
791                                int tag_type,
792                                void *tag_bug)
793 {
794     switch (tag_type)
795     {
796     default:
797         break;
798     case 1:
799         bzero (tag_bug, sizeof(::LLVMOpInfo1));
800         break;
801     }
802     return 0;
803 }
804 
SymbolLookup(uint64_t value,uint64_t * type_ptr,uint64_t pc,const char ** name)805 const char *DisassemblerLLVMC::SymbolLookup (uint64_t value,
806                                              uint64_t *type_ptr,
807                                              uint64_t pc,
808                                              const char **name)
809 {
810     if (*type_ptr)
811     {
812         if (m_exe_ctx && m_inst)
813         {
814             //std::string remove_this_prior_to_checkin;
815             Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : NULL;
816             Address value_so_addr;
817             if (m_inst->UsingFileAddress())
818             {
819                 ModuleSP module_sp(m_inst->GetAddress().GetModule());
820                 if (module_sp)
821                     module_sp->ResolveFileAddress(value, value_so_addr);
822             }
823             else if (target && !target->GetSectionLoadList().IsEmpty())
824             {
825                 target->GetSectionLoadList().ResolveLoadAddress(value, value_so_addr);
826             }
827 
828             if (value_so_addr.IsValid() && value_so_addr.GetSection())
829             {
830                 StreamString ss;
831 
832                 value_so_addr.Dump (&ss,
833                                     target,
834                                     Address::DumpStyleResolvedDescriptionNoModule,
835                                     Address::DumpStyleSectionNameOffset);
836 
837                 if (!ss.GetString().empty())
838                 {
839                     m_inst->AppendComment(ss.GetString());
840                 }
841             }
842         }
843     }
844 
845     *type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
846     *name = NULL;
847     return NULL;
848 }
849 
850 //------------------------------------------------------------------
851 // PluginInterface protocol
852 //------------------------------------------------------------------
853 ConstString
GetPluginName()854 DisassemblerLLVMC::GetPluginName()
855 {
856     return GetPluginNameStatic();
857 }
858 
859 uint32_t
GetPluginVersion()860 DisassemblerLLVMC::GetPluginVersion()
861 {
862     return 1;
863 }
864 
865