1 //===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfiledBinary.h"
10 #include "ErrorHandling.h"
11 #include "llvm/ADT/Triple.h"
12 #include "llvm/Demangle/Demangle.h"
13 #include "llvm/Support/CommandLine.h"
14 #include "llvm/Support/Format.h"
15 #include "llvm/Support/TargetRegistry.h"
16 #include "llvm/Support/TargetSelect.h"
17 
18 #define DEBUG_TYPE "load-binary"
19 
20 using namespace llvm;
21 using namespace sampleprof;
22 
23 static cl::opt<bool> ShowDisassembly("show-disassembly", cl::ReallyHidden,
24                                      cl::init(false), cl::ZeroOrMore,
25                                      cl::desc("Print disassembled code."));
26 
27 static cl::opt<bool> ShowSourceLocations("show-source-locations",
28                                          cl::ReallyHidden, cl::init(false),
29                                          cl::ZeroOrMore,
30                                          cl::desc("Print source locations."));
31 
32 namespace llvm {
33 namespace sampleprof {
34 
getTarget(const ObjectFile * Obj)35 static const Target *getTarget(const ObjectFile *Obj) {
36   Triple TheTriple = Obj->makeTriple();
37   std::string Error;
38   std::string ArchName;
39   const Target *TheTarget =
40       TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
41   if (!TheTarget)
42     exitWithError(Error, Obj->getFileName());
43   return TheTarget;
44 }
45 
46 template <class ELFT>
getELFImageLMAForSec(const ELFFile<ELFT> & Obj,const object::ELFSectionRef & Sec,StringRef FileName)47 static uint64_t getELFImageLMAForSec(const ELFFile<ELFT> &Obj,
48                                      const object::ELFSectionRef &Sec,
49                                      StringRef FileName) {
50   // Search for a PT_LOAD segment containing the requested section. Return this
51   // segment's p_addr as the image load address for the section.
52   const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName);
53   for (const typename ELFT::Phdr &Phdr : PhdrRange)
54     if ((Phdr.p_type == ELF::PT_LOAD) && (Phdr.p_vaddr <= Sec.getAddress()) &&
55         (Phdr.p_vaddr + Phdr.p_memsz > Sec.getAddress()))
56       // Segments will always be loaded at a page boundary.
57       return Phdr.p_paddr & ~(Phdr.p_align - 1U);
58   return 0;
59 }
60 
61 // Get the image load address for a specific section. Note that an image is
62 // loaded by segments (a group of sections) and segments may not be consecutive
63 // in memory.
getELFImageLMAForSec(const object::ELFSectionRef & Sec)64 static uint64_t getELFImageLMAForSec(const object::ELFSectionRef &Sec) {
65   if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Sec.getObject()))
66     return getELFImageLMAForSec(ELFObj->getELFFile(), Sec,
67                                 ELFObj->getFileName());
68   else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Sec.getObject()))
69     return getELFImageLMAForSec(ELFObj->getELFFile(), Sec,
70                                 ELFObj->getFileName());
71   else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Sec.getObject()))
72     return getELFImageLMAForSec(ELFObj->getELFFile(), Sec,
73                                 ELFObj->getFileName());
74   const auto *ELFObj = cast<ELF64BEObjectFile>(Sec.getObject());
75   return getELFImageLMAForSec(ELFObj->getELFFile(), Sec, ELFObj->getFileName());
76 }
77 
load()78 void ProfiledBinary::load() {
79   // Attempt to open the binary.
80   OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
81   Binary &Binary = *OBinary.getBinary();
82 
83   auto *Obj = dyn_cast<ELFObjectFileBase>(&Binary);
84   if (!Obj)
85     exitWithError("not a valid Elf image", Path);
86 
87   TheTriple = Obj->makeTriple();
88   // Current only support X86
89   if (!TheTriple.isX86())
90     exitWithError("unsupported target", TheTriple.getTriple());
91   LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
92 
93   // Find the preferred base address for text sections.
94   setPreferredBaseAddress(Obj);
95 
96   // Disassemble the text sections.
97   disassemble(Obj);
98 
99   // Use function start and return address to infer prolog and epilog
100   ProEpilogTracker.inferPrologOffsets(FuncStartAddrMap);
101   ProEpilogTracker.inferEpilogOffsets(RetAddrs);
102 
103   // TODO: decode other sections.
104 
105   return;
106 }
107 
inlineContextEqual(uint64_t Address1,uint64_t Address2) const108 bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
109                                         uint64_t Address2) const {
110   uint64_t Offset1 = virtualAddrToOffset(Address1);
111   uint64_t Offset2 = virtualAddrToOffset(Address2);
112   const FrameLocationStack &Context1 = getFrameLocationStack(Offset1);
113   const FrameLocationStack &Context2 = getFrameLocationStack(Offset2);
114   if (Context1.size() != Context2.size())
115     return false;
116 
117   // The leaf frame contains location within the leaf, and it
118   // needs to be remove that as it's not part of the calling context
119   return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1,
120                     Context2.begin(), Context2.begin() + Context2.size() - 1);
121 }
122 
123 std::string
getExpandedContextStr(const std::list<uint64_t> & Stack) const124 ProfiledBinary::getExpandedContextStr(const std::list<uint64_t> &Stack) const {
125   std::string ContextStr;
126   SmallVector<std::string, 8> ContextVec;
127   // Process from frame root to leaf
128   for (auto Iter = Stack.rbegin(); Iter != Stack.rend(); Iter++) {
129     uint64_t Offset = virtualAddrToOffset(*Iter);
130     const FrameLocationStack &ExpandedContext = getFrameLocationStack(Offset);
131     for (const auto &Loc : ExpandedContext) {
132       ContextVec.push_back(getCallSite(Loc));
133     }
134   }
135 
136   assert(ContextVec.size() && "Context length should be at least 1");
137 
138   std::ostringstream OContextStr;
139   for (uint32_t I = 0; I < (uint32_t)ContextVec.size(); I++) {
140     if (OContextStr.str().size()) {
141       OContextStr << " @ ";
142     }
143 
144     if (I == ContextVec.size() - 1) {
145       // Only keep the function name for the leaf frame
146       StringRef Ref(ContextVec[I]);
147       OContextStr << Ref.split(":").first.str();
148     } else {
149       OContextStr << ContextVec[I];
150     }
151   }
152 
153   return OContextStr.str();
154 }
155 
setPreferredBaseAddress(const ELFObjectFileBase * Obj)156 void ProfiledBinary::setPreferredBaseAddress(const ELFObjectFileBase *Obj) {
157   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
158        SI != SE; ++SI) {
159     const SectionRef &Section = *SI;
160     if (Section.isText()) {
161       PreferredBaseAddress = getELFImageLMAForSec(Section);
162       return;
163     }
164   }
165   exitWithError("no text section found", Obj->getFileName());
166 }
167 
dissassembleSymbol(std::size_t SI,ArrayRef<uint8_t> Bytes,SectionSymbolsTy & Symbols,const SectionRef & Section)168 bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
169                                         SectionSymbolsTy &Symbols,
170                                         const SectionRef &Section) {
171 
172   std::size_t SE = Symbols.size();
173   uint64_t SectionOffset = Section.getAddress() - PreferredBaseAddress;
174   uint64_t SectSize = Section.getSize();
175   uint64_t StartOffset = Symbols[SI].Addr - PreferredBaseAddress;
176   uint64_t EndOffset = (SI + 1 < SE)
177                            ? Symbols[SI + 1].Addr - PreferredBaseAddress
178                            : SectionOffset + SectSize;
179   if (StartOffset >= EndOffset)
180     return true;
181 
182   std::string &&SymbolName = Symbols[SI].Name.str();
183   if (ShowDisassembly)
184     outs() << '<' << SymbolName << ">:\n";
185 
186   uint64_t Offset = StartOffset;
187   while (Offset < EndOffset) {
188     MCInst Inst;
189     uint64_t Size;
190     // Disassemble an instruction.
191     if (!DisAsm->getInstruction(Inst, Size, Bytes.slice(Offset - SectionOffset),
192                                 Offset + PreferredBaseAddress, nulls()))
193       return false;
194 
195     if (ShowDisassembly) {
196       outs() << format("%8" PRIx64 ":", Offset);
197       size_t Start = outs().tell();
198       IPrinter->printInst(&Inst, Offset + Size, "", *STI.get(), outs());
199       if (ShowSourceLocations) {
200         unsigned Cur = outs().tell() - Start;
201         if (Cur < 40)
202           outs().indent(40 - Cur);
203         InstructionPointer Inst(this, Offset);
204         outs() << getReversedLocWithContext(symbolize(Inst));
205       }
206       outs() << "\n";
207     }
208 
209     const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
210 
211     // Populate a vector of the symbolized callsite at this location
212     InstructionPointer IP(this, Offset);
213     Offset2LocStackMap[Offset] = symbolize(IP, true);
214 
215     // Populate address maps.
216     CodeAddrs.push_back(Offset);
217     if (MCDesc.isCall())
218       CallAddrs.insert(Offset);
219     else if (MCDesc.isReturn())
220       RetAddrs.insert(Offset);
221 
222     Offset += Size;
223   }
224 
225   if (ShowDisassembly)
226     outs() << "\n";
227 
228   FuncStartAddrMap[StartOffset] = Symbols[SI].Name.str();
229   return true;
230 }
231 
setUpDisassembler(const ELFObjectFileBase * Obj)232 void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) {
233   const Target *TheTarget = getTarget(Obj);
234   std::string TripleName = TheTriple.getTriple();
235   StringRef FileName = Obj->getFileName();
236 
237   MRI.reset(TheTarget->createMCRegInfo(TripleName));
238   if (!MRI)
239     exitWithError("no register info for target " + TripleName, FileName);
240 
241   MCTargetOptions MCOptions;
242   AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
243   if (!AsmInfo)
244     exitWithError("no assembly info for target " + TripleName, FileName);
245 
246   SubtargetFeatures Features = Obj->getFeatures();
247   STI.reset(
248       TheTarget->createMCSubtargetInfo(TripleName, "", Features.getString()));
249   if (!STI)
250     exitWithError("no subtarget info for target " + TripleName, FileName);
251 
252   MII.reset(TheTarget->createMCInstrInfo());
253   if (!MII)
254     exitWithError("no instruction info for target " + TripleName, FileName);
255 
256   MCObjectFileInfo MOFI;
257   MCContext Ctx(AsmInfo.get(), MRI.get(), &MOFI);
258   MOFI.InitMCObjectFileInfo(Triple(TripleName), false, Ctx);
259   DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx));
260   if (!DisAsm)
261     exitWithError("no disassembler for target " + TripleName, FileName);
262 
263   MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));
264 
265   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
266   IPrinter.reset(TheTarget->createMCInstPrinter(
267       Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
268   IPrinter->setPrintBranchImmAsAddress(true);
269 }
270 
disassemble(const ELFObjectFileBase * Obj)271 void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
272   // Set up disassembler and related components.
273   setUpDisassembler(Obj);
274 
275   // Create a mapping from virtual address to symbol name. The symbols in text
276   // sections are the candidates to dissassemble.
277   std::map<SectionRef, SectionSymbolsTy> AllSymbols;
278   StringRef FileName = Obj->getFileName();
279   for (const SymbolRef &Symbol : Obj->symbols()) {
280     const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
281     const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
282     section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
283     if (SecI != Obj->section_end())
284       AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE));
285   }
286 
287   // Sort all the symbols. Use a stable sort to stabilize the output.
288   for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
289     stable_sort(SecSyms.second);
290 
291   if (ShowDisassembly)
292     outs() << "\nDisassembly of " << FileName << ":\n";
293 
294   // Dissassemble a text section.
295   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
296        SI != SE; ++SI) {
297     const SectionRef &Section = *SI;
298     if (!Section.isText())
299       continue;
300 
301     uint64_t ImageLoadAddr = PreferredBaseAddress;
302     uint64_t SectionOffset = Section.getAddress() - ImageLoadAddr;
303     uint64_t SectSize = Section.getSize();
304     if (!SectSize)
305       continue;
306 
307     // Register the text section.
308     TextSections.insert({SectionOffset, SectSize});
309 
310     if (ShowDisassembly) {
311       StringRef SectionName = unwrapOrError(Section.getName(), FileName);
312       outs() << "\nDisassembly of section " << SectionName;
313       outs() << " [" << format("0x%" PRIx64, SectionOffset) << ", "
314              << format("0x%" PRIx64, SectionOffset + SectSize) << "]:\n\n";
315     }
316 
317     // Get the section data.
318     ArrayRef<uint8_t> Bytes =
319         arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName));
320 
321     // Get the list of all the symbols in this section.
322     SectionSymbolsTy &Symbols = AllSymbols[Section];
323 
324     // Disassemble symbol by symbol.
325     for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
326       if (!dissassembleSymbol(SI, Bytes, Symbols, Section))
327         exitWithError("disassembling error", FileName);
328     }
329   }
330 }
331 
setupSymbolizer()332 void ProfiledBinary::setupSymbolizer() {
333   symbolize::LLVMSymbolizer::Options SymbolizerOpts;
334   SymbolizerOpts.PrintFunctions =
335       DILineInfoSpecifier::FunctionNameKind::LinkageName;
336   SymbolizerOpts.Demangle = false;
337   SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
338   SymbolizerOpts.UseSymbolTable = false;
339   SymbolizerOpts.RelativeAddresses = false;
340   Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts);
341 }
342 
symbolize(const InstructionPointer & IP,bool UseCanonicalFnName)343 FrameLocationStack ProfiledBinary::symbolize(const InstructionPointer &IP,
344                                              bool UseCanonicalFnName) {
345   assert(this == IP.Binary &&
346          "Binary should only symbolize its own instruction");
347   auto Addr = object::SectionedAddress{IP.Offset + PreferredBaseAddress,
348                                        object::SectionedAddress::UndefSection};
349   DIInliningInfo InlineStack =
350       unwrapOrError(Symbolizer->symbolizeInlinedCode(Path, Addr), getName());
351 
352   FrameLocationStack CallStack;
353 
354   for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
355     const auto &CallerFrame = InlineStack.getFrame(I);
356     if (CallerFrame.FunctionName == "<invalid>")
357       break;
358     StringRef FunctionName(CallerFrame.FunctionName);
359     if (UseCanonicalFnName)
360       FunctionName = FunctionSamples::getCanonicalFnName(FunctionName);
361     LineLocation Line(CallerFrame.Line - CallerFrame.StartLine,
362                       CallerFrame.Discriminator);
363     FrameLocation Callsite(FunctionName.str(), Line);
364     CallStack.push_back(Callsite);
365   }
366 
367   return CallStack;
368 }
369 
InstructionPointer(ProfiledBinary * Binary,uint64_t Address,bool RoundToNext)370 InstructionPointer::InstructionPointer(ProfiledBinary *Binary, uint64_t Address,
371                                        bool RoundToNext)
372     : Binary(Binary), Address(Address) {
373   Index = Binary->getIndexForAddr(Address);
374   if (RoundToNext) {
375     // we might get address which is not the code
376     // it should round to the next valid address
377     this->Address = Binary->getAddressforIndex(Index);
378   }
379 }
380 
advance()381 void InstructionPointer::advance() {
382   Index++;
383   Address = Binary->getAddressforIndex(Index);
384 }
385 
backward()386 void InstructionPointer::backward() {
387   Index--;
388   Address = Binary->getAddressforIndex(Index);
389 }
390 
update(uint64_t Addr)391 void InstructionPointer::update(uint64_t Addr) {
392   Address = Addr;
393   Index = Binary->getIndexForAddr(Address);
394 }
395 
396 } // end namespace sampleprof
397 } // end namespace llvm
398