1 //===-- SymbolizableObjectFile.cpp ----------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Implementation of SymbolizableObjectFile class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SymbolizableObjectFile.h"
15 #include "llvm/Object/COFF.h"
16 #include "llvm/Object/SymbolSize.h"
17 #include "llvm/Support/DataExtractor.h"
18 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
19 
20 namespace llvm {
21 namespace symbolize {
22 
23 using namespace object;
24 
25 static DILineInfoSpecifier
getDILineInfoSpecifier(FunctionNameKind FNKind)26 getDILineInfoSpecifier(FunctionNameKind FNKind) {
27   return DILineInfoSpecifier(
28       DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FNKind);
29 }
30 
31 ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
create(object::ObjectFile * Obj,std::unique_ptr<DIContext> DICtx)32 SymbolizableObjectFile::create(object::ObjectFile *Obj,
33                                std::unique_ptr<DIContext> DICtx) {
34   std::unique_ptr<SymbolizableObjectFile> res(
35       new SymbolizableObjectFile(Obj, std::move(DICtx)));
36   std::unique_ptr<DataExtractor> OpdExtractor;
37   uint64_t OpdAddress = 0;
38   // Find the .opd (function descriptor) section if any, for big-endian
39   // PowerPC64 ELF.
40   if (Obj->getArch() == Triple::ppc64) {
41     for (section_iterator Section : Obj->sections()) {
42       StringRef Name;
43       StringRef Data;
44       if (auto EC = Section->getName(Name))
45         return EC;
46       if (Name == ".opd") {
47         if (auto EC = Section->getContents(Data))
48           return EC;
49         OpdExtractor.reset(new DataExtractor(Data, Obj->isLittleEndian(),
50                                              Obj->getBytesInAddress()));
51         OpdAddress = Section->getAddress();
52         break;
53       }
54     }
55   }
56   std::vector<std::pair<SymbolRef, uint64_t>> Symbols =
57       computeSymbolSizes(*Obj);
58   for (auto &P : Symbols)
59     res->addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress);
60 
61   // If this is a COFF object and we didn't find any symbols, try the export
62   // table.
63   if (Symbols.empty()) {
64     if (auto *CoffObj = dyn_cast<COFFObjectFile>(Obj))
65       if (auto EC = res->addCoffExportSymbols(CoffObj))
66         return EC;
67   }
68   return std::move(res);
69 }
70 
SymbolizableObjectFile(ObjectFile * Obj,std::unique_ptr<DIContext> DICtx)71 SymbolizableObjectFile::SymbolizableObjectFile(ObjectFile *Obj,
72                                                std::unique_ptr<DIContext> DICtx)
73     : Module(Obj), DebugInfoContext(std::move(DICtx)) {}
74 
75 namespace {
76 struct OffsetNamePair {
77   uint32_t Offset;
78   StringRef Name;
operator <llvm::symbolize::__anon3715c9e50111::OffsetNamePair79   bool operator<(const OffsetNamePair &R) const {
80     return Offset < R.Offset;
81   }
82 };
83 }
84 
addCoffExportSymbols(const COFFObjectFile * CoffObj)85 std::error_code SymbolizableObjectFile::addCoffExportSymbols(
86     const COFFObjectFile *CoffObj) {
87   // Get all export names and offsets.
88   std::vector<OffsetNamePair> ExportSyms;
89   for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) {
90     StringRef Name;
91     uint32_t Offset;
92     if (auto EC = Ref.getSymbolName(Name))
93       return EC;
94     if (auto EC = Ref.getExportRVA(Offset))
95       return EC;
96     ExportSyms.push_back(OffsetNamePair{Offset, Name});
97   }
98   if (ExportSyms.empty())
99     return std::error_code();
100 
101   // Sort by ascending offset.
102   array_pod_sort(ExportSyms.begin(), ExportSyms.end());
103 
104   // Approximate the symbol sizes by assuming they run to the next symbol.
105   // FIXME: This assumes all exports are functions.
106   uint64_t ImageBase = CoffObj->getImageBase();
107   for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) {
108     OffsetNamePair &Export = *I;
109     // FIXME: The last export has a one byte size now.
110     uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1;
111     uint64_t SymbolStart = ImageBase + Export.Offset;
112     uint64_t SymbolSize = NextOffset - Export.Offset;
113     SymbolDesc SD = {SymbolStart, SymbolSize};
114     Functions.insert(std::make_pair(SD, Export.Name));
115   }
116   return std::error_code();
117 }
118 
addSymbol(const SymbolRef & Symbol,uint64_t SymbolSize,DataExtractor * OpdExtractor,uint64_t OpdAddress)119 std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
120                                                   uint64_t SymbolSize,
121                                                   DataExtractor *OpdExtractor,
122                                                   uint64_t OpdAddress) {
123   Expected<SymbolRef::Type> SymbolTypeOrErr = Symbol.getType();
124   if (!SymbolTypeOrErr)
125     return errorToErrorCode(SymbolTypeOrErr.takeError());
126   SymbolRef::Type SymbolType = *SymbolTypeOrErr;
127   if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
128     return std::error_code();
129   Expected<uint64_t> SymbolAddressOrErr = Symbol.getAddress();
130   if (!SymbolAddressOrErr)
131     return errorToErrorCode(SymbolAddressOrErr.takeError());
132   uint64_t SymbolAddress = *SymbolAddressOrErr;
133   if (OpdExtractor) {
134     // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
135     // function descriptors. The first word of the descriptor is a pointer to
136     // the function's code.
137     // For the purposes of symbolization, pretend the symbol's address is that
138     // of the function's code, not the descriptor.
139     uint64_t OpdOffset = SymbolAddress - OpdAddress;
140     uint32_t OpdOffset32 = OpdOffset;
141     if (OpdOffset == OpdOffset32 &&
142         OpdExtractor->isValidOffsetForAddress(OpdOffset32))
143       SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
144   }
145   Expected<StringRef> SymbolNameOrErr = Symbol.getName();
146   if (!SymbolNameOrErr)
147     return errorToErrorCode(SymbolNameOrErr.takeError());
148   StringRef SymbolName = *SymbolNameOrErr;
149   // Mach-O symbol table names have leading underscore, skip it.
150   if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_')
151     SymbolName = SymbolName.drop_front();
152   // FIXME: If a function has alias, there are two entries in symbol table
153   // with same address size. Make sure we choose the correct one.
154   auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
155   SymbolDesc SD = { SymbolAddress, SymbolSize };
156   M.insert(std::make_pair(SD, SymbolName));
157   return std::error_code();
158 }
159 
160 // Return true if this is a 32-bit x86 PE COFF module.
isWin32Module() const161 bool SymbolizableObjectFile::isWin32Module() const {
162   auto *CoffObject = dyn_cast<COFFObjectFile>(Module);
163   return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386;
164 }
165 
getModulePreferredBase() const166 uint64_t SymbolizableObjectFile::getModulePreferredBase() const {
167   if (auto *CoffObject = dyn_cast<COFFObjectFile>(Module))
168     return CoffObject->getImageBase();
169   return 0;
170 }
171 
getNameFromSymbolTable(SymbolRef::Type Type,uint64_t Address,std::string & Name,uint64_t & Addr,uint64_t & Size) const172 bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type,
173                                                     uint64_t Address,
174                                                     std::string &Name,
175                                                     uint64_t &Addr,
176                                                     uint64_t &Size) const {
177   const auto &SymbolMap = Type == SymbolRef::ST_Function ? Functions : Objects;
178   if (SymbolMap.empty())
179     return false;
180   SymbolDesc SD = { Address, Address };
181   auto SymbolIterator = SymbolMap.upper_bound(SD);
182   if (SymbolIterator == SymbolMap.begin())
183     return false;
184   --SymbolIterator;
185   if (SymbolIterator->first.Size != 0 &&
186       SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address)
187     return false;
188   Name = SymbolIterator->second.str();
189   Addr = SymbolIterator->first.Addr;
190   Size = SymbolIterator->first.Size;
191   return true;
192 }
193 
shouldOverrideWithSymbolTable(FunctionNameKind FNKind,bool UseSymbolTable) const194 bool SymbolizableObjectFile::shouldOverrideWithSymbolTable(
195     FunctionNameKind FNKind, bool UseSymbolTable) const {
196   // When DWARF is used with -gline-tables-only / -gmlt, the symbol table gives
197   // better answers for linkage names than the DIContext. Otherwise, we are
198   // probably using PEs and PDBs, and we shouldn't do the override. PE files
199   // generally only contain the names of exported symbols.
200   return FNKind == FunctionNameKind::LinkageName && UseSymbolTable &&
201          isa<DWARFContext>(DebugInfoContext.get());
202 }
203 
symbolizeCode(uint64_t ModuleOffset,FunctionNameKind FNKind,bool UseSymbolTable) const204 DILineInfo SymbolizableObjectFile::symbolizeCode(uint64_t ModuleOffset,
205                                                  FunctionNameKind FNKind,
206                                                  bool UseSymbolTable) const {
207   DILineInfo LineInfo;
208   if (DebugInfoContext) {
209     LineInfo = DebugInfoContext->getLineInfoForAddress(
210         ModuleOffset, getDILineInfoSpecifier(FNKind));
211   }
212   // Override function name from symbol table if necessary.
213   if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
214     std::string FunctionName;
215     uint64_t Start, Size;
216     if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
217                                FunctionName, Start, Size)) {
218       LineInfo.FunctionName = FunctionName;
219     }
220   }
221   return LineInfo;
222 }
223 
symbolizeInlinedCode(uint64_t ModuleOffset,FunctionNameKind FNKind,bool UseSymbolTable) const224 DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
225     uint64_t ModuleOffset, FunctionNameKind FNKind, bool UseSymbolTable) const {
226   DIInliningInfo InlinedContext;
227 
228   if (DebugInfoContext)
229     InlinedContext = DebugInfoContext->getInliningInfoForAddress(
230         ModuleOffset, getDILineInfoSpecifier(FNKind));
231   // Make sure there is at least one frame in context.
232   if (InlinedContext.getNumberOfFrames() == 0)
233     InlinedContext.addFrame(DILineInfo());
234 
235   // Override the function name in lower frame with name from symbol table.
236   if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
237     std::string FunctionName;
238     uint64_t Start, Size;
239     if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
240                                FunctionName, Start, Size)) {
241       InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1)
242           ->FunctionName = FunctionName;
243     }
244   }
245 
246   return InlinedContext;
247 }
248 
symbolizeData(uint64_t ModuleOffset) const249 DIGlobal SymbolizableObjectFile::symbolizeData(uint64_t ModuleOffset) const {
250   DIGlobal Res;
251   getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Res.Name, Res.Start,
252                          Res.Size);
253   return Res;
254 }
255 
256 }  // namespace symbolize
257 }  // namespace llvm
258 
259