1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "InputFile.h"
11 
12 #include "FormatUtil.h"
13 #include "LinePrinter.h"
14 
15 #include "llvm/BinaryFormat/Magic.h"
16 #include "llvm/DebugInfo/CodeView/CodeView.h"
17 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
18 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
19 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
20 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
21 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
22 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
23 #include "llvm/DebugInfo/PDB/Native/RawError.h"
24 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
25 #include "llvm/DebugInfo/PDB/PDB.h"
26 #include "llvm/Object/COFF.h"
27 #include "llvm/Support/FileSystem.h"
28 #include "llvm/Support/FormatVariadic.h"
29 
30 using namespace llvm;
31 using namespace llvm::codeview;
32 using namespace llvm::object;
33 using namespace llvm::pdb;
34 
InputFile()35 InputFile::InputFile() {}
~InputFile()36 InputFile::~InputFile() {}
37 
38 static Expected<ModuleDebugStreamRef>
getModuleDebugStream(PDBFile & File,StringRef & ModuleName,uint32_t Index)39 getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) {
40   ExitOnError Err("Unexpected error: ");
41 
42   auto &Dbi = Err(File.getPDBDbiStream());
43   const auto &Modules = Dbi.modules();
44   auto Modi = Modules.getModuleDescriptor(Index);
45 
46   ModuleName = Modi.getModuleName();
47 
48   uint16_t ModiStream = Modi.getModuleStreamIndex();
49   if (ModiStream == kInvalidStreamIndex)
50     return make_error<RawError>(raw_error_code::no_stream,
51                                 "Module stream not present");
52 
53   auto ModStreamData = File.createIndexedStream(ModiStream);
54 
55   ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
56   if (auto EC = ModS.reload())
57     return make_error<RawError>(raw_error_code::corrupt_file,
58                                 "Invalid module stream");
59 
60   return std::move(ModS);
61 }
62 
isCodeViewDebugSubsection(object::SectionRef Section,StringRef Name,BinaryStreamReader & Reader)63 static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
64                                              StringRef Name,
65                                              BinaryStreamReader &Reader) {
66   StringRef SectionName, Contents;
67   if (Section.getName(SectionName))
68     return false;
69 
70   if (SectionName != Name)
71     return false;
72 
73   if (Section.getContents(Contents))
74     return false;
75 
76   Reader = BinaryStreamReader(Contents, support::little);
77   uint32_t Magic;
78   if (Reader.bytesRemaining() < sizeof(uint32_t))
79     return false;
80   cantFail(Reader.readInteger(Magic));
81   if (Magic != COFF::DEBUG_SECTION_MAGIC)
82     return false;
83   return true;
84 }
85 
isDebugSSection(object::SectionRef Section,DebugSubsectionArray & Subsections)86 static inline bool isDebugSSection(object::SectionRef Section,
87                                    DebugSubsectionArray &Subsections) {
88   BinaryStreamReader Reader;
89   if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
90     return false;
91 
92   cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
93   return true;
94 }
95 
isDebugTSection(SectionRef Section,CVTypeArray & Types)96 static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
97   BinaryStreamReader Reader;
98   if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
99       !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
100     return false;
101   cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
102   return true;
103 }
104 
formatChecksumKind(FileChecksumKind Kind)105 static std::string formatChecksumKind(FileChecksumKind Kind) {
106   switch (Kind) {
107     RETURN_CASE(FileChecksumKind, None, "None");
108     RETURN_CASE(FileChecksumKind, MD5, "MD5");
109     RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
110     RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
111   }
112   return formatUnknownEnum(Kind);
113 }
114 
extractStringTable(PDBFile & File)115 static const DebugStringTableSubsectionRef &extractStringTable(PDBFile &File) {
116   return cantFail(File.getStringTable()).getStringTable();
117 }
118 
119 template <typename... Args>
formatInternal(LinePrinter & Printer,bool Append,Args &&...args)120 static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) {
121   if (Append)
122     Printer.format(std::forward<Args>(args)...);
123   else
124     Printer.formatLine(std::forward<Args>(args)...);
125 }
126 
SymbolGroup(InputFile * File,uint32_t GroupIndex)127 SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
128   if (!File)
129     return;
130 
131   if (File->isPdb())
132     initializeForPdb(GroupIndex);
133   else {
134     Name = ".debug$S";
135     uint32_t I = 0;
136     for (const auto &S : File->obj().sections()) {
137       DebugSubsectionArray SS;
138       if (!isDebugSSection(S, SS))
139         continue;
140 
141       if (!SC.hasChecksums() || !SC.hasStrings())
142         SC.initialize(SS);
143 
144       if (I == GroupIndex)
145         Subsections = SS;
146 
147       if (SC.hasChecksums() && SC.hasStrings())
148         break;
149     }
150     rebuildChecksumMap();
151   }
152 }
153 
name() const154 StringRef SymbolGroup::name() const { return Name; }
155 
updateDebugS(const codeview::DebugSubsectionArray & SS)156 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
157   Subsections = SS;
158 }
159 
updatePdbModi(uint32_t Modi)160 void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
161 
initializeForPdb(uint32_t Modi)162 void SymbolGroup::initializeForPdb(uint32_t Modi) {
163   assert(File && File->isPdb());
164 
165   // PDB always uses the same string table, but each module has its own
166   // checksums.  So we only set the strings if they're not already set.
167   if (!SC.hasStrings())
168     SC.setStrings(extractStringTable(File->pdb()));
169 
170   SC.resetChecksums();
171   auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
172   if (!MDS) {
173     consumeError(MDS.takeError());
174     return;
175   }
176 
177   DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
178   Subsections = DebugStream->getSubsectionsArray();
179   SC.initialize(Subsections);
180   rebuildChecksumMap();
181 }
182 
rebuildChecksumMap()183 void SymbolGroup::rebuildChecksumMap() {
184   if (!SC.hasChecksums())
185     return;
186 
187   for (const auto &Entry : SC.checksums()) {
188     auto S = SC.strings().getString(Entry.FileNameOffset);
189     if (!S)
190       continue;
191     ChecksumsByFile[*S] = Entry;
192   }
193 }
194 
getPdbModuleStream() const195 const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
196   assert(File && File->isPdb() && DebugStream);
197   return *DebugStream;
198 }
199 
getNameFromStringTable(uint32_t Offset) const200 Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
201   return SC.strings().getString(Offset);
202 }
203 
formatFromFileName(LinePrinter & Printer,StringRef File,bool Append) const204 void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
205                                      bool Append) const {
206   auto FC = ChecksumsByFile.find(File);
207   if (FC == ChecksumsByFile.end()) {
208     formatInternal(Printer, Append, "- (no checksum) {0}", File);
209     return;
210   }
211 
212   formatInternal(Printer, Append, "- ({0}: {1}) {2}",
213                  formatChecksumKind(FC->getValue().Kind),
214                  toHex(FC->getValue().Checksum), File);
215 }
216 
formatFromChecksumsOffset(LinePrinter & Printer,uint32_t Offset,bool Append) const217 void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
218                                             uint32_t Offset,
219                                             bool Append) const {
220   if (!SC.hasChecksums()) {
221     formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
222     return;
223   }
224 
225   auto Iter = SC.checksums().getArray().at(Offset);
226   if (Iter == SC.checksums().getArray().end()) {
227     formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
228     return;
229   }
230 
231   uint32_t FO = Iter->FileNameOffset;
232   auto ExpectedFile = getNameFromStringTable(FO);
233   if (!ExpectedFile) {
234     formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
235     consumeError(ExpectedFile.takeError());
236     return;
237   }
238   if (Iter->Kind == FileChecksumKind::None) {
239     formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
240   } else {
241     formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
242                    formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
243   }
244 }
245 
open(StringRef Path,bool AllowUnknownFile)246 Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
247   InputFile IF;
248   if (!llvm::sys::fs::exists(Path))
249     return make_error<StringError>(formatv("File {0} not found", Path),
250                                    inconvertibleErrorCode());
251 
252   file_magic Magic;
253   if (auto EC = identify_magic(Path, Magic))
254     return make_error<StringError>(
255         formatv("Unable to identify file type for file {0}", Path), EC);
256 
257   if (Magic == file_magic::coff_object) {
258     Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
259     if (!BinaryOrErr)
260       return BinaryOrErr.takeError();
261 
262     IF.CoffObject = std::move(*BinaryOrErr);
263     IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
264     return std::move(IF);
265   }
266 
267   if (Magic == file_magic::pdb) {
268     std::unique_ptr<IPDBSession> Session;
269     if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
270       return std::move(Err);
271 
272     IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
273     IF.PdbOrObj = &IF.PdbSession->getPDBFile();
274 
275     return std::move(IF);
276   }
277 
278   if (!AllowUnknownFile)
279     return make_error<StringError>(
280         formatv("File {0} is not a supported file type", Path),
281         inconvertibleErrorCode());
282 
283   auto Result = MemoryBuffer::getFile(Path, -1LL, false);
284   if (!Result)
285     return make_error<StringError>(
286         formatv("File {0} could not be opened", Path), Result.getError());
287 
288   IF.UnknownFile = std::move(*Result);
289   IF.PdbOrObj = IF.UnknownFile.get();
290   return std::move(IF);
291 }
292 
pdb()293 PDBFile &InputFile::pdb() {
294   assert(isPdb());
295   return *PdbOrObj.get<PDBFile *>();
296 }
297 
pdb() const298 const PDBFile &InputFile::pdb() const {
299   assert(isPdb());
300   return *PdbOrObj.get<PDBFile *>();
301 }
302 
obj()303 object::COFFObjectFile &InputFile::obj() {
304   assert(isObj());
305   return *PdbOrObj.get<object::COFFObjectFile *>();
306 }
307 
obj() const308 const object::COFFObjectFile &InputFile::obj() const {
309   assert(isObj());
310   return *PdbOrObj.get<object::COFFObjectFile *>();
311 }
312 
unknown()313 MemoryBuffer &InputFile::unknown() {
314   assert(isUnknown());
315   return *PdbOrObj.get<MemoryBuffer *>();
316 }
317 
unknown() const318 const MemoryBuffer &InputFile::unknown() const {
319   assert(isUnknown());
320   return *PdbOrObj.get<MemoryBuffer *>();
321 }
322 
getFilePath() const323 StringRef InputFile::getFilePath() const {
324   if (isPdb())
325     return pdb().getFilePath();
326   if (isObj())
327     return obj().getFileName();
328   assert(isUnknown());
329   return unknown().getBufferIdentifier();
330 }
331 
hasTypes() const332 bool InputFile::hasTypes() const {
333   if (isPdb())
334     return pdb().hasPDBTpiStream();
335 
336   for (const auto &Section : obj().sections()) {
337     CVTypeArray Types;
338     if (isDebugTSection(Section, Types))
339       return true;
340   }
341   return false;
342 }
343 
hasIds() const344 bool InputFile::hasIds() const {
345   if (isObj())
346     return false;
347   return pdb().hasPDBIpiStream();
348 }
349 
isPdb() const350 bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); }
351 
isObj() const352 bool InputFile::isObj() const {
353   return PdbOrObj.is<object::COFFObjectFile *>();
354 }
355 
isUnknown() const356 bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); }
357 
358 codeview::LazyRandomTypeCollection &
getOrCreateTypeCollection(TypeCollectionKind Kind)359 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
360   if (Types && Kind == kTypes)
361     return *Types;
362   if (Ids && Kind == kIds)
363     return *Ids;
364 
365   if (Kind == kIds) {
366     assert(isPdb() && pdb().hasPDBIpiStream());
367   }
368 
369   // If the collection was already initialized, we should have just returned it
370   // in step 1.
371   if (isPdb()) {
372     TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
373     auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
374                                            : pdb().getPDBTpiStream());
375 
376     auto &Array = Stream.typeArray();
377     uint32_t Count = Stream.getNumTypeRecords();
378     auto Offsets = Stream.getTypeIndexOffsets();
379     Collection =
380         llvm::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
381     return *Collection;
382   }
383 
384   assert(isObj());
385   assert(Kind == kTypes);
386   assert(!Types);
387 
388   for (const auto &Section : obj().sections()) {
389     CVTypeArray Records;
390     if (!isDebugTSection(Section, Records))
391       continue;
392 
393     Types = llvm::make_unique<LazyRandomTypeCollection>(Records, 100);
394     return *Types;
395   }
396 
397   Types = llvm::make_unique<LazyRandomTypeCollection>(100);
398   return *Types;
399 }
400 
types()401 codeview::LazyRandomTypeCollection &InputFile::types() {
402   return getOrCreateTypeCollection(kTypes);
403 }
404 
ids()405 codeview::LazyRandomTypeCollection &InputFile::ids() {
406   // Object files have only one type stream that contains both types and ids.
407   // Similarly, some PDBs don't contain an IPI stream, and for those both types
408   // and IDs are in the same stream.
409   if (isObj() || !pdb().hasPDBIpiStream())
410     return types();
411 
412   return getOrCreateTypeCollection(kIds);
413 }
414 
symbol_groups()415 iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
416   return make_range<SymbolGroupIterator>(symbol_groups_begin(),
417                                          symbol_groups_end());
418 }
419 
symbol_groups_begin()420 SymbolGroupIterator InputFile::symbol_groups_begin() {
421   return SymbolGroupIterator(*this);
422 }
423 
symbol_groups_end()424 SymbolGroupIterator InputFile::symbol_groups_end() {
425   return SymbolGroupIterator();
426 }
427 
SymbolGroupIterator()428 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
429 
SymbolGroupIterator(InputFile & File)430 SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
431   if (File.isObj()) {
432     SectionIter = File.obj().section_begin();
433     scanToNextDebugS();
434   }
435 }
436 
operator ==(const SymbolGroupIterator & R) const437 bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
438   bool E = isEnd();
439   bool RE = R.isEnd();
440   if (E || RE)
441     return E == RE;
442 
443   if (Value.File != R.Value.File)
444     return false;
445   return Index == R.Index;
446 }
447 
operator *() const448 const SymbolGroup &SymbolGroupIterator::operator*() const {
449   assert(!isEnd());
450   return Value;
451 }
operator *()452 SymbolGroup &SymbolGroupIterator::operator*() {
453   assert(!isEnd());
454   return Value;
455 }
456 
operator ++()457 SymbolGroupIterator &SymbolGroupIterator::operator++() {
458   assert(Value.File && !isEnd());
459   ++Index;
460   if (isEnd())
461     return *this;
462 
463   if (Value.File->isPdb()) {
464     Value.updatePdbModi(Index);
465     return *this;
466   }
467 
468   scanToNextDebugS();
469   return *this;
470 }
471 
scanToNextDebugS()472 void SymbolGroupIterator::scanToNextDebugS() {
473   assert(SectionIter.hasValue());
474   auto End = Value.File->obj().section_end();
475   auto &Iter = *SectionIter;
476   assert(!isEnd());
477 
478   while (++Iter != End) {
479     DebugSubsectionArray SS;
480     SectionRef SR = *Iter;
481     if (!isDebugSSection(SR, SS))
482       continue;
483 
484     Value.updateDebugS(SS);
485     return;
486   }
487 }
488 
isEnd() const489 bool SymbolGroupIterator::isEnd() const {
490   if (!Value.File)
491     return true;
492   if (Value.File->isPdb()) {
493     auto &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
494     uint32_t Count = Dbi.modules().getModuleCount();
495     assert(Index <= Count);
496     return Index == Count;
497   }
498 
499   assert(SectionIter.hasValue());
500   return *SectionIter == Value.File->obj().section_end();
501 }
502