1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_COFF_INPUT_FILES_H
10 #define LLD_COFF_INPUT_FILES_H
11 
#include "Config.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/StringSaver.h"
#include <cassert>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
24 
// Forward declarations of LLVM types that this header only names in
// declarations (return types, pointers and smart-pointer members), so it does
// not pull in their defining headers here.
namespace llvm {
struct DILineInfo;
namespace pdb {
class DbiModuleDescriptorBuilder;
class NativeSession;
} // namespace pdb
namespace lto {
class InputFile;
} // namespace lto
} // namespace llvm
35 
36 namespace lld {
37 class DWARFCache;
38 
39 namespace coff {
40 
// Takes an archive and returns a list of memory buffer references.
// NOTE(review): presumably one buffer per archive member; the definition is
// not in this header, so confirm against the implementation.
std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file);
42 
// Make the LLVM COFF/object names used by the declarations below available
// unqualified inside lld::coff.
using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
using llvm::COFF::MachineTypes;
using llvm::object::Archive;
using llvm::object::COFFObjectFile;
using llvm::object::COFFSymbolRef;
using llvm::object::coff_import_header;
using llvm::object::coff_section;
50 
// Forward declarations of lld::coff types defined elsewhere. Where this header
// uses them, it does so only through pointers, so the incomplete types suffice.
class Chunk;
class Defined;
class DefinedImportData;
class DefinedImportThunk;
class DefinedRegular;
class SectionChunk;
class Symbol;
class Undefined;
class TpiSource;
60 
61 // The root class of input files.
62 class InputFile {
63 public:
64   enum Kind {
65     ArchiveKind,
66     ObjectKind,
67     LazyObjectKind,
68     PDBKind,
69     ImportKind,
70     BitcodeKind
71   };
kind()72   Kind kind() const { return fileKind; }
~InputFile()73   virtual ~InputFile() {}
74 
75   // Returns the filename.
getName()76   StringRef getName() const { return mb.getBufferIdentifier(); }
77 
78   // Reads a file (the constructor doesn't do that).
79   virtual void parse() = 0;
80 
81   // Returns the CPU type this file was compiled to.
getMachineType()82   virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; }
83 
84   MemoryBufferRef mb;
85 
86   // An archive file name if this file is created from an archive.
87   StringRef parentName;
88 
89   // Returns .drectve section contents if exist.
getDirectives()90   StringRef getDirectives() { return directives; }
91 
92 protected:
InputFile(Kind k,MemoryBufferRef m)93   InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {}
94 
95   StringRef directives;
96 
97 private:
98   const Kind fileKind;
99 };
100 
101 // .lib or .a file.
102 class ArchiveFile : public InputFile {
103 public:
104   explicit ArchiveFile(MemoryBufferRef m);
classof(const InputFile * f)105   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
106   void parse() override;
107 
108   // Enqueues an archive member load for the given symbol. If we've already
109   // enqueued a load for the same archive member, this function does nothing,
110   // which ensures that we don't load the same member more than once.
111   void addMember(const Archive::Symbol &sym);
112 
113 private:
114   std::unique_ptr<Archive> file;
115   llvm::DenseSet<uint64_t> seen;
116 };
117 
118 // .obj or .o file between -start-lib and -end-lib.
119 class LazyObjFile : public InputFile {
120 public:
LazyObjFile(MemoryBufferRef m)121   explicit LazyObjFile(MemoryBufferRef m) : InputFile(LazyObjectKind, m) {}
classof(const InputFile * f)122   static bool classof(const InputFile *f) {
123     return f->kind() == LazyObjectKind;
124   }
125   // Makes this object file part of the link.
126   void fetch();
127   // Adds the symbols in this file to the symbol table as LazyObject symbols.
128   void parse() override;
129 
130 private:
131   std::vector<Symbol *> symbols;
132 };
133 
134 // .obj or .o file. This may be a member of an archive file.
135 class ObjFile : public InputFile {
136 public:
ObjFile(MemoryBufferRef m)137   explicit ObjFile(MemoryBufferRef m) : InputFile(ObjectKind, m) {}
ObjFile(MemoryBufferRef m,std::vector<Symbol * > && symbols)138   explicit ObjFile(MemoryBufferRef m, std::vector<Symbol *> &&symbols)
139       : InputFile(ObjectKind, m), symbols(std::move(symbols)) {}
classof(const InputFile * f)140   static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
141   void parse() override;
142   MachineTypes getMachineType() override;
getChunks()143   ArrayRef<Chunk *> getChunks() { return chunks; }
getDebugChunks()144   ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; }
getSXDataChunks()145   ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; }
getGuardFidChunks()146   ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; }
getGuardIATChunks()147   ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; }
getGuardLJmpChunks()148   ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; }
getSymbols()149   ArrayRef<Symbol *> getSymbols() { return symbols; }
150 
getMutableSymbols()151   MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; }
152 
153   ArrayRef<uint8_t> getDebugSection(StringRef secName);
154 
155   // Returns a Symbol object for the symbolIndex'th symbol in the
156   // underlying object file.
getSymbol(uint32_t symbolIndex)157   Symbol *getSymbol(uint32_t symbolIndex) {
158     return symbols[symbolIndex];
159   }
160 
161   // Returns the underlying COFF file.
getCOFFObj()162   COFFObjectFile *getCOFFObj() { return coffObj.get(); }
163 
164   // Add a symbol for a range extension thunk. Return the new symbol table
165   // index. This index can be used to modify a relocation.
addRangeThunkSymbol(Symbol * thunk)166   uint32_t addRangeThunkSymbol(Symbol *thunk) {
167     symbols.push_back(thunk);
168     return symbols.size() - 1;
169   }
170 
171   void includeResourceChunks();
172 
isResourceObjFile()173   bool isResourceObjFile() const { return !resourceChunks.empty(); }
174 
175   static std::vector<ObjFile *> instances;
176 
177   // Flags in the absolute @feat.00 symbol if it is present. These usually
178   // indicate if an object was compiled with certain security features enabled
179   // like stack guard, safeseh, /guard:cf, or other things.
180   uint32_t feat00Flags = 0;
181 
182   // True if this object file is compatible with SEH.  COFF-specific and
183   // x86-only. COFF spec 5.10.1. The .sxdata section.
hasSafeSEH()184   bool hasSafeSEH() { return feat00Flags & 0x1; }
185 
186   // True if this file was compiled with /guard:cf.
hasGuardCF()187   bool hasGuardCF() { return feat00Flags & 0x800; }
188 
189   // Pointer to the PDB module descriptor builder. Various debug info records
190   // will reference object files by "module index", which is here. Things like
191   // source files and section contributions are also recorded here. Will be null
192   // if we are not producing a PDB.
193   llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr;
194 
195   const coff_section *addrsigSec = nullptr;
196 
197   const coff_section *callgraphSec = nullptr;
198 
199   // When using Microsoft precompiled headers, this is the PCH's key.
200   // The same key is used by both the precompiled object, and objects using the
201   // precompiled object. Any difference indicates out-of-date objects.
202   llvm::Optional<uint32_t> pchSignature;
203 
204   // Whether this file was compiled with /hotpatch.
205   bool hotPatchable = false;
206 
207   // Whether the object was already merged into the final PDB.
208   bool mergedIntoPDB = false;
209 
210   // If the OBJ has a .debug$T stream, this tells how it will be handled.
211   TpiSource *debugTypesObj = nullptr;
212 
213   // The .debug$P or .debug$T section data if present. Empty otherwise.
214   ArrayRef<uint8_t> debugTypes;
215 
216   llvm::Optional<std::pair<StringRef, uint32_t>>
217   getVariableLocation(StringRef var);
218 
219   llvm::Optional<llvm::DILineInfo> getDILineInfo(uint32_t offset,
220                                                  uint32_t sectionIndex);
221 
222 private:
223   const coff_section* getSection(uint32_t i);
getSection(COFFSymbolRef sym)224   const coff_section *getSection(COFFSymbolRef sym) {
225     return getSection(sym.getSectionNumber());
226   }
227 
228   void initializeChunks();
229   void initializeSymbols();
230   void initializeFlags();
231   void initializeDependencies();
232 
233   SectionChunk *
234   readSection(uint32_t sectionNumber,
235               const llvm::object::coff_aux_section_definition *def,
236               StringRef leaderName);
237 
238   void readAssociativeDefinition(
239       COFFSymbolRef coffSym,
240       const llvm::object::coff_aux_section_definition *def);
241 
242   void readAssociativeDefinition(
243       COFFSymbolRef coffSym,
244       const llvm::object::coff_aux_section_definition *def,
245       uint32_t parentSection);
246 
247   void recordPrevailingSymbolForMingw(
248       COFFSymbolRef coffSym,
249       llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
250 
251   void maybeAssociateSEHForMingw(
252       COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def,
253       const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
254 
255   // Given a new symbol Sym with comdat selection Selection, if the new
256   // symbol is not (yet) Prevailing and the existing comdat leader set to
257   // Leader, emits a diagnostic if the new symbol and its selection doesn't
258   // match the existing symbol and its selection. If either old or new
259   // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace
260   // the existing leader. In that case, Prevailing is set to true.
261   void
262   handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection,
263                         bool &prevailing, DefinedRegular *leader,
264                         const llvm::object::coff_aux_section_definition *def);
265 
266   llvm::Optional<Symbol *>
267   createDefined(COFFSymbolRef sym,
268                 std::vector<const llvm::object::coff_aux_section_definition *>
269                     &comdatDefs,
270                 bool &prevailingComdat);
271   Symbol *createRegular(COFFSymbolRef sym);
272   Symbol *createUndefined(COFFSymbolRef sym);
273 
274   std::unique_ptr<COFFObjectFile> coffObj;
275 
276   // List of all chunks defined by this file. This includes both section
277   // chunks and non-section chunks for common symbols.
278   std::vector<Chunk *> chunks;
279 
280   std::vector<SectionChunk *> resourceChunks;
281 
282   // CodeView debug info sections.
283   std::vector<SectionChunk *> debugChunks;
284 
285   // Chunks containing symbol table indices of exception handlers. Only used for
286   // 32-bit x86.
287   std::vector<SectionChunk *> sxDataChunks;
288 
289   // Chunks containing symbol table indices of address taken symbols, address
290   // taken IAT entries, and longjmp targets. These are not linked into the
291   // final binary when /guard:cf is set.
292   std::vector<SectionChunk *> guardFidChunks;
293   std::vector<SectionChunk *> guardIATChunks;
294   std::vector<SectionChunk *> guardLJmpChunks;
295 
296   // This vector contains a list of all symbols defined or referenced by this
297   // file. They are indexed such that you can get a Symbol by symbol
298   // index. Nonexistent indices (which are occupied by auxiliary
299   // symbols in the real symbol table) are filled with null pointers.
300   std::vector<Symbol *> symbols;
301 
302   // This vector contains the same chunks as Chunks, but they are
303   // indexed such that you can get a SectionChunk by section index.
304   // Nonexistent section indices are filled with null pointers.
305   // (Because section number is 1-based, the first slot is always a
306   // null pointer.) This vector is only valid during initialization.
307   std::vector<SectionChunk *> sparseChunks;
308 
309   DWARFCache *dwarf = nullptr;
310 };
311 
312 // This is a PDB type server dependency, that is not a input file per se, but
313 // needs to be treated like one. Such files are discovered from the debug type
314 // stream.
315 class PDBInputFile : public InputFile {
316 public:
317   explicit PDBInputFile(MemoryBufferRef m);
318   ~PDBInputFile();
classof(const InputFile * f)319   static bool classof(const InputFile *f) { return f->kind() == PDBKind; }
320   void parse() override;
321 
322   static void enqueue(StringRef path, ObjFile *fromFile);
323 
324   static PDBInputFile *findFromRecordPath(StringRef path, ObjFile *fromFile);
325 
326   static std::map<std::string, PDBInputFile *> instances;
327 
328   // Record possible errors while opening the PDB file
329   llvm::Optional<Error> loadErr;
330 
331   // This is the actual interface to the PDB (if it was opened successfully)
332   std::unique_ptr<llvm::pdb::NativeSession> session;
333 
334   // If the PDB has a .debug$T stream, this tells how it will be handled.
335   TpiSource *debugTypesObj = nullptr;
336 };
337 
338 // This type represents import library members that contain DLL names
339 // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
340 // for details about the format.
341 class ImportFile : public InputFile {
342 public:
ImportFile(MemoryBufferRef m)343   explicit ImportFile(MemoryBufferRef m) : InputFile(ImportKind, m) {}
344 
classof(const InputFile * f)345   static bool classof(const InputFile *f) { return f->kind() == ImportKind; }
346 
347   static std::vector<ImportFile *> instances;
348 
349   Symbol *impSym = nullptr;
350   Symbol *thunkSym = nullptr;
351   std::string dllName;
352 
353 private:
354   void parse() override;
355 
356 public:
357   StringRef externalName;
358   const coff_import_header *hdr;
359   Chunk *location = nullptr;
360 
361   // We want to eliminate dllimported symbols if no one actually refers to them.
362   // These "Live" bits are used to keep track of which import library members
363   // are actually in use.
364   //
365   // If the Live bit is turned off by MarkLive, Writer will ignore dllimported
366   // symbols provided by this import library member. We also track whether the
367   // imported symbol is used separately from whether the thunk is used in order
368   // to avoid creating unnecessary thunks.
369   bool live = !config->doGC;
370   bool thunkLive = !config->doGC;
371 };
372 
373 // Used for LTO.
374 class BitcodeFile : public InputFile {
375 public:
376   BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
377               uint64_t offsetInArchive);
378   explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName,
379                        uint64_t offsetInArchive,
380                        std::vector<Symbol *> &&symbols);
381   ~BitcodeFile();
classof(const InputFile * f)382   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
getSymbols()383   ArrayRef<Symbol *> getSymbols() { return symbols; }
384   MachineTypes getMachineType() override;
385   static std::vector<BitcodeFile *> instances;
386   std::unique_ptr<llvm::lto::InputFile> obj;
387 
388 private:
389   void parse() override;
390 
391   std::vector<Symbol *> symbols;
392 };
393 
isBitcode(MemoryBufferRef mb)394 inline bool isBitcode(MemoryBufferRef mb) {
395   return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
396 }
397 
// Takes a path and returns a new string.
// NOTE(review): presumably applies the configured ThinLTO object-suffix
// replacement to the path; the definition is not in this header, so confirm
// against the implementation.
std::string replaceThinLTOSuffix(StringRef path);
399 } // namespace coff
400 
// Produces a string for the given COFF input file. Declared in namespace lld
// rather than lld::coff.
// NOTE(review): presumably a human-readable name for use in diagnostics; the
// exact format is defined elsewhere.
std::string toString(const coff::InputFile *file);
402 } // namespace lld
403 
404 #endif
405