1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_WASM_SYMBOLS_H
10 #define LLD_WASM_SYMBOLS_H
11 
12 #include "Config.h"
13 #include "lld/Common/LLVM.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/Object/Archive.h"
16 #include "llvm/Object/Wasm.h"
17 
18 namespace lld {
19 namespace wasm {
20 
// Shared string constants. Both are only declared here; their values are
// defined out of line.

// The default module name to use for symbol imports.
extern const char *defaultModule;

// The name under which the wasm table is imported or exported.
extern const char *functionTableName;
28 
// Make the wasm symbol type usable without qualification in this namespace.
using llvm::wasm::WasmSymbolType;

// Forward declarations: this header only refers to these classes through
// pointers, so their full definitions are not needed here.
class InputFile;
class InputChunk;
class InputSegment;
class InputFunction;
class InputGlobal;
class InputEvent;
class InputSection;
class OutputSection;

// Sentinel meaning "no index assigned". It is the initial value of the
// output-symbol, GOT, table, function, global and event indices below.
#define INVALID_INDEX UINT32_MAX
41 
42 // The base class for real symbol classes.
43 class Symbol {
44 public:
45   enum Kind : uint8_t {
46     DefinedFunctionKind,
47     DefinedDataKind,
48     DefinedGlobalKind,
49     DefinedEventKind,
50     SectionKind,
51     OutputSectionKind,
52     UndefinedFunctionKind,
53     UndefinedDataKind,
54     UndefinedGlobalKind,
55     LazyKind,
56   };
57 
kind()58   Kind kind() const { return symbolKind; }
59 
isDefined()60   bool isDefined() const { return !isLazy() && !isUndefined(); }
61 
isUndefined()62   bool isUndefined() const {
63     return symbolKind == UndefinedFunctionKind ||
64            symbolKind == UndefinedDataKind || symbolKind == UndefinedGlobalKind;
65   }
66 
isLazy()67   bool isLazy() const { return symbolKind == LazyKind; }
68 
69   bool isLocal() const;
70   bool isWeak() const;
71   bool isHidden() const;
72 
73   // Returns true if this symbol exists in a discarded (due to COMDAT) section
74   bool isDiscarded() const;
75 
76   // True if this is an undefined weak symbol. This only works once
77   // all input files have been added.
isUndefWeak()78   bool isUndefWeak() const {
79     // See comment on lazy symbols for details.
80     return isWeak() && (isUndefined() || isLazy());
81   }
82 
83   // Returns the symbol name.
getName()84   StringRef getName() const { return name; }
85 
86   // Returns the file from which this symbol was created.
getFile()87   InputFile *getFile() const { return file; }
88 
89   InputChunk *getChunk() const;
90 
91   // Indicates that the section or import for this symbol will be included in
92   // the final image.
93   bool isLive() const;
94 
95   // Marks the symbol's InputChunk as Live, so that it will be included in the
96   // final image.
97   void markLive();
98 
99   void setHidden(bool isHidden);
100 
101   // Get/set the index in the output symbol table.  This is only used for
102   // relocatable output.
103   uint32_t getOutputSymbolIndex() const;
104   void setOutputSymbolIndex(uint32_t index);
105 
106   WasmSymbolType getWasmType() const;
107   bool isExported() const;
108 
109   // Indicates that the symbol is used in an __attribute__((used)) directive
110   // or similar.
111   bool isNoStrip() const;
112 
113   const WasmSignature* getSignature() const;
114 
getGOTIndex()115   uint32_t getGOTIndex() const {
116     assert(gotIndex != INVALID_INDEX);
117     return gotIndex;
118   }
119 
120   void setGOTIndex(uint32_t index);
hasGOTIndex()121   bool hasGOTIndex() const { return gotIndex != INVALID_INDEX; }
122 
123 protected:
Symbol(StringRef name,Kind k,uint32_t flags,InputFile * f)124   Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
125       : name(name), file(f), symbolKind(k), referenced(!config->gcSections),
126         requiresGOT(false), isUsedInRegularObj(false), forceExport(false),
127         canInline(false), traced(false), isStub(false), flags(flags) {}
128 
129   StringRef name;
130   InputFile *file;
131   uint32_t outputSymbolIndex = INVALID_INDEX;
132   uint32_t gotIndex = INVALID_INDEX;
133   Kind symbolKind;
134 
135 public:
136   bool referenced : 1;
137 
138   // True for data symbols that needs a dummy GOT entry.  Used for static
139   // linking of GOT accesses.
140   bool requiresGOT : 1;
141 
142   // True if the symbol was used for linking and thus need to be added to the
143   // output file's symbol table. This is true for all symbols except for
144   // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
145   // are unreferenced except by other bitcode objects.
146   bool isUsedInRegularObj : 1;
147 
148   // True if ths symbol is explicitly marked for export (i.e. via the
149   // -e/--export command line flag)
150   bool forceExport : 1;
151 
152   // False if LTO shouldn't inline whatever this symbol points to. If a symbol
153   // is overwritten after LTO, LTO shouldn't inline the symbol because it
154   // doesn't know the final contents of the symbol.
155   bool canInline : 1;
156 
157   // True if this symbol is specified by --trace-symbol option.
158   bool traced : 1;
159 
160   // True if this symbol is a linker-synthesized stub function (traps when
161   // called) and should otherwise be treated as missing/undefined.  See
162   // SymbolTable::replaceWithUndefined.
163   // These stubs never appear in the table and any table index relocations
164   // against them will produce address 0 (The table index representing
165   // the null function pointer).
166   bool isStub : 1;
167 
168   uint32_t flags;
169 };
170 
171 class FunctionSymbol : public Symbol {
172 public:
classof(const Symbol * s)173   static bool classof(const Symbol *s) {
174     return s->kind() == DefinedFunctionKind ||
175            s->kind() == UndefinedFunctionKind;
176   }
177 
178   // Get/set the table index
179   void setTableIndex(uint32_t index);
180   uint32_t getTableIndex() const;
181   bool hasTableIndex() const;
182 
183   // Get/set the function index
184   uint32_t getFunctionIndex() const;
185   void setFunctionIndex(uint32_t index);
186   bool hasFunctionIndex() const;
187 
188   const WasmSignature *signature;
189 
190 protected:
FunctionSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f,const WasmSignature * sig)191   FunctionSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
192                  const WasmSignature *sig)
193       : Symbol(name, k, flags, f), signature(sig) {}
194 
195   uint32_t tableIndex = INVALID_INDEX;
196   uint32_t functionIndex = INVALID_INDEX;
197 };
198 
199 class DefinedFunction : public FunctionSymbol {
200 public:
201   DefinedFunction(StringRef name, uint32_t flags, InputFile *f,
202                   InputFunction *function);
203 
classof(const Symbol * s)204   static bool classof(const Symbol *s) {
205     return s->kind() == DefinedFunctionKind;
206   }
207 
208   InputFunction *function;
209 };
210 
211 class UndefinedFunction : public FunctionSymbol {
212 public:
213   UndefinedFunction(StringRef name, llvm::Optional<StringRef> importName,
214                     llvm::Optional<StringRef> importModule, uint32_t flags,
215                     InputFile *file = nullptr,
216                     const WasmSignature *type = nullptr,
217                     bool isCalledDirectly = true)
FunctionSymbol(name,UndefinedFunctionKind,flags,file,type)218       : FunctionSymbol(name, UndefinedFunctionKind, flags, file, type),
219         importName(importName), importModule(importModule),
220         isCalledDirectly(isCalledDirectly) {}
221 
classof(const Symbol * s)222   static bool classof(const Symbol *s) {
223     return s->kind() == UndefinedFunctionKind;
224   }
225 
226   llvm::Optional<StringRef> importName;
227   llvm::Optional<StringRef> importModule;
228   DefinedFunction *stubFunction = nullptr;
229   bool isCalledDirectly;
230 };
231 
232 // Section symbols for output sections are different from those for input
233 // section.  These are generated by the linker and point the OutputSection
234 // rather than an InputSection.
235 class OutputSectionSymbol : public Symbol {
236 public:
OutputSectionSymbol(const OutputSection * s)237   OutputSectionSymbol(const OutputSection *s)
238       : Symbol("", OutputSectionKind, llvm::wasm::WASM_SYMBOL_BINDING_LOCAL,
239                nullptr),
240         section(s) {}
241 
classof(const Symbol * s)242   static bool classof(const Symbol *s) {
243     return s->kind() == OutputSectionKind;
244   }
245 
246   const OutputSection *section;
247 };
248 
249 class SectionSymbol : public Symbol {
250 public:
251   SectionSymbol(uint32_t flags, const InputSection *s, InputFile *f = nullptr)
252       : Symbol("", SectionKind, flags, f), section(s) {}
253 
classof(const Symbol * s)254   static bool classof(const Symbol *s) { return s->kind() == SectionKind; }
255 
256   const OutputSectionSymbol *getOutputSectionSymbol() const;
257 
258   const InputSection *section;
259 };
260 
261 class DataSymbol : public Symbol {
262 public:
classof(const Symbol * s)263   static bool classof(const Symbol *s) {
264     return s->kind() == DefinedDataKind || s->kind() == UndefinedDataKind;
265   }
266 
267 protected:
DataSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f)268   DataSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
269       : Symbol(name, k, flags, f) {}
270 };
271 
272 class DefinedData : public DataSymbol {
273 public:
274   // Constructor for regular data symbols originating from input files.
DefinedData(StringRef name,uint32_t flags,InputFile * f,InputSegment * segment,uint64_t offset,uint64_t size)275   DefinedData(StringRef name, uint32_t flags, InputFile *f,
276               InputSegment *segment, uint64_t offset, uint64_t size)
277       : DataSymbol(name, DefinedDataKind, flags, f), segment(segment),
278         offset(offset), size(size) {}
279 
280   // Constructor for linker synthetic data symbols.
DefinedData(StringRef name,uint32_t flags)281   DefinedData(StringRef name, uint32_t flags)
282       : DataSymbol(name, DefinedDataKind, flags, nullptr) {}
283 
classof(const Symbol * s)284   static bool classof(const Symbol *s) { return s->kind() == DefinedDataKind; }
285 
286   // Returns the output virtual address of a defined data symbol.
287   uint64_t getVirtualAddress() const;
288   void setVirtualAddress(uint64_t va);
289 
290   // Returns the offset of a defined data symbol within its OutputSegment.
291   uint64_t getOutputSegmentOffset() const;
292   uint64_t getOutputSegmentIndex() const;
getSize()293   uint64_t getSize() const { return size; }
294 
295   InputSegment *segment = nullptr;
296   uint32_t offset = 0;
297 
298 protected:
299   uint64_t size = 0;
300 };
301 
302 class UndefinedData : public DataSymbol {
303 public:
304   UndefinedData(StringRef name, uint32_t flags, InputFile *file = nullptr)
DataSymbol(name,UndefinedDataKind,flags,file)305       : DataSymbol(name, UndefinedDataKind, flags, file) {}
classof(const Symbol * s)306   static bool classof(const Symbol *s) {
307     return s->kind() == UndefinedDataKind;
308   }
309 };
310 
311 class GlobalSymbol : public Symbol {
312 public:
classof(const Symbol * s)313   static bool classof(const Symbol *s) {
314     return s->kind() == DefinedGlobalKind || s->kind() == UndefinedGlobalKind;
315   }
316 
getGlobalType()317   const WasmGlobalType *getGlobalType() const { return globalType; }
318 
319   // Get/set the global index
320   uint32_t getGlobalIndex() const;
321   void setGlobalIndex(uint32_t index);
322   bool hasGlobalIndex() const;
323 
324 protected:
GlobalSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f,const WasmGlobalType * globalType)325   GlobalSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
326                const WasmGlobalType *globalType)
327       : Symbol(name, k, flags, f), globalType(globalType) {}
328 
329   const WasmGlobalType *globalType;
330   uint32_t globalIndex = INVALID_INDEX;
331 };
332 
333 class DefinedGlobal : public GlobalSymbol {
334 public:
335   DefinedGlobal(StringRef name, uint32_t flags, InputFile *file,
336                 InputGlobal *global);
337 
classof(const Symbol * s)338   static bool classof(const Symbol *s) {
339     return s->kind() == DefinedGlobalKind;
340   }
341 
342   InputGlobal *global;
343 };
344 
345 class UndefinedGlobal : public GlobalSymbol {
346 public:
347   UndefinedGlobal(StringRef name, llvm::Optional<StringRef> importName,
348                   llvm::Optional<StringRef> importModule, uint32_t flags,
349                   InputFile *file = nullptr,
350                   const WasmGlobalType *type = nullptr)
GlobalSymbol(name,UndefinedGlobalKind,flags,file,type)351       : GlobalSymbol(name, UndefinedGlobalKind, flags, file, type),
352         importName(importName), importModule(importModule) {}
353 
classof(const Symbol * s)354   static bool classof(const Symbol *s) {
355     return s->kind() == UndefinedGlobalKind;
356   }
357 
358   llvm::Optional<StringRef> importName;
359   llvm::Optional<StringRef> importModule;
360 };
361 
362 // Wasm events are features that suspend the current execution and transfer the
363 // control flow to a corresponding handler. Currently the only supported event
364 // kind is exceptions.
365 //
366 // Event tags are values to distinguish different events. For exceptions, they
367 // can be used to distinguish different language's exceptions, i.e., all C++
368 // exceptions have the same tag. Wasm can generate code capable of doing
369 // different handling actions based on the tag of caught exceptions.
370 //
371 // A single EventSymbol object represents a single tag. C++ exception event
372 // symbol is a weak symbol generated in every object file in which exceptions
373 // are used, and has name '__cpp_exception' for linking.
374 class EventSymbol : public Symbol {
375 public:
classof(const Symbol * s)376   static bool classof(const Symbol *s) { return s->kind() == DefinedEventKind; }
377 
getEventType()378   const WasmEventType *getEventType() const { return eventType; }
379 
380   // Get/set the event index
381   uint32_t getEventIndex() const;
382   void setEventIndex(uint32_t index);
383   bool hasEventIndex() const;
384 
385   const WasmSignature *signature;
386 
387 protected:
EventSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f,const WasmEventType * eventType,const WasmSignature * sig)388   EventSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
389               const WasmEventType *eventType, const WasmSignature *sig)
390       : Symbol(name, k, flags, f), signature(sig), eventType(eventType) {}
391 
392   const WasmEventType *eventType;
393   uint32_t eventIndex = INVALID_INDEX;
394 };
395 
396 class DefinedEvent : public EventSymbol {
397 public:
398   DefinedEvent(StringRef name, uint32_t flags, InputFile *file,
399                InputEvent *event);
400 
classof(const Symbol * s)401   static bool classof(const Symbol *s) { return s->kind() == DefinedEventKind; }
402 
403   InputEvent *event;
404 };
405 
406 // LazySymbol represents a symbol that is not yet in the link, but we know where
407 // to find it if needed. If the resolver finds both Undefined and Lazy for the
408 // same name, it will ask the Lazy to load a file.
409 //
410 // A special complication is the handling of weak undefined symbols. They should
411 // not load a file, but we have to remember we have seen both the weak undefined
412 // and the lazy. We represent that with a lazy symbol with a weak binding. This
413 // means that code looking for undefined symbols normally also has to take lazy
414 // symbols into consideration.
415 class LazySymbol : public Symbol {
416 public:
LazySymbol(StringRef name,uint32_t flags,InputFile * file,const llvm::object::Archive::Symbol & sym)417   LazySymbol(StringRef name, uint32_t flags, InputFile *file,
418              const llvm::object::Archive::Symbol &sym)
419       : Symbol(name, LazyKind, flags, file), archiveSymbol(sym) {}
420 
classof(const Symbol * s)421   static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
422   void fetch();
423   void setWeak();
424   MemoryBufferRef getMemberBuffer();
425 
426   // Lazy symbols can have a signature because they can replace an
427   // UndefinedFunction which which case we need to be able to preserve the
428   // signature.
429   // TODO(sbc): This repetition of the signature field is inelegant.  Revisit
430   // the use of class hierarchy to represent symbol taxonomy.
431   const WasmSignature *signature = nullptr;
432 
433 private:
434   llvm::object::Archive::Symbol archiveSymbol;
435 };
436 
// Linker-generated symbols. Each static member below points to the symbol
// object for one well-known name; the name is given in the comment above it.
struct WasmSym {
  // __global_base
  // Symbol marking the start of the global section.
  static DefinedData *globalBase;

  // __stack_pointer
  // Global that holds the address of the top of the explicit value stack in
  // linear memory.
  static GlobalSymbol *stackPointer;

  // __tls_base
  // Global that holds the address of the base of the current thread's
  // TLS block.
  static GlobalSymbol *tlsBase;

  // __tls_size
  // Symbol whose value is the size of the TLS block.
  static GlobalSymbol *tlsSize;

  // __tls_align
  // Symbol whose value is the alignment of the TLS block.
  static GlobalSymbol *tlsAlign;

  // __data_end
  // Symbol marking the end of the data and bss.
  static DefinedData *dataEnd;

  // __heap_base
  // Symbol marking the end of the data, bss and explicit stack.  Any linear
  // memory following this address is not used by the linked code and can
  // therefore be used as a backing store for brk()/malloc() implementations.
  static DefinedData *heapBase;

  // __wasm_init_memory_flag
  // Symbol whose contents are nonzero iff memory has already been initialized.
  static DefinedData *initMemoryFlag;

  // __wasm_init_memory
  // Function that initializes passive data segments during instantiation.
  static DefinedFunction *initMemory;

  // __wasm_call_ctors
  // Function that directly calls all ctors in priority order.
  static DefinedFunction *callCtors;

  // __wasm_call_dtors
  // Function that calls the libc/etc. cleanup function.
  static DefinedFunction *callDtors;

  // __wasm_apply_relocs
  // Function that applies relocations to data segment post-instantiation.
  static DefinedFunction *applyRelocs;

  // __wasm_init_tls
  // Function that allocates thread-local storage and initializes it.
  static DefinedFunction *initTLS;

  // __dso_handle
  // Symbol used in calls to __cxa_atexit to determine current DLL
  static DefinedData *dsoHandle;

  // __table_base
  // Used in PIC code for offset of indirect function table.
  // NOTE(review): there is both an undefined-global and a defined-data
  // variant; presumably only one is set for a given link mode -- confirm
  // against the code that creates these symbols.
  static UndefinedGlobal *tableBase;
  static DefinedData *definedTableBase;

  // __memory_base
  // Used in PIC code for offset of global data.
  // NOTE(review): same undefined/defined pairing as __table_base above.
  static UndefinedGlobal *memoryBase;
  static DefinedData *definedMemoryBase;
};
509 
510 // A buffer class that is large enough to hold any Symbol-derived
511 // object. We allocate memory using this class and instantiate a symbol
512 // using the placement new.
513 union SymbolUnion {
514   alignas(DefinedFunction) char a[sizeof(DefinedFunction)];
515   alignas(DefinedData) char b[sizeof(DefinedData)];
516   alignas(DefinedGlobal) char c[sizeof(DefinedGlobal)];
517   alignas(DefinedEvent) char d[sizeof(DefinedEvent)];
518   alignas(LazySymbol) char e[sizeof(LazySymbol)];
519   alignas(UndefinedFunction) char f[sizeof(UndefinedFunction)];
520   alignas(UndefinedData) char g[sizeof(UndefinedData)];
521   alignas(UndefinedGlobal) char h[sizeof(UndefinedGlobal)];
522   alignas(SectionSymbol) char i[sizeof(SectionSymbol)];
523 };
524 
525 // It is important to keep the size of SymbolUnion small for performance and
526 // memory usage reasons. 96 bytes is a soft limit based on the size of
527 // UndefinedFunction on a 64-bit system.
528 static_assert(sizeof(SymbolUnion) <= 120, "SymbolUnion too large");
529 
// Tracing hooks, defined out of line. replaceSymbol calls printTraceSymbol
// for symbols whose `traced` flag is set (--trace-symbol).
void printTraceSymbol(Symbol *sym);
// NOTE(review): presumably the counterpart used when only the name of an
// undefined reference and the referencing file are known -- confirm at the
// definition.
void printTraceSymbolUndefined(StringRef name, const InputFile* file);
532 
533 template <typename T, typename... ArgT>
replaceSymbol(Symbol * s,ArgT &&...arg)534 T *replaceSymbol(Symbol *s, ArgT &&... arg) {
535   static_assert(std::is_trivially_destructible<T>(),
536                 "Symbol types must be trivially destructible");
537   static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
538   static_assert(alignof(T) <= alignof(SymbolUnion),
539                 "SymbolUnion not aligned enough");
540   assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
541          "Not a Symbol");
542 
543   Symbol symCopy = *s;
544 
545   T *s2 = new (s) T(std::forward<ArgT>(arg)...);
546   s2->isUsedInRegularObj = symCopy.isUsedInRegularObj;
547   s2->forceExport = symCopy.forceExport;
548   s2->canInline = symCopy.canInline;
549   s2->traced = symCopy.traced;
550 
551   // Print out a log message if --trace-symbol was specified.
552   // This is for debugging.
553   if (s2->traced)
554     printTraceSymbol(s2);
555 
556   return s2;
557 }
558 
559 } // namespace wasm
560 
// Returns a symbol name for an error message.
std::string toString(const wasm::Symbol &sym);
// Returns a string for the given symbol kind. The exact text is chosen by the
// out-of-line definition.
std::string toString(wasm::Symbol::Kind kind);
// NOTE(review): presumably returns `name` demangled when demangling applies
// and unchanged otherwise -- confirm at the definition.
std::string maybeDemangleSymbol(StringRef name);
565 
566 } // namespace lld
567 
568 #endif
569