1 //===- Core/DefinedAtom.h - An Atom with content --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_CORE_DEFINED_ATOM_H
10 #define LLD_CORE_DEFINED_ATOM_H
11 
12 #include "lld/Common/LLVM.h"
13 #include "lld/Core/Atom.h"
14 #include "lld/Core/Reference.h"
15 #include "llvm/Support/ErrorHandling.h"
16 
17 namespace lld {
18 class File;
19 
20 /// The fundamental unit of linking.
21 ///
22 /// A C function or global variable is an atom.  An atom has content and
23 /// attributes. The content of a function atom is the instructions that
24 /// implement the function.  The content of a global variable atom is its
25 /// initial bytes.
26 ///
27 /// Here are some example attribute sets for common atoms. If a particular
28 /// attribute is not listed, the default values are:  definition=regular,
29 /// sectionChoice=basedOnContent, scope=translationUnit, merge=no,
30 /// deadStrip=normal, interposable=no
31 ///
32 ///  C function:  void foo() {} <br>
33 ///    name=foo, type=code, perm=r_x, scope=global
34 ///
///  C static function:  static void func() {} <br>
36 ///    name=func, type=code, perm=r_x
37 ///
38 ///  C global variable:  int count = 1; <br>
39 ///    name=count, type=data, perm=rw_, scope=global
40 ///
41 ///  C tentative definition:  int bar; <br>
42 ///    name=bar, type=zerofill, perm=rw_, scope=global,
43 ///    merge=asTentative, interposable=yesAndRuntimeWeak
44 ///
45 ///  Uninitialized C static variable:  static int stuff; <br>
46 ///    name=stuff, type=zerofill, perm=rw_
47 ///
48 ///  Weak C function:  __attribute__((weak)) void foo() {} <br>
49 ///    name=foo, type=code, perm=r_x, scope=global, merge=asWeak
50 ///
51 ///  Hidden C function:  __attribute__((visibility("hidden"))) void foo() {}<br>
52 ///    name=foo, type=code, perm=r_x, scope=linkageUnit
53 ///
54 ///  No-dead-strip function:  __attribute__((used)) void foo() {} <br>
55 ///    name=foo, type=code, perm=r_x, scope=global, deadStrip=never
56 ///
///  Non-inlined C++ inline method:  inline void Foo::doit() {} <br>
///    name=_ZN3Foo4doitEv, type=code, perm=r_x, scope=global,
///    merge=asWeak
60 ///
///  Non-inlined C++ inline method whose address is taken:
///     inline void Foo::doit() {} <br>
///    name=_ZN3Foo4doitEv, type=code, perm=r_x, scope=global,
///    merge=asWeakAndAddressUsed
65 ///
66 ///  literal c-string:  "hello" <br>
67 ///    name="" type=cstring, perm=r__, scope=linkageUnit
68 ///
69 ///  literal double:  1.234 <br>
70 ///    name="" type=literal8, perm=r__, scope=linkageUnit
71 ///
72 ///  constant:  { 1,2,3 } <br>
73 ///    name="" type=constant, perm=r__, scope=linkageUnit
74 ///
75 ///  Pointer to initializer function:  <br>
76 ///    name="" type=initializer, perm=rw_l,
77 ///    sectionChoice=customRequired
78 ///
///  C function placed in custom section:  __attribute__((section("__foo")))
///                                        void foo() {} <br>
81 ///    name=foo, type=code, perm=r_x, scope=global,
82 ///    sectionChoice=customRequired, customSectionName=__foo
83 ///
84 class DefinedAtom : public Atom {
85 public:
  /// Whether the linker may bind uses of an atom directly or must go through
  /// an indirection; reported by interposable().
  enum Interposable {
    interposeNo,            // linker can directly bind uses of this atom
    interposeYes,           // linker must indirect (through GOT) uses
    interposeYesAndRuntimeWeak // must indirect and mark symbol weak in final
                               // linked image
  };
92 
  /// How the linker should handle multiple atoms that have the same name;
  /// reported by merge().
  enum Merge {
    mergeNo,                // Another atom with same name is error
    mergeAsTentative,       // Is ANSI C tentative definition, can be coalesced
    mergeAsWeak,            // Is C++ inline definition that was not inlined,
                            // but address was not taken, so atom can be hidden
                            // by linker
    mergeAsWeakAndAddressUsed, // Is C++ inline definition whose address was
                               // taken.
    mergeSameNameAndSize,   // Another atom with different size is error
    mergeByLargestSection,  // Choose an atom whose section is the largest.
    mergeByContent,         // Merge with other constants with same content.
  };
105 
  /// The kind of content an atom holds, as reported by contentType().
  /// Entries tagged [Darwin] are annotated as Darwin-specific.
  enum ContentType {
    typeUnknown,            // for use with definitionUndefined
    typeMachHeader,         // atom representing mach_header [Darwin]
    typeCode,               // executable code
    typeResolver,           // function which returns address of target
    typeBranchIsland,       // linker created for large binaries
    typeBranchShim,         // linker created to switch thumb mode
    typeStub,               // linker created for calling external function
    typeStubHelper,         // linker created for initial stub binding
    typeConstant,           // a read-only constant
    typeCString,            // a zero terminated UTF8 C string
    typeUTF16String,        // a zero terminated UTF16 string
    typeCFI,                // a FDE or CIE from dwarf unwind info
    typeLSDA,               // extra unwinding info
    typeLiteral4,           // a four-byte read-only constant
    typeLiteral8,           // an eight-byte read-only constant
    typeLiteral16,          // a sixteen-byte read-only constant
    typeData,               // read-write data
    typeDataFast,           // allow data to be quickly accessed
    typeZeroFill,           // zero-fill data
    typeZeroFillFast,       // allow zero-fill data to be quickly accessed
    typeConstData,          // read-only data after dynamic linker is done
    typeObjC1Class,         // ObjC1 class [Darwin]
    typeLazyPointer,        // pointer through which a stub jumps
    typeLazyDylibPointer,   // pointer through which a stub jumps [Darwin]
    typeNonLazyPointer,     // pointer to external symbol
    typeCFString,           // NS/CFString object [Darwin]
    typeGOT,                // pointer to external symbol
    typeInitializerPtr,     // pointer to initializer function
    typeTerminatorPtr,      // pointer to terminator function
    typeCStringPtr,         // pointer to UTF8 C string [Darwin]
    typeObjCClassPtr,       // pointer to ObjC class [Darwin]
    typeObjC2CategoryList,  // pointers to ObjC category [Darwin]
    typeObjCImageInfo,      // pointer to ObjC class [Darwin]
                            // NOTE(review): same text as typeObjCClassPtr;
                            // looks copy-pasted -- confirm intended meaning
    typeObjCMethodList,     // pointer to ObjC method list [Darwin]
    typeDTraceDOF,          // runtime data for Dtrace [Darwin]
    typeInterposingTuples,  // tuples of interposing info for dyld [Darwin]
    typeTempLTO,            // temporary atom for bitcode reader
    typeCompactUnwindInfo,  // runtime data for unwinder [Darwin]
    typeProcessedUnwindInfo,// compressed compact unwind info [Darwin]
    typeThunkTLV,           // thunk used to access a TLV [Darwin]
    typeTLVInitialData,     // initial data for a TLV [Darwin]
    typeTLVInitialZeroFill, // TLV initial zero fill data [Darwin]
    typeTLVInitializerPtr,  // pointer to thread local initializer [Darwin]
    typeDSOHandle,          // atom representing DSO handle [Darwin]
    typeSectCreate,         // Created via the -sectcreate option [Darwin]
  };
153 
  // Permission bits for atoms and segments. The order of these values is
  // important, because the layout pass may sort atoms by permission if other
  // attributes are the same.
  //
  // Each named value is a sum of the bits 8 (readable), 4 (executable),
  // 2 (writable) and 1 (the "L" in permRW_L); permUnknown lies above all of
  // them.
  enum ContentPermissions {
    perm___  = 0,           // mapped as unaccessible
    permR__  = 8,           // mapped read-only
    permRW_  = 8 + 2,       // mapped readable and writable
    permRW_L = 8 + 2 + 1,   // initially mapped r/w, then made read-only
                            // loader writable
    permR_X  = 8 + 4,       // mapped readable and executable
    permRWX  = 8 + 2 + 4,   // mapped readable and writable and executable
    permUnknown = 16        // unknown or invalid permissions
  };
167 
  /// Whether the atom's final section is inferred from its content or named
  /// explicitly; reported by sectionChoice().
  enum SectionChoice {
    sectionBasedOnContent,  // linker infers final section based on content
    sectionCustomPreferred, // linker may place in specific section
    sectionCustomRequired   // linker must place in specific section
  };
173 
  /// Constraints on whether the linker may dead strip an atom; reported by
  /// deadStrip().
  enum DeadStripKind {
    deadStripNormal,        // linker may dead strip this atom
    deadStripNever,         // linker must never dead strip this atom
    deadStripAlways         // linker must remove this atom if unused
  };
179 
  /// Conditions under which an atom is dynamically exported; reported by
  /// dynamicExport().
  enum DynamicExport {
    /// The linker may or may not export this atom dynamically depending
    ///   on the output type and other context of the link.
    dynamicExportNormal,
    /// The linker will always export this atom dynamically.
    dynamicExportAlways,
  };
187 
  // Describes the code model used by the atom; reported by codeModel().
  // Apart from codeNA, the values are grouped by target architecture.
  enum CodeModel {
    codeNA,           // no specific code model
    // MIPS code models
    codeMipsPIC,      // PIC function in a PIC / non-PIC mixed file
    codeMipsMicro,    // microMIPS instruction encoding
    codeMipsMicroPIC, // microMIPS instruction encoding + PIC
    codeMips16,       // MIPS-16 instruction encoding
    // ARM code models
    codeARMThumb,     // ARM Thumb instruction set
    codeARM_a,        // $a-like mapping symbol (for ARM code)
    codeARM_d,        // $d-like mapping symbol (for data)
    codeARM_t,        // $t-like mapping symbol (for Thumb code)
  };
202 
203   struct Alignment {
valueAlignment204     Alignment(int v, int m = 0) : value(v), modulus(m) {}
205 
206     uint16_t value;
207     uint16_t modulus;
208 
209     bool operator==(const Alignment &rhs) const {
210       return (value == rhs.value) && (modulus == rhs.modulus);
211     }
212   };
213 
  /// Returns a value for the order of this Atom within its file.
  ///
  /// This is used by the linker to order the layout of Atoms so that the
  /// resulting image is stable and reproducible.
  virtual uint64_t ordinal() const = 0;

  /// Returns the number of bytes of space this atom's content will occupy in
  /// the final linked image.
  ///
  /// For a function atom, it is the number of bytes of code in the function.
  virtual uint64_t size() const = 0;
225 
226   /// The size of the section from which the atom is instantiated.
227   ///
228   /// Merge::mergeByLargestSection is defined in terms of section size
229   /// and not in terms of atom size, so we need this function separate
230   /// from size().
sectionSize()231   virtual uint64_t sectionSize() const { return 0; }
232 
  /// The visibility of this atom to other atoms.
  ///
  /// C static functions have scope scopeTranslationUnit.  Regular C functions
  /// have scope scopeGlobal.  Functions compiled with visibility=hidden have
  /// scope scopeLinkageUnit so they can be seen by other atoms being linked
  /// but not by the OS loader.
  virtual Scope scope() const = 0;

  /// Whether the linker should use direct or indirect access to this
  /// atom.
  virtual Interposable interposable() const = 0;

  /// How the linker should handle multiple atoms that have the same name.
  virtual Merge merge() const = 0;

  /// The type of this atom, such as code or data.
  virtual ContentType contentType() const = 0;

  /// The alignment constraints on how this atom must be laid out in the
  /// final linked image (e.g. 16-byte aligned).
  virtual Alignment alignment() const = 0;

  /// Whether this atom must be in a specially named section in the final
  /// linked image, or if the linker can infer the section based on the
  /// contentType().
  virtual SectionChoice sectionChoice() const = 0;

  /// If sectionChoice() != sectionBasedOnContent, then this returns the
  /// name of the section the atom should be placed into.
  virtual StringRef customSectionName() const = 0;

  /// Constraints on whether the linker may dead strip away this atom.
  virtual DeadStripKind deadStrip() const = 0;
266 
267   /// Under which conditions should this atom be dynamically exported.
dynamicExport()268   virtual DynamicExport dynamicExport() const {
269     return dynamicExportNormal;
270   }
271 
272   /// Code model used by the atom.
codeModel()273   virtual CodeModel codeModel() const { return codeNA; }
274 
  /// Returns the OS memory protections required for this atom's content
  /// at runtime.
  ///
  /// A function atom is R_X, a global variable is RW_, and a read-only constant
  /// is R__.
  ///
  /// Unlike most accessors in this class this one is not pure virtual; its
  /// definition is not in this header.
  virtual ContentPermissions permissions() const;

  /// Returns a reference to the raw (unrelocated) bytes of this Atom's
  /// content.
  virtual ArrayRef<uint8_t> rawContent() const = 0;
285 
286   /// This class abstracts iterating over the sequence of References
287   /// in an Atom.  Concrete instances of DefinedAtom must implement
288   /// the derefIterator() and incrementIterator() methods.
289   class reference_iterator {
290   public:
reference_iterator(const DefinedAtom & a,const void * it)291     reference_iterator(const DefinedAtom &a, const void *it)
292       : _atom(a), _it(it) { }
293 
294     const Reference *operator*() const {
295       return _atom.derefIterator(_it);
296     }
297 
298     const Reference *operator->() const {
299       return _atom.derefIterator(_it);
300     }
301 
302     bool operator==(const reference_iterator &other) const {
303       return _it == other._it;
304     }
305 
306     bool operator!=(const reference_iterator &other) const {
307       return !(*this == other);
308     }
309 
310     reference_iterator &operator++() {
311       _atom.incrementIterator(_it);
312       return *this;
313     }
314   private:
315     const DefinedAtom &_atom;
316     const void *_it;
317   };
318 
  /// Returns an iterator to the beginning of this Atom's References.
  /// Together with end() it delimits the range walked by reference_iterator.
  virtual reference_iterator begin() const = 0;

  /// Returns an iterator to the end of this Atom's References.
  virtual reference_iterator end() const = 0;
324 
325   /// Adds a reference to this atom.
addReference(Reference::KindNamespace ns,Reference::KindArch arch,Reference::KindValue kindValue,uint64_t off,const Atom * target,Reference::Addend a)326   virtual void addReference(Reference::KindNamespace ns,
327                             Reference::KindArch arch,
328                             Reference::KindValue kindValue, uint64_t off,
329                             const Atom *target, Reference::Addend a) {
330     llvm_unreachable("Subclass does not permit adding references");
331   }
332 
classof(const Atom * a)333   static bool classof(const Atom *a) {
334     return a->definition() == definitionRegular;
335   }
336 
  /// Utility for deriving permissions from content type.
  ///
  /// Static overload of permissions(); only declared here, with its
  /// definition outside this header.
  static ContentPermissions permissions(ContentType type);
339 
340   /// Utility function to check if the atom occupies file space
occupiesDiskSpace()341   bool occupiesDiskSpace() const {
342     ContentType atomContentType = contentType();
343     return !(atomContentType == DefinedAtom::typeZeroFill ||
344              atomContentType == DefinedAtom::typeZeroFillFast ||
345              atomContentType == DefinedAtom::typeTLVInitialZeroFill);
346   }
347 
348   /// Utility function to check if relocations in this atom to other defined
349   /// atoms can be implicitly generated, and so we don't need to explicitly
350   /// emit those relocations.
relocsToDefinedCanBeImplicit()351   bool relocsToDefinedCanBeImplicit() const {
352     ContentType atomContentType = contentType();
353     return atomContentType == typeCFI;
354   }
355 
356 protected:
  // DefinedAtom is an abstract base class. Only subclasses can access the
  // constructor, which initializes the Atom base with definitionRegular
  // (the value classof() tests for).
  DefinedAtom() : Atom(definitionRegular) { }

  // The destructor is protected as well, so it too is only reachable from
  // subclasses.
  ~DefinedAtom() override = default;
362 
  /// Returns a pointer to the Reference object that the abstract
  /// iterator "points" to.
  ///
  /// Called by reference_iterator's operator* and operator-> with the
  /// iterator's opaque position.
  virtual const Reference *derefIterator(const void *iter) const = 0;

  /// Adjusts the abstract iterator to "point" to the next Reference
  /// object for this Atom.
  ///
  /// \p iter is taken by reference and updated in place; called by
  /// reference_iterator's operator++.
  virtual void incrementIterator(const void *&iter) const = 0;
370 };
371 } // end namespace lld
372 
373 #endif
374