1 //===- MCAssembler.h - Object File Generation -------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_MC_MCASSEMBLER_H
11 #define LLVM_MC_MCASSEMBLER_H
12 
13 #include "llvm/ADT/ArrayRef.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallPtrSet.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/iterator.h"
18 #include "llvm/ADT/iterator_range.h"
19 #include "llvm/BinaryFormat/MachO.h"
20 #include "llvm/MC/MCDirectives.h"
21 #include "llvm/MC/MCDwarf.h"
22 #include "llvm/MC/MCFixup.h"
23 #include "llvm/MC/MCFragment.h"
24 #include "llvm/MC/MCLinkerOptimizationHint.h"
25 #include "llvm/MC/MCSymbol.h"
26 #include <cassert>
27 #include <cstddef>
28 #include <cstdint>
29 #include <string>
30 #include <utility>
31 #include <vector>
32 
33 namespace llvm {
34 
35 class MCAsmBackend;
36 class MCAsmLayout;
37 class MCContext;
38 class MCCodeEmitter;
39 class MCFragment;
40 class MCObjectWriter;
41 class MCSection;
42 class MCValue;
43 
44 // FIXME: This really doesn't belong here. See comments below.
45 struct IndirectSymbolData {
46   MCSymbol *Symbol;
47   MCSection *Section;
48 };
49 
50 // FIXME: Ditto this. Purely so the Streamer and the ObjectWriter can talk
51 // to one another.
52 struct DataRegionData {
53   // This enum should be kept in sync w/ the mach-o definition in
54   // llvm/Object/MachOFormat.h.
55   enum KindTy { Data = 1, JumpTable8, JumpTable16, JumpTable32 } Kind;
56   MCSymbol *Start;
57   MCSymbol *End;
58 };
59 
60 class MCAssembler {
61   friend class MCAsmLayout;
62 
63 public:
64   using SectionListType = std::vector<MCSection *>;
65   using SymbolDataListType = std::vector<const MCSymbol *>;
66 
67   using const_iterator = pointee_iterator<SectionListType::const_iterator>;
68   using iterator = pointee_iterator<SectionListType::iterator>;
69 
70   using const_symbol_iterator =
71       pointee_iterator<SymbolDataListType::const_iterator>;
72   using symbol_iterator = pointee_iterator<SymbolDataListType::iterator>;
73 
74   using symbol_range = iterator_range<symbol_iterator>;
75   using const_symbol_range = iterator_range<const_symbol_iterator>;
76 
77   using const_indirect_symbol_iterator =
78       std::vector<IndirectSymbolData>::const_iterator;
79   using indirect_symbol_iterator = std::vector<IndirectSymbolData>::iterator;
80 
81   using const_data_region_iterator =
82       std::vector<DataRegionData>::const_iterator;
83   using data_region_iterator = std::vector<DataRegionData>::iterator;
84 
85   /// MachO specific deployment target version info.
86   // A Major version of 0 indicates that no version information was supplied
87   // and so the corresponding load command should not be emitted.
88   using VersionInfoType = struct {
89     bool EmitBuildVersion;
90     union {
91       MCVersionMinType Type;          ///< Used when EmitBuildVersion==false.
92       MachO::PlatformType Platform;   ///< Used when EmitBuildVersion==true.
93     } TypeOrPlatform;
94     unsigned Major;
95     unsigned Minor;
96     unsigned Update;
97   };
98 
99 private:
100   MCContext &Context;
101 
102   std::unique_ptr<MCAsmBackend> Backend;
103 
104   std::unique_ptr<MCCodeEmitter> Emitter;
105 
106   std::unique_ptr<MCObjectWriter> Writer;
107 
108   SectionListType Sections;
109 
110   SymbolDataListType Symbols;
111 
112   std::vector<IndirectSymbolData> IndirectSymbols;
113 
114   std::vector<DataRegionData> DataRegions;
115 
116   /// The list of linker options to propagate into the object file.
117   std::vector<std::vector<std::string>> LinkerOptions;
118 
119   /// List of declared file names
120   std::vector<std::string> FileNames;
121 
122   MCDwarfLineTableParams LTParams;
123 
124   /// The set of function symbols for which a .thumb_func directive has
125   /// been seen.
126   //
127   // FIXME: We really would like this in target specific code rather than
128   // here. Maybe when the relocation stuff moves to target specific,
129   // this can go with it? The streamer would need some target specific
130   // refactoring too.
131   mutable SmallPtrSet<const MCSymbol *, 32> ThumbFuncs;
132 
133   /// The bundle alignment size currently set in the assembler.
134   ///
135   /// By default it's 0, which means bundling is disabled.
136   unsigned BundleAlignSize;
137 
138   bool RelaxAll : 1;
139   bool SubsectionsViaSymbols : 1;
140   bool IncrementalLinkerCompatible : 1;
141 
142   /// ELF specific e_header flags
143   // It would be good if there were an MCELFAssembler class to hold this.
144   // ELF header flags are used both by the integrated and standalone assemblers.
145   // Access to the flags is necessary in cases where assembler directives affect
146   // which flags to be set.
147   unsigned ELFHeaderEFlags;
148 
149   /// Used to communicate Linker Optimization Hint information between
150   /// the Streamer and the .o writer
151   MCLOHContainer LOHContainer;
152 
153   VersionInfoType VersionInfo;
154 
155   /// Evaluate a fixup to a relocatable expression and the value which should be
156   /// placed into the fixup.
157   ///
158   /// \param Layout The layout to use for evaluation.
159   /// \param Fixup The fixup to evaluate.
160   /// \param DF The fragment the fixup is inside.
161   /// \param Target [out] On return, the relocatable expression the fixup
162   /// evaluates to.
163   /// \param Value [out] On return, the value of the fixup as currently laid
164   /// out.
165   /// \param WasForced [out] On return, the value in the fixup is set to the
166   /// correct value if WasForced is true, even if evaluateFixup returns false.
167   /// \return Whether the fixup value was fully resolved. This is true if the
168   /// \p Value result is fixed, otherwise the value may change due to
169   /// relocation.
170   bool evaluateFixup(const MCAsmLayout &Layout, const MCFixup &Fixup,
171                      const MCFragment *DF, MCValue &Target,
172                      uint64_t &Value, bool &WasForced) const;
173 
174   /// Check whether a fixup can be satisfied, or whether it needs to be relaxed
175   /// (increased in size, in order to hold its value correctly).
176   bool fixupNeedsRelaxation(const MCFixup &Fixup, const MCRelaxableFragment *DF,
177                             const MCAsmLayout &Layout) const;
178 
179   /// Check whether the given fragment needs relaxation.
180   bool fragmentNeedsRelaxation(const MCRelaxableFragment *IF,
181                                const MCAsmLayout &Layout) const;
182 
183   /// Perform one layout iteration and return true if any offsets
184   /// were adjusted.
185   bool layoutOnce(MCAsmLayout &Layout);
186 
187   /// Perform one layout iteration of the given section and return true
188   /// if any offsets were adjusted.
189   bool layoutSectionOnce(MCAsmLayout &Layout, MCSection &Sec);
190 
191   bool relaxInstruction(MCAsmLayout &Layout, MCRelaxableFragment &IF);
192 
193   bool relaxPaddingFragment(MCAsmLayout &Layout, MCPaddingFragment &PF);
194 
195   bool relaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF);
196 
197   bool relaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF);
198   bool relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
199                                    MCDwarfCallFrameFragment &DF);
200   bool relaxCVInlineLineTable(MCAsmLayout &Layout,
201                               MCCVInlineLineTableFragment &DF);
202   bool relaxCVDefRange(MCAsmLayout &Layout, MCCVDefRangeFragment &DF);
203 
204   /// finishLayout - Finalize a layout, including fragment lowering.
205   void finishLayout(MCAsmLayout &Layout);
206 
207   std::tuple<MCValue, uint64_t, bool>
208   handleFixup(const MCAsmLayout &Layout, MCFragment &F, const MCFixup &Fixup);
209 
210 public:
211   std::vector<std::pair<StringRef, const MCSymbol *>> Symvers;
212 
213   /// Construct a new assembler instance.
214   //
215   // FIXME: How are we going to parameterize this? Two obvious options are stay
216   // concrete and require clients to pass in a target like object. The other
217   // option is to make this abstract, and have targets provide concrete
218   // implementations as we do with AsmParser.
219   MCAssembler(MCContext &Context, std::unique_ptr<MCAsmBackend> Backend,
220               std::unique_ptr<MCCodeEmitter> Emitter,
221               std::unique_ptr<MCObjectWriter> Writer);
222   MCAssembler(const MCAssembler &) = delete;
223   MCAssembler &operator=(const MCAssembler &) = delete;
224   ~MCAssembler();
225 
226   /// Compute the effective fragment size assuming it is laid out at the given
227   /// \p SectionAddress and \p FragmentOffset.
228   uint64_t computeFragmentSize(const MCAsmLayout &Layout,
229                                const MCFragment &F) const;
230 
231   /// Find the symbol which defines the atom containing the given symbol, or
232   /// null if there is no such symbol.
233   const MCSymbol *getAtom(const MCSymbol &S) const;
234 
235   /// Check whether a particular symbol is visible to the linker and is required
236   /// in the symbol table, or whether it can be discarded by the assembler. This
237   /// also effects whether the assembler treats the label as potentially
238   /// defining a separate atom.
239   bool isSymbolLinkerVisible(const MCSymbol &SD) const;
240 
241   /// Emit the section contents to \p OS.
242   void writeSectionData(raw_ostream &OS, const MCSection *Section,
243                         const MCAsmLayout &Layout) const;
244 
245   /// Check whether a given symbol has been flagged with .thumb_func.
246   bool isThumbFunc(const MCSymbol *Func) const;
247 
248   /// Flag a function symbol as the target of a .thumb_func directive.
setIsThumbFunc(const MCSymbol * Func)249   void setIsThumbFunc(const MCSymbol *Func) { ThumbFuncs.insert(Func); }
250 
251   /// ELF e_header flags
getELFHeaderEFlags()252   unsigned getELFHeaderEFlags() const { return ELFHeaderEFlags; }
setELFHeaderEFlags(unsigned Flags)253   void setELFHeaderEFlags(unsigned Flags) { ELFHeaderEFlags = Flags; }
254 
255   /// MachO deployment target version information.
getVersionInfo()256   const VersionInfoType &getVersionInfo() const { return VersionInfo; }
setVersionMin(MCVersionMinType Type,unsigned Major,unsigned Minor,unsigned Update)257   void setVersionMin(MCVersionMinType Type, unsigned Major, unsigned Minor,
258                      unsigned Update) {
259     VersionInfo.EmitBuildVersion = false;
260     VersionInfo.TypeOrPlatform.Type = Type;
261     VersionInfo.Major = Major;
262     VersionInfo.Minor = Minor;
263     VersionInfo.Update = Update;
264   }
setBuildVersion(MachO::PlatformType Platform,unsigned Major,unsigned Minor,unsigned Update)265   void setBuildVersion(MachO::PlatformType Platform, unsigned Major,
266                        unsigned Minor, unsigned Update) {
267     VersionInfo.EmitBuildVersion = true;
268     VersionInfo.TypeOrPlatform.Platform = Platform;
269     VersionInfo.Major = Major;
270     VersionInfo.Minor = Minor;
271     VersionInfo.Update = Update;
272   }
273 
274   /// Reuse an assembler instance
275   ///
276   void reset();
277 
getContext()278   MCContext &getContext() const { return Context; }
279 
getBackendPtr()280   MCAsmBackend *getBackendPtr() const { return Backend.get(); }
281 
getEmitterPtr()282   MCCodeEmitter *getEmitterPtr() const { return Emitter.get(); }
283 
getWriterPtr()284   MCObjectWriter *getWriterPtr() const { return Writer.get(); }
285 
getBackend()286   MCAsmBackend &getBackend() const { return *Backend; }
287 
getEmitter()288   MCCodeEmitter &getEmitter() const { return *Emitter; }
289 
getWriter()290   MCObjectWriter &getWriter() const { return *Writer; }
291 
getDWARFLinetableParams()292   MCDwarfLineTableParams getDWARFLinetableParams() const { return LTParams; }
setDWARFLinetableParams(MCDwarfLineTableParams P)293   void setDWARFLinetableParams(MCDwarfLineTableParams P) { LTParams = P; }
294 
295   /// Finish - Do final processing and write the object to the output stream.
296   /// \p Writer is used for custom object writer (as the MCJIT does),
297   /// if not specified it is automatically created from backend.
298   void Finish();
299 
300   // Layout all section and prepare them for emission.
301   void layout(MCAsmLayout &Layout);
302 
303   // FIXME: This does not belong here.
getSubsectionsViaSymbols()304   bool getSubsectionsViaSymbols() const { return SubsectionsViaSymbols; }
setSubsectionsViaSymbols(bool Value)305   void setSubsectionsViaSymbols(bool Value) { SubsectionsViaSymbols = Value; }
306 
isIncrementalLinkerCompatible()307   bool isIncrementalLinkerCompatible() const {
308     return IncrementalLinkerCompatible;
309   }
setIncrementalLinkerCompatible(bool Value)310   void setIncrementalLinkerCompatible(bool Value) {
311     IncrementalLinkerCompatible = Value;
312   }
313 
getRelaxAll()314   bool getRelaxAll() const { return RelaxAll; }
setRelaxAll(bool Value)315   void setRelaxAll(bool Value) { RelaxAll = Value; }
316 
isBundlingEnabled()317   bool isBundlingEnabled() const { return BundleAlignSize != 0; }
318 
getBundleAlignSize()319   unsigned getBundleAlignSize() const { return BundleAlignSize; }
320 
setBundleAlignSize(unsigned Size)321   void setBundleAlignSize(unsigned Size) {
322     assert((Size == 0 || !(Size & (Size - 1))) &&
323            "Expect a power-of-two bundle align size");
324     BundleAlignSize = Size;
325   }
326 
327   /// \name Section List Access
328   /// @{
329 
begin()330   iterator begin() { return Sections.begin(); }
begin()331   const_iterator begin() const { return Sections.begin(); }
332 
end()333   iterator end() { return Sections.end(); }
end()334   const_iterator end() const { return Sections.end(); }
335 
size()336   size_t size() const { return Sections.size(); }
337 
338   /// @}
339   /// \name Symbol List Access
340   /// @{
symbol_begin()341   symbol_iterator symbol_begin() { return Symbols.begin(); }
symbol_begin()342   const_symbol_iterator symbol_begin() const { return Symbols.begin(); }
343 
symbol_end()344   symbol_iterator symbol_end() { return Symbols.end(); }
symbol_end()345   const_symbol_iterator symbol_end() const { return Symbols.end(); }
346 
symbols()347   symbol_range symbols() { return make_range(symbol_begin(), symbol_end()); }
symbols()348   const_symbol_range symbols() const {
349     return make_range(symbol_begin(), symbol_end());
350   }
351 
symbol_size()352   size_t symbol_size() const { return Symbols.size(); }
353 
354   /// @}
355   /// \name Indirect Symbol List Access
356   /// @{
357 
358   // FIXME: This is a total hack, this should not be here. Once things are
359   // factored so that the streamer has direct access to the .o writer, it can
360   // disappear.
getIndirectSymbols()361   std::vector<IndirectSymbolData> &getIndirectSymbols() {
362     return IndirectSymbols;
363   }
364 
indirect_symbol_begin()365   indirect_symbol_iterator indirect_symbol_begin() {
366     return IndirectSymbols.begin();
367   }
indirect_symbol_begin()368   const_indirect_symbol_iterator indirect_symbol_begin() const {
369     return IndirectSymbols.begin();
370   }
371 
indirect_symbol_end()372   indirect_symbol_iterator indirect_symbol_end() {
373     return IndirectSymbols.end();
374   }
indirect_symbol_end()375   const_indirect_symbol_iterator indirect_symbol_end() const {
376     return IndirectSymbols.end();
377   }
378 
indirect_symbol_size()379   size_t indirect_symbol_size() const { return IndirectSymbols.size(); }
380 
381   /// @}
382   /// \name Linker Option List Access
383   /// @{
384 
getLinkerOptions()385   std::vector<std::vector<std::string>> &getLinkerOptions() {
386     return LinkerOptions;
387   }
388 
389   /// @}
390   /// \name Data Region List Access
391   /// @{
392 
393   // FIXME: This is a total hack, this should not be here. Once things are
394   // factored so that the streamer has direct access to the .o writer, it can
395   // disappear.
getDataRegions()396   std::vector<DataRegionData> &getDataRegions() { return DataRegions; }
397 
data_region_begin()398   data_region_iterator data_region_begin() { return DataRegions.begin(); }
data_region_begin()399   const_data_region_iterator data_region_begin() const {
400     return DataRegions.begin();
401   }
402 
data_region_end()403   data_region_iterator data_region_end() { return DataRegions.end(); }
data_region_end()404   const_data_region_iterator data_region_end() const {
405     return DataRegions.end();
406   }
407 
data_region_size()408   size_t data_region_size() const { return DataRegions.size(); }
409 
410   /// @}
411   /// \name Data Region List Access
412   /// @{
413 
414   // FIXME: This is a total hack, this should not be here. Once things are
415   // factored so that the streamer has direct access to the .o writer, it can
416   // disappear.
getLOHContainer()417   MCLOHContainer &getLOHContainer() { return LOHContainer; }
getLOHContainer()418   const MCLOHContainer &getLOHContainer() const {
419     return const_cast<MCAssembler *>(this)->getLOHContainer();
420   }
421 
422   struct CGProfileEntry {
423     const MCSymbolRefExpr *From;
424     const MCSymbolRefExpr *To;
425     uint64_t Count;
426   };
427   std::vector<CGProfileEntry> CGProfile;
428   /// @}
429   /// \name Backend Data Access
430   /// @{
431 
432   bool registerSection(MCSection &Section);
433 
434   void registerSymbol(const MCSymbol &Symbol, bool *Created = nullptr);
435 
getFileNames()436   ArrayRef<std::string> getFileNames() { return FileNames; }
437 
addFileName(StringRef FileName)438   void addFileName(StringRef FileName) {
439     if (!is_contained(FileNames, FileName))
440       FileNames.push_back(FileName);
441   }
442 
443   /// Write the necessary bundle padding to \p OS.
444   /// Expects a fragment \p F containing instructions and its size \p FSize.
445   void writeFragmentPadding(raw_ostream &OS, const MCEncodedFragment &F,
446                             uint64_t FSize) const;
447 
448   /// @}
449 
450   void dump() const;
451 };
452 
453 /// Compute the amount of padding required before the fragment \p F to
454 /// obey bundling restrictions, where \p FOffset is the fragment's offset in
455 /// its section and \p FSize is the fragment's size.
456 uint64_t computeBundlePadding(const MCAssembler &Assembler,
457                               const MCEncodedFragment *F, uint64_t FOffset,
458                               uint64_t FSize);
459 
460 } // end namespace llvm
461 
462 #endif // LLVM_MC_MCASSEMBLER_H
463