1 //===- subzero/src/IceELFSection.h - Model of ELF sections ------*- C++ -*-===//
2 //
3 //                        The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Representation of ELF sections.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef SUBZERO_SRC_ICEELFSECTION_H
16 #define SUBZERO_SRC_ICEELFSECTION_H
17 
18 #include "IceDefs.h"
19 #include "IceELFStreamer.h"
20 #include "IceFixups.h"
21 #include "IceOperand.h"
22 #include "IceStringPool.h"
23 
24 using namespace llvm::ELF;
25 
26 namespace Ice {
27 
28 class ELFStreamer;
29 class ELFStringTableSection;
30 
31 /// Base representation of an ELF section.
32 class ELFSection {
33   ELFSection() = delete;
34   ELFSection(const ELFSection &) = delete;
35   ELFSection &operator=(const ELFSection &) = delete;
36 
37 public:
38   virtual ~ELFSection() = default;
39 
40   /// Sentinel value for a section number/index for before the final section
41   /// index is actually known. The dummy NULL section will be assigned number 0,
42   /// and it is referenced by the dummy 0-th symbol in the symbol table, so use
43   /// max() instead of 0.
44   enum { NoSectionNumber = std::numeric_limits<SizeT>::max() };
45 
46   /// Constructs an ELF section, filling in fields that will be known once the
47   /// *type* of section is decided. Other fields may be updated incrementally or
48   /// only after the program is completely defined.
ELFSection(const std::string & Name,Elf64_Word ShType,Elf64_Xword ShFlags,Elf64_Xword ShAddralign,Elf64_Xword ShEntsize)49   ELFSection(const std::string &Name, Elf64_Word ShType, Elf64_Xword ShFlags,
50              Elf64_Xword ShAddralign, Elf64_Xword ShEntsize)
51       : Name(Name), Header() {
52     Header.sh_type = ShType;
53     Header.sh_flags = ShFlags;
54     Header.sh_addralign = ShAddralign;
55     Header.sh_entsize = ShEntsize;
56   }
57 
58   /// Set the section number/index after it is finally known.
setNumber(SizeT N)59   void setNumber(SizeT N) {
60     // Should only set the number once: from NoSectionNumber -> N.
61     assert(Number == NoSectionNumber);
62     Number = N;
63   }
getNumber()64   SizeT getNumber() const {
65     assert(Number != NoSectionNumber);
66     return Number;
67   }
68 
setSize(Elf64_Xword sh_size)69   void setSize(Elf64_Xword sh_size) { Header.sh_size = sh_size; }
getCurrentSize()70   SizeT getCurrentSize() const { return Header.sh_size; }
71 
setNameStrIndex(Elf64_Word sh_name)72   void setNameStrIndex(Elf64_Word sh_name) { Header.sh_name = sh_name; }
73 
getName()74   const std::string &getName() const { return Name; }
75 
setLinkNum(Elf64_Word sh_link)76   void setLinkNum(Elf64_Word sh_link) { Header.sh_link = sh_link; }
77 
setInfoNum(Elf64_Word sh_info)78   void setInfoNum(Elf64_Word sh_info) { Header.sh_info = sh_info; }
79 
setFileOffset(Elf64_Off sh_offset)80   void setFileOffset(Elf64_Off sh_offset) { Header.sh_offset = sh_offset; }
81 
getSectionAlign()82   Elf64_Xword getSectionAlign() const { return Header.sh_addralign; }
83 
84   /// Write the section header out with the given streamer.
85   template <bool IsELF64> void writeHeader(ELFStreamer &Str);
86 
87 protected:
88   /// Name of the section in convenient string form (instead of a index into the
89   /// Section Header String Table, which is not known till later).
90   const std::string Name;
91 
92   // The fields of the header. May only be partially initialized, but should
93   // be fully initialized before writing.
94   Elf64_Shdr Header;
95 
96   /// The number of the section after laying out sections.
97   SizeT Number = NoSectionNumber;
98 };
99 
100 /// Models text/code sections. Code is written out incrementally and the size of
101 /// the section is then updated incrementally.
102 class ELFTextSection : public ELFSection {
103   ELFTextSection() = delete;
104   ELFTextSection(const ELFTextSection &) = delete;
105   ELFTextSection &operator=(const ELFTextSection &) = delete;
106 
107 public:
108   using ELFSection::ELFSection;
109 
110   void appendData(ELFStreamer &Str, const llvm::StringRef MoreData);
111 };
112 
113 /// Models data/rodata sections. Data is written out incrementally and the size
114 /// of the section is then updated incrementally. Some rodata sections may have
115 /// fixed entsize and duplicates may be mergeable.
116 class ELFDataSection : public ELFSection {
117   ELFDataSection() = delete;
118   ELFDataSection(const ELFDataSection &) = delete;
119   ELFDataSection &operator=(const ELFDataSection &) = delete;
120 
121 public:
122   using ELFSection::ELFSection;
123 
124   void appendData(ELFStreamer &Str, const llvm::StringRef MoreData);
125 
126   void appendZeros(ELFStreamer &Str, SizeT NumBytes);
127 
128   void appendRelocationOffset(ELFStreamer &Str, bool IsRela,
129                               RelocOffsetT RelocOffset);
130 
131   /// Pad the next section offset for writing data elements to the requested
132   /// alignment. If the section is NOBITS then do not actually write out the
133   /// padding and only update the section size.
134   void padToAlignment(ELFStreamer &Str, Elf64_Xword Align);
135 };
136 
137 /// Model of ELF symbol table entries. Besides keeping track of the fields
138 /// required for an elf symbol table entry it also tracks the number that
139 /// represents the symbol's final index in the symbol table.
140 struct ELFSym {
141   Elf64_Sym Sym;
142   ELFSection *Section;
143   SizeT Number;
144 
145   /// Sentinel value for symbols that haven't been assigned a number yet. The
146   /// dummy 0-th symbol will be assigned number 0, so don't use that.
147   enum { UnknownNumber = std::numeric_limits<SizeT>::max() };
148 
setNumberELFSym149   void setNumber(SizeT N) {
150     assert(Number == UnknownNumber);
151     Number = N;
152   }
153 
getNumberELFSym154   SizeT getNumber() const {
155     assert(Number != UnknownNumber);
156     return Number;
157   }
158 };
159 
160 /// Models a symbol table. Symbols may be added up until updateIndices is
161 /// called. At that point the indices of each symbol will be finalized.
162 class ELFSymbolTableSection : public ELFSection {
163   ELFSymbolTableSection() = delete;
164   ELFSymbolTableSection(const ELFSymbolTableSection &) = delete;
165   ELFSymbolTableSection &operator=(const ELFSymbolTableSection &) = delete;
166 
167 public:
ELFSymbolTableSection(const std::string & Name,Elf64_Word ShType,Elf64_Xword ShFlags,Elf64_Xword ShAddralign,Elf64_Xword ShEntsize)168   ELFSymbolTableSection(const std::string &Name, Elf64_Word ShType,
169                         Elf64_Xword ShFlags, Elf64_Xword ShAddralign,
170                         Elf64_Xword ShEntsize)
171       : ELFSection(Name, ShType, ShFlags, ShAddralign, ShEntsize),
172         NullSymbolName(), NullSymbol(nullptr) {}
173 
174   /// Create initial entry for a symbol when it is defined. Each entry should
175   /// only be defined once. We might want to allow Name to be a dummy name
176   /// initially, then get updated to the real thing, since Data initializers are
177   /// read before the bitcode's symbol table is read.
178   void createDefinedSym(GlobalString Name, uint8_t Type, uint8_t Binding,
179                         ELFSection *Section, RelocOffsetT Offset, SizeT Size);
180 
181   /// Note that a symbol table entry needs to be created for the given symbol
182   /// because it is undefined.
183   void noteUndefinedSym(GlobalString Name, ELFSection *NullSection);
184 
185   const ELFSym *findSymbol(GlobalString Name) const;
186 
187   void createNullSymbol(ELFSection *NullSection, GlobalContext *Ctx);
getNullSymbol()188   const ELFSym *getNullSymbol() const { return NullSymbol; }
189 
getSectionDataSize()190   size_t getSectionDataSize() const {
191     return (LocalSymbols.size() + GlobalSymbols.size()) * Header.sh_entsize;
192   }
193 
getNumLocals()194   size_t getNumLocals() const { return LocalSymbols.size(); }
195 
196   void updateIndices(const ELFStringTableSection *StrTab);
197 
198   void writeData(ELFStreamer &Str, bool IsELF64);
199 
200 private:
201   // Map from symbol name to its symbol information. This assumes symbols are
202   // unique across all sections.
203   using SymtabKey = GlobalString;
204   using SymMap = std::map<SymtabKey, ELFSym>;
205 
206   template <bool IsELF64>
207   void writeSymbolMap(ELFStreamer &Str, const SymMap &Map);
208 
209   GlobalString NullSymbolName;
210   const ELFSym *NullSymbol;
211   // Keep Local and Global symbols separate, since the sh_info needs to know
212   // the index of the last LOCAL.
213   SymMap LocalSymbols;
214   SymMap GlobalSymbols;
215 };
216 
217 /// Models a relocation section.
218 class ELFRelocationSection : public ELFSection {
219   ELFRelocationSection() = delete;
220   ELFRelocationSection(const ELFRelocationSection &) = delete;
221   ELFRelocationSection &operator=(const ELFRelocationSection &) = delete;
222 
223 public:
ELFRelocationSection(const std::string & Name,Elf64_Word ShType,Elf64_Xword ShFlags,Elf64_Xword ShAddralign,Elf64_Xword ShEntsize)224   ELFRelocationSection(const std::string &Name, Elf64_Word ShType,
225                        Elf64_Xword ShFlags, Elf64_Xword ShAddralign,
226                        Elf64_Xword ShEntsize)
227       : ELFSection(Name, ShType, ShFlags, ShAddralign, ShEntsize),
228         RelatedSection(nullptr) {}
229 
getRelatedSection()230   const ELFSection *getRelatedSection() const { return RelatedSection; }
setRelatedSection(const ELFSection * Section)231   void setRelatedSection(const ELFSection *Section) {
232     RelatedSection = Section;
233   }
234 
235   /// Track additional relocations which start out relative to offset 0, but
236   /// should be adjusted to be relative to BaseOff.
237   void addRelocations(RelocOffsetT BaseOff, const FixupRefList &FixupRefs,
238                       ELFSymbolTableSection *SymTab);
239 
240   /// Track a single additional relocation.
addRelocation(const AssemblerFixup & Fixup)241   void addRelocation(const AssemblerFixup &Fixup) { Fixups.push_back(Fixup); }
242 
243   size_t getSectionDataSize() const;
244 
245   template <bool IsELF64>
246   void writeData(ELFStreamer &Str, const ELFSymbolTableSection *SymTab);
247 
isRela()248   bool isRela() const { return Header.sh_type == SHT_RELA; }
249 
250 private:
251   const ELFSection *RelatedSection;
252   FixupList Fixups;
253 };
254 
255 /// Models a string table. The user will build the string table by adding
256 /// strings incrementally. At some point, all strings should be known and
257 /// doLayout() should be called. After that, no other strings may be added.
258 /// However, the final offsets of the strings can be discovered and used to fill
259 /// out section headers and symbol table entries.
260 class ELFStringTableSection : public ELFSection {
261   ELFStringTableSection() = delete;
262   ELFStringTableSection(const ELFStringTableSection &) = delete;
263   ELFStringTableSection &operator=(const ELFStringTableSection &) = delete;
264 
265 public:
266   using ELFSection::ELFSection;
267 
268   /// Add a string to the table, in preparation for final layout.
269   void add(const std::string &Str);
add(GlobalString Str)270   void add(GlobalString Str) {
271     if (Str.hasStdString())
272       add(Str.toString());
273   }
274 
275   /// Finalizes the layout of the string table and fills in the section Data.
276   void doLayout();
277 
278   /// The first byte of the string table should be \0, so it is an invalid
279   /// index. Indices start out as unknown until layout is complete.
280   enum { UnknownIndex = 0 };
281 
282   /// Grabs the final index of a string after layout. Returns UnknownIndex if
283   /// the string's index is not found.
284   size_t getIndex(const std::string &Str) const;
285 
getSectionData()286   llvm::StringRef getSectionData() const {
287     assert(isLaidOut());
288     return llvm::StringRef(reinterpret_cast<const char *>(StringData.data()),
289                            StringData.size());
290   }
291 
getSectionDataSize()292   size_t getSectionDataSize() const { return getSectionData().size(); }
293 
294 private:
isLaidOut()295   bool isLaidOut() const { return !StringData.empty(); }
296 
297   /// Strings can share a string table entry if they share the same suffix.
298   /// E.g., "pop" and "lollipop" can both use the characters in "lollipop", but
299   /// "pops" cannot, and "unpop" cannot either. Though, "pop", "lollipop", and
300   /// "unpop" share "pop" as the suffix, "pop" can only share the characters
301   /// with one of them.
302   struct SuffixComparator {
303     bool operator()(const std::string &StrA, const std::string &StrB) const;
304   };
305 
306   using StringToIndexType = std::map<std::string, size_t, SuffixComparator>;
307 
308   /// Track strings to their index. Index will be UnknownIndex if not yet laid
309   /// out.
310   StringToIndexType StringToIndexMap;
311 
312   using RawDataType = std::vector<uint8_t>;
313   RawDataType StringData;
314 };
315 
writeHeader(ELFStreamer & Str)316 template <bool IsELF64> void ELFSection::writeHeader(ELFStreamer &Str) {
317   Str.writeELFWord<IsELF64>(Header.sh_name);
318   Str.writeELFWord<IsELF64>(Header.sh_type);
319   Str.writeELFXword<IsELF64>(Header.sh_flags);
320   Str.writeAddrOrOffset<IsELF64>(Header.sh_addr);
321   Str.writeAddrOrOffset<IsELF64>(Header.sh_offset);
322   Str.writeELFXword<IsELF64>(Header.sh_size);
323   Str.writeELFWord<IsELF64>(Header.sh_link);
324   Str.writeELFWord<IsELF64>(Header.sh_info);
325   Str.writeELFXword<IsELF64>(Header.sh_addralign);
326   Str.writeELFXword<IsELF64>(Header.sh_entsize);
327 }
328 
329 template <bool IsELF64>
writeSymbolMap(ELFStreamer & Str,const SymMap & Map)330 void ELFSymbolTableSection::writeSymbolMap(ELFStreamer &Str,
331                                            const SymMap &Map) {
332   // The order of the fields is different, so branch on IsELF64.
333   if (IsELF64) {
334     for (auto &KeyValue : Map) {
335       const Elf64_Sym &SymInfo = KeyValue.second.Sym;
336       Str.writeELFWord<IsELF64>(SymInfo.st_name);
337       Str.write8(SymInfo.st_info);
338       Str.write8(SymInfo.st_other);
339       Str.writeLE16(SymInfo.st_shndx);
340       Str.writeAddrOrOffset<IsELF64>(SymInfo.st_value);
341       Str.writeELFXword<IsELF64>(SymInfo.st_size);
342     }
343   } else {
344     for (auto &KeyValue : Map) {
345       const Elf64_Sym &SymInfo = KeyValue.second.Sym;
346       Str.writeELFWord<IsELF64>(SymInfo.st_name);
347       Str.writeAddrOrOffset<IsELF64>(SymInfo.st_value);
348       Str.writeELFWord<IsELF64>(SymInfo.st_size);
349       Str.write8(SymInfo.st_info);
350       Str.write8(SymInfo.st_other);
351       Str.writeLE16(SymInfo.st_shndx);
352     }
353   }
354 }
355 
356 template <bool IsELF64>
writeData(ELFStreamer & Str,const ELFSymbolTableSection * SymTab)357 void ELFRelocationSection::writeData(ELFStreamer &Str,
358                                      const ELFSymbolTableSection *SymTab) {
359   for (const AssemblerFixup &Fixup : Fixups) {
360     const ELFSym *Symbol;
361     if (Fixup.isNullSymbol()) {
362       Symbol = SymTab->getNullSymbol();
363     } else if (Fixup.valueIsSymbol()) {
364       Symbol = Fixup.getSymbolValue();
365     } else {
366       GlobalString Name = Fixup.symbol();
367       Symbol = SymTab->findSymbol(Name);
368       if (!Symbol)
369         llvm::report_fatal_error(Name + ": Missing symbol mentioned in reloc");
370     }
371 
372     if (IsELF64) {
373       Elf64_Rela Rela;
374       Rela.r_offset = Fixup.position();
375       Rela.setSymbolAndType(Symbol->getNumber(), Fixup.kind());
376       Rela.r_addend = Fixup.offset();
377       Str.writeAddrOrOffset<IsELF64>(Rela.r_offset);
378       Str.writeELFXword<IsELF64>(Rela.r_info);
379       Str.writeELFXword<IsELF64>(Rela.r_addend);
380     } else {
381       Elf32_Rel Rel;
382       Rel.r_offset = Fixup.position();
383       Rel.setSymbolAndType(Symbol->getNumber(), Fixup.kind());
384       Str.writeAddrOrOffset<IsELF64>(Rel.r_offset);
385       Str.writeELFWord<IsELF64>(Rel.r_info);
386     }
387   }
388 }
389 
390 } // end of namespace Ice
391 
392 #endif // SUBZERO_SRC_ICEELFSECTION_H
393