1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_ELF_BUILDER_H_
18 #define ART_COMPILER_ELF_BUILDER_H_
19 
20 #include <vector>
21 
22 #include "arch/instruction_set.h"
23 #include "base/bit_utils.h"
24 #include "base/unix_file/fd_file.h"
25 #include "buffered_output_stream.h"
26 #include "elf_utils.h"
27 #include "file_output_stream.h"
28 
29 namespace art {
30 
31 class CodeOutput {
32  public:
33   virtual bool Write(OutputStream* out) = 0;
~CodeOutput()34   virtual ~CodeOutput() {}
35 };
36 
37 // Writes ELF file.
38 // The main complication is that the sections often want to reference
39 // each other.  We solve this by writing the ELF file in two stages:
40 //  * Sections are asked about their size, and overall layout is calculated.
41 //  * Sections do the actual writes which may use offsets of other sections.
42 template <typename ElfTypes>
43 class ElfBuilder FINAL {
44  public:
45   using Elf_Addr = typename ElfTypes::Addr;
46   using Elf_Off = typename ElfTypes::Off;
47   using Elf_Word = typename ElfTypes::Word;
48   using Elf_Sword = typename ElfTypes::Sword;
49   using Elf_Ehdr = typename ElfTypes::Ehdr;
50   using Elf_Shdr = typename ElfTypes::Shdr;
51   using Elf_Sym = typename ElfTypes::Sym;
52   using Elf_Phdr = typename ElfTypes::Phdr;
53   using Elf_Dyn = typename ElfTypes::Dyn;
54 
55   // Base class of all sections.
56   class Section {
57    public:
Section(const std::string & name,Elf_Word type,Elf_Word flags,const Section * link,Elf_Word info,Elf_Word align,Elf_Word entsize)58     Section(const std::string& name, Elf_Word type, Elf_Word flags,
59             const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize)
60         : header_(), section_index_(0), name_(name), link_(link) {
61       header_.sh_type = type;
62       header_.sh_flags = flags;
63       header_.sh_info = info;
64       header_.sh_addralign = align;
65       header_.sh_entsize = entsize;
66     }
~Section()67     virtual ~Section() {}
68 
69     // Returns the size of the content of this section.  It is used to
70     // calculate file offsets of all sections before doing any writes.
71     virtual Elf_Word GetSize() const = 0;
72 
73     // Write the content of this section to the given file.
74     // This must write exactly the number of bytes returned by GetSize().
75     // Offsets of all sections are known when this method is called.
76     virtual bool Write(File* elf_file) = 0;
77 
GetLink()78     Elf_Word GetLink() const {
79       return (link_ != nullptr) ? link_->GetSectionIndex() : 0;
80     }
81 
GetHeader()82     const Elf_Shdr* GetHeader() const {
83       return &header_;
84     }
85 
GetHeader()86     Elf_Shdr* GetHeader() {
87       return &header_;
88     }
89 
GetSectionIndex()90     Elf_Word GetSectionIndex() const {
91       DCHECK_NE(section_index_, 0u);
92       return section_index_;
93     }
94 
SetSectionIndex(Elf_Word section_index)95     void SetSectionIndex(Elf_Word section_index) {
96       section_index_ = section_index;
97     }
98 
GetName()99     const std::string& GetName() const {
100       return name_;
101     }
102 
103    private:
104     Elf_Shdr header_;
105     Elf_Word section_index_;
106     const std::string name_;
107     const Section* const link_;
108 
109     DISALLOW_COPY_AND_ASSIGN(Section);
110   };
111 
112   // Writer of .dynamic section.
113   class DynamicSection FINAL : public Section {
114    public:
AddDynamicTag(Elf_Sword tag,Elf_Word value,const Section * section)115     void AddDynamicTag(Elf_Sword tag, Elf_Word value, const Section* section) {
116       DCHECK_NE(tag, static_cast<Elf_Sword>(DT_NULL));
117       dynamics_.push_back({tag, value, section});
118     }
119 
DynamicSection(const std::string & name,Section * link)120     DynamicSection(const std::string& name, Section* link)
121         : Section(name, SHT_DYNAMIC, SHF_ALLOC,
122                   link, 0, kPageSize, sizeof(Elf_Dyn)) {}
123 
GetSize()124     Elf_Word GetSize() const OVERRIDE {
125       return (dynamics_.size() + 1 /* DT_NULL */) * sizeof(Elf_Dyn);
126     }
127 
Write(File * elf_file)128     bool Write(File* elf_file) OVERRIDE {
129       std::vector<Elf_Dyn> buffer;
130       buffer.reserve(dynamics_.size() + 1u);
131       for (const ElfDynamicState& it : dynamics_) {
132         if (it.section_ != nullptr) {
133           // We are adding an address relative to a section.
134           buffer.push_back(
135               {it.tag_, {it.value_ + it.section_->GetHeader()->sh_addr}});
136         } else {
137           buffer.push_back({it.tag_, {it.value_}});
138         }
139       }
140       buffer.push_back({DT_NULL, {0}});
141       return WriteArray(elf_file, buffer.data(), buffer.size());
142     }
143 
144    private:
145     struct ElfDynamicState {
146       Elf_Sword tag_;
147       Elf_Word value_;
148       const Section* section_;
149     };
150     std::vector<ElfDynamicState> dynamics_;
151   };
152 
153   using PatchFn = void (*)(const std::vector<uintptr_t>& patch_locations,
154                            Elf_Addr buffer_address,
155                            Elf_Addr base_address,
156                            std::vector<uint8_t>* buffer);
157 
158   // Section with content based on simple memory buffer.
159   // The buffer can be optionally patched before writing.
160   class RawSection FINAL : public Section {
161    public:
162     RawSection(const std::string& name, Elf_Word type, Elf_Word flags,
163                const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize,
164                PatchFn patch = nullptr, const Section* patch_base_section = nullptr)
Section(name,type,flags,link,info,align,entsize)165         : Section(name, type, flags, link, info, align, entsize),
166           patched_(false), patch_(patch), patch_base_section_(patch_base_section) {
167     }
168 
RawSection(const std::string & name,Elf_Word type)169     RawSection(const std::string& name, Elf_Word type)
170         : RawSection(name, type, 0, nullptr, 0, 1, 0, nullptr, nullptr) {
171     }
172 
GetSize()173     Elf_Word GetSize() const OVERRIDE {
174       return buffer_.size();
175     }
176 
Write(File * elf_file)177     bool Write(File* elf_file) OVERRIDE {
178       if (!patch_locations_.empty()) {
179         DCHECK(!patched_);  // Do not patch twice.
180         DCHECK(patch_ != nullptr);
181         DCHECK(patch_base_section_ != nullptr);
182         patch_(patch_locations_,
183                this->GetHeader()->sh_addr,
184                patch_base_section_->GetHeader()->sh_addr,
185                &buffer_);
186         patched_ = true;
187       }
188       return WriteArray(elf_file, buffer_.data(), buffer_.size());
189     }
190 
IsEmpty()191     bool IsEmpty() const {
192       return buffer_.size() == 0;
193     }
194 
GetBuffer()195     std::vector<uint8_t>* GetBuffer() {
196       return &buffer_;
197     }
198 
SetBuffer(const std::vector<uint8_t> & buffer)199     void SetBuffer(const std::vector<uint8_t>& buffer) {
200       buffer_ = buffer;
201     }
202 
GetPatchLocations()203     std::vector<uintptr_t>* GetPatchLocations() {
204       return &patch_locations_;
205     }
206 
207    private:
208     std::vector<uint8_t> buffer_;
209     std::vector<uintptr_t> patch_locations_;
210     bool patched_;
211     // User-provided function to do the actual patching.
212     PatchFn patch_;
213     // The section that we patch against (usually .text).
214     const Section* patch_base_section_;
215   };
216 
217   // Writer of .rodata section or .text section.
218   // The write is done lazily using the provided CodeOutput.
219   class OatSection FINAL : public Section {
220    public:
OatSection(const std::string & name,Elf_Word type,Elf_Word flags,const Section * link,Elf_Word info,Elf_Word align,Elf_Word entsize,Elf_Word size,CodeOutput * code_output)221     OatSection(const std::string& name, Elf_Word type, Elf_Word flags,
222                const Section* link, Elf_Word info, Elf_Word align,
223                Elf_Word entsize, Elf_Word size, CodeOutput* code_output)
224         : Section(name, type, flags, link, info, align, entsize),
225           size_(size), code_output_(code_output) {
226     }
227 
GetSize()228     Elf_Word GetSize() const OVERRIDE {
229       return size_;
230     }
231 
Write(File * elf_file)232     bool Write(File* elf_file) OVERRIDE {
233       // The BufferedOutputStream class contains the buffer as field,
234       // therefore it is too big to allocate on the stack.
235       std::unique_ptr<BufferedOutputStream> output_stream(
236           new BufferedOutputStream(new FileOutputStream(elf_file)));
237       return code_output_->Write(output_stream.get());
238     }
239 
240    private:
241     Elf_Word size_;
242     CodeOutput* code_output_;
243   };
244 
245   // Writer of .bss section.
246   class NoBitsSection FINAL : public Section {
247    public:
NoBitsSection(const std::string & name,Elf_Word size)248     NoBitsSection(const std::string& name, Elf_Word size)
249         : Section(name, SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
250           size_(size) {
251     }
252 
GetSize()253     Elf_Word GetSize() const OVERRIDE {
254       return size_;
255     }
256 
Write(File * elf_file ATTRIBUTE_UNUSED)257     bool Write(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE {
258       LOG(ERROR) << "This section should not be written to the ELF file";
259       return false;
260     }
261 
262    private:
263     Elf_Word size_;
264   };
265 
266   // Writer of .dynstr .strtab and .shstrtab sections.
267   class StrtabSection FINAL : public Section {
268    public:
StrtabSection(const std::string & name,Elf_Word flags)269     StrtabSection(const std::string& name, Elf_Word flags)
270         : Section(name, SHT_STRTAB, flags, nullptr, 0, 1, 0) {
271       buffer_.reserve(4 * KB);
272       // The first entry of strtab must be empty string.
273       buffer_ += '\0';
274     }
275 
AddName(const std::string & name)276     Elf_Word AddName(const std::string& name) {
277       Elf_Word offset = buffer_.size();
278       buffer_ += name;
279       buffer_ += '\0';
280       return offset;
281     }
282 
GetSize()283     Elf_Word GetSize() const OVERRIDE {
284       return buffer_.size();
285     }
286 
Write(File * elf_file)287     bool Write(File* elf_file) OVERRIDE {
288       return WriteArray(elf_file, buffer_.data(), buffer_.size());
289     }
290 
291    private:
292     std::string buffer_;
293   };
294 
295   class HashSection;
296 
297   // Writer of .dynsym and .symtab sections.
298   class SymtabSection FINAL : public Section {
299    public:
300     // Add a symbol with given name to this symtab. The symbol refers to
301     // 'relative_addr' within the given section and has the given attributes.
302     void AddSymbol(const std::string& name, const Section* section,
303                    Elf_Addr addr, bool is_relative, Elf_Word size,
304                    uint8_t binding, uint8_t type, uint8_t other = 0) {
305       CHECK(section != nullptr);
306       Elf_Word name_idx = strtab_->AddName(name);
307       symbols_.push_back({ name, section, addr, size, is_relative,
308                            MakeStInfo(binding, type), other, name_idx });
309     }
310 
SymtabSection(const std::string & name,Elf_Word type,Elf_Word flags,StrtabSection * strtab)311     SymtabSection(const std::string& name, Elf_Word type, Elf_Word flags,
312                   StrtabSection* strtab)
313         : Section(name, type, flags, strtab, 0, sizeof(Elf_Off), sizeof(Elf_Sym)),
314           strtab_(strtab) {
315     }
316 
IsEmpty()317     bool IsEmpty() const {
318       return symbols_.empty();
319     }
320 
GetSize()321     Elf_Word GetSize() const OVERRIDE {
322       return (1 /* NULL */ + symbols_.size()) * sizeof(Elf_Sym);
323     }
324 
Write(File * elf_file)325     bool Write(File* elf_file) OVERRIDE {
326       std::vector<Elf_Sym> buffer;
327       buffer.reserve(1u + symbols_.size());
328       buffer.push_back(Elf_Sym());  // NULL.
329       for (const ElfSymbolState& it : symbols_) {
330         Elf_Sym sym = Elf_Sym();
331         sym.st_name = it.name_idx_;
332         if (it.is_relative_) {
333           sym.st_value = it.addr_ + it.section_->GetHeader()->sh_addr;
334         } else {
335           sym.st_value = it.addr_;
336         }
337         sym.st_size = it.size_;
338         sym.st_other = it.other_;
339         sym.st_shndx = it.section_->GetSectionIndex();
340         sym.st_info = it.info_;
341         buffer.push_back(sym);
342       }
343       return WriteArray(elf_file, buffer.data(), buffer.size());
344     }
345 
346    private:
347     struct ElfSymbolState {
348       const std::string name_;
349       const Section* section_;
350       Elf_Addr addr_;
351       Elf_Word size_;
352       bool is_relative_;
353       uint8_t info_;
354       uint8_t other_;
355       Elf_Word name_idx_;  // index in the strtab.
356     };
357 
MakeStInfo(uint8_t binding,uint8_t type)358     static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) {
359       return ((binding) << 4) + ((type) & 0xf);
360     }
361 
362     // The symbols in the same order they will be in the symbol table.
363     std::vector<ElfSymbolState> symbols_;
364     StrtabSection* strtab_;
365 
366     friend class HashSection;
367   };
368 
369   // TODO: Consider removing.
370   // We use it only for the dynsym section which has only 5 symbols.
371   // We do not use it for symtab, and we probably do not have to
372   // since we use those symbols only to print backtraces.
373   class HashSection FINAL : public Section {
374    public:
HashSection(const std::string & name,Elf_Word flags,SymtabSection * symtab)375     HashSection(const std::string& name, Elf_Word flags, SymtabSection* symtab)
376         : Section(name, SHT_HASH, flags, symtab,
377                   0, sizeof(Elf_Word), sizeof(Elf_Word)),
378           symtab_(symtab) {
379     }
380 
GetSize()381     Elf_Word GetSize() const OVERRIDE {
382       Elf_Word nbuckets = GetNumBuckets();
383       Elf_Word chain_size = symtab_->symbols_.size() + 1 /* NULL */;
384       return (2 /* header */ + nbuckets + chain_size) * sizeof(Elf_Word);
385     }
386 
Write(File * const elf_file)387     bool Write(File* const elf_file) OVERRIDE {
388       // Here is how The ELF hash table works.
389       // There are 3 arrays to worry about.
390       // * The symbol table where the symbol information is.
391       // * The bucket array which is an array of indexes into the symtab and chain.
392       // * The chain array which is also an array of indexes into the symtab and chain.
393       //
394       // Lets say the state is something like this.
395       // +--------+       +--------+      +-----------+
396       // | symtab |       | bucket |      |   chain   |
397       // |  null  |       | 1      |      | STN_UNDEF |
398       // | <sym1> |       | 4      |      | 2         |
399       // | <sym2> |       |        |      | 5         |
400       // | <sym3> |       |        |      | STN_UNDEF |
401       // | <sym4> |       |        |      | 3         |
402       // | <sym5> |       |        |      | STN_UNDEF |
403       // +--------+       +--------+      +-----------+
404       //
405       // The lookup process (in python psudocode) is
406       //
407       // def GetSym(name):
408       //     # NB STN_UNDEF == 0
409       //     indx = bucket[elfhash(name) % num_buckets]
410       //     while indx != STN_UNDEF:
411       //         if GetSymbolName(symtab[indx]) == name:
412       //             return symtab[indx]
413       //         indx = chain[indx]
414       //     return SYMBOL_NOT_FOUND
415       //
416       // Between bucket and chain arrays every symtab index must be present exactly
417       // once (except for STN_UNDEF, which must be present 1 + num_bucket times).
418       const auto& symbols = symtab_->symbols_;
419       // Select number of buckets.
420       // This is essentially arbitrary.
421       Elf_Word nbuckets = GetNumBuckets();
422       // 1 is for the implicit NULL symbol.
423       Elf_Word chain_size = (symbols.size() + 1);
424       std::vector<Elf_Word> hash;
425       hash.push_back(nbuckets);
426       hash.push_back(chain_size);
427       uint32_t bucket_offset = hash.size();
428       uint32_t chain_offset = bucket_offset + nbuckets;
429       hash.resize(hash.size() + nbuckets + chain_size, 0);
430 
431       Elf_Word* buckets = hash.data() + bucket_offset;
432       Elf_Word* chain   = hash.data() + chain_offset;
433 
434       // Set up the actual hash table.
435       for (Elf_Word i = 0; i < symbols.size(); i++) {
436         // Add 1 since we need to have the null symbol that is not in the symbols
437         // list.
438         Elf_Word index = i + 1;
439         Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols[i].name_.c_str())) % nbuckets;
440         if (buckets[hash_val] == 0) {
441           buckets[hash_val] = index;
442         } else {
443           hash_val = buckets[hash_val];
444           CHECK_LT(hash_val, chain_size);
445           while (chain[hash_val] != 0) {
446             hash_val = chain[hash_val];
447             CHECK_LT(hash_val, chain_size);
448           }
449           chain[hash_val] = index;
450           // Check for loops. Works because if this is non-empty then there must be
451           // another cell which already contains the same symbol index as this one,
452           // which means some symbol has more then one name, which isn't allowed.
453           CHECK_EQ(chain[index], static_cast<Elf_Word>(0));
454         }
455       }
456       return WriteArray(elf_file, hash.data(), hash.size());
457     }
458 
459    private:
GetNumBuckets()460     Elf_Word GetNumBuckets() const {
461       const auto& symbols = symtab_->symbols_;
462       if (symbols.size() < 8) {
463         return 2;
464       } else if (symbols.size() < 32) {
465         return 4;
466       } else if (symbols.size() < 256) {
467         return 16;
468       } else {
469         // Have about 32 ids per bucket.
470         return RoundUp(symbols.size()/32, 2);
471       }
472     }
473 
474     // from bionic
elfhash(const char * _name)475     static inline unsigned elfhash(const char *_name) {
476       const unsigned char *name = (const unsigned char *) _name;
477       unsigned h = 0, g;
478 
479       while (*name) {
480         h = (h << 4) + *name++;
481         g = h & 0xf0000000;
482         h ^= g;
483         h ^= g >> 24;
484       }
485       return h;
486     }
487 
488     SymtabSection* symtab_;
489 
490     DISALLOW_COPY_AND_ASSIGN(HashSection);
491   };
492 
// Sets up the sections which are owned by the builder itself.
// |rodata_size| and |text_size| are the sizes reported by the .rodata and
// .text sections; their content is produced by |rodata_writer| and
// |text_writer| when the file is written.  |bss_size| is the size of .bss.
ElfBuilder(InstructionSet isa,
           Elf_Word rodata_size, CodeOutput* rodata_writer,
           Elf_Word text_size, CodeOutput* text_writer,
           Elf_Word bss_size)
    : isa_(isa),
      // Dynamic symbols, their names and the hash table over them.
      dynstr_(".dynstr", SHF_ALLOC),
      dynsym_(".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
      hash_(".hash", SHF_ALLOC, &dynsym_),
      // Page-aligned sections whose content comes from the writers.
      rodata_(".rodata", SHT_PROGBITS, SHF_ALLOC,
              nullptr, 0, kPageSize, 0, rodata_size, rodata_writer),
      text_(".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
            nullptr, 0, kPageSize, 0, text_size, text_writer),
      bss_(".bss", bss_size),
      dynamic_(".dynamic", &dynstr_),
      // Sections without SHF_ALLOC.
      strtab_(".strtab", 0),
      symtab_(".symtab", SHT_SYMTAB, 0, &strtab_),
      shstrtab_(".shstrtab", 0) {
}
// Nothing to release explicitly here.
~ElfBuilder() = default;
512 
GetText()513   OatSection* GetText() { return &text_; }
GetSymtab()514   SymtabSection* GetSymtab() { return &symtab_; }
515 
Write(File * elf_file)516   bool Write(File* elf_file) {
517     // Since the .text section of an oat file contains relative references to .rodata
518     // and (optionally) .bss, we keep these 2 or 3 sections together. This creates
519     // a non-traditional layout where the .bss section is mapped independently of the
520     // .dynamic section and needs its own program header with LOAD RW.
521     //
522     // The basic layout of the elf file. Order may be different in final output.
523     // +-------------------------+
524     // | Elf_Ehdr                |
525     // +-------------------------+
526     // | Elf_Phdr PHDR           |
527     // | Elf_Phdr LOAD R         | .dynsym .dynstr .hash .rodata
528     // | Elf_Phdr LOAD R X       | .text
529     // | Elf_Phdr LOAD RW        | .bss (Optional)
530     // | Elf_Phdr LOAD RW        | .dynamic
531     // | Elf_Phdr DYNAMIC        | .dynamic
532     // | Elf_Phdr LOAD R         | .eh_frame .eh_frame_hdr
533     // | Elf_Phdr EH_FRAME R     | .eh_frame_hdr
534     // +-------------------------+
535     // | .dynsym                 |
536     // | Elf_Sym  STN_UNDEF      |
537     // | Elf_Sym  oatdata        |
538     // | Elf_Sym  oatexec        |
539     // | Elf_Sym  oatlastword    |
540     // | Elf_Sym  oatbss         | (Optional)
541     // | Elf_Sym  oatbsslastword | (Optional)
542     // +-------------------------+
543     // | .dynstr                 |
544     // | names for .dynsym       |
545     // +-------------------------+
546     // | .hash                   |
547     // | hashtable for dynsym    |
548     // +-------------------------+
549     // | .rodata                 |
550     // | oatdata..oatexec-4      |
551     // +-------------------------+
552     // | .text                   |
553     // | oatexec..oatlastword    |
554     // +-------------------------+
555     // | .dynamic                |
556     // | Elf_Dyn DT_HASH         |
557     // | Elf_Dyn DT_STRTAB       |
558     // | Elf_Dyn DT_SYMTAB       |
559     // | Elf_Dyn DT_SYMENT       |
560     // | Elf_Dyn DT_STRSZ        |
561     // | Elf_Dyn DT_SONAME       |
562     // | Elf_Dyn DT_NULL         |
563     // +-------------------------+  (Optional)
564     // | .symtab                 |  (Optional)
565     // | program symbols         |  (Optional)
566     // +-------------------------+  (Optional)
567     // | .strtab                 |  (Optional)
568     // | names for .symtab       |  (Optional)
569     // +-------------------------+  (Optional)
570     // | .eh_frame               |  (Optional)
571     // +-------------------------+  (Optional)
572     // | .eh_frame_hdr           |  (Optional)
573     // +-------------------------+  (Optional)
574     // | .debug_info             |  (Optional)
575     // +-------------------------+  (Optional)
576     // | .debug_abbrev           |  (Optional)
577     // +-------------------------+  (Optional)
578     // | .debug_str              |  (Optional)
579     // +-------------------------+  (Optional)
580     // | .debug_line             |  (Optional)
581     // +-------------------------+
582     // | .shstrtab               |
583     // | names of sections       |
584     // +-------------------------+
585     // | Elf_Shdr null           |
586     // | Elf_Shdr .dynsym        |
587     // | Elf_Shdr .dynstr        |
588     // | Elf_Shdr .hash          |
589     // | Elf_Shdr .rodata        |
590     // | Elf_Shdr .text          |
591     // | Elf_Shdr .bss           |  (Optional)
592     // | Elf_Shdr .dynamic       |
593     // | Elf_Shdr .symtab        |  (Optional)
594     // | Elf_Shdr .strtab        |  (Optional)
595     // | Elf_Shdr .eh_frame      |  (Optional)
596     // | Elf_Shdr .eh_frame_hdr  |  (Optional)
597     // | Elf_Shdr .debug_info    |  (Optional)
598     // | Elf_Shdr .debug_abbrev  |  (Optional)
599     // | Elf_Shdr .debug_str     |  (Optional)
600     // | Elf_Shdr .debug_line    |  (Optional)
601     // | Elf_Shdr .oat_patches   |  (Optional)
602     // | Elf_Shdr .shstrtab      |
603     // +-------------------------+
604     constexpr bool debug_logging_ = false;
605 
606     // Create a list of all section which we want to write.
607     // This is the order in which they will be written.
608     std::vector<Section*> sections;
609     sections.push_back(&dynsym_);
610     sections.push_back(&dynstr_);
611     sections.push_back(&hash_);
612     sections.push_back(&rodata_);
613     sections.push_back(&text_);
614     if (bss_.GetSize() != 0u) {
615       sections.push_back(&bss_);
616     }
617     sections.push_back(&dynamic_);
618     if (!symtab_.IsEmpty()) {
619       sections.push_back(&symtab_);
620       sections.push_back(&strtab_);
621     }
622     for (Section* section : other_sections_) {
623       sections.push_back(section);
624     }
625     sections.push_back(&shstrtab_);
626     for (size_t i = 0; i < sections.size(); i++) {
627       // The first section index is 1.  Index 0 is reserved for NULL.
628       // Section index is used for relative symbols and for section links.
629       sections[i]->SetSectionIndex(i + 1);
630       // Add section name to .shstrtab.
631       Elf_Word name_offset = shstrtab_.AddName(sections[i]->GetName());
632       sections[i]->GetHeader()->sh_name = name_offset;
633     }
634 
635     // The running program does not have access to section headers
636     // and the loader is not supposed to use them either.
637     // The dynamic sections therefore replicates some of the layout
638     // information like the address and size of .rodata and .text.
639     // It also contains other metadata like the SONAME.
640     // The .dynamic section is found using the PT_DYNAMIC program header.
641     BuildDynsymSection();
642     BuildDynamicSection(elf_file->GetPath());
643 
644     // We do not know the number of headers until the final stages of write.
645     // It is easiest to just reserve a fixed amount of space for them.
646     constexpr size_t kMaxProgramHeaders = 8;
647     constexpr size_t kProgramHeadersOffset = sizeof(Elf_Ehdr);
648 
649     // Layout of all sections - determine the final file offsets and addresses.
650     // This must be done after we have built all sections and know their size.
651     Elf_Off file_offset = kProgramHeadersOffset + sizeof(Elf_Phdr) * kMaxProgramHeaders;
652     Elf_Addr load_address = file_offset;
653     std::vector<Elf_Shdr> section_headers;
654     section_headers.reserve(1u + sections.size());
655     section_headers.push_back(Elf_Shdr());  // NULL at index 0.
656     for (auto* section : sections) {
657       Elf_Shdr* header = section->GetHeader();
658       Elf_Off alignment = header->sh_addralign > 0 ? header->sh_addralign : 1;
659       header->sh_size = section->GetSize();
660       header->sh_link = section->GetLink();
661       // Allocate memory for the section in the file.
662       if (header->sh_type != SHT_NOBITS) {
663         header->sh_offset = RoundUp(file_offset, alignment);
664         file_offset = header->sh_offset + header->sh_size;
665       }
666       // Allocate memory for the section during program execution.
667       if ((header->sh_flags & SHF_ALLOC) != 0) {
668         header->sh_addr = RoundUp(load_address, alignment);
669         load_address = header->sh_addr + header->sh_size;
670       }
671       if (debug_logging_) {
672         LOG(INFO) << "Section " << section->GetName() << ":" << std::hex
673                   << " offset=0x" << header->sh_offset
674                   << " addr=0x" << header->sh_addr
675                   << " size=0x" << header->sh_size;
676       }
677       // Collect section headers into continuous array for convenience.
678       section_headers.push_back(*header);
679     }
680     Elf_Off section_headers_offset = RoundUp(file_offset, sizeof(Elf_Off));
681 
682     // Create program headers now that we know the layout of the whole file.
683     // Each segment contains one or more sections which are mapped together.
684     // Not all sections are mapped during the execution of the program.
685     // PT_LOAD does the mapping.  Other PT_* types allow the program to locate
686     // interesting parts of memory and their addresses overlap with PT_LOAD.
687     std::vector<Elf_Phdr> program_headers;
688     program_headers.push_back(Elf_Phdr());  // Placeholder for PT_PHDR.
689     // Create the main LOAD R segment which spans all sections up to .rodata.
690     const Elf_Shdr* rodata = rodata_.GetHeader();
691     program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R,
692       0, rodata->sh_offset + rodata->sh_size, rodata->sh_addralign));
693     program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_X, text_));
694     if (bss_.GetHeader()->sh_size != 0u) {
695       program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, bss_));
696     }
697     program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, dynamic_));
698     program_headers.push_back(MakeProgramHeader(PT_DYNAMIC, PF_R | PF_W, dynamic_));
699     const Section* eh_frame = FindSection(".eh_frame");
700     if (eh_frame != nullptr) {
701       program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, *eh_frame));
702       const Section* eh_frame_hdr = FindSection(".eh_frame_hdr");
703       if (eh_frame_hdr != nullptr) {
704         // Check layout: eh_frame is before eh_frame_hdr and there is no gap.
705         CHECK_LE(eh_frame->GetHeader()->sh_offset, eh_frame_hdr->GetHeader()->sh_offset);
706         CHECK_EQ(eh_frame->GetHeader()->sh_offset + eh_frame->GetHeader()->sh_size,
707                  eh_frame_hdr->GetHeader()->sh_offset);
708         // Extend the PT_LOAD of .eh_frame to include the .eh_frame_hdr as well.
709         program_headers.back().p_filesz += eh_frame_hdr->GetHeader()->sh_size;
710         program_headers.back().p_memsz  += eh_frame_hdr->GetHeader()->sh_size;
711         program_headers.push_back(MakeProgramHeader(PT_GNU_EH_FRAME, PF_R, *eh_frame_hdr));
712       }
713     }
714     DCHECK_EQ(program_headers[0].p_type, 0u);  // Check placeholder.
715     program_headers[0] = MakeProgramHeader(PT_PHDR, PF_R,
716       kProgramHeadersOffset, program_headers.size() * sizeof(Elf_Phdr), sizeof(Elf_Off));
717     CHECK_LE(program_headers.size(), kMaxProgramHeaders);
718 
719     // Create the main ELF header.
720     Elf_Ehdr elf_header = MakeElfHeader(isa_);
721     elf_header.e_phoff = kProgramHeadersOffset;
722     elf_header.e_shoff = section_headers_offset;
723     elf_header.e_phnum = program_headers.size();
724     elf_header.e_shnum = section_headers.size();
725     elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
726 
727     // Write all headers and section content to the file.
728     // Depending on the implementations of Section::Write, this
729     // might be just memory copies or some more elaborate operations.
730     if (!WriteArray(elf_file, &elf_header, 1)) {
731       LOG(INFO) << "Failed to write the ELF header";
732       return false;
733     }
734     if (!WriteArray(elf_file, program_headers.data(), program_headers.size())) {
735       LOG(INFO) << "Failed to write the program headers";
736       return false;
737     }
738     for (Section* section : sections) {
739       const Elf_Shdr* header = section->GetHeader();
740       if (header->sh_type != SHT_NOBITS) {
741         if (!SeekTo(elf_file, header->sh_offset) || !section->Write(elf_file)) {
742           LOG(INFO) << "Failed to write section " << section->GetName();
743           return false;
744         }
745         Elf_Word current_offset = lseek(elf_file->Fd(), 0, SEEK_CUR);
746         CHECK_EQ(current_offset, header->sh_offset + header->sh_size)
747           << "The number of bytes written does not match GetSize()";
748       }
749     }
750     if (!SeekTo(elf_file, section_headers_offset) ||
751         !WriteArray(elf_file, section_headers.data(), section_headers.size())) {
752       LOG(INFO) << "Failed to write the section headers";
753       return false;
754     }
755     return true;
756   }
757 
758   // Adds the given section to the builder.  It does not take ownership.
RegisterSection(Section * section)759   void RegisterSection(Section* section) {
760     other_sections_.push_back(section);
761   }
762 
FindSection(const char * name)763   const Section* FindSection(const char* name) {
764     for (const auto* section : other_sections_) {
765       if (section->GetName() == name) {
766         return section;
767       }
768     }
769     return nullptr;
770   }
771 
772  private:
SeekTo(File * elf_file,Elf_Word offset)773   static bool SeekTo(File* elf_file, Elf_Word offset) {
774     DCHECK_LE(lseek(elf_file->Fd(), 0, SEEK_CUR), static_cast<off_t>(offset))
775       << "Seeking backwards";
776     if (static_cast<off_t>(offset) != lseek(elf_file->Fd(), offset, SEEK_SET)) {
777       PLOG(ERROR) << "Failed to seek in file " << elf_file->GetPath();
778       return false;
779     }
780     return true;
781   }
782 
783   template<typename T>
WriteArray(File * elf_file,const T * data,size_t count)784   static bool WriteArray(File* elf_file, const T* data, size_t count) {
785     if (count != 0) {
786       DCHECK(data != nullptr);
787       if (!elf_file->WriteFully(data, count * sizeof(T))) {
788         PLOG(ERROR) << "Failed to write to file " << elf_file->GetPath();
789         return false;
790       }
791     }
792     return true;
793   }
794 
795   // Helper - create segment header based on memory range.
MakeProgramHeader(Elf_Word type,Elf_Word flags,Elf_Off offset,Elf_Word size,Elf_Word align)796   static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
797                                     Elf_Off offset, Elf_Word size, Elf_Word align) {
798     Elf_Phdr phdr = Elf_Phdr();
799     phdr.p_type    = type;
800     phdr.p_flags   = flags;
801     phdr.p_offset  = offset;
802     phdr.p_vaddr   = offset;
803     phdr.p_paddr   = offset;
804     phdr.p_filesz  = size;
805     phdr.p_memsz   = size;
806     phdr.p_align   = align;
807     return phdr;
808   }
809 
810   // Helper - create segment header based on section header.
MakeProgramHeader(Elf_Word type,Elf_Word flags,const Section & section)811   static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
812                                     const Section& section) {
813     const Elf_Shdr* shdr = section.GetHeader();
814     // Only run-time allocated sections should be in segment headers.
815     CHECK_NE(shdr->sh_flags & SHF_ALLOC, 0u);
816     Elf_Phdr phdr = Elf_Phdr();
817     phdr.p_type   = type;
818     phdr.p_flags  = flags;
819     phdr.p_offset = shdr->sh_offset;
820     phdr.p_vaddr  = shdr->sh_addr;
821     phdr.p_paddr  = shdr->sh_addr;
822     phdr.p_filesz = shdr->sh_type != SHT_NOBITS ? shdr->sh_size : 0u;
823     phdr.p_memsz  = shdr->sh_size;
824     phdr.p_align  = shdr->sh_addralign;
825     return phdr;
826   }
827 
MakeElfHeader(InstructionSet isa)828   static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
829     Elf_Ehdr elf_header = Elf_Ehdr();
830     switch (isa) {
831       case kArm:
832         // Fall through.
833       case kThumb2: {
834         elf_header.e_machine = EM_ARM;
835         elf_header.e_flags = EF_ARM_EABI_VER5;
836         break;
837       }
838       case kArm64: {
839         elf_header.e_machine = EM_AARCH64;
840         elf_header.e_flags = 0;
841         break;
842       }
843       case kX86: {
844         elf_header.e_machine = EM_386;
845         elf_header.e_flags = 0;
846         break;
847       }
848       case kX86_64: {
849         elf_header.e_machine = EM_X86_64;
850         elf_header.e_flags = 0;
851         break;
852       }
853       case kMips: {
854         elf_header.e_machine = EM_MIPS;
855         elf_header.e_flags = (EF_MIPS_NOREORDER |
856                                EF_MIPS_PIC       |
857                                EF_MIPS_CPIC      |
858                                EF_MIPS_ABI_O32   |
859                                EF_MIPS_ARCH_32R2);
860         break;
861       }
862       case kMips64: {
863         elf_header.e_machine = EM_MIPS;
864         elf_header.e_flags = (EF_MIPS_NOREORDER |
865                                EF_MIPS_PIC       |
866                                EF_MIPS_CPIC      |
867                                EF_MIPS_ARCH_64R6);
868         break;
869       }
870       case kNone: {
871         LOG(FATAL) << "No instruction set";
872       }
873     }
874 
875     elf_header.e_ident[EI_MAG0]       = ELFMAG0;
876     elf_header.e_ident[EI_MAG1]       = ELFMAG1;
877     elf_header.e_ident[EI_MAG2]       = ELFMAG2;
878     elf_header.e_ident[EI_MAG3]       = ELFMAG3;
879     elf_header.e_ident[EI_CLASS]      = (sizeof(Elf_Addr) == sizeof(Elf32_Addr))
880                                          ? ELFCLASS32 : ELFCLASS64;;
881     elf_header.e_ident[EI_DATA]       = ELFDATA2LSB;
882     elf_header.e_ident[EI_VERSION]    = EV_CURRENT;
883     elf_header.e_ident[EI_OSABI]      = ELFOSABI_LINUX;
884     elf_header.e_ident[EI_ABIVERSION] = 0;
885     elf_header.e_type = ET_DYN;
886     elf_header.e_version = 1;
887     elf_header.e_entry = 0;
888     elf_header.e_ehsize = sizeof(Elf_Ehdr);
889     elf_header.e_phentsize = sizeof(Elf_Phdr);
890     elf_header.e_shentsize = sizeof(Elf_Shdr);
891     elf_header.e_phoff = sizeof(Elf_Ehdr);
892     return elf_header;
893   }
894 
BuildDynamicSection(const std::string & elf_file_path)895   void BuildDynamicSection(const std::string& elf_file_path) {
896     std::string soname(elf_file_path);
897     size_t directory_separator_pos = soname.rfind('/');
898     if (directory_separator_pos != std::string::npos) {
899       soname = soname.substr(directory_separator_pos + 1);
900     }
901     // NB: We must add the name before adding DT_STRSZ.
902     Elf_Word soname_offset = dynstr_.AddName(soname);
903 
904     dynamic_.AddDynamicTag(DT_HASH, 0, &hash_);
905     dynamic_.AddDynamicTag(DT_STRTAB, 0, &dynstr_);
906     dynamic_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_);
907     dynamic_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym), nullptr);
908     dynamic_.AddDynamicTag(DT_STRSZ, dynstr_.GetSize(), nullptr);
909     dynamic_.AddDynamicTag(DT_SONAME, soname_offset, nullptr);
910   }
911 
BuildDynsymSection()912   void BuildDynsymSection() {
913     dynsym_.AddSymbol("oatdata", &rodata_, 0, true,
914                       rodata_.GetSize(), STB_GLOBAL, STT_OBJECT);
915     dynsym_.AddSymbol("oatexec", &text_, 0, true,
916                       text_.GetSize(), STB_GLOBAL, STT_OBJECT);
917     dynsym_.AddSymbol("oatlastword", &text_, text_.GetSize() - 4,
918                       true, 4, STB_GLOBAL, STT_OBJECT);
919     if (bss_.GetSize() != 0u) {
920       dynsym_.AddSymbol("oatbss", &bss_, 0, true,
921                         bss_.GetSize(), STB_GLOBAL, STT_OBJECT);
922       dynsym_.AddSymbol("oatbsslastword", &bss_, bss_.GetSize() - 4,
923                         true, 4, STB_GLOBAL, STT_OBJECT);
924     }
925   }
926 
  // Target instruction set; passed to MakeElfHeader() when the file is written.
  InstructionSet isa_;
  // Dynamic string table; receives the soname in BuildDynamicSection().
  StrtabSection dynstr_;
  // Dynamic symbol table; filled by BuildDynsymSection().
  SymtabSection dynsym_;
  // Hash section referenced by the DT_HASH dynamic tag.
  HashSection hash_;
  // Section exported through the "oatdata" symbol.
  OatSection rodata_;
  // Section exported through the "oatexec" and "oatlastword" symbols.
  OatSection text_;
  // Section exported through "oatbss"/"oatbsslastword" when its size is non-zero.
  NoBitsSection bss_;
  // Dynamic section; its tags are added in BuildDynamicSection().
  DynamicSection dynamic_;
  // NOTE(review): presumably the non-dynamic string and symbol tables - their
  // use is not visible in this part of the file; confirm.
  StrtabSection strtab_;
  SymtabSection symtab_;
  // Sections added through RegisterSection(); the pointers are not owned.
  std::vector<Section*> other_sections_;
  // String table whose section index is stored in e_shstrndx of the ELF header.
  StrtabSection shstrtab_;
939 
940   DISALLOW_COPY_AND_ASSIGN(ElfBuilder);
941 };
942 
943 }  // namespace art
944 
945 #endif  // ART_COMPILER_ELF_BUILDER_H_
946