1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_COMPILER_ELF_BUILDER_H_ 18 #define ART_COMPILER_ELF_BUILDER_H_ 19 20 #include <vector> 21 22 #include "arch/instruction_set.h" 23 #include "base/bit_utils.h" 24 #include "base/unix_file/fd_file.h" 25 #include "buffered_output_stream.h" 26 #include "elf_utils.h" 27 #include "file_output_stream.h" 28 29 namespace art { 30 31 class CodeOutput { 32 public: 33 virtual bool Write(OutputStream* out) = 0; ~CodeOutput()34 virtual ~CodeOutput() {} 35 }; 36 37 // Writes ELF file. 38 // The main complication is that the sections often want to reference 39 // each other. We solve this by writing the ELF file in two stages: 40 // * Sections are asked about their size, and overall layout is calculated. 41 // * Sections do the actual writes which may use offsets of other sections. 42 template <typename ElfTypes> 43 class ElfBuilder FINAL { 44 public: 45 using Elf_Addr = typename ElfTypes::Addr; 46 using Elf_Off = typename ElfTypes::Off; 47 using Elf_Word = typename ElfTypes::Word; 48 using Elf_Sword = typename ElfTypes::Sword; 49 using Elf_Ehdr = typename ElfTypes::Ehdr; 50 using Elf_Shdr = typename ElfTypes::Shdr; 51 using Elf_Sym = typename ElfTypes::Sym; 52 using Elf_Phdr = typename ElfTypes::Phdr; 53 using Elf_Dyn = typename ElfTypes::Dyn; 54 55 // Base class of all sections. 
56 class Section { 57 public: Section(const std::string & name,Elf_Word type,Elf_Word flags,const Section * link,Elf_Word info,Elf_Word align,Elf_Word entsize)58 Section(const std::string& name, Elf_Word type, Elf_Word flags, 59 const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize) 60 : header_(), section_index_(0), name_(name), link_(link) { 61 header_.sh_type = type; 62 header_.sh_flags = flags; 63 header_.sh_info = info; 64 header_.sh_addralign = align; 65 header_.sh_entsize = entsize; 66 } ~Section()67 virtual ~Section() {} 68 69 // Returns the size of the content of this section. It is used to 70 // calculate file offsets of all sections before doing any writes. 71 virtual Elf_Word GetSize() const = 0; 72 73 // Write the content of this section to the given file. 74 // This must write exactly the number of bytes returned by GetSize(). 75 // Offsets of all sections are known when this method is called. 76 virtual bool Write(File* elf_file) = 0; 77 GetLink()78 Elf_Word GetLink() const { 79 return (link_ != nullptr) ? link_->GetSectionIndex() : 0; 80 } 81 GetHeader()82 const Elf_Shdr* GetHeader() const { 83 return &header_; 84 } 85 GetHeader()86 Elf_Shdr* GetHeader() { 87 return &header_; 88 } 89 GetSectionIndex()90 Elf_Word GetSectionIndex() const { 91 DCHECK_NE(section_index_, 0u); 92 return section_index_; 93 } 94 SetSectionIndex(Elf_Word section_index)95 void SetSectionIndex(Elf_Word section_index) { 96 section_index_ = section_index; 97 } 98 GetName()99 const std::string& GetName() const { 100 return name_; 101 } 102 103 private: 104 Elf_Shdr header_; 105 Elf_Word section_index_; 106 const std::string name_; 107 const Section* const link_; 108 109 DISALLOW_COPY_AND_ASSIGN(Section); 110 }; 111 112 // Writer of .dynamic section. 
113 class DynamicSection FINAL : public Section { 114 public: AddDynamicTag(Elf_Sword tag,Elf_Word value,const Section * section)115 void AddDynamicTag(Elf_Sword tag, Elf_Word value, const Section* section) { 116 DCHECK_NE(tag, static_cast<Elf_Sword>(DT_NULL)); 117 dynamics_.push_back({tag, value, section}); 118 } 119 DynamicSection(const std::string & name,Section * link)120 DynamicSection(const std::string& name, Section* link) 121 : Section(name, SHT_DYNAMIC, SHF_ALLOC, 122 link, 0, kPageSize, sizeof(Elf_Dyn)) {} 123 GetSize()124 Elf_Word GetSize() const OVERRIDE { 125 return (dynamics_.size() + 1 /* DT_NULL */) * sizeof(Elf_Dyn); 126 } 127 Write(File * elf_file)128 bool Write(File* elf_file) OVERRIDE { 129 std::vector<Elf_Dyn> buffer; 130 buffer.reserve(dynamics_.size() + 1u); 131 for (const ElfDynamicState& it : dynamics_) { 132 if (it.section_ != nullptr) { 133 // We are adding an address relative to a section. 134 buffer.push_back( 135 {it.tag_, {it.value_ + it.section_->GetHeader()->sh_addr}}); 136 } else { 137 buffer.push_back({it.tag_, {it.value_}}); 138 } 139 } 140 buffer.push_back({DT_NULL, {0}}); 141 return WriteArray(elf_file, buffer.data(), buffer.size()); 142 } 143 144 private: 145 struct ElfDynamicState { 146 Elf_Sword tag_; 147 Elf_Word value_; 148 const Section* section_; 149 }; 150 std::vector<ElfDynamicState> dynamics_; 151 }; 152 153 using PatchFn = void (*)(const std::vector<uintptr_t>& patch_locations, 154 Elf_Addr buffer_address, 155 Elf_Addr base_address, 156 std::vector<uint8_t>* buffer); 157 158 // Section with content based on simple memory buffer. 159 // The buffer can be optionally patched before writing. 
160 class RawSection FINAL : public Section { 161 public: 162 RawSection(const std::string& name, Elf_Word type, Elf_Word flags, 163 const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize, 164 PatchFn patch = nullptr, const Section* patch_base_section = nullptr) Section(name,type,flags,link,info,align,entsize)165 : Section(name, type, flags, link, info, align, entsize), 166 patched_(false), patch_(patch), patch_base_section_(patch_base_section) { 167 } 168 RawSection(const std::string & name,Elf_Word type)169 RawSection(const std::string& name, Elf_Word type) 170 : RawSection(name, type, 0, nullptr, 0, 1, 0, nullptr, nullptr) { 171 } 172 GetSize()173 Elf_Word GetSize() const OVERRIDE { 174 return buffer_.size(); 175 } 176 Write(File * elf_file)177 bool Write(File* elf_file) OVERRIDE { 178 if (!patch_locations_.empty()) { 179 DCHECK(!patched_); // Do not patch twice. 180 DCHECK(patch_ != nullptr); 181 DCHECK(patch_base_section_ != nullptr); 182 patch_(patch_locations_, 183 this->GetHeader()->sh_addr, 184 patch_base_section_->GetHeader()->sh_addr, 185 &buffer_); 186 patched_ = true; 187 } 188 return WriteArray(elf_file, buffer_.data(), buffer_.size()); 189 } 190 IsEmpty()191 bool IsEmpty() const { 192 return buffer_.size() == 0; 193 } 194 GetBuffer()195 std::vector<uint8_t>* GetBuffer() { 196 return &buffer_; 197 } 198 SetBuffer(const std::vector<uint8_t> & buffer)199 void SetBuffer(const std::vector<uint8_t>& buffer) { 200 buffer_ = buffer; 201 } 202 GetPatchLocations()203 std::vector<uintptr_t>* GetPatchLocations() { 204 return &patch_locations_; 205 } 206 207 private: 208 std::vector<uint8_t> buffer_; 209 std::vector<uintptr_t> patch_locations_; 210 bool patched_; 211 // User-provided function to do the actual patching. 212 PatchFn patch_; 213 // The section that we patch against (usually .text). 214 const Section* patch_base_section_; 215 }; 216 217 // Writer of .rodata section or .text section. 
218 // The write is done lazily using the provided CodeOutput. 219 class OatSection FINAL : public Section { 220 public: OatSection(const std::string & name,Elf_Word type,Elf_Word flags,const Section * link,Elf_Word info,Elf_Word align,Elf_Word entsize,Elf_Word size,CodeOutput * code_output)221 OatSection(const std::string& name, Elf_Word type, Elf_Word flags, 222 const Section* link, Elf_Word info, Elf_Word align, 223 Elf_Word entsize, Elf_Word size, CodeOutput* code_output) 224 : Section(name, type, flags, link, info, align, entsize), 225 size_(size), code_output_(code_output) { 226 } 227 GetSize()228 Elf_Word GetSize() const OVERRIDE { 229 return size_; 230 } 231 Write(File * elf_file)232 bool Write(File* elf_file) OVERRIDE { 233 // The BufferedOutputStream class contains the buffer as field, 234 // therefore it is too big to allocate on the stack. 235 std::unique_ptr<BufferedOutputStream> output_stream( 236 new BufferedOutputStream(new FileOutputStream(elf_file))); 237 return code_output_->Write(output_stream.get()); 238 } 239 240 private: 241 Elf_Word size_; 242 CodeOutput* code_output_; 243 }; 244 245 // Writer of .bss section. 246 class NoBitsSection FINAL : public Section { 247 public: NoBitsSection(const std::string & name,Elf_Word size)248 NoBitsSection(const std::string& name, Elf_Word size) 249 : Section(name, SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), 250 size_(size) { 251 } 252 GetSize()253 Elf_Word GetSize() const OVERRIDE { 254 return size_; 255 } 256 Write(File * elf_file ATTRIBUTE_UNUSED)257 bool Write(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE { 258 LOG(ERROR) << "This section should not be written to the ELF file"; 259 return false; 260 } 261 262 private: 263 Elf_Word size_; 264 }; 265 266 // Writer of .dynstr .strtab and .shstrtab sections. 
267 class StrtabSection FINAL : public Section { 268 public: StrtabSection(const std::string & name,Elf_Word flags)269 StrtabSection(const std::string& name, Elf_Word flags) 270 : Section(name, SHT_STRTAB, flags, nullptr, 0, 1, 0) { 271 buffer_.reserve(4 * KB); 272 // The first entry of strtab must be empty string. 273 buffer_ += '\0'; 274 } 275 AddName(const std::string & name)276 Elf_Word AddName(const std::string& name) { 277 Elf_Word offset = buffer_.size(); 278 buffer_ += name; 279 buffer_ += '\0'; 280 return offset; 281 } 282 GetSize()283 Elf_Word GetSize() const OVERRIDE { 284 return buffer_.size(); 285 } 286 Write(File * elf_file)287 bool Write(File* elf_file) OVERRIDE { 288 return WriteArray(elf_file, buffer_.data(), buffer_.size()); 289 } 290 291 private: 292 std::string buffer_; 293 }; 294 295 class HashSection; 296 297 // Writer of .dynsym and .symtab sections. 298 class SymtabSection FINAL : public Section { 299 public: 300 // Add a symbol with given name to this symtab. The symbol refers to 301 // 'relative_addr' within the given section and has the given attributes. 
302 void AddSymbol(const std::string& name, const Section* section, 303 Elf_Addr addr, bool is_relative, Elf_Word size, 304 uint8_t binding, uint8_t type, uint8_t other = 0) { 305 CHECK(section != nullptr); 306 Elf_Word name_idx = strtab_->AddName(name); 307 symbols_.push_back({ name, section, addr, size, is_relative, 308 MakeStInfo(binding, type), other, name_idx }); 309 } 310 SymtabSection(const std::string & name,Elf_Word type,Elf_Word flags,StrtabSection * strtab)311 SymtabSection(const std::string& name, Elf_Word type, Elf_Word flags, 312 StrtabSection* strtab) 313 : Section(name, type, flags, strtab, 0, sizeof(Elf_Off), sizeof(Elf_Sym)), 314 strtab_(strtab) { 315 } 316 IsEmpty()317 bool IsEmpty() const { 318 return symbols_.empty(); 319 } 320 GetSize()321 Elf_Word GetSize() const OVERRIDE { 322 return (1 /* NULL */ + symbols_.size()) * sizeof(Elf_Sym); 323 } 324 Write(File * elf_file)325 bool Write(File* elf_file) OVERRIDE { 326 std::vector<Elf_Sym> buffer; 327 buffer.reserve(1u + symbols_.size()); 328 buffer.push_back(Elf_Sym()); // NULL. 329 for (const ElfSymbolState& it : symbols_) { 330 Elf_Sym sym = Elf_Sym(); 331 sym.st_name = it.name_idx_; 332 if (it.is_relative_) { 333 sym.st_value = it.addr_ + it.section_->GetHeader()->sh_addr; 334 } else { 335 sym.st_value = it.addr_; 336 } 337 sym.st_size = it.size_; 338 sym.st_other = it.other_; 339 sym.st_shndx = it.section_->GetSectionIndex(); 340 sym.st_info = it.info_; 341 buffer.push_back(sym); 342 } 343 return WriteArray(elf_file, buffer.data(), buffer.size()); 344 } 345 346 private: 347 struct ElfSymbolState { 348 const std::string name_; 349 const Section* section_; 350 Elf_Addr addr_; 351 Elf_Word size_; 352 bool is_relative_; 353 uint8_t info_; 354 uint8_t other_; 355 Elf_Word name_idx_; // index in the strtab. 
356 }; 357 MakeStInfo(uint8_t binding,uint8_t type)358 static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) { 359 return ((binding) << 4) + ((type) & 0xf); 360 } 361 362 // The symbols in the same order they will be in the symbol table. 363 std::vector<ElfSymbolState> symbols_; 364 StrtabSection* strtab_; 365 366 friend class HashSection; 367 }; 368 369 // TODO: Consider removing. 370 // We use it only for the dynsym section which has only 5 symbols. 371 // We do not use it for symtab, and we probably do not have to 372 // since we use those symbols only to print backtraces. 373 class HashSection FINAL : public Section { 374 public: HashSection(const std::string & name,Elf_Word flags,SymtabSection * symtab)375 HashSection(const std::string& name, Elf_Word flags, SymtabSection* symtab) 376 : Section(name, SHT_HASH, flags, symtab, 377 0, sizeof(Elf_Word), sizeof(Elf_Word)), 378 symtab_(symtab) { 379 } 380 GetSize()381 Elf_Word GetSize() const OVERRIDE { 382 Elf_Word nbuckets = GetNumBuckets(); 383 Elf_Word chain_size = symtab_->symbols_.size() + 1 /* NULL */; 384 return (2 /* header */ + nbuckets + chain_size) * sizeof(Elf_Word); 385 } 386 Write(File * const elf_file)387 bool Write(File* const elf_file) OVERRIDE { 388 // Here is how The ELF hash table works. 389 // There are 3 arrays to worry about. 390 // * The symbol table where the symbol information is. 391 // * The bucket array which is an array of indexes into the symtab and chain. 392 // * The chain array which is also an array of indexes into the symtab and chain. 393 // 394 // Lets say the state is something like this. 
395 // +--------+ +--------+ +-----------+ 396 // | symtab | | bucket | | chain | 397 // | null | | 1 | | STN_UNDEF | 398 // | <sym1> | | 4 | | 2 | 399 // | <sym2> | | | | 5 | 400 // | <sym3> | | | | STN_UNDEF | 401 // | <sym4> | | | | 3 | 402 // | <sym5> | | | | STN_UNDEF | 403 // +--------+ +--------+ +-----------+ 404 // 405 // The lookup process (in python psudocode) is 406 // 407 // def GetSym(name): 408 // # NB STN_UNDEF == 0 409 // indx = bucket[elfhash(name) % num_buckets] 410 // while indx != STN_UNDEF: 411 // if GetSymbolName(symtab[indx]) == name: 412 // return symtab[indx] 413 // indx = chain[indx] 414 // return SYMBOL_NOT_FOUND 415 // 416 // Between bucket and chain arrays every symtab index must be present exactly 417 // once (except for STN_UNDEF, which must be present 1 + num_bucket times). 418 const auto& symbols = symtab_->symbols_; 419 // Select number of buckets. 420 // This is essentially arbitrary. 421 Elf_Word nbuckets = GetNumBuckets(); 422 // 1 is for the implicit NULL symbol. 423 Elf_Word chain_size = (symbols.size() + 1); 424 std::vector<Elf_Word> hash; 425 hash.push_back(nbuckets); 426 hash.push_back(chain_size); 427 uint32_t bucket_offset = hash.size(); 428 uint32_t chain_offset = bucket_offset + nbuckets; 429 hash.resize(hash.size() + nbuckets + chain_size, 0); 430 431 Elf_Word* buckets = hash.data() + bucket_offset; 432 Elf_Word* chain = hash.data() + chain_offset; 433 434 // Set up the actual hash table. 435 for (Elf_Word i = 0; i < symbols.size(); i++) { 436 // Add 1 since we need to have the null symbol that is not in the symbols 437 // list. 
438 Elf_Word index = i + 1; 439 Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols[i].name_.c_str())) % nbuckets; 440 if (buckets[hash_val] == 0) { 441 buckets[hash_val] = index; 442 } else { 443 hash_val = buckets[hash_val]; 444 CHECK_LT(hash_val, chain_size); 445 while (chain[hash_val] != 0) { 446 hash_val = chain[hash_val]; 447 CHECK_LT(hash_val, chain_size); 448 } 449 chain[hash_val] = index; 450 // Check for loops. Works because if this is non-empty then there must be 451 // another cell which already contains the same symbol index as this one, 452 // which means some symbol has more then one name, which isn't allowed. 453 CHECK_EQ(chain[index], static_cast<Elf_Word>(0)); 454 } 455 } 456 return WriteArray(elf_file, hash.data(), hash.size()); 457 } 458 459 private: GetNumBuckets()460 Elf_Word GetNumBuckets() const { 461 const auto& symbols = symtab_->symbols_; 462 if (symbols.size() < 8) { 463 return 2; 464 } else if (symbols.size() < 32) { 465 return 4; 466 } else if (symbols.size() < 256) { 467 return 16; 468 } else { 469 // Have about 32 ids per bucket. 
470 return RoundUp(symbols.size()/32, 2); 471 } 472 } 473 474 // from bionic elfhash(const char * _name)475 static inline unsigned elfhash(const char *_name) { 476 const unsigned char *name = (const unsigned char *) _name; 477 unsigned h = 0, g; 478 479 while (*name) { 480 h = (h << 4) + *name++; 481 g = h & 0xf0000000; 482 h ^= g; 483 h ^= g >> 24; 484 } 485 return h; 486 } 487 488 SymtabSection* symtab_; 489 490 DISALLOW_COPY_AND_ASSIGN(HashSection); 491 }; 492 ElfBuilder(InstructionSet isa,Elf_Word rodata_size,CodeOutput * rodata_writer,Elf_Word text_size,CodeOutput * text_writer,Elf_Word bss_size)493 ElfBuilder(InstructionSet isa, 494 Elf_Word rodata_size, CodeOutput* rodata_writer, 495 Elf_Word text_size, CodeOutput* text_writer, 496 Elf_Word bss_size) 497 : isa_(isa), 498 dynstr_(".dynstr", SHF_ALLOC), 499 dynsym_(".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_), 500 hash_(".hash", SHF_ALLOC, &dynsym_), 501 rodata_(".rodata", SHT_PROGBITS, SHF_ALLOC, 502 nullptr, 0, kPageSize, 0, rodata_size, rodata_writer), 503 text_(".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, 504 nullptr, 0, kPageSize, 0, text_size, text_writer), 505 bss_(".bss", bss_size), 506 dynamic_(".dynamic", &dynstr_), 507 strtab_(".strtab", 0), 508 symtab_(".symtab", SHT_SYMTAB, 0, &strtab_), 509 shstrtab_(".shstrtab", 0) { 510 } ~ElfBuilder()511 ~ElfBuilder() {} 512 GetText()513 OatSection* GetText() { return &text_; } GetSymtab()514 SymtabSection* GetSymtab() { return &symtab_; } 515 Write(File * elf_file)516 bool Write(File* elf_file) { 517 // Since the .text section of an oat file contains relative references to .rodata 518 // and (optionally) .bss, we keep these 2 or 3 sections together. This creates 519 // a non-traditional layout where the .bss section is mapped independently of the 520 // .dynamic section and needs its own program header with LOAD RW. 521 // 522 // The basic layout of the elf file. Order may be different in final output. 
523 // +-------------------------+ 524 // | Elf_Ehdr | 525 // +-------------------------+ 526 // | Elf_Phdr PHDR | 527 // | Elf_Phdr LOAD R | .dynsym .dynstr .hash .rodata 528 // | Elf_Phdr LOAD R X | .text 529 // | Elf_Phdr LOAD RW | .bss (Optional) 530 // | Elf_Phdr LOAD RW | .dynamic 531 // | Elf_Phdr DYNAMIC | .dynamic 532 // | Elf_Phdr LOAD R | .eh_frame .eh_frame_hdr 533 // | Elf_Phdr EH_FRAME R | .eh_frame_hdr 534 // +-------------------------+ 535 // | .dynsym | 536 // | Elf_Sym STN_UNDEF | 537 // | Elf_Sym oatdata | 538 // | Elf_Sym oatexec | 539 // | Elf_Sym oatlastword | 540 // | Elf_Sym oatbss | (Optional) 541 // | Elf_Sym oatbsslastword | (Optional) 542 // +-------------------------+ 543 // | .dynstr | 544 // | names for .dynsym | 545 // +-------------------------+ 546 // | .hash | 547 // | hashtable for dynsym | 548 // +-------------------------+ 549 // | .rodata | 550 // | oatdata..oatexec-4 | 551 // +-------------------------+ 552 // | .text | 553 // | oatexec..oatlastword | 554 // +-------------------------+ 555 // | .dynamic | 556 // | Elf_Dyn DT_HASH | 557 // | Elf_Dyn DT_STRTAB | 558 // | Elf_Dyn DT_SYMTAB | 559 // | Elf_Dyn DT_SYMENT | 560 // | Elf_Dyn DT_STRSZ | 561 // | Elf_Dyn DT_SONAME | 562 // | Elf_Dyn DT_NULL | 563 // +-------------------------+ (Optional) 564 // | .symtab | (Optional) 565 // | program symbols | (Optional) 566 // +-------------------------+ (Optional) 567 // | .strtab | (Optional) 568 // | names for .symtab | (Optional) 569 // +-------------------------+ (Optional) 570 // | .eh_frame | (Optional) 571 // +-------------------------+ (Optional) 572 // | .eh_frame_hdr | (Optional) 573 // +-------------------------+ (Optional) 574 // | .debug_info | (Optional) 575 // +-------------------------+ (Optional) 576 // | .debug_abbrev | (Optional) 577 // +-------------------------+ (Optional) 578 // | .debug_str | (Optional) 579 // +-------------------------+ (Optional) 580 // | .debug_line | (Optional) 581 // 
+-------------------------+ 582 // | .shstrtab | 583 // | names of sections | 584 // +-------------------------+ 585 // | Elf_Shdr null | 586 // | Elf_Shdr .dynsym | 587 // | Elf_Shdr .dynstr | 588 // | Elf_Shdr .hash | 589 // | Elf_Shdr .rodata | 590 // | Elf_Shdr .text | 591 // | Elf_Shdr .bss | (Optional) 592 // | Elf_Shdr .dynamic | 593 // | Elf_Shdr .symtab | (Optional) 594 // | Elf_Shdr .strtab | (Optional) 595 // | Elf_Shdr .eh_frame | (Optional) 596 // | Elf_Shdr .eh_frame_hdr | (Optional) 597 // | Elf_Shdr .debug_info | (Optional) 598 // | Elf_Shdr .debug_abbrev | (Optional) 599 // | Elf_Shdr .debug_str | (Optional) 600 // | Elf_Shdr .debug_line | (Optional) 601 // | Elf_Shdr .oat_patches | (Optional) 602 // | Elf_Shdr .shstrtab | 603 // +-------------------------+ 604 constexpr bool debug_logging_ = false; 605 606 // Create a list of all section which we want to write. 607 // This is the order in which they will be written. 608 std::vector<Section*> sections; 609 sections.push_back(&dynsym_); 610 sections.push_back(&dynstr_); 611 sections.push_back(&hash_); 612 sections.push_back(&rodata_); 613 sections.push_back(&text_); 614 if (bss_.GetSize() != 0u) { 615 sections.push_back(&bss_); 616 } 617 sections.push_back(&dynamic_); 618 if (!symtab_.IsEmpty()) { 619 sections.push_back(&symtab_); 620 sections.push_back(&strtab_); 621 } 622 for (Section* section : other_sections_) { 623 sections.push_back(section); 624 } 625 sections.push_back(&shstrtab_); 626 for (size_t i = 0; i < sections.size(); i++) { 627 // The first section index is 1. Index 0 is reserved for NULL. 628 // Section index is used for relative symbols and for section links. 629 sections[i]->SetSectionIndex(i + 1); 630 // Add section name to .shstrtab. 
631 Elf_Word name_offset = shstrtab_.AddName(sections[i]->GetName()); 632 sections[i]->GetHeader()->sh_name = name_offset; 633 } 634 635 // The running program does not have access to section headers 636 // and the loader is not supposed to use them either. 637 // The dynamic sections therefore replicates some of the layout 638 // information like the address and size of .rodata and .text. 639 // It also contains other metadata like the SONAME. 640 // The .dynamic section is found using the PT_DYNAMIC program header. 641 BuildDynsymSection(); 642 BuildDynamicSection(elf_file->GetPath()); 643 644 // We do not know the number of headers until the final stages of write. 645 // It is easiest to just reserve a fixed amount of space for them. 646 constexpr size_t kMaxProgramHeaders = 8; 647 constexpr size_t kProgramHeadersOffset = sizeof(Elf_Ehdr); 648 649 // Layout of all sections - determine the final file offsets and addresses. 650 // This must be done after we have built all sections and know their size. 651 Elf_Off file_offset = kProgramHeadersOffset + sizeof(Elf_Phdr) * kMaxProgramHeaders; 652 Elf_Addr load_address = file_offset; 653 std::vector<Elf_Shdr> section_headers; 654 section_headers.reserve(1u + sections.size()); 655 section_headers.push_back(Elf_Shdr()); // NULL at index 0. 656 for (auto* section : sections) { 657 Elf_Shdr* header = section->GetHeader(); 658 Elf_Off alignment = header->sh_addralign > 0 ? header->sh_addralign : 1; 659 header->sh_size = section->GetSize(); 660 header->sh_link = section->GetLink(); 661 // Allocate memory for the section in the file. 662 if (header->sh_type != SHT_NOBITS) { 663 header->sh_offset = RoundUp(file_offset, alignment); 664 file_offset = header->sh_offset + header->sh_size; 665 } 666 // Allocate memory for the section during program execution. 
667 if ((header->sh_flags & SHF_ALLOC) != 0) { 668 header->sh_addr = RoundUp(load_address, alignment); 669 load_address = header->sh_addr + header->sh_size; 670 } 671 if (debug_logging_) { 672 LOG(INFO) << "Section " << section->GetName() << ":" << std::hex 673 << " offset=0x" << header->sh_offset 674 << " addr=0x" << header->sh_addr 675 << " size=0x" << header->sh_size; 676 } 677 // Collect section headers into continuous array for convenience. 678 section_headers.push_back(*header); 679 } 680 Elf_Off section_headers_offset = RoundUp(file_offset, sizeof(Elf_Off)); 681 682 // Create program headers now that we know the layout of the whole file. 683 // Each segment contains one or more sections which are mapped together. 684 // Not all sections are mapped during the execution of the program. 685 // PT_LOAD does the mapping. Other PT_* types allow the program to locate 686 // interesting parts of memory and their addresses overlap with PT_LOAD. 687 std::vector<Elf_Phdr> program_headers; 688 program_headers.push_back(Elf_Phdr()); // Placeholder for PT_PHDR. 689 // Create the main LOAD R segment which spans all sections up to .rodata. 
690 const Elf_Shdr* rodata = rodata_.GetHeader(); 691 program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, 692 0, rodata->sh_offset + rodata->sh_size, rodata->sh_addralign)); 693 program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_X, text_)); 694 if (bss_.GetHeader()->sh_size != 0u) { 695 program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, bss_)); 696 } 697 program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, dynamic_)); 698 program_headers.push_back(MakeProgramHeader(PT_DYNAMIC, PF_R | PF_W, dynamic_)); 699 const Section* eh_frame = FindSection(".eh_frame"); 700 if (eh_frame != nullptr) { 701 program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, *eh_frame)); 702 const Section* eh_frame_hdr = FindSection(".eh_frame_hdr"); 703 if (eh_frame_hdr != nullptr) { 704 // Check layout: eh_frame is before eh_frame_hdr and there is no gap. 705 CHECK_LE(eh_frame->GetHeader()->sh_offset, eh_frame_hdr->GetHeader()->sh_offset); 706 CHECK_EQ(eh_frame->GetHeader()->sh_offset + eh_frame->GetHeader()->sh_size, 707 eh_frame_hdr->GetHeader()->sh_offset); 708 // Extend the PT_LOAD of .eh_frame to include the .eh_frame_hdr as well. 709 program_headers.back().p_filesz += eh_frame_hdr->GetHeader()->sh_size; 710 program_headers.back().p_memsz += eh_frame_hdr->GetHeader()->sh_size; 711 program_headers.push_back(MakeProgramHeader(PT_GNU_EH_FRAME, PF_R, *eh_frame_hdr)); 712 } 713 } 714 DCHECK_EQ(program_headers[0].p_type, 0u); // Check placeholder. 715 program_headers[0] = MakeProgramHeader(PT_PHDR, PF_R, 716 kProgramHeadersOffset, program_headers.size() * sizeof(Elf_Phdr), sizeof(Elf_Off)); 717 CHECK_LE(program_headers.size(), kMaxProgramHeaders); 718 719 // Create the main ELF header. 
720 Elf_Ehdr elf_header = MakeElfHeader(isa_); 721 elf_header.e_phoff = kProgramHeadersOffset; 722 elf_header.e_shoff = section_headers_offset; 723 elf_header.e_phnum = program_headers.size(); 724 elf_header.e_shnum = section_headers.size(); 725 elf_header.e_shstrndx = shstrtab_.GetSectionIndex(); 726 727 // Write all headers and section content to the file. 728 // Depending on the implementations of Section::Write, this 729 // might be just memory copies or some more elaborate operations. 730 if (!WriteArray(elf_file, &elf_header, 1)) { 731 LOG(INFO) << "Failed to write the ELF header"; 732 return false; 733 } 734 if (!WriteArray(elf_file, program_headers.data(), program_headers.size())) { 735 LOG(INFO) << "Failed to write the program headers"; 736 return false; 737 } 738 for (Section* section : sections) { 739 const Elf_Shdr* header = section->GetHeader(); 740 if (header->sh_type != SHT_NOBITS) { 741 if (!SeekTo(elf_file, header->sh_offset) || !section->Write(elf_file)) { 742 LOG(INFO) << "Failed to write section " << section->GetName(); 743 return false; 744 } 745 Elf_Word current_offset = lseek(elf_file->Fd(), 0, SEEK_CUR); 746 CHECK_EQ(current_offset, header->sh_offset + header->sh_size) 747 << "The number of bytes written does not match GetSize()"; 748 } 749 } 750 if (!SeekTo(elf_file, section_headers_offset) || 751 !WriteArray(elf_file, section_headers.data(), section_headers.size())) { 752 LOG(INFO) << "Failed to write the section headers"; 753 return false; 754 } 755 return true; 756 } 757 758 // Adds the given section to the builder. It does not take ownership. 
RegisterSection(Section * section)759 void RegisterSection(Section* section) { 760 other_sections_.push_back(section); 761 } 762 FindSection(const char * name)763 const Section* FindSection(const char* name) { 764 for (const auto* section : other_sections_) { 765 if (section->GetName() == name) { 766 return section; 767 } 768 } 769 return nullptr; 770 } 771 772 private: SeekTo(File * elf_file,Elf_Word offset)773 static bool SeekTo(File* elf_file, Elf_Word offset) { 774 DCHECK_LE(lseek(elf_file->Fd(), 0, SEEK_CUR), static_cast<off_t>(offset)) 775 << "Seeking backwards"; 776 if (static_cast<off_t>(offset) != lseek(elf_file->Fd(), offset, SEEK_SET)) { 777 PLOG(ERROR) << "Failed to seek in file " << elf_file->GetPath(); 778 return false; 779 } 780 return true; 781 } 782 783 template<typename T> WriteArray(File * elf_file,const T * data,size_t count)784 static bool WriteArray(File* elf_file, const T* data, size_t count) { 785 if (count != 0) { 786 DCHECK(data != nullptr); 787 if (!elf_file->WriteFully(data, count * sizeof(T))) { 788 PLOG(ERROR) << "Failed to write to file " << elf_file->GetPath(); 789 return false; 790 } 791 } 792 return true; 793 } 794 795 // Helper - create segment header based on memory range. MakeProgramHeader(Elf_Word type,Elf_Word flags,Elf_Off offset,Elf_Word size,Elf_Word align)796 static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags, 797 Elf_Off offset, Elf_Word size, Elf_Word align) { 798 Elf_Phdr phdr = Elf_Phdr(); 799 phdr.p_type = type; 800 phdr.p_flags = flags; 801 phdr.p_offset = offset; 802 phdr.p_vaddr = offset; 803 phdr.p_paddr = offset; 804 phdr.p_filesz = size; 805 phdr.p_memsz = size; 806 phdr.p_align = align; 807 return phdr; 808 } 809 810 // Helper - create segment header based on section header. 
MakeProgramHeader(Elf_Word type,Elf_Word flags,const Section & section)811 static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags, 812 const Section& section) { 813 const Elf_Shdr* shdr = section.GetHeader(); 814 // Only run-time allocated sections should be in segment headers. 815 CHECK_NE(shdr->sh_flags & SHF_ALLOC, 0u); 816 Elf_Phdr phdr = Elf_Phdr(); 817 phdr.p_type = type; 818 phdr.p_flags = flags; 819 phdr.p_offset = shdr->sh_offset; 820 phdr.p_vaddr = shdr->sh_addr; 821 phdr.p_paddr = shdr->sh_addr; 822 phdr.p_filesz = shdr->sh_type != SHT_NOBITS ? shdr->sh_size : 0u; 823 phdr.p_memsz = shdr->sh_size; 824 phdr.p_align = shdr->sh_addralign; 825 return phdr; 826 } 827 MakeElfHeader(InstructionSet isa)828 static Elf_Ehdr MakeElfHeader(InstructionSet isa) { 829 Elf_Ehdr elf_header = Elf_Ehdr(); 830 switch (isa) { 831 case kArm: 832 // Fall through. 833 case kThumb2: { 834 elf_header.e_machine = EM_ARM; 835 elf_header.e_flags = EF_ARM_EABI_VER5; 836 break; 837 } 838 case kArm64: { 839 elf_header.e_machine = EM_AARCH64; 840 elf_header.e_flags = 0; 841 break; 842 } 843 case kX86: { 844 elf_header.e_machine = EM_386; 845 elf_header.e_flags = 0; 846 break; 847 } 848 case kX86_64: { 849 elf_header.e_machine = EM_X86_64; 850 elf_header.e_flags = 0; 851 break; 852 } 853 case kMips: { 854 elf_header.e_machine = EM_MIPS; 855 elf_header.e_flags = (EF_MIPS_NOREORDER | 856 EF_MIPS_PIC | 857 EF_MIPS_CPIC | 858 EF_MIPS_ABI_O32 | 859 EF_MIPS_ARCH_32R2); 860 break; 861 } 862 case kMips64: { 863 elf_header.e_machine = EM_MIPS; 864 elf_header.e_flags = (EF_MIPS_NOREORDER | 865 EF_MIPS_PIC | 866 EF_MIPS_CPIC | 867 EF_MIPS_ARCH_64R6); 868 break; 869 } 870 case kNone: { 871 LOG(FATAL) << "No instruction set"; 872 } 873 } 874 875 elf_header.e_ident[EI_MAG0] = ELFMAG0; 876 elf_header.e_ident[EI_MAG1] = ELFMAG1; 877 elf_header.e_ident[EI_MAG2] = ELFMAG2; 878 elf_header.e_ident[EI_MAG3] = ELFMAG3; 879 elf_header.e_ident[EI_CLASS] = (sizeof(Elf_Addr) == sizeof(Elf32_Addr)) 880 ? 
ELFCLASS32 : ELFCLASS64;; 881 elf_header.e_ident[EI_DATA] = ELFDATA2LSB; 882 elf_header.e_ident[EI_VERSION] = EV_CURRENT; 883 elf_header.e_ident[EI_OSABI] = ELFOSABI_LINUX; 884 elf_header.e_ident[EI_ABIVERSION] = 0; 885 elf_header.e_type = ET_DYN; 886 elf_header.e_version = 1; 887 elf_header.e_entry = 0; 888 elf_header.e_ehsize = sizeof(Elf_Ehdr); 889 elf_header.e_phentsize = sizeof(Elf_Phdr); 890 elf_header.e_shentsize = sizeof(Elf_Shdr); 891 elf_header.e_phoff = sizeof(Elf_Ehdr); 892 return elf_header; 893 } 894 BuildDynamicSection(const std::string & elf_file_path)895 void BuildDynamicSection(const std::string& elf_file_path) { 896 std::string soname(elf_file_path); 897 size_t directory_separator_pos = soname.rfind('/'); 898 if (directory_separator_pos != std::string::npos) { 899 soname = soname.substr(directory_separator_pos + 1); 900 } 901 // NB: We must add the name before adding DT_STRSZ. 902 Elf_Word soname_offset = dynstr_.AddName(soname); 903 904 dynamic_.AddDynamicTag(DT_HASH, 0, &hash_); 905 dynamic_.AddDynamicTag(DT_STRTAB, 0, &dynstr_); 906 dynamic_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_); 907 dynamic_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym), nullptr); 908 dynamic_.AddDynamicTag(DT_STRSZ, dynstr_.GetSize(), nullptr); 909 dynamic_.AddDynamicTag(DT_SONAME, soname_offset, nullptr); 910 } 911 BuildDynsymSection()912 void BuildDynsymSection() { 913 dynsym_.AddSymbol("oatdata", &rodata_, 0, true, 914 rodata_.GetSize(), STB_GLOBAL, STT_OBJECT); 915 dynsym_.AddSymbol("oatexec", &text_, 0, true, 916 text_.GetSize(), STB_GLOBAL, STT_OBJECT); 917 dynsym_.AddSymbol("oatlastword", &text_, text_.GetSize() - 4, 918 true, 4, STB_GLOBAL, STT_OBJECT); 919 if (bss_.GetSize() != 0u) { 920 dynsym_.AddSymbol("oatbss", &bss_, 0, true, 921 bss_.GetSize(), STB_GLOBAL, STT_OBJECT); 922 dynsym_.AddSymbol("oatbsslastword", &bss_, bss_.GetSize() - 4, 923 true, 4, STB_GLOBAL, STT_OBJECT); 924 } 925 } 926 927 InstructionSet isa_; 928 StrtabSection dynstr_; 929 SymtabSection dynsym_; 
  // Hash table over the .dynsym symbols.
  HashSection hash_;
  // Content produced lazily by the CodeOutput objects given to the constructor.
  OatSection rodata_;
  OatSection text_;
  // Only emitted when its size is non-zero.
  NoBitsSection bss_;
  DynamicSection dynamic_;
  // .strtab and .symtab are only emitted when .symtab has symbols.
  StrtabSection strtab_;
  SymtabSection symtab_;
  // Sections added through RegisterSection(); not owned.
  std::vector<Section*> other_sections_;
  // Names of all sections; always written last.
  StrtabSection shstrtab_;

  DISALLOW_COPY_AND_ASSIGN(ElfBuilder);
};

}  // namespace art

#endif  // ART_COMPILER_ELF_BUILDER_H_