1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "elf_reader.h"
18 
19 #include <elf.h>
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <unistd.h>
25 #include <cstddef>
26 #include <memory>
27 #include <optional>
28 
29 #include "berberis/base/bit_util.h"
30 #include "berberis/base/checks.h"
31 #include "berberis/base/macros.h"
32 #include "berberis/base/mapped_file_fragment.h"
33 #include "berberis/base/stringprintf.h"
34 
35 #include "buffer.h"
36 #include "string_offset_table.h"
37 #include "string_table.h"
38 #include "zstd.h"
39 
40 namespace nogrod {
41 
42 using berberis::bit_cast;
43 using berberis::StringPrintf;
44 
45 namespace {
46 
// Returns the low four bits of a symbol's st_info value; ReadExportedSymbols() compares the
// result against the STT_* constants.
[[nodiscard]] constexpr uint8_t ElfStType(uint32_t info) {
  constexpr uint32_t kTypeMask = 0xf;
  return static_cast<uint8_t>(info & kTypeMask);
}
50 
// Compile-time bundle of the 32-bit ELF types from <elf.h>. It is passed as the ElfT template
// argument of ElfFileImpl and is never instantiated.
struct Elf32 {
  // Scalar field types.
  using Off = Elf32_Off;
  using Word = Elf32_Word;

  // File structures. Chrd is the compression header (Elf32_Chdr).
  using Chrd = Elf32_Chdr;
  using Ehdr = Elf32_Ehdr;
  using Shdr = Elf32_Shdr;
  using Sym = Elf32_Sym;

  // Construction and copying are disabled: the type only carries aliases.
  Elf32() = delete;
  Elf32(const Elf32&) = delete;
  const Elf32& operator=(const Elf32&) = delete;
};
65 
// Compile-time bundle of the 64-bit ELF types from <elf.h>. It is passed as the ElfT template
// argument of ElfFileImpl and is never instantiated.
struct Elf64 {
  // Scalar field types.
  using Off = Elf64_Off;
  using Word = Elf64_Word;

  // File structures. Chrd is the compression header (Elf64_Chdr).
  using Chrd = Elf64_Chdr;
  using Ehdr = Elf64_Ehdr;
  using Shdr = Elf64_Shdr;
  using Sym = Elf64_Sym;

  // Construction and copying are disabled: the type only carries aliases.
  Elf64() = delete;
  Elf64(const Elf64&) = delete;
  const Elf64& operator=(const Elf64&) = delete;
};
80 
81 template <typename ElfT>
82 class ElfFileImpl : public ElfFile {
83  public:
84   ~ElfFileImpl() override;
85 
86   [[nodiscard]] static std::unique_ptr<ElfFileImpl<ElfT>> Create(const char* path,
87                                                                  int fd,
88                                                                  std::string* error_msg);
89 
90   [[nodiscard]] bool ReadExportedSymbols(std::vector<std::string>* symbols,
91                                          std::string* error_msg) override;
92   [[nodiscard]] std::unique_ptr<DwarfInfo> ReadDwarfInfo(std::string* error_msg) override;
93 
94  private:
95   explicit ElfFileImpl(const char* path, int fd);
96   [[nodiscard]] bool Init(std::string* error_msg);
97   [[nodiscard]] bool ValidateShdrTable(std::string* error_msg);
98   template <typename T>
ReadSection(const ElfT::Shdr * section_header,std::string * error_msg)99   [[nodiscard]] std::optional<Buffer<T>> ReadSection(const ElfT::Shdr* section_header,
100                                                      std::string* error_msg) {
101     // We support any type as long as its size is 1
102     static_assert(sizeof(T) == 1);
103 
104     const T* section_data = ShdrOffsetToAddr<const T>(section_header);
105     size_t section_size = section_header->sh_size;
106 
107     if (!IsCompressed(section_header)) {
108       return Buffer{section_data, section_size};
109     }
110 
111     std::optional<std::vector<T>> uncompressed_data =
112         UncompressSection(section_data, section_size, error_msg);
113     if (!uncompressed_data.has_value()) {
114       return std::nullopt;
115     }
116     return Buffer{std::move(*uncompressed_data)};
117   }
118 
119   template <typename T>
UncompressSection(const T * section_data,size_t section_size,std::string * error_msg)120   [[nodiscard]] std::optional<std::vector<T>> UncompressSection(const T* section_data,
121                                                                 size_t section_size,
122                                                                 std::string* error_msg) {
123     // We support any type as long as its size is 1
124     static_assert(sizeof(T) == 1);
125 
126     // Read the header
127     constexpr size_t kChdrSize = sizeof(typename ElfT::Chrd);
128     if (static_cast<size_t>(section_size) < kChdrSize) {
129       *error_msg = "Invalid compressed section (it is too small to fit Elf_Chrd)";
130       return std::nullopt;
131     }
132 
133     const typename ElfT::Chrd* chrd = reinterpret_cast<const ElfT::Chrd*>(section_data);
134     if (chrd->ch_type != ELFCOMPRESS_ZSTD) {
135       *error_msg = StringPrintf("Unsupported compression type: %d, expected ELFCOMPRESS_ZSTD(2)",
136                                 chrd->ch_type);
137       return std::nullopt;
138     }
139 
140     // Uncompress
141     size_t uncompressed_size = chrd->ch_size;
142     const T* compressed_data = section_data + kChdrSize;
143     size_t compressed_size = section_size - kChdrSize;
144     std::vector<T> uncompressed_data(uncompressed_size);
145 
146     size_t result = ZSTD_decompress(reinterpret_cast<uint8_t*>(uncompressed_data.data()),
147                                     uncompressed_data.size(),
148                                     reinterpret_cast<const uint8_t*>(compressed_data),
149                                     compressed_size);
150 
151     if (ZSTD_isError(result)) {
152       *error_msg = StringPrintf("Error while uncompressing zstd: %s", ZSTD_getErrorName(result));
153       return std::nullopt;
154     }
155 
156     // We expect the output buffer of the size specified in the chrd->ch_size
157     CHECK_EQ(result, uncompressed_size);
158 
159     return uncompressed_data;
160   }
161 
162   [[nodiscard]] const typename ElfT::Shdr* FindSectionHeaderByType(typename ElfT::Word sh_type);
163   [[nodiscard]] const typename ElfT::Shdr* FindSectionHeaderByName(const char* name);
164 
165   template <typename T>
166   [[nodiscard]] T* OffsetToAddr(typename ElfT::Off offset) const;
167 
168   template <typename T>
169   [[nodiscard]] T* ShdrOffsetToAddr(const typename ElfT::Shdr* shdr) const;
170 
IsCompressed(const ElfT::Shdr * shdr)171   [[nodiscard]] static constexpr bool IsCompressed(const ElfT::Shdr* shdr) {
172     return (shdr->sh_flags & SHF_COMPRESSED) != 0;
173   }
174 
175   std::string path_;
176   int fd_;
177 
178   MappedFileFragment mapped_file_;
179 
180   const typename ElfT::Ehdr* header_;
181 
182   const typename ElfT::Shdr* shdr_table_;
183   size_t shdr_num_;
184 
185   StringTable strtab_;
186 };
187 
188 template <typename ElfT>
ElfFileImpl(const char * path,int fd)189 ElfFileImpl<ElfT>::ElfFileImpl(const char* path, int fd)
190     : path_(path), fd_(fd), header_(nullptr), shdr_table_(nullptr), shdr_num_(0) {}
191 
192 template <typename ElfT>
~ElfFileImpl()193 ElfFileImpl<ElfT>::~ElfFileImpl() {
194   close(fd_);
195 }
196 
197 template <typename ElfT>
ValidateShdrTable(std::string * error_msg)198 bool ElfFileImpl<ElfT>::ValidateShdrTable(std::string* error_msg) {
199   size_t file_size = mapped_file_.size();
200   for (size_t i = 0; i < shdr_num_; ++i) {
201     const typename ElfT::Shdr* shdr = shdr_table_ + i;
202 
203     if (shdr->sh_link >= shdr_num_) {
204       *error_msg = StringPrintf(
205           "section %zd: sh_link (%d) is out of bounds (shnum=%zd)", i, shdr->sh_link, shdr_num_);
206       return false;
207     }
208 
209     // Skip boundary checks for SHT_NOBIT section headers.
210     if (shdr->sh_type == SHT_NOBITS) {
211       continue;
212     }
213 
214     if (shdr->sh_offset >= file_size) {
215       *error_msg = StringPrintf("section %zd: offset (%zd) is out of bounds (file_size=%zd)",
216                                 i,
217                                 static_cast<size_t>(shdr->sh_offset),
218                                 file_size);
219       return false;
220     }
221 
222     size_t section_end = shdr->sh_offset + shdr->sh_size;
223     if (section_end > file_size) {
224       *error_msg = StringPrintf("section %zd: offset+size (%zd) is out of bounds (file_size=%zd)",
225                                 i,
226                                 section_end,
227                                 file_size);
228       return false;
229     }
230   }
231 
232   return true;
233 }
234 
235 template <typename ElfT>
236 template <typename T>
OffsetToAddr(typename ElfT::Off offset) const237 T* ElfFileImpl<ElfT>::OffsetToAddr(typename ElfT::Off offset) const {
238   auto start = bit_cast<uintptr_t>(mapped_file_.data());
239   return bit_cast<T*>(start + offset);
240 }
241 
242 template <typename ElfT>
243 template <typename T>
ShdrOffsetToAddr(const typename ElfT::Shdr * shdr) const244 T* ElfFileImpl<ElfT>::ShdrOffsetToAddr(const typename ElfT::Shdr* shdr) const {
245   CHECK(shdr->sh_type != SHT_NOBITS);
246   return OffsetToAddr<T>(shdr->sh_offset);
247 }
248 
249 template <typename ElfT>
Init(std::string * error_msg)250 bool ElfFileImpl<ElfT>::Init(std::string* error_msg) {
251   struct stat st {};
252   if (fstat(fd_, &st) == -1) {
253     *error_msg = StringPrintf("unable to stat \"%s\": %s", path_.c_str(), strerror(errno));
254     return false;
255   }
256 
257   size_t size = st.st_size;
258 
259   if (!mapped_file_.Map(fd_, 0, 0, size)) {
260     *error_msg = StringPrintf("unable to map the file \"%s\"", path_.c_str());
261     return false;
262   }
263 
264   if (size < sizeof(typename ElfT::Ehdr)) {
265     *error_msg = StringPrintf(
266         "file \"%s\" is too small(%zd), there is not enough space for an ELF header(%zd)",
267         path_.c_str(),
268         size,
269         sizeof(typename ElfT::Ehdr));
270     return false;
271   }
272 
273   header_ = OffsetToAddr<const typename ElfT::Ehdr>(0);
274 
275   uintptr_t shdr_offset = header_->e_shoff;
276   size_t shdr_num = header_->e_shnum;
277 
278   if (header_->e_shentsize != sizeof(typename ElfT::Shdr)) {
279     *error_msg = StringPrintf("invalid e_shentsize: %d, expected: %zd",
280                               header_->e_shentsize,
281                               sizeof(typename ElfT::Shdr));
282     return false;
283   }
284 
285   if (shdr_offset >= size) {
286     *error_msg = StringPrintf("file \"%s\" is too small, e_shoff(%zd) is out of bounds (%zd)",
287                               path_.c_str(),
288                               shdr_offset,
289                               size);
290     return false;
291   }
292 
293   if (shdr_offset + (shdr_num * sizeof(typename ElfT::Shdr)) > size) {
294     *error_msg =
295         StringPrintf("file \"%s\" is too small, e_shoff + shdr_size (%zd) is out of bounds (%zd)",
296                      path_.c_str(),
297                      shdr_offset + (shdr_num * sizeof(typename ElfT::Shdr)),
298                      size);
299     return false;
300   }
301 
302   shdr_table_ = OffsetToAddr<const typename ElfT::Shdr>(shdr_offset);
303   shdr_num_ = shdr_num;
304 
305   if (!ValidateShdrTable(error_msg)) {
306     return false;
307   }
308 
309   if (header_->e_shstrndx == SHN_UNDEF) {
310     *error_msg = StringPrintf(
311         "\"%s\": e_shstrndx is not defined, this is not good because "
312         "section names are needed to extract dwarf_info",
313         path_.c_str());
314     return false;
315   }
316 
317   if (header_->e_shstrndx >= shdr_num) {
318     *error_msg = StringPrintf("\"%s\" invalid e_shstrndx (%d) - out of bounds (e_shnum=%zd)",
319                               path_.c_str(),
320                               header_->e_shstrndx,
321                               shdr_num);
322     return false;
323   }
324 
325   const typename ElfT::Shdr* strtab_shdr = &shdr_table_[header_->e_shstrndx];
326 
327   strtab_ = StringTable(Buffer{ShdrOffsetToAddr<const char>(strtab_shdr), strtab_shdr->sh_size});
328 
329   return true;
330 }
331 
332 template <typename ElfT>
Create(const char * path,int fd,std::string * error_msg)333 std::unique_ptr<ElfFileImpl<ElfT>> ElfFileImpl<ElfT>::Create(const char* path,
334                                                              int fd,
335                                                              std::string* error_msg) {
336   std::unique_ptr<ElfFileImpl<ElfT>> result(new ElfFileImpl<ElfT>(path, fd));
337   if (!result->Init(error_msg)) {
338     return nullptr;
339   }
340 
341   return result;
342 }
343 
344 template <typename ElfT>
FindSectionHeaderByType(typename ElfT::Word sh_type)345 const typename ElfT::Shdr* ElfFileImpl<ElfT>::FindSectionHeaderByType(typename ElfT::Word sh_type) {
346   for (size_t i = 0; i < shdr_num_; ++i) {
347     if (shdr_table_[i].sh_type == sh_type) {
348       return shdr_table_ + i;
349     }
350   }
351 
352   return nullptr;
353 }
354 
355 template <typename ElfT>
FindSectionHeaderByName(const char * name)356 const typename ElfT::Shdr* ElfFileImpl<ElfT>::FindSectionHeaderByName(const char* name) {
357   for (size_t i = 0; i < shdr_num_; ++i) {
358     if (strcmp(name, strtab_.GetString(shdr_table_[i].sh_name)) == 0) {
359       return shdr_table_ + i;
360     }
361   }
362 
363   return nullptr;
364 }
365 
366 template <typename ElfT>
ReadExportedSymbols(std::vector<std::string> * symbols,std::string * error_msg)367 bool ElfFileImpl<ElfT>::ReadExportedSymbols(std::vector<std::string>* symbols,
368                                             std::string* error_msg) {
369   const typename ElfT::Shdr* dynsym_shdr = FindSectionHeaderByType(SHT_DYNSYM);
370 
371   // This section is not expected to be compressed
372   if (IsCompressed(dynsym_shdr)) {
373     *error_msg = "dynamic symbol section is not expected to be compressed";
374     return false;
375   }
376 
377   if (dynsym_shdr == nullptr) {
378     *error_msg = "dynamic symbol section was not found";
379     return false;
380   }
381 
382   if (dynsym_shdr->sh_size % sizeof(typename ElfT::Sym) != 0) {
383     *error_msg = StringPrintf("invalid SHT_DYNSYM section size(%zd): should be divisible by %zd",
384                               static_cast<size_t>(dynsym_shdr->sh_size),
385                               sizeof(typename ElfT::Sym));
386     return false;
387   }
388 
389   size_t dynsym_num = dynsym_shdr->sh_size / sizeof(typename ElfT::Sym);
390   const auto* dynsyms = ShdrOffsetToAddr<const typename ElfT::Sym>(dynsym_shdr);
391 
392   const typename ElfT::Shdr* strtab_shdr = shdr_table_ + dynsym_shdr->sh_link;
393 
394   // String table for .dynsym section is also not expected to be compressed
395   if (IsCompressed(strtab_shdr)) {
396     *error_msg = "string table for dynamic symbol section is not expected to be compressed";
397     return false;
398   }
399 
400   const StringTable strtab(Buffer{ShdrOffsetToAddr<const char>(strtab_shdr), strtab_shdr->sh_size});
401 
402   for (size_t i = 0; i < dynsym_num; ++i) {
403     const typename ElfT::Sym* sym = dynsyms + i;
404     // skip undefined symbols
405     if (sym->st_shndx == SHN_UNDEF) {
406       continue;
407     }
408 
409     // We are interested only in functions and variables.
410     // This is a bit strange but the fact of the matter is that ld.gold generates OBJECT
411     // of size 0 for version labels - we need to skip them as well.
412     uint8_t st_type = ElfStType(sym->st_info);
413     if (st_type == STT_FUNC || (st_type == STT_OBJECT && sym->st_size != 0)) {
414       symbols->push_back(strtab.GetString(sym->st_name));
415     }
416   }
417 
418   return true;
419 }
420 
421 template <typename ElfT>
ReadDwarfInfo(std::string * error_msg)422 std::unique_ptr<DwarfInfo> ElfFileImpl<ElfT>::ReadDwarfInfo(std::string* error_msg) {
423   const typename ElfT::Shdr* dwarf_abbrev_shdr = FindSectionHeaderByName(".debug_abbrev");
424   if (dwarf_abbrev_shdr == nullptr) {
425     *error_msg = "couldn't find .debug_abbrev section";
426     return nullptr;
427   }
428 
429   const typename ElfT::Shdr* dwarf_info_shdr = FindSectionHeaderByName(".debug_info");
430   if (dwarf_info_shdr == nullptr) {
431     *error_msg = "couldn't find .debug_info section";
432     return nullptr;
433   }
434 
435   const typename ElfT::Shdr* dwarf_str_shdr = FindSectionHeaderByName(".debug_str");
436   if (dwarf_str_shdr == nullptr) {
437     *error_msg = "couldn't find .debug_str section";
438     return nullptr;
439   }
440 
441   auto string_table_buf = ReadSection<char>(dwarf_str_shdr, error_msg);
442   if (!string_table_buf.has_value()) {
443     return nullptr;
444   }
445 
446   StringTable string_table{std::move(*string_table_buf)};
447 
448   // This section is optional (at least as of now)
449   const typename ElfT::Shdr* debug_str_offsets_shdr = FindSectionHeaderByName(".debug_str_offsets");
450   std::optional<StringOffsetTable> string_offsets_table;
451   if (debug_str_offsets_shdr != nullptr) {
452     auto string_offset_table_buf = ReadSection<uint8_t>(debug_str_offsets_shdr, error_msg);
453     if (!string_offset_table_buf.has_value()) {
454       return nullptr;
455     }
456     string_offsets_table.emplace(std::move(*string_offset_table_buf));
457   }
458 
459   auto dwarf_abbrev_buf = ReadSection<uint8_t>(dwarf_abbrev_shdr, error_msg);
460   if (!dwarf_abbrev_buf.has_value()) {
461     return nullptr;
462   }
463 
464   auto dwarf_info_buf = ReadSection<uint8_t>(dwarf_info_shdr, error_msg);
465   if (!dwarf_info_buf.has_value()) {
466     return nullptr;
467   }
468 
469   std::unique_ptr<DwarfInfo> dwarf_info(new DwarfInfo(std::move(*dwarf_abbrev_buf),
470                                                       std::move(*dwarf_info_buf),
471                                                       std::move(string_table),
472                                                       std::move(string_offsets_table)));
473 
474   if (!dwarf_info->Parse(error_msg)) {
475     return nullptr;
476   }
477 
478   return dwarf_info;
479 }
480 
481 }  // namespace
482 
Load(const char * path,std::string * error_msg)483 std::unique_ptr<ElfFile> ElfFile::Load(const char* path, std::string* error_msg) {
484   int fd = TEMP_FAILURE_RETRY(open(path, O_RDONLY | O_CLOEXEC));
485   if (fd == -1) {
486     *error_msg = strerror(errno);
487     return nullptr;
488   }
489 
490   // Read header in order verify the file and detect bitness
491 
492   uint8_t e_ident[EI_NIDENT];
493   ssize_t res = TEMP_FAILURE_RETRY(pread64(fd, e_ident, sizeof(e_ident), 0));
494   if (res < 0) {
495     *error_msg = strerror(errno);
496     return nullptr;
497   }
498 
499   if (res != sizeof(e_ident)) {
500     *error_msg = "file is too small for an ELF file";
501     return nullptr;
502   }
503 
504   if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) {
505     *error_msg = "bad ELF magic";
506     return nullptr;
507   }
508 
509   std::unique_ptr<ElfFile> result;
510 
511   if (e_ident[EI_CLASS] == ELFCLASS32) {
512     result = ElfFileImpl<Elf32>::Create(path, fd, error_msg);
513   } else if (e_ident[EI_CLASS] == ELFCLASS64) {
514     result = ElfFileImpl<Elf64>::Create(path, fd, error_msg);
515   } else {
516     *error_msg = StringPrintf("bad EI_CLASS: %d", e_ident[EI_CLASS]);
517   }
518 
519   return result;
520 }
521 
522 }  // namespace nogrod
523