1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "elf_reader.h"
18
19 #include <elf.h>
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <unistd.h>
25 #include <cstddef>
26 #include <memory>
27 #include <optional>
28
29 #include "berberis/base/bit_util.h"
30 #include "berberis/base/checks.h"
31 #include "berberis/base/macros.h"
32 #include "berberis/base/mapped_file_fragment.h"
33 #include "berberis/base/stringprintf.h"
34
35 #include "buffer.h"
36 #include "string_offset_table.h"
37 #include "string_table.h"
38 #include "zstd.h"
39
40 namespace nogrod {
41
42 using berberis::bit_cast;
43 using berberis::StringPrintf;
44
45 namespace {
46
// Returns the symbol type stored in the low four bits of an ELF symbol's st_info value.
[[nodiscard]] constexpr uint8_t ElfStType(uint32_t info) {
  constexpr uint32_t kSymbolTypeMask = 0xf;
  return static_cast<uint8_t>(info & kSymbolTypeMask);
}
50
// Bundle of the 32-bit ELF types used to instantiate ElfFileImpl.
// The class only carries type aliases and can never be constructed or copied.
class Elf32 {
 public:
  Elf32() = delete;
  Elf32(const Elf32&) = delete;
  const Elf32& operator=(const Elf32&) = delete;

  // Scalar field types.
  using Word = Elf32_Word;
  using Off = Elf32_Off;

  // On-disk structures. "Chrd" (the compression header, Elf32_Chdr) keeps the spelling
  // that the rest of this file refers to.
  using Ehdr = Elf32_Ehdr;
  using Shdr = Elf32_Shdr;
  using Sym = Elf32_Sym;
  using Chrd = Elf32_Chdr;
};
65
// Bundle of the 64-bit ELF types used to instantiate ElfFileImpl.
// The class only carries type aliases and can never be constructed or copied.
class Elf64 {
 public:
  Elf64() = delete;
  Elf64(const Elf64&) = delete;
  const Elf64& operator=(const Elf64&) = delete;

  // Scalar field types.
  using Word = Elf64_Word;
  using Off = Elf64_Off;

  // On-disk structures. "Chrd" (the compression header, Elf64_Chdr) keeps the spelling
  // that the rest of this file refers to.
  using Ehdr = Elf64_Ehdr;
  using Shdr = Elf64_Shdr;
  using Sym = Elf64_Sym;
  using Chrd = Elf64_Chdr;
};
80
81 template <typename ElfT>
82 class ElfFileImpl : public ElfFile {
83 public:
84 ~ElfFileImpl() override;
85
86 [[nodiscard]] static std::unique_ptr<ElfFileImpl<ElfT>> Create(const char* path,
87 int fd,
88 std::string* error_msg);
89
90 [[nodiscard]] bool ReadExportedSymbols(std::vector<std::string>* symbols,
91 std::string* error_msg) override;
92 [[nodiscard]] std::unique_ptr<DwarfInfo> ReadDwarfInfo(std::string* error_msg) override;
93
94 private:
95 explicit ElfFileImpl(const char* path, int fd);
96 [[nodiscard]] bool Init(std::string* error_msg);
97 [[nodiscard]] bool ValidateShdrTable(std::string* error_msg);
98 template <typename T>
ReadSection(const ElfT::Shdr * section_header,std::string * error_msg)99 [[nodiscard]] std::optional<Buffer<T>> ReadSection(const ElfT::Shdr* section_header,
100 std::string* error_msg) {
101 // We support any type as long as its size is 1
102 static_assert(sizeof(T) == 1);
103
104 const T* section_data = ShdrOffsetToAddr<const T>(section_header);
105 size_t section_size = section_header->sh_size;
106
107 if (!IsCompressed(section_header)) {
108 return Buffer{section_data, section_size};
109 }
110
111 std::optional<std::vector<T>> uncompressed_data =
112 UncompressSection(section_data, section_size, error_msg);
113 if (!uncompressed_data.has_value()) {
114 return std::nullopt;
115 }
116 return Buffer{std::move(*uncompressed_data)};
117 }
118
119 template <typename T>
UncompressSection(const T * section_data,size_t section_size,std::string * error_msg)120 [[nodiscard]] std::optional<std::vector<T>> UncompressSection(const T* section_data,
121 size_t section_size,
122 std::string* error_msg) {
123 // We support any type as long as its size is 1
124 static_assert(sizeof(T) == 1);
125
126 // Read the header
127 constexpr size_t kChdrSize = sizeof(typename ElfT::Chrd);
128 if (static_cast<size_t>(section_size) < kChdrSize) {
129 *error_msg = "Invalid compressed section (it is too small to fit Elf_Chrd)";
130 return std::nullopt;
131 }
132
133 const typename ElfT::Chrd* chrd = reinterpret_cast<const ElfT::Chrd*>(section_data);
134 if (chrd->ch_type != ELFCOMPRESS_ZSTD) {
135 *error_msg = StringPrintf("Unsupported compression type: %d, expected ELFCOMPRESS_ZSTD(2)",
136 chrd->ch_type);
137 return std::nullopt;
138 }
139
140 // Uncompress
141 size_t uncompressed_size = chrd->ch_size;
142 const T* compressed_data = section_data + kChdrSize;
143 size_t compressed_size = section_size - kChdrSize;
144 std::vector<T> uncompressed_data(uncompressed_size);
145
146 size_t result = ZSTD_decompress(reinterpret_cast<uint8_t*>(uncompressed_data.data()),
147 uncompressed_data.size(),
148 reinterpret_cast<const uint8_t*>(compressed_data),
149 compressed_size);
150
151 if (ZSTD_isError(result)) {
152 *error_msg = StringPrintf("Error while uncompressing zstd: %s", ZSTD_getErrorName(result));
153 return std::nullopt;
154 }
155
156 // We expect the output buffer of the size specified in the chrd->ch_size
157 CHECK_EQ(result, uncompressed_size);
158
159 return uncompressed_data;
160 }
161
162 [[nodiscard]] const typename ElfT::Shdr* FindSectionHeaderByType(typename ElfT::Word sh_type);
163 [[nodiscard]] const typename ElfT::Shdr* FindSectionHeaderByName(const char* name);
164
165 template <typename T>
166 [[nodiscard]] T* OffsetToAddr(typename ElfT::Off offset) const;
167
168 template <typename T>
169 [[nodiscard]] T* ShdrOffsetToAddr(const typename ElfT::Shdr* shdr) const;
170
IsCompressed(const ElfT::Shdr * shdr)171 [[nodiscard]] static constexpr bool IsCompressed(const ElfT::Shdr* shdr) {
172 return (shdr->sh_flags & SHF_COMPRESSED) != 0;
173 }
174
175 std::string path_;
176 int fd_;
177
178 MappedFileFragment mapped_file_;
179
180 const typename ElfT::Ehdr* header_;
181
182 const typename ElfT::Shdr* shdr_table_;
183 size_t shdr_num_;
184
185 StringTable strtab_;
186 };
187
188 template <typename ElfT>
ElfFileImpl(const char * path,int fd)189 ElfFileImpl<ElfT>::ElfFileImpl(const char* path, int fd)
190 : path_(path), fd_(fd), header_(nullptr), shdr_table_(nullptr), shdr_num_(0) {}
191
192 template <typename ElfT>
~ElfFileImpl()193 ElfFileImpl<ElfT>::~ElfFileImpl() {
194 close(fd_);
195 }
196
197 template <typename ElfT>
ValidateShdrTable(std::string * error_msg)198 bool ElfFileImpl<ElfT>::ValidateShdrTable(std::string* error_msg) {
199 size_t file_size = mapped_file_.size();
200 for (size_t i = 0; i < shdr_num_; ++i) {
201 const typename ElfT::Shdr* shdr = shdr_table_ + i;
202
203 if (shdr->sh_link >= shdr_num_) {
204 *error_msg = StringPrintf(
205 "section %zd: sh_link (%d) is out of bounds (shnum=%zd)", i, shdr->sh_link, shdr_num_);
206 return false;
207 }
208
209 // Skip boundary checks for SHT_NOBIT section headers.
210 if (shdr->sh_type == SHT_NOBITS) {
211 continue;
212 }
213
214 if (shdr->sh_offset >= file_size) {
215 *error_msg = StringPrintf("section %zd: offset (%zd) is out of bounds (file_size=%zd)",
216 i,
217 static_cast<size_t>(shdr->sh_offset),
218 file_size);
219 return false;
220 }
221
222 size_t section_end = shdr->sh_offset + shdr->sh_size;
223 if (section_end > file_size) {
224 *error_msg = StringPrintf("section %zd: offset+size (%zd) is out of bounds (file_size=%zd)",
225 i,
226 section_end,
227 file_size);
228 return false;
229 }
230 }
231
232 return true;
233 }
234
235 template <typename ElfT>
236 template <typename T>
OffsetToAddr(typename ElfT::Off offset) const237 T* ElfFileImpl<ElfT>::OffsetToAddr(typename ElfT::Off offset) const {
238 auto start = bit_cast<uintptr_t>(mapped_file_.data());
239 return bit_cast<T*>(start + offset);
240 }
241
242 template <typename ElfT>
243 template <typename T>
ShdrOffsetToAddr(const typename ElfT::Shdr * shdr) const244 T* ElfFileImpl<ElfT>::ShdrOffsetToAddr(const typename ElfT::Shdr* shdr) const {
245 CHECK(shdr->sh_type != SHT_NOBITS);
246 return OffsetToAddr<T>(shdr->sh_offset);
247 }
248
249 template <typename ElfT>
Init(std::string * error_msg)250 bool ElfFileImpl<ElfT>::Init(std::string* error_msg) {
251 struct stat st {};
252 if (fstat(fd_, &st) == -1) {
253 *error_msg = StringPrintf("unable to stat \"%s\": %s", path_.c_str(), strerror(errno));
254 return false;
255 }
256
257 size_t size = st.st_size;
258
259 if (!mapped_file_.Map(fd_, 0, 0, size)) {
260 *error_msg = StringPrintf("unable to map the file \"%s\"", path_.c_str());
261 return false;
262 }
263
264 if (size < sizeof(typename ElfT::Ehdr)) {
265 *error_msg = StringPrintf(
266 "file \"%s\" is too small(%zd), there is not enough space for an ELF header(%zd)",
267 path_.c_str(),
268 size,
269 sizeof(typename ElfT::Ehdr));
270 return false;
271 }
272
273 header_ = OffsetToAddr<const typename ElfT::Ehdr>(0);
274
275 uintptr_t shdr_offset = header_->e_shoff;
276 size_t shdr_num = header_->e_shnum;
277
278 if (header_->e_shentsize != sizeof(typename ElfT::Shdr)) {
279 *error_msg = StringPrintf("invalid e_shentsize: %d, expected: %zd",
280 header_->e_shentsize,
281 sizeof(typename ElfT::Shdr));
282 return false;
283 }
284
285 if (shdr_offset >= size) {
286 *error_msg = StringPrintf("file \"%s\" is too small, e_shoff(%zd) is out of bounds (%zd)",
287 path_.c_str(),
288 shdr_offset,
289 size);
290 return false;
291 }
292
293 if (shdr_offset + (shdr_num * sizeof(typename ElfT::Shdr)) > size) {
294 *error_msg =
295 StringPrintf("file \"%s\" is too small, e_shoff + shdr_size (%zd) is out of bounds (%zd)",
296 path_.c_str(),
297 shdr_offset + (shdr_num * sizeof(typename ElfT::Shdr)),
298 size);
299 return false;
300 }
301
302 shdr_table_ = OffsetToAddr<const typename ElfT::Shdr>(shdr_offset);
303 shdr_num_ = shdr_num;
304
305 if (!ValidateShdrTable(error_msg)) {
306 return false;
307 }
308
309 if (header_->e_shstrndx == SHN_UNDEF) {
310 *error_msg = StringPrintf(
311 "\"%s\": e_shstrndx is not defined, this is not good because "
312 "section names are needed to extract dwarf_info",
313 path_.c_str());
314 return false;
315 }
316
317 if (header_->e_shstrndx >= shdr_num) {
318 *error_msg = StringPrintf("\"%s\" invalid e_shstrndx (%d) - out of bounds (e_shnum=%zd)",
319 path_.c_str(),
320 header_->e_shstrndx,
321 shdr_num);
322 return false;
323 }
324
325 const typename ElfT::Shdr* strtab_shdr = &shdr_table_[header_->e_shstrndx];
326
327 strtab_ = StringTable(Buffer{ShdrOffsetToAddr<const char>(strtab_shdr), strtab_shdr->sh_size});
328
329 return true;
330 }
331
332 template <typename ElfT>
Create(const char * path,int fd,std::string * error_msg)333 std::unique_ptr<ElfFileImpl<ElfT>> ElfFileImpl<ElfT>::Create(const char* path,
334 int fd,
335 std::string* error_msg) {
336 std::unique_ptr<ElfFileImpl<ElfT>> result(new ElfFileImpl<ElfT>(path, fd));
337 if (!result->Init(error_msg)) {
338 return nullptr;
339 }
340
341 return result;
342 }
343
344 template <typename ElfT>
FindSectionHeaderByType(typename ElfT::Word sh_type)345 const typename ElfT::Shdr* ElfFileImpl<ElfT>::FindSectionHeaderByType(typename ElfT::Word sh_type) {
346 for (size_t i = 0; i < shdr_num_; ++i) {
347 if (shdr_table_[i].sh_type == sh_type) {
348 return shdr_table_ + i;
349 }
350 }
351
352 return nullptr;
353 }
354
355 template <typename ElfT>
FindSectionHeaderByName(const char * name)356 const typename ElfT::Shdr* ElfFileImpl<ElfT>::FindSectionHeaderByName(const char* name) {
357 for (size_t i = 0; i < shdr_num_; ++i) {
358 if (strcmp(name, strtab_.GetString(shdr_table_[i].sh_name)) == 0) {
359 return shdr_table_ + i;
360 }
361 }
362
363 return nullptr;
364 }
365
366 template <typename ElfT>
ReadExportedSymbols(std::vector<std::string> * symbols,std::string * error_msg)367 bool ElfFileImpl<ElfT>::ReadExportedSymbols(std::vector<std::string>* symbols,
368 std::string* error_msg) {
369 const typename ElfT::Shdr* dynsym_shdr = FindSectionHeaderByType(SHT_DYNSYM);
370
371 // This section is not expected to be compressed
372 if (IsCompressed(dynsym_shdr)) {
373 *error_msg = "dynamic symbol section is not expected to be compressed";
374 return false;
375 }
376
377 if (dynsym_shdr == nullptr) {
378 *error_msg = "dynamic symbol section was not found";
379 return false;
380 }
381
382 if (dynsym_shdr->sh_size % sizeof(typename ElfT::Sym) != 0) {
383 *error_msg = StringPrintf("invalid SHT_DYNSYM section size(%zd): should be divisible by %zd",
384 static_cast<size_t>(dynsym_shdr->sh_size),
385 sizeof(typename ElfT::Sym));
386 return false;
387 }
388
389 size_t dynsym_num = dynsym_shdr->sh_size / sizeof(typename ElfT::Sym);
390 const auto* dynsyms = ShdrOffsetToAddr<const typename ElfT::Sym>(dynsym_shdr);
391
392 const typename ElfT::Shdr* strtab_shdr = shdr_table_ + dynsym_shdr->sh_link;
393
394 // String table for .dynsym section is also not expected to be compressed
395 if (IsCompressed(strtab_shdr)) {
396 *error_msg = "string table for dynamic symbol section is not expected to be compressed";
397 return false;
398 }
399
400 const StringTable strtab(Buffer{ShdrOffsetToAddr<const char>(strtab_shdr), strtab_shdr->sh_size});
401
402 for (size_t i = 0; i < dynsym_num; ++i) {
403 const typename ElfT::Sym* sym = dynsyms + i;
404 // skip undefined symbols
405 if (sym->st_shndx == SHN_UNDEF) {
406 continue;
407 }
408
409 // We are interested only in functions and variables.
410 // This is a bit strange but the fact of the matter is that ld.gold generates OBJECT
411 // of size 0 for version labels - we need to skip them as well.
412 uint8_t st_type = ElfStType(sym->st_info);
413 if (st_type == STT_FUNC || (st_type == STT_OBJECT && sym->st_size != 0)) {
414 symbols->push_back(strtab.GetString(sym->st_name));
415 }
416 }
417
418 return true;
419 }
420
421 template <typename ElfT>
ReadDwarfInfo(std::string * error_msg)422 std::unique_ptr<DwarfInfo> ElfFileImpl<ElfT>::ReadDwarfInfo(std::string* error_msg) {
423 const typename ElfT::Shdr* dwarf_abbrev_shdr = FindSectionHeaderByName(".debug_abbrev");
424 if (dwarf_abbrev_shdr == nullptr) {
425 *error_msg = "couldn't find .debug_abbrev section";
426 return nullptr;
427 }
428
429 const typename ElfT::Shdr* dwarf_info_shdr = FindSectionHeaderByName(".debug_info");
430 if (dwarf_info_shdr == nullptr) {
431 *error_msg = "couldn't find .debug_info section";
432 return nullptr;
433 }
434
435 const typename ElfT::Shdr* dwarf_str_shdr = FindSectionHeaderByName(".debug_str");
436 if (dwarf_str_shdr == nullptr) {
437 *error_msg = "couldn't find .debug_str section";
438 return nullptr;
439 }
440
441 auto string_table_buf = ReadSection<char>(dwarf_str_shdr, error_msg);
442 if (!string_table_buf.has_value()) {
443 return nullptr;
444 }
445
446 StringTable string_table{std::move(*string_table_buf)};
447
448 // This section is optional (at least as of now)
449 const typename ElfT::Shdr* debug_str_offsets_shdr = FindSectionHeaderByName(".debug_str_offsets");
450 std::optional<StringOffsetTable> string_offsets_table;
451 if (debug_str_offsets_shdr != nullptr) {
452 auto string_offset_table_buf = ReadSection<uint8_t>(debug_str_offsets_shdr, error_msg);
453 if (!string_offset_table_buf.has_value()) {
454 return nullptr;
455 }
456 string_offsets_table.emplace(std::move(*string_offset_table_buf));
457 }
458
459 auto dwarf_abbrev_buf = ReadSection<uint8_t>(dwarf_abbrev_shdr, error_msg);
460 if (!dwarf_abbrev_buf.has_value()) {
461 return nullptr;
462 }
463
464 auto dwarf_info_buf = ReadSection<uint8_t>(dwarf_info_shdr, error_msg);
465 if (!dwarf_info_buf.has_value()) {
466 return nullptr;
467 }
468
469 std::unique_ptr<DwarfInfo> dwarf_info(new DwarfInfo(std::move(*dwarf_abbrev_buf),
470 std::move(*dwarf_info_buf),
471 std::move(string_table),
472 std::move(string_offsets_table)));
473
474 if (!dwarf_info->Parse(error_msg)) {
475 return nullptr;
476 }
477
478 return dwarf_info;
479 }
480
481 } // namespace
482
Load(const char * path,std::string * error_msg)483 std::unique_ptr<ElfFile> ElfFile::Load(const char* path, std::string* error_msg) {
484 int fd = TEMP_FAILURE_RETRY(open(path, O_RDONLY | O_CLOEXEC));
485 if (fd == -1) {
486 *error_msg = strerror(errno);
487 return nullptr;
488 }
489
490 // Read header in order verify the file and detect bitness
491
492 uint8_t e_ident[EI_NIDENT];
493 ssize_t res = TEMP_FAILURE_RETRY(pread64(fd, e_ident, sizeof(e_ident), 0));
494 if (res < 0) {
495 *error_msg = strerror(errno);
496 return nullptr;
497 }
498
499 if (res != sizeof(e_ident)) {
500 *error_msg = "file is too small for an ELF file";
501 return nullptr;
502 }
503
504 if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) {
505 *error_msg = "bad ELF magic";
506 return nullptr;
507 }
508
509 std::unique_ptr<ElfFile> result;
510
511 if (e_ident[EI_CLASS] == ELFCLASS32) {
512 result = ElfFileImpl<Elf32>::Create(path, fd, error_msg);
513 } else if (e_ident[EI_CLASS] == ELFCLASS64) {
514 result = ElfFileImpl<Elf64>::Create(path, fd, error_msg);
515 } else {
516 *error_msg = StringPrintf("bad EI_CLASS: %d", e_ident[EI_CLASS]);
517 }
518
519 return result;
520 }
521
522 } // namespace nogrod
523