1 // Copyright (c) 2011 Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
31 
32 // dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
33 // Find all the debugging info in a file and dump it as a Breakpad symbol file.
34 
35 #include "common/linux/dump_symbols.h"
36 
37 #include <assert.h>
38 #include <elf.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <link.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <sys/mman.h>
46 #include <sys/stat.h>
47 #include <unistd.h>
48 
49 #include <iostream>
50 #include <set>
51 #include <string>
52 #include <utility>
53 #include <vector>
54 
55 #include "common/dwarf/bytereader-inl.h"
56 #include "common/dwarf/dwarf2diehandler.h"
57 #include "common/dwarf_cfi_to_module.h"
58 #include "common/dwarf_cu_to_module.h"
59 #include "common/dwarf_line_to_module.h"
60 #include "common/linux/crc32.h"
61 #include "common/linux/eintr_wrapper.h"
62 #include "common/linux/elfutils.h"
63 #include "common/linux/elfutils-inl.h"
64 #include "common/linux/elf_symbols_to_module.h"
65 #include "common/linux/file_id.h"
66 #include "common/module.h"
67 #include "common/scoped_ptr.h"
68 #ifndef NO_STABS_SUPPORT
69 #include "common/stabs_reader.h"
70 #include "common/stabs_to_module.h"
71 #endif
72 #include "common/using_std_string.h"
73 
74 // This namespace contains helper functions.
75 namespace {
76 
77 using google_breakpad::DumpOptions;
78 using google_breakpad::DwarfCFIToModule;
79 using google_breakpad::DwarfCUToModule;
80 using google_breakpad::DwarfLineToModule;
81 using google_breakpad::ElfClass;
82 using google_breakpad::ElfClass32;
83 using google_breakpad::ElfClass64;
84 using google_breakpad::FindElfSectionByName;
85 using google_breakpad::GetOffset;
86 using google_breakpad::IsValidElf;
87 using google_breakpad::Module;
88 #ifndef NO_STABS_SUPPORT
89 using google_breakpad::StabsToModule;
90 #endif
91 using google_breakpad::scoped_ptr;
92 
93 // Define AARCH64 ELF architecture if host machine does not include this define.
94 #ifndef EM_AARCH64
95 #define EM_AARCH64      183
96 #endif
97 
//
// FDWrapper
//
// Owns a file descriptor and closes it when the wrapper is destroyed,
// unless ownership was given up first with release(). The value -1 is
// used to mean "no descriptor is owned".
//
class FDWrapper {
 public:
  // Takes ownership of |fd|. Passing -1 creates a wrapper that owns nothing.
  explicit FDWrapper(int fd) :
    fd_(fd) {}

  // Closes the owned descriptor, if there is one.
  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }

  // Returns the owned descriptor (or -1) without giving up ownership.
  int get() const {
    return fd_;
  }

  // Gives up ownership: returns the descriptor and leaves the wrapper
  // holding -1, so the destructor will not close it.
  int release() {
    int fd = fd_;
    fd_ = -1;
    return fd;
  }

 private:
  // A copy would leave two wrappers closing the same descriptor, so the
  // copy operations are declared private and intentionally not defined.
  FDWrapper(const FDWrapper&);
  FDWrapper& operator=(const FDWrapper&);

  int fd_;
};
122 
//
// MmapWrapper
//
// Remembers a mapped region handed to it with set() and unmaps it when
// the wrapper is destroyed, unless release() was called in between.
//
class MmapWrapper {
 public:
  // Starts out tracking no mapping. All members are initialised so the
  // object never holds indeterminate values.
  MmapWrapper() : is_set_(false), base_(NULL), size_(0) {}

  // Unmaps the tracked region, if one is set and its base is non-NULL.
  ~MmapWrapper() {
    if (is_set_ && base_ != NULL) {
      assert(size_ > 0);
      munmap(base_, size_);
    }
  }

  // Starts tracking the region of |mapped_size| bytes at |mapped_address|.
  // Any region tracked before this call is simply forgotten, not unmapped.
  void set(void *mapped_address, size_t mapped_size) {
    is_set_ = true;
    base_ = mapped_address;
    size_ = mapped_size;
  }

  // Stops tracking the region without unmapping it. A region must have
  // been set beforehand (checked with assert).
  void release() {
    assert(is_set_);
    is_set_ = false;
    base_ = NULL;
    size_ = 0;
  }

 private:
  // A copy would leave two wrappers unmapping the same region, so the
  // copy operations are declared private and intentionally not defined.
  MmapWrapper(const MmapWrapper&);
  MmapWrapper& operator=(const MmapWrapper&);

  bool is_set_;   // True between set() and release().
  void* base_;    // Start of the tracked region, or NULL.
  size_t size_;   // Length of the tracked region in bytes.
};
154 
// Return the preferred loading address of the binary whose program header
// table is PROGRAM_HEADERS, an array of NHEADER entries. The result is the
// p_vaddr of the first PT_LOAD entry, or 0 if the table has none.
template<typename ElfClass>
typename ElfClass::Addr GetLoadingAddress(
    const typename ElfClass::Phdr* program_headers,
    int nheader) {
  typedef typename ElfClass::Phdr Phdr;

  // For non-PIC executables (e_type == ET_EXEC), the load address is
  // the start address of the first PT_LOAD segment.  (ELF requires
  // the segments to be sorted by load address.)  For PIC executables
  // and dynamic libraries (e_type == ET_DYN), this address will
  // normally be zero.
  const Phdr* segment = program_headers;
  for (int remaining = nheader; remaining > 0; --remaining, ++segment) {
    if (segment->p_type == PT_LOAD)
      return segment->p_vaddr;
  }

  // No loadable segment at all.
  return 0;
}
174 
175 #ifndef NO_STABS_SUPPORT
176 template<typename ElfClass>
LoadStabs(const typename ElfClass::Ehdr * elf_header,const typename ElfClass::Shdr * stab_section,const typename ElfClass::Shdr * stabstr_section,const bool big_endian,Module * module)177 bool LoadStabs(const typename ElfClass::Ehdr* elf_header,
178                const typename ElfClass::Shdr* stab_section,
179                const typename ElfClass::Shdr* stabstr_section,
180                const bool big_endian,
181                Module* module) {
182   // A callback object to handle data from the STABS reader.
183   StabsToModule handler(module);
184   // Find the addresses of the STABS data, and create a STABS reader object.
185   // On Linux, STABS entries always have 32-bit values, regardless of the
186   // address size of the architecture whose code they're describing, and
187   // the strings are always "unitized".
188   const uint8_t* stabs =
189       GetOffset<ElfClass, uint8_t>(elf_header, stab_section->sh_offset);
190   const uint8_t* stabstr =
191       GetOffset<ElfClass, uint8_t>(elf_header, stabstr_section->sh_offset);
192   google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
193                                       stabstr, stabstr_section->sh_size,
194                                       big_endian, 4, true, &handler);
195   // Read the STABS data, and do post-processing.
196   if (!reader.Process())
197     return false;
198   handler.Finalize();
199   return true;
200 }
201 #endif  // NO_STABS_SUPPORT
202 
203 // A line-to-module loader that accepts line number info parsed by
204 // dwarf2reader::LineInfo and populates a Module and a line vector
205 // with the results.
206 class DumperLineToModule: public DwarfCUToModule::LineToModuleHandler {
207  public:
208   // Create a line-to-module converter using BYTE_READER.
DumperLineToModule(dwarf2reader::ByteReader * byte_reader)209   explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
210       : byte_reader_(byte_reader) { }
StartCompilationUnit(const string & compilation_dir)211   void StartCompilationUnit(const string& compilation_dir) {
212     compilation_dir_ = compilation_dir;
213   }
ReadProgram(const char * program,uint64 length,Module * module,std::vector<Module::Line> * lines)214   void ReadProgram(const char* program, uint64 length,
215                    Module* module, std::vector<Module::Line>* lines) {
216     DwarfLineToModule handler(module, compilation_dir_, lines);
217     dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler);
218     parser.Start();
219   }
220  private:
221   string compilation_dir_;
222   dwarf2reader::ByteReader *byte_reader_;
223 };
224 
225 template<typename ElfClass>
LoadDwarf(const string & dwarf_filename,const typename ElfClass::Ehdr * elf_header,const bool big_endian,bool handle_inter_cu_refs,Module * module)226 bool LoadDwarf(const string& dwarf_filename,
227                const typename ElfClass::Ehdr* elf_header,
228                const bool big_endian,
229                bool handle_inter_cu_refs,
230                Module* module) {
231   typedef typename ElfClass::Shdr Shdr;
232 
233   const dwarf2reader::Endianness endianness = big_endian ?
234       dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
235   dwarf2reader::ByteReader byte_reader(endianness);
236 
237   // Construct a context for this file.
238   DwarfCUToModule::FileContext file_context(dwarf_filename,
239                                             module,
240                                             handle_inter_cu_refs);
241 
242   // Build a map of the ELF file's sections.
243   const Shdr* sections =
244       GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
245   int num_sections = elf_header->e_shnum;
246   const Shdr* section_names = sections + elf_header->e_shstrndx;
247   for (int i = 0; i < num_sections; i++) {
248     const Shdr* section = &sections[i];
249     string name = GetOffset<ElfClass, char>(elf_header,
250                                             section_names->sh_offset) +
251                   section->sh_name;
252     const char* contents = GetOffset<ElfClass, char>(elf_header,
253                                                      section->sh_offset);
254     file_context.AddSectionToSectionMap(name, contents, section->sh_size);
255   }
256 
257   // Parse all the compilation units in the .debug_info section.
258   DumperLineToModule line_to_module(&byte_reader);
259   dwarf2reader::SectionMap::const_iterator debug_info_entry =
260       file_context.section_map().find(".debug_info");
261   assert(debug_info_entry != file_context.section_map().end());
262   const std::pair<const char*, uint64>& debug_info_section =
263       debug_info_entry->second;
264   // This should never have been called if the file doesn't have a
265   // .debug_info section.
266   assert(debug_info_section.first);
267   uint64 debug_info_length = debug_info_section.second;
268   for (uint64 offset = 0; offset < debug_info_length;) {
269     // Make a handler for the root DIE that populates MODULE with the
270     // data that was found.
271     DwarfCUToModule::WarningReporter reporter(dwarf_filename, offset);
272     DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter);
273     // Make a Dwarf2Handler that drives the DIEHandler.
274     dwarf2reader::DIEDispatcher die_dispatcher(&root_handler);
275     // Make a DWARF parser for the compilation unit at OFFSET.
276     dwarf2reader::CompilationUnit reader(file_context.section_map(),
277                                          offset,
278                                          &byte_reader,
279                                          &die_dispatcher);
280     // Process the entire compilation unit; get the offset of the next.
281     offset += reader.Start();
282   }
283   return true;
284 }
285 
286 // Fill REGISTER_NAMES with the register names appropriate to the
287 // machine architecture given in HEADER, indexed by the register
288 // numbers used in DWARF call frame information. Return true on
289 // success, or false if HEADER's machine architecture is not
290 // supported.
291 template<typename ElfClass>
DwarfCFIRegisterNames(const typename ElfClass::Ehdr * elf_header,std::vector<string> * register_names)292 bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
293                            std::vector<string>* register_names) {
294   switch (elf_header->e_machine) {
295     case EM_386:
296       *register_names = DwarfCFIToModule::RegisterNames::I386();
297       return true;
298     case EM_ARM:
299       *register_names = DwarfCFIToModule::RegisterNames::ARM();
300       return true;
301     case EM_AARCH64:
302       *register_names = DwarfCFIToModule::RegisterNames::ARM64();
303       return true;
304     case EM_MIPS:
305       *register_names = DwarfCFIToModule::RegisterNames::MIPS();
306       return true;
307     case EM_X86_64:
308       *register_names = DwarfCFIToModule::RegisterNames::X86_64();
309       return true;
310     default:
311       return false;
312   }
313 }
314 
315 template<typename ElfClass>
LoadDwarfCFI(const string & dwarf_filename,const typename ElfClass::Ehdr * elf_header,const char * section_name,const typename ElfClass::Shdr * section,const bool eh_frame,const typename ElfClass::Shdr * got_section,const typename ElfClass::Shdr * text_section,const bool big_endian,Module * module)316 bool LoadDwarfCFI(const string& dwarf_filename,
317                   const typename ElfClass::Ehdr* elf_header,
318                   const char* section_name,
319                   const typename ElfClass::Shdr* section,
320                   const bool eh_frame,
321                   const typename ElfClass::Shdr* got_section,
322                   const typename ElfClass::Shdr* text_section,
323                   const bool big_endian,
324                   Module* module) {
325   // Find the appropriate set of register names for this file's
326   // architecture.
327   std::vector<string> register_names;
328   if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &register_names)) {
329     fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';"
330             " cannot convert DWARF call frame information\n",
331             dwarf_filename.c_str(), elf_header->e_machine);
332     return false;
333   }
334 
335   const dwarf2reader::Endianness endianness = big_endian ?
336       dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
337 
338   // Find the call frame information and its size.
339   const char* cfi =
340       GetOffset<ElfClass, char>(elf_header, section->sh_offset);
341   size_t cfi_size = section->sh_size;
342 
343   // Plug together the parser, handler, and their entourages.
344   DwarfCFIToModule::Reporter module_reporter(dwarf_filename, section_name);
345   DwarfCFIToModule handler(module, register_names, &module_reporter);
346   dwarf2reader::ByteReader byte_reader(endianness);
347 
348   byte_reader.SetAddressSize(ElfClass::kAddrSize);
349 
350   // Provide the base addresses for .eh_frame encoded pointers, if
351   // possible.
352   byte_reader.SetCFIDataBase(section->sh_addr, cfi);
353   if (got_section)
354     byte_reader.SetDataBase(got_section->sh_addr);
355   if (text_section)
356     byte_reader.SetTextBase(text_section->sh_addr);
357 
358   dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(dwarf_filename,
359                                                        section_name);
360   dwarf2reader::CallFrameInfo parser(cfi, cfi_size,
361                                      &byte_reader, &handler, &dwarf_reporter,
362                                      eh_frame);
363   parser.Start();
364   return true;
365 }
366 
LoadELF(const string & obj_file,MmapWrapper * map_wrapper,void ** elf_header)367 bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
368              void** elf_header) {
369   int obj_fd = open(obj_file.c_str(), O_RDONLY);
370   if (obj_fd < 0) {
371     fprintf(stderr, "Failed to open ELF file '%s': %s\n",
372             obj_file.c_str(), strerror(errno));
373     return false;
374   }
375   FDWrapper obj_fd_wrapper(obj_fd);
376   struct stat st;
377   if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
378     fprintf(stderr, "Unable to fstat ELF file '%s': %s\n",
379             obj_file.c_str(), strerror(errno));
380     return false;
381   }
382   void* obj_base = mmap(NULL, st.st_size,
383                         PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0);
384   if (obj_base == MAP_FAILED) {
385     fprintf(stderr, "Failed to mmap ELF file '%s': %s\n",
386             obj_file.c_str(), strerror(errno));
387     return false;
388   }
389   map_wrapper->set(obj_base, st.st_size);
390   *elf_header = obj_base;
391   if (!IsValidElf(*elf_header)) {
392     fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
393     return false;
394   }
395   return true;
396 }
397 
// Determine the byte order recorded in ELF_HEADER's EI_DATA identification
// byte. For ELFDATA2LSB or ELFDATA2MSB, store false or true respectively
// in *BIG_ENDIAN and return true. For any other value, print a message to
// stderr and return false without writing to *BIG_ENDIAN.
template<typename ElfClass>
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
                   bool* big_endian) {
  switch (elf_header->e_ident[EI_DATA]) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;
    case ELFDATA2MSB:
      *big_endian = true;
      return true;
    default:
      break;
  }

  fprintf(stderr, "bad data encoding in ELF header: %d\n",
          elf_header->e_ident[EI_DATA]);
  return false;
}
415 
// Resolve |right_path| with realpath() and report whether the result is
// exactly the string |left_abspath|. Returns false when |right_path|
// cannot be resolved.
bool IsSameFile(const char* left_abspath, const string& right_path) {
  char resolved_right[PATH_MAX];
  const bool resolved =
      realpath(right_path.c_str(), resolved_right) != NULL;
  return resolved && strcmp(left_abspath, resolved_right) == 0;
}
424 
425 // Read the .gnu_debuglink and get the debug file name. If anything goes
426 // wrong, return an empty string.
ReadDebugLink(const char * debuglink,const size_t debuglink_size,const bool big_endian,const string & obj_file,const std::vector<string> & debug_dirs)427 string ReadDebugLink(const char* debuglink,
428                      const size_t debuglink_size,
429                      const bool big_endian,
430                      const string& obj_file,
431                      const std::vector<string>& debug_dirs) {
432   size_t debuglink_len = strlen(debuglink) + 5;  // Include '\0' + CRC32.
433   debuglink_len = 4 * ((debuglink_len + 3) / 4);  // Round up to 4 bytes.
434 
435   // Sanity check.
436   if (debuglink_len != debuglink_size) {
437     fprintf(stderr, "Mismatched .gnu_debuglink string / section size: "
438             "%zx %zx\n", debuglink_len, debuglink_size);
439     return string();
440   }
441 
442   char obj_file_abspath[PATH_MAX];
443   if (!realpath(obj_file.c_str(), obj_file_abspath)) {
444     fprintf(stderr, "Cannot resolve absolute path for %s\n", obj_file.c_str());
445     return string();
446   }
447 
448   std::vector<string> searched_paths;
449   string debuglink_path;
450   std::vector<string>::const_iterator it;
451   for (it = debug_dirs.begin(); it < debug_dirs.end(); ++it) {
452     const string& debug_dir = *it;
453     debuglink_path = debug_dir + "/" + debuglink;
454 
455     // There is the annoying case of /path/to/foo.so having foo.so as the
456     // debug link file name. Thus this may end up opening /path/to/foo.so again,
457     // and there is a small chance of the two files having the same CRC.
458     if (IsSameFile(obj_file_abspath, debuglink_path))
459       continue;
460 
461     searched_paths.push_back(debug_dir);
462     int debuglink_fd = open(debuglink_path.c_str(), O_RDONLY);
463     if (debuglink_fd < 0)
464       continue;
465 
466     FDWrapper debuglink_fd_wrapper(debuglink_fd);
467 
468     // The CRC is the last 4 bytes in |debuglink|.
469     const dwarf2reader::Endianness endianness = big_endian ?
470         dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
471     dwarf2reader::ByteReader byte_reader(endianness);
472     uint32_t expected_crc =
473         byte_reader.ReadFourBytes(&debuglink[debuglink_size - 4]);
474 
475     uint32_t actual_crc = 0;
476     while (true) {
477       const size_t kReadSize = 4096;
478       char buf[kReadSize];
479       ssize_t bytes_read = HANDLE_EINTR(read(debuglink_fd, &buf, kReadSize));
480       if (bytes_read < 0) {
481         fprintf(stderr, "Error reading debug ELF file %s.\n",
482                 debuglink_path.c_str());
483         return string();
484       }
485       if (bytes_read == 0)
486         break;
487       actual_crc = google_breakpad::UpdateCrc32(actual_crc, buf, bytes_read);
488     }
489     if (actual_crc != expected_crc) {
490       fprintf(stderr, "Error reading debug ELF file - CRC32 mismatch: %s\n",
491               debuglink_path.c_str());
492       continue;
493     }
494 
495     // Found debug file.
496     return debuglink_path;
497   }
498 
499   // Not found case.
500   fprintf(stderr, "Failed to find debug ELF file for '%s' after trying:\n",
501           obj_file.c_str());
502   for (it = searched_paths.begin(); it < searched_paths.end(); ++it) {
503     const string& debug_dir = *it;
504     fprintf(stderr, "  %s/%s\n", debug_dir.c_str(), debuglink);
505   }
506   return string();
507 }
508 
//
// LoadSymbolsInfo
//
// Holds the state between the two calls to LoadSymbols() in case it's necessary
// to follow the .gnu_debuglink section and load debug information from a
// different file.
//
template<typename ElfClass>
class LoadSymbolsInfo {
 public:
  typedef typename ElfClass::Addr Addr;

  // DBG_DIRS is kept by reference, not copied, so the vector must outlive
  // this object.
  explicit LoadSymbolsInfo(const std::vector<string>& dbg_dirs) :
    debug_dirs_(dbg_dirs),
    has_loading_addr_(false),
    loading_addr_(0) {}

  // Keeps track of which sections have been loaded so sections don't
  // accidentally get loaded twice from two different files. Recording a
  // section a second time only prints a message to stderr.
  void LoadedSection(const string &section) {
    if (loaded_sections_.count(section) == 0) {
      loaded_sections_.insert(section);
    } else {
      fprintf(stderr, "Section %s has already been loaded.\n",
              section.c_str());
    }
  }

  // The ELF file and linked debug file are expected to have the same preferred
  // loading address. The first call records ADDR and FILENAME. Every later
  // call compares its ADDR with the recorded one and, on a mismatch,
  // prints a message and asserts.
  void set_loading_addr(Addr addr, const string &filename) {
    if (!has_loading_addr_) {
      loading_addr_ = addr;
      loaded_file_ = filename;
      // Mark the address as recorded; without this the comparison below
      // would never be reached.
      has_loading_addr_ = true;
      return;
    }

    if (addr != loading_addr_) {
      fprintf(stderr,
              "ELF file '%s' and debug ELF file '%s' "
              "have different load addresses.\n",
              loaded_file_.c_str(), filename.c_str());
      assert(false);
    }
  }

  // Setters and getters
  const std::vector<string>& debug_dirs() const {
    return debug_dirs_;
  }

  string debuglink_file() const {
    return debuglink_file_;
  }
  void set_debuglink_file(string file) {
    debuglink_file_ = file;
  }

 private:
  const std::vector<string>& debug_dirs_; // Directories in which to
                                          // search for the debug ELF file.

  string debuglink_file_;  // Full path to the debug ELF file.

  bool has_loading_addr_;  // Indicate if LOADING_ADDR_ is valid.

  Addr loading_addr_;  // Saves the preferred loading address from the
                       // first call to LoadSymbols().

  string loaded_file_;  // Name of the file loaded from the first call to
                        // LoadSymbols().

  std::set<string> loaded_sections_;  // Tracks the Loaded ELF sections
                                      // between calls to LoadSymbols().
};
583 
584 template<typename ElfClass>
LoadSymbols(const string & obj_file,const bool big_endian,const typename ElfClass::Ehdr * elf_header,const bool read_gnu_debug_link,LoadSymbolsInfo<ElfClass> * info,const DumpOptions & options,Module * module)585 bool LoadSymbols(const string& obj_file,
586                  const bool big_endian,
587                  const typename ElfClass::Ehdr* elf_header,
588                  const bool read_gnu_debug_link,
589                  LoadSymbolsInfo<ElfClass>* info,
590                  const DumpOptions& options,
591                  Module* module) {
592   typedef typename ElfClass::Addr Addr;
593   typedef typename ElfClass::Phdr Phdr;
594   typedef typename ElfClass::Shdr Shdr;
595   typedef typename ElfClass::Word Word;
596 
597   Addr loading_addr = GetLoadingAddress<ElfClass>(
598       GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
599       elf_header->e_phnum);
600   module->SetLoadAddress(loading_addr);
601   info->set_loading_addr(loading_addr, obj_file);
602 
603   Word debug_section_type =
604       elf_header->e_machine == EM_MIPS ? SHT_MIPS_DWARF : SHT_PROGBITS;
605   const Shdr* sections =
606       GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
607   const Shdr* section_names = sections + elf_header->e_shstrndx;
608   const char* names =
609       GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
610   const char *names_end = names + section_names->sh_size;
611   bool found_debug_info_section = false;
612   bool found_usable_info = false;
613 
614   if (options.symbol_data != ONLY_CFI) {
615 #ifndef NO_STABS_SUPPORT
616     // Look for STABS debugging information, and load it if present.
617     const Shdr* stab_section =
618       FindElfSectionByName<ElfClass>(".stab", SHT_PROGBITS,
619                                      sections, names, names_end,
620                                      elf_header->e_shnum);
621     if (stab_section) {
622       const Shdr* stabstr_section = stab_section->sh_link + sections;
623       if (stabstr_section) {
624         found_debug_info_section = true;
625         found_usable_info = true;
626         info->LoadedSection(".stab");
627         if (!LoadStabs<ElfClass>(elf_header, stab_section, stabstr_section,
628                                  big_endian, module)) {
629           fprintf(stderr, "%s: \".stab\" section found, but failed to load"
630                   " STABS debugging information\n", obj_file.c_str());
631         }
632       }
633     }
634 #endif  // NO_STABS_SUPPORT
635 
636     // Look for DWARF debugging information, and load it if present.
637     const Shdr* dwarf_section =
638       FindElfSectionByName<ElfClass>(".debug_info", debug_section_type,
639                                      sections, names, names_end,
640                                      elf_header->e_shnum);
641     if (dwarf_section) {
642       found_debug_info_section = true;
643       found_usable_info = true;
644       info->LoadedSection(".debug_info");
645       if (!LoadDwarf<ElfClass>(obj_file, elf_header, big_endian,
646                                options.handle_inter_cu_refs, module)) {
647         fprintf(stderr, "%s: \".debug_info\" section found, but failed to load "
648                 "DWARF debugging information\n", obj_file.c_str());
649       }
650     }
651 
652     // See if there are export symbols available.
653     const Shdr* dynsym_section =
654       FindElfSectionByName<ElfClass>(".dynsym", SHT_DYNSYM,
655                                      sections, names, names_end,
656                                      elf_header->e_shnum);
657     const Shdr* dynstr_section =
658       FindElfSectionByName<ElfClass>(".dynstr", SHT_STRTAB,
659                                      sections, names, names_end,
660                                      elf_header->e_shnum);
661     if (dynsym_section && dynstr_section) {
662       info->LoadedSection(".dynsym");
663 
664       const uint8_t* dynsyms =
665           GetOffset<ElfClass, uint8_t>(elf_header,
666                                        dynsym_section->sh_offset);
667       const uint8_t* dynstrs =
668           GetOffset<ElfClass, uint8_t>(elf_header,
669                                        dynstr_section->sh_offset);
670       bool result =
671           ELFSymbolsToModule(dynsyms,
672                              dynsym_section->sh_size,
673                              dynstrs,
674                              dynstr_section->sh_size,
675                              big_endian,
676                              ElfClass::kAddrSize,
677                              module);
678       found_usable_info = found_usable_info || result;
679     }
680   }
681 
682   if (options.symbol_data != NO_CFI) {
683     // Dwarf Call Frame Information (CFI) is actually independent from
684     // the other DWARF debugging information, and can be used alone.
685     const Shdr* dwarf_cfi_section =
686         FindElfSectionByName<ElfClass>(".debug_frame", debug_section_type,
687                                        sections, names, names_end,
688                                        elf_header->e_shnum);
689     if (dwarf_cfi_section) {
690       // Ignore the return value of this function; even without call frame
691       // information, the other debugging information could be perfectly
692       // useful.
693       info->LoadedSection(".debug_frame");
694       bool result =
695           LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
696                                  dwarf_cfi_section, false, 0, 0, big_endian,
697                                  module);
698       found_usable_info = found_usable_info || result;
699     }
700 
701     // Linux C++ exception handling information can also provide
702     // unwinding data.
703     const Shdr* eh_frame_section =
704         FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS,
705                                        sections, names, names_end,
706                                        elf_header->e_shnum);
707     if (eh_frame_section) {
708       // Pointers in .eh_frame data may be relative to the base addresses of
709       // certain sections. Provide those sections if present.
710       const Shdr* got_section =
711           FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS,
712                                          sections, names, names_end,
713                                          elf_header->e_shnum);
714       const Shdr* text_section =
715           FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS,
716                                          sections, names, names_end,
717                                          elf_header->e_shnum);
718       info->LoadedSection(".eh_frame");
719       // As above, ignore the return value of this function.
720       bool result =
721           LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame",
722                                  eh_frame_section, true,
723                                  got_section, text_section, big_endian, module);
724       found_usable_info = found_usable_info || result;
725     }
726   }
727 
728   if (!found_debug_info_section) {
729     fprintf(stderr, "%s: file contains no debugging information"
730             " (no \".stab\" or \".debug_info\" sections)\n",
731             obj_file.c_str());
732 
733     // Failed, but maybe there's a .gnu_debuglink section?
734     if (read_gnu_debug_link) {
735       const Shdr* gnu_debuglink_section
736           = FindElfSectionByName<ElfClass>(".gnu_debuglink", SHT_PROGBITS,
737                                            sections, names,
738                                            names_end, elf_header->e_shnum);
739       if (gnu_debuglink_section) {
740         if (!info->debug_dirs().empty()) {
741           const char* debuglink_contents =
742               GetOffset<ElfClass, char>(elf_header,
743                                         gnu_debuglink_section->sh_offset);
744           string debuglink_file =
745               ReadDebugLink(debuglink_contents,
746                             gnu_debuglink_section->sh_size,
747                             big_endian,
748                             obj_file,
749                             info->debug_dirs());
750           info->set_debuglink_file(debuglink_file);
751         } else {
752           fprintf(stderr, ".gnu_debuglink section found in '%s', "
753                   "but no debug path specified.\n", obj_file.c_str());
754         }
755       } else {
756         fprintf(stderr, "%s does not contain a .gnu_debuglink section.\n",
757                 obj_file.c_str());
758       }
759     } else {
760       // Return true if some usable information was found, since the caller
761       // doesn't want to use .gnu_debuglink.
762       return found_usable_info;
763     }
764 
765     // No debug info was found, let the user try again with .gnu_debuglink
766     // if present.
767     return false;
768   }
769 
770   return true;
771 }
772 
// Translate the e_machine field of ELF_HEADER into the architecture name
// used in Breakpad symbol files. Returns NULL when the machine type is
// not one of the architectures listed in the table below.
template<typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;

  // Recognized ELF machine codes and the names Breakpad gives them.
  static const struct {
    int machine;
    const char* name;
  } kArchitectures[] = {
    { EM_386,     "x86"     },
    { EM_ARM,     "arm"     },
    { EM_AARCH64, "arm64"   },
    { EM_MIPS,    "mips"    },
    { EM_PPC64,   "ppc64"   },
    { EM_PPC,     "ppc"     },
    { EM_S390,    "s390"    },
    { EM_SPARC,   "sparc"   },
    { EM_SPARCV9, "sparcv9" },
    { EM_X86_64,  "x86_64"  },
  };
  const size_t kCount = sizeof(kArchitectures) / sizeof(kArchitectures[0]);

  const Half machine = elf_header->e_machine;
  for (size_t i = 0; i < kCount; ++i) {
    if (kArchitectures[i].machine == machine)
      return kArchitectures[i].name;
  }
  return NULL;
}
793 
794 // Format the Elf file identifier in IDENTIFIER as a UUID with the
795 // dashes removed.
FormatIdentifier(unsigned char identifier[16])796 string FormatIdentifier(unsigned char identifier[16]) {
797   char identifier_str[40];
798   google_breakpad::FileID::ConvertIdentifierToString(
799       identifier,
800       identifier_str,
801       sizeof(identifier_str));
802   string id_no_dash;
803   for (int i = 0; identifier_str[i] != '\0'; ++i)
804     if (identifier_str[i] != '-')
805       id_no_dash += identifier_str[i];
806   // Add an extra "0" by the end.  PDB files on Windows have an 'age'
807   // number appended to the end of the file identifier; this isn't
808   // really used or necessary on other platforms, but be consistent.
809   id_no_dash += '0';
810   return id_no_dash;
811 }
812 
// Return the non-directory portion of FILENAME: the portion after the
// last slash, or the whole filename if there are no slashes.
string BaseFileName(const string &filename) {
  // basename() is handed a private, mutable, NUL-terminated copy because
  // some implementations are allowed to modify their argument. Keeping the
  // copy in a vector (rather than strdup/free) means it is released on
  // every exit path, including an exception thrown while building the
  // result, and there is no unchecked NULL from a failed allocation.
  std::vector<char> scratch(filename.c_str(),
                            filename.c_str() + filename.size() + 1);
  // The result is copied into a string before |scratch| is destroyed, so
  // it is safe even when basename() returns a pointer into the buffer.
  return string(basename(&scratch[0]));
}
822 
823 template<typename ElfClass>
SanitizeDebugFile(const typename ElfClass::Ehdr * debug_elf_header,const string & debuglink_file,const string & obj_filename,const char * obj_file_architecture,const bool obj_file_is_big_endian)824 bool SanitizeDebugFile(const typename ElfClass::Ehdr* debug_elf_header,
825                        const string& debuglink_file,
826                        const string& obj_filename,
827                        const char* obj_file_architecture,
828                        const bool obj_file_is_big_endian) {
829   const char* debug_architecture =
830       ElfArchitecture<ElfClass>(debug_elf_header);
831   if (!debug_architecture) {
832     fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
833             debuglink_file.c_str(), debug_elf_header->e_machine);
834     return false;
835   }
836   if (strcmp(obj_file_architecture, debug_architecture)) {
837     fprintf(stderr, "%s with ELF machine architecture %s does not match "
838             "%s with ELF architecture %s\n",
839             debuglink_file.c_str(), debug_architecture,
840             obj_filename.c_str(), obj_file_architecture);
841     return false;
842   }
843   bool debug_big_endian;
844   if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
845     return false;
846   if (debug_big_endian != obj_file_is_big_endian) {
847     fprintf(stderr, "%s and %s does not match in endianness\n",
848             obj_filename.c_str(), debuglink_file.c_str());
849     return false;
850   }
851   return true;
852 }
853 
854 template<typename ElfClass>
ReadSymbolDataElfClass(const typename ElfClass::Ehdr * elf_header,const string & obj_filename,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** out_module)855 bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
856                             const string& obj_filename,
857                             const std::vector<string>& debug_dirs,
858                             const DumpOptions& options,
859                             Module** out_module) {
860   typedef typename ElfClass::Ehdr Ehdr;
861   typedef typename ElfClass::Shdr Shdr;
862 
863   *out_module = NULL;
864 
865   unsigned char identifier[16];
866   if (!google_breakpad::FileID::ElfFileIdentifierFromMappedFile(elf_header,
867                                                                 identifier)) {
868     fprintf(stderr, "%s: unable to generate file identifier\n",
869             obj_filename.c_str());
870     return false;
871   }
872 
873   const char *architecture = ElfArchitecture<ElfClass>(elf_header);
874   if (!architecture) {
875     fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
876             obj_filename.c_str(), elf_header->e_machine);
877     return false;
878   }
879 
880   // Figure out what endianness this file is.
881   bool big_endian;
882   if (!ElfEndianness<ElfClass>(elf_header, &big_endian))
883     return false;
884 
885   string name = BaseFileName(obj_filename);
886   string os = "Linux";
887   string id = FormatIdentifier(identifier);
888 
889   LoadSymbolsInfo<ElfClass> info(debug_dirs);
890   scoped_ptr<Module> module(new Module(name, os, architecture, id));
891   if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
892                              !debug_dirs.empty(), &info,
893                              options, module.get())) {
894     const string debuglink_file = info.debuglink_file();
895     if (debuglink_file.empty())
896       return false;
897 
898     // Load debuglink ELF file.
899     fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
900     MmapWrapper debug_map_wrapper;
901     Ehdr* debug_elf_header = NULL;
902     if (!LoadELF(debuglink_file, &debug_map_wrapper,
903                  reinterpret_cast<void**>(&debug_elf_header)) ||
904         !SanitizeDebugFile<ElfClass>(debug_elf_header, debuglink_file,
905                                      obj_filename, architecture, big_endian)) {
906       return false;
907     }
908 
909     if (!LoadSymbols<ElfClass>(debuglink_file, big_endian,
910                                debug_elf_header, false, &info,
911                                options, module.get())) {
912       return false;
913     }
914   }
915 
916   *out_module = module.release();
917   return true;
918 }
919 
920 }  // namespace
921 
922 namespace google_breakpad {
923 
924 // Not explicitly exported, but not static so it can be used in unit tests.
ReadSymbolDataInternal(const uint8_t * obj_file,const string & obj_filename,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** module)925 bool ReadSymbolDataInternal(const uint8_t* obj_file,
926                             const string& obj_filename,
927                             const std::vector<string>& debug_dirs,
928                             const DumpOptions& options,
929                             Module** module) {
930   if (!IsValidElf(obj_file)) {
931     fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
932     return false;
933   }
934 
935   int elfclass = ElfClass(obj_file);
936   if (elfclass == ELFCLASS32) {
937     return ReadSymbolDataElfClass<ElfClass32>(
938         reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, debug_dirs,
939         options, module);
940   }
941   if (elfclass == ELFCLASS64) {
942     return ReadSymbolDataElfClass<ElfClass64>(
943         reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, debug_dirs,
944         options, module);
945   }
946 
947   return false;
948 }
949 
WriteSymbolFile(const string & obj_file,const std::vector<string> & debug_dirs,const DumpOptions & options,std::ostream & sym_stream)950 bool WriteSymbolFile(const string &obj_file,
951                      const std::vector<string>& debug_dirs,
952                      const DumpOptions& options,
953                      std::ostream &sym_stream) {
954   Module* module;
955   if (!ReadSymbolData(obj_file, debug_dirs, options, &module))
956     return false;
957 
958   bool result = module->Write(sym_stream, options.symbol_data);
959   delete module;
960   return result;
961 }
962 
ReadSymbolData(const string & obj_file,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** module)963 bool ReadSymbolData(const string& obj_file,
964                     const std::vector<string>& debug_dirs,
965                     const DumpOptions& options,
966                     Module** module) {
967   MmapWrapper map_wrapper;
968   void* elf_header = NULL;
969   if (!LoadELF(obj_file, &map_wrapper, &elf_header))
970     return false;
971 
972   return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header),
973                                 obj_file, debug_dirs, options, module);
974 }
975 
976 }  // namespace google_breakpad
977