1// -*- mode: c++ -*- 2 3// Copyright (c) 2011, Google Inc. 4// All rights reserved. 5// 6// Redistribution and use in source and binary forms, with or without 7// modification, are permitted provided that the following conditions are 8// met: 9// 10// * Redistributions of source code must retain the above copyright 11// notice, this list of conditions and the following disclaimer. 12// * Redistributions in binary form must reproduce the above 13// copyright notice, this list of conditions and the following disclaimer 14// in the documentation and/or other materials provided with the 15// distribution. 16// * Neither the name of Google Inc. nor the names of its 17// contributors may be used to endorse or promote products derived from 18// this software without specific prior written permission. 19// 20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// dump_syms.mm: Create a symbol file for use with minidumps

#include "common/mac/dump_syms.h"

#include <Foundation/Foundation.h>
#include <assert.h>
#include <mach-o/arch.h>
#include <mach-o/fat.h>
#include <stdio.h>
#include <string.h>

#include <ostream>
#include <string>
#include <vector>

#include "common/dwarf/bytereader-inl.h"
#include "common/dwarf/dwarf2reader.h"
#include "common/dwarf_cfi_to_module.h"
#include "common/dwarf_cu_to_module.h"
#include "common/dwarf_line_to_module.h"
#include "common/mac/file_id.h"
#include "common/mac/arch_utilities.h"
#include "common/mac/macho_reader.h"
#include "common/module.h"
#include "common/scoped_ptr.h"
#include "common/stabs_reader.h"
#include "common/stabs_to_module.h"
#include "common/symbol_data.h"

#ifndef CPU_TYPE_ARM
#define CPU_TYPE_ARM (static_cast<cpu_type_t>(12))
#endif  // CPU_TYPE_ARM

#ifndef CPU_TYPE_ARM64
#define CPU_TYPE_ARM64 (static_cast<cpu_type_t>(16777228))
#endif  // CPU_TYPE_ARM64

using dwarf2reader::ByteReader;
using google_breakpad::DwarfCUToModule;
using google_breakpad::DwarfLineToModule;
using google_breakpad::FileID;
using google_breakpad::mach_o::FatReader;
using google_breakpad::mach_o::Section;
using google_breakpad::mach_o::Segment;
using google_breakpad::Module;
using google_breakpad::StabsReader;
using google_breakpad::StabsToModule;
using google_breakpad::scoped_ptr;
using std::make_pair;
using std::pair;
using std::string;
using std::vector;

namespace google_breakpad {

// Load the object file named by FILENAME into memory and record the list
// of architectures it contains. If FILENAME names a bundle, the file that
// is loaded is the resource found in the bundle's "DWARF" directory.
//
// Returns true on success. Returns false if the file does not exist, no
// DWARF resource can be found in the bundle, the file cannot be read, the
// fat reader rejects it, or it contains no architectures; in the cases
// detected here a diagnostic is written to stderr.
bool DumpSymbols::Read(NSString *filename) {
  if (![[NSFileManager defaultManager] fileExistsAtPath:filename]) {
    fprintf(stderr, "Object file does not exist: %s\n",
            [filename fileSystemRepresentation]);
    return false;
  }

  input_pathname_ = [filename retain];

  // Does this filename refer to a dSYM bundle?
  NSBundle *bundle = [NSBundle bundleWithPath:input_pathname_];

  if (bundle) {
    // Filenames referring to bundles usually have names of the form
    // "<basename>.dSYM"; however, if the user has specified a wrapper
    // suffix (the WRAPPER_SUFFIX and WRAPPER_EXTENSION build settings),
    // then the name may have the form "<basename>.<extension>.dSYM". In
    // either case, the resource name for the file containing the DWARF
    // info within the bundle is <basename>.
    //
    // Since there's no way to tell how much to strip off, remove one
    // extension at a time, and use the first one that
    // pathForResource:ofType:inDirectory likes.
    NSString *base_name = [input_pathname_ lastPathComponent];
    NSString *dwarf_resource;

    do {
      NSString *new_base_name = [base_name stringByDeletingPathExtension];

      // If stringByDeletingPathExtension returned the name unchanged, then
      // there's nothing more for us to strip off --- lose.
      if ([new_base_name isEqualToString:base_name]) {
        fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n",
                [input_pathname_ fileSystemRepresentation]);
        return false;
      }

      // Take the shortened result as our new base_name.
      base_name = new_base_name;

      // Try to find a DWARF resource in the bundle under the new base_name.
      dwarf_resource = [bundle pathForResource:base_name
                                        ofType:nil inDirectory:@"DWARF"];
    } while (!dwarf_resource);

    object_filename_ = [dwarf_resource retain];
  } else {
    object_filename_ = [input_pathname_ retain];
  }

  // Read the file's contents into memory.
  //
  // The documentation for dataWithContentsOfMappedFile says:
  //
  //     Because of file mapping restrictions, this method should only be
  //     used if the file is guaranteed to exist for the duration of the
  //     data object's existence. It is generally safer to use the
  //     dataWithContentsOfFile: method.
  //
  // I gather this means that OS X doesn't have (or at least, that method
  // doesn't use) a form of mapping like Linux's MAP_PRIVATE, where the
  // process appears to get its own copy of the data, and changes to the
  // file don't affect memory and vice versa).
  //
  // Initialize |error| so that the failure path below never formats an
  // indeterminate pointer.
  NSError *error = nil;
  contents_ = [NSData dataWithContentsOfFile:object_filename_
                                     options:0
                                       error:&error];
  if (!contents_) {
    fprintf(stderr, "Error reading object file: %s: %s\n",
            [object_filename_ fileSystemRepresentation],
            [[error localizedDescription] UTF8String]);
    return false;
  }
  [contents_ retain];

  // Get the list of object files present in the file.
  FatReader::Reporter fat_reporter([object_filename_
                                    fileSystemRepresentation]);
  FatReader fat_reader(&fat_reporter);
  if (!fat_reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes]),
                       [contents_ length])) {
    return false;
  }

  // Get our own copy of fat_reader's object file list.
  size_t object_files_count;
  const struct fat_arch *object_files =
    fat_reader.object_files(&object_files_count);
  if (object_files_count == 0) {
    fprintf(stderr, "Fat binary file contains *no* architectures: %s\n",
            [object_filename_ fileSystemRepresentation]);
    return false;
  }
  object_files_.resize(object_files_count);
  memcpy(&object_files_[0], object_files,
         sizeof(struct fat_arch) * object_files_count);

  return true;
}

// Select, from the object files recorded by Read, the one that
// NXFindBestFatArch considers the best match for CPU_TYPE / CPU_SUBTYPE.
// Returns true and records the selection on success; returns false if
// there are no object files or none of them matches.
bool DumpSymbols::SetArchitecture(cpu_type_t cpu_type,
                                  cpu_subtype_t cpu_subtype) {
  // Taking the address of element 0 of an empty vector is undefined, so
  // bail out before touching it.
  if (object_files_.empty()) return false;

  // Find the best match for the architecture the user requested.
  const struct fat_arch *best_match
    = NXFindBestFatArch(cpu_type, cpu_subtype, &object_files_[0],
                        static_cast<uint32_t>(object_files_.size()));
  if (!best_match) return false;

  // Record the selected object file.
  selected_object_file_ = best_match;
  return true;
}

// Select an architecture by name. ARCH_NAME is looked up with
// BreakpadGetArchInfoFromName; if the lookup fails, or the resulting
// cpu type/subtype cannot be selected, returns false.
bool DumpSymbols::SetArchitecture(const std::string &arch_name) {
  const NXArchInfo *arch_info =
      google_breakpad::BreakpadGetArchInfoFromName(arch_name.c_str());
  if (!arch_info) return false;
  return SetArchitecture(arch_info->cputype, arch_info->cpusubtype);
}

// Return the identifier of the selected object file, as produced by
// FileID::MachoIdentifier and FileID::ConvertIdentifierToString, with all
// '-' characters removed. Returns the empty string (after printing a
// diagnostic to stderr) if the identifier cannot be computed.
//
// An object file must already have been selected; this function
// dereferences selected_object_file_ unconditionally.
string DumpSymbols::Identifier() {
  FileID file_id([object_filename_ fileSystemRepresentation]);
  unsigned char identifier_bytes[16];
  cpu_type_t cpu_type = selected_object_file_->cputype;
  cpu_subtype_t cpu_subtype = selected_object_file_->cpusubtype;
  if (!file_id.MachoIdentifier(cpu_type, cpu_subtype, identifier_bytes)) {
    fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n",
            [object_filename_ fileSystemRepresentation]);
    return "";
  }

  char identifier_string[40];
  FileID::ConvertIdentifierToString(identifier_bytes, identifier_string,
                                    sizeof(identifier_string));

  // Strip the dashes. After erasing the character at index i, the search
  // resumes at i, which now holds the character that followed the dash.
  string compacted(identifier_string);
  for (size_t i = compacted.find('-'); i != string::npos;
       i = compacted.find('-', i))
    compacted.erase(i, 1);

  return compacted;
}

// A line-to-module loader that accepts line number info parsed by
// dwarf2reader::LineInfo and populates a Module and a line vector
// with the results.
class DumpSymbols::DumperLineToModule:
      public DwarfCUToModule::LineToModuleHandler {
 public:
  // Create a line-to-module converter using BYTE_READER.
  explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
      : byte_reader_(byte_reader) { }

  // Remember COMPILATION_DIR; it is handed to the DwarfLineToModule
  // handler built by subsequent ReadProgram calls.
  void StartCompilationUnit(const string& compilation_dir) {
    compilation_dir_ = compilation_dir;
  }

  // Parse the LENGTH bytes of line program at PROGRAM, passing MODULE,
  // the remembered compilation directory and LINES to the
  // DwarfLineToModule handler that receives the parsed data.
  void ReadProgram(const char *program, uint64 length,
                   Module *module, vector<Module::Line> *lines) {
    DwarfLineToModule handler(module, compilation_dir_, lines);
    dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler);
    parser.Start();
  }
 private:
  string compilation_dir_;
  dwarf2reader::ByteReader *byte_reader_;  // WEAK
};

// Read the DWARF data in DWARF_SECTIONS (the sections of MACHO_READER's
// __DWARF segment) and add it to MODULE. HANDLE_INTER_CU_REFS is passed
// through to the DwarfCUToModule::FileContext.
//
// Returns false, after printing a diagnostic to stderr, if there is no
// usable __debug_info section; returns true otherwise.
bool DumpSymbols::ReadDwarf(google_breakpad::Module *module,
                            const mach_o::Reader &macho_reader,
                            const mach_o::SectionMap &dwarf_sections,
                            bool handle_inter_cu_refs) const {
  // Build a byte reader of the appropriate endianness.
  ByteReader byte_reader(macho_reader.big_endian()
                         ? dwarf2reader::ENDIANNESS_BIG
                         : dwarf2reader::ENDIANNESS_LITTLE);

  // Construct a context for this file.
  DwarfCUToModule::FileContext file_context(selected_object_name_,
                                            module,
                                            handle_inter_cu_refs);

  // Build a dwarf2reader::SectionMap from our mach_o::SectionMap.
  for (mach_o::SectionMap::const_iterator it = dwarf_sections.begin();
       it != dwarf_sections.end(); ++it) {
    file_context.AddSectionToSectionMap(
        it->first,
        reinterpret_cast<const char *>(it->second.contents.start),
        it->second.contents.Size());
  }

  // Find the __debug_info section.
  dwarf2reader::SectionMap::const_iterator debug_info_entry =
      file_context.section_map().find("__debug_info");
  // There had better be a __debug_info section! This must be a runtime
  // check rather than an assert: with NDEBUG defined an assert vanishes
  // and we would dereference the end iterator.
  if (debug_info_entry == file_context.section_map().end() ||
      !debug_info_entry->second.first) {
    fprintf(stderr, "%s: __DWARF segment of file has no __debug_info section\n",
            selected_object_name_.c_str());
    return false;
  }
  const std::pair<const char*, uint64>& debug_info_section =
      debug_info_entry->second;

  // Build a line-to-module loader for the root handler to use.
  DumperLineToModule line_to_module(&byte_reader);

  // Walk the __debug_info section, one compilation unit at a time.
  uint64 debug_info_length = debug_info_section.second;
  for (uint64 offset = 0; offset < debug_info_length;) {
    // Make a handler for the root DIE that populates MODULE with the
    // debug info.
    DwarfCUToModule::WarningReporter reporter(selected_object_name_,
                                              offset);
    DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter);
    // Make a Dwarf2Handler that drives our DIEHandler.
    dwarf2reader::DIEDispatcher die_dispatcher(&root_handler);
    // Make a DWARF parser for the compilation unit at OFFSET.
    dwarf2reader::CompilationUnit dwarf_reader(file_context.section_map(),
                                               offset,
                                               &byte_reader,
                                               &die_dispatcher);
    // Process the entire compilation unit; get the offset of the next.
    offset += dwarf_reader.Start();
  }

  return true;
}

// Read the call frame information in SECTION of MACHO_READER's file and
// add it to MODULE. EH_FRAME is passed to the CallFrameInfo parser; the
// callers in this file pass true for __eh_frame and false for
// __debug_frame.
//
// Returns false, after printing a diagnostic to stderr, if there is no
// register name table for the file's CPU type; returns true otherwise.
bool DumpSymbols::ReadCFI(google_breakpad::Module *module,
                          const mach_o::Reader &macho_reader,
                          const mach_o::Section &section,
                          bool eh_frame) const {
  // Find the appropriate set of register names for this file's
  // architecture.
  vector<string> register_names;
  switch (macho_reader.cpu_type()) {
    case CPU_TYPE_X86:
      register_names = DwarfCFIToModule::RegisterNames::I386();
      break;
    case CPU_TYPE_X86_64:
      register_names = DwarfCFIToModule::RegisterNames::X86_64();
      break;
    case CPU_TYPE_ARM:
      register_names = DwarfCFIToModule::RegisterNames::ARM();
      break;
    case CPU_TYPE_ARM64:
      register_names = DwarfCFIToModule::RegisterNames::ARM64();
      break;
    default: {
      const NXArchInfo *arch = google_breakpad::BreakpadGetArchInfoFromCpuType(
          macho_reader.cpu_type(), macho_reader.cpu_subtype());
      fprintf(stderr, "%s: cannot convert DWARF call frame information for ",
              selected_object_name_.c_str());
      if (arch)
        fprintf(stderr, "architecture '%s'", arch->name);
      else
        fprintf(stderr, "architecture %d,%d",
                macho_reader.cpu_type(), macho_reader.cpu_subtype());
      fprintf(stderr, " to Breakpad symbol file: no register name table\n");
      return false;
    }
  }

  // Find the call frame information and its size.
  const char *cfi = reinterpret_cast<const char *>(section.contents.start);
  size_t cfi_size = section.contents.Size();

  // Plug together the parser, handler, and their entourages.
  DwarfCFIToModule::Reporter module_reporter(selected_object_name_,
                                             section.section_name);
  DwarfCFIToModule handler(module, register_names, &module_reporter);
  dwarf2reader::ByteReader byte_reader(macho_reader.big_endian() ?
                                       dwarf2reader::ENDIANNESS_BIG :
                                       dwarf2reader::ENDIANNESS_LITTLE);
  byte_reader.SetAddressSize(macho_reader.bits_64() ? 8 : 4);
  // At the moment, according to folks at Apple and some cursory
  // investigation, Mac OS X only uses DW_EH_PE_pcrel-based pointers, so
  // this is the only base address the CFI parser will need.
  byte_reader.SetCFIDataBase(section.address, cfi);

  dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(selected_object_name_,
                                                       section.section_name);
  dwarf2reader::CallFrameInfo parser(cfi, cfi_size,
                                     &byte_reader, &handler, &dwarf_reporter,
                                     eh_frame);
  parser.Start();
  return true;
}

// A LoadCommandHandler that loads whatever debugging data it finds into a
// Module.
class DumpSymbols::LoadCommandDumper:
      public mach_o::Reader::LoadCommandHandler {
 public:
  // Create a load command dumper handling load commands from READER's
  // file, and adding data to MODULE. SYMBOL_DATA selects which kinds of
  // data are read (see SegmentCommand); HANDLE_INTER_CU_REFS is forwarded
  // to DumpSymbols::ReadDwarf.
  LoadCommandDumper(const DumpSymbols &dumper,
                    google_breakpad::Module *module,
                    const mach_o::Reader &reader,
                    SymbolData symbol_data,
                    bool handle_inter_cu_refs)
      : dumper_(dumper),
        module_(module),
        reader_(reader),
        symbol_data_(symbol_data),
        handle_inter_cu_refs_(handle_inter_cu_refs) { }

  bool SegmentCommand(const mach_o::Segment &segment);
  bool SymtabCommand(const ByteBuffer &entries, const ByteBuffer &strings);

 private:
  const DumpSymbols &dumper_;
  google_breakpad::Module *module_;  // WEAK
  const mach_o::Reader &reader_;
  const SymbolData symbol_data_;
  const bool handle_inter_cu_refs_;
};

// Handle one segment load command.
//
// For "__TEXT": set the module's load address and, unless symbol_data_ is
// NO_CFI, read CFI from __eh_frame if present. For "__DWARF": unless
// symbol_data_ is ONLY_CFI, read the DWARF debugging information, and
// unless it is NO_CFI, read CFI from __debug_frame if present. Other
// segments are ignored.
//
// Returns false if the segment's sections cannot be mapped or ReadDwarf
// fails; CFI failures are deliberately not treated as fatal.
bool DumpSymbols::LoadCommandDumper::SegmentCommand(const Segment &segment) {
  mach_o::SectionMap section_map;
  if (!reader_.MapSegmentSections(segment, &section_map))
    return false;

  if (segment.name == "__TEXT") {
    module_->SetLoadAddress(segment.vmaddr);
    if (symbol_data_ != NO_CFI) {
      mach_o::SectionMap::const_iterator eh_frame =
          section_map.find("__eh_frame");
      if (eh_frame != section_map.end()) {
        // If there is a problem reading this, don't treat it as a fatal error.
        dumper_.ReadCFI(module_, reader_, eh_frame->second, true);
      }
    }
    return true;
  }

  if (segment.name == "__DWARF") {
    if (symbol_data_ != ONLY_CFI) {
      if (!dumper_.ReadDwarf(module_, reader_, section_map,
                             handle_inter_cu_refs_)) {
        return false;
      }
    }
    if (symbol_data_ != NO_CFI) {
      mach_o::SectionMap::const_iterator debug_frame
          = section_map.find("__debug_frame");
      if (debug_frame != section_map.end()) {
        // If there is a problem reading this, don't treat it as a fatal error.
        dumper_.ReadCFI(module_, reader_, debug_frame->second, false);
      }
    }
  }

  return true;
}

// Handle a symbol table load command: run a StabsReader over ENTRIES and
// STRINGS, feeding the results into the module. Returns false if the
// reader's Process step fails.
bool DumpSymbols::LoadCommandDumper::SymtabCommand(const ByteBuffer &entries,
                                                   const ByteBuffer &strings) {
  StabsToModule stabs_to_module(module_);
  // Mac OS X STABS are never "unitized", and the size of the 'value' field
  // matches the address size of the executable.
  StabsReader stabs_reader(entries.start, entries.Size(),
                           strings.start, strings.Size(),
                           reader_.big_endian(),
                           reader_.bits_64() ? 8 : 4,
                           true,
                           &stabs_to_module);
  if (!stabs_reader.Process())
    return false;
  stabs_to_module.Finalize();
  return true;
}

// Read the debugging data of the selected object file into a newly
// allocated Module and store it in *OUT_MODULE; the caller takes
// ownership. If no architecture has been selected yet, one is chosen:
// the only one present, or else the one matching the local machine.
//
// Returns true on success. On failure returns false and leaves
// *OUT_MODULE untouched.
bool DumpSymbols::ReadSymbolData(Module** out_module) {
  // Select an object file, if SetArchitecture hasn't been called to set one
  // explicitly.
  if (!selected_object_file_) {
    // If there's only one architecture, that's the one.
    if (object_files_.size() == 1)
      selected_object_file_ = &object_files_[0];
    else {
      // Look for an object file whose architecture matches our own.
      // NXGetLocalArchInfo may return NULL; treat that like "no match".
      const NXArchInfo *local_arch = NXGetLocalArchInfo();
      if (!local_arch ||
          !SetArchitecture(local_arch->cputype, local_arch->cpusubtype)) {
        fprintf(stderr, "%s: object file contains more than one"
                " architecture, none of which match the current"
                " architecture; specify an architecture explicitly"
                " with '-a ARCH' to resolve the ambiguity\n",
                [object_filename_ fileSystemRepresentation]);
        return false;
      }
    }
  }

  assert(selected_object_file_);

  // Find the name of the selected file's architecture, to appear in
  // the MODULE record and in error messages.
  const NXArchInfo *selected_arch_info =
      google_breakpad::BreakpadGetArchInfoFromCpuType(
          selected_object_file_->cputype, selected_object_file_->cpusubtype);
  // The lookup can fail (ReadCFI handles the same case); don't
  // dereference a null result.
  if (!selected_arch_info) {
    fprintf(stderr, "%s: unrecognized architecture %d,%d\n",
            [object_filename_ fileSystemRepresentation],
            selected_object_file_->cputype,
            selected_object_file_->cpusubtype);
    return false;
  }

  const char *selected_arch_name = selected_arch_info->name;
  if (strcmp(selected_arch_name, "i386") == 0)
    selected_arch_name = "x86";

  // Produce a name to use in error messages that includes the
  // filename, and the architecture, if there is more than one.
  selected_object_name_ = [object_filename_ UTF8String];
  if (object_files_.size() > 1) {
    selected_object_name_ += ", architecture ";
    // Must be +=: a bare operator+ builds a temporary and discards it,
    // leaving the architecture name out of every diagnostic.
    selected_object_name_ += selected_arch_name;
  }

  // Compute a module name, to appear in the MODULE record.
  NSString *module_name = [object_filename_ lastPathComponent];

  // Choose an identifier string, to appear in the MODULE record.
  string identifier = Identifier();
  if (identifier.empty())
    return false;
  identifier += "0";

  // Create a module to hold the debugging information.
  scoped_ptr<Module> module(new Module([module_name UTF8String],
                                       "mac",
                                       selected_arch_name,
                                       identifier));

  // Parse the selected object file.
  mach_o::Reader::Reporter reporter(selected_object_name_);
  mach_o::Reader reader(&reporter);
  if (!reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes])
                   + selected_object_file_->offset,
                   selected_object_file_->size,
                   selected_object_file_->cputype,
                   selected_object_file_->cpusubtype))
    return false;

  // Walk its load commands, and deal with whatever is there.
  LoadCommandDumper load_command_dumper(*this, module.get(), reader,
                                        symbol_data_, handle_inter_cu_refs_);
  if (!reader.WalkLoadCommands(&load_command_dumper))
    return false;

  *out_module = module.release();

  return true;
}

// Read the symbol data and write it to STREAM via Module::Write. Returns
// the result of that write, or false if the symbol data could not be
// read. The intermediate Module is deleted before returning.
bool DumpSymbols::WriteSymbolFile(std::ostream &stream) {
  Module* module = NULL;

  if (ReadSymbolData(&module) && module) {
    bool res = module->Write(stream, symbol_data_);
    delete module;
    return res;
  }

  return false;
}

}  // namespace google_breakpad