1 // -*- mode: c++ -*-
2 
3 // Copyright (c) 2011, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 //     * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 
32 // Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
33 
34 // dump_syms.h: Declaration of google_breakpad::DumpSymbols, a class for
35 // reading debugging information from Mach-O files and writing it out as a
36 // Breakpad symbol file.
37 
38 #include <Foundation/Foundation.h>
39 #include <mach-o/loader.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 
43 #include <ostream>
44 #include <string>
45 #include <vector>
46 
47 #include "common/byte_cursor.h"
48 #include "common/mac/macho_reader.h"
49 #include "common/module.h"
50 #include "common/symbol_data.h"
51 
52 namespace google_breakpad {
53 
// Reads debugging information from a Mach-O file, a universal binary, or a
// dSYM bundle containing either, and writes it out as a Breakpad symbol
// file.
//
// Judging by the member documentation below, the expected sequence is:
// construct, call Read, optionally call SetArchitecture to pick one of
// several contained object files, then call WriteSymbolFile or
// ReadSymbolData.
//
// The destructor releases the Foundation objects held in input_pathname_,
// object_filename_ and contents_. A copy of a DumpSymbols would release the
// very same objects a second time, so copying is disallowed.
class DumpSymbols {
 public:
  // Create a dumper that handles the kinds of symbol data selected by
  // |symbol_data|. |handle_inter_cu_refs| says whether references between
  // compilation units should be handled.
  //
  // All Foundation pointers start out nil and no object file is selected.
  DumpSymbols(SymbolData symbol_data, bool handle_inter_cu_refs)
      : symbol_data_(symbol_data),
        handle_inter_cu_refs_(handle_inter_cu_refs),
        input_pathname_(),
        object_filename_(),
        contents_(),
        object_files_(),
        selected_object_file_(),
        selected_object_name_() { }

  // Release the Foundation objects this dumper holds. Sending release to
  // nil is a no-op, so this is safe even if the members were never set.
  ~DumpSymbols() {
    [input_pathname_ release];
    [object_filename_ release];
    [contents_ release];
  }

  // Prepare to read debugging information from |filename|. |filename| may be
  // the name of a universal binary, a Mach-O file, or a dSYM bundle
  // containing either of the above. On success, return true; if there is a
  // problem reading |filename|, report it and return false.
  //
  // (This class uses NSString for filenames and related values,
  // because the Mac Foundation framework seems to support
  // filename-related operations more fully on NSString values.)
  bool Read(NSString *filename);

  // If this dumper's file includes an object file for |cpu_type| and
  // |cpu_subtype|, then select that object file for dumping, and return
  // true. Otherwise, return false, and leave this dumper's selected
  // architecture unchanged.
  //
  // By default, if this dumper's file contains only one object file, then
  // the dumper will dump those symbols; and if it contains more than one
  // object file, then the dumper will dump the object file whose
  // architecture matches that of this dumper program.
  bool SetArchitecture(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype);

  // If this dumper's file includes an object file for |arch_name|, then select
  // that object file for dumping, and return true. Otherwise, return false,
  // and leave this dumper's selected architecture unchanged.
  //
  // By default, if this dumper's file contains only one object file, then
  // the dumper will dump those symbols; and if it contains more than one
  // object file, then the dumper will dump the object file whose
  // architecture matches that of this dumper program.
  bool SetArchitecture(const std::string &arch_name);

  // Return a pointer to an array of 'struct fat_arch' structures,
  // describing the object files contained in this dumper's file. Set
  // *|count| to the number of elements in the array; |count| must not be
  // NULL, as it is written unconditionally. The returned array is owned by
  // this DumpSymbols instance.
  //
  // If there are no available architectures, this function returns NULL
  // and sets *|count| to zero.
  const struct fat_arch *AvailableArchitectures(size_t *count) {
    *count = object_files_.size();
    // Taking the address of element 0 of an empty vector is undefined, so
    // the empty case is reported as NULL instead.
    if (object_files_.empty())
      return NULL;
    return &object_files_[0];
  }

  // Read the selected object file's debugging information, and write it out to
  // |stream|. Return true on success; if an error occurs, report it and
  // return false.
  bool WriteSymbolFile(std::ostream &stream);

  // As above, but simply return the debugging information in *|module|
  // instead of writing it to a stream. The caller owns the resulting
  // module object and must delete it when finished.
  bool ReadSymbolData(Module** module);

 private:
  // Used internally.
  class DumperLineToModule;
  class LoadCommandDumper;

  // Copying is disallowed: the destructor releases the Foundation objects
  // below, so a copied instance would release them a second time. These are
  // declared but intentionally never defined.
  DumpSymbols(const DumpSymbols &);
  void operator=(const DumpSymbols &);

  // Return an identifier string for the file this DumpSymbols is dumping.
  std::string Identifier();

  // Read debugging information from |dwarf_sections|, which was taken from
  // |macho_reader|, and add it to |module|. |handle_inter_cu_refs| says
  // whether to handle references between compilation units. On success,
  // return true; on failure, report the problem and return false.
  bool ReadDwarf(google_breakpad::Module *module,
                 const mach_o::Reader &macho_reader,
                 const mach_o::SectionMap &dwarf_sections,
                 bool handle_inter_cu_refs) const;

  // Read DWARF CFI or .eh_frame data from |section|, belonging to
  // |macho_reader|, and record it in |module|.  If |eh_frame| is true,
  // then the data is .eh_frame-format data; otherwise, it is standard DWARF
  // .debug_frame data. On success, return true; on failure, report
  // the problem and return false.
  bool ReadCFI(google_breakpad::Module *module,
               const mach_o::Reader &macho_reader,
               const mach_o::Section &section,
               bool eh_frame) const;

  // The selection of what type of symbol data to read/write.
  const SymbolData symbol_data_;

  // Whether to handle references between compilation units.
  const bool handle_inter_cu_refs_;

  // The name of the file or bundle whose symbols this will dump.
  // This is the path given to Read, for use in error messages.
  NSString *input_pathname_;

  // The name of the file this DumpSymbols will actually read debugging
  // information from. Normally, this is the same as input_pathname_, but if
  // filename refers to a dSYM bundle, then this is the resource file
  // within that bundle.
  NSString *object_filename_;

  // The complete contents of object_filename_, mapped into memory.
  NSData *contents_;

  // A vector of fat_arch structures describing the object files
  // object_filename_ contains. If object_filename_ refers to a fat binary,
  // this may have more than one element; if it refers to a Mach-O file, this
  // has exactly one element.
  std::vector<struct fat_arch> object_files_;

  // The object file in object_files_ selected to dump, or NULL if
  // SetArchitecture hasn't been called yet.
  const struct fat_arch *selected_object_file_;

  // A string that identifies the selected object file, for use in error
  // messages.  This is usually object_filename_, but if that refers to a
  // fat binary, it includes an indication of the particular architecture
  // within that binary.
  std::string selected_object_name_;
};
186 
187 }  // namespace google_breakpad
188