1 // -*- mode: c++ -*-
2 
3 // Copyright (c) 2010 Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 //     * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 
32 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
33 
// Add DWARF debugging information to a Breakpad symbol file. This
// file defines the DwarfCUToModule class, which accepts parsed DWARF
// data and populates a google_breakpad::Module with the results; the
// Module can then write its contents as a Breakpad symbol file.
38 
39 #ifndef COMMON_LINUX_DWARF_CU_TO_MODULE_H__
40 #define COMMON_LINUX_DWARF_CU_TO_MODULE_H__
41 
42 #include <stdint.h>
43 
44 #include <string>
45 
46 #include "common/language.h"
47 #include "common/module.h"
48 #include "common/dwarf/bytereader.h"
49 #include "common/dwarf/dwarf2diehandler.h"
50 #include "common/dwarf/dwarf2reader.h"
51 #include "common/scoped_ptr.h"
52 #include "common/using_std_string.h"
53 
54 namespace google_breakpad {
55 
56 using dwarf2reader::DwarfAttribute;
57 using dwarf2reader::DwarfForm;
58 using dwarf2reader::DwarfLanguage;
59 using dwarf2reader::DwarfTag;
60 
// Populate a google_breakpad::Module with DWARF debugging information.
//
// An instance of this class can be provided as a handler to a
// dwarf2reader::DIEDispatcher, which can in turn be a handler for a
// dwarf2reader::CompilationUnit DWARF parser. The handler uses the results
// of parsing to populate a google_breakpad::Module with source file,
// function, and source line information.
68 class DwarfCUToModule: public dwarf2reader::RootDIEHandler {
69   struct FilePrivate;
70  public:
71   // Information global to the DWARF-bearing file we are processing,
72   // for use by DwarfCUToModule. Each DwarfCUToModule instance deals
73   // with a single compilation unit within the file, but information
74   // global to the whole file is held here. The client is responsible
75   // for filling it in appropriately (except for the 'file_private'
76   // field, which the constructor and destructor take care of), and
77   // then providing it to the DwarfCUToModule instance for each
78   // compilation unit we process in that file. Set HANDLE_INTER_CU_REFS
79   // to true to handle debugging symbols with DW_FORM_ref_addr entries.
80   class FileContext {
81    public:
82     FileContext(const string &filename,
83                 Module *module,
84                 bool handle_inter_cu_refs);
85     ~FileContext();
86 
87     // Add CONTENTS of size LENGTH to the section map as NAME.
88     void AddSectionToSectionMap(const string& name,
89                                 const uint8_t *contents,
90                                 uint64_t length);
91 
92     // Clear the section map for testing.
93     void ClearSectionMapForTest();
94 
95     const dwarf2reader::SectionMap& section_map() const;
96 
97    private:
98     friend class DwarfCUToModule;
99 
100     // Clears all the Specifications if HANDLE_INTER_CU_REFS_ is false.
101     void ClearSpecifications();
102 
103     // Given an OFFSET and a CU that starts at COMPILATION_UNIT_START, returns
104     // true if this is an inter-compilation unit reference that is not being
105     // handled.
106     bool IsUnhandledInterCUReference(uint64_t offset,
107                                      uint64_t compilation_unit_start) const;
108 
109     // The name of this file, for use in error messages.
110     const string filename_;
111 
112     // A map of this file's sections, used for finding other DWARF
113     // sections that the .debug_info section may refer to.
114     dwarf2reader::SectionMap section_map_;
115 
116     // The Module to which we're contributing definitions.
117     Module *module_;
118 
119     // True if we are handling references between compilation units.
120     const bool handle_inter_cu_refs_;
121 
122     // Inter-compilation unit data used internally by the handlers.
123     scoped_ptr<FilePrivate> file_private_;
124   };
125 
126   // An abstract base class for handlers that handle DWARF range lists for
127   // DwarfCUToModule.
128   class RangesHandler {
129    public:
RangesHandler()130     RangesHandler() { }
~RangesHandler()131     virtual ~RangesHandler() { }
132 
133     // Called when finishing a function to populate the function's ranges.
134     // The ranges' entries are read starting from offset in the .debug_ranges
135     // section, base_address holds the base PC the range list values are
136     // offsets off. Return false if the rangelist falls out of the
137     // .debug_ranges section.
138     virtual bool ReadRanges(uint64_t offset, Module::Address base_address,
139                             vector<Module::Range>* ranges) = 0;
140   };
141 
142   // An abstract base class for handlers that handle DWARF line data
143   // for DwarfCUToModule. DwarfCUToModule could certainly just use
144   // dwarf2reader::LineInfo itself directly, but decoupling things
145   // this way makes unit testing a little easier.
146   class LineToModuleHandler {
147    public:
LineToModuleHandler()148     LineToModuleHandler() { }
~LineToModuleHandler()149     virtual ~LineToModuleHandler() { }
150 
151     // Called at the beginning of a new compilation unit, prior to calling
152     // ReadProgram(). compilation_dir will indicate the path that the
153     // current compilation unit was compiled in, consistent with the
154     // DW_AT_comp_dir DIE.
155     virtual void StartCompilationUnit(const string& compilation_dir) = 0;
156 
157     // Populate MODULE and LINES with source file names and code/line
158     // mappings, given a pointer to some DWARF line number data
159     // PROGRAM, and an overestimate of its size. Add no zero-length
160     // lines to LINES.
161     virtual void ReadProgram(const uint8_t *program, uint64_t length,
162                              Module *module, vector<Module::Line> *lines) = 0;
163   };
164 
165   // The interface DwarfCUToModule uses to report warnings. The member
166   // function definitions for this class write messages to stderr, but
167   // you can override them if you'd like to detect or report these
168   // conditions yourself.
169   class WarningReporter {
170    public:
171     // Warn about problems in the DWARF file FILENAME, in the
172     // compilation unit at OFFSET.
WarningReporter(const string & filename,uint64_t cu_offset)173     WarningReporter(const string &filename, uint64_t cu_offset)
174         : filename_(filename), cu_offset_(cu_offset), printed_cu_header_(false),
175           printed_unpaired_header_(false),
176           uncovered_warnings_enabled_(false) { }
~WarningReporter()177     virtual ~WarningReporter() { }
178 
179     // Set the name of the compilation unit we're processing to NAME.
SetCUName(const string & name)180     virtual void SetCUName(const string &name) { cu_name_ = name; }
181 
182     // Accessor and setter for uncovered_warnings_enabled_.
183     // UncoveredFunction and UncoveredLine only report a problem if that is
184     // true. By default, these warnings are disabled, because those
185     // conditions occur occasionally in healthy code.
uncovered_warnings_enabled()186     virtual bool uncovered_warnings_enabled() const {
187       return uncovered_warnings_enabled_;
188     }
set_uncovered_warnings_enabled(bool value)189     virtual void set_uncovered_warnings_enabled(bool value) {
190       uncovered_warnings_enabled_ = value;
191     }
192 
193     // A DW_AT_specification in the DIE at OFFSET refers to a DIE we
194     // haven't processed yet, or that wasn't marked as a declaration,
195     // at TARGET.
196     virtual void UnknownSpecification(uint64_t offset, uint64_t target);
197 
198     // A DW_AT_abstract_origin in the DIE at OFFSET refers to a DIE we
199     // haven't processed yet, or that wasn't marked as inline, at TARGET.
200     virtual void UnknownAbstractOrigin(uint64_t offset, uint64_t target);
201 
202     // We were unable to find the DWARF section named SECTION_NAME.
203     virtual void MissingSection(const string &section_name);
204 
205     // The CU's DW_AT_stmt_list offset OFFSET is bogus.
206     virtual void BadLineInfoOffset(uint64_t offset);
207 
208     // FUNCTION includes code covered by no line number data.
209     virtual void UncoveredFunction(const Module::Function &function);
210 
211     // Line number NUMBER in LINE_FILE, of length LENGTH, includes code
212     // covered by no function.
213     virtual void UncoveredLine(const Module::Line &line);
214 
215     // The DW_TAG_subprogram DIE at OFFSET has no name specified directly
216     // in the DIE, nor via a DW_AT_specification or DW_AT_abstract_origin
217     // link.
218     virtual void UnnamedFunction(uint64_t offset);
219 
220     // __cxa_demangle() failed to demangle INPUT.
221     virtual void DemangleError(const string &input);
222 
223     // The DW_FORM_ref_addr at OFFSET to TARGET was not handled because
224     // FilePrivate did not retain the inter-CU specification data.
225     virtual void UnhandledInterCUReference(uint64_t offset, uint64_t target);
226 
227     // The DW_AT_ranges at offset is malformed (truncated or outside of the
228     // .debug_ranges section's bound).
229     virtual void MalformedRangeList(uint64_t offset);
230 
231     // A DW_AT_ranges attribute was encountered but the no .debug_ranges
232     // section was found.
233     virtual void MissingRanges();
234 
cu_offset()235     uint64_t cu_offset() const {
236       return cu_offset_;
237     }
238 
239    protected:
240     const string filename_;
241     const uint64_t cu_offset_;
242     string cu_name_;
243     bool printed_cu_header_;
244     bool printed_unpaired_header_;
245     bool uncovered_warnings_enabled_;
246 
247    private:
248     // Print a per-CU heading, once.
249     void CUHeading();
250     // Print an unpaired function/line heading, once.
251     void UncoveredHeading();
252   };
253 
254   // Create a DWARF debugging info handler for a compilation unit
255   // within FILE_CONTEXT. This uses information received from the
256   // dwarf2reader::CompilationUnit DWARF parser to populate
257   // FILE_CONTEXT->module. Use LINE_READER to handle the compilation
258   // unit's line number data. Use REPORTER to report problems with the
259   // data we find.
260   DwarfCUToModule(FileContext *file_context,
261                   LineToModuleHandler *line_reader,
262                   RangesHandler *ranges_handler,
263                   WarningReporter *reporter);
264   ~DwarfCUToModule();
265 
266   void ProcessAttributeSigned(enum DwarfAttribute attr,
267                               enum DwarfForm form,
268                               int64_t data);
269   void ProcessAttributeUnsigned(enum DwarfAttribute attr,
270                                 enum DwarfForm form,
271                                 uint64_t data);
272   void ProcessAttributeString(enum DwarfAttribute attr,
273                               enum DwarfForm form,
274                               const string &data);
275   bool EndAttributes();
276   DIEHandler *FindChildHandler(uint64_t offset, enum DwarfTag tag);
277 
278   // Assign all our source Lines to the Functions that cover their
279   // addresses, and then add them to module_.
280   void Finish();
281 
282   bool StartCompilationUnit(uint64_t offset, uint8_t address_size,
283                             uint8_t offset_size, uint64_t cu_length,
284                             uint8_t dwarf_version);
285   bool StartRootDIE(uint64_t offset, enum DwarfTag tag);
286 
287  private:
288   // Used internally by the handler. Full definitions are in
289   // dwarf_cu_to_module.cc.
290   struct CUContext;
291   struct DIEContext;
292   struct Specification;
293   class GenericDIEHandler;
294   class FuncHandler;
295   class NamedScopeHandler;
296 
297   // A map from section offsets to specifications.
298   typedef map<uint64_t, Specification> SpecificationByOffset;
299 
300   // Set this compilation unit's source language to LANGUAGE.
301   void SetLanguage(DwarfLanguage language);
302 
303   // Read source line information at OFFSET in the .debug_line
304   // section.  Record source files in module_, but record source lines
305   // in lines_; we apportion them to functions in
306   // AssignLinesToFunctions.
307   void ReadSourceLines(uint64_t offset);
308 
309   // Assign the lines in lines_ to the individual line lists of the
310   // functions in functions_.  (DWARF line information maps an entire
311   // compilation unit at a time, and gives no indication of which
312   // lines belong to which functions, beyond their addresses.)
313   void AssignLinesToFunctions();
314 
315   // The only reason cu_context_ and child_context_ are pointers is
316   // that we want to keep their definitions private to
317   // dwarf_cu_to_module.cc, instead of listing them all here. They are
318   // owned by this DwarfCUToModule: the constructor sets them, and the
319   // destructor deletes them.
320 
321   // The handler to use to handle line number data.
322   LineToModuleHandler *line_reader_;
323 
324   // This compilation unit's context.
325   scoped_ptr<CUContext> cu_context_;
326 
327   // A context for our children.
328   scoped_ptr<DIEContext> child_context_;
329 
330   // True if this compilation unit has source line information.
331   bool has_source_line_info_;
332 
333   // The offset of this compilation unit's line number information in
334   // the .debug_line section.
335   uint64_t source_line_offset_;
336 
337   // The line numbers we have seen thus far.  We accumulate these here
338   // during parsing.  Then, in Finish, we call AssignLinesToFunctions
339   // to dole them out to the appropriate functions.
340   vector<Module::Line> lines_;
341 };
342 
343 }  // namespace google_breakpad
344 
345 #endif  // COMMON_LINUX_DWARF_CU_TO_MODULE_H__
346