1 // -*- mode: C++ -*-
2 
3 // Copyright (c) 2010 Google Inc. All Rights Reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
32 
// This file contains definitions related to the DWARF2/3 reader and
// its handler interfaces.
// The DWARF2/3 specification can be found at
// http://dwarf.freestandards.org and should be considered required
// reading if you wish to modify the implementation.
// Only a cursory attempt is made to explain terminology that is
// used here, as it is much better explained in the standard documents.
40 #ifndef COMMON_DWARF_DWARF2READER_H__
41 #define COMMON_DWARF_DWARF2READER_H__
42 
43 #include <stdint.h>
44 
45 #include <list>
46 #include <map>
47 #include <string>
48 #include <utility>
49 #include <vector>
50 #include <memory>
51 
52 #include "common/dwarf/bytereader.h"
53 #include "common/dwarf/dwarf2enums.h"
54 #include "common/dwarf/types.h"
55 #include "common/using_std_string.h"
56 #include "common/dwarf/elf_reader.h"
57 
58 namespace dwarf2reader {
59 struct LineStateMachine;
60 class Dwarf2Handler;
61 class LineInfoHandler;
62 class DwpReader;
63 
64 // This maps from a string naming a section to a pair containing a
65 // the data for the section, and the size of the section.
66 typedef std::map<string, std::pair<const uint8_t *, uint64_t> > SectionMap;
67 typedef std::list<std::pair<enum DwarfAttribute, enum DwarfForm> >
68     AttributeList;
69 typedef AttributeList::iterator AttributeIterator;
70 typedef AttributeList::const_iterator ConstAttributeIterator;
71 
// In-memory form of a DWARF2/3 line info header.  LineInfo keeps one of
// these in its header_ member.
struct LineInfoHeader {
  uint64_t total_length;
  uint16_t version;
  uint64_t prologue_length;
  // "insn" is short for instruction.
  uint8_t min_insn_length;
  // "stmt" is short for statement.
  bool default_is_stmt;
  int8_t line_base;
  uint8_t line_range;
  uint8_t opcode_base;
  // Kept behind a pointer so that signalsafe_addr2line is able to use
  // this structure without running into heap allocation problems.
  // LineInfo deletes the pointed-to vector in its destructor.
  std::vector<unsigned char>* std_opcode_lengths;
};
85 
86 class LineInfo {
87  public:
88 
89   // Initializes a .debug_line reader. Buffer and buffer length point
90   // to the beginning and length of the line information to read.
91   // Reader is a ByteReader class that has the endianness set
92   // properly.
93   LineInfo(const uint8_t *buffer_, uint64_t buffer_length,
94            ByteReader* reader, LineInfoHandler* handler);
95 
~LineInfo()96   virtual ~LineInfo() {
97     if (header_.std_opcode_lengths) {
98       delete header_.std_opcode_lengths;
99     }
100   }
101 
102   // Start processing line info, and calling callbacks in the handler.
103   // Consumes the line number information for a single compilation unit.
104   // Returns the number of bytes processed.
105   uint64_t Start();
106 
107   // Process a single line info opcode at START using the state
108   // machine at LSM.  Return true if we should define a line using the
109   // current state of the line state machine.  Place the length of the
110   // opcode in LEN.
111   // If LSM_PASSES_PC is non-NULL, this function also checks if the lsm
112   // passes the address of PC. In other words, LSM_PASSES_PC will be
113   // set to true, if the following condition is met.
114   //
115   // lsm's old address < PC <= lsm's new address
116   static bool ProcessOneOpcode(ByteReader* reader,
117                                LineInfoHandler* handler,
118                                const struct LineInfoHeader &header,
119                                const uint8_t *start,
120                                struct LineStateMachine* lsm,
121                                size_t* len,
122                                uintptr pc,
123                                bool *lsm_passes_pc);
124 
125  private:
126   // Reads the DWARF2/3 header for this line info.
127   void ReadHeader();
128 
129   // Reads the DWARF2/3 line information
130   void ReadLines();
131 
132   // The associated handler to call processing functions in
133   LineInfoHandler* handler_;
134 
135   // The associated ByteReader that handles endianness issues for us
136   ByteReader* reader_;
137 
138   // A DWARF2/3 line info header.  This is not the same size as
139   // in the actual file, as the one in the file may have a 32 bit or
140   // 64 bit lengths
141 
142   struct LineInfoHeader header_;
143 
144   // buffer is the buffer for our line info, starting at exactly where
145   // the line info to read is.  after_header is the place right after
146   // the end of the line information header.
147   const uint8_t *buffer_;
148 #ifndef NDEBUG
149   uint64_t buffer_length_;
150 #endif
151   const uint8_t *after_header_;
152 };
153 
154 // This class is the main interface between the line info reader and
155 // the client.  The virtual functions inside this get called for
156 // interesting events that happen during line info reading.  The
157 // default implementation does nothing
158 
class LineInfoHandler {
 public:
  LineInfoHandler() {}

  virtual ~LineInfoHandler() {}

  // Invoked when a directory is defined.  NAME is the name of the
  // directory and DIR_NUM is its number.  The default does nothing.
  virtual void DefineDir(const string& name, uint32_t dir_num) {}

  // Invoked when a file name is defined.  NAME is the file name.
  // FILE_NUM is the file number; it is -1 when the file index is the
  // next index after the last numbered index (this happens when files
  // are dynamically defined by the line program).  DIR_NUM is the
  // directory index for the directory name of this file, MOD_TIME is
  // the file's modification time, and LENGTH is the length of the
  // file.  The default does nothing.
  virtual void DefineFile(const string& name,
                          int32_t file_num,
                          uint32_t dir_num,
                          uint64_t mod_time,
                          uint64_t length) {}

  // Invoked when the line info reader has a new (line, address) pair
  // ready.  ADDRESS is the address of the code and LENGTH is the length
  // of its machine code in bytes.  FILE_NUM is the number of the file
  // containing the code, LINE_NUM is the line number within that file,
  // and COLUMN_NUM is the column the code starts at if it is known
  // (0 otherwise).  The default does nothing.
  virtual void AddLine(uint64_t address,
                       uint64_t length,
                       uint32_t file_num,
                       uint32_t line_num,
                       uint32_t column_num) {}
};
189 
// Receiver of callbacks about a range list.  Every callback has an
// empty default implementation.
class RangeListHandler {
 public:
  RangeListHandler() {}

  virtual ~RangeListHandler() {}

  // Adds one range, given by BEGIN and END.
  virtual void AddRange(uint64_t begin, uint64_t end) {}

  // Announces that BASE_ADDRESS must be used as the new base address
  // when computing the ranges' addresses.
  virtual void SetBaseAddress(uint64_t base_address) {}

  // Signals that processing of the range list is finished.
  virtual void Finish() {}
};
205 
206 class RangeListReader {
207  public:
208   RangeListReader(const uint8_t *buffer, uint64_t size, ByteReader *reader,
209                   RangeListHandler *handler);
210 
211   bool ReadRangeList(uint64_t offset);
212 
213  private:
214   const uint8_t *buffer_;
215   uint64_t size_;
216   ByteReader* reader_;
217   RangeListHandler *handler_;
218 };
219 
220 // This class is the main interface between the reader and the
221 // client.  The virtual functions inside this get called for
222 // interesting events that happen during DWARF2 reading.
223 // The default implementation skips everything.
224 class Dwarf2Handler {
225  public:
Dwarf2Handler()226   Dwarf2Handler() { }
227 
~Dwarf2Handler()228   virtual ~Dwarf2Handler() { }
229 
230   // Start to process a compilation unit at OFFSET from the beginning of the
231   // .debug_info section. Return false if you would like to skip this
232   // compilation unit.
StartCompilationUnit(uint64_t offset,uint8_t address_size,uint8_t offset_size,uint64_t cu_length,uint8_t dwarf_version)233   virtual bool StartCompilationUnit(uint64_t offset, uint8_t address_size,
234                                     uint8_t offset_size, uint64_t cu_length,
235                                     uint8_t dwarf_version) { return false; }
236 
237   // When processing a skeleton compilation unit, resulting from a split
238   // DWARF compilation, once the skeleton debug info has been read,
239   // the reader will call this function to ask the client if it needs
240   // the full debug info from the .dwo or .dwp file.  Return true if
241   // you need it, or false to skip processing the split debug info.
NeedSplitDebugInfo()242   virtual bool NeedSplitDebugInfo() { return true; }
243 
244   // Start to process a split compilation unit at OFFSET from the beginning of
245   // the debug_info section in the .dwp/.dwo file.  Return false if you would
246   // like to skip this compilation unit.
StartSplitCompilationUnit(uint64_t offset,uint64_t cu_length)247   virtual bool StartSplitCompilationUnit(uint64_t offset,
248                                          uint64_t cu_length) { return false; }
249 
250   // Start to process a DIE at OFFSET from the beginning of the .debug_info
251   // section. Return false if you would like to skip this DIE.
StartDIE(uint64_t offset,enum DwarfTag tag)252   virtual bool StartDIE(uint64_t offset, enum DwarfTag tag) { return false; }
253 
254   // Called when we have an attribute with unsigned data to give to our
255   // handler. The attribute is for the DIE at OFFSET from the beginning of the
256   // .debug_info section. Its name is ATTR, its form is FORM, and its value is
257   // DATA.
ProcessAttributeUnsigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)258   virtual void ProcessAttributeUnsigned(uint64_t offset,
259                                         enum DwarfAttribute attr,
260                                         enum DwarfForm form,
261                                         uint64_t data) { }
262 
263   // Called when we have an attribute with signed data to give to our handler.
264   // The attribute is for the DIE at OFFSET from the beginning of the
265   // .debug_info section. Its name is ATTR, its form is FORM, and its value is
266   // DATA.
ProcessAttributeSigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,int64_t data)267   virtual void ProcessAttributeSigned(uint64_t offset,
268                                       enum DwarfAttribute attr,
269                                       enum DwarfForm form,
270                                       int64_t data) { }
271 
272   // Called when we have an attribute whose value is a reference to
273   // another DIE. The attribute belongs to the DIE at OFFSET from the
274   // beginning of the .debug_info section. Its name is ATTR, its form
275   // is FORM, and the offset of the DIE being referred to from the
276   // beginning of the .debug_info section is DATA.
ProcessAttributeReference(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)277   virtual void ProcessAttributeReference(uint64_t offset,
278                                          enum DwarfAttribute attr,
279                                          enum DwarfForm form,
280                                          uint64_t data) { }
281 
282   // Called when we have an attribute with a buffer of data to give to our
283   // handler. The attribute is for the DIE at OFFSET from the beginning of the
284   // .debug_info section. Its name is ATTR, its form is FORM, DATA points to
285   // the buffer's contents, and its length in bytes is LENGTH. The buffer is
286   // owned by the caller, not the callee, and may not persist for very long.
287   // If you want the data to be available later, it needs to be copied.
ProcessAttributeBuffer(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const uint8_t * data,uint64_t len)288   virtual void ProcessAttributeBuffer(uint64_t offset,
289                                       enum DwarfAttribute attr,
290                                       enum DwarfForm form,
291                                       const uint8_t *data,
292                                       uint64_t len) { }
293 
294   // Called when we have an attribute with string data to give to our handler.
295   // The attribute is for the DIE at OFFSET from the beginning of the
296   // .debug_info section. Its name is ATTR, its form is FORM, and its value is
297   // DATA.
ProcessAttributeString(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const string & data)298   virtual void ProcessAttributeString(uint64_t offset,
299                                       enum DwarfAttribute attr,
300                                       enum DwarfForm form,
301                                       const string& data) { }
302 
303   // Called when we have an attribute whose value is the 64-bit signature
304   // of a type unit in the .debug_types section. OFFSET is the offset of
305   // the DIE whose attribute we're reporting. ATTR and FORM are the
306   // attribute's name and form. SIGNATURE is the type unit's signature.
ProcessAttributeSignature(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t signature)307   virtual void ProcessAttributeSignature(uint64_t offset,
308                                          enum DwarfAttribute attr,
309                                          enum DwarfForm form,
310                                          uint64_t signature) { }
311 
312   // Called when finished processing the DIE at OFFSET.
313   // Because DWARF2/3 specifies a tree of DIEs, you may get starts
314   // before ends of the previous DIE, as we process children before
315   // ending the parent.
EndDIE(uint64_t offset)316   virtual void EndDIE(uint64_t offset) { }
317 
318 };
319 
// The base of DWARF2/3 debug info is a DIE (Debugging Information
// Entry).
322 // DWARF groups DIE's into a tree and calls the root of this tree a
323 // "compilation unit".  Most of the time, there is one compilation
324 // unit in the .debug_info section for each file that had debug info
325 // generated.
// Each DIE consists of:

// 1. A tag specifying the thing that is being described (e.g.
// DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc.).
// 2. Attributes (such as DW_AT_location for location in memory,
// DW_AT_name for name), and data for each attribute.
// 3. A flag saying whether the DIE has children or not.
333 
334 // In order to gain some amount of compression, the format of
335 // each DIE (tag name, attributes and data forms for the attributes)
336 // are stored in a separate table called the "abbreviation table".
337 // This is done because a large number of DIEs have the exact same tag
338 // and list of attributes, but different data for those attributes.
339 // As a result, the .debug_info section is just a stream of data, and
340 // requires reading of the .debug_abbrev section to say what the data
341 // means.
342 
343 // As a warning to the user, it should be noted that the reason for
344 // using absolute offsets from the beginning of .debug_info is that
345 // DWARF2/3 supports referencing DIE's from other DIE's by their offset
346 // from either the current compilation unit start, *or* the beginning
347 // of the .debug_info section.  This means it is possible to reference
348 // a DIE in one compilation unit from a DIE in another compilation
349 // unit.  This style of reference is usually used to eliminate
350 // duplicated information that occurs across compilation
351 // units, such as base types, etc.  GCC 3.4+ support this with
352 // -feliminate-dwarf2-dups.  Other toolchains will sometimes do
353 // duplicate elimination in the linker.
354 
355 class CompilationUnit {
356  public:
357 
358   // Initialize a compilation unit.  This requires a map of sections,
359   // the offset of this compilation unit in the .debug_info section, a
360   // ByteReader, and a Dwarf2Handler class to call callbacks in.
361   CompilationUnit(const string& path, const SectionMap& sections,
362                   uint64_t offset, ByteReader* reader, Dwarf2Handler* handler);
~CompilationUnit()363   virtual ~CompilationUnit() {
364     if (abbrevs_) delete abbrevs_;
365   }
366 
367   // Initialize a compilation unit from a .dwo or .dwp file.
368   // In this case, we need the .debug_addr section from the
369   // executable file that contains the corresponding skeleton
370   // compilation unit.  We also inherit the Dwarf2Handler from
371   // the executable file, and call it as if we were still
372   // processing the original compilation unit.
373   void SetSplitDwarf(const uint8_t* addr_buffer, uint64_t addr_buffer_length,
374                      uint64_t addr_base, uint64_t ranges_base, uint64_t dwo_id);
375 
376   // Begin reading a Dwarf2 compilation unit, and calling the
377   // callbacks in the Dwarf2Handler
378 
379   // Return the full length of the compilation unit, including
380   // headers. This plus the starting offset passed to the constructor
381   // is the offset of the end of the compilation unit --- and the
382   // start of the next compilation unit, if there is one.
383   uint64_t Start();
384 
385  private:
386 
387   // This struct represents a single DWARF2/3 abbreviation
388   // The abbreviation tells how to read a DWARF2/3 DIE, and consist of a
389   // tag and a list of attributes, as well as the data form of each attribute.
390   struct Abbrev {
391     uint64_t number;
392     enum DwarfTag tag;
393     bool has_children;
394     AttributeList attributes;
395   };
396 
397   // A DWARF2/3 compilation unit header.  This is not the same size as
398   // in the actual file, as the one in the file may have a 32 bit or
399   // 64 bit length.
400   struct CompilationUnitHeader {
401     uint64_t length;
402     uint16_t version;
403     uint64_t abbrev_offset;
404     uint8_t address_size;
405   } header_;
406 
407   // Reads the DWARF2/3 header for this compilation unit.
408   void ReadHeader();
409 
410   // Reads the DWARF2/3 abbreviations for this compilation unit
411   void ReadAbbrevs();
412 
413   // Processes a single DIE for this compilation unit and return a new
414   // pointer just past the end of it
415   const uint8_t *ProcessDIE(uint64_t dieoffset,
416                             const uint8_t *start,
417                             const Abbrev& abbrev);
418 
419   // Processes a single attribute and return a new pointer just past the
420   // end of it
421   const uint8_t *ProcessAttribute(uint64_t dieoffset,
422                                   const uint8_t *start,
423                                   enum DwarfAttribute attr,
424                                   enum DwarfForm form);
425 
426   // Called when we have an attribute with unsigned data to give to
427   // our handler.  The attribute is for the DIE at OFFSET from the
428   // beginning of compilation unit, has a name of ATTR, a form of
429   // FORM, and the actual data of the attribute is in DATA.
430   // If we see a DW_AT_GNU_dwo_id attribute, save the value so that
431   // we can find the debug info in a .dwo or .dwp file.
ProcessAttributeUnsigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)432   void ProcessAttributeUnsigned(uint64_t offset,
433                                 enum DwarfAttribute attr,
434                                 enum DwarfForm form,
435                                 uint64_t data) {
436     if (attr == DW_AT_GNU_dwo_id) {
437       dwo_id_ = data;
438     }
439     else if (attr == DW_AT_GNU_addr_base) {
440       addr_base_ = data;
441     }
442     else if (attr == DW_AT_GNU_ranges_base) {
443       ranges_base_ = data;
444     }
445     // TODO(yunlian): When we add DW_AT_ranges_base from DWARF-5,
446     // that base will apply to DW_AT_ranges attributes in the
447     // skeleton CU as well as in the .dwo/.dwp files.
448     else if (attr == DW_AT_ranges && is_split_dwarf_) {
449       data += ranges_base_;
450     }
451     handler_->ProcessAttributeUnsigned(offset, attr, form, data);
452   }
453 
454   // Called when we have an attribute with signed data to give to
455   // our handler.  The attribute is for the DIE at OFFSET from the
456   // beginning of compilation unit, has a name of ATTR, a form of
457   // FORM, and the actual data of the attribute is in DATA.
ProcessAttributeSigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,int64_t data)458   void ProcessAttributeSigned(uint64_t offset,
459                               enum DwarfAttribute attr,
460                               enum DwarfForm form,
461                               int64_t data) {
462     handler_->ProcessAttributeSigned(offset, attr, form, data);
463   }
464 
465   // Called when we have an attribute with a buffer of data to give to
466   // our handler.  The attribute is for the DIE at OFFSET from the
467   // beginning of compilation unit, has a name of ATTR, a form of
468   // FORM, and the actual data of the attribute is in DATA, and the
469   // length of the buffer is LENGTH.
ProcessAttributeBuffer(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const uint8_t * data,uint64_t len)470   void ProcessAttributeBuffer(uint64_t offset,
471                               enum DwarfAttribute attr,
472                               enum DwarfForm form,
473                               const uint8_t* data,
474                               uint64_t len) {
475     handler_->ProcessAttributeBuffer(offset, attr, form, data, len);
476   }
477 
478   // Called when we have an attribute with string data to give to
479   // our handler.  The attribute is for the DIE at OFFSET from the
480   // beginning of compilation unit, has a name of ATTR, a form of
481   // FORM, and the actual data of the attribute is in DATA.
482   // If we see a DW_AT_GNU_dwo_name attribute, save the value so
483   // that we can find the debug info in a .dwo or .dwp file.
ProcessAttributeString(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const char * data)484   void ProcessAttributeString(uint64_t offset,
485                               enum DwarfAttribute attr,
486                               enum DwarfForm form,
487                               const char* data) {
488     if (attr == DW_AT_GNU_dwo_name)
489       dwo_name_ = data;
490     handler_->ProcessAttributeString(offset, attr, form, data);
491   }
492 
493   // Processes all DIEs for this compilation unit
494   void ProcessDIEs();
495 
496   // Skips the die with attributes specified in ABBREV starting at
497   // START, and return the new place to position the stream to.
498   const uint8_t *SkipDIE(const uint8_t *start, const Abbrev& abbrev);
499 
500   // Skips the attribute starting at START, with FORM, and return the
501   // new place to position the stream to.
502   const uint8_t *SkipAttribute(const uint8_t *start, enum DwarfForm form);
503 
504   // Process the actual debug information in a split DWARF file.
505   void ProcessSplitDwarf();
506 
507   // Read the debug sections from a .dwo file.
508   void ReadDebugSectionsFromDwo(ElfReader* elf_reader,
509                                 SectionMap* sections);
510 
511   // Path of the file containing the debug information.
512   const string path_;
513 
514   // Offset from section start is the offset of this compilation unit
515   // from the beginning of the .debug_info section.
516   uint64_t offset_from_section_start_;
517 
518   // buffer is the buffer for our CU, starting at .debug_info + offset
519   // passed in from constructor.
520   // after_header points to right after the compilation unit header.
521   const uint8_t *buffer_;
522   uint64_t buffer_length_;
523   const uint8_t *after_header_;
524 
525   // The associated ByteReader that handles endianness issues for us
526   ByteReader* reader_;
527 
528   // The map of sections in our file to buffers containing their data
529   const SectionMap& sections_;
530 
531   // The associated handler to call processing functions in
532   Dwarf2Handler* handler_;
533 
534   // Set of DWARF2/3 abbreviations for this compilation unit.  Indexed
535   // by abbreviation number, which means that abbrevs_[0] is not
536   // valid.
537   std::vector<Abbrev>* abbrevs_;
538 
539   // String section buffer and length, if we have a string section.
540   // This is here to avoid doing a section lookup for strings in
541   // ProcessAttribute, which is in the hot path for DWARF2 reading.
542   const uint8_t *string_buffer_;
543   uint64_t string_buffer_length_;
544 
545   // String offsets section buffer and length, if we have a string offsets
546   // section (.debug_str_offsets or .debug_str_offsets.dwo).
547   const uint8_t* str_offsets_buffer_;
548   uint64_t str_offsets_buffer_length_;
549 
550   // Address section buffer and length, if we have an address section
551   // (.debug_addr).
552   const uint8_t* addr_buffer_;
553   uint64_t addr_buffer_length_;
554 
555   // Flag indicating whether this compilation unit is part of a .dwo
556   // or .dwp file.  If true, we are reading this unit because a
557   // skeleton compilation unit in an executable file had a
558   // DW_AT_GNU_dwo_name or DW_AT_GNU_dwo_id attribute.
559   // In a .dwo file, we expect the string offsets section to
560   // have a ".dwo" suffix, and we will use the ".debug_addr" section
561   // associated with the skeleton compilation unit.
562   bool is_split_dwarf_;
563 
564   // The value of the DW_AT_GNU_dwo_id attribute, if any.
565   uint64_t dwo_id_;
566 
567   // The value of the DW_AT_GNU_dwo_name attribute, if any.
568   const char* dwo_name_;
569 
570   // If this is a split DWARF CU, the value of the DW_AT_GNU_dwo_id attribute
571   // from the skeleton CU.
572   uint64_t skeleton_dwo_id_;
573 
574   // The value of the DW_AT_GNU_ranges_base attribute, if any.
575   uint64_t ranges_base_;
576 
577   // The value of the DW_AT_GNU_addr_base attribute, if any.
578   uint64_t addr_base_;
579 
580   // True if we have already looked for a .dwp file.
581   bool have_checked_for_dwp_;
582 
583   // Path to the .dwp file.
584   string dwp_path_;
585 
586   // ByteReader for the DWP file.
587   std::unique_ptr<ByteReader> dwp_byte_reader_;
588 
589   // DWP reader.
590    std::unique_ptr<DwpReader> dwp_reader_;
591 };
592 
593 // A Reader for a .dwp file.  Supports the fetching of DWARF debug
594 // info for a given dwo_id.
595 //
596 // There are two versions of .dwp files.  In both versions, the
597 // .dwp file is an ELF file containing only debug sections.
598 // In Version 1, the file contains many copies of each debug
599 // section, one for each .dwo file that is packaged in the .dwp
600 // file, and the .debug_cu_index section maps from the dwo_id
601 // to a set of section indexes.  In Version 2, the file contains
602 // one of each debug section, and the .debug_cu_index section
603 // maps from the dwo_id to a set of offsets and lengths that
604 // identify each .dwo file's contribution to the larger sections.
605 
606 class DwpReader {
607  public:
608   DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader);
609 
610   ~DwpReader();
611 
612   // Read the CU index and initialize data members.
613   void Initialize();
614 
615   // Read the debug sections for the given dwo_id.
616   void ReadDebugSectionsForCU(uint64_t dwo_id, SectionMap* sections);
617 
618  private:
619   // Search a v1 hash table for "dwo_id".  Returns the slot index
620   // where the dwo_id was found, or -1 if it was not found.
621   int LookupCU(uint64_t dwo_id);
622 
623   // Search a v2 hash table for "dwo_id".  Returns the row index
624   // in the offsets and sizes tables, or 0 if it was not found.
625   uint32_t LookupCUv2(uint64_t dwo_id);
626 
627   // The ELF reader for the .dwp file.
628   ElfReader* elf_reader_;
629 
630   // The ByteReader for the .dwp file.
631   const ByteReader& byte_reader_;
632 
633   // Pointer to the .debug_cu_index section.
634   const char* cu_index_;
635 
636   // Size of the .debug_cu_index section.
637   size_t cu_index_size_;
638 
639   // Pointer to the .debug_str.dwo section.
640   const char* string_buffer_;
641 
642   // Size of the .debug_str.dwo section.
643   size_t string_buffer_size_;
644 
645   // Version of the .dwp file.  We support versions 1 and 2 currently.
646   int version_;
647 
648   // Number of columns in the section tables (version 2).
649   unsigned int ncolumns_;
650 
651   // Number of units in the section tables (version 2).
652   unsigned int nunits_;
653 
654   // Number of slots in the hash table.
655   unsigned int nslots_;
656 
657   // Pointer to the beginning of the hash table.
658   const char* phash_;
659 
660   // Pointer to the beginning of the index table.
661   const char* pindex_;
662 
663   // Pointer to the beginning of the section index pool (version 1).
664   const char* shndx_pool_;
665 
666   // Pointer to the beginning of the section offset table (version 2).
667   const char* offset_table_;
668 
669   // Pointer to the beginning of the section size table (version 2).
670   const char* size_table_;
671 
672   // Contents of the sections of interest (version 2).
673   const char* abbrev_data_;
674   size_t abbrev_size_;
675   const char* info_data_;
676   size_t info_size_;
677   const char* str_offsets_data_;
678   size_t str_offsets_size_;
679 };
680 
681 // This class is a reader for DWARF's Call Frame Information.  CFI
682 // describes how to unwind stack frames --- even for functions that do
683 // not follow fixed conventions for saving registers, whose frame size
684 // varies as they execute, etc.
685 //
686 // CFI describes, at each machine instruction, how to compute the
687 // stack frame's base address, how to find the return address, and
688 // where to find the saved values of the caller's registers (if the
689 // callee has stashed them somewhere to free up the registers for its
690 // own use).
691 //
692 // For example, suppose we have a function whose machine code looks
693 // like this (imagine an assembly language that looks like C, for a
694 // machine with 32-bit registers, and a stack that grows towards lower
695 // addresses):
696 //
697 // func:                                ; entry point; return address at sp
698 // func+0:      sp = sp - 16            ; allocate space for stack frame
699 // func+1:      sp[12] = r0             ; save r0 at sp+12
700 // ...                                  ; other code, not frame-related
701 // func+10:     sp -= 4; *sp = x        ; push some x on the stack
702 // ...                                  ; other code, not frame-related
703 // func+20:     r0 = sp[16]             ; restore saved r0
704 // func+21:     sp += 20                ; pop whole stack frame
705 // func+22:     pc = *sp; sp += 4       ; pop return address and jump to it
706 //
707 // DWARF CFI is (a very compressed representation of) a table with a
708 // row for each machine instruction address and a column for each
709 // register showing how to restore it, if possible.
710 //
711 // A special column named "CFA", for "Canonical Frame Address", tells how
712 // to compute the base address of the frame; registers' entries may
713 // refer to the CFA in describing where the registers are saved.
714 //
715 // Another special column, named "RA", represents the return address.
716 //
717 // For example, here is a complete (uncompressed) table describing the
718 // function above:
719 //
720 //     insn      cfa    r0      r1 ...  ra
721 //     =======================================
722 //     func+0:   sp                     cfa[0]
723 //     func+1:   sp+16                  cfa[0]
724 //     func+2:   sp+16  cfa[-4]         cfa[0]
725 //     func+11:  sp+20  cfa[-4]         cfa[0]
726 //     func+21:  sp+20                  cfa[0]
727 //     func+22:  sp                     cfa[0]
728 //
729 // Some things to note here:
730 //
731 // - Each row describes the state of affairs *before* executing the
732 //   instruction at the given address.  Thus, the row for func+0
733 //   describes the state before we allocate the stack frame.  In the
734 //   next row, the formula for computing the CFA has changed,
735 //   reflecting that allocation.
736 //
737 // - The other entries are written in terms of the CFA; this allows
738 //   them to remain unchanged as the stack pointer gets bumped around.
739 //   For example, the rule for recovering the return address (the "ra"
740 //   column) remains unchanged throughout the function, even as the
741 //   stack pointer takes on three different offsets from the return
742 //   address.
743 //
744 // - Although we haven't shown it, most calling conventions designate
745 //   "callee-saves" and "caller-saves" registers. The callee must
746 //   preserve the values of callee-saves registers; if it uses them,
747 //   it must save their original values somewhere, and restore them
748 //   before it returns. In contrast, the callee is free to trash
749 //   caller-saves registers; if the callee uses these, it will
750 //   probably not bother to save them anywhere, and the CFI will
751 //   probably mark their values as "unrecoverable".
752 //
//   (However, since the caller cannot assume the callee was going to
//   save them, caller-saves registers are probably dead in the caller
//   anyway, so compilers usually don't generate CFI for caller-saves
//   registers.)
757 //
758 // - Exactly where the CFA points is a matter of convention that
759 //   depends on the architecture and ABI in use. In the example, the
760 //   CFA is the value the stack pointer had upon entry to the
761 //   function, pointing at the saved return address. But on the x86,
762 //   the call frame information generated by GCC follows the
763 //   convention that the CFA is the address *after* the saved return
764 //   address.
765 //
//   But by definition, the CFA remains constant throughout the
//   lifetime of the frame. This makes it a useful value for other
//   columns to refer to. It also gives debuggers a useful handle
//   for identifying a frame.
770 //
771 // If you look at the table above, you'll notice that a given entry is
772 // often the same as the one immediately above it: most instructions
773 // change only one or two aspects of the stack frame, if they affect
774 // it at all. The DWARF format takes advantage of this fact, and
775 // reduces the size of the data by mentioning only the addresses and
776 // columns at which changes take place. So for the above, DWARF CFI
777 // data would only actually mention the following:
778 //
779 //     insn      cfa    r0      r1 ...  ra
780 //     =======================================
781 //     func+0:   sp                     cfa[0]
782 //     func+1:   sp+16
783 //     func+2:          cfa[-4]
784 //     func+11:  sp+20
785 //     func+21:         r0
786 //     func+22:  sp
787 //
788 // In fact, this is the way the parser reports CFI to the consumer: as
789 // a series of statements of the form, "At address X, column Y changed
790 // to Z," and related conventions for describing the initial state.
791 //
792 // Naturally, it would be impractical to have to scan the entire
793 // program's CFI, noting changes as we go, just to recover the
794 // unwinding rules in effect at one particular instruction. To avoid
795 // this, CFI data is grouped into "entries", each of which covers a
796 // specified range of addresses and begins with a complete statement
797 // of the rules for all recoverable registers at that starting
798 // address. Each entry typically covers a single function.
799 //
800 // Thus, to compute the contents of a given row of the table --- that
801 // is, rules for recovering the CFA, RA, and registers at a given
802 // instruction --- the consumer should find the entry that covers that
803 // instruction's address, start with the initial state supplied at the
804 // beginning of the entry, and work forward until it has processed all
805 // the changes up to and including those for the present instruction.
806 //
807 // There are seven kinds of rules that can appear in an entry of the
808 // table:
809 //
810 // - "undefined": The given register is not preserved by the callee;
811 //   its value cannot be recovered.
812 //
// - "same value": The register's value in the caller is the same as
//   its current value in the callee.
814 //
815 // - offset(N): The register is saved at offset N from the CFA.
816 //
817 // - val_offset(N): The value the register had in the caller is the
818 //   CFA plus offset N. (This is usually only useful for describing
819 //   the stack pointer.)
820 //
821 // - register(R): The register's value was saved in another register R.
822 //
823 // - expression(E): Evaluating the DWARF expression E using the
824 //   current frame's registers' values yields the address at which the
825 //   register was saved.
826 //
827 // - val_expression(E): Evaluating the DWARF expression E using the
828 //   current frame's registers' values yields the value the register
829 //   had in the caller.
830 
831 class CallFrameInfo {
832  public:
833   // The different kinds of entries one finds in CFI. Used internally,
834   // and for error reporting.
835   enum EntryKind { kUnknown, kCIE, kFDE, kTerminator };
836 
837   // The handler class to which the parser hands the parsed call frame
838   // information.  Defined below.
839   class Handler;
840 
841   // A reporter class, which CallFrameInfo uses to report errors
842   // encountered while parsing call frame information.  Defined below.
843   class Reporter;
844 
845   // Create a DWARF CFI parser. BUFFER points to the contents of the
846   // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes.
847   // REPORTER is an error reporter the parser should use to report
848   // problems. READER is a ByteReader instance that has the endianness and
849   // address size set properly. Report the data we find to HANDLER.
850   //
851   // This class can also parse Linux C++ exception handling data, as found
852   // in '.eh_frame' sections. This data is a variant of DWARF CFI that is
853   // placed in loadable segments so that it is present in the program's
854   // address space, and is interpreted by the C++ runtime to search the
855   // call stack for a handler interested in the exception being thrown,
856   // actually pop the frames, and find cleanup code to run.
857   //
858   // There are two differences between the call frame information described
859   // in the DWARF standard and the exception handling data Linux places in
860   // the .eh_frame section:
861   //
862   // - Exception handling data uses uses a different format for call frame
863   //   information entry headers. The distinguished CIE id, the way FDEs
864   //   refer to their CIEs, and the way the end of the series of entries is
865   //   determined are all slightly different.
866   //
867   //   If the constructor's EH_FRAME argument is true, then the
868   //   CallFrameInfo parses the entry headers as Linux C++ exception
869   //   handling data. If EH_FRAME is false or omitted, the CallFrameInfo
870   //   parses standard DWARF call frame information.
871   //
872   // - Linux C++ exception handling data uses CIE augmentation strings
873   //   beginning with 'z' to specify the presence of additional data after
874   //   the CIE and FDE headers and special encodings used for addresses in
875   //   frame description entries.
876   //
877   //   CallFrameInfo can handle 'z' augmentations in either DWARF CFI or
878   //   exception handling data if you have supplied READER with the base
879   //   addresses needed to interpret the pointer encodings that 'z'
880   //   augmentations can specify. See the ByteReader interface for details
881   //   about the base addresses. See the CallFrameInfo::Handler interface
882   //   for details about the additional information one might find in
883   //   'z'-augmented data.
884   //
885   // Thus:
886   //
887   // - If you are parsing standard DWARF CFI, as found in a .debug_frame
888   //   section, you should pass false for the EH_FRAME argument, or omit
889   //   it, and you need not worry about providing READER with the
890   //   additional base addresses.
891   //
892   // - If you want to parse Linux C++ exception handling data from a
893   //   .eh_frame section, you should pass EH_FRAME as true, and call
894   //   READER's Set*Base member functions before calling our Start method.
895   //
896   // - If you want to parse DWARF CFI that uses the 'z' augmentations
897   //   (although I don't think any toolchain ever emits such data), you
898   //   could pass false for EH_FRAME, but call READER's Set*Base members.
899   //
900   // The extensions the Linux C++ ABI makes to DWARF for exception
901   // handling are described here, rather poorly:
902   // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
903   // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
904   //
905   // The mechanics of C++ exception handling, personality routines,
906   // and language-specific data areas are described here, rather nicely:
907   // http://www.codesourcery.com/public/cxx-abi/abi-eh.html
908   CallFrameInfo(const uint8_t *buffer, size_t buffer_length,
909                 ByteReader *reader, Handler *handler, Reporter *reporter,
910                 bool eh_frame = false)
buffer_(buffer)911       : buffer_(buffer), buffer_length_(buffer_length),
912         reader_(reader), handler_(handler), reporter_(reporter),
913         eh_frame_(eh_frame) { }
914 
~CallFrameInfo()915   ~CallFrameInfo() { }
916 
917   // Parse the entries in BUFFER, reporting what we find to HANDLER.
918   // Return true if we reach the end of the section successfully, or
919   // false if we encounter an error.
920   bool Start();
921 
922   // Return the textual name of KIND. For error reporting.
923   static const char *KindName(EntryKind kind);
924 
925  private:
926 
927   struct CIE;
928 
929   // A CFI entry, either an FDE or a CIE.
930   struct Entry {
931     // The starting offset of the entry in the section, for error
932     // reporting.
933     size_t offset;
934 
935     // The start of this entry in the buffer.
936     const uint8_t *start;
937 
938     // Which kind of entry this is.
939     //
940     // We want to be able to use this for error reporting even while we're
941     // in the midst of parsing. Error reporting code may assume that kind,
942     // offset, and start fields are valid, although kind may be kUnknown.
943     EntryKind kind;
944 
945     // The end of this entry's common prologue (initial length and id), and
946     // the start of this entry's kind-specific fields.
947     const uint8_t *fields;
948 
949     // The start of this entry's instructions.
950     const uint8_t *instructions;
951 
952     // The address past the entry's last byte in the buffer. (Note that
953     // since offset points to the entry's initial length field, and the
954     // length field is the number of bytes after that field, this is not
955     // simply buffer_ + offset + length.)
956     const uint8_t *end;
957 
958     // For both DWARF CFI and .eh_frame sections, this is the CIE id in a
959     // CIE, and the offset of the associated CIE in an FDE.
960     uint64_t id;
961 
962     // The CIE that applies to this entry, if we've parsed it. If this is a
963     // CIE, then this field points to this structure.
964     CIE *cie;
965   };
966 
967   // A common information entry (CIE).
968   struct CIE: public Entry {
969     uint8_t version;                      // CFI data version number
970     string augmentation;                // vendor format extension markers
971     uint64_t code_alignment_factor;       // scale for code address adjustments
972     int data_alignment_factor;          // scale for stack pointer adjustments
973     unsigned return_address_register;   // which register holds the return addr
974 
975     // True if this CIE includes Linux C++ ABI 'z' augmentation data.
976     bool has_z_augmentation;
977 
978     // Parsed 'z' augmentation data. These are meaningful only if
979     // has_z_augmentation is true.
980     bool has_z_lsda;                    // The 'z' augmentation included 'L'.
981     bool has_z_personality;             // The 'z' augmentation included 'P'.
982     bool has_z_signal_frame;            // The 'z' augmentation included 'S'.
983 
984     // If has_z_lsda is true, this is the encoding to be used for language-
985     // specific data area pointers in FDEs.
986     DwarfPointerEncoding lsda_encoding;
987 
988     // If has_z_personality is true, this is the encoding used for the
989     // personality routine pointer in the augmentation data.
990     DwarfPointerEncoding personality_encoding;
991 
992     // If has_z_personality is true, this is the address of the personality
993     // routine --- or, if personality_encoding & DW_EH_PE_indirect, the
994     // address where the personality routine's address is stored.
995     uint64_t personality_address;
996 
997     // This is the encoding used for addresses in the FDE header and
998     // in DW_CFA_set_loc instructions. This is always valid, whether
999     // or not we saw a 'z' augmentation string; its default value is
1000     // DW_EH_PE_absptr, which is what normal DWARF CFI uses.
1001     DwarfPointerEncoding pointer_encoding;
1002 
1003     // These were only introduced in DWARF4, so will not be set in older
1004     // versions.
1005     uint8_t address_size;
1006     uint8_t segment_size;
1007   };
1008 
1009   // A frame description entry (FDE).
1010   struct FDE: public Entry {
1011     uint64_t address;                     // start address of described code
1012     uint64_t size;                        // size of described code, in bytes
1013 
1014     // If cie->has_z_lsda is true, then this is the language-specific data
1015     // area's address --- or its address's address, if cie->lsda_encoding
1016     // has the DW_EH_PE_indirect bit set.
1017     uint64_t lsda_address;
1018   };
1019 
1020   // Internal use.
1021   class Rule;
1022   class UndefinedRule;
1023   class SameValueRule;
1024   class OffsetRule;
1025   class ValOffsetRule;
1026   class RegisterRule;
1027   class ExpressionRule;
1028   class ValExpressionRule;
1029   class RuleMap;
1030   class State;
1031 
1032   // Parse the initial length and id of a CFI entry, either a CIE, an FDE,
1033   // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the
1034   // data to parse. On success, populate ENTRY as appropriate, and return
1035   // true. On failure, report the problem, and return false. Even if we
1036   // return false, set ENTRY->end to the first byte after the entry if we
1037   // were able to figure that out, or NULL if we weren't.
1038   bool ReadEntryPrologue(const uint8_t *cursor, Entry *entry);
1039 
1040   // Parse the fields of a CIE after the entry prologue, including any 'z'
1041   // augmentation data. Assume that the 'Entry' fields of CIE are
1042   // populated; use CIE->fields and CIE->end as the start and limit for
1043   // parsing. On success, populate the rest of *CIE, and return true; on
1044   // failure, report the problem and return false.
1045   bool ReadCIEFields(CIE *cie);
1046 
1047   // Parse the fields of an FDE after the entry prologue, including any 'z'
1048   // augmentation data. Assume that the 'Entry' fields of *FDE are
1049   // initialized; use FDE->fields and FDE->end as the start and limit for
1050   // parsing. Assume that FDE->cie is fully initialized. On success,
1051   // populate the rest of *FDE, and return true; on failure, report the
1052   // problem and return false.
1053   bool ReadFDEFields(FDE *fde);
1054 
1055   // Report that ENTRY is incomplete, and return false. This is just a
1056   // trivial wrapper for invoking reporter_->Incomplete; it provides a
1057   // little brevity.
1058   bool ReportIncomplete(Entry *entry);
1059 
1060   // Return true if ENCODING has the DW_EH_PE_indirect bit set.
IsIndirectEncoding(DwarfPointerEncoding encoding)1061   static bool IsIndirectEncoding(DwarfPointerEncoding encoding) {
1062     return encoding & DW_EH_PE_indirect;
1063   }
1064 
1065   // The contents of the DWARF .debug_info section we're parsing.
1066   const uint8_t *buffer_;
1067   size_t buffer_length_;
1068 
1069   // For reading multi-byte values with the appropriate endianness.
1070   ByteReader *reader_;
1071 
1072   // The handler to which we should report the data we find.
1073   Handler *handler_;
1074 
1075   // For reporting problems in the info we're parsing.
1076   Reporter *reporter_;
1077 
1078   // True if we are processing .eh_frame-format data.
1079   bool eh_frame_;
1080 };
1081 
// The handler class for CallFrameInfo.  The CFI parser calls the
// member functions of a handler object to report the data it finds.
1084 class CallFrameInfo::Handler {
1085  public:
1086   // The pseudo-register number for the canonical frame address.
1087   enum { kCFARegister = -1 };
1088 
Handler()1089   Handler() { }
~Handler()1090   virtual ~Handler() { }
1091 
1092   // The parser has found CFI for the machine code at ADDRESS,
1093   // extending for LENGTH bytes. OFFSET is the offset of the frame
1094   // description entry in the section, for use in error messages.
1095   // VERSION is the version number of the CFI format. AUGMENTATION is
1096   // a string describing any producer-specific extensions present in
1097   // the data. RETURN_ADDRESS is the number of the register that holds
1098   // the address to which the function should return.
1099   //
1100   // Entry should return true to process this CFI, or false to skip to
1101   // the next entry.
1102   //
1103   // The parser invokes Entry for each Frame Description Entry (FDE)
1104   // it finds.  The parser doesn't report Common Information Entries
1105   // to the handler explicitly; instead, if the handler elects to
1106   // process a given FDE, the parser reiterates the appropriate CIE's
1107   // contents at the beginning of the FDE's rules.
1108   virtual bool Entry(size_t offset, uint64_t address, uint64_t length,
1109                      uint8_t version, const string &augmentation,
1110                      unsigned return_address) = 0;
1111 
1112   // When the Entry function returns true, the parser calls these
1113   // handler functions repeatedly to describe the rules for recovering
1114   // registers at each instruction in the given range of machine code.
1115   // Immediately after a call to Entry, the handler should assume that
1116   // the rule for each callee-saves register is "unchanged" --- that
1117   // is, that the register still has the value it had in the caller.
1118   //
1119   // If a *Rule function returns true, we continue processing this entry's
1120   // instructions. If a *Rule function returns false, we stop evaluating
1121   // instructions, and skip to the next entry. Either way, we call End
1122   // before going on to the next entry.
1123   //
1124   // In all of these functions, if the REG parameter is kCFARegister, then
1125   // the rule describes how to find the canonical frame address.
1126   // kCFARegister may be passed as a BASE_REGISTER argument, meaning that
1127   // the canonical frame address should be used as the base address for the
1128   // computation. All other REG values will be positive.
1129 
1130   // At ADDRESS, register REG's value is not recoverable.
1131   virtual bool UndefinedRule(uint64_t address, int reg) = 0;
1132 
1133   // At ADDRESS, register REG's value is the same as that it had in
1134   // the caller.
1135   virtual bool SameValueRule(uint64_t address, int reg) = 0;
1136 
1137   // At ADDRESS, register REG has been saved at offset OFFSET from
1138   // BASE_REGISTER.
1139   virtual bool OffsetRule(uint64_t address, int reg,
1140                           int base_register, long offset) = 0;
1141 
1142   // At ADDRESS, the caller's value of register REG is the current
1143   // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an
1144   // address at which the register's value is saved.)
1145   virtual bool ValOffsetRule(uint64_t address, int reg,
1146                              int base_register, long offset) = 0;
1147 
1148   // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs
1149   // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that
1150   // BASE_REGISTER is the "home" for REG's saved value: if you want to
1151   // assign to a variable whose home is REG in the calling frame, you
1152   // should put the value in BASE_REGISTER.
1153   virtual bool RegisterRule(uint64_t address, int reg, int base_register) = 0;
1154 
1155   // At ADDRESS, the DWARF expression EXPRESSION yields the address at
1156   // which REG was saved.
1157   virtual bool ExpressionRule(uint64_t address, int reg,
1158                               const string &expression) = 0;
1159 
1160   // At ADDRESS, the DWARF expression EXPRESSION yields the caller's
1161   // value for REG. (This rule doesn't provide an address at which the
1162   // register's value is saved.)
1163   virtual bool ValExpressionRule(uint64_t address, int reg,
1164                                  const string &expression) = 0;
1165 
1166   // Indicate that the rules for the address range reported by the
1167   // last call to Entry are complete.  End should return true if
1168   // everything is okay, or false if an error has occurred and parsing
1169   // should stop.
1170   virtual bool End() = 0;
1171 
1172   // Handler functions for Linux C++ exception handling data. These are
1173   // only called if the data includes 'z' augmentation strings.
1174 
1175   // The Linux C++ ABI uses an extension of the DWARF CFI format to
1176   // walk the stack to propagate exceptions from the throw to the
1177   // appropriate catch, and do the appropriate cleanups along the way.
1178   // CFI entries used for exception handling have two additional data
1179   // associated with them:
1180   //
1181   // - The "language-specific data area" describes which exception
1182   //   types the function has 'catch' clauses for, and indicates how
1183   //   to go about re-entering the function at the appropriate catch
1184   //   clause. If the exception is not caught, it describes the
1185   //   destructors that must run before the frame is popped.
1186   //
1187   // - The "personality routine" is responsible for interpreting the
1188   //   language-specific data area's contents, and deciding whether
1189   //   the exception should continue to propagate down the stack,
1190   //   perhaps after doing some cleanup for this frame, or whether the
1191   //   exception will be caught here.
1192   //
1193   // In principle, the language-specific data area is opaque to
1194   // everybody but the personality routine. In practice, these values
1195   // may be useful or interesting to readers with extra context, and
1196   // we have to at least skip them anyway, so we might as well report
1197   // them to the handler.
1198 
1199   // This entry's exception handling personality routine's address is
1200   // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
1201   // which the routine's address is stored. The default definition for
1202   // this handler function simply returns true, allowing parsing of
1203   // the entry to continue.
PersonalityRoutine(uint64_t address,bool indirect)1204   virtual bool PersonalityRoutine(uint64_t address, bool indirect) {
1205     return true;
1206   }
1207 
1208   // This entry's language-specific data area (LSDA) is located at
1209   // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
1210   // which the area's address is stored. The default definition for
1211   // this handler function simply returns true, allowing parsing of
1212   // the entry to continue.
LanguageSpecificDataArea(uint64_t address,bool indirect)1213   virtual bool LanguageSpecificDataArea(uint64_t address, bool indirect) {
1214     return true;
1215   }
1216 
1217   // This entry describes a signal trampoline --- this frame is the
1218   // caller of a signal handler. The default definition for this
1219   // handler function simply returns true, allowing parsing of the
1220   // entry to continue.
1221   //
1222   // The best description of the rationale for and meaning of signal
1223   // trampoline CFI entries seems to be in the GCC bug database:
1224   // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208
SignalHandler()1225   virtual bool SignalHandler() { return true; }
1226 };
1227 
1228 // The CallFrameInfo class makes calls on an instance of this class to
1229 // report errors or warn about problems in the data it is parsing. The
1230 // default definitions of these methods print a message to stderr, but
1231 // you can make a derived class that overrides them.
1232 class CallFrameInfo::Reporter {
1233  public:
1234   // Create an error reporter which attributes troubles to the section
1235   // named SECTION in FILENAME.
1236   //
1237   // Normally SECTION would be .debug_frame, but the Mac puts CFI data
1238   // in a Mach-O section named __debug_frame. If we support
1239   // Linux-style exception handling data, we could be reading an
1240   // .eh_frame section.
1241   Reporter(const string &filename,
1242            const string &section = ".debug_frame")
filename_(filename)1243       : filename_(filename), section_(section) { }
~Reporter()1244   virtual ~Reporter() { }
1245 
1246   // The CFI entry at OFFSET ends too early to be well-formed. KIND
1247   // indicates what kind of entry it is; KIND can be kUnknown if we
1248   // haven't parsed enough of the entry to tell yet.
1249   virtual void Incomplete(uint64_t offset, CallFrameInfo::EntryKind kind);
1250 
1251   // The .eh_frame data has a four-byte zero at OFFSET where the next
1252   // entry's length would be; this is a terminator. However, the buffer
1253   // length as given to the CallFrameInfo constructor says there should be
1254   // more data.
1255   virtual void EarlyEHTerminator(uint64_t offset);
1256 
1257   // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the
1258   // section is not that large.
1259   virtual void CIEPointerOutOfRange(uint64_t offset, uint64_t cie_offset);
1260 
1261   // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry
1262   // there is not a CIE.
1263   virtual void BadCIEId(uint64_t offset, uint64_t cie_offset);
1264 
1265   // The FDE at OFFSET refers to a CIE with an address size we don't know how
1266   // to handle.
1267   virtual void UnexpectedAddressSize(uint64_t offset, uint8_t address_size);
1268 
1269   // The FDE at OFFSET refers to a CIE with an segment descriptor size we
1270   // don't know how to handle.
1271   virtual void UnexpectedSegmentSize(uint64_t offset, uint8_t segment_size);
1272 
1273   // The FDE at OFFSET refers to a CIE with version number VERSION,
1274   // which we don't recognize. We cannot parse DWARF CFI if it uses
1275   // a version number we don't recognize.
1276   virtual void UnrecognizedVersion(uint64_t offset, int version);
1277 
1278   // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION,
1279   // which we don't recognize. We cannot parse DWARF CFI if it uses
1280   // augmentations we don't recognize.
1281   virtual void UnrecognizedAugmentation(uint64_t offset,
1282                                         const string &augmentation);
1283 
1284   // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not
1285   // a valid encoding.
1286   virtual void InvalidPointerEncoding(uint64_t offset, uint8_t encoding);
1287 
1288   // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends
1289   // on a base address which has not been supplied.
1290   virtual void UnusablePointerEncoding(uint64_t offset, uint8_t encoding);
1291 
1292   // The CIE at OFFSET contains a DW_CFA_restore instruction at
1293   // INSN_OFFSET, which may not appear in a CIE.
1294   virtual void RestoreInCIE(uint64_t offset, uint64_t insn_offset);
1295 
1296   // The entry at OFFSET, of kind KIND, has an unrecognized
1297   // instruction at INSN_OFFSET.
1298   virtual void BadInstruction(uint64_t offset, CallFrameInfo::EntryKind kind,
1299                               uint64_t insn_offset);
1300 
1301   // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
1302   // KIND, establishes a rule that cites the CFA, but we have not
1303   // established a CFA rule yet.
1304   virtual void NoCFARule(uint64_t offset, CallFrameInfo::EntryKind kind,
1305                          uint64_t insn_offset);
1306 
1307   // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
1308   // KIND, is a DW_CFA_restore_state instruction, but the stack of
1309   // saved states is empty.
1310   virtual void EmptyStateStack(uint64_t offset, CallFrameInfo::EntryKind kind,
1311                                uint64_t insn_offset);
1312 
1313   // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry
1314   // at OFFSET, of kind KIND, would restore a state that has no CFA
1315   // rule, whereas the current state does have a CFA rule. This is
1316   // bogus input, which the CallFrameInfo::Handler interface doesn't
1317   // (and shouldn't) have any way to report.
1318   virtual void ClearingCFARule(uint64_t offset, CallFrameInfo::EntryKind kind,
1319                                uint64_t insn_offset);
1320 
1321  protected:
1322   // The name of the file whose CFI we're reading.
1323   string filename_;
1324 
1325   // The name of the CFI section in that file.
1326   string section_;
1327 };
1328 
1329 }  // namespace dwarf2reader
1330 
1331 #endif  // UTIL_DEBUGINFO_DWARF2READER_H__
1332