1 // dwarf_reader.h -- parse dwarf2/3 debug information for gold  -*- C++ -*-
2 
3 // Copyright (C) 2007-2014 Free Software Foundation, Inc.
4 // Written by Ian Lance Taylor <iant@google.com>.
5 
6 // This file is part of gold.
7 
8 // This program is free software; you can redistribute it and/or modify
9 // it under the terms of the GNU General Public License as published by
10 // the Free Software Foundation; either version 3 of the License, or
11 // (at your option) any later version.
12 
13 // This program is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 // GNU General Public License for more details.
17 
18 // You should have received a copy of the GNU General Public License
19 // along with this program; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
21 // MA 02110-1301, USA.
22 
23 #ifndef GOLD_DWARF_READER_H
24 #define GOLD_DWARF_READER_H
25 
26 #include <vector>
27 #include <map>
28 #include <limits.h>
29 #include <sys/types.h>
30 
31 #include "elfcpp.h"
32 #include "elfcpp_swap.h"
33 #include "dwarf.h"
34 #include "reloc.h"
35 
36 namespace gold
37 {
38 
39 class Dwarf_info_reader;
40 struct LineStateMachine;
41 
// This class is used to extract the section index and offset of
// the target of a relocation for a given offset within the section.
// It is an abstract interface: each public method simply forwards to
// the protected do_*() virtual of the same name, which a derived
// class must implement.

class Elf_reloc_mapper
{
 public:
  Elf_reloc_mapper()
  { }

  virtual
  ~Elf_reloc_mapper()
  { }

  // Initialize the relocation tracker for section RELOC_SHNDX, whose
  // relocation section type is RELOC_TYPE.  Returns the result of the
  // derived class's do_initialize().
  bool
  initialize(unsigned int reloc_shndx, unsigned int reloc_type)
  { return this->do_initialize(reloc_shndx, reloc_type); }

  // Return the next reloc_offset.
  off_t
  next_offset()
  { return this->do_next_offset(); }

  // Advance to the next relocation past OFFSET.
  void
  advance(off_t offset)
  { this->do_advance(offset); }

  // Return the section index and offset within the section of the target
  // of the relocation for RELOC_OFFSET in the referring section.  The
  // offset is passed back through TARGET_OFFSET.
  unsigned int
  get_reloc_target(off_t reloc_offset, off_t* target_offset)
  { return this->do_get_reloc_target(reloc_offset, target_offset); }

  // Checkpoint the current position in the reloc section.  The value
  // returned is meant to be handed back to reset().
  uint64_t
  checkpoint() const
  { return this->do_checkpoint(); }

  // Reset the current position to the CHECKPOINT.
  void
  reset(uint64_t checkpoint)
  { this->do_reset(checkpoint); }

 protected:
  // Implementation hook for initialize().
  virtual bool
  do_initialize(unsigned int, unsigned int) = 0;

  // Return the next reloc_offset.
  virtual off_t
  do_next_offset() = 0;

  // Advance to the next relocation past OFFSET.
  virtual void
  do_advance(off_t offset) = 0;

  // Implementation hook for get_reloc_target().
  virtual unsigned int
  do_get_reloc_target(off_t reloc_offset, off_t* target_offset) = 0;

  // Checkpoint the current position in the reloc section.
  virtual uint64_t
  do_checkpoint() const = 0;

  // Reset the current position to the CHECKPOINT.
  virtual void
  do_reset(uint64_t checkpoint) = 0;
};
109 
110 template<int size, bool big_endian>
111 class Sized_elf_reloc_mapper : public Elf_reloc_mapper
112 {
113  public:
Sized_elf_reloc_mapper(Object * object,const unsigned char * symtab,off_t symtab_size)114   Sized_elf_reloc_mapper(Object* object, const unsigned char* symtab,
115 			 off_t symtab_size)
116     : object_(object), symtab_(symtab), symtab_size_(symtab_size),
117       reloc_type_(0), track_relocs_()
118   { }
119 
120  protected:
121   bool
122   do_initialize(unsigned int reloc_shndx, unsigned int reloc_type);
123 
124   // Return the next reloc_offset.
125   virtual off_t
do_next_offset()126   do_next_offset()
127   { return this->track_relocs_.next_offset(); }
128 
129   // Advance to the next relocation past OFFSET.
130   virtual void
do_advance(off_t offset)131   do_advance(off_t offset)
132   { this->track_relocs_.advance(offset); }
133 
134   unsigned int
135   do_get_reloc_target(off_t reloc_offset, off_t* target_offset);
136 
137   // Checkpoint the current position in the reloc section.
138   uint64_t
do_checkpoint()139   do_checkpoint() const
140   { return this->track_relocs_.checkpoint(); }
141 
142   // Reset the current position to the CHECKPOINT.
143   void
do_reset(uint64_t checkpoint)144   do_reset(uint64_t checkpoint)
145   { this->track_relocs_.reset(checkpoint); }
146 
147  private:
148   typedef typename elfcpp::Elf_types<size>::Elf_Addr Address;
149 
150   // Return the section index of symbol SYMNDX, and copy its value to *VALUE.
151   // Set *IS_ORDINARY true if the section index is an ordinary section index.
152   unsigned int
153   symbol_section(unsigned int symndx, Address* value, bool* is_ordinary);
154 
155   // The object file.
156   Object* object_;
157   // The ELF symbol table.
158   const unsigned char* symtab_;
159   // The size of the ELF symbol table.
160   off_t symtab_size_;
161   // Type of the relocation section (SHT_REL or SHT_RELA).
162   unsigned int reloc_type_;
163   // Relocations for the referring section.
164   Track_relocs<size, big_endian> track_relocs_;
165 };
166 
167 // This class is used to read the abbreviations table from the
168 // .debug_abbrev section of the object file.
169 
170 class Dwarf_abbrev_table
171 {
172  public:
173   // An attribute list entry.
174   struct Attribute
175   {
AttributeAttribute176     Attribute(unsigned int a, unsigned int f)
177       : attr(a), form(f)
178     { }
179     unsigned int attr;
180     unsigned int form;
181   };
182 
183   // An abbrev code entry.
184   struct Abbrev_code
185   {
Abbrev_codeAbbrev_code186     Abbrev_code(unsigned int t, bool hc)
187       : tag(t), has_children(hc), has_sibling_attribute(false), attributes()
188     {
189       this->attributes.reserve(10);
190     }
191 
192     void
add_attributeAbbrev_code193     add_attribute(unsigned int attr, unsigned int form)
194     {
195       this->attributes.push_back(Attribute(attr, form));
196     }
197 
198     // The DWARF tag.
199     unsigned int tag;
200     // True if the DIE has children.
201     bool has_children : 1;
202     // True if the DIE has a sibling attribute.
203     bool has_sibling_attribute : 1;
204     // The list of attributes and forms.
205     std::vector<Attribute> attributes;
206   };
207 
Dwarf_abbrev_table()208   Dwarf_abbrev_table()
209     : abbrev_shndx_(0), abbrev_offset_(0), buffer_(NULL), buffer_end_(NULL),
210       owns_buffer_(false), buffer_pos_(NULL), high_abbrev_codes_()
211   {
212     memset(this->low_abbrev_codes_, 0, sizeof(this->low_abbrev_codes_));
213   }
214 
~Dwarf_abbrev_table()215   ~Dwarf_abbrev_table()
216   {
217     if (this->owns_buffer_ && this->buffer_ != NULL)
218       delete[] this->buffer_;
219     this->clear_abbrev_codes();
220   }
221 
222   // Read the abbrev table from an object file.
223   bool
read_abbrevs(Relobj * object,unsigned int abbrev_shndx,off_t abbrev_offset)224   read_abbrevs(Relobj* object,
225 	       unsigned int abbrev_shndx,
226 	       off_t abbrev_offset)
227   {
228     // If we've already read this abbrev table, return immediately.
229     if (this->abbrev_shndx_ > 0
230 	&& this->abbrev_shndx_ == abbrev_shndx
231 	&& this->abbrev_offset_ == abbrev_offset)
232       return true;
233     return this->do_read_abbrevs(object, abbrev_shndx, abbrev_offset);
234   }
235 
236   // Return the abbrev code entry for CODE.  This is a fast path for
237   // abbrev codes that are in the direct lookup table.  If not found
238   // there, we call do_get_abbrev() to do the hard work.
239   const Abbrev_code*
get_abbrev(unsigned int code)240   get_abbrev(unsigned int code)
241   {
242     if (code < this->low_abbrev_code_max_
243 	&& this->low_abbrev_codes_[code] != NULL)
244       return this->low_abbrev_codes_[code];
245     return this->do_get_abbrev(code);
246   }
247 
248  private:
249   // Read the abbrev table from an object file.
250   bool
251   do_read_abbrevs(Relobj* object,
252 		  unsigned int abbrev_shndx,
253 		  off_t abbrev_offset);
254 
255   // Lookup the abbrev code entry for CODE.
256   const Abbrev_code*
257   do_get_abbrev(unsigned int code);
258 
259   // Store an abbrev code entry for CODE.
260   void
store_abbrev(unsigned int code,const Abbrev_code * entry)261   store_abbrev(unsigned int code, const Abbrev_code* entry)
262   {
263     if (code < this->low_abbrev_code_max_)
264       this->low_abbrev_codes_[code] = entry;
265     else
266       this->high_abbrev_codes_[code] = entry;
267   }
268 
269   // Clear the abbrev code table and release the memory it uses.
270   void
271   clear_abbrev_codes();
272 
273   typedef Unordered_map<unsigned int, const Abbrev_code*> Abbrev_code_table;
274 
275   // The section index of the current abbrev table.
276   unsigned int abbrev_shndx_;
277   // The offset within the section of the current abbrev table.
278   off_t abbrev_offset_;
279   // The buffer containing the .debug_abbrev section.
280   const unsigned char* buffer_;
281   const unsigned char* buffer_end_;
282   // True if this object owns the buffer and needs to delete it.
283   bool owns_buffer_;
284   // Pointer to the current position in the buffer.
285   const unsigned char* buffer_pos_;
286   // The table of abbrev codes.
287   // We use a direct-lookup array for low abbrev codes,
288   // and store the rest in a hash table.
289   static const unsigned int low_abbrev_code_max_ = 256;
290   const Abbrev_code* low_abbrev_codes_[low_abbrev_code_max_];
291   Abbrev_code_table high_abbrev_codes_;
292 };
293 
// A DWARF range list.  The start and end offsets are relative
// to the input section SHNDX.  Each range must lie entirely
// within a single section.

class Dwarf_range_list
{
 public:
  // One range: the section index plus the start and end offsets.
  struct Range
  {
    Range(unsigned int a_shndx, off_t a_start, off_t a_end)
      : shndx(a_shndx), start(a_start), end(a_end)
    { }

    unsigned int shndx;
    off_t start;
    off_t end;
  };

  Dwarf_range_list()
    : range_list_()
  { }

  // Append the range [START, END) in section SHNDX to the list.
  // NOTE(review): whether END is inclusive or exclusive is not
  // established by this class -- confirm against the reader.
  void
  add(unsigned int shndx, off_t start, off_t end)
  { this->range_list_.push_back(Range(shndx, start, end)); }

  // Return the number of ranges in the list.
  size_t
  size() const
  { return this->range_list_.size(); }

  // Return the Ith range.  No bounds check is done here; I must be
  // in [0, size()).
  const Range&
  operator[](off_t i) const
  { return this->range_list_[i]; }

 private:
  // The ranges, in the order they were added.
  std::vector<Range> range_list_;
};
331 
332 // This class is used to read the ranges table from the
333 // .debug_ranges section of the object file.
334 
335 class Dwarf_ranges_table
336 {
337  public:
Dwarf_ranges_table(Dwarf_info_reader * dwinfo)338   Dwarf_ranges_table(Dwarf_info_reader* dwinfo)
339     : dwinfo_(dwinfo), ranges_shndx_(0), ranges_buffer_(NULL),
340       ranges_buffer_end_(NULL), owns_ranges_buffer_(false),
341       ranges_reloc_mapper_(NULL), reloc_type_(0), output_section_offset_(0)
342   { }
343 
~Dwarf_ranges_table()344   ~Dwarf_ranges_table()
345   {
346     if (this->owns_ranges_buffer_ && this->ranges_buffer_ != NULL)
347       delete[] this->ranges_buffer_;
348     if (this->ranges_reloc_mapper_ != NULL)
349       delete this->ranges_reloc_mapper_;
350   }
351 
352   // Read the ranges table from an object file.
353   bool
354   read_ranges_table(Relobj* object,
355 		    const unsigned char* symtab,
356 		    off_t symtab_size,
357 		    unsigned int ranges_shndx);
358 
359   // Read the range table from an object file.
360   Dwarf_range_list*
361   read_range_list(Relobj* object,
362 		  const unsigned char* symtab,
363 		  off_t symtab_size,
364 		  unsigned int address_size,
365 		  unsigned int ranges_shndx,
366 		  off_t ranges_offset);
367 
368   // Look for a relocation at offset OFF in the range table,
369   // and return the section index and offset of the target.
370   unsigned int
371   lookup_reloc(off_t off, off_t* target_off);
372 
373  private:
374   // The Dwarf_info_reader, for reading data.
375   Dwarf_info_reader* dwinfo_;
376   // The section index of the ranges table.
377   unsigned int ranges_shndx_;
378   // The buffer containing the .debug_ranges section.
379   const unsigned char* ranges_buffer_;
380   const unsigned char* ranges_buffer_end_;
381   // True if this object owns the buffer and needs to delete it.
382   bool owns_ranges_buffer_;
383   // Relocation mapper for the .debug_ranges section.
384   Elf_reloc_mapper* ranges_reloc_mapper_;
385   // Type of the relocation section (SHT_REL or SHT_RELA).
386   unsigned int reloc_type_;
387   // For incremental update links, this will hold the offset of the
388   // input section within the output section.  Offsets read from
389   // relocated data will be relative to the output section, and need
390   // to be corrected before reading data from the input section.
391   uint64_t output_section_offset_;
392 };
393 
394 // This class is used to read the pubnames and pubtypes tables from the
395 // .debug_pubnames and .debug_pubtypes sections of the object file.
396 
397 class Dwarf_pubnames_table
398 {
399  public:
Dwarf_pubnames_table(Dwarf_info_reader * dwinfo,bool is_pubtypes)400   Dwarf_pubnames_table(Dwarf_info_reader* dwinfo, bool is_pubtypes)
401     : dwinfo_(dwinfo), buffer_(NULL), buffer_end_(NULL), owns_buffer_(false),
402       offset_size_(0), pinfo_(NULL), end_of_table_(NULL),
403       is_pubtypes_(is_pubtypes), is_gnu_style_(false),
404       unit_length_(0), cu_offset_(0)
405   { }
406 
~Dwarf_pubnames_table()407   ~Dwarf_pubnames_table()
408   {
409     if (this->owns_buffer_ && this->buffer_ != NULL)
410       delete[] this->buffer_;
411   }
412 
413   // Read the pubnames section from the object file, using the symbol
414   // table for relocating it.
415   bool
416   read_section(Relobj* object, const unsigned char* symbol_table,
417                off_t symtab_size);
418 
419   // Read the header for the set at OFFSET.
420   bool
421   read_header(off_t offset);
422 
423   // Return the offset to the cu within the info or types section.
424   off_t
cu_offset()425   cu_offset()
426   { return this->cu_offset_; }
427 
428   // Return the size of this subsection of the table.  The unit length
429   // doesn't include the size of its own field.
430   off_t
subsection_size()431   subsection_size()
432   { return this->unit_length_; }
433 
434   // Read the next name from the set.  If the pubname table is gnu-style,
435   // FLAG_BYTE is set to the high-byte of a gdb_index version 7 cu_index.
436   const char*
437   next_name(uint8_t* flag_byte);
438 
439  private:
440   // The Dwarf_info_reader, for reading data.
441   Dwarf_info_reader* dwinfo_;
442   // The buffer containing the .debug_ranges section.
443   const unsigned char* buffer_;
444   const unsigned char* buffer_end_;
445   // True if this object owns the buffer and needs to delete it.
446   bool owns_buffer_;
447   // The size of a DWARF offset for the current set.
448   unsigned int offset_size_;
449   // The current position within the buffer.
450   const unsigned char* pinfo_;
451   // The end of the current pubnames table.
452   const unsigned char* end_of_table_;
453   // TRUE if this is a .debug_pubtypes section.
454   bool is_pubtypes_;
455   // Gnu-style pubnames table. This style has an extra flag byte between the
456   // offset and the name, and is used for generating version 7 of gdb-index.
457   bool is_gnu_style_;
458   // Fields read from the header.
459   uint64_t unit_length_;
460   off_t cu_offset_;
461 
462   // Track relocations for this table so we can find the CUs that
463   // correspond to the subsections.
464   Elf_reloc_mapper* reloc_mapper_;
465   // Type of the relocation section (SHT_REL or SHT_RELA).
466   unsigned int reloc_type_;
467 };
468 
469 // This class represents a DWARF Debug Info Entry (DIE).
470 
471 class Dwarf_die
472 {
473  public:
474   // An attribute value.
475   struct Attribute_value
476   {
477     unsigned int attr;
478     unsigned int form;
479     union
480     {
481       int64_t intval;
482       uint64_t uintval;
483       const char* stringval;
484       const unsigned char* blockval;
485       off_t refval;
486     } val;
487     union
488     {
489       // Section index for reference forms.
490       unsigned int shndx;
491       // Block length for block forms.
492       unsigned int blocklen;
493       // Attribute offset for DW_FORM_strp.
494       unsigned int attr_off;
495     } aux;
496   };
497 
498   // A list of attribute values.
499   typedef std::vector<Attribute_value> Attributes;
500 
501   Dwarf_die(Dwarf_info_reader* dwinfo,
502 	    off_t die_offset,
503 	    Dwarf_die* parent);
504 
505   // Return the DWARF tag for this DIE.
506   unsigned int
tag()507   tag() const
508   {
509     if (this->abbrev_code_ == NULL)
510       return 0;
511     return this->abbrev_code_->tag;
512   }
513 
514   // Return true if this DIE has children.
515   bool
has_children()516   has_children() const
517   {
518     gold_assert(this->abbrev_code_ != NULL);
519     return this->abbrev_code_->has_children;
520   }
521 
522   // Return true if this DIE has a sibling attribute.
523   bool
has_sibling_attribute()524   has_sibling_attribute() const
525   {
526     gold_assert(this->abbrev_code_ != NULL);
527     return this->abbrev_code_->has_sibling_attribute;
528   }
529 
530   // Return the value of attribute ATTR.
531   const Attribute_value*
532   attribute(unsigned int attr);
533 
534   // Return the value of the DW_AT_name attribute.
535   const char*
name()536   name()
537   {
538     if (this->name_ == NULL)
539       this->set_name();
540     return this->name_;
541   }
542 
543   // Return the value of the DW_AT_linkage_name
544   // or DW_AT_MIPS_linkage_name attribute.
545   const char*
linkage_name()546   linkage_name()
547   {
548     if (this->linkage_name_ == NULL)
549       this->set_linkage_name();
550     return this->linkage_name_;
551   }
552 
553   // Return the value of the DW_AT_specification attribute.
554   off_t
specification()555   specification()
556   {
557     if (!this->attributes_read_)
558       this->read_attributes();
559     return this->specification_;
560   }
561 
562   // Return the value of the DW_AT_abstract_origin attribute.
563   off_t
abstract_origin()564   abstract_origin()
565   {
566     if (!this->attributes_read_)
567       this->read_attributes();
568     return this->abstract_origin_;
569   }
570 
571   // Return the value of attribute ATTR as a string.
572   const char*
573   string_attribute(unsigned int attr);
574 
575   // Return the value of attribute ATTR as an integer.
576   int64_t
577   int_attribute(unsigned int attr);
578 
579   // Return the value of attribute ATTR as an unsigned integer.
580   uint64_t
581   uint_attribute(unsigned int attr);
582 
583   // Return the value of attribute ATTR as a reference.
584   off_t
585   ref_attribute(unsigned int attr, unsigned int* shndx);
586 
587   // Return the value of attribute ATTR as a address.
588   off_t
589   address_attribute(unsigned int attr, unsigned int* shndx);
590 
591   // Return the value of attribute ATTR as a flag.
592   bool
flag_attribute(unsigned int attr)593   flag_attribute(unsigned int attr)
594   { return this->int_attribute(attr) != 0; }
595 
596   // Return true if this DIE is a declaration.
597   bool
is_declaration()598   is_declaration()
599   { return this->flag_attribute(elfcpp::DW_AT_declaration); }
600 
601   // Return the parent of this DIE.
602   Dwarf_die*
parent()603   parent() const
604   { return this->parent_; }
605 
606   // Return the offset of this DIE.
607   off_t
offset()608   offset() const
609   { return this->die_offset_; }
610 
611   // Return the offset of this DIE's first child.
612   off_t
613   child_offset();
614 
615   // Set the offset of this DIE's next sibling.
616   void
set_sibling_offset(off_t sibling_offset)617   set_sibling_offset(off_t sibling_offset)
618   { this->sibling_offset_ = sibling_offset; }
619 
620   // Return the offset of this DIE's next sibling.
621   off_t
622   sibling_offset();
623 
624  private:
625   typedef Dwarf_abbrev_table::Abbrev_code Abbrev_code;
626 
627   // Read all the attributes of the DIE.
628   bool
629   read_attributes();
630 
631   // Set the name of the DIE if present.
632   void
633   set_name();
634 
635   // Set the linkage name if present.
636   void
637   set_linkage_name();
638 
639   // Skip all the attributes of the DIE and return the offset
640   // of the next DIE.
641   off_t
642   skip_attributes();
643 
644   // The Dwarf_info_reader, for reading attributes.
645   Dwarf_info_reader* dwinfo_;
646   // The parent of this DIE.
647   Dwarf_die* parent_;
648   // Offset of this DIE within its compilation unit.
649   off_t die_offset_;
650   // Offset of the first attribute, relative to the beginning of the DIE.
651   off_t attr_offset_;
652   // Offset of the first child, relative to the compilation unit.
653   off_t child_offset_;
654   // Offset of the next sibling, relative to the compilation unit.
655   off_t sibling_offset_;
656   // The abbreviation table entry.
657   const Abbrev_code* abbrev_code_;
658   // The list of attributes.
659   Attributes attributes_;
660   // True if the attributes have been read.
661   bool attributes_read_;
662   // The following fields hold common attributes to avoid a linear
663   // search through the attribute list.
664   // The DIE name (DW_AT_name).
665   const char* name_;
666   // Offset of the name in the string table (for DW_FORM_strp).
667   off_t name_off_;
668   // The linkage name (DW_AT_linkage_name or DW_AT_MIPS_linkage_name).
669   const char* linkage_name_;
670   // Offset of the linkage name in the string table (for DW_FORM_strp).
671   off_t linkage_name_off_;
672   // Section index of the string table (for DW_FORM_strp).
673   unsigned int string_shndx_;
674   // The value of a DW_AT_specification attribute.
675   off_t specification_;
676   // The value of a DW_AT_abstract_origin attribute.
677   off_t abstract_origin_;
678 };
679 
680 // This class is used to read the debug info from the .debug_info
681 // or .debug_types sections.  This is a base class that implements
682 // the generic parsing of the compilation unit header and DIE
683 // structure.  The parse() method parses the entire section, and
684 // calls the various visit_xxx() methods for each header.  Clients
685 // should derive a new class from this one and implement the
686 // visit_compilation_unit() and visit_type_unit() functions.
687 
688 class Dwarf_info_reader
689 {
690  public:
Dwarf_info_reader(bool is_type_unit,Relobj * object,const unsigned char * symtab,off_t symtab_size,unsigned int shndx,unsigned int reloc_shndx,unsigned int reloc_type)691   Dwarf_info_reader(bool is_type_unit,
692 		    Relobj* object,
693 		    const unsigned char* symtab,
694 		    off_t symtab_size,
695 		    unsigned int shndx,
696 		    unsigned int reloc_shndx,
697 		    unsigned int reloc_type)
698     : is_type_unit_(is_type_unit), object_(object), symtab_(symtab),
699       symtab_size_(symtab_size), shndx_(shndx), reloc_shndx_(reloc_shndx),
700       reloc_type_(reloc_type), abbrev_shndx_(0), string_shndx_(0),
701       buffer_(NULL), buffer_end_(NULL), cu_offset_(0), cu_length_(0),
702       offset_size_(0), address_size_(0), cu_version_(0),
703       abbrev_table_(), ranges_table_(this),
704       reloc_mapper_(NULL), string_buffer_(NULL), string_buffer_end_(NULL),
705       owns_string_buffer_(false), string_output_section_offset_(0)
706   { }
707 
708   virtual
~Dwarf_info_reader()709   ~Dwarf_info_reader()
710   {
711     if (this->reloc_mapper_ != NULL)
712       delete this->reloc_mapper_;
713     if (this->owns_string_buffer_ && this->string_buffer_ != NULL)
714       delete[] this->string_buffer_;
715   }
716 
717   // Begin parsing the debug info.  This calls visit_compilation_unit()
718   // or visit_type_unit() for each compilation or type unit found in the
719   // section, and visit_die() for each top-level DIE.
720   void
721   parse();
722 
723   // Return the abbrev code entry for a CODE.
724   const Dwarf_abbrev_table::Abbrev_code*
get_abbrev(unsigned int code)725   get_abbrev(unsigned int code)
726   { return this->abbrev_table_.get_abbrev(code); }
727 
728   // Return a pointer to the DWARF info buffer at OFFSET.
729   const unsigned char*
buffer_at_offset(off_t offset)730   buffer_at_offset(off_t offset) const
731   {
732     const unsigned char* p = this->buffer_ + this->cu_offset_ + offset;
733     if (this->check_buffer(p + 1))
734       return p;
735     return NULL;
736   }
737 
738   // Read a possibly unaligned integer of SIZE.
739   template <int valsize>
740   inline typename elfcpp::Valtype_base<valsize>::Valtype
741   read_from_pointer(const unsigned char* source);
742 
743   // Read a possibly unaligned integer of SIZE.  Update SOURCE after read.
744   template <int valsize>
745   inline typename elfcpp::Valtype_base<valsize>::Valtype
746   read_from_pointer(const unsigned char** source);
747 
748   // Look for a relocation at offset ATTR_OFF in the dwarf info,
749   // and return the section index and offset of the target.
750   unsigned int
751   lookup_reloc(off_t attr_off, off_t* target_off);
752 
753   // Return a string from the DWARF string table.
754   const char*
755   get_string(off_t str_off, unsigned int string_shndx);
756 
757   // Return the size of a DWARF offset.
758   unsigned int
offset_size()759   offset_size() const
760   { return this->offset_size_; }
761 
762   // Return the size of an address.
763   unsigned int
address_size()764   address_size() const
765   { return this->address_size_; }
766 
767   // Set the section index of the .debug_abbrev section.
768   // We use this if there are no relocations for the .debug_info section.
769   // If not set, the code parse() routine will search for the section by name.
770   void
set_abbrev_shndx(unsigned int abbrev_shndx)771   set_abbrev_shndx(unsigned int abbrev_shndx)
772   { this->abbrev_shndx_ = abbrev_shndx; }
773 
774   // Return a pointer to the object file's ELF symbol table.
775   const unsigned char*
symtab()776   symtab() const
777   { return this->symtab_; }
778 
779   // Return the size of the object file's ELF symbol table.
780   off_t
symtab_size()781   symtab_size() const
782   { return this->symtab_size_; }
783 
784   // Return the offset of the current compilation unit.
785   off_t
cu_offset()786   cu_offset() const
787   { return this->cu_offset_; }
788 
789  protected:
790   // Begin parsing the debug info.  This calls visit_compilation_unit()
791   // or visit_type_unit() for each compilation or type unit found in the
792   // section, and visit_die() for each top-level DIE.
793   template<bool big_endian>
794   void
795   do_parse();
796 
797   // The following methods are hooks that are meant to be implemented
798   // by a derived class.  A default, do-nothing, implementation of
799   // each is provided for this base class.
800 
801   // Visit a compilation unit.
802   virtual void
803   visit_compilation_unit(off_t cu_offset, off_t cu_length, Dwarf_die* root_die);
804 
805   // Visit a type unit.
806   virtual void
807   visit_type_unit(off_t tu_offset, off_t tu_length, off_t type_offset,
808 		  uint64_t signature, Dwarf_die* root_die);
809 
810   // Read the range table.
811   Dwarf_range_list*
read_range_list(unsigned int ranges_shndx,off_t ranges_offset)812   read_range_list(unsigned int ranges_shndx, off_t ranges_offset)
813   {
814     return this->ranges_table_.read_range_list(this->object_,
815 					       this->symtab_,
816 					       this->symtab_size_,
817 					       this->address_size_,
818 					       ranges_shndx,
819 					       ranges_offset);
820   }
821 
822   // Return the object.
823   Relobj*
object()824   object() const
825   { return this->object_; }
826 
827   // Checkpoint the relocation tracker.
828   uint64_t
get_reloc_checkpoint()829   get_reloc_checkpoint() const
830   { return this->reloc_mapper_->checkpoint(); }
831 
832   // Reset the relocation tracker to the CHECKPOINT.
833   void
reset_relocs(uint64_t checkpoint)834   reset_relocs(uint64_t checkpoint)
835   { this->reloc_mapper_->reset(checkpoint); }
836 
837  private:
838   // Print a warning about a corrupt debug section.
839   void
840   warn_corrupt_debug_section() const;
841 
842   // Check that P is within the bounds of the current section.
843   bool
check_buffer(const unsigned char * p)844   check_buffer(const unsigned char* p) const
845   {
846     if (p > this->buffer_ + this->cu_offset_ + this->cu_length_)
847       {
848 	this->warn_corrupt_debug_section();
849 	return false;
850       }
851     return true;
852   }
853 
854   // Read the DWARF string table.
855   bool
read_string_table(unsigned int string_shndx)856   read_string_table(unsigned int string_shndx)
857   {
858     // If we've already read this string table, return immediately.
859     if (this->string_shndx_ > 0 && this->string_shndx_ == string_shndx)
860       return true;
861     if (string_shndx == 0 && this->string_shndx_ > 0)
862       return true;
863     return this->do_read_string_table(string_shndx);
864   }
865 
866   bool
867   do_read_string_table(unsigned int string_shndx);
868 
869   // True if this is a type unit; false for a compilation unit.
870   bool is_type_unit_;
871   // The object containing the .debug_info or .debug_types input section.
872   Relobj* object_;
873   // The ELF symbol table.
874   const unsigned char* symtab_;
875   // The size of the ELF symbol table.
876   off_t symtab_size_;
877   // Index of the .debug_info or .debug_types section.
878   unsigned int shndx_;
879   // Index of the relocation section.
880   unsigned int reloc_shndx_;
881   // Type of the relocation section (SHT_REL or SHT_RELA).
882   unsigned int reloc_type_;
883   // Index of the .debug_abbrev section (0 if not known).
884   unsigned int abbrev_shndx_;
885   // Index of the .debug_str section.
886   unsigned int string_shndx_;
887   // The buffer for the debug info.
888   const unsigned char* buffer_;
889   const unsigned char* buffer_end_;
890   // Offset of the current compilation unit.
891   off_t cu_offset_;
892   // Length of the current compilation unit.
893   off_t cu_length_;
894   // Size of a DWARF offset for the current compilation unit.
895   unsigned int offset_size_;
896   // Size of an address for the target architecture.
897   unsigned int address_size_;
898   // Compilation unit version number.
899   unsigned int cu_version_;
900   // Abbreviations table for current compilation unit.
901   Dwarf_abbrev_table abbrev_table_;
902   // Ranges table for the current compilation unit.
903   Dwarf_ranges_table ranges_table_;
904   // Relocation mapper for the section.
905   Elf_reloc_mapper* reloc_mapper_;
906   // The buffer for the debug string table.
907   const char* string_buffer_;
908   const char* string_buffer_end_;
909   // True if this object owns the buffer and needs to delete it.
910   bool owns_string_buffer_;
911   // For incremental update links, this will hold the offset of the
912   // input .debug_str section within the output section.  Offsets read
913   // from relocated data will be relative to the output section, and need
914   // to be corrected before reading data from the input section.
915   uint64_t string_output_section_offset_;
916 };
917 
// We can't do better than to keep the offsets in a sorted vector.
// Here, offset is the key, and file_num/line_num is the value.
struct Offset_to_lineno_entry
{
  off_t offset;
  int header_num;  // which file-list to use (i.e. which .o file are we in)
  // A pointer into files_.  This takes all the bits of an int but
  // one, leaving the last bit for the flag below.
  unsigned int file_num : sizeof(int) * CHAR_BIT - 1;
  // True if this was the last entry for the current offset, meaning
  // it's the line that actually applies.
  unsigned int last_line_for_offset : 1;
  // The line number in the source file.  -1 to indicate end-of-function.
  int line_num;

  // This sorts by offsets first, and then puts the correct line to
  // report for a given offset at the beginning of the run of equal
  // offsets (so that asking for 1 line gives the best answer).  This
  // is not a total ordering: entries with the same offset and the
  // same flag compare equivalent whatever their other fields hold.
  bool operator<(const Offset_to_lineno_entry& that) const
  {
    if (this->offset != that.offset)
      return this->offset < that.offset;
    // Note the '>' which makes this sort 'true' first.
    return this->last_line_for_offset > that.last_line_for_offset;
  }
};
944 
945 // This class is used to read the line information from the debugging
946 // section of an object file.
947 
948 class Dwarf_line_info
949 {
950  public:
Dwarf_line_info()951   Dwarf_line_info()
952   { }
953 
954   virtual
~Dwarf_line_info()955   ~Dwarf_line_info()
956   { }
957 
958   // Given a section number and an offset, returns the associated
959   // file and line-number, as a string: "file:lineno".  If unable
960   // to do the mapping, returns the empty string.  You must call
961   // read_line_mappings() before calling this function.  If
962   // 'other_lines' is non-NULL, fills that in with other line
963   // numbers assigned to the same offset.
964   std::string
addr2line(unsigned int shndx,off_t offset,std::vector<std::string> * other_lines)965   addr2line(unsigned int shndx, off_t offset,
966             std::vector<std::string>* other_lines)
967   { return this->do_addr2line(shndx, offset, other_lines); }
968 
969   // A helper function for a single addr2line lookup.  It also keeps a
970   // cache of the last CACHE_SIZE Dwarf_line_info objects it created;
971   // set to 0 not to cache at all.  The larger CACHE_SIZE is, the more
972   // chance this routine won't have to re-create a Dwarf_line_info
973   // object for its addr2line computation; such creations are slow.
974   // NOTE: Not thread-safe, so only call from one thread at a time.
975   static std::string
976   one_addr2line(Object* object, unsigned int shndx, off_t offset,
977                 size_t cache_size, std::vector<std::string>* other_lines);
978 
979   // This reclaims all the memory that one_addr2line may have cached.
980   // Use this when you know you will not be calling one_addr2line again.
981   static void
982   clear_addr2line_cache();
983 
984  private:
985   virtual std::string
986   do_addr2line(unsigned int shndx, off_t offset,
987                std::vector<std::string>* other_lines) = 0;
988 };
989 
990 template<int size, bool big_endian>
991 class Sized_dwarf_line_info : public Dwarf_line_info
992 {
993  public:
994   // Initializes a .debug_line reader for a given object file.
995   // If SHNDX is specified and non-negative, only read the debug
996   // information that pertains to the specified section.
997   Sized_dwarf_line_info(Object* object, unsigned int read_shndx = -1U);
998 
999   virtual
~Sized_dwarf_line_info()1000   ~Sized_dwarf_line_info()
1001   {
1002     if (this->buffer_start_ != NULL)
1003       delete[] this->buffer_start_;
1004     if (this->str_buffer_start_ != NULL)
1005       delete[] this->str_buffer_start_;
1006   }
1007 
1008  private:
1009   const static int DWARF5_EXPERIMENTAL_LINE_TABLE = 0xf006;
1010 
1011   std::string
1012   do_addr2line(unsigned int shndx, off_t offset,
1013                std::vector<std::string>* other_lines);
1014 
1015   // Formats a file and line number to a string like "dirname/filename:lineno".
1016   std::string
1017   format_file_lineno(const Offset_to_lineno_entry& lineno) const;
1018 
1019   // Start processing line info, and populates the offset_map_.
1020   // If SHNDX is non-negative, only store debug information that
1021   // pertains to the specified section.
1022   void
1023   read_line_mappings(unsigned int shndx);
1024 
1025   // Reads the relocation section associated with .debug_line and
1026   // stores relocation information in reloc_map_.
1027   void
1028   read_relocs();
1029 
1030   // Reads the DWARF2/3 header for this line info.  Each takes as input
1031   // a starting buffer position, and returns the ending position.
1032   const unsigned char*
1033   read_header_prolog(const unsigned char* lineptr);
1034 
1035   const unsigned char*
1036   read_header_tables(const unsigned char* lineptr);
1037 
1038   const unsigned char*
1039   read_header_tables_v5(const unsigned char* lineptr);
1040 
1041   // Reads the DWARF2/3 line information.  If shndx is non-negative,
1042   // discard all line information that doesn't pertain to the given
1043   // section.
1044   const unsigned char*
1045   read_lines(const unsigned char* lineptr, const unsigned char* endptr,
1046 	     std::vector<LineStateMachine>* logicals,
1047 	     bool is_logicals_table, bool is_actuals_table,
1048 	     unsigned int shndx);
1049 
1050   // Process a single line info opcode at START using the state
1051   // machine at LSM.  Return true if we should define a line using the
1052   // current state of the line state machine.  Place the length of the
1053   // opcode in LEN.
1054   bool
1055   process_one_opcode(const unsigned char* start,
1056                      struct LineStateMachine* lsm, size_t* len,
1057                      std::vector<LineStateMachine>* logicals,
1058 		     bool is_logicals_table, bool is_actuals_table);
1059 
1060   // Some parts of processing differ depending on whether the input
1061   // was a .o file or not.
1062   bool input_is_relobj();
1063 
1064   // If we saw anything amiss while parsing, we set this to false.
1065   // Then addr2line will always fail (rather than return possibly-
1066   // corrupt data).
1067   bool data_valid_;
1068 
1069   // A DWARF2/3 line info header.  This is not the same size as in the
1070   // actual file, as the one in the file may have a 32 bit or 64 bit
1071   // lengths.
1072 
1073   struct Dwarf_line_infoHeader
1074   {
1075     off_t total_length;
1076     int version;
1077     off_t prologue_length;
1078     int min_insn_length; // insn stands for instructin
1079     int max_ops_per_insn;
1080     bool default_is_stmt; // stmt stands for statement
1081     signed char line_base;
1082     int line_range;
1083     unsigned char opcode_base;
1084     std::vector<unsigned char> std_opcode_lengths;
1085     int offset_size;
1086   } header_;
1087 
1088   // buffer is the buffer for our line info, starting at exactly where
1089   // the line info to read is.
1090   const unsigned char* buffer_;
1091   const unsigned char* buffer_end_;
1092   // If the buffer was allocated temporarily, and therefore must be
1093   // deallocated in the dtor, this contains a pointer to the start
1094   // of the buffer.
1095   const unsigned char* buffer_start_;
1096 
1097   // buffer is the buffer for our line info, starting at exactly where
1098   // the line info to read is.
1099   const unsigned char* str_buffer_;
1100   const unsigned char* str_buffer_end_;
1101   // If the buffer was allocated temporarily, and therefore must be
1102   // deallocated in the dtor, this contains a pointer to the start
1103   // of the buffer.
1104   const unsigned char* str_buffer_start_;
1105 
1106   // Pointer to the end of the header_length field (aka prologue_length).
1107   // The offsets to the line number programs are relative to this point.
1108   const unsigned char* end_of_header_length_;
1109 
1110   // Pointers to the start of the line number programs.
1111   const unsigned char* logicals_start_;
1112   const unsigned char* actuals_start_;
1113 
1114   // Pointer to the end of the current compilation unit.
1115   const unsigned char* end_of_unit_;
1116 
1117   // This has relocations that point into buffer.
1118   Sized_elf_reloc_mapper<size, big_endian>* reloc_mapper_;
1119   // The type of the reloc section in track_relocs_--SHT_REL or SHT_RELA.
1120   unsigned int track_relocs_type_;
1121 
1122   // This is used to figure out what section to apply a relocation to.
1123   const unsigned char* symtab_buffer_;
1124   section_size_type symtab_buffer_size_;
1125 
1126   // Holds the directories and files as we see them.  We have an array
1127   // of directory-lists, one for each .o file we're reading (usually
1128   // there will just be one, but there may be more if input is a .so).
1129   std::vector<std::vector<std::string> > directories_;
1130   // The first part is an index into directories_, the second the filename.
1131   std::vector<std::vector< std::pair<int, std::string> > > files_;
1132 
1133   // An index into the current directories_ and files_ vectors.
1134   int current_header_index_;
1135 
1136   // A sorted map from offset of the relocation target to the shndx
1137   // and addend for the relocation.
1138   typedef std::map<off_t, std::pair<unsigned int, off_t> >
1139   Reloc_map;
1140   Reloc_map reloc_map_;
1141 
1142   // We have a vector of offset->lineno entries for every input section.
1143   typedef Unordered_map<unsigned int, std::vector<Offset_to_lineno_entry> >
1144   Lineno_map;
1145 
1146   Lineno_map line_number_map_;
1147 };
1148 
1149 } // End namespace gold.
1150 
1151 #endif // !defined(GOLD_DWARF_READER_H)
1152