1 // Copyright (c) 2010 Google Inc. All Rights Reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 // CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
30 
31 // Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit,
32 // and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details.
33 
34 #include "common/dwarf/dwarf2reader.h"
35 
36 #include <assert.h>
37 #include <stdint.h>
38 #include <stdio.h>
39 #include <string.h>
40 
41 #include <map>
42 #include <memory>
43 #include <stack>
44 #include <string>
45 #include <utility>
46 
47 #include <sys/stat.h>
48 
49 #include "common/dwarf/bytereader-inl.h"
50 #include "common/dwarf/bytereader.h"
51 #include "common/dwarf/line_state_machine.h"
52 #include "common/using_std_string.h"
53 #include "google_breakpad/common/breakpad_types.h"
54 
55 namespace dwarf2reader {
56 
CompilationUnit(const string & path,const SectionMap & sections,uint64_t offset,ByteReader * reader,Dwarf2Handler * handler)57 CompilationUnit::CompilationUnit(const string& path,
58                                  const SectionMap& sections, uint64_t offset,
59                                  ByteReader* reader, Dwarf2Handler* handler)
60     : path_(path), offset_from_section_start_(offset), reader_(reader),
61       sections_(sections), handler_(handler), abbrevs_(),
62       string_buffer_(NULL), string_buffer_length_(0),
63       str_offsets_buffer_(NULL), str_offsets_buffer_length_(0),
64       addr_buffer_(NULL), addr_buffer_length_(0),
65       is_split_dwarf_(false), dwo_id_(0), dwo_name_(),
66       skeleton_dwo_id_(0), ranges_base_(0), addr_base_(0),
67       have_checked_for_dwp_(false), dwp_path_(),
68       dwp_byte_reader_(), dwp_reader_() {}
69 
70 // Initialize a compilation unit from a .dwo or .dwp file.
71 // In this case, we need the .debug_addr section from the
72 // executable file that contains the corresponding skeleton
73 // compilation unit.  We also inherit the Dwarf2Handler from
74 // the executable file, and call it as if we were still
75 // processing the original compilation unit.
76 
SetSplitDwarf(const uint8_t * addr_buffer,uint64_t addr_buffer_length,uint64_t addr_base,uint64_t ranges_base,uint64_t dwo_id)77 void CompilationUnit::SetSplitDwarf(const uint8_t* addr_buffer,
78                                     uint64_t addr_buffer_length,
79                                     uint64_t addr_base,
80                                     uint64_t ranges_base,
81                                     uint64_t dwo_id) {
82   is_split_dwarf_ = true;
83   addr_buffer_ = addr_buffer;
84   addr_buffer_length_ = addr_buffer_length;
85   addr_base_ = addr_base;
86   ranges_base_ = ranges_base;
87   skeleton_dwo_id_ = dwo_id;
88 }
89 
90 // Read a DWARF2/3 abbreviation section.
91 // Each abbrev consists of an abbreviation number, a tag, a byte
92 // specifying whether the tag has children, and a list of
93 // attribute/form pairs.
94 // The list of forms is terminated by a 0 for the attribute, and a
95 // zero for the form.  The entire abbreviation section is terminated
96 // by a zero for the code.
97 
ReadAbbrevs()98 void CompilationUnit::ReadAbbrevs() {
99   if (abbrevs_)
100     return;
101 
102   // First get the debug_abbrev section.  ".debug_abbrev" is the name
103   // recommended in the DWARF spec, and used on Linux;
104   // "__debug_abbrev" is the name used in Mac OS X Mach-O files.
105   SectionMap::const_iterator iter = sections_.find(".debug_abbrev");
106   if (iter == sections_.end())
107     iter = sections_.find("__debug_abbrev");
108   assert(iter != sections_.end());
109 
110   abbrevs_ = new std::vector<Abbrev>;
111   abbrevs_->resize(1);
112 
113   // The only way to check whether we are reading over the end of the
114   // buffer would be to first compute the size of the leb128 data by
115   // reading it, then go back and read it again.
116   const uint8_t *abbrev_start = iter->second.first +
117                                       header_.abbrev_offset;
118   const uint8_t *abbrevptr = abbrev_start;
119 #ifndef NDEBUG
120   const uint64_t abbrev_length = iter->second.second - header_.abbrev_offset;
121 #endif
122 
123   while (1) {
124     CompilationUnit::Abbrev abbrev;
125     size_t len;
126     const uint64_t number = reader_->ReadUnsignedLEB128(abbrevptr, &len);
127 
128     if (number == 0)
129       break;
130     abbrev.number = number;
131     abbrevptr += len;
132 
133     assert(abbrevptr < abbrev_start + abbrev_length);
134     const uint64_t tag = reader_->ReadUnsignedLEB128(abbrevptr, &len);
135     abbrevptr += len;
136     abbrev.tag = static_cast<enum DwarfTag>(tag);
137 
138     assert(abbrevptr < abbrev_start + abbrev_length);
139     abbrev.has_children = reader_->ReadOneByte(abbrevptr);
140     abbrevptr += 1;
141 
142     assert(abbrevptr < abbrev_start + abbrev_length);
143 
144     while (1) {
145       const uint64_t nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
146       abbrevptr += len;
147 
148       assert(abbrevptr < abbrev_start + abbrev_length);
149       const uint64_t formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
150       abbrevptr += len;
151       if (nametemp == 0 && formtemp == 0)
152         break;
153 
154       const enum DwarfAttribute name =
155         static_cast<enum DwarfAttribute>(nametemp);
156       const enum DwarfForm form = static_cast<enum DwarfForm>(formtemp);
157       abbrev.attributes.push_back(std::make_pair(name, form));
158     }
159     assert(abbrev.number == abbrevs_->size());
160     abbrevs_->push_back(abbrev);
161   }
162 }
163 
164 // Skips a single DIE's attributes.
SkipDIE(const uint8_t * start,const Abbrev & abbrev)165 const uint8_t *CompilationUnit::SkipDIE(const uint8_t* start,
166                                         const Abbrev& abbrev) {
167   for (AttributeList::const_iterator i = abbrev.attributes.begin();
168        i != abbrev.attributes.end();
169        i++)  {
170     start = SkipAttribute(start, i->second);
171   }
172   return start;
173 }
174 
175 // Skips a single attribute form's data.
SkipAttribute(const uint8_t * start,enum DwarfForm form)176 const uint8_t *CompilationUnit::SkipAttribute(const uint8_t *start,
177                                               enum DwarfForm form) {
178   size_t len;
179 
180   switch (form) {
181     case DW_FORM_indirect:
182       form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
183                                                                      &len));
184       start += len;
185       return SkipAttribute(start, form);
186 
187     case DW_FORM_flag_present:
188       return start;
189     case DW_FORM_data1:
190     case DW_FORM_flag:
191     case DW_FORM_ref1:
192       return start + 1;
193     case DW_FORM_ref2:
194     case DW_FORM_data2:
195       return start + 2;
196     case DW_FORM_ref4:
197     case DW_FORM_data4:
198       return start + 4;
199     case DW_FORM_ref8:
200     case DW_FORM_data8:
201     case DW_FORM_ref_sig8:
202       return start + 8;
203     case DW_FORM_string:
204       return start + strlen(reinterpret_cast<const char *>(start)) + 1;
205     case DW_FORM_udata:
206     case DW_FORM_ref_udata:
207     case DW_FORM_GNU_str_index:
208     case DW_FORM_GNU_addr_index:
209       reader_->ReadUnsignedLEB128(start, &len);
210       return start + len;
211 
212     case DW_FORM_sdata:
213       reader_->ReadSignedLEB128(start, &len);
214       return start + len;
215     case DW_FORM_addr:
216       return start + reader_->AddressSize();
217     case DW_FORM_ref_addr:
218       // DWARF2 and 3/4 differ on whether ref_addr is address size or
219       // offset size.
220       assert(header_.version >= 2);
221       if (header_.version == 2) {
222         return start + reader_->AddressSize();
223       } else if (header_.version >= 3) {
224         return start + reader_->OffsetSize();
225       }
226       break;
227 
228     case DW_FORM_block1:
229       return start + 1 + reader_->ReadOneByte(start);
230     case DW_FORM_block2:
231       return start + 2 + reader_->ReadTwoBytes(start);
232     case DW_FORM_block4:
233       return start + 4 + reader_->ReadFourBytes(start);
234     case DW_FORM_block:
235     case DW_FORM_exprloc: {
236       uint64_t size = reader_->ReadUnsignedLEB128(start, &len);
237       return start + size + len;
238     }
239     case DW_FORM_strp:
240     case DW_FORM_sec_offset:
241       return start + reader_->OffsetSize();
242   }
243   fprintf(stderr,"Unhandled form type");
244   return NULL;
245 }
246 
247 // Read a DWARF2/3 header.
248 // The header is variable length in DWARF3 (and DWARF2 as extended by
249 // most compilers), and consists of a length field, a version number,
250 // the offset in the .debug_abbrev section for our abbrevs, and an
251 // address size.
ReadHeader()252 void CompilationUnit::ReadHeader() {
253   const uint8_t *headerptr = buffer_;
254   size_t initial_length_size;
255 
256   assert(headerptr + 4 < buffer_ + buffer_length_);
257   const uint64_t initial_length
258     = reader_->ReadInitialLength(headerptr, &initial_length_size);
259   headerptr += initial_length_size;
260   header_.length = initial_length;
261 
262   assert(headerptr + 2 < buffer_ + buffer_length_);
263   header_.version = reader_->ReadTwoBytes(headerptr);
264   headerptr += 2;
265 
266   assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
267   header_.abbrev_offset = reader_->ReadOffset(headerptr);
268   headerptr += reader_->OffsetSize();
269 
270   // Compare against less than or equal because this may be the last
271   // section in the file.
272   assert(headerptr + 1 <= buffer_ + buffer_length_);
273   header_.address_size = reader_->ReadOneByte(headerptr);
274   reader_->SetAddressSize(header_.address_size);
275   headerptr += 1;
276 
277   after_header_ = headerptr;
278 
279   // This check ensures that we don't have to do checking during the
280   // reading of DIEs. header_.length does not include the size of the
281   // initial length.
282   assert(buffer_ + initial_length_size + header_.length <=
283         buffer_ + buffer_length_);
284 }
285 
Start()286 uint64_t CompilationUnit::Start() {
287   // First get the debug_info section.  ".debug_info" is the name
288   // recommended in the DWARF spec, and used on Linux; "__debug_info"
289   // is the name used in Mac OS X Mach-O files.
290   SectionMap::const_iterator iter = sections_.find(".debug_info");
291   if (iter == sections_.end())
292     iter = sections_.find("__debug_info");
293   assert(iter != sections_.end());
294 
295   // Set up our buffer
296   buffer_ = iter->second.first + offset_from_section_start_;
297   buffer_length_ = iter->second.second - offset_from_section_start_;
298 
299   // Read the header
300   ReadHeader();
301 
302   // Figure out the real length from the end of the initial length to
303   // the end of the compilation unit, since that is the value we
304   // return.
305   uint64_t ourlength = header_.length;
306   if (reader_->OffsetSize() == 8)
307     ourlength += 12;
308   else
309     ourlength += 4;
310 
311   // See if the user wants this compilation unit, and if not, just return.
312   if (!handler_->StartCompilationUnit(offset_from_section_start_,
313                                       reader_->AddressSize(),
314                                       reader_->OffsetSize(),
315                                       header_.length,
316                                       header_.version))
317     return ourlength;
318 
319   // Otherwise, continue by reading our abbreviation entries.
320   ReadAbbrevs();
321 
322   // Set the string section if we have one.  ".debug_str" is the name
323   // recommended in the DWARF spec, and used on Linux; "__debug_str"
324   // is the name used in Mac OS X Mach-O files.
325   iter = sections_.find(".debug_str");
326   if (iter == sections_.end())
327     iter = sections_.find("__debug_str");
328   if (iter != sections_.end()) {
329     string_buffer_ = iter->second.first;
330     string_buffer_length_ = iter->second.second;
331   }
332 
333   // Set the string offsets section if we have one.
334   iter = sections_.find(".debug_str_offsets");
335   if (iter != sections_.end()) {
336     str_offsets_buffer_ = iter->second.first;
337     str_offsets_buffer_length_ = iter->second.second;
338   }
339 
340   // Set the address section if we have one.
341   iter = sections_.find(".debug_addr");
342   if (iter != sections_.end()) {
343     addr_buffer_ = iter->second.first;
344     addr_buffer_length_ = iter->second.second;
345   }
346 
347   // Now that we have our abbreviations, start processing DIE's.
348   ProcessDIEs();
349 
350   // If this is a skeleton compilation unit generated with split DWARF,
351   // and the client needs the full debug info, we need to find the full
352   // compilation unit in a .dwo or .dwp file.
353   if (!is_split_dwarf_
354       && dwo_name_ != NULL
355       && handler_->NeedSplitDebugInfo())
356     ProcessSplitDwarf();
357 
358   return ourlength;
359 }
360 
361 // If one really wanted, you could merge SkipAttribute and
362 // ProcessAttribute
363 // This is all boring data manipulation and calling of the handler.
ProcessAttribute(uint64_t dieoffset,const uint8_t * start,enum DwarfAttribute attr,enum DwarfForm form)364 const uint8_t *CompilationUnit::ProcessAttribute(
365     uint64_t dieoffset, const uint8_t *start, enum DwarfAttribute attr,
366     enum DwarfForm form) {
367   size_t len;
368 
369   switch (form) {
370     // DW_FORM_indirect is never used because it is such a space
371     // waster.
372     case DW_FORM_indirect:
373       form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
374                                                                      &len));
375       start += len;
376       return ProcessAttribute(dieoffset, start, attr, form);
377 
378     case DW_FORM_flag_present:
379       ProcessAttributeUnsigned(dieoffset, attr, form, 1);
380       return start;
381     case DW_FORM_data1:
382     case DW_FORM_flag:
383       ProcessAttributeUnsigned(dieoffset, attr, form,
384                                reader_->ReadOneByte(start));
385       return start + 1;
386     case DW_FORM_data2:
387       ProcessAttributeUnsigned(dieoffset, attr, form,
388                                reader_->ReadTwoBytes(start));
389       return start + 2;
390     case DW_FORM_data4:
391       ProcessAttributeUnsigned(dieoffset, attr, form,
392                                reader_->ReadFourBytes(start));
393       return start + 4;
394     case DW_FORM_data8:
395       ProcessAttributeUnsigned(dieoffset, attr, form,
396                                reader_->ReadEightBytes(start));
397       return start + 8;
398     case DW_FORM_string: {
399       const char *str = reinterpret_cast<const char *>(start);
400       ProcessAttributeString(dieoffset, attr, form, str);
401       return start + strlen(str) + 1;
402     }
403     case DW_FORM_udata:
404       ProcessAttributeUnsigned(dieoffset, attr, form,
405                                reader_->ReadUnsignedLEB128(start, &len));
406       return start + len;
407 
408     case DW_FORM_sdata:
409       ProcessAttributeSigned(dieoffset, attr, form,
410                              reader_->ReadSignedLEB128(start, &len));
411       return start + len;
412     case DW_FORM_addr:
413       ProcessAttributeUnsigned(dieoffset, attr, form,
414                                reader_->ReadAddress(start));
415       return start + reader_->AddressSize();
416     case DW_FORM_sec_offset:
417       ProcessAttributeUnsigned(dieoffset, attr, form,
418                                reader_->ReadOffset(start));
419       return start + reader_->OffsetSize();
420 
421     case DW_FORM_ref1:
422       handler_->ProcessAttributeReference(dieoffset, attr, form,
423                                           reader_->ReadOneByte(start)
424                                           + offset_from_section_start_);
425       return start + 1;
426     case DW_FORM_ref2:
427       handler_->ProcessAttributeReference(dieoffset, attr, form,
428                                           reader_->ReadTwoBytes(start)
429                                           + offset_from_section_start_);
430       return start + 2;
431     case DW_FORM_ref4:
432       handler_->ProcessAttributeReference(dieoffset, attr, form,
433                                           reader_->ReadFourBytes(start)
434                                           + offset_from_section_start_);
435       return start + 4;
436     case DW_FORM_ref8:
437       handler_->ProcessAttributeReference(dieoffset, attr, form,
438                                           reader_->ReadEightBytes(start)
439                                           + offset_from_section_start_);
440       return start + 8;
441     case DW_FORM_ref_udata:
442       handler_->ProcessAttributeReference(dieoffset, attr, form,
443                                           reader_->ReadUnsignedLEB128(start,
444                                                                       &len)
445                                           + offset_from_section_start_);
446       return start + len;
447     case DW_FORM_ref_addr:
448       // DWARF2 and 3/4 differ on whether ref_addr is address size or
449       // offset size.
450       assert(header_.version >= 2);
451       if (header_.version == 2) {
452         handler_->ProcessAttributeReference(dieoffset, attr, form,
453                                             reader_->ReadAddress(start));
454         return start + reader_->AddressSize();
455       } else if (header_.version >= 3) {
456         handler_->ProcessAttributeReference(dieoffset, attr, form,
457                                             reader_->ReadOffset(start));
458         return start + reader_->OffsetSize();
459       }
460       break;
461     case DW_FORM_ref_sig8:
462       handler_->ProcessAttributeSignature(dieoffset, attr, form,
463                                           reader_->ReadEightBytes(start));
464       return start + 8;
465 
466     case DW_FORM_block1: {
467       uint64_t datalen = reader_->ReadOneByte(start);
468       handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 1,
469                                        datalen);
470       return start + 1 + datalen;
471     }
472     case DW_FORM_block2: {
473       uint64_t datalen = reader_->ReadTwoBytes(start);
474       handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 2,
475                                        datalen);
476       return start + 2 + datalen;
477     }
478     case DW_FORM_block4: {
479       uint64_t datalen = reader_->ReadFourBytes(start);
480       handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 4,
481                                        datalen);
482       return start + 4 + datalen;
483     }
484     case DW_FORM_block:
485     case DW_FORM_exprloc: {
486       uint64_t datalen = reader_->ReadUnsignedLEB128(start, &len);
487       handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len,
488                                        datalen);
489       return start + datalen + len;
490     }
491     case DW_FORM_strp: {
492       assert(string_buffer_ != NULL);
493 
494       const uint64_t offset = reader_->ReadOffset(start);
495       assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_);
496 
497       const char *str = reinterpret_cast<const char *>(string_buffer_ + offset);
498       ProcessAttributeString(dieoffset, attr, form, str);
499       return start + reader_->OffsetSize();
500     }
501 
502     case DW_FORM_GNU_str_index: {
503       uint64_t str_index = reader_->ReadUnsignedLEB128(start, &len);
504       const uint8_t* offset_ptr =
505           str_offsets_buffer_ + str_index * reader_->OffsetSize();
506       const uint64_t offset = reader_->ReadOffset(offset_ptr);
507       if (offset >= string_buffer_length_) {
508         return NULL;
509       }
510 
511       const char* str = reinterpret_cast<const char *>(string_buffer_) + offset;
512       ProcessAttributeString(dieoffset, attr, form, str);
513       return start + len;
514       break;
515     }
516     case DW_FORM_GNU_addr_index: {
517       uint64_t addr_index = reader_->ReadUnsignedLEB128(start, &len);
518       const uint8_t* addr_ptr =
519           addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize();
520       ProcessAttributeUnsigned(dieoffset, attr, form,
521                                reader_->ReadAddress(addr_ptr));
522       return start + len;
523     }
524   }
525   fprintf(stderr, "Unhandled form type\n");
526   return NULL;
527 }
528 
ProcessDIE(uint64_t dieoffset,const uint8_t * start,const Abbrev & abbrev)529 const uint8_t *CompilationUnit::ProcessDIE(uint64_t dieoffset,
530                                            const uint8_t *start,
531                                            const Abbrev& abbrev) {
532   for (AttributeList::const_iterator i = abbrev.attributes.begin();
533        i != abbrev.attributes.end();
534        i++)  {
535     start = ProcessAttribute(dieoffset, start, i->first, i->second);
536   }
537 
538   // If this is a compilation unit in a split DWARF object, verify that
539   // the dwo_id matches. If it does not match, we will ignore this
540   // compilation unit.
541   if (abbrev.tag == DW_TAG_compile_unit
542       && is_split_dwarf_
543       && dwo_id_ != skeleton_dwo_id_) {
544     return NULL;
545   }
546 
547   return start;
548 }
549 
ProcessDIEs()550 void CompilationUnit::ProcessDIEs() {
551   const uint8_t *dieptr = after_header_;
552   size_t len;
553 
554   // lengthstart is the place the length field is based on.
555   // It is the point in the header after the initial length field
556   const uint8_t *lengthstart = buffer_;
557 
558   // In 64 bit dwarf, the initial length is 12 bytes, because of the
559   // 0xffffffff at the start.
560   if (reader_->OffsetSize() == 8)
561     lengthstart += 12;
562   else
563     lengthstart += 4;
564 
565   std::stack<uint64_t> die_stack;
566 
567   while (dieptr < (lengthstart + header_.length)) {
568     // We give the user the absolute offset from the beginning of
569     // debug_info, since they need it to deal with ref_addr forms.
570     uint64_t absolute_offset = (dieptr - buffer_) + offset_from_section_start_;
571 
572     uint64_t abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len);
573 
574     dieptr += len;
575 
576     // Abbrev == 0 represents the end of a list of children, or padding
577     // at the end of the compilation unit.
578     if (abbrev_num == 0) {
579       if (die_stack.size() == 0)
580         // If it is padding, then we are done with the compilation unit's DIEs.
581         return;
582       const uint64_t offset = die_stack.top();
583       die_stack.pop();
584       handler_->EndDIE(offset);
585       continue;
586     }
587 
588     const Abbrev& abbrev = abbrevs_->at(static_cast<size_t>(abbrev_num));
589     const enum DwarfTag tag = abbrev.tag;
590     if (!handler_->StartDIE(absolute_offset, tag)) {
591       dieptr = SkipDIE(dieptr, abbrev);
592     } else {
593       dieptr = ProcessDIE(absolute_offset, dieptr, abbrev);
594     }
595 
596     if (abbrev.has_children) {
597       die_stack.push(absolute_offset);
598     } else {
599       handler_->EndDIE(absolute_offset);
600     }
601   }
602 }
603 
604 // Check for a valid ELF file and return the Address size.
605 // Returns 0 if not a valid ELF file.
GetElfWidth(const ElfReader & elf)606 inline int GetElfWidth(const ElfReader& elf) {
607   if (elf.IsElf32File())
608     return 4;
609   if (elf.IsElf64File())
610     return 8;
611   return 0;
612 }
613 
ProcessSplitDwarf()614 void CompilationUnit::ProcessSplitDwarf() {
615   struct stat statbuf;
616   if (!have_checked_for_dwp_) {
617     // Look for a .dwp file in the same directory as the executable.
618     have_checked_for_dwp_ = true;
619     string dwp_suffix(".dwp");
620     dwp_path_ = path_ + dwp_suffix;
621     if (stat(dwp_path_.c_str(), &statbuf) != 0) {
622       // Fall back to a split .debug file in the same directory.
623       string debug_suffix(".debug");
624       dwp_path_ = path_;
625       size_t found = path_.rfind(debug_suffix);
626       if (found + debug_suffix.length() == path_.length())
627         dwp_path_ = dwp_path_.replace(found, debug_suffix.length(), dwp_suffix);
628     }
629     if (stat(dwp_path_.c_str(), &statbuf) == 0) {
630       ElfReader* elf = new ElfReader(dwp_path_);
631       int width = GetElfWidth(*elf);
632       if (width != 0) {
633         dwp_byte_reader_.reset(new ByteReader(reader_->GetEndianness()));
634         dwp_byte_reader_->SetAddressSize(width);
635         dwp_reader_.reset(new DwpReader(*dwp_byte_reader_, elf));
636         dwp_reader_->Initialize();
637       } else {
638         delete elf;
639       }
640     }
641   }
642   bool found_in_dwp = false;
643   if (dwp_reader_) {
644     // If we have a .dwp file, read the debug sections for the requested CU.
645     SectionMap sections;
646     dwp_reader_->ReadDebugSectionsForCU(dwo_id_, &sections);
647     if (!sections.empty()) {
648       found_in_dwp = true;
649       CompilationUnit dwp_comp_unit(dwp_path_, sections, 0,
650                                     dwp_byte_reader_.get(), handler_);
651       dwp_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, addr_base_,
652                                   ranges_base_, dwo_id_);
653       dwp_comp_unit.Start();
654     }
655   }
656   if (!found_in_dwp) {
657     // If no .dwp file, try to open the .dwo file.
658     if (stat(dwo_name_, &statbuf) == 0) {
659       ElfReader elf(dwo_name_);
660       int width = GetElfWidth(elf);
661       if (width != 0) {
662         ByteReader reader(ENDIANNESS_LITTLE);
663         reader.SetAddressSize(width);
664         SectionMap sections;
665         ReadDebugSectionsFromDwo(&elf, &sections);
666         CompilationUnit dwo_comp_unit(dwo_name_, sections, 0, &reader,
667                                       handler_);
668         dwo_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_,
669                                     addr_base_, ranges_base_, dwo_id_);
670         dwo_comp_unit.Start();
671       }
672     }
673   }
674 }
675 
ReadDebugSectionsFromDwo(ElfReader * elf_reader,SectionMap * sections)676 void CompilationUnit::ReadDebugSectionsFromDwo(ElfReader* elf_reader,
677                                                SectionMap* sections) {
678   static const char* const section_names[] = {
679     ".debug_abbrev",
680     ".debug_info",
681     ".debug_str_offsets",
682     ".debug_str"
683   };
684   for (unsigned int i = 0u;
685        i < sizeof(section_names)/sizeof(*(section_names)); ++i) {
686     string base_name = section_names[i];
687     string dwo_name = base_name + ".dwo";
688     size_t section_size;
689     const char* section_data = elf_reader->GetSectionByName(dwo_name,
690                                                             &section_size);
691     if (section_data != NULL)
692       sections->insert(std::make_pair(
693           base_name, std::make_pair(
694              reinterpret_cast<const uint8_t *>(section_data),
695              section_size)));
696   }
697 }
698 
DwpReader(const ByteReader & byte_reader,ElfReader * elf_reader)699 DwpReader::DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader)
700     : elf_reader_(elf_reader), byte_reader_(byte_reader),
701       cu_index_(NULL), cu_index_size_(0), string_buffer_(NULL),
702       string_buffer_size_(0), version_(0), ncolumns_(0), nunits_(0),
703       nslots_(0), phash_(NULL), pindex_(NULL), shndx_pool_(NULL),
704       offset_table_(NULL), size_table_(NULL), abbrev_data_(NULL),
705       abbrev_size_(0), info_data_(NULL), info_size_(0),
706       str_offsets_data_(NULL), str_offsets_size_(0) {}
707 
~DwpReader()708 DwpReader::~DwpReader() {
709   if (elf_reader_) delete elf_reader_;
710 }
711 
Initialize()712 void DwpReader::Initialize() {
713   cu_index_ = elf_reader_->GetSectionByName(".debug_cu_index",
714                                             &cu_index_size_);
715   if (cu_index_ == NULL) {
716     return;
717   }
718   // The .debug_str.dwo section is shared by all CUs in the file.
719   string_buffer_ = elf_reader_->GetSectionByName(".debug_str.dwo",
720                                                  &string_buffer_size_);
721 
722   version_ = byte_reader_.ReadFourBytes(
723       reinterpret_cast<const uint8_t *>(cu_index_));
724 
725   if (version_ == 1) {
726     nslots_ = byte_reader_.ReadFourBytes(
727         reinterpret_cast<const uint8_t *>(cu_index_)
728         + 3 * sizeof(uint32_t));
729     phash_ = cu_index_ + 4 * sizeof(uint32_t);
730     pindex_ = phash_ + nslots_ * sizeof(uint64_t);
731     shndx_pool_ = pindex_ + nslots_ * sizeof(uint32_t);
732     if (shndx_pool_ >= cu_index_ + cu_index_size_) {
733       version_ = 0;
734     }
735   } else if (version_ == 2) {
736     ncolumns_ = byte_reader_.ReadFourBytes(
737         reinterpret_cast<const uint8_t *>(cu_index_) + sizeof(uint32_t));
738     nunits_ = byte_reader_.ReadFourBytes(
739         reinterpret_cast<const uint8_t *>(cu_index_) + 2 * sizeof(uint32_t));
740     nslots_ = byte_reader_.ReadFourBytes(
741         reinterpret_cast<const uint8_t *>(cu_index_) + 3 * sizeof(uint32_t));
742     phash_ = cu_index_ + 4 * sizeof(uint32_t);
743     pindex_ = phash_ + nslots_ * sizeof(uint64_t);
744     offset_table_ = pindex_ + nslots_ * sizeof(uint32_t);
745     size_table_ = offset_table_ + ncolumns_ * (nunits_ + 1) * sizeof(uint32_t);
746     abbrev_data_ = elf_reader_->GetSectionByName(".debug_abbrev.dwo",
747                                                  &abbrev_size_);
748     info_data_ = elf_reader_->GetSectionByName(".debug_info.dwo", &info_size_);
749     str_offsets_data_ = elf_reader_->GetSectionByName(".debug_str_offsets.dwo",
750                                                       &str_offsets_size_);
751     if (size_table_ >= cu_index_ + cu_index_size_) {
752       version_ = 0;
753     }
754   }
755 }
756 
ReadDebugSectionsForCU(uint64_t dwo_id,SectionMap * sections)757 void DwpReader::ReadDebugSectionsForCU(uint64_t dwo_id,
758                                        SectionMap* sections) {
759   if (version_ == 1) {
760     int slot = LookupCU(dwo_id);
761     if (slot == -1) {
762       return;
763     }
764 
765     // The index table points to the section index pool, where we
766     // can read a list of section indexes for the debug sections
767     // for the CU whose dwo_id we are looking for.
768     int index = byte_reader_.ReadFourBytes(
769         reinterpret_cast<const uint8_t *>(pindex_)
770         + slot * sizeof(uint32_t));
771     const char* shndx_list = shndx_pool_ + index * sizeof(uint32_t);
772     for (;;) {
773       if (shndx_list >= cu_index_ + cu_index_size_) {
774         version_ = 0;
775         return;
776       }
777       unsigned int shndx = byte_reader_.ReadFourBytes(
778           reinterpret_cast<const uint8_t *>(shndx_list));
779       shndx_list += sizeof(uint32_t);
780       if (shndx == 0)
781         break;
782       const char* section_name = elf_reader_->GetSectionName(shndx);
783       size_t section_size;
784       const char* section_data;
785       // We're only interested in these four debug sections.
786       // The section names in the .dwo file end with ".dwo", but we
787       // add them to the sections table with their normal names.
788       if (!strncmp(section_name, ".debug_abbrev", strlen(".debug_abbrev"))) {
789         section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
790         sections->insert(std::make_pair(
791             ".debug_abbrev",
792             std::make_pair(reinterpret_cast<const uint8_t *> (section_data),
793                                                               section_size)));
794       } else if (!strncmp(section_name, ".debug_info", strlen(".debug_info"))) {
795         section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
796         sections->insert(std::make_pair(
797             ".debug_info",
798             std::make_pair(reinterpret_cast<const uint8_t *> (section_data),
799                            section_size)));
800       } else if (!strncmp(section_name, ".debug_str_offsets",
801                           strlen(".debug_str_offsets"))) {
802         section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
803         sections->insert(std::make_pair(
804             ".debug_str_offsets",
805             std::make_pair(reinterpret_cast<const uint8_t *> (section_data),
806                            section_size)));
807       }
808     }
809     sections->insert(std::make_pair(
810         ".debug_str",
811         std::make_pair(reinterpret_cast<const uint8_t *> (string_buffer_),
812                        string_buffer_size_)));
813   } else if (version_ == 2) {
814     uint32_t index = LookupCUv2(dwo_id);
815     if (index == 0) {
816       return;
817     }
818 
819     // The index points to a row in each of the section offsets table
820     // and the section size table, where we can read the offsets and sizes
821     // of the contributions to each debug section from the CU whose dwo_id
822     // we are looking for. Row 0 of the section offsets table has the
823     // section ids for each column of the table. The size table begins
824     // with row 1.
825     const char* id_row = offset_table_;
826     const char* offset_row = offset_table_
827                              + index * ncolumns_ * sizeof(uint32_t);
828     const char* size_row =
829         size_table_ + (index - 1) * ncolumns_ * sizeof(uint32_t);
830     if (size_row + ncolumns_ * sizeof(uint32_t) > cu_index_ + cu_index_size_) {
831       version_ = 0;
832       return;
833     }
834     for (unsigned int col = 0u; col < ncolumns_; ++col) {
835       uint32_t section_id =
836           byte_reader_.ReadFourBytes(reinterpret_cast<const uint8_t *>(id_row)
837                                      + col * sizeof(uint32_t));
838       uint32_t offset = byte_reader_.ReadFourBytes(
839           reinterpret_cast<const uint8_t *>(offset_row)
840           + col * sizeof(uint32_t));
841       uint32_t size = byte_reader_.ReadFourBytes(
842           reinterpret_cast<const uint8_t *>(size_row) + col * sizeof(uint32_t));
843       if (section_id == DW_SECT_ABBREV) {
844         sections->insert(std::make_pair(
845             ".debug_abbrev",
846             std::make_pair(reinterpret_cast<const uint8_t *> (abbrev_data_)
847                            + offset, size)));
848       } else if (section_id == DW_SECT_INFO) {
849         sections->insert(std::make_pair(
850             ".debug_info",
851             std::make_pair(reinterpret_cast<const uint8_t *> (info_data_)
852                            + offset, size)));
853       } else if (section_id == DW_SECT_STR_OFFSETS) {
854         sections->insert(std::make_pair(
855             ".debug_str_offsets",
856             std::make_pair(reinterpret_cast<const uint8_t *> (str_offsets_data_)
857                            + offset, size)));
858       }
859     }
860     sections->insert(std::make_pair(
861         ".debug_str",
862         std::make_pair(reinterpret_cast<const uint8_t *> (string_buffer_),
863                        string_buffer_size_)));
864   }
865 }
866 
LookupCU(uint64_t dwo_id)867 int DwpReader::LookupCU(uint64_t dwo_id) {
868   uint32_t slot = static_cast<uint32_t>(dwo_id) & (nslots_ - 1);
869   uint64_t probe = byte_reader_.ReadEightBytes(
870       reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64_t));
871   if (probe != 0 && probe != dwo_id) {
872     uint32_t secondary_hash =
873         (static_cast<uint32_t>(dwo_id >> 32) & (nslots_ - 1)) | 1;
874     do {
875       slot = (slot + secondary_hash) & (nslots_ - 1);
876       probe = byte_reader_.ReadEightBytes(
877           reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64_t));
878     } while (probe != 0 && probe != dwo_id);
879   }
880   if (probe == 0)
881     return -1;
882   return slot;
883 }
884 
LookupCUv2(uint64_t dwo_id)885 uint32_t DwpReader::LookupCUv2(uint64_t dwo_id) {
886   uint32_t slot = static_cast<uint32_t>(dwo_id) & (nslots_ - 1);
887   uint64_t probe = byte_reader_.ReadEightBytes(
888       reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64_t));
889   uint32_t index = byte_reader_.ReadFourBytes(
890       reinterpret_cast<const uint8_t *>(pindex_) + slot * sizeof(uint32_t));
891   if (index != 0 && probe != dwo_id) {
892     uint32_t secondary_hash =
893         (static_cast<uint32_t>(dwo_id >> 32) & (nslots_ - 1)) | 1;
894     do {
895       slot = (slot + secondary_hash) & (nslots_ - 1);
896       probe = byte_reader_.ReadEightBytes(
897           reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64_t));
898       index = byte_reader_.ReadFourBytes(
899           reinterpret_cast<const uint8_t *>(pindex_) + slot * sizeof(uint32_t));
900     } while (index != 0 && probe != dwo_id);
901   }
902   return index;
903 }
904 
LineInfo(const uint8_t * buffer,uint64_t buffer_length,ByteReader * reader,LineInfoHandler * handler)905 LineInfo::LineInfo(const uint8_t *buffer, uint64_t buffer_length,
906                    ByteReader* reader, LineInfoHandler* handler):
907     handler_(handler), reader_(reader), buffer_(buffer) {
908 #ifndef NDEBUG
909   buffer_length_ = buffer_length;
910 #endif
911   header_.std_opcode_lengths = NULL;
912 }
913 
Start()914 uint64_t LineInfo::Start() {
915   ReadHeader();
916   ReadLines();
917   return after_header_ - buffer_;
918 }
919 
920 // The header for a debug_line section is mildly complicated, because
921 // the line info is very tightly encoded.
ReadHeader()922 void LineInfo::ReadHeader() {
923   const uint8_t *lineptr = buffer_;
924   size_t initial_length_size;
925 
926   const uint64_t initial_length
927     = reader_->ReadInitialLength(lineptr, &initial_length_size);
928 
929   lineptr += initial_length_size;
930   header_.total_length = initial_length;
931   assert(buffer_ + initial_length_size + header_.total_length <=
932         buffer_ + buffer_length_);
933 
934   // Address size *must* be set by CU ahead of time.
935   assert(reader_->AddressSize() != 0);
936 
937   header_.version = reader_->ReadTwoBytes(lineptr);
938   lineptr += 2;
939 
940   header_.prologue_length = reader_->ReadOffset(lineptr);
941   lineptr += reader_->OffsetSize();
942 
943   header_.min_insn_length = reader_->ReadOneByte(lineptr);
944   lineptr += 1;
945 
946   if (header_.version >= 4) {
947     __attribute__((unused)) uint8_t max_ops_per_insn =
948         reader_->ReadOneByte(lineptr);
949     ++lineptr;
950     assert(max_ops_per_insn == 1);
951   }
952 
953   header_.default_is_stmt = reader_->ReadOneByte(lineptr);
954   lineptr += 1;
955 
956   header_.line_base = *reinterpret_cast<const int8_t*>(lineptr);
957   lineptr += 1;
958 
959   header_.line_range = reader_->ReadOneByte(lineptr);
960   lineptr += 1;
961 
962   header_.opcode_base = reader_->ReadOneByte(lineptr);
963   lineptr += 1;
964 
965   header_.std_opcode_lengths = new std::vector<unsigned char>;
966   header_.std_opcode_lengths->resize(header_.opcode_base + 1);
967   (*header_.std_opcode_lengths)[0] = 0;
968   for (int i = 1; i < header_.opcode_base; i++) {
969     (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr);
970     lineptr += 1;
971   }
972 
973   // It is legal for the directory entry table to be empty.
974   if (*lineptr) {
975     uint32_t dirindex = 1;
976     while (*lineptr) {
977       const char *dirname = reinterpret_cast<const char *>(lineptr);
978       handler_->DefineDir(dirname, dirindex);
979       lineptr += strlen(dirname) + 1;
980       dirindex++;
981     }
982   }
983   lineptr++;
984 
985   // It is also legal for the file entry table to be empty.
986   if (*lineptr) {
987     uint32_t fileindex = 1;
988     size_t len;
989     while (*lineptr) {
990       const char *filename = reinterpret_cast<const char *>(lineptr);
991       lineptr += strlen(filename) + 1;
992 
993       uint64_t dirindex = reader_->ReadUnsignedLEB128(lineptr, &len);
994       lineptr += len;
995 
996       uint64_t mod_time = reader_->ReadUnsignedLEB128(lineptr, &len);
997       lineptr += len;
998 
999       uint64_t filelength = reader_->ReadUnsignedLEB128(lineptr, &len);
1000       lineptr += len;
1001       handler_->DefineFile(filename, fileindex, static_cast<uint32_t>(dirindex),
1002                            mod_time, filelength);
1003       fileindex++;
1004     }
1005   }
1006   lineptr++;
1007 
1008   after_header_ = lineptr;
1009 }
1010 
1011 /* static */
ProcessOneOpcode(ByteReader * reader,LineInfoHandler * handler,const struct LineInfoHeader & header,const uint8_t * start,struct LineStateMachine * lsm,size_t * len,uintptr pc,bool * lsm_passes_pc)1012 bool LineInfo::ProcessOneOpcode(ByteReader* reader,
1013                                 LineInfoHandler* handler,
1014                                 const struct LineInfoHeader &header,
1015                                 const uint8_t *start,
1016                                 struct LineStateMachine* lsm,
1017                                 size_t* len,
1018                                 uintptr pc,
1019                                 bool *lsm_passes_pc) {
1020   size_t oplen = 0;
1021   size_t templen;
1022   uint8_t opcode = reader->ReadOneByte(start);
1023   oplen++;
1024   start++;
1025 
1026   // If the opcode is great than the opcode_base, it is a special
1027   // opcode. Most line programs consist mainly of special opcodes.
1028   if (opcode >= header.opcode_base) {
1029     opcode -= header.opcode_base;
1030     const int64_t advance_address = (opcode / header.line_range)
1031                                   * header.min_insn_length;
1032     const int32_t advance_line = (opcode % header.line_range)
1033                                + header.line_base;
1034 
1035     // Check if the lsm passes "pc". If so, mark it as passed.
1036     if (lsm_passes_pc &&
1037         lsm->address <= pc && pc < lsm->address + advance_address) {
1038       *lsm_passes_pc = true;
1039     }
1040 
1041     lsm->address += advance_address;
1042     lsm->line_num += advance_line;
1043     lsm->basic_block = true;
1044     *len = oplen;
1045     return true;
1046   }
1047 
1048   // Otherwise, we have the regular opcodes
1049   switch (opcode) {
1050     case DW_LNS_copy: {
1051       lsm->basic_block = false;
1052       *len = oplen;
1053       return true;
1054     }
1055 
1056     case DW_LNS_advance_pc: {
1057       uint64_t advance_address = reader->ReadUnsignedLEB128(start, &templen);
1058       oplen += templen;
1059 
1060       // Check if the lsm passes "pc". If so, mark it as passed.
1061       if (lsm_passes_pc && lsm->address <= pc &&
1062           pc < lsm->address + header.min_insn_length * advance_address) {
1063         *lsm_passes_pc = true;
1064       }
1065 
1066       lsm->address += header.min_insn_length * advance_address;
1067     }
1068       break;
1069     case DW_LNS_advance_line: {
1070       const int64_t advance_line = reader->ReadSignedLEB128(start, &templen);
1071       oplen += templen;
1072       lsm->line_num += static_cast<int32_t>(advance_line);
1073 
1074       // With gcc 4.2.1, we can get the line_no here for the first time
1075       // since DW_LNS_advance_line is called after DW_LNE_set_address is
1076       // called. So we check if the lsm passes "pc" here, not in
1077       // DW_LNE_set_address.
1078       if (lsm_passes_pc && lsm->address == pc) {
1079         *lsm_passes_pc = true;
1080       }
1081     }
1082       break;
1083     case DW_LNS_set_file: {
1084       const uint64_t fileno = reader->ReadUnsignedLEB128(start, &templen);
1085       oplen += templen;
1086       lsm->file_num = static_cast<uint32_t>(fileno);
1087     }
1088       break;
1089     case DW_LNS_set_column: {
1090       const uint64_t colno = reader->ReadUnsignedLEB128(start, &templen);
1091       oplen += templen;
1092       lsm->column_num = static_cast<uint32_t>(colno);
1093     }
1094       break;
1095     case DW_LNS_negate_stmt: {
1096       lsm->is_stmt = !lsm->is_stmt;
1097     }
1098       break;
1099     case DW_LNS_set_basic_block: {
1100       lsm->basic_block = true;
1101     }
1102       break;
1103     case DW_LNS_fixed_advance_pc: {
1104       const uint16_t advance_address = reader->ReadTwoBytes(start);
1105       oplen += 2;
1106 
1107       // Check if the lsm passes "pc". If so, mark it as passed.
1108       if (lsm_passes_pc &&
1109           lsm->address <= pc && pc < lsm->address + advance_address) {
1110         *lsm_passes_pc = true;
1111       }
1112 
1113       lsm->address += advance_address;
1114     }
1115       break;
1116     case DW_LNS_const_add_pc: {
1117       const int64_t advance_address = header.min_insn_length
1118                                     * ((255 - header.opcode_base)
1119                                        / header.line_range);
1120 
1121       // Check if the lsm passes "pc". If so, mark it as passed.
1122       if (lsm_passes_pc &&
1123           lsm->address <= pc && pc < lsm->address + advance_address) {
1124         *lsm_passes_pc = true;
1125       }
1126 
1127       lsm->address += advance_address;
1128     }
1129       break;
1130     case DW_LNS_extended_op: {
1131       const uint64_t extended_op_len = reader->ReadUnsignedLEB128(start,
1132                                                                 &templen);
1133       start += templen;
1134       oplen += templen + extended_op_len;
1135 
1136       const uint64_t extended_op = reader->ReadOneByte(start);
1137       start++;
1138 
1139       switch (extended_op) {
1140         case DW_LNE_end_sequence: {
1141           lsm->end_sequence = true;
1142           *len = oplen;
1143           return true;
1144         }
1145           break;
1146         case DW_LNE_set_address: {
1147           // With gcc 4.2.1, we cannot tell the line_no here since
1148           // DW_LNE_set_address is called before DW_LNS_advance_line is
1149           // called.  So we do not check if the lsm passes "pc" here.  See
1150           // also the comment in DW_LNS_advance_line.
1151           uint64_t address = reader->ReadAddress(start);
1152           lsm->address = address;
1153         }
1154           break;
1155         case DW_LNE_define_file: {
1156           const char *filename = reinterpret_cast<const char *>(start);
1157 
1158           templen = strlen(filename) + 1;
1159           start += templen;
1160 
1161           uint64_t dirindex = reader->ReadUnsignedLEB128(start, &templen);
1162           oplen += templen;
1163 
1164           const uint64_t mod_time = reader->ReadUnsignedLEB128(start,
1165                                                              &templen);
1166           oplen += templen;
1167 
1168           const uint64_t filelength = reader->ReadUnsignedLEB128(start,
1169                                                                &templen);
1170           oplen += templen;
1171 
1172           if (handler) {
1173             handler->DefineFile(filename, -1, static_cast<uint32_t>(dirindex),
1174                                 mod_time, filelength);
1175           }
1176         }
1177           break;
1178       }
1179     }
1180       break;
1181 
1182     default: {
1183       // Ignore unknown opcode  silently
1184       if (header.std_opcode_lengths) {
1185         for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) {
1186           reader->ReadUnsignedLEB128(start, &templen);
1187           start += templen;
1188           oplen += templen;
1189         }
1190       }
1191     }
1192       break;
1193   }
1194   *len = oplen;
1195   return false;
1196 }
1197 
ReadLines()1198 void LineInfo::ReadLines() {
1199   struct LineStateMachine lsm;
1200 
1201   // lengthstart is the place the length field is based on.
1202   // It is the point in the header after the initial length field
1203   const uint8_t *lengthstart = buffer_;
1204 
1205   // In 64 bit dwarf, the initial length is 12 bytes, because of the
1206   // 0xffffffff at the start.
1207   if (reader_->OffsetSize() == 8)
1208     lengthstart += 12;
1209   else
1210     lengthstart += 4;
1211 
1212   const uint8_t *lineptr = after_header_;
1213   lsm.Reset(header_.default_is_stmt);
1214 
1215   // The LineInfoHandler interface expects each line's length along
1216   // with its address, but DWARF only provides addresses (sans
1217   // length), and an end-of-sequence address; one infers the length
1218   // from the next address. So we report a line only when we get the
1219   // next line's address, or the end-of-sequence address.
1220   bool have_pending_line = false;
1221   uint64_t pending_address = 0;
1222   uint32_t pending_file_num = 0, pending_line_num = 0, pending_column_num = 0;
1223 
1224   while (lineptr < lengthstart + header_.total_length) {
1225     size_t oplength;
1226     bool add_row = ProcessOneOpcode(reader_, handler_, header_,
1227                                     lineptr, &lsm, &oplength, (uintptr)-1,
1228                                     NULL);
1229     if (add_row) {
1230       if (have_pending_line)
1231         handler_->AddLine(pending_address, lsm.address - pending_address,
1232                           pending_file_num, pending_line_num,
1233                           pending_column_num);
1234       if (lsm.end_sequence) {
1235         lsm.Reset(header_.default_is_stmt);
1236         have_pending_line = false;
1237       } else {
1238         pending_address = lsm.address;
1239         pending_file_num = lsm.file_num;
1240         pending_line_num = lsm.line_num;
1241         pending_column_num = lsm.column_num;
1242         have_pending_line = true;
1243       }
1244     }
1245     lineptr += oplength;
1246   }
1247 
1248   after_header_ = lengthstart + header_.total_length;
1249 }
1250 
RangeListReader(const uint8_t * buffer,uint64_t size,ByteReader * reader,RangeListHandler * handler)1251 RangeListReader::RangeListReader(const uint8_t *buffer, uint64_t size,
1252                                  ByteReader *reader, RangeListHandler *handler)
1253     : buffer_(buffer), size_(size), reader_(reader), handler_(handler) { }
1254 
ReadRangeList(uint64_t offset)1255 bool RangeListReader::ReadRangeList(uint64_t offset) {
1256   const uint64_t max_address =
1257     (reader_->AddressSize() == 4) ? 0xffffffffUL
1258                                   : 0xffffffffffffffffULL;
1259   const uint64_t entry_size = reader_->AddressSize() * 2;
1260   bool list_end = false;
1261 
1262   do {
1263     if (offset > size_ - entry_size) {
1264       return false; // Invalid range detected
1265     }
1266 
1267     uint64_t start_address = reader_->ReadAddress(buffer_ + offset);
1268     uint64_t end_address =
1269       reader_->ReadAddress(buffer_ + offset + reader_->AddressSize());
1270 
1271     if (start_address == max_address) { // Base address selection
1272       handler_->SetBaseAddress(end_address);
1273     } else if (start_address == 0 && end_address == 0) { // End-of-list
1274       handler_->Finish();
1275       list_end = true;
1276     } else { // Add a range entry
1277       handler_->AddRange(start_address, end_address);
1278     }
1279 
1280     offset += entry_size;
1281   } while (!list_end);
1282 
1283   return true;
1284 }
1285 
1286 // A DWARF rule for recovering the address or value of a register, or
1287 // computing the canonical frame address. There is one subclass of this for
1288 // each '*Rule' member function in CallFrameInfo::Handler.
1289 //
1290 // It's annoying that we have to handle Rules using pointers (because
1291 // the concrete instances can have an arbitrary size). They're small,
1292 // so it would be much nicer if we could just handle them by value
1293 // instead of fretting about ownership and destruction.
1294 //
1295 // It seems like all these could simply be instances of std::tr1::bind,
1296 // except that we need instances to be EqualityComparable, too.
1297 //
1298 // This could logically be nested within State, but then the qualified names
1299 // get horrendous.
1300 class CallFrameInfo::Rule {
1301  public:
~Rule()1302   virtual ~Rule() { }
1303 
1304   // Tell HANDLER that, at ADDRESS in the program, REG can be recovered using
1305   // this rule. If REG is kCFARegister, then this rule describes how to compute
1306   // the canonical frame address. Return what the HANDLER member function
1307   // returned.
1308   virtual bool Handle(Handler *handler,
1309                       uint64_t address, int reg) const = 0;
1310 
1311   // Equality on rules. We use these to decide which rules we need
1312   // to report after a DW_CFA_restore_state instruction.
1313   virtual bool operator==(const Rule &rhs) const = 0;
1314 
operator !=(const Rule & rhs) const1315   bool operator!=(const Rule &rhs) const { return ! (*this == rhs); }
1316 
1317   // Return a pointer to a copy of this rule.
1318   virtual Rule *Copy() const = 0;
1319 
1320   // If this is a base+offset rule, change its base register to REG.
1321   // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.)
SetBaseRegister(unsigned reg)1322   virtual void SetBaseRegister(unsigned reg) { }
1323 
1324   // If this is a base+offset rule, change its offset to OFFSET. Otherwise,
1325   // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.)
SetOffset(long long offset)1326   virtual void SetOffset(long long offset) { }
1327 };
1328 
1329 // Rule: the value the register had in the caller cannot be recovered.
1330 class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule {
1331  public:
UndefinedRule()1332   UndefinedRule() { }
~UndefinedRule()1333   ~UndefinedRule() { }
Handle(Handler * handler,uint64_t address,int reg) const1334   bool Handle(Handler *handler, uint64_t address, int reg) const {
1335     return handler->UndefinedRule(address, reg);
1336   }
operator ==(const Rule & rhs) const1337   bool operator==(const Rule &rhs) const {
1338     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1339     // been carefully considered; cheap RTTI-like workarounds are forbidden.
1340     const UndefinedRule *our_rhs = dynamic_cast<const UndefinedRule *>(&rhs);
1341     return (our_rhs != NULL);
1342   }
Copy() const1343   Rule *Copy() const { return new UndefinedRule(*this); }
1344 };
1345 
1346 // Rule: the register's value is the same as that it had in the caller.
1347 class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule {
1348  public:
SameValueRule()1349   SameValueRule() { }
~SameValueRule()1350   ~SameValueRule() { }
Handle(Handler * handler,uint64_t address,int reg) const1351   bool Handle(Handler *handler, uint64_t address, int reg) const {
1352     return handler->SameValueRule(address, reg);
1353   }
operator ==(const Rule & rhs) const1354   bool operator==(const Rule &rhs) const {
1355     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1356     // been carefully considered; cheap RTTI-like workarounds are forbidden.
1357     const SameValueRule *our_rhs = dynamic_cast<const SameValueRule *>(&rhs);
1358     return (our_rhs != NULL);
1359   }
Copy() const1360   Rule *Copy() const { return new SameValueRule(*this); }
1361 };
1362 
1363 // Rule: the register is saved at OFFSET from BASE_REGISTER.  BASE_REGISTER
1364 // may be CallFrameInfo::Handler::kCFARegister.
1365 class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule {
1366  public:
OffsetRule(int base_register,long offset)1367   OffsetRule(int base_register, long offset)
1368       : base_register_(base_register), offset_(offset) { }
~OffsetRule()1369   ~OffsetRule() { }
Handle(Handler * handler,uint64_t address,int reg) const1370   bool Handle(Handler *handler, uint64_t address, int reg) const {
1371     return handler->OffsetRule(address, reg, base_register_, offset_);
1372   }
operator ==(const Rule & rhs) const1373   bool operator==(const Rule &rhs) const {
1374     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1375     // been carefully considered; cheap RTTI-like workarounds are forbidden.
1376     const OffsetRule *our_rhs = dynamic_cast<const OffsetRule *>(&rhs);
1377     return (our_rhs &&
1378             base_register_ == our_rhs->base_register_ &&
1379             offset_ == our_rhs->offset_);
1380   }
Copy() const1381   Rule *Copy() const { return new OffsetRule(*this); }
1382   // We don't actually need SetBaseRegister or SetOffset here, since they
1383   // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it
1384   // doesn't make sense to use OffsetRule for computing the CFA: it
1385   // computes the address at which a register is saved, not a value.
1386  private:
1387   int base_register_;
1388   long offset_;
1389 };
1390 
1391 // Rule: the value the register had in the caller is the value of
1392 // BASE_REGISTER plus offset. BASE_REGISTER may be
1393 // CallFrameInfo::Handler::kCFARegister.
1394 class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule {
1395  public:
ValOffsetRule(int base_register,long offset)1396   ValOffsetRule(int base_register, long offset)
1397       : base_register_(base_register), offset_(offset) { }
~ValOffsetRule()1398   ~ValOffsetRule() { }
Handle(Handler * handler,uint64_t address,int reg) const1399   bool Handle(Handler *handler, uint64_t address, int reg) const {
1400     return handler->ValOffsetRule(address, reg, base_register_, offset_);
1401   }
operator ==(const Rule & rhs) const1402   bool operator==(const Rule &rhs) const {
1403     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1404     // been carefully considered; cheap RTTI-like workarounds are forbidden.
1405     const ValOffsetRule *our_rhs = dynamic_cast<const ValOffsetRule *>(&rhs);
1406     return (our_rhs &&
1407             base_register_ == our_rhs->base_register_ &&
1408             offset_ == our_rhs->offset_);
1409   }
Copy() const1410   Rule *Copy() const { return new ValOffsetRule(*this); }
SetBaseRegister(unsigned reg)1411   void SetBaseRegister(unsigned reg) { base_register_ = reg; }
SetOffset(long long offset)1412   void SetOffset(long long offset) { offset_ = offset; }
1413  private:
1414   int base_register_;
1415   long offset_;
1416 };
1417 
1418 // Rule: the register has been saved in another register REGISTER_NUMBER_.
1419 class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule {
1420  public:
RegisterRule(int register_number)1421   explicit RegisterRule(int register_number)
1422       : register_number_(register_number) { }
~RegisterRule()1423   ~RegisterRule() { }
Handle(Handler * handler,uint64_t address,int reg) const1424   bool Handle(Handler *handler, uint64_t address, int reg) const {
1425     return handler->RegisterRule(address, reg, register_number_);
1426   }
operator ==(const Rule & rhs) const1427   bool operator==(const Rule &rhs) const {
1428     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1429     // been carefully considered; cheap RTTI-like workarounds are forbidden.
1430     const RegisterRule *our_rhs = dynamic_cast<const RegisterRule *>(&rhs);
1431     return (our_rhs && register_number_ == our_rhs->register_number_);
1432   }
Copy() const1433   Rule *Copy() const { return new RegisterRule(*this); }
1434  private:
1435   int register_number_;
1436 };
1437 
1438 // Rule: EXPRESSION evaluates to the address at which the register is saved.
1439 class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule {
1440  public:
ExpressionRule(const string & expression)1441   explicit ExpressionRule(const string &expression)
1442       : expression_(expression) { }
~ExpressionRule()1443   ~ExpressionRule() { }
Handle(Handler * handler,uint64_t address,int reg) const1444   bool Handle(Handler *handler, uint64_t address, int reg) const {
1445     return handler->ExpressionRule(address, reg, expression_);
1446   }
operator ==(const Rule & rhs) const1447   bool operator==(const Rule &rhs) const {
1448     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1449     // been carefully considered; cheap RTTI-like workarounds are forbidden.
1450     const ExpressionRule *our_rhs = dynamic_cast<const ExpressionRule *>(&rhs);
1451     return (our_rhs && expression_ == our_rhs->expression_);
1452   }
Copy() const1453   Rule *Copy() const { return new ExpressionRule(*this); }
1454  private:
1455   string expression_;
1456 };
1457 
// Rule: EXPRESSION evaluates to the value the register had in the caller.
1459 class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule {
1460  public:
ValExpressionRule(const string & expression)1461   explicit ValExpressionRule(const string &expression)
1462       : expression_(expression) { }
~ValExpressionRule()1463   ~ValExpressionRule() { }
Handle(Handler * handler,uint64_t address,int reg) const1464   bool Handle(Handler *handler, uint64_t address, int reg) const {
1465     return handler->ValExpressionRule(address, reg, expression_);
1466   }
operator ==(const Rule & rhs) const1467   bool operator==(const Rule &rhs) const {
1468     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1469     // been carefully considered; cheap RTTI-like workarounds are forbidden.
1470     const ValExpressionRule *our_rhs =
1471         dynamic_cast<const ValExpressionRule *>(&rhs);
1472     return (our_rhs && expression_ == our_rhs->expression_);
1473   }
Copy() const1474   Rule *Copy() const { return new ValExpressionRule(*this); }
1475  private:
1476   string expression_;
1477 };
1478 
1479 // A map from register numbers to rules.
1480 class CallFrameInfo::RuleMap {
1481  public:
RuleMap()1482   RuleMap() : cfa_rule_(NULL) { }
RuleMap(const RuleMap & rhs)1483   RuleMap(const RuleMap &rhs) : cfa_rule_(NULL) { *this = rhs; }
~RuleMap()1484   ~RuleMap() { Clear(); }
1485 
1486   RuleMap &operator=(const RuleMap &rhs);
1487 
1488   // Set the rule for computing the CFA to RULE. Take ownership of RULE.
SetCFARule(Rule * rule)1489   void SetCFARule(Rule *rule) { delete cfa_rule_; cfa_rule_ = rule; }
1490 
1491   // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains
1492   // ownership of the rule. We use this for DW_CFA_def_cfa_offset and
1493   // DW_CFA_def_cfa_register, and for detecting references to the CFA before
1494   // a rule for it has been established.
CFARule() const1495   Rule *CFARule() const { return cfa_rule_; }
1496 
1497   // Return the rule for REG, or NULL if there is none. The caller takes
1498   // ownership of the result.
1499   Rule *RegisterRule(int reg) const;
1500 
1501   // Set the rule for computing REG to RULE. Take ownership of RULE.
1502   void SetRegisterRule(int reg, Rule *rule);
1503 
1504   // Make all the appropriate calls to HANDLER as if we were changing from
1505   // this RuleMap to NEW_RULES at ADDRESS. We use this to implement
1506   // DW_CFA_restore_state, where lots of rules can change simultaneously.
1507   // Return true if all handlers returned true; otherwise, return false.
1508   bool HandleTransitionTo(Handler *handler, uint64_t address,
1509                           const RuleMap &new_rules) const;
1510 
1511  private:
1512   // A map from register numbers to Rules.
1513   typedef std::map<int, Rule *> RuleByNumber;
1514 
1515   // Remove all register rules and clear cfa_rule_.
1516   void Clear();
1517 
1518   // The rule for computing the canonical frame address. This RuleMap owns
1519   // this rule.
1520   Rule *cfa_rule_;
1521 
1522   // A map from register numbers to postfix expressions to recover
1523   // their values. This RuleMap owns the Rules the map refers to.
1524   RuleByNumber registers_;
1525 };
1526 
operator =(const RuleMap & rhs)1527 CallFrameInfo::RuleMap &CallFrameInfo::RuleMap::operator=(const RuleMap &rhs) {
1528   Clear();
1529   // Since each map owns the rules it refers to, assignment must copy them.
1530   if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy();
1531   for (RuleByNumber::const_iterator it = rhs.registers_.begin();
1532        it != rhs.registers_.end(); it++)
1533     registers_[it->first] = it->second->Copy();
1534   return *this;
1535 }
1536 
RegisterRule(int reg) const1537 CallFrameInfo::Rule *CallFrameInfo::RuleMap::RegisterRule(int reg) const {
1538   assert(reg != Handler::kCFARegister);
1539   RuleByNumber::const_iterator it = registers_.find(reg);
1540   if (it != registers_.end())
1541     return it->second->Copy();
1542   else
1543     return NULL;
1544 }
1545 
SetRegisterRule(int reg,Rule * rule)1546 void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule *rule) {
1547   assert(reg != Handler::kCFARegister);
1548   assert(rule);
1549   Rule **slot = &registers_[reg];
1550   delete *slot;
1551   *slot = rule;
1552 }
1553 
HandleTransitionTo(Handler * handler,uint64_t address,const RuleMap & new_rules) const1554 bool CallFrameInfo::RuleMap::HandleTransitionTo(
1555     Handler *handler,
1556     uint64_t address,
1557     const RuleMap &new_rules) const {
1558   // Transition from cfa_rule_ to new_rules.cfa_rule_.
1559   if (cfa_rule_ && new_rules.cfa_rule_) {
1560     if (*cfa_rule_ != *new_rules.cfa_rule_ &&
1561         !new_rules.cfa_rule_->Handle(handler, address,
1562                                      Handler::kCFARegister))
1563       return false;
1564   } else if (cfa_rule_) {
1565     // this RuleMap has a CFA rule but new_rules doesn't.
1566     // CallFrameInfo::Handler has no way to handle this --- and shouldn't;
1567     // it's garbage input. The instruction interpreter should have
1568     // detected this and warned, so take no action here.
1569   } else if (new_rules.cfa_rule_) {
1570     // This shouldn't be possible: NEW_RULES is some prior state, and
1571     // there's no way to remove entries.
1572     assert(0);
1573   } else {
1574     // Both CFA rules are empty.  No action needed.
1575   }
1576 
1577   // Traverse the two maps in order by register number, and report
1578   // whatever differences we find.
1579   RuleByNumber::const_iterator old_it = registers_.begin();
1580   RuleByNumber::const_iterator new_it = new_rules.registers_.begin();
1581   while (old_it != registers_.end() && new_it != new_rules.registers_.end()) {
1582     if (old_it->first < new_it->first) {
1583       // This RuleMap has an entry for old_it->first, but NEW_RULES
1584       // doesn't.
1585       //
1586       // This isn't really the right thing to do, but since CFI generally
1587       // only mentions callee-saves registers, and GCC's convention for
1588       // callee-saves registers is that they are unchanged, it's a good
1589       // approximation.
1590       if (!handler->SameValueRule(address, old_it->first))
1591         return false;
1592       old_it++;
1593     } else if (old_it->first > new_it->first) {
1594       // NEW_RULES has entry for new_it->first, but this RuleMap
1595       // doesn't. This shouldn't be possible: NEW_RULES is some prior
1596       // state, and there's no way to remove entries.
1597       assert(0);
1598     } else {
1599       // Both maps have an entry for this register. Report the new
1600       // rule if it is different.
1601       if (*old_it->second != *new_it->second &&
1602           !new_it->second->Handle(handler, address, new_it->first))
1603         return false;
1604       new_it++, old_it++;
1605     }
1606   }
1607   // Finish off entries from this RuleMap with no counterparts in new_rules.
1608   while (old_it != registers_.end()) {
1609     if (!handler->SameValueRule(address, old_it->first))
1610       return false;
1611     old_it++;
1612   }
1613   // Since we only make transitions from a rule set to some previously
1614   // saved rule set, and we can only add rules to the map, NEW_RULES
1615   // must have fewer rules than *this.
1616   assert(new_it == new_rules.registers_.end());
1617 
1618   return true;
1619 }
1620 
1621 // Remove all register rules and clear cfa_rule_.
Clear()1622 void CallFrameInfo::RuleMap::Clear() {
1623   delete cfa_rule_;
1624   cfa_rule_ = NULL;
1625   for (RuleByNumber::iterator it = registers_.begin();
1626        it != registers_.end(); it++)
1627     delete it->second;
1628   registers_.clear();
1629 }
1630 
1631 // The state of the call frame information interpreter as it processes
1632 // instructions from a CIE and FDE.
1633 class CallFrameInfo::State {
1634  public:
1635   // Create a call frame information interpreter state with the given
1636   // reporter, reader, handler, and initial call frame info address.
State(ByteReader * reader,Handler * handler,Reporter * reporter,uint64_t address)1637   State(ByteReader *reader, Handler *handler, Reporter *reporter,
1638         uint64_t address)
1639       : reader_(reader), handler_(handler), reporter_(reporter),
1640         address_(address), entry_(NULL), cursor_(NULL) { }
1641 
1642   // Interpret instructions from CIE, save the resulting rule set for
1643   // DW_CFA_restore instructions, and return true. On error, report
1644   // the problem to reporter_ and return false.
1645   bool InterpretCIE(const CIE &cie);
1646 
1647   // Interpret instructions from FDE, and return true. On error,
1648   // report the problem to reporter_ and return false.
1649   bool InterpretFDE(const FDE &fde);
1650 
1651  private:
1652   // The operands of a CFI instruction, for ParseOperands.
1653   struct Operands {
1654     unsigned register_number;  // A register number.
1655     uint64_t offset;             // An offset or address.
1656     long signed_offset;        // A signed offset.
1657     string expression;         // A DWARF expression.
1658   };
1659 
1660   // Parse CFI instruction operands from STATE's instruction stream as
1661   // described by FORMAT. On success, populate OPERANDS with the
1662   // results, and return true. On failure, report the problem and
1663   // return false.
1664   //
1665   // Each character of FORMAT should be one of the following:
1666   //
1667   //   'r'  unsigned LEB128 register number (OPERANDS->register_number)
1668   //   'o'  unsigned LEB128 offset          (OPERANDS->offset)
1669   //   's'  signed LEB128 offset            (OPERANDS->signed_offset)
1670   //   'a'  machine-size address            (OPERANDS->offset)
1671   //        (If the CIE has a 'z' augmentation string, 'a' uses the
1672   //        encoding specified by the 'R' argument.)
1673   //   '1'  a one-byte offset               (OPERANDS->offset)
1674   //   '2'  a two-byte offset               (OPERANDS->offset)
1675   //   '4'  a four-byte offset              (OPERANDS->offset)
1676   //   '8'  an eight-byte offset            (OPERANDS->offset)
1677   //   'e'  a DW_FORM_block holding a       (OPERANDS->expression)
1678   //        DWARF expression
1679   bool ParseOperands(const char *format, Operands *operands);
1680 
1681   // Interpret one CFI instruction from STATE's instruction stream, update
1682   // STATE, report any rule changes to handler_, and return true. On
1683   // failure, report the problem and return false.
1684   bool DoInstruction();
1685 
1686   // The following Do* member functions are subroutines of DoInstruction,
1687   // factoring out the actual work of operations that have several
1688   // different encodings.
1689 
1690   // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and
1691   // return true. On failure, report and return false. (Used for
1692   // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.)
1693   bool DoDefCFA(unsigned base_register, long offset);
1694 
1695   // Change the offset of the CFA rule to OFFSET, and return true. On
1696   // failure, report and return false. (Subroutine for
1697   // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.)
1698   bool DoDefCFAOffset(long offset);
1699 
1700   // Specify that REG can be recovered using RULE, and return true. On
1701   // failure, report and return false.
1702   bool DoRule(unsigned reg, Rule *rule);
1703 
1704   // Specify that REG can be found at OFFSET from the CFA, and return true.
1705   // On failure, report and return false. (Subroutine for DW_CFA_offset,
1706   // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.)
1707   bool DoOffset(unsigned reg, long offset);
1708 
1709   // Specify that the caller's value for REG is the CFA plus OFFSET,
1710   // and return true. On failure, report and return false. (Subroutine
1711   // for DW_CFA_val_offset and DW_CFA_val_offset_sf.)
1712   bool DoValOffset(unsigned reg, long offset);
1713 
1714   // Restore REG to the rule established in the CIE, and return true. On
1715   // failure, report and return false. (Subroutine for DW_CFA_restore and
1716   // DW_CFA_restore_extended.)
1717   bool DoRestore(unsigned reg);
1718 
1719   // Return the section offset of the instruction at cursor. For use
1720   // in error messages.
CursorOffset()1721   uint64_t CursorOffset() { return entry_->offset + (cursor_ - entry_->start); }
1722 
1723   // Report that entry_ is incomplete, and return false. For brevity.
ReportIncomplete()1724   bool ReportIncomplete() {
1725     reporter_->Incomplete(entry_->offset, entry_->kind);
1726     return false;
1727   }
1728 
1729   // For reading multi-byte values with the appropriate endianness.
1730   ByteReader *reader_;
1731 
1732   // The handler to which we should report the data we find.
1733   Handler *handler_;
1734 
1735   // For reporting problems in the info we're parsing.
1736   Reporter *reporter_;
1737 
1738   // The code address to which the next instruction in the stream applies.
1739   uint64_t address_;
1740 
1741   // The entry whose instructions we are currently processing. This is
1742   // first a CIE, and then an FDE.
1743   const Entry *entry_;
1744 
1745   // The next instruction to process.
1746   const uint8_t *cursor_;
1747 
1748   // The current set of rules.
1749   RuleMap rules_;
1750 
1751   // The set of rules established by the CIE, used by DW_CFA_restore
1752   // and DW_CFA_restore_extended. We set this after interpreting the
1753   // CIE's instructions.
1754   RuleMap cie_rules_;
1755 
1756   // A stack of saved states, for DW_CFA_remember_state and
1757   // DW_CFA_restore_state.
1758   std::stack<RuleMap> saved_rules_;
1759 };
1760 
InterpretCIE(const CIE & cie)1761 bool CallFrameInfo::State::InterpretCIE(const CIE &cie) {
1762   entry_ = &cie;
1763   cursor_ = entry_->instructions;
1764   while (cursor_ < entry_->end)
1765     if (!DoInstruction())
1766       return false;
1767   // Note the rules established by the CIE, for use by DW_CFA_restore
1768   // and DW_CFA_restore_extended.
1769   cie_rules_ = rules_;
1770   return true;
1771 }
1772 
InterpretFDE(const FDE & fde)1773 bool CallFrameInfo::State::InterpretFDE(const FDE &fde) {
1774   entry_ = &fde;
1775   cursor_ = entry_->instructions;
1776   while (cursor_ < entry_->end)
1777     if (!DoInstruction())
1778       return false;
1779   return true;
1780 }
1781 
ParseOperands(const char * format,Operands * operands)1782 bool CallFrameInfo::State::ParseOperands(const char *format,
1783                                          Operands *operands) {
1784   size_t len;
1785   const char *operand;
1786 
1787   for (operand = format; *operand; operand++) {
1788     size_t bytes_left = entry_->end - cursor_;
1789     switch (*operand) {
1790       case 'r':
1791         operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len);
1792         if (len > bytes_left) return ReportIncomplete();
1793         cursor_ += len;
1794         break;
1795 
1796       case 'o':
1797         operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len);
1798         if (len > bytes_left) return ReportIncomplete();
1799         cursor_ += len;
1800         break;
1801 
1802       case 's':
1803         operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len);
1804         if (len > bytes_left) return ReportIncomplete();
1805         cursor_ += len;
1806         break;
1807 
1808       case 'a':
1809         operands->offset =
1810           reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding,
1811                                       &len);
1812         if (len > bytes_left) return ReportIncomplete();
1813         cursor_ += len;
1814         break;
1815 
1816       case '1':
1817         if (1 > bytes_left) return ReportIncomplete();
1818         operands->offset = static_cast<unsigned char>(*cursor_++);
1819         break;
1820 
1821       case '2':
1822         if (2 > bytes_left) return ReportIncomplete();
1823         operands->offset = reader_->ReadTwoBytes(cursor_);
1824         cursor_ += 2;
1825         break;
1826 
1827       case '4':
1828         if (4 > bytes_left) return ReportIncomplete();
1829         operands->offset = reader_->ReadFourBytes(cursor_);
1830         cursor_ += 4;
1831         break;
1832 
1833       case '8':
1834         if (8 > bytes_left) return ReportIncomplete();
1835         operands->offset = reader_->ReadEightBytes(cursor_);
1836         cursor_ += 8;
1837         break;
1838 
1839       case 'e': {
1840         size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len);
1841         if (len > bytes_left || expression_length > bytes_left - len)
1842           return ReportIncomplete();
1843         cursor_ += len;
1844         operands->expression = string(reinterpret_cast<const char *>(cursor_),
1845                                       expression_length);
1846         cursor_ += expression_length;
1847         break;
1848       }
1849 
1850       default:
1851           assert(0);
1852     }
1853   }
1854 
1855   return true;
1856 }
1857 
DoInstruction()1858 bool CallFrameInfo::State::DoInstruction() {
1859   CIE *cie = entry_->cie;
1860   Operands ops;
1861 
1862   // Our entry's kind should have been set by now.
1863   assert(entry_->kind != kUnknown);
1864 
1865   // We shouldn't have been invoked unless there were more
1866   // instructions to parse.
1867   assert(cursor_ < entry_->end);
1868 
1869   unsigned opcode = *cursor_++;
1870   if ((opcode & 0xc0) != 0) {
1871     switch (opcode & 0xc0) {
1872       // Advance the address.
1873       case DW_CFA_advance_loc: {
1874         size_t code_offset = opcode & 0x3f;
1875         address_ += code_offset * cie->code_alignment_factor;
1876         break;
1877       }
1878 
1879       // Find a register at an offset from the CFA.
1880       case DW_CFA_offset:
1881         if (!ParseOperands("o", &ops) ||
1882             !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor))
1883           return false;
1884         break;
1885 
1886       // Restore the rule established for a register by the CIE.
1887       case DW_CFA_restore:
1888         if (!DoRestore(opcode & 0x3f)) return false;
1889         break;
1890 
1891       // The 'if' above should have excluded this possibility.
1892       default:
1893         assert(0);
1894     }
1895 
1896     // Return here, so the big switch below won't be indented.
1897     return true;
1898   }
1899 
1900   switch (opcode) {
1901     // Set the address.
1902     case DW_CFA_set_loc:
1903       if (!ParseOperands("a", &ops)) return false;
1904       address_ = ops.offset;
1905       break;
1906 
1907     // Advance the address.
1908     case DW_CFA_advance_loc1:
1909       if (!ParseOperands("1", &ops)) return false;
1910       address_ += ops.offset * cie->code_alignment_factor;
1911       break;
1912 
1913     // Advance the address.
1914     case DW_CFA_advance_loc2:
1915       if (!ParseOperands("2", &ops)) return false;
1916       address_ += ops.offset * cie->code_alignment_factor;
1917       break;
1918 
1919     // Advance the address.
1920     case DW_CFA_advance_loc4:
1921       if (!ParseOperands("4", &ops)) return false;
1922       address_ += ops.offset * cie->code_alignment_factor;
1923       break;
1924 
1925     // Advance the address.
1926     case DW_CFA_MIPS_advance_loc8:
1927       if (!ParseOperands("8", &ops)) return false;
1928       address_ += ops.offset * cie->code_alignment_factor;
1929       break;
1930 
1931     // Compute the CFA by adding an offset to a register.
1932     case DW_CFA_def_cfa:
1933       if (!ParseOperands("ro", &ops) ||
1934           !DoDefCFA(ops.register_number, ops.offset))
1935         return false;
1936       break;
1937 
1938     // Compute the CFA by adding an offset to a register.
1939     case DW_CFA_def_cfa_sf:
1940       if (!ParseOperands("rs", &ops) ||
1941           !DoDefCFA(ops.register_number,
1942                     ops.signed_offset * cie->data_alignment_factor))
1943         return false;
1944       break;
1945 
1946     // Change the base register used to compute the CFA.
1947     case DW_CFA_def_cfa_register: {
1948       if (!ParseOperands("r", &ops)) return false;
1949       Rule *cfa_rule = rules_.CFARule();
1950       if (!cfa_rule) {
1951         if (!DoDefCFA(ops.register_number, ops.offset)) {
1952           reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
1953           return false;
1954         }
1955       } else {
1956         cfa_rule->SetBaseRegister(ops.register_number);
1957         if (!cfa_rule->Handle(handler_, address_,
1958                               Handler::kCFARegister))
1959         return false;
1960       }
1961       break;
1962     }
1963 
1964     // Change the offset used to compute the CFA.
1965     case DW_CFA_def_cfa_offset:
1966       if (!ParseOperands("o", &ops) ||
1967           !DoDefCFAOffset(ops.offset))
1968         return false;
1969       break;
1970 
1971     // Change the offset used to compute the CFA.
1972     case DW_CFA_def_cfa_offset_sf:
1973       if (!ParseOperands("s", &ops) ||
1974           !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor))
1975         return false;
1976       break;
1977 
1978     // Specify an expression whose value is the CFA.
1979     case DW_CFA_def_cfa_expression: {
1980       if (!ParseOperands("e", &ops))
1981         return false;
1982       Rule *rule = new ValExpressionRule(ops.expression);
1983       rules_.SetCFARule(rule);
1984       if (!rule->Handle(handler_, address_,
1985                         Handler::kCFARegister))
1986         return false;
1987       break;
1988     }
1989 
1990     // The register's value cannot be recovered.
1991     case DW_CFA_undefined: {
1992       if (!ParseOperands("r", &ops) ||
1993           !DoRule(ops.register_number, new UndefinedRule()))
1994         return false;
1995       break;
1996     }
1997 
1998     // The register's value is unchanged from its value in the caller.
1999     case DW_CFA_same_value: {
2000       if (!ParseOperands("r", &ops) ||
2001           !DoRule(ops.register_number, new SameValueRule()))
2002         return false;
2003       break;
2004     }
2005 
2006     // Find a register at an offset from the CFA.
2007     case DW_CFA_offset_extended:
2008       if (!ParseOperands("ro", &ops) ||
2009           !DoOffset(ops.register_number,
2010                     ops.offset * cie->data_alignment_factor))
2011         return false;
2012       break;
2013 
2014     // The register is saved at an offset from the CFA.
2015     case DW_CFA_offset_extended_sf:
2016       if (!ParseOperands("rs", &ops) ||
2017           !DoOffset(ops.register_number,
2018                     ops.signed_offset * cie->data_alignment_factor))
2019         return false;
2020       break;
2021 
2022     // The register is saved at an offset from the CFA.
2023     case DW_CFA_GNU_negative_offset_extended:
2024       if (!ParseOperands("ro", &ops) ||
2025           !DoOffset(ops.register_number,
2026                     -ops.offset * cie->data_alignment_factor))
2027         return false;
2028       break;
2029 
2030     // The register's value is the sum of the CFA plus an offset.
2031     case DW_CFA_val_offset:
2032       if (!ParseOperands("ro", &ops) ||
2033           !DoValOffset(ops.register_number,
2034                        ops.offset * cie->data_alignment_factor))
2035         return false;
2036       break;
2037 
2038     // The register's value is the sum of the CFA plus an offset.
2039     case DW_CFA_val_offset_sf:
2040       if (!ParseOperands("rs", &ops) ||
2041           !DoValOffset(ops.register_number,
2042                        ops.signed_offset * cie->data_alignment_factor))
2043         return false;
2044       break;
2045 
2046     // The register has been saved in another register.
2047     case DW_CFA_register: {
2048       if (!ParseOperands("ro", &ops) ||
2049           !DoRule(ops.register_number, new RegisterRule(ops.offset)))
2050         return false;
2051       break;
2052     }
2053 
2054     // An expression yields the address at which the register is saved.
2055     case DW_CFA_expression: {
2056       if (!ParseOperands("re", &ops) ||
2057           !DoRule(ops.register_number, new ExpressionRule(ops.expression)))
2058         return false;
2059       break;
2060     }
2061 
2062     // An expression yields the caller's value for the register.
2063     case DW_CFA_val_expression: {
2064       if (!ParseOperands("re", &ops) ||
2065           !DoRule(ops.register_number, new ValExpressionRule(ops.expression)))
2066         return false;
2067       break;
2068     }
2069 
2070     // Restore the rule established for a register by the CIE.
2071     case DW_CFA_restore_extended:
2072       if (!ParseOperands("r", &ops) ||
2073           !DoRestore( ops.register_number))
2074         return false;
2075       break;
2076 
2077     // Save the current set of rules on a stack.
2078     case DW_CFA_remember_state:
2079       saved_rules_.push(rules_);
2080       break;
2081 
2082     // Pop the current set of rules off the stack.
2083     case DW_CFA_restore_state: {
2084       if (saved_rules_.empty()) {
2085         reporter_->EmptyStateStack(entry_->offset, entry_->kind,
2086                                    CursorOffset());
2087         return false;
2088       }
2089       const RuleMap &new_rules = saved_rules_.top();
2090       if (rules_.CFARule() && !new_rules.CFARule()) {
2091         reporter_->ClearingCFARule(entry_->offset, entry_->kind,
2092                                    CursorOffset());
2093         return false;
2094       }
2095       rules_.HandleTransitionTo(handler_, address_, new_rules);
2096       rules_ = new_rules;
2097       saved_rules_.pop();
2098       break;
2099     }
2100 
2101     // No operation.  (Padding instruction.)
2102     case DW_CFA_nop:
2103       break;
2104 
2105     // A SPARC register window save: Registers 8 through 15 (%o0-%o7)
2106     // are saved in registers 24 through 31 (%i0-%i7), and registers
2107     // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets
2108     // (0-15 * the register size). The register numbers must be
2109     // hard-coded. A GNU extension, and not a pretty one.
2110     case DW_CFA_GNU_window_save: {
2111       // Save %o0-%o7 in %i0-%i7.
2112       for (int i = 8; i < 16; i++)
2113         if (!DoRule(i, new RegisterRule(i + 16)))
2114           return false;
2115       // Save %l0-%l7 and %i0-%i7 at the CFA.
2116       for (int i = 16; i < 32; i++)
2117         // Assume that the byte reader's address size is the same as
2118         // the architecture's register size. !@#%*^ hilarious.
2119         if (!DoRule(i, new OffsetRule(Handler::kCFARegister,
2120                                       (i - 16) * reader_->AddressSize())))
2121           return false;
2122       break;
2123     }
2124 
2125     // I'm not sure what this is. GDB doesn't use it for unwinding.
2126     case DW_CFA_GNU_args_size:
2127       if (!ParseOperands("o", &ops)) return false;
2128       break;
2129 
2130     // An opcode we don't recognize.
2131     default: {
2132       reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset());
2133       return false;
2134     }
2135   }
2136 
2137   return true;
2138 }
2139 
DoDefCFA(unsigned base_register,long offset)2140 bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) {
2141   Rule *rule = new ValOffsetRule(base_register, offset);
2142   rules_.SetCFARule(rule);
2143   return rule->Handle(handler_, address_,
2144                       Handler::kCFARegister);
2145 }
2146 
DoDefCFAOffset(long offset)2147 bool CallFrameInfo::State::DoDefCFAOffset(long offset) {
2148   Rule *cfa_rule = rules_.CFARule();
2149   if (!cfa_rule) {
2150     reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2151     return false;
2152   }
2153   cfa_rule->SetOffset(offset);
2154   return cfa_rule->Handle(handler_, address_,
2155                           Handler::kCFARegister);
2156 }
2157 
DoRule(unsigned reg,Rule * rule)2158 bool CallFrameInfo::State::DoRule(unsigned reg, Rule *rule) {
2159   rules_.SetRegisterRule(reg, rule);
2160   return rule->Handle(handler_, address_, reg);
2161 }
2162 
DoOffset(unsigned reg,long offset)2163 bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) {
2164   if (!rules_.CFARule()) {
2165     reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2166     return false;
2167   }
2168   return DoRule(reg,
2169                 new OffsetRule(Handler::kCFARegister, offset));
2170 }
2171 
DoValOffset(unsigned reg,long offset)2172 bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) {
2173   if (!rules_.CFARule()) {
2174     reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2175     return false;
2176   }
2177   return DoRule(reg,
2178                 new ValOffsetRule(Handler::kCFARegister, offset));
2179 }
2180 
DoRestore(unsigned reg)2181 bool CallFrameInfo::State::DoRestore(unsigned reg) {
2182   // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE.
2183   if (entry_->kind == kCIE) {
2184     reporter_->RestoreInCIE(entry_->offset, CursorOffset());
2185     return false;
2186   }
2187   Rule *rule = cie_rules_.RegisterRule(reg);
2188   if (!rule) {
2189     // This isn't really the right thing to do, but since CFI generally
2190     // only mentions callee-saves registers, and GCC's convention for
2191     // callee-saves registers is that they are unchanged, it's a good
2192     // approximation.
2193     rule = new SameValueRule();
2194   }
2195   return DoRule(reg, rule);
2196 }
2197 
ReadEntryPrologue(const uint8_t * cursor,Entry * entry)2198 bool CallFrameInfo::ReadEntryPrologue(const uint8_t *cursor, Entry *entry) {
2199   const uint8_t *buffer_end = buffer_ + buffer_length_;
2200 
2201   // Initialize enough of ENTRY for use in error reporting.
2202   entry->offset = cursor - buffer_;
2203   entry->start = cursor;
2204   entry->kind = kUnknown;
2205   entry->end = NULL;
2206 
2207   // Read the initial length. This sets reader_'s offset size.
2208   size_t length_size;
2209   uint64_t length = reader_->ReadInitialLength(cursor, &length_size);
2210   if (length_size > size_t(buffer_end - cursor))
2211     return ReportIncomplete(entry);
2212   cursor += length_size;
2213 
2214   // In a .eh_frame section, a length of zero marks the end of the series
2215   // of entries.
2216   if (length == 0 && eh_frame_) {
2217     entry->kind = kTerminator;
2218     entry->end = cursor;
2219     return true;
2220   }
2221 
2222   // Validate the length.
2223   if (length > size_t(buffer_end - cursor))
2224     return ReportIncomplete(entry);
2225 
2226   // The length is the number of bytes after the initial length field;
2227   // we have that position handy at this point, so compute the end
2228   // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine,
2229   // and the length didn't fit in a size_t, we would have rejected it
2230   // above.)
2231   entry->end = cursor + length;
2232 
2233   // Parse the next field: either the offset of a CIE or a CIE id.
2234   size_t offset_size = reader_->OffsetSize();
2235   if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry);
2236   entry->id = reader_->ReadOffset(cursor);
2237 
2238   // Don't advance cursor past id field yet; in .eh_frame data we need
2239   // the id's position to compute the section offset of an FDE's CIE.
2240 
2241   // Now we can decide what kind of entry this is.
2242   if (eh_frame_) {
2243     // In .eh_frame data, an ID of zero marks the entry as a CIE, and
2244     // anything else is an offset from the id field of the FDE to the start
2245     // of the CIE.
2246     if (entry->id == 0) {
2247       entry->kind = kCIE;
2248     } else {
2249       entry->kind = kFDE;
2250       // Turn the offset from the id into an offset from the buffer's start.
2251       entry->id = (cursor - buffer_) - entry->id;
2252     }
2253   } else {
2254     // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the
2255     // offset size for the entry) marks the entry as a CIE, and anything
2256     // else is the offset of the CIE from the beginning of the section.
2257     if (offset_size == 4)
2258       entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE;
2259     else {
2260       assert(offset_size == 8);
2261       entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE;
2262     }
2263   }
2264 
2265   // Now advance cursor past the id.
2266    cursor += offset_size;
2267 
2268   // The fields specific to this kind of entry start here.
2269   entry->fields = cursor;
2270 
2271   entry->cie = NULL;
2272 
2273   return true;
2274 }
2275 
ReadCIEFields(CIE * cie)2276 bool CallFrameInfo::ReadCIEFields(CIE *cie) {
2277   const uint8_t *cursor = cie->fields;
2278   size_t len;
2279 
2280   assert(cie->kind == kCIE);
2281 
2282   // Prepare for early exit.
2283   cie->version = 0;
2284   cie->augmentation.clear();
2285   cie->code_alignment_factor = 0;
2286   cie->data_alignment_factor = 0;
2287   cie->return_address_register = 0;
2288   cie->has_z_augmentation = false;
2289   cie->pointer_encoding = DW_EH_PE_absptr;
2290   cie->instructions = 0;
2291 
2292   // Parse the version number.
2293   if (cie->end - cursor < 1)
2294     return ReportIncomplete(cie);
2295   cie->version = reader_->ReadOneByte(cursor);
2296   cursor++;
2297 
2298   // If we don't recognize the version, we can't parse any more fields of the
2299   // CIE. For DWARF CFI, we handle versions 1 through 4 (there was never a
2300   // version 2 of CFI data). For .eh_frame, we handle versions 1 and 4 as well;
2301   // the difference between those versions seems to be the same as for
2302   // .debug_frame.
2303   if (cie->version < 1 || cie->version > 4) {
2304     reporter_->UnrecognizedVersion(cie->offset, cie->version);
2305     return false;
2306   }
2307 
2308   const uint8_t *augmentation_start = cursor;
2309   const uint8_t *augmentation_end =
2310       reinterpret_cast<const uint8_t *>(memchr(augmentation_start, '\0',
2311                                                cie->end - augmentation_start));
2312   if (! augmentation_end) return ReportIncomplete(cie);
2313   cursor = augmentation_end;
2314   cie->augmentation = string(reinterpret_cast<const char *>(augmentation_start),
2315                              cursor - augmentation_start);
2316   // Skip the terminating '\0'.
2317   cursor++;
2318 
2319   // Is this CFI augmented?
2320   if (!cie->augmentation.empty()) {
2321     // Is it an augmentation we recognize?
2322     if (cie->augmentation[0] == DW_Z_augmentation_start) {
2323       // Linux C++ ABI 'z' augmentation, used for exception handling data.
2324       cie->has_z_augmentation = true;
2325     } else {
2326       // Not an augmentation we recognize. Augmentations can have arbitrary
2327       // effects on the form of rest of the content, so we have to give up.
2328       reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
2329       return false;
2330     }
2331   }
2332 
2333   if (cie->version >= 4) {
2334     cie->address_size = *cursor++;
2335     if (cie->address_size != 8 && cie->address_size != 4) {
2336       reporter_->UnexpectedAddressSize(cie->offset, cie->address_size);
2337       return false;
2338     }
2339 
2340     cie->segment_size = *cursor++;
2341     if (cie->segment_size != 0) {
2342       reporter_->UnexpectedSegmentSize(cie->offset, cie->segment_size);
2343       return false;
2344     }
2345   }
2346 
2347   // Parse the code alignment factor.
2348   cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len);
2349   if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2350   cursor += len;
2351 
2352   // Parse the data alignment factor.
2353   cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len);
2354   if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2355   cursor += len;
2356 
2357   // Parse the return address register. This is a ubyte in version 1, and
2358   // a ULEB128 in version 3.
2359   if (cie->version == 1) {
2360     if (cursor >= cie->end) return ReportIncomplete(cie);
2361     cie->return_address_register = uint8_t(*cursor++);
2362   } else {
2363     cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len);
2364     if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2365     cursor += len;
2366   }
2367 
2368   // If we have a 'z' augmentation string, find the augmentation data and
2369   // use the augmentation string to parse it.
2370   if (cie->has_z_augmentation) {
2371     uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len);
2372     if (size_t(cie->end - cursor) < len + data_size)
2373       return ReportIncomplete(cie);
2374     cursor += len;
2375     const uint8_t *data = cursor;
2376     cursor += data_size;
2377     const uint8_t *data_end = cursor;
2378 
2379     cie->has_z_lsda = false;
2380     cie->has_z_personality = false;
2381     cie->has_z_signal_frame = false;
2382 
2383     // Walk the augmentation string, and extract values from the
2384     // augmentation data as the string directs.
2385     for (size_t i = 1; i < cie->augmentation.size(); i++) {
2386       switch (cie->augmentation[i]) {
2387         case DW_Z_has_LSDA:
2388           // The CIE's augmentation data holds the language-specific data
2389           // area pointer's encoding, and the FDE's augmentation data holds
2390           // the pointer itself.
2391           cie->has_z_lsda = true;
2392           // Fetch the LSDA encoding from the augmentation data.
2393           if (data >= data_end) return ReportIncomplete(cie);
2394           cie->lsda_encoding = DwarfPointerEncoding(*data++);
2395           if (!reader_->ValidEncoding(cie->lsda_encoding)) {
2396             reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding);
2397             return false;
2398           }
2399           // Don't check if the encoding is usable here --- we haven't
2400           // read the FDE's fields yet, so we're not prepared for
2401           // DW_EH_PE_funcrel, although that's a fine encoding for the
2402           // LSDA to use, since it appears in the FDE.
2403           break;
2404 
2405         case DW_Z_has_personality_routine:
2406           // The CIE's augmentation data holds the personality routine
2407           // pointer's encoding, followed by the pointer itself.
2408           cie->has_z_personality = true;
2409           // Fetch the personality routine pointer's encoding from the
2410           // augmentation data.
2411           if (data >= data_end) return ReportIncomplete(cie);
2412           cie->personality_encoding = DwarfPointerEncoding(*data++);
2413           if (!reader_->ValidEncoding(cie->personality_encoding)) {
2414             reporter_->InvalidPointerEncoding(cie->offset,
2415                                               cie->personality_encoding);
2416             return false;
2417           }
2418           if (!reader_->UsableEncoding(cie->personality_encoding)) {
2419             reporter_->UnusablePointerEncoding(cie->offset,
2420                                                cie->personality_encoding);
2421             return false;
2422           }
2423           // Fetch the personality routine's pointer itself from the data.
2424           cie->personality_address =
2425             reader_->ReadEncodedPointer(data, cie->personality_encoding,
2426                                         &len);
2427           if (len > size_t(data_end - data))
2428             return ReportIncomplete(cie);
2429           data += len;
2430           break;
2431 
2432         case DW_Z_has_FDE_address_encoding:
2433           // The CIE's augmentation data holds the pointer encoding to use
2434           // for addresses in the FDE.
2435           if (data >= data_end) return ReportIncomplete(cie);
2436           cie->pointer_encoding = DwarfPointerEncoding(*data++);
2437           if (!reader_->ValidEncoding(cie->pointer_encoding)) {
2438             reporter_->InvalidPointerEncoding(cie->offset,
2439                                               cie->pointer_encoding);
2440             return false;
2441           }
2442           if (!reader_->UsableEncoding(cie->pointer_encoding)) {
2443             reporter_->UnusablePointerEncoding(cie->offset,
2444                                                cie->pointer_encoding);
2445             return false;
2446           }
2447           break;
2448 
2449         case DW_Z_is_signal_trampoline:
2450           // Frames using this CIE are signal delivery frames.
2451           cie->has_z_signal_frame = true;
2452           break;
2453 
2454         default:
2455           // An augmentation we don't recognize.
2456           reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
2457           return false;
2458       }
2459     }
2460   }
2461 
2462   // The CIE's instructions start here.
2463   cie->instructions = cursor;
2464 
2465   return true;
2466 }
2467 
ReadFDEFields(FDE * fde)2468 bool CallFrameInfo::ReadFDEFields(FDE *fde) {
2469   const uint8_t *cursor = fde->fields;
2470   size_t size;
2471 
2472   fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding,
2473                                              &size);
2474   if (size > size_t(fde->end - cursor))
2475     return ReportIncomplete(fde);
2476   cursor += size;
2477   reader_->SetFunctionBase(fde->address);
2478 
2479   // For the length, we strip off the upper nybble of the encoding used for
2480   // the starting address.
2481   DwarfPointerEncoding length_encoding =
2482     DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f);
2483   fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size);
2484   if (size > size_t(fde->end - cursor))
2485     return ReportIncomplete(fde);
2486   cursor += size;
2487 
2488   // If the CIE has a 'z' augmentation string, then augmentation data
2489   // appears here.
2490   if (fde->cie->has_z_augmentation) {
2491     uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size);
2492     if (size_t(fde->end - cursor) < size + data_size)
2493       return ReportIncomplete(fde);
2494     cursor += size;
2495 
2496     // In the abstract, we should walk the augmentation string, and extract
2497     // items from the FDE's augmentation data as we encounter augmentation
2498     // string characters that specify their presence: the ordering of items
2499     // in the augmentation string determines the arrangement of values in
2500     // the augmentation data.
2501     //
2502     // In practice, there's only ever one value in FDE augmentation data
2503     // that we support --- the LSDA pointer --- and we have to bail if we
2504     // see any unrecognized augmentation string characters. So if there is
2505     // anything here at all, we know what it is, and where it starts.
2506     if (fde->cie->has_z_lsda) {
2507       // Check whether the LSDA's pointer encoding is usable now: only once
2508       // we've parsed the FDE's starting address do we call reader_->
2509       // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes
2510       // usable.
2511       if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) {
2512         reporter_->UnusablePointerEncoding(fde->cie->offset,
2513                                            fde->cie->lsda_encoding);
2514         return false;
2515       }
2516 
2517       fde->lsda_address =
2518         reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size);
2519       if (size > data_size)
2520         return ReportIncomplete(fde);
2521       // Ideally, we would also complain here if there were unconsumed
2522       // augmentation data.
2523     }
2524 
2525     cursor += data_size;
2526   }
2527 
2528   // The FDE's instructions start after those.
2529   fde->instructions = cursor;
2530 
2531   return true;
2532 }
2533 
// Walk every entry in buffer_, parsing each FDE together with the CIE it
// refers to, and report the results to handler_. Problems are reported
// through reporter_. Returns true only if every entry visited was
// processed successfully (or was deliberately skipped); returns false if
// any entry failed or if an entry's extent could not be determined.
bool CallFrameInfo::Start() {
  const uint8_t *buffer_end = buffer_ + buffer_length_;
  const uint8_t *cursor;
  bool all_ok = true;
  const uint8_t *entry_end;
  bool ok;

  // Traverse all the entries in buffer_, skipping CIEs and offering
  // FDEs to the handler.
  //
  // The increment expression runs on every 'continue' as well as at the
  // bottom of the body: it advances to the next entry and folds this
  // entry's result into all_ok. A 'break' skips it.
  for (cursor = buffer_; cursor < buffer_end;
       cursor = entry_end, all_ok = all_ok && ok) {
    FDE fde;

    // Make it easy to skip this entry with 'continue': assume that
    // things are not okay until we've checked all the data, and
    // prepare the address of the next entry.
    ok = false;

    // Read the entry's prologue.
    if (!ReadEntryPrologue(cursor, &fde)) {
      if (!fde.end) {
        // If we couldn't even figure out this entry's extent, then we
        // must stop processing entries altogether.
        all_ok = false;
        break;
      }
      entry_end = fde.end;
      continue;
    }

    // The next iteration picks up after this entry.
    entry_end = fde.end;

    // Did we see an .eh_frame terminating mark?
    if (fde.kind == kTerminator) {
      // If there appears to be more data left in the section after the
      // terminating mark, warn the user. But this is just a warning;
      // we leave all_ok unchanged.
      if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset);
      break;
    }

    // In this loop, we skip CIEs. We only parse them fully when we
    // parse an FDE that refers to them. This limits our memory
    // consumption (beyond the buffer itself) to that needed to
    // process the largest single entry.
    if (fde.kind != kFDE) {
      ok = true;
      continue;
    }

    // Validate the CIE pointer. At this point fde.id is used as an offset
    // from the start of buffer_.
    if (fde.id > buffer_length_) {
      reporter_->CIEPointerOutOfRange(fde.offset, fde.id);
      continue;
    }

    CIE cie;

    // Parse this FDE's CIE header.
    if (!ReadEntryPrologue(buffer_ + fde.id, &cie))
      continue;
    // This had better be an actual CIE.
    if (cie.kind != kCIE) {
      reporter_->BadCIEId(fde.offset, fde.id);
      continue;
    }
    if (!ReadCIEFields(&cie))
      continue;

    // TODO(nbilling): This could lead to strange behavior if a single buffer
    // contained a mixture of DWARF versions as well as address sizes. Not
    // sure if it's worth handling such a case.

    // DWARF4 CIE specifies address_size, so use it for this call frame.
    if (cie.version >= 4) {
      reader_->SetAddressSize(cie.address_size);
    }

    // We now have the values that govern both the CIE and the FDE.
    cie.cie = &cie;
    fde.cie = &cie;

    // Parse the FDE's header.
    // NOTE(review): ReadFDEFields calls reader_->SetFunctionBase, but the
    // 'continue' paths from here down to the State construction skip the
    // ClearFunctionBase call at the bottom of the loop --- confirm that
    // leaving the function base set across entries is intended.
    if (!ReadFDEFields(&fde))
      continue;

    // Call Entry to ask the consumer if they're interested.
    if (!handler_->Entry(fde.offset, fde.address, fde.size,
                         cie.version, cie.augmentation,
                         cie.return_address_register)) {
      // The handler isn't interested in this entry. That's not an error.
      ok = true;
      continue;
    }

    // If any of the handler callbacks below returns false, we abandon
    // this entry with ok still false, and without calling handler_->End.
    if (cie.has_z_augmentation) {
      // Report the personality routine address, if we have one.
      if (cie.has_z_personality) {
        if (!handler_
            ->PersonalityRoutine(cie.personality_address,
                                 IsIndirectEncoding(cie.personality_encoding)))
          continue;
      }

      // Report the language-specific data area address, if we have one.
      if (cie.has_z_lsda) {
        if (!handler_
            ->LanguageSpecificDataArea(fde.lsda_address,
                                       IsIndirectEncoding(cie.lsda_encoding)))
          continue;
      }

      // If this is a signal-handling frame, report that.
      if (cie.has_z_signal_frame) {
        if (!handler_->SignalHandler())
          continue;
      }
    }

    // Interpret the CIE's instructions, and then the FDE's instructions.
    // The FDE is interpreted only if the CIE was interpreted successfully.
    State state(reader_, handler_, reporter_, fde.address);
    ok = state.InterpretCIE(cie) && state.InterpretFDE(fde);

    // Tell the ByteReader that the function start address from the
    // FDE header is no longer valid.
    reader_->ClearFunctionBase();

    // Report the end of the entry.
    handler_->End();
  }

  return all_ok;
}
2668 
KindName(EntryKind kind)2669 const char *CallFrameInfo::KindName(EntryKind kind) {
2670   if (kind == CallFrameInfo::kUnknown)
2671     return "entry";
2672   else if (kind == CallFrameInfo::kCIE)
2673     return "common information entry";
2674   else if (kind == CallFrameInfo::kFDE)
2675     return "frame description entry";
2676   else {
2677     assert (kind == CallFrameInfo::kTerminator);
2678     return ".eh_frame sequence terminator";
2679   }
2680 }
2681 
ReportIncomplete(Entry * entry)2682 bool CallFrameInfo::ReportIncomplete(Entry *entry) {
2683   reporter_->Incomplete(entry->offset, entry->kind);
2684   return false;
2685 }
2686 
Incomplete(uint64_t offset,CallFrameInfo::EntryKind kind)2687 void CallFrameInfo::Reporter::Incomplete(uint64_t offset,
2688                                          CallFrameInfo::EntryKind kind) {
2689   fprintf(stderr,
2690           "%s: CFI %s at offset 0x%" PRIx64 " in '%s': entry ends early\n",
2691           filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2692           section_.c_str());
2693 }
2694 
EarlyEHTerminator(uint64_t offset)2695 void CallFrameInfo::Reporter::EarlyEHTerminator(uint64_t offset) {
2696   fprintf(stderr,
2697           "%s: CFI at offset 0x%" PRIx64 " in '%s': saw end-of-data marker"
2698           " before end of section contents\n",
2699           filename_.c_str(), offset, section_.c_str());
2700 }
2701 
CIEPointerOutOfRange(uint64_t offset,uint64_t cie_offset)2702 void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64_t offset,
2703                                                    uint64_t cie_offset) {
2704   fprintf(stderr,
2705           "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
2706           " CIE pointer is out of range: 0x%" PRIx64 "\n",
2707           filename_.c_str(), offset, section_.c_str(), cie_offset);
2708 }
2709 
BadCIEId(uint64_t offset,uint64_t cie_offset)2710 void CallFrameInfo::Reporter::BadCIEId(uint64_t offset, uint64_t cie_offset) {
2711   fprintf(stderr,
2712           "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
2713           " CIE pointer does not point to a CIE: 0x%" PRIx64 "\n",
2714           filename_.c_str(), offset, section_.c_str(), cie_offset);
2715 }
2716 
UnexpectedAddressSize(uint64_t offset,uint8_t address_size)2717 void CallFrameInfo::Reporter::UnexpectedAddressSize(uint64_t offset,
2718                                                     uint8_t address_size) {
2719   fprintf(stderr,
2720           "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
2721           " CIE specifies unexpected address size: %d\n",
2722           filename_.c_str(), offset, section_.c_str(), address_size);
2723 }
2724 
UnexpectedSegmentSize(uint64_t offset,uint8_t segment_size)2725 void CallFrameInfo::Reporter::UnexpectedSegmentSize(uint64_t offset,
2726                                                     uint8_t segment_size) {
2727   fprintf(stderr,
2728           "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
2729           " CIE specifies unexpected segment size: %d\n",
2730           filename_.c_str(), offset, section_.c_str(), segment_size);
2731 }
2732 
UnrecognizedVersion(uint64_t offset,int version)2733 void CallFrameInfo::Reporter::UnrecognizedVersion(uint64_t offset, int version) {
2734   fprintf(stderr,
2735           "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
2736           " CIE specifies unrecognized version: %d\n",
2737           filename_.c_str(), offset, section_.c_str(), version);
2738 }
2739 
UnrecognizedAugmentation(uint64_t offset,const string & aug)2740 void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64_t offset,
2741                                                        const string &aug) {
2742   fprintf(stderr,
2743           "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
2744           " CIE specifies unrecognized augmentation: '%s'\n",
2745           filename_.c_str(), offset, section_.c_str(), aug.c_str());
2746 }
2747 
InvalidPointerEncoding(uint64_t offset,uint8_t encoding)2748 void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64_t offset,
2749                                                      uint8_t encoding) {
2750   fprintf(stderr,
2751           "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
2752           " 'z' augmentation specifies invalid pointer encoding: 0x%02x\n",
2753           filename_.c_str(), offset, section_.c_str(), encoding);
2754 }
2755 
UnusablePointerEncoding(uint64_t offset,uint8_t encoding)2756 void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64_t offset,
2757                                                       uint8_t encoding) {
2758   fprintf(stderr,
2759           "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
2760           " 'z' augmentation specifies a pointer encoding for which"
2761           " we have no base address: 0x%02x\n",
2762           filename_.c_str(), offset, section_.c_str(), encoding);
2763 }
2764 
RestoreInCIE(uint64_t offset,uint64_t insn_offset)2765 void CallFrameInfo::Reporter::RestoreInCIE(uint64_t offset, uint64_t insn_offset) {
2766   fprintf(stderr,
2767           "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
2768           " the DW_CFA_restore instruction at offset 0x%" PRIx64
2769           " cannot be used in a common information entry\n",
2770           filename_.c_str(), offset, section_.c_str(), insn_offset);
2771 }
2772 
BadInstruction(uint64_t offset,CallFrameInfo::EntryKind kind,uint64_t insn_offset)2773 void CallFrameInfo::Reporter::BadInstruction(uint64_t offset,
2774                                              CallFrameInfo::EntryKind kind,
2775                                              uint64_t insn_offset) {
2776   fprintf(stderr,
2777           "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
2778           " the instruction at offset 0x%" PRIx64 " is unrecognized\n",
2779           filename_.c_str(), CallFrameInfo::KindName(kind),
2780           offset, section_.c_str(), insn_offset);
2781 }
2782 
NoCFARule(uint64_t offset,CallFrameInfo::EntryKind kind,uint64_t insn_offset)2783 void CallFrameInfo::Reporter::NoCFARule(uint64_t offset,
2784                                         CallFrameInfo::EntryKind kind,
2785                                         uint64_t insn_offset) {
2786   fprintf(stderr,
2787           "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
2788           " the instruction at offset 0x%" PRIx64 " assumes that a CFA rule has"
2789           " been set, but none has been set\n",
2790           filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2791           section_.c_str(), insn_offset);
2792 }
2793 
EmptyStateStack(uint64_t offset,CallFrameInfo::EntryKind kind,uint64_t insn_offset)2794 void CallFrameInfo::Reporter::EmptyStateStack(uint64_t offset,
2795                                               CallFrameInfo::EntryKind kind,
2796                                               uint64_t insn_offset) {
2797   fprintf(stderr,
2798           "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
2799           " the DW_CFA_restore_state instruction at offset 0x%" PRIx64
2800           " should pop a saved state from the stack, but the stack is empty\n",
2801           filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2802           section_.c_str(), insn_offset);
2803 }
2804 
ClearingCFARule(uint64_t offset,CallFrameInfo::EntryKind kind,uint64_t insn_offset)2805 void CallFrameInfo::Reporter::ClearingCFARule(uint64_t offset,
2806                                               CallFrameInfo::EntryKind kind,
2807                                               uint64_t insn_offset) {
2808   fprintf(stderr,
2809           "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
2810           " the DW_CFA_restore_state instruction at offset 0x%" PRIx64
2811           " would clear the CFA rule in effect\n",
2812           filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2813           section_.c_str(), insn_offset);
2814 }
2815 
2816 }  // namespace dwarf2reader
2817