1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "dwarf_info.h"
18 
19 #include <cinttypes>
20 #include <queue>
21 
22 #include "berberis/base/stringprintf.h"
23 
24 #include "dwarf_constants.h"
25 
26 namespace nogrod {
27 
28 namespace {
29 
30 using berberis::StringPrintf;
31 
32 class DwarfParser {
33  public:
DwarfParser(const Buffer<uint8_t> * abbrev_buf,const Buffer<uint8_t> * info_buf,const StringTable * debug_str_table,const std::optional<StringOffsetTable> & string_offset_table)34   DwarfParser(const Buffer<uint8_t>* abbrev_buf,
35               const Buffer<uint8_t>* info_buf,
36               const StringTable* debug_str_table,
37               const std::optional<StringOffsetTable>& string_offset_table)
38       : abbrev_buf_{abbrev_buf},
39         info_buf_{info_buf},
40         debug_str_table_{debug_str_table},
41         string_offset_table_{string_offset_table} {}
42 
ReadDwarfInfo(std::vector<std::unique_ptr<DwarfCompilationUnit>> * compilation_units,std::unordered_map<uint64_t,std::unique_ptr<DwarfDie>> * die_map,std::string * error_msg)43   [[nodiscard]] bool ReadDwarfInfo(
44       std::vector<std::unique_ptr<DwarfCompilationUnit>>* compilation_units,
45       std::unordered_map<uint64_t, std::unique_ptr<DwarfDie>>* die_map,
46       std::string* error_msg) {
47     ByteInputStream bs(info_buf_->data(), info_buf_->size());
48     DwarfContext context(&bs, debug_str_table_, string_offset_table_);
49 
50     while (bs.available()) {
51       std::unique_ptr<DwarfCompilationUnit> cu = ReadCompilationUnit(&context, die_map, error_msg);
52       if (!cu) {
53         return false;
54       }
55       compilation_units->push_back(std::move(cu));
56     }
57 
58     return true;
59   }
60 
61  private:
ReadAttribute(const DwarfCompilationUnitHeader * cu,const DwarfAbbrevAttribute * abbrev_attr,DwarfContext * context,std::string * error_msg)62   [[nodiscard]] static std::unique_ptr<DwarfAttribute> ReadAttribute(
63       const DwarfCompilationUnitHeader* cu,
64       const DwarfAbbrevAttribute* abbrev_attr,
65       DwarfContext* context,
66       std::string* error_msg) {
67     const DwarfClass* attribute_class = abbrev_attr->dwarf_class();
68     return attribute_class->ReadAttribute(cu, abbrev_attr, context, error_msg);
69   }
70 
ReadOneDie(DwarfContext * context,const DwarfDie * parent_die,const DwarfCompilationUnitHeader * cu,const std::unordered_map<uint64_t,DwarfAbbrev> * abbrev_map,std::unordered_map<uint64_t,std::unique_ptr<DwarfDie>> * die_map,std::string * error_msg)71   [[nodiscard]] static const DwarfDie* ReadOneDie(
72       DwarfContext* context,
73       const DwarfDie* parent_die,
74       const DwarfCompilationUnitHeader* cu,
75       const std::unordered_map<uint64_t, DwarfAbbrev>* abbrev_map,
76       std::unordered_map<uint64_t, std::unique_ptr<DwarfDie>>* die_map,
77       std::string* error_msg) {
78     ByteInputStream* bs = context->info_stream();
79 
80     uint64_t offset = bs->offset();
81     uint64_t abbrev_code = bs->ReadLeb128();
82 
83     if (abbrev_code == 0) {
84       // null-die
85       std::unique_ptr<DwarfDie> null_die(new DwarfDie(cu, parent_die, offset, 0));
86       const DwarfDie* result = null_die.get();
87       (*die_map)[offset] = std::move(null_die);
88       return result;
89     }
90 
91     auto it = abbrev_map->find(abbrev_code);
92     if (it == abbrev_map->end()) {
93       *error_msg = StringPrintf("<%" PRIx64 "> Abbrev code %" PRId64
94                                 " was not found in .debug_abbrev "
95                                 "with offset %" PRIx64,
96                                 bs->offset(),
97                                 abbrev_code,
98                                 cu->abbrev_offset());
99       return nullptr;
100     }
101 
102     auto& abbrev = it->second;
103 
104     std::unique_ptr<DwarfDie> die(new DwarfDie(cu, parent_die, offset, abbrev.tag()));
105 
106     for (auto& abbrev_attr : abbrev.attributes()) {
107       std::unique_ptr<DwarfAttribute> attribute =
108           ReadAttribute(cu, abbrev_attr.get(), context, error_msg);
109       if (!attribute) {
110         return nullptr;
111       }
112 
113       if (attribute->name() == DW_AT_str_offsets_base) {
114         if (abbrev.tag() != DW_TAG_compile_unit) {
115           *error_msg = StringPrintf(
116               "<%" PRIx64
117               "> DW_AT_str_offsets_base is only supported for DW_TAG_compile_unit abbrev.",
118               bs->offset());
119           return nullptr;
120         }
121 
122         context->SetStrOffsetsBase(attribute->Uint64Value().value());
123       }
124 
125       die->AddAttribute(attribute.release());
126     }
127 
128     die->ResolveAttributes(context);
129 
130     if (abbrev.has_children()) {
131       while (true) {
132         const DwarfDie* child_die =
133             ReadOneDie(context, die.get(), cu, abbrev_map, die_map, error_msg);
134         if (!child_die) {
135           return nullptr;
136         }
137 
138         if (child_die->tag() == 0) {
139           break;
140         }
141 
142         die->AddChild(child_die);
143       }
144     }
145 
146     const DwarfDie* result = die.get();
147 
148     (*die_map)[offset] = std::move(die);
149 
150     return result;
151   }
152 
ReadCompilationUnit(DwarfContext * context,std::unordered_map<uint64_t,std::unique_ptr<DwarfDie>> * die_map,std::string * error_msg)153   [[nodiscard]] std::unique_ptr<DwarfCompilationUnit> ReadCompilationUnit(
154       DwarfContext* context,
155       std::unordered_map<uint64_t, std::unique_ptr<DwarfDie>>* die_map,
156       std::string* error_msg) {
157     ByteInputStream* bs = context->info_stream();
158 
159     uint64_t offset = bs->offset();
160 
161     uint64_t unit_length = bs->ReadUint32();
162     bool is_dwarf64 = false;
163     if (unit_length == 0xFFFFFFFF) {
164       unit_length = bs->ReadUint64();
165       is_dwarf64 = true;
166     }
167 
168     uint16_t version = bs->ReadUint16();
169     uint64_t abbrev_offset;
170     uint8_t address_size;
171 
172     if (version >= 2 && version <= 4) {
173       abbrev_offset = is_dwarf64 ? bs->ReadUint64() : bs->ReadUint32();
174       address_size = bs->ReadUint8();
175     } else if (version == 5) {
176       uint8_t unit_type = bs->ReadUint8();
177       // TODO(dimitry): can a .so file have DW_UT_partial CUs?
178       if (unit_type != DW_UT_compile) {
179         *error_msg =
180             StringPrintf("Unsupported DWARF5 compilation unit type encoding: %x", unit_type);
181         return nullptr;
182       }
183 
184       address_size = bs->ReadUint8();
185       abbrev_offset = is_dwarf64 ? bs->ReadUint64() : bs->ReadUint32();
186     } else {
187       *error_msg =
188           StringPrintf("Unsupported dwarf version: %d, CU offset: 0x%" PRIx64, version, offset);
189       return nullptr;
190     }
191 
192     std::unique_ptr<DwarfCompilationUnit> cu(new DwarfCompilationUnit(
193         offset, unit_length, version, abbrev_offset, address_size, is_dwarf64));
194 
195     // Even though in .so files abbrev codes is a sequence [1..n]
196     // the spec does not specify this as a requirement. Therefore
197     // it is safer to use unordered_map.
198     std::unordered_map<uint64_t, DwarfAbbrev>* abbrev_map =
199         ReadAbbrev(version, abbrev_offset, error_msg);
200 
201     if (abbrev_map == nullptr) {
202       *error_msg =
203           StringPrintf("error reading abbrev for compilation unit at offset 0x%" PRIx64 ": %s",
204                        offset,
205                        error_msg->c_str());
206       return nullptr;
207     }
208 
209     // We expect this attribute to be set if needed in the DW_TAG_compile_unit die.
210     context->ResetStrOffsetsBase();
211 
212     // CU consists of one DIE (DW_TAG_compile_unit) - read it
213     const DwarfDie* cu_die =
214         ReadOneDie(context, nullptr, &cu->header(), abbrev_map, die_map, error_msg);
215 
216     if (!cu_die) {
217       return nullptr;
218     }
219 
220     if (cu_die->tag() != DW_TAG_compile_unit) {
221       *error_msg = StringPrintf(
222           "Unexpected DIE tag for Compilation Unit: %d, expected DW_TAG_compile_unit(%d)",
223           cu_die->tag(),
224           DW_TAG_compile_unit);
225       return nullptr;
226     }
227 
228     cu->SetDie(cu_die);
229 
230     return cu;
231   }
232 
ReadAbbrev(uint16_t version,uint64_t offset,std::string * error_msg)233   std::unordered_map<uint64_t, DwarfAbbrev>* ReadAbbrev(uint16_t version,
234                                                         uint64_t offset,
235                                                         std::string* error_msg) {
236     auto it = abbrevs_.find(offset);
237     if (it != abbrevs_.end()) {
238       return &it->second;
239     }
240 
241     if (offset >= abbrev_buf_->size()) {
242       *error_msg = StringPrintf(
243           "abbrev offset (%" PRId64 ") is out of bounds: %" PRId64, offset, abbrev_buf_->size());
244       return nullptr;
245     }
246 
247     std::unordered_map<uint64_t, DwarfAbbrev> abbrev_map;
248     ByteInputStream bs(abbrev_buf_->data() + offset, abbrev_buf_->size() - offset);
249     while (true) {
250       uint64_t code = bs.ReadLeb128();
251 
252       // The abbreviations for a given compilation unit end with an entry consisting of a 0 byte
253       // for the abbreviation code.
254       if (code == 0) {
255         break;
256       }
257 
258       uint64_t entry_tag = bs.ReadLeb128();
259       uint8_t has_children = bs.ReadUint8();
260 
261       DwarfAbbrev abbrev(code, entry_tag, has_children == DW_CHILDREN_yes);
262 
263       while (true) {
264         uint64_t attr_offset = offset + bs.offset();
265         uint64_t attr_name = bs.ReadLeb128();
266         uint64_t attr_form = bs.ReadLeb128();
267         int64_t value = 0;
268         // The series of attribute specifications ends with an entry containing 0 for the name
269         // and 0 for the form.
270         if (attr_name == 0 && attr_form == 0) {
271           break;
272         }
273 
274         // "The attribute form DW_FORM_implicit_const is another special case. For
275         // attributes with this form, the attribute specification contains a third part, which is
276         // a signed LEB128 number."
277 
278         if (attr_form == DW_FORM_implicit_const) {
279           value = bs.ReadSleb128();
280         }
281 
282         std::unique_ptr<const DwarfAbbrevAttribute> abbrev_attribute =
283             DwarfAbbrevAttribute::CreateAbbrevAttribute(
284                 version, attr_name, attr_form, value, error_msg);
285 
286         if (!abbrev_attribute) {
287           *error_msg =
288               StringPrintf("error getting attribute at debug_abbrev offset 0x%" PRIx64 ": %s",
289                            attr_offset,
290                            error_msg->c_str());
291           return nullptr;
292         }
293         abbrev.AddAttribute(std::move(abbrev_attribute));
294       }
295 
296       abbrev_map[code] = std::move(abbrev);
297     }
298 
299     abbrevs_[offset] = std::move(abbrev_map);
300     return &abbrevs_[offset];
301   }
302 
303  private:
304   const Buffer<uint8_t>* abbrev_buf_;
305   const Buffer<uint8_t>* info_buf_;
306   const StringTable* debug_str_table_;
307   const std::optional<StringOffsetTable>& string_offset_table_;
308 
309   std::unordered_map<uint64_t, std::unordered_map<uint64_t, DwarfAbbrev>> abbrevs_;
310 };
311 
312 }  // namespace
313 
DwarfCompilationUnit(uint64_t unit_offset,uint64_t unit_length,uint16_t version,uint64_t abbrev_offset,uint8_t address_size,bool is_dwarf64)314 DwarfCompilationUnit::DwarfCompilationUnit(uint64_t unit_offset,
315                                            uint64_t unit_length,
316                                            uint16_t version,
317                                            uint64_t abbrev_offset,
318                                            uint8_t address_size,
319                                            bool is_dwarf64)
320     : header_(unit_offset, unit_length, version, abbrev_offset, address_size, is_dwarf64),
321       cu_die_(nullptr) {}
322 
SetDie(const DwarfDie * die)323 void DwarfCompilationUnit::SetDie(const DwarfDie* die) {
324   cu_die_ = die;
325 }
326 
DwarfInfo(Buffer<uint8_t> abbrev_buf,Buffer<uint8_t> info_buf,StringTable string_table,std::optional<StringOffsetTable> string_offset_table)327 DwarfInfo::DwarfInfo(Buffer<uint8_t> abbrev_buf,
328                      Buffer<uint8_t> info_buf,
329                      StringTable string_table,
330                      std::optional<StringOffsetTable> string_offset_table)
331     : abbrev_buf_{std::move(abbrev_buf)},
332       info_buf_{std::move(info_buf)},
333       string_table_{std::move(string_table)},
334       string_offset_table_{std::move(string_offset_table)} {}
335 
Parse(std::string * error_msg)336 bool DwarfInfo::Parse(std::string* error_msg) {
337   DwarfParser parser(&abbrev_buf_, &info_buf_, &string_table_, string_offset_table_);
338   if (!parser.ReadDwarfInfo(&compilation_units_, &die_offset_map_, error_msg)) {
339     return false;
340   }
341 
342   return true;
343 }
344 
FindDiesByName(const std::string & name) const345 std::vector<const DwarfDie*> DwarfInfo::FindDiesByName(const std::string& name) const {
346   std::vector<const DwarfDie*> result;
347 
348   for (auto& cu : compilation_units_) {
349     const DwarfDie* cu_die = cu->GetDie();
350 
351     // DIE and name prefix
352     std::queue<std::pair<const DwarfDie*, std::string>> visit_queue;
353     visit_queue.push(make_pair(cu_die, std::string("")));
354     while (!visit_queue.empty()) {
355       auto current = visit_queue.front();
356       visit_queue.pop();  // why doesn't pop() return the value on the front again?
357       auto current_die = current.first;
358       auto current_prefix = current.second;
359 
360       for (const DwarfDie* child : current_die->children()) {
361         // TODO(random-googler): Can we rely on DW_AT_linkage_name being present for all members?
362         // It looks like if member is not a function (DW_TAG_member) it lacks
363         // DW_AT_linkage_name. There is non-zero chance that this is going to
364         // need a C++ mangler in order to resolve all the names.
365         if (child->tag() == DW_TAG_class_type || child->tag() == DW_TAG_structure_type ||
366             child->tag() == DW_TAG_namespace) {
367           auto die_name = child->GetStringAttribute(DW_AT_name);
368           if (!die_name) {
369             // do not search anonymous dies
370             continue;
371           }
372           visit_queue.push(make_pair(child, current_prefix + die_name.value() + "::"));
373         }
374 
375         auto die_name = child->GetStringAttribute(DW_AT_linkage_name);
376 
377         if (!die_name) {
378           die_name = child->GetStringAttribute(DW_AT_name);
379           if (die_name) {
380             die_name = make_optional(current_prefix + die_name.value());
381           }
382         }
383 
384         if (die_name && die_name.value() == name) {
385           result.push_back(child);
386         }
387       }
388     }
389   }
390 
391   return result;
392 }
393 
GetDieByOffset(uint64_t offset) const394 const DwarfDie* DwarfInfo::GetDieByOffset(uint64_t offset) const {
395   auto it = die_offset_map_.find(offset);
396   if (it == die_offset_map_.end()) {
397     return nullptr;
398   }
399 
400   return it->second.get();
401 }
402 
// Creates a DIE with the given unit header, parent, offset and tag. Attributes and children
// are added afterwards through AddAttribute() and AddChild().
DwarfDie::DwarfDie(const DwarfCompilationUnitHeader* cu,
                   const DwarfDie* parent,
                   uint64_t offset,
                   uint16_t tag)
    : compilation_unit_header_(cu),
      parent_(parent),
      offset_(offset),
      tag_(tag) {
}
408 
AddAttribute(DwarfAttribute * attr)409 void DwarfDie::AddAttribute(DwarfAttribute* attr) {
410   attributes_.push_back(std::unique_ptr<DwarfAttribute>(attr));
411 }
412 
AddChild(const DwarfDie * child)413 void DwarfDie::AddChild(const DwarfDie* child) {
414   children_.push_back(child);
415 }
416 
GetStringAttribute(uint16_t attr_name) const417 std::optional<std::string> DwarfDie::GetStringAttribute(uint16_t attr_name) const {
418   for (auto& attr : attributes_) {
419     if (attr->name() == attr_name) {
420       std::optional<std::string> result = attr->StringValue();
421       CHECK(result.has_value());
422       return result;
423     }
424   }
425   return {};
426 }
427 
GetUint64Attribute(uint16_t attr_name) const428 std::optional<uint64_t> DwarfDie::GetUint64Attribute(uint16_t attr_name) const {
429   for (auto& attr : attributes_) {
430     if (attr->name() == attr_name) {
431       std::optional<uint64_t> result = attr->Uint64Value();
432       CHECK(result.has_value());
433       return result;
434     }
435   }
436   return {};
437 }
438 
GetBoolAttributeOr(uint16_t attr_name,bool default_value) const439 bool DwarfDie::GetBoolAttributeOr(uint16_t attr_name, bool default_value) const {
440   for (auto& attr : attributes_) {
441     if (attr->name() == attr_name) {
442       std::optional<bool> result = attr->BoolValue();
443       CHECK(result.has_value());
444       return result.value();
445     }
446   }
447 
448   return default_value;
449 }
450 
ResolveAttributes(DwarfContext * context)451 void DwarfDie::ResolveAttributes(DwarfContext* context) {
452   for (auto& attr : attributes_) {
453     attr->Resolve(context);
454   }
455 }
456 
457 }  // namespace nogrod
458