1 //===- DWARFAcceleratorTable.h ----------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_DEBUGINFO_DWARFACCELERATORTABLE_H
11 #define LLVM_DEBUGINFO_DWARFACCELERATORTABLE_H
12 
13 #include "llvm/ADT/DenseSet.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/BinaryFormat/Dwarf.h"
16 #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
17 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
18 #include <cstdint>
19 #include <utility>
20 
21 namespace llvm {
22 
23 class raw_ostream;
24 class ScopedPrinter;
25 
26 /// The accelerator tables are designed to allow efficient random access
27 /// (using a symbol name as a key) into debug info by providing an index of the
28 /// debug info DIEs. This class implements the common functionality of Apple and
29 /// DWARF 5 accelerator tables.
30 /// TODO: Generalize the rest of the AppleAcceleratorTable interface and move it
31 /// to this class.
32 class DWARFAcceleratorTable {
33 protected:
34   DWARFDataExtractor AccelSection;
35   DataExtractor StringSection;
36 
37 public:
38   /// An abstract class representing a single entry in the accelerator tables.
39   class Entry {
40   protected:
41     SmallVector<DWARFFormValue, 3> Values;
42 
43     Entry() = default;
44 
45     // Make these protected so only (final) subclasses can be copied around.
46     Entry(const Entry &) = default;
47     Entry(Entry &&) = default;
48     Entry &operator=(const Entry &) = default;
49     Entry &operator=(Entry &&) = default;
50     ~Entry() = default;
51 
52 
53   public:
54     /// Returns the Offset of the Compilation Unit associated with this
55     /// Accelerator Entry or None if the Compilation Unit offset is not recorded
56     /// in this Accelerator Entry.
57     virtual Optional<uint64_t> getCUOffset() const = 0;
58 
59     /// Returns the Tag of the Debug Info Entry associated with this
60     /// Accelerator Entry or None if the Tag is not recorded in this
61     /// Accelerator Entry.
62     virtual Optional<dwarf::Tag> getTag() const = 0;
63 
64     /// Returns the raw values of fields in the Accelerator Entry. In general,
65     /// these can only be interpreted with the help of the metadata in the
66     /// owning Accelerator Table.
getValues()67     ArrayRef<DWARFFormValue> getValues() const { return Values; }
68   };
69 
DWARFAcceleratorTable(const DWARFDataExtractor & AccelSection,DataExtractor StringSection)70   DWARFAcceleratorTable(const DWARFDataExtractor &AccelSection,
71                         DataExtractor StringSection)
72       : AccelSection(AccelSection), StringSection(StringSection) {}
73   virtual ~DWARFAcceleratorTable();
74 
75   virtual llvm::Error extract() = 0;
76   virtual void dump(raw_ostream &OS) const = 0;
77 
78   DWARFAcceleratorTable(const DWARFAcceleratorTable &) = delete;
79   void operator=(const DWARFAcceleratorTable &) = delete;
80 };
81 
82 /// This implements the Apple accelerator table format, a precursor of the
83 /// DWARF 5 accelerator table format.
84 class AppleAcceleratorTable : public DWARFAcceleratorTable {
85   struct Header {
86     uint32_t Magic;
87     uint16_t Version;
88     uint16_t HashFunction;
89     uint32_t BucketCount;
90     uint32_t HashCount;
91     uint32_t HeaderDataLength;
92 
93     void dump(ScopedPrinter &W) const;
94   };
95 
96   struct HeaderData {
97     using AtomType = uint16_t;
98     using Form = dwarf::Form;
99 
100     uint32_t DIEOffsetBase;
101     SmallVector<std::pair<AtomType, Form>, 3> Atoms;
102 
103     Optional<uint64_t> extractOffset(Optional<DWARFFormValue> Value) const;
104   };
105 
106   struct Header Hdr;
107   struct HeaderData HdrData;
108   bool IsValid = false;
109 
110   /// Returns true if we should continue scanning for entries or false if we've
111   /// reached the last (sentinel) entry of encountered a parsing error.
112   bool dumpName(ScopedPrinter &W, SmallVectorImpl<DWARFFormValue> &AtomForms,
113                 uint32_t *DataOffset) const;
114 
115 public:
116   /// Apple-specific implementation of an Accelerator Entry.
117   class Entry final : public DWARFAcceleratorTable::Entry {
118     const HeaderData *HdrData = nullptr;
119 
120     Entry(const HeaderData &Data);
121     Entry() = default;
122 
123     void extract(const AppleAcceleratorTable &AccelTable, uint32_t *Offset);
124 
125   public:
126     Optional<uint64_t> getCUOffset() const override;
127 
128     /// Returns the Section Offset of the Debug Info Entry associated with this
129     /// Accelerator Entry or None if the DIE offset is not recorded in this
130     /// Accelerator Entry. The returned offset is relative to the start of the
131     /// Section containing the DIE.
132     Optional<uint64_t> getDIESectionOffset() const;
133 
134     Optional<dwarf::Tag> getTag() const override;
135 
136     /// Returns the value of the Atom in this Accelerator Entry, if the Entry
137     /// contains such Atom.
138     Optional<DWARFFormValue> lookup(HeaderData::AtomType Atom) const;
139 
140     friend class AppleAcceleratorTable;
141     friend class ValueIterator;
142   };
143 
144   class ValueIterator : public std::iterator<std::input_iterator_tag, Entry> {
145     const AppleAcceleratorTable *AccelTable = nullptr;
146     Entry Current;           ///< The current entry.
147     unsigned DataOffset = 0; ///< Offset into the section.
148     unsigned Data = 0; ///< Current data entry.
149     unsigned NumData = 0; ///< Number of data entries.
150 
151     /// Advance the iterator.
152     void Next();
153   public:
154     /// Construct a new iterator for the entries at \p DataOffset.
155     ValueIterator(const AppleAcceleratorTable &AccelTable, unsigned DataOffset);
156     /// End marker.
157     ValueIterator() = default;
158 
159     const Entry &operator*() const { return Current; }
160     ValueIterator &operator++() { Next(); return *this; }
161     ValueIterator operator++(int) {
162       ValueIterator I = *this;
163       Next();
164       return I;
165     }
166     friend bool operator==(const ValueIterator &A, const ValueIterator &B) {
167       return A.NumData == B.NumData && A.DataOffset == B.DataOffset;
168     }
169     friend bool operator!=(const ValueIterator &A, const ValueIterator &B) {
170       return !(A == B);
171     }
172   };
173 
AppleAcceleratorTable(const DWARFDataExtractor & AccelSection,DataExtractor StringSection)174   AppleAcceleratorTable(const DWARFDataExtractor &AccelSection,
175                         DataExtractor StringSection)
176       : DWARFAcceleratorTable(AccelSection, StringSection) {}
177 
178   llvm::Error extract() override;
179   uint32_t getNumBuckets();
180   uint32_t getNumHashes();
181   uint32_t getSizeHdr();
182   uint32_t getHeaderDataLength();
183 
184   /// Return the Atom description, which can be used to interpret the raw values
185   /// of the Accelerator Entries in this table.
186   ArrayRef<std::pair<HeaderData::AtomType, HeaderData::Form>> getAtomsDesc();
187   bool validateForms();
188 
189   /// Return information related to the DWARF DIE we're looking for when
190   /// performing a lookup by name.
191   ///
192   /// \param HashDataOffset an offset into the hash data table
193   /// \returns <DieOffset, DieTag>
194   /// DieOffset is the offset into the .debug_info section for the DIE
195   /// related to the input hash data offset.
196   /// DieTag is the tag of the DIE
197   std::pair<uint32_t, dwarf::Tag> readAtoms(uint32_t &HashDataOffset);
198   void dump(raw_ostream &OS) const override;
199 
200   /// Look up all entries in the accelerator table matching \c Key.
201   iterator_range<ValueIterator> equal_range(StringRef Key) const;
202 };
203 
204 /// .debug_names section consists of one or more units. Each unit starts with a
205 /// header, which is followed by a list of compilation units, local and foreign
206 /// type units.
207 ///
208 /// These may be followed by an (optional) hash lookup table, which consists of
209 /// an array of buckets and hashes similar to the apple tables above. The only
210 /// difference is that the hashes array is 1-based, and consequently an empty
211 /// bucket is denoted by 0 and not UINT32_MAX.
212 ///
213 /// Next is the name table, which consists of an array of names and array of
214 /// entry offsets. This is different from the apple tables, which store names
215 /// next to the actual entries.
216 ///
217 /// The structure of the entries is described by an abbreviations table, which
218 /// comes after the name table. Unlike the apple tables, which have a uniform
219 /// entry structure described in the header, each .debug_names entry may have
220 /// different index attributes (DW_IDX_???) attached to it.
221 ///
222 /// The last segment consists of a list of entries, which is a 0-terminated list
223 /// referenced by the name table and interpreted with the help of the
224 /// abbreviation table.
225 class DWARFDebugNames : public DWARFAcceleratorTable {
226   /// The fixed-size part of a Dwarf 5 Name Index header
227   struct HeaderPOD {
228     uint32_t UnitLength;
229     uint16_t Version;
230     uint16_t Padding;
231     uint32_t CompUnitCount;
232     uint32_t LocalTypeUnitCount;
233     uint32_t ForeignTypeUnitCount;
234     uint32_t BucketCount;
235     uint32_t NameCount;
236     uint32_t AbbrevTableSize;
237     uint32_t AugmentationStringSize;
238   };
239 
240 public:
241   class NameIndex;
242   class NameIterator;
243   class ValueIterator;
244 
245   /// Dwarf 5 Name Index header.
246   struct Header : public HeaderPOD {
247     SmallString<8> AugmentationString;
248 
249     Error extract(const DWARFDataExtractor &AS, uint32_t *Offset);
250     void dump(ScopedPrinter &W) const;
251   };
252 
253   /// Index attribute and its encoding.
254   struct AttributeEncoding {
255     dwarf::Index Index;
256     dwarf::Form Form;
257 
AttributeEncodingAttributeEncoding258     constexpr AttributeEncoding(dwarf::Index Index, dwarf::Form Form)
259         : Index(Index), Form(Form) {}
260 
261     friend bool operator==(const AttributeEncoding &LHS,
262                            const AttributeEncoding &RHS) {
263       return LHS.Index == RHS.Index && LHS.Form == RHS.Form;
264     }
265   };
266 
267   /// Abbreviation describing the encoding of Name Index entries.
268   struct Abbrev {
269     uint32_t Code;  ///< Abbreviation code
270     dwarf::Tag Tag; ///< Dwarf Tag of the described entity.
271     std::vector<AttributeEncoding> Attributes; ///< List of index attributes.
272 
AbbrevAbbrev273     Abbrev(uint32_t Code, dwarf::Tag Tag,
274            std::vector<AttributeEncoding> Attributes)
275         : Code(Code), Tag(Tag), Attributes(std::move(Attributes)) {}
276 
277     void dump(ScopedPrinter &W) const;
278   };
279 
280   /// DWARF v5-specific implementation of an Accelerator Entry.
281   class Entry final : public DWARFAcceleratorTable::Entry {
282     const NameIndex *NameIdx;
283     const Abbrev *Abbr;
284 
285     Entry(const NameIndex &NameIdx, const Abbrev &Abbr);
286 
287   public:
288     Optional<uint64_t> getCUOffset() const override;
getTag()289     Optional<dwarf::Tag> getTag() const override { return tag(); }
290 
291     /// Returns the Index into the Compilation Unit list of the owning Name
292     /// Index or None if this Accelerator Entry does not have an associated
293     /// Compilation Unit. It is up to the user to verify that the returned Index
294     /// is valid in the owning NameIndex (or use getCUOffset(), which will
295     /// handle that check itself). Note that entries in NameIndexes which index
296     /// just a single Compilation Unit are implicitly associated with that unit,
297     /// so this function will return 0 even without an explicit
298     /// DW_IDX_compile_unit attribute.
299     Optional<uint64_t> getCUIndex() const;
300 
301     /// .debug_names-specific getter, which always succeeds (DWARF v5 index
302     /// entries always have a tag).
tag()303     dwarf::Tag tag() const { return Abbr->Tag; }
304 
305     /// Returns the Offset of the DIE within the containing CU or TU.
306     Optional<uint64_t> getDIEUnitOffset() const;
307 
308     /// Return the Abbreviation that can be used to interpret the raw values of
309     /// this Accelerator Entry.
getAbbrev()310     const Abbrev &getAbbrev() const { return *Abbr; }
311 
312     /// Returns the value of the Index Attribute in this Accelerator Entry, if
313     /// the Entry contains such Attribute.
314     Optional<DWARFFormValue> lookup(dwarf::Index Index) const;
315 
316     void dump(ScopedPrinter &W) const;
317 
318     friend class NameIndex;
319     friend class ValueIterator;
320   };
321 
322   /// Error returned by NameIndex::getEntry to report it has reached the end of
323   /// the entry list.
324   class SentinelError : public ErrorInfo<SentinelError> {
325   public:
326     static char ID;
327 
log(raw_ostream & OS)328     void log(raw_ostream &OS) const override { OS << "Sentinel"; }
329     std::error_code convertToErrorCode() const override;
330   };
331 
332 private:
333   /// DenseMapInfo for struct Abbrev.
334   struct AbbrevMapInfo {
335     static Abbrev getEmptyKey();
336     static Abbrev getTombstoneKey();
getHashValueAbbrevMapInfo337     static unsigned getHashValue(uint32_t Code) {
338       return DenseMapInfo<uint32_t>::getHashValue(Code);
339     }
getHashValueAbbrevMapInfo340     static unsigned getHashValue(const Abbrev &Abbr) {
341       return getHashValue(Abbr.Code);
342     }
isEqualAbbrevMapInfo343     static bool isEqual(uint32_t LHS, const Abbrev &RHS) {
344       return LHS == RHS.Code;
345     }
isEqualAbbrevMapInfo346     static bool isEqual(const Abbrev &LHS, const Abbrev &RHS) {
347       return LHS.Code == RHS.Code;
348     }
349   };
350 
351 public:
352   /// A single entry in the Name Table (Dwarf 5 sect. 6.1.1.4.6) of the Name
353   /// Index.
354   class NameTableEntry {
355     DataExtractor StrData;
356 
357     uint32_t Index;
358     uint32_t StringOffset;
359     uint32_t EntryOffset;
360 
361   public:
NameTableEntry(const DataExtractor & StrData,uint32_t Index,uint32_t StringOffset,uint32_t EntryOffset)362     NameTableEntry(const DataExtractor &StrData, uint32_t Index,
363                    uint32_t StringOffset, uint32_t EntryOffset)
364         : StrData(StrData), Index(Index), StringOffset(StringOffset),
365           EntryOffset(EntryOffset) {}
366 
367     /// Return the index of this name in the parent Name Index.
getIndex()368     uint32_t getIndex() const { return Index; }
369 
370     /// Returns the offset of the name of the described entities.
getStringOffset()371     uint32_t getStringOffset() const { return StringOffset; }
372 
373     /// Return the string referenced by this name table entry or nullptr if the
374     /// string offset is not valid.
getString()375     const char *getString() const {
376       uint32_t Off = StringOffset;
377       return StrData.getCStr(&Off);
378     }
379 
380     /// Returns the offset of the first Entry in the list.
getEntryOffset()381     uint32_t getEntryOffset() const { return EntryOffset; }
382   };
383 
384   /// Represents a single accelerator table within the Dwarf 5 .debug_names
385   /// section.
386   class NameIndex {
387     DenseSet<Abbrev, AbbrevMapInfo> Abbrevs;
388     struct Header Hdr;
389     const DWARFDebugNames &Section;
390 
391     // Base of the whole unit and of various important tables, as offsets from
392     // the start of the section.
393     uint32_t Base;
394     uint32_t CUsBase;
395     uint32_t BucketsBase;
396     uint32_t HashesBase;
397     uint32_t StringOffsetsBase;
398     uint32_t EntryOffsetsBase;
399     uint32_t EntriesBase;
400 
401     void dumpCUs(ScopedPrinter &W) const;
402     void dumpLocalTUs(ScopedPrinter &W) const;
403     void dumpForeignTUs(ScopedPrinter &W) const;
404     void dumpAbbreviations(ScopedPrinter &W) const;
405     bool dumpEntry(ScopedPrinter &W, uint32_t *Offset) const;
406     void dumpName(ScopedPrinter &W, const NameTableEntry &NTE,
407                   Optional<uint32_t> Hash) const;
408     void dumpBucket(ScopedPrinter &W, uint32_t Bucket) const;
409 
410     Expected<AttributeEncoding> extractAttributeEncoding(uint32_t *Offset);
411 
412     Expected<std::vector<AttributeEncoding>>
413     extractAttributeEncodings(uint32_t *Offset);
414 
415     Expected<Abbrev> extractAbbrev(uint32_t *Offset);
416 
417   public:
NameIndex(const DWARFDebugNames & Section,uint32_t Base)418     NameIndex(const DWARFDebugNames &Section, uint32_t Base)
419         : Section(Section), Base(Base) {}
420 
421     /// Reads offset of compilation unit CU. CU is 0-based.
422     uint32_t getCUOffset(uint32_t CU) const;
getCUCount()423     uint32_t getCUCount() const { return Hdr.CompUnitCount; }
424 
425     /// Reads offset of local type unit TU, TU is 0-based.
426     uint32_t getLocalTUOffset(uint32_t TU) const;
getLocalTUCount()427     uint32_t getLocalTUCount() const { return Hdr.LocalTypeUnitCount; }
428 
429     /// Reads signature of foreign type unit TU. TU is 0-based.
430     uint64_t getForeignTUSignature(uint32_t TU) const;
getForeignTUCount()431     uint32_t getForeignTUCount() const { return Hdr.ForeignTypeUnitCount; }
432 
433     /// Reads an entry in the Bucket Array for the given Bucket. The returned
434     /// value is a (1-based) index into the Names, StringOffsets and
435     /// EntryOffsets arrays. The input Bucket index is 0-based.
436     uint32_t getBucketArrayEntry(uint32_t Bucket) const;
getBucketCount()437     uint32_t getBucketCount() const { return Hdr.BucketCount; }
438 
439     /// Reads an entry in the Hash Array for the given Index. The input Index
440     /// is 1-based.
441     uint32_t getHashArrayEntry(uint32_t Index) const;
442 
443     /// Reads an entry in the Name Table for the given Index. The Name Table
444     /// consists of two arrays -- String Offsets and Entry Offsets. The returned
445     /// offsets are relative to the starts of respective sections. Input Index
446     /// is 1-based.
447     NameTableEntry getNameTableEntry(uint32_t Index) const;
448 
getNameCount()449     uint32_t getNameCount() const { return Hdr.NameCount; }
450 
getAbbrevs()451     const DenseSet<Abbrev, AbbrevMapInfo> &getAbbrevs() const {
452       return Abbrevs;
453     }
454 
455     Expected<Entry> getEntry(uint32_t *Offset) const;
456 
457     /// Look up all entries in this Name Index matching \c Key.
458     iterator_range<ValueIterator> equal_range(StringRef Key) const;
459 
begin()460     NameIterator begin() const { return NameIterator(this, 1); }
end()461     NameIterator end() const { return NameIterator(this, getNameCount() + 1); }
462 
463     llvm::Error extract();
getUnitOffset()464     uint32_t getUnitOffset() const { return Base; }
getNextUnitOffset()465     uint32_t getNextUnitOffset() const { return Base + 4 + Hdr.UnitLength; }
466     void dump(ScopedPrinter &W) const;
467 
468     friend class DWARFDebugNames;
469   };
470 
471   class ValueIterator : public std::iterator<std::input_iterator_tag, Entry> {
472 
473     /// The Name Index we are currently iterating through. The implementation
474     /// relies on the fact that this can also be used as an iterator into the
475     /// "NameIndices" vector in the Accelerator section.
476     const NameIndex *CurrentIndex = nullptr;
477 
478     /// Whether this is a local iterator (searches in CurrentIndex only) or not
479     /// (searches all name indices).
480     bool IsLocal;
481 
482     Optional<Entry> CurrentEntry;
483     unsigned DataOffset = 0; ///< Offset into the section.
484     std::string Key;         ///< The Key we are searching for.
485     Optional<uint32_t> Hash; ///< Hash of Key, if it has been computed.
486 
487     bool getEntryAtCurrentOffset();
488     Optional<uint32_t> findEntryOffsetInCurrentIndex();
489     bool findInCurrentIndex();
490     void searchFromStartOfCurrentIndex();
491     void next();
492 
493     /// Set the iterator to the "end" state.
setEnd()494     void setEnd() { *this = ValueIterator(); }
495 
496   public:
497     /// Create a "begin" iterator for looping over all entries in the
498     /// accelerator table matching Key. The iterator will run through all Name
499     /// Indexes in the section in sequence.
500     ValueIterator(const DWARFDebugNames &AccelTable, StringRef Key);
501 
502     /// Create a "begin" iterator for looping over all entries in a specific
503     /// Name Index. Other indices in the section will not be visited.
504     ValueIterator(const NameIndex &NI, StringRef Key);
505 
506     /// End marker.
507     ValueIterator() = default;
508 
509     const Entry &operator*() const { return *CurrentEntry; }
510     ValueIterator &operator++() {
511       next();
512       return *this;
513     }
514     ValueIterator operator++(int) {
515       ValueIterator I = *this;
516       next();
517       return I;
518     }
519 
520     friend bool operator==(const ValueIterator &A, const ValueIterator &B) {
521       return A.CurrentIndex == B.CurrentIndex && A.DataOffset == B.DataOffset;
522     }
523     friend bool operator!=(const ValueIterator &A, const ValueIterator &B) {
524       return !(A == B);
525     }
526   };
527 
528   class NameIterator {
529 
530     /// The Name Index we are iterating through.
531     const NameIndex *CurrentIndex;
532 
533     /// The current name in the Name Index.
534     uint32_t CurrentName;
535 
next()536     void next() {
537       assert(CurrentName <= CurrentIndex->getNameCount());
538       ++CurrentName;
539     }
540 
541   public:
542     using iterator_category = std::input_iterator_tag;
543     using value_type = NameTableEntry;
544     using difference_type = uint32_t;
545     using pointer = NameTableEntry *;
546     using reference = NameTableEntry; // We return entries by value.
547 
548     /// Creates an iterator whose initial position is name CurrentName in
549     /// CurrentIndex.
NameIterator(const NameIndex * CurrentIndex,uint32_t CurrentName)550     NameIterator(const NameIndex *CurrentIndex, uint32_t CurrentName)
551         : CurrentIndex(CurrentIndex), CurrentName(CurrentName) {}
552 
553     NameTableEntry operator*() const {
554       return CurrentIndex->getNameTableEntry(CurrentName);
555     }
556     NameIterator &operator++() {
557       next();
558       return *this;
559     }
560     NameIterator operator++(int) {
561       NameIterator I = *this;
562       next();
563       return I;
564     }
565 
566     friend bool operator==(const NameIterator &A, const NameIterator &B) {
567       return A.CurrentIndex == B.CurrentIndex && A.CurrentName == B.CurrentName;
568     }
569     friend bool operator!=(const NameIterator &A, const NameIterator &B) {
570       return !(A == B);
571     }
572   };
573 
574 private:
575   SmallVector<NameIndex, 0> NameIndices;
576   DenseMap<uint32_t, const NameIndex *> CUToNameIndex;
577 
578 public:
DWARFDebugNames(const DWARFDataExtractor & AccelSection,DataExtractor StringSection)579   DWARFDebugNames(const DWARFDataExtractor &AccelSection,
580                   DataExtractor StringSection)
581       : DWARFAcceleratorTable(AccelSection, StringSection) {}
582 
583   llvm::Error extract() override;
584   void dump(raw_ostream &OS) const override;
585 
586   /// Look up all entries in the accelerator table matching \c Key.
587   iterator_range<ValueIterator> equal_range(StringRef Key) const;
588 
589   using const_iterator = SmallVector<NameIndex, 0>::const_iterator;
begin()590   const_iterator begin() const { return NameIndices.begin(); }
end()591   const_iterator end() const { return NameIndices.end(); }
592 
593   /// Return the Name Index covering the compile unit at CUOffset, or nullptr if
594   /// there is no Name Index covering that unit.
595   const NameIndex *getCUNameIndex(uint32_t CUOffset);
596 };
597 
598 } // end namespace llvm
599 
600 #endif // LLVM_DEBUGINFO_DWARFACCELERATORTABLE_H
601