1 //===- TypeHashing.h ---------------------------------------------*- C++-*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
11 #define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
12 
13 #include "llvm/ADT/DenseMapInfo.h"
14 #include "llvm/ADT/Hashing.h"
15 
16 #include "llvm/DebugInfo/CodeView/CodeView.h"
17 #include "llvm/DebugInfo/CodeView/TypeCollection.h"
18 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
19 
20 #include "llvm/Support/FormatProviders.h"
21 
22 #include <type_traits>
23 
24 namespace llvm {
25 namespace codeview {
26 
27 /// A locally hashed type represents a straightforward hash code of a serialized
28 /// record.  The record is simply serialized, and then the bytes are hashed by
29 /// a standard algorithm.  This is sufficient for the case of de-duplicating
30 /// records within a single sequence of types, because if two records both have
31 /// a back-reference to the same type in the same stream, they will both have
32 /// the same numeric value for the TypeIndex of the back reference.
33 struct LocallyHashedType {
34   hash_code Hash;
35   ArrayRef<uint8_t> RecordData;
36 
37   /// Given a type, compute its local hash.
38   static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData);
39 
40   /// Given a sequence of types, compute all of the local hashes.
41   template <typename Range>
hashTypesLocallyHashedType42   static std::vector<LocallyHashedType> hashTypes(Range &&Records) {
43     std::vector<LocallyHashedType> Hashes;
44     Hashes.reserve(std::distance(std::begin(Records), std::end(Records)));
45     for (const auto &R : Records)
46       Hashes.push_back(hashType(R));
47 
48     return Hashes;
49   }
50 
51   static std::vector<LocallyHashedType>
hashTypeCollectionLocallyHashedType52   hashTypeCollection(TypeCollection &Types) {
53     std::vector<LocallyHashedType> Hashes;
54     Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
55       Hashes.push_back(hashType(Type.RecordData));
56     });
57     return Hashes;
58   }
59 };
60 
61 enum class GlobalTypeHashAlg : uint16_t {
62   SHA1 = 0, // standard 20-byte SHA1 hash
63   SHA1_8    // last 8-bytes of standard SHA1 hash
64 };
65 
66 /// A globally hashed type represents a hash value that is sufficient to
67 /// uniquely identify a record across multiple type streams or type sequences.
68 /// This works by, for any given record A which references B, replacing the
69 /// TypeIndex that refers to B with a previously-computed global hash for B.  As
70 /// this is a recursive algorithm (e.g. the global hash of B also depends on the
71 /// global hashes of the types that B refers to), a global hash can uniquely
72 /// identify identify that A occurs in another stream that has a completely
73 /// different graph structure.  Although the hash itself is slower to compute,
74 /// probing is much faster with a globally hashed type, because the hash itself
75 /// is considered "as good as" the original type.  Since type records can be
76 /// quite large, this makes the equality comparison of the hash much faster than
77 /// equality comparison of a full record.
78 struct GloballyHashedType {
79   GloballyHashedType() = default;
GloballyHashedTypeGloballyHashedType80   GloballyHashedType(StringRef H)
81       : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {}
GloballyHashedTypeGloballyHashedType82   GloballyHashedType(ArrayRef<uint8_t> H) {
83     assert(H.size() == 8);
84     ::memcpy(Hash.data(), H.data(), 8);
85   }
86   std::array<uint8_t, 8> Hash;
87 
88   /// Given a sequence of bytes representing a record, compute a global hash for
89   /// this record.  Due to the nature of global hashes incorporating the hashes
90   /// of referenced records, this function requires a list of types and ids
91   /// that RecordData might reference, indexable by TypeIndex.
92   static GloballyHashedType hashType(ArrayRef<uint8_t> RecordData,
93                                      ArrayRef<GloballyHashedType> PreviousTypes,
94                                      ArrayRef<GloballyHashedType> PreviousIds);
95 
96   /// Given a sequence of bytes representing a record, compute a global hash for
97   /// this record.  Due to the nature of global hashes incorporating the hashes
98   /// of referenced records, this function requires a list of types and ids
99   /// that RecordData might reference, indexable by TypeIndex.
hashTypeGloballyHashedType100   static GloballyHashedType hashType(CVType Type,
101                                      ArrayRef<GloballyHashedType> PreviousTypes,
102                                      ArrayRef<GloballyHashedType> PreviousIds) {
103     return hashType(Type.RecordData, PreviousTypes, PreviousIds);
104   }
105 
106   /// Given a sequence of combined type and ID records, compute global hashes
107   /// for each of them, returning the results in a vector of hashed types.
108   template <typename Range>
hashTypesGloballyHashedType109   static std::vector<GloballyHashedType> hashTypes(Range &&Records) {
110     std::vector<GloballyHashedType> Hashes;
111     for (const auto &R : Records)
112       Hashes.push_back(hashType(R, Hashes, Hashes));
113 
114     return Hashes;
115   }
116 
117   /// Given a sequence of combined type and ID records, compute global hashes
118   /// for each of them, returning the results in a vector of hashed types.
119   template <typename Range>
120   static std::vector<GloballyHashedType>
hashIdsGloballyHashedType121   hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) {
122     std::vector<GloballyHashedType> IdHashes;
123     for (const auto &R : Records)
124       IdHashes.push_back(hashType(R, TypeHashes, IdHashes));
125 
126     return IdHashes;
127   }
128 
129   static std::vector<GloballyHashedType>
hashTypeCollectionGloballyHashedType130   hashTypeCollection(TypeCollection &Types) {
131     std::vector<GloballyHashedType> Hashes;
132     Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
133       Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes));
134     });
135     return Hashes;
136   }
137 };
138 #if defined(_MSC_VER)
139 // is_trivially_copyable is not available in older versions of libc++, but it is
140 // available in all supported versions of MSVC, so at least this gives us some
141 // coverage.
142 static_assert(std::is_trivially_copyable<GloballyHashedType>::value,
143               "GloballyHashedType must be trivially copyable so that we can "
144               "reinterpret_cast arrays of hash data to arrays of "
145               "GloballyHashedType");
146 #endif
147 } // namespace codeview
148 
149 template <> struct DenseMapInfo<codeview::LocallyHashedType> {
150   static codeview::LocallyHashedType Empty;
151   static codeview::LocallyHashedType Tombstone;
152 
153   static codeview::LocallyHashedType getEmptyKey() { return Empty; }
154 
155   static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; }
156 
157   static unsigned getHashValue(codeview::LocallyHashedType Val) {
158     return Val.Hash;
159   }
160 
161   static bool isEqual(codeview::LocallyHashedType LHS,
162                       codeview::LocallyHashedType RHS) {
163     if (LHS.Hash != RHS.Hash)
164       return false;
165     return LHS.RecordData == RHS.RecordData;
166   }
167 };
168 
169 template <> struct DenseMapInfo<codeview::GloballyHashedType> {
170   static codeview::GloballyHashedType Empty;
171   static codeview::GloballyHashedType Tombstone;
172 
173   static codeview::GloballyHashedType getEmptyKey() { return Empty; }
174 
175   static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; }
176 
177   static unsigned getHashValue(codeview::GloballyHashedType Val) {
178     return *reinterpret_cast<const unsigned *>(Val.Hash.data());
179   }
180 
181   static bool isEqual(codeview::GloballyHashedType LHS,
182                       codeview::GloballyHashedType RHS) {
183     return LHS.Hash == RHS.Hash;
184   }
185 };
186 
187 template <> struct format_provider<codeview::LocallyHashedType> {
188 public:
189   static void format(const codeview::LocallyHashedType &V,
190                      llvm::raw_ostream &Stream, StringRef Style) {
191     write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8);
192   }
193 };
194 
195 template <> struct format_provider<codeview::GloballyHashedType> {
196 public:
197   static void format(const codeview::GloballyHashedType &V,
198                      llvm::raw_ostream &Stream, StringRef Style) {
199     for (uint8_t B : V.Hash) {
200       write_hex(Stream, B, HexPrintStyle::Upper, 2);
201     }
202   }
203 };
204 
205 } // namespace llvm
206 
207 #endif
208