1 //
2 //  MappedHash.h
3 //
4 
5 #ifndef liblldb_MappedHash_h_
6 #define liblldb_MappedHash_h_
7 
8 #include <assert.h>
9 #include <stdint.h>
10 
11 #include <map>
12 #include <vector>
13 
14 #include "lldb/Core/DataExtractor.h"
15 #include "lldb/Core/Stream.h"
16 
17 class MappedHash
18 {
19 public:
20 
21     enum HashFunctionType
22     {
23         eHashFunctionDJB        = 0u    // Daniel J Bernstein hash function that is also used by the ELF GNU_HASH sections
24     };
25 
26 
27     static uint32_t
HashStringUsingDJB(const char * s)28     HashStringUsingDJB (const char *s)
29     {
30         uint32_t h = 5381;
31 
32         for (unsigned char c = *s; c; c = *++s)
33             h = ((h << 5) + h) + c;
34 
35         return h;
36     }
37 
38     static uint32_t
HashString(uint32_t hash_function,const char * s)39     HashString (uint32_t hash_function, const char *s)
40     {
41         switch (hash_function)
42         {
43             case MappedHash::eHashFunctionDJB:
44                 return HashStringUsingDJB (s);
45 
46             default:
47                 break;
48         }
49         assert (!"Invalid hash function index");
50         return 0;
51     }
52 
53 
54     static const uint32_t HASH_MAGIC = 0x48415348u;
55     static const uint32_t HASH_CIGAM = 0x48534148u;
56 
57     template <typename T>
58     struct Header
59 	{
60         typedef T HeaderData;
61 
62         uint32_t    magic;             // HASH_MAGIC or HASH_CIGAM magic value to allow endian detection
63         uint16_t    version;           // Version number
64 		uint16_t    hash_function;     // The hash function enumeration that was used
65 		uint32_t    bucket_count;      // The number of buckets in this hash table
66 		uint32_t    hashes_count;      // The total number of unique hash values and hash data offsets in this table
67         uint32_t    header_data_len;   // The size in bytes of the "header_data" template member below
68         HeaderData  header_data;       //
69 
HeaderHeader70 		Header () :
71             magic (HASH_MAGIC),
72             version (1),
73             hash_function (eHashFunctionDJB),
74             bucket_count (0),
75             hashes_count (0),
76             header_data_len (sizeof(T)),
77             header_data ()
78 		{
79 		}
80 
81         virtual
~HeaderHeader82         ~Header()
83         {
84         }
85 
86         size_t
GetByteSizeHeader87         GetByteSize() const
88         {
89             return  sizeof(magic) +
90                     sizeof(version) +
91                     sizeof(hash_function) +
92                     sizeof(bucket_count) +
93                     sizeof(hashes_count) +
94                     sizeof(header_data_len) +
95                     header_data_len;
96         }
97 
98         virtual size_t
99         GetByteSize (const HeaderData &header_data) = 0;
100 
101         void
SetHeaderDataByteSizeHeader102         SetHeaderDataByteSize (uint32_t header_data_byte_size)
103         {
104             header_data_len = header_data_byte_size;
105         }
106 
107         void
DumpHeader108         Dump (lldb_private::Stream &s)
109         {
110             s.Printf ("header.magic              = 0x%8.8x\n", magic);
111             s.Printf ("header.version            = 0x%4.4x\n", version);
112             s.Printf ("header.hash_function      = 0x%4.4x\n", hash_function);
113             s.Printf ("header.bucket_count       = 0x%8.8x %u\n", bucket_count, bucket_count);
114             s.Printf ("header.hashes_count       = 0x%8.8x %u\n", hashes_count, hashes_count);
115             s.Printf ("header.header_data_len    = 0x%8.8x %u\n", header_data_len, header_data_len);
116         }
117 
118         virtual lldb::offset_t
ReadHeader119         Read (lldb_private::DataExtractor &data, lldb::offset_t offset)
120         {
121             if (data.ValidOffsetForDataOfSize (offset,
122                                                sizeof (magic) +
123                                                sizeof (version) +
124                                                sizeof (hash_function) +
125                                                sizeof (bucket_count) +
126                                                sizeof (hashes_count) +
127                                                sizeof (header_data_len)))
128             {
129                 magic = data.GetU32 (&offset);
130                 if (magic != HASH_MAGIC)
131                 {
132                     if (magic == HASH_CIGAM)
133                     {
134                         switch (data.GetByteOrder())
135                         {
136                             case lldb::eByteOrderBig:
137                                 data.SetByteOrder(lldb::eByteOrderLittle);
138                                 break;
139                             case lldb::eByteOrderLittle:
140                                 data.SetByteOrder(lldb::eByteOrderBig);
141                                 break;
142                             default:
143                                 return LLDB_INVALID_OFFSET;
144                         }
145                     }
146                     else
147                     {
148                         // Magic bytes didn't match
149                         version = 0;
150                         return LLDB_INVALID_OFFSET;
151                     }
152                 }
153 
154                 version = data.GetU16 (&offset);
155                 if (version != 1)
156                 {
157                     // Unsupported version
158                     return LLDB_INVALID_OFFSET;
159                 }
160                 hash_function       = data.GetU16 (&offset);
161                 if (hash_function == 4)
162                     hash_function = 0; // Deal with pre-release version of this table...
163                 bucket_count        = data.GetU32 (&offset);
164                 hashes_count        = data.GetU32 (&offset);
165                 header_data_len     = data.GetU32 (&offset);
166                 return offset;
167             }
168             return LLDB_INVALID_OFFSET;
169         }
170 //
171 //        // Returns a buffer that contains a serialized version of this table
172 //        // that must be freed with free().
173 //        virtual void *
174 //        Write (int fd);
175 	};
176 
177     template <typename __KeyType, class __HeaderDataType, class __ValueType>
178     class ExportTable
179     {
180     public:
181         typedef __HeaderDataType HeaderDataType;
182         typedef Header<HeaderDataType> HeaderType;
183         typedef __KeyType KeyType;
184         typedef __ValueType ValueType;
185 
186         struct Entry
187         {
188             uint32_t    hash;
189             KeyType     key;
190             ValueType   value;
191         };
192 
193         typedef std::vector<ValueType> ValueArrayType;
194 
195         typedef std::map<KeyType, ValueArrayType> HashData;
196         // Map a name hash to one or more name infos
197         typedef std::map<uint32_t, HashData> HashToHashData;
198 
199         virtual KeyType
200         GetKeyForStringType (const char *cstr) const = 0;
201 
202         virtual size_t
203         GetByteSize (const HashData &key_to_key_values) = 0;
204 
205         virtual bool
206         WriteHashData (const HashData &hash_data,
207                        lldb_private::Stream &ostrm) = 0;
208 //
209         void
AddEntry(const char * cstr,const ValueType & value)210         AddEntry (const char *cstr, const ValueType &value)
211         {
212             Entry entry;
213             entry.hash = MappedHash::HashString (eHashFunctionDJB, cstr);
214             entry.key = GetKeyForStringType (cstr);
215             entry.value = value;
216             m_entries.push_back (entry);
217         }
218 
219         void
Save(const HeaderDataType & header_data,lldb_private::Stream & ostrm)220         Save (const HeaderDataType &header_data,
221               lldb_private::Stream &ostrm)
222         {
223             if (m_entries.empty())
224                 return;
225 
226             const uint32_t num_entries = m_entries.size();
227             uint32_t i = 0;
228 
229             HeaderType header;
230 
231             header.magic = HASH_MAGIC;
232             header.version = 1;
233             header.hash_function = eHashFunctionDJB;
234             header.bucket_count = 0;
235             header.hashes_count = 0;
236             header.prologue_length = header_data.GetByteSize();
237 
238             // We need to figure out the number of unique hashes first before we can
239             // calculate the number of buckets we want to use.
240             typedef std::vector<uint32_t> hash_coll;
241             hash_coll unique_hashes;
242             unique_hashes.resize (num_entries);
243             for (i=0; i<num_entries; ++i)
244                 unique_hashes[i] = m_entries[i].hash;
245             std::sort (unique_hashes.begin(), unique_hashes.end());
246             hash_coll::iterator pos = std::unique (unique_hashes.begin(), unique_hashes.end());
247             const size_t num_unique_hashes = std::distance (unique_hashes.begin(), pos);
248 
249             if (num_unique_hashes > 1024)
250                 header.bucket_count = num_unique_hashes/4;
251             else if (num_unique_hashes > 16)
252                 header.bucket_count = num_unique_hashes/2;
253             else
254                 header.bucket_count = num_unique_hashes;
255             if (header.bucket_count == 0)
256                 header.bucket_count = 1;
257 
258 
259             std::vector<HashToHashData> hash_buckets;
260             std::vector<uint32_t> hash_indexes (header.bucket_count, 0);
261             std::vector<uint32_t> hash_values;
262             std::vector<uint32_t> hash_offsets;
263             hash_buckets.resize (header.bucket_count);
264             uint32_t bucket_entry_empties = 0;
265             //StreamString hash_file_data(Stream::eBinary, dwarf->GetObjectFile()->GetAddressByteSize(), dwarf->GetObjectFile()->GetByteSize());
266 
267             // Push all of the hashes into their buckets and create all bucket
268             // entries all populated with data.
269             for (i=0; i<num_entries; ++i)
270             {
271                 const uint32_t hash = m_entries[i].hash;
272                 const uint32_t bucket_idx = hash % header.bucket_count;
273                 const uint32_t strp_offset = m_entries[i].str_offset;
274                 const uint32_t die_offset = m_entries[i].die_offset;
275                 hash_buckets[bucket_idx][hash][strp_offset].push_back(die_offset);
276             }
277 
278             // Now for each bucket we write the bucket value which is the
279             // number of hashes and the hash index encoded into a single
280             // 32 bit unsigned integer.
281             for (i=0; i<header.bucket_count; ++i)
282             {
283                 HashToHashData &bucket_entry = hash_buckets[i];
284 
285                 if (bucket_entry.empty())
286                 {
287                     // Empty bucket
288                     ++bucket_entry_empties;
289                     hash_indexes[i] = UINT32_MAX;
290                 }
291                 else
292                 {
293                     const uint32_t hash_value_index = hash_values.size();
294                     uint32_t hash_count = 0;
295                     typename HashToHashData::const_iterator pos, end = bucket_entry.end();
296                     for (pos = bucket_entry.begin(); pos != end; ++pos)
297                     {
298                         hash_values.push_back (pos->first);
299                         hash_offsets.push_back (GetByteSize (pos->second));
300                         ++hash_count;
301                     }
302 
303                     hash_indexes[i] = hash_value_index;
304                 }
305             }
306             header.hashes_count = hash_values.size();
307 
308             // Write the header out now that we have the hash_count
309             header.Write (ostrm);
310 
311             // Now for each bucket we write the start index of the hashes
312             // for the current bucket, or UINT32_MAX if the bucket is empty
313             for (i=0; i<header.bucket_count; ++i)
314             {
315                 ostrm.PutHex32(hash_indexes[i]);
316             }
317 
318             // Now we need to write out all of the hash values
319             for (i=0; i<header.hashes_count; ++i)
320             {
321                 ostrm.PutHex32(hash_values[i]);
322             }
323 
324             // Now we need to write out all of the hash data offsets,
325             // there is an offset for each hash in the hashes array
326             // that was written out above
327             for (i=0; i<header.hashes_count; ++i)
328             {
329                 ostrm.PutHex32(hash_offsets[i]);
330             }
331 
332             // Now we write the data for each hash and verify we got the offset
333             // correct above...
334             for (i=0; i<header.bucket_count; ++i)
335             {
336                 HashToHashData &bucket_entry = hash_buckets[i];
337 
338                 typename HashToHashData::const_iterator pos, end = bucket_entry.end();
339                 for (pos = bucket_entry.begin(); pos != end; ++pos)
340                 {
341                     if (!bucket_entry.empty())
342                     {
343                         WriteHashData (pos->second);
344                     }
345                 }
346             }
347         }
348     protected:
349         typedef std::vector<Entry> collection;
350         collection m_entries;
351     };
352     // A class for reading and using a saved hash table from a block of data
353     // in memory
354     template <typename __KeyType, class __HeaderType, class __HashData>
355     class MemoryTable
356     {
357     public:
358         typedef __HeaderType HeaderType;
359         typedef __KeyType KeyType;
360         typedef __HashData HashData;
361 
362         enum Result
363         {
364             eResultKeyMatch         = 0u, // The entry was found, key matched and "pair" was filled in successfully
365             eResultKeyMismatch      = 1u, // Bucket hash data collision, but key didn't match
366             eResultEndOfHashData    = 2u, // The chain of items for this hash data in this bucket is terminated, search no more
367             eResultError            = 3u  // Error parsing the hash data, abort
368         };
369 
370         struct Pair
371         {
372             KeyType key;
373             HashData value;
374         };
375 
MemoryTable(lldb_private::DataExtractor & data)376         MemoryTable (lldb_private::DataExtractor &data) :
377             m_header (),
378             m_hash_indexes (NULL),
379             m_hash_values (NULL),
380             m_hash_offsets (NULL)
381         {
382             lldb::offset_t offset = m_header.Read (data, 0);
383             if (offset != LLDB_INVALID_OFFSET && IsValid ())
384             {
385                 m_hash_indexes = (uint32_t *)data.GetData (&offset, m_header.bucket_count * sizeof(uint32_t));
386                 m_hash_values  = (uint32_t *)data.GetData (&offset, m_header.hashes_count * sizeof(uint32_t));
387                 m_hash_offsets = (uint32_t *)data.GetData (&offset, m_header.hashes_count * sizeof(uint32_t));
388             }
389         }
390 
391         virtual
~MemoryTable()392         ~MemoryTable ()
393         {
394         }
395 
396         bool
IsValid()397         IsValid () const
398         {
399             return m_header.version == 1 &&
400                    m_header.hash_function == eHashFunctionDJB &&
401                    m_header.bucket_count > 0 &&
402                    m_header.hashes_count > 0;
403         }
404 
405         uint32_t
GetHashIndex(uint32_t bucket_idx)406         GetHashIndex (uint32_t bucket_idx) const
407         {
408             if (m_hash_indexes && bucket_idx < m_header.bucket_count)
409                 return m_hash_indexes[bucket_idx];
410             return UINT32_MAX;
411         }
412 
413         uint32_t
GetHashValue(uint32_t hash_idx)414         GetHashValue (uint32_t hash_idx) const
415         {
416             if (m_hash_values && hash_idx < m_header.hashes_count)
417                 return m_hash_values[hash_idx];
418             return UINT32_MAX;
419         }
420 
421         uint32_t
GetHashDataOffset(uint32_t hash_idx)422         GetHashDataOffset (uint32_t hash_idx) const
423         {
424             if (m_hash_offsets && hash_idx < m_header.hashes_count)
425                 return m_hash_offsets[hash_idx];
426             return UINT32_MAX;
427         }
428 
429         bool
Find(const char * name,Pair & pair)430         Find (const char *name, Pair &pair) const
431         {
432             if (IsValid ())
433             {
434                 const uint32_t bucket_count = m_header.bucket_count;
435                 const uint32_t hash_count = m_header.hashes_count;
436                 const uint32_t hash_value = MappedHash::HashString (m_header.hash_function, name);
437                 const uint32_t bucket_idx = hash_value % bucket_count;
438                 uint32_t hash_idx = GetHashIndex (bucket_idx);
439                 if (hash_idx < hash_count)
440                 {
441                     for (; hash_idx < hash_count; ++hash_idx)
442                     {
443                         const uint32_t curr_hash_value = GetHashValue (hash_idx);
444                         if (curr_hash_value == hash_value)
445                         {
446                             lldb::offset_t hash_data_offset = GetHashDataOffset (hash_idx);
447                             while (hash_data_offset != UINT32_MAX)
448                             {
449                                 const lldb::offset_t prev_hash_data_offset = hash_data_offset;
450                                 Result hash_result = GetHashDataForName (name, &hash_data_offset, pair);
451                                 // Check the result of getting our hash data
452                                 switch (hash_result)
453                                 {
454                                 case eResultKeyMatch:
455                                     return true;
456 
457                                 case eResultKeyMismatch:
458                                     if (prev_hash_data_offset == hash_data_offset)
459                                         return false;
460                                     break;
461 
462                                 case eResultEndOfHashData:
463                                     // The last HashData for this key has been reached, stop searching
464                                     return false;
465                                 case eResultError:
466                                     // Error parsing the hash data, abort
467                                     return false;
468                                 }
469                             }
470                         }
471                         if ((curr_hash_value % bucket_count) != bucket_idx)
472                             break;
473                     }
474                 }
475             }
476             return false;
477         }
478 
479         // This method must be implemented in any subclasses.
480         // The KeyType is user specified and must somehow result in a string
481         // value. For example, the KeyType might be a string offset in a string
482         // table and subclasses can store their string table as a member of the
483         // subclass and return a valie "const char *" given a "key". The value
484         // could also be a C string pointer, in which case just returning "key"
485         // will suffice.
486 
487         virtual const char *
488         GetStringForKeyType (KeyType key) const = 0;
489 
490         virtual bool
491         ReadHashData (uint32_t hash_data_offset,
492                       HashData &hash_data) const = 0;
493 
494         // This method must be implemented in any subclasses and it must try to
495         // read one "Pair" at the offset pointed to by the "hash_data_offset_ptr"
496         // parameter. This offset should be updated as bytes are consumed and
497         // a value "Result" enum should be returned. If the "name" matches the
498         // full name for the "pair.key" (which must be filled in by this call),
499         // then the HashData in the pair ("pair.value") should be extracted and
500         // filled in and "eResultKeyMatch" should be returned. If "name" doesn't
501         // match this string for the key, then "eResultKeyMismatch" should be
502         // returned and all data for the current HashData must be consumed or
503         // skipped and the "hash_data_offset_ptr" offset needs to be updated to
504         // point to the next HashData. If the end of the HashData objects for
505         // a given hash value have been reached, then "eResultEndOfHashData"
506         // should be returned. If anything else goes wrong during parsing,
507         // return "eResultError" and the corresponding "Find()" function will
508         // be canceled and return false.
509 
510         virtual Result
511         GetHashDataForName (const char *name,
512                             lldb::offset_t* hash_data_offset_ptr,
513                             Pair &pair) const = 0;
514 
515         const HeaderType &
GetHeader()516         GetHeader()
517         {
518             return m_header;
519         }
520 
521 
522         void
ForEach(std::function<bool (const HashData & hash_data)> const & callback)523         ForEach (std::function <bool(const HashData &hash_data)> const &callback) const
524         {
525             const size_t num_hash_offsets = m_header.hashes_count;
526             for (size_t i=0; i<num_hash_offsets; ++i)
527             {
528                 uint32_t hash_data_offset = GetHashDataOffset (i);
529                 if (hash_data_offset != UINT32_MAX)
530                 {
531                     HashData hash_data;
532                     if (ReadHashData (hash_data_offset, hash_data))
533                     {
534                         // If the callback returns false, then we are done and should stop
535                         if (callback(hash_data) == false)
536                             return;
537                     }
538                 }
539             }
540         }
541 
542     protected:
543         // Implementation agnostic information
544         HeaderType m_header;
545         uint32_t *m_hash_indexes;
546         uint32_t *m_hash_values;
547         uint32_t *m_hash_offsets;
548     };
549 
550 };
551 
552 #endif // #ifndef liblldb_MappedHash_h_
553