1 
2 /*--------------------------------------------------------------------*/
3 /*--- A separately-chained hash table.               m_hashtable.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2005-2017 Nicholas Nethercote
11       njn@valgrind.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 #include "pub_core_basics.h"
32 #include "pub_core_debuglog.h"
33 #include "pub_core_hashtable.h"
34 #include "pub_core_libcassert.h"
35 #include "pub_core_libcbase.h"
36 #include "pub_core_libcprint.h"
37 #include "pub_core_mallocfree.h"
38 
39 /*--------------------------------------------------------------------*/
40 /*--- Declarations                                                 ---*/
41 /*--------------------------------------------------------------------*/
42 
43 #define CHAIN_NO(key,tbl) (((UWord)(key)) % tbl->n_chains)
44 
45 struct _VgHashTable {
46    UInt         n_chains;   // should be prime
47    UInt         n_elements;
48    VgHashNode*  iterNode;   // current iterator node
49    UInt         iterChain;  // next chain to be traversed by the iterator
50    VgHashNode** chains;     // expanding array of hash chains
51    Bool         iterOK;     // table safe to iterate over?
52    const HChar* name;       // name of table (for debugging only)
53 };
54 
55 #define N_HASH_PRIMES 20
56 
57 static const SizeT primes[N_HASH_PRIMES] = {
58          769UL,         1543UL,         3079UL,          6151UL,
59        12289UL,        24593UL,        49157UL,         98317UL,
60       196613UL,       393241UL,       786433UL,       1572869UL,
61      3145739UL,      6291469UL,     12582917UL,      25165843UL,
62     50331653UL,    100663319UL,    201326611UL,     402653189UL
63 };
64 
65 /*--------------------------------------------------------------------*/
66 /*--- Functions                                                    ---*/
67 /*--------------------------------------------------------------------*/
68 
VG_(HT_construct)69 VgHashTable *VG_(HT_construct) ( const HChar* name )
70 {
71    /* Initialises to zero, ie. all entries NULL */
72    SizeT       n_chains = primes[0];
73    SizeT       sz       = n_chains * sizeof(VgHashNode*);
74    VgHashTable *table   = VG_(calloc)("hashtable.Hc.1",
75                                       1, sizeof(struct _VgHashTable));
76    table->chains        = VG_(calloc)("hashtable.Hc.2", 1, sz);
77    table->n_chains      = n_chains;
78    table->n_elements    = 0;
79    table->iterOK        = True;
80    table->name          = name;
81    vg_assert(name);
82    return table;
83 }
84 
VG_(HT_count_nodes)85 UInt VG_(HT_count_nodes) ( const VgHashTable *table )
86 {
87    return table->n_elements;
88 }
89 
resize(VgHashTable * table)90 static void resize ( VgHashTable *table )
91 {
92    Int          i;
93    SizeT        sz;
94    SizeT        old_chains = table->n_chains;
95    SizeT        new_chains = old_chains + 1;
96    VgHashNode** chains;
97    VgHashNode * node;
98 
99    /* If we've run out of primes, do nothing. */
100    if (old_chains == primes[N_HASH_PRIMES-1])
101       return;
102 
103    vg_assert(old_chains >= primes[0]
104              && old_chains < primes[N_HASH_PRIMES-1]);
105 
106    for (i = 0; i < N_HASH_PRIMES; i++) {
107       if (primes[i] > new_chains) {
108          new_chains = primes[i];
109          break;
110       }
111    }
112 
113    vg_assert(new_chains > old_chains);
114    vg_assert(new_chains > primes[0]
115              && new_chains <= primes[N_HASH_PRIMES-1]);
116 
117    VG_(debugLog)(
118       1, "hashtable",
119          "resizing table `%s' from %lu to %lu (total elems %lu)\n",
120          table->name, (UWord)old_chains, (UWord)new_chains,
121          (UWord)table->n_elements );
122 
123    table->n_chains = new_chains;
124    sz = new_chains * sizeof(VgHashNode*);
125    chains = VG_(calloc)("hashtable.resize.1", 1, sz);
126 
127    for (i = 0; i < old_chains; i++) {
128       node = table->chains[i];
129       while (node != NULL) {
130          VgHashNode* next = node->next;
131          UWord chain = CHAIN_NO(node->key, table);
132          node->next = chains[chain];
133          chains[chain] = node;
134          node = next;
135       }
136    }
137 
138    VG_(free)(table->chains);
139    table->chains = chains;
140 }
141 
142 /* Puts a new, heap allocated VgHashNode, into the VgHashTable.  Prepends
143    the node to the appropriate chain.  No duplicate key detection is done. */
VG_(HT_add_node)144 void VG_(HT_add_node) ( VgHashTable *table, void* vnode )
145 {
146    VgHashNode* node     = (VgHashNode*)vnode;
147    UWord chain          = CHAIN_NO(node->key, table);
148    node->next           = table->chains[chain];
149    table->chains[chain] = node;
150    table->n_elements++;
151    if ( (1 * (ULong)table->n_elements) > (1 * (ULong)table->n_chains) ) {
152       resize(table);
153    }
154 
155    /* Table has been modified; hence HT_Next should assert. */
156    table->iterOK = False;
157 }
158 
159 /* Looks up a VgHashNode by key in the table.  Returns NULL if not found. */
VG_(HT_lookup)160 void* VG_(HT_lookup) ( const VgHashTable *table, UWord key )
161 {
162    VgHashNode* curr = table->chains[ CHAIN_NO(key, table) ];
163 
164    while (curr) {
165       if (key == curr->key) {
166          return curr;
167       }
168       curr = curr->next;
169    }
170    return NULL;
171 }
172 
173 /* Looks up a VgHashNode by node in the table.  Returns NULL if not found.
174    GEN!!! marks the lines that differs from VG_(HT_lookup). */
VG_(HT_gen_lookup)175 void* VG_(HT_gen_lookup) ( const VgHashTable *table, const void* node,
176                            HT_Cmp_t cmp )
177 {
178    const VgHashNode* hnode = node; // GEN!!!
179    VgHashNode* curr = table->chains[ CHAIN_NO(hnode->key, table) ]; // GEN!!!
180 
181    while (curr) {
182       if (hnode->key == curr->key && cmp (hnode, curr) == 0) { // GEN!!!
183          return curr;
184       }
185       curr = curr->next;
186    }
187    return NULL;
188 }
189 
190 /* Removes a VgHashNode from the table.  Returns NULL if not found. */
VG_(HT_remove)191 void* VG_(HT_remove) ( VgHashTable *table, UWord key )
192 {
193    UWord        chain         = CHAIN_NO(key, table);
194    VgHashNode*  curr          =   table->chains[chain];
195    VgHashNode** prev_next_ptr = &(table->chains[chain]);
196 
197    /* Table has been modified; hence HT_Next should assert. */
198    table->iterOK = False;
199 
200    while (curr) {
201       if (key == curr->key) {
202          *prev_next_ptr = curr->next;
203          table->n_elements--;
204          return curr;
205       }
206       prev_next_ptr = &(curr->next);
207       curr = curr->next;
208    }
209    return NULL;
210 }
211 
212 /* Removes a VgHashNode by node from the table.  Returns NULL if not found.
213    GEN!!! marks the lines that differs from VG_(HT_remove). */
VG_(HT_gen_remove)214 void* VG_(HT_gen_remove) ( VgHashTable *table, const void* node, HT_Cmp_t cmp  )
215 {
216    const VgHashNode* hnode    = node; // GEN!!!
217    UWord        chain         = CHAIN_NO(hnode->key, table); // GEN!!!
218    VgHashNode*  curr          =   table->chains[chain];
219    VgHashNode** prev_next_ptr = &(table->chains[chain]);
220 
221    /* Table has been modified; hence HT_Next should assert. */
222    table->iterOK = False;
223 
224    while (curr) {
225       if (hnode->key == curr->key && cmp(hnode, curr) == 0) { // GEN!!!
226          *prev_next_ptr = curr->next;
227          table->n_elements--;
228          return curr;
229       }
230       prev_next_ptr = &(curr->next);
231       curr = curr->next;
232    }
233    return NULL;
234 }
235 
VG_(HT_print_stats)236 void VG_(HT_print_stats) ( const VgHashTable *table, HT_Cmp_t cmp )
237 {
238    #define MAXOCCUR 20
239    UInt elt_occurences[MAXOCCUR+1];
240    UInt key_occurences[MAXOCCUR+1];
241    UInt cno_occurences[MAXOCCUR+1];
242    /* Key occurence  : how many ht elements have the same key.
243       elt_occurences : how many elements are inserted multiple time.
244       cno_occurences : how many chains have that length.
245       The last entry in these arrays collects all occurences >= MAXOCCUR. */
246    #define INCOCCUR(occur,n) (n >= MAXOCCUR ? occur[MAXOCCUR]++ : occur[n]++)
247    UInt i;
248    UInt nkey, nelt, ncno;
249    VgHashNode *cnode, *node;
250 
251    VG_(memset)(key_occurences, 0, sizeof(key_occurences));
252    VG_(memset)(elt_occurences, 0, sizeof(elt_occurences));
253    VG_(memset)(cno_occurences, 0, sizeof(cno_occurences));
254 
255    // Note that the below algorithm is quadractic in nr of elements in a chain
256    // but if that happens, the hash table/function is really bad and that
257    // should be fixed.
258    for (i = 0; i < table->n_chains; i++) {
259       ncno = 0;
260       for (cnode = table->chains[i]; cnode != NULL; cnode = cnode->next) {
261          ncno++;
262 
263          nkey = 0;
264          // Is the same cnode->key existing before cnode ?
265          for (node = table->chains[i]; node != cnode; node = node->next) {
266             if (node->key == cnode->key)
267                nkey++;
268          }
269          // If cnode->key not in a previous node, count occurences of key.
270          if (nkey == 0) {
271             for (node = cnode; node != NULL; node = node->next) {
272                if (node->key == cnode->key)
273                   nkey++;
274             }
275             INCOCCUR(key_occurences, nkey);
276          }
277 
278          nelt = 0;
279          // Is the same cnode element existing before cnode ?
280          for (node = table->chains[i]; node != cnode; node = node->next) {
281             if (node->key == cnode->key
282                 && (cmp == NULL || cmp (node, cnode) == 0)) {
283                nelt++;
284             }
285          }
286          // If cnode element not in a previous node, count occurences of elt.
287          if (nelt == 0) {
288             for (node = cnode; node != NULL; node = node->next) {
289                if (node->key == cnode->key
290                    && (cmp == NULL || cmp (node, cnode) == 0)) {
291                   nelt++;
292                }
293             }
294             INCOCCUR(elt_occurences, nelt);
295          }
296       }
297       INCOCCUR(cno_occurences, ncno);
298    }
299 
300    VG_(message)(Vg_DebugMsg,
301                 "nr occurences of"
302                 " chains of len N,"
303                 " N-plicated keys,"
304                 " N-plicated elts\n");
305    nkey = nelt = ncno = 0;
306    for (i = 0; i <= MAXOCCUR; i++) {
307       if (elt_occurences[i] > 0
308           || key_occurences[i] > 0
309           || cno_occurences[i] > 0)
310          VG_(message)(Vg_DebugMsg,
311                       "%s=%2u : nr chain %6u, nr keys %6u, nr elts %6u\n",
312                       i == MAXOCCUR ? ">" : "N", i,
313                       cno_occurences[i], key_occurences[i], elt_occurences[i]);
314       nkey += key_occurences[i];
315       nelt += elt_occurences[i];
316       ncno += cno_occurences[i];
317    }
318    VG_(message)(Vg_DebugMsg,
319                 "total nr of unique   slots: %6u, keys %6u, elts %6u."
320                 " Avg chain len %3.1f\n",
321                 ncno, nkey, nelt,
322                 (Double)nelt/(Double)(ncno == cno_occurences[0] ?
323                                       1 : ncno - cno_occurences[0]));
324 }
325 
326 
327 /* Allocates a suitably-sized array, copies pointers to all the hashtable
328    elements into it, then returns both the array and the size of it.  The
329    array must be freed with VG_(free).
330 */
VG_(HT_to_array)331 VgHashNode** VG_(HT_to_array) (const VgHashTable *table, /*OUT*/ UInt* n_elems)
332 {
333    UInt       i, j;
334    VgHashNode** arr;
335    VgHashNode*  node;
336 
337    *n_elems = table->n_elements;
338    if (*n_elems == 0)
339       return NULL;
340 
341    arr = VG_(malloc)( "hashtable.Hta.1", *n_elems * sizeof(VgHashNode*) );
342 
343    j = 0;
344    for (i = 0; i < table->n_chains; i++) {
345       for (node = table->chains[i]; node != NULL; node = node->next) {
346          arr[j++] = node;
347       }
348    }
349    vg_assert(j == *n_elems);
350 
351    return arr;
352 }
353 
VG_(HT_ResetIter)354 void VG_(HT_ResetIter)(VgHashTable *table)
355 {
356    vg_assert(table);
357    table->iterNode  = NULL;
358    table->iterChain = 0;
359    table->iterOK    = True;
360 }
361 
VG_(HT_Next)362 void* VG_(HT_Next)(VgHashTable *table)
363 {
364    Int i;
365    vg_assert(table);
366    /* See long comment on HT_Next prototype in pub_tool_hashtable.h.
367       In short if this fails, it means the caller tried to modify the
368       table whilst iterating over it, which is a bug.
369       One exception: HT_remove_at_Iter can remove the current entry and
370       leave the iterator in a valid state for HT_Next. */
371    vg_assert(table->iterOK);
372 
373    if (table->iterNode && table->iterNode->next) {
374       table->iterNode = table->iterNode->next;
375       return table->iterNode;
376    }
377 
378    for (i = table->iterChain; i < table->n_chains; i++) {
379       if (table->chains[i]) {
380          table->iterNode  = table->chains[i];
381          table->iterChain = i + 1;  // Next chain to be traversed
382          return table->iterNode;
383       }
384    }
385    return NULL;
386 }
387 
VG_(HT_remove_at_Iter)388 void VG_(HT_remove_at_Iter)(VgHashTable *table)
389 {
390    vg_assert(table);
391    vg_assert(table->iterOK);
392    vg_assert(table->iterNode);
393 
394    const UInt curChain = table->iterChain - 1; // chain of iterNode.
395 
396 
397    if (table->chains[curChain] == table->iterNode) {
398       /* iterNode is the first of its chain -> remove it from the chain. */
399       table->chains[curChain] = table->iterNode->next;
400       /* Setup the iterator to visit first node of curChain: */
401       table->iterNode  = NULL;
402       table->iterChain = curChain;
403    } else {
404       /* iterNode is somewhere inside curChain chain */
405       VgHashNode* prev = table->chains[curChain];
406 
407       while (prev->next != table->iterNode)
408          prev = prev->next;
409       /* Remove iterNode from the chain. */
410       prev->next = table->iterNode->next;
411       /* Setup the iterator to visit prev->next, which is the node
412          that was after the deleted node. */
413       table->iterNode = prev;
414    }
415 
416    table->n_elements--;
417 }
418 
VG_(HT_destruct)419 void VG_(HT_destruct)(VgHashTable *table, void(*freenode_fn)(void*))
420 {
421    UInt       i;
422    VgHashNode *node, *node_next;
423 
424    for (i = 0; i < table->n_chains; i++) {
425       for (node = table->chains[i]; node != NULL; node = node_next) {
426          node_next = node->next;
427          freenode_fn(node);
428       }
429    }
430    VG_(free)(table->chains);
431    VG_(free)(table);
432 }
433 
434 /*--------------------------------------------------------------------*/
435 /*--- end                                                          ---*/
436 /*--------------------------------------------------------------------*/
437