1 /*
2  * This file is part of ltrace.
3  * Copyright (C) 2012, 2013 Petr Machata, Red Hat Inc.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of the
8  * License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18  * 02110-1301 USA
19  */
20 
21 #include <string.h>
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include "dict.h"
25 
26 struct status_bits {
27 	unsigned char taken : 1;
28 	unsigned char erased : 1;
29 };
30 
31 static struct status_bits *
bitp(struct dict * dict,size_t n)32 bitp(struct dict *dict, size_t n)
33 {
34 	return VECT_ELEMENT(&dict->status, struct status_bits, n);
35 }
36 
37 void
dict_init(struct dict * dict,size_t key_size,size_t value_size,size_t (* hash1)(const void *),int (* eq)(const void *,const void *),size_t (* hash2)(size_t))38 dict_init(struct dict *dict,
39 	  size_t key_size, size_t value_size,
40 	  size_t (*hash1)(const void *),
41 	  int (*eq)(const void *, const void *),
42 	  size_t (*hash2)(size_t))
43 {
44 	assert(hash1 != NULL);
45 	assert(eq != NULL);
46 
47 	vect_init(&dict->keys, key_size);
48 	vect_init(&dict->values, value_size);
49 	VECT_INIT(&dict->status, struct status_bits);
50 	dict->size = 0;
51 
52 	dict->hash1 = hash1;
53 	dict->hash2 = hash2;
54 	dict->eq = eq;
55 }
56 
57 struct clone_data {
58 	struct dict *target;
59 	int (*clone_key)(void *tgt, const void *src, void *data);
60 	int (*clone_value)(void *tgt, const void *src, void *data);
61 	void (*dtor_key)(void *tgt, void *data);
62 	void (*dtor_value)(void *tgt, void *data);
63 	void *data;
64 };
65 
66 static enum callback_status
clone_cb(void * key,void * value,void * data)67 clone_cb(void *key, void *value, void *data)
68 {
69 	struct clone_data *clone_data = data;
70 
71 	char nkey[clone_data->target->keys.elt_size];
72 	if (clone_data->clone_key == NULL)
73 		memmove(nkey, key, sizeof(nkey));
74 	else if (clone_data->clone_key(&nkey, key, clone_data->data) < 0)
75 		return CBS_STOP;
76 
77 	char nvalue[clone_data->target->values.elt_size];
78 	if (clone_data->clone_value == NULL) {
79 		memmove(nvalue, value, sizeof(nvalue));
80 	} else if (clone_data->clone_value(&nvalue, value,
81 					 clone_data->data) < 0) {
82 	fail:
83 		if (clone_data->clone_key != NULL)
84 			clone_data->dtor_key(&nkey, clone_data->data);
85 		return CBS_STOP;
86 	}
87 
88 	if (dict_insert(clone_data->target, nkey, nvalue) < 0) {
89 		if (clone_data->clone_value != NULL)
90 			clone_data->dtor_value(&nvalue, clone_data->data);
91 		goto fail;
92 	}
93 
94 	return CBS_CONT;
95 }
96 
97 int
dict_clone(struct dict * target,const struct dict * source,int (* clone_key)(void * tgt,const void * src,void * data),void (* dtor_key)(void * tgt,void * data),int (* clone_value)(void * tgt,const void * src,void * data),void (* dtor_value)(void * tgt,void * data),void * data)98 dict_clone(struct dict *target, const struct dict *source,
99 	   int (*clone_key)(void *tgt, const void *src, void *data),
100 	   void (*dtor_key)(void *tgt, void *data),
101 	   int (*clone_value)(void *tgt, const void *src, void *data),
102 	   void (*dtor_value)(void *tgt, void *data),
103 	   void *data)
104 {
105 	assert((clone_key != NULL) == (dtor_key != NULL));
106 	assert((clone_value != NULL) == (dtor_value != NULL));
107 
108 	dict_init(target, source->keys.elt_size, source->values.elt_size,
109 		  source->hash1, source->eq, source->hash2);
110 	struct clone_data clone_data = {
111 		target, clone_key, clone_value, dtor_key, dtor_value, data
112 	};
113 	if (dict_each((struct dict *)source, NULL,
114 		      clone_cb, &clone_data) != NULL) {
115 		dict_destroy(target, dtor_key, dtor_value, data);
116 		return -1;
117 	}
118 	return 0;
119 }
120 
121 size_t
dict_size(const struct dict * dict)122 dict_size(const struct dict *dict)
123 {
124 	return dict->size;
125 }
126 
127 int
dict_empty(const struct dict * dict)128 dict_empty(const struct dict *dict)
129 {
130 	return dict->size == 0;
131 }
132 
133 struct destroy_data {
134 	void (*dtor_key)(void *tgt, void *data);
135 	void (*dtor_value)(void *tgt, void *data);
136 	void *data;
137 };
138 
139 static enum callback_status
destroy_cb(void * key,void * value,void * data)140 destroy_cb(void *key, void *value, void *data)
141 {
142 	struct destroy_data *destroy_data = data;
143 	if (destroy_data->dtor_key)
144 		destroy_data->dtor_key(key, destroy_data->data);
145 	if (destroy_data->dtor_value)
146 		destroy_data->dtor_value(value, destroy_data->data);
147 	return CBS_CONT;
148 }
149 
150 void
dict_destroy(struct dict * dict,void (* dtor_key)(void * tgt,void * data),void (* dtor_value)(void * tgt,void * data),void * data)151 dict_destroy(struct dict *dict,
152 	     void (*dtor_key)(void *tgt, void *data),
153 	     void (*dtor_value)(void *tgt, void *data),
154 	     void *data)
155 {
156 	/* Some slots are unused (the corresponding keys and values
157 	 * are uninitialized), so we can't call dtors for them.
158 	 * Iterate DICT instead.  */
159 	if (dtor_key != NULL || dtor_value != NULL) {
160 		struct destroy_data destroy_data = {
161 			dtor_key, dtor_value, data
162 		};
163 		dict_each(dict, NULL, destroy_cb, &destroy_data);
164 	}
165 
166 	vect_destroy(&dict->keys, NULL, NULL);
167 	vect_destroy(&dict->values, NULL, NULL);
168 	vect_destroy(&dict->status, NULL, NULL);
169 }
170 
171 static size_t
default_secondary_hash(size_t pos)172 default_secondary_hash(size_t pos)
173 {
174 	return pos % 97 + 1;
175 }
176 
177 static size_t
small_secondary_hash(size_t pos)178 small_secondary_hash(size_t pos)
179 {
180 	return 1;
181 }
182 
183 static inline size_t
n(struct dict * dict)184 n(struct dict *dict)
185 {
186 	return vect_size(&dict->keys);
187 }
188 
189 static inline size_t (*
hash2(struct dict * dict)190 hash2(struct dict *dict))(size_t)
191 {
192 	if (dict->hash2 != NULL)
193 		return dict->hash2;
194 	else if (n(dict) < 100)
195 		return small_secondary_hash;
196 	else
197 		return default_secondary_hash;
198 }
199 
200 static void *
getkey(struct dict * dict,size_t pos)201 getkey(struct dict *dict, size_t pos)
202 {
203 	return ((unsigned char *)dict->keys.data)
204 		+ dict->keys.elt_size * pos;
205 }
206 
207 static void *
getvalue(struct dict * dict,size_t pos)208 getvalue(struct dict *dict, size_t pos)
209 {
210 	return ((unsigned char *)dict->values.data)
211 		+ dict->values.elt_size * pos;
212 }
213 
214 static size_t
find_slot(struct dict * dict,const void * key,int * foundp,int * should_rehash,size_t * pi)215 find_slot(struct dict *dict, const void *key,
216 	  int *foundp, int *should_rehash, size_t *pi)
217 {
218 	assert(n(dict) > 0);
219 	size_t pos = dict->hash1(key) % n(dict);
220 	size_t pos0 = -1;
221 	size_t d = hash2(dict)(pos);
222 	size_t i = 0;
223 	*foundp = 0;
224 
225 	/* We skip over any taken or erased slots.  But we remember
226 	 * the first erased that we find, and if we don't find the key
227 	 * later, we return that position.  */
228 	for (; bitp(dict, pos)->taken || bitp(dict, pos)->erased;
229 	     pos = (pos + d) % n(dict)) {
230 		if (pos0 == (size_t)-1 && bitp(dict, pos)->erased)
231 			pos0 = pos;
232 
233 		/* If there is a loop, but we've seen an erased
234 		 * element, take that one.  Otherwise give up.  */
235 		if (++i > dict->size) {
236 			if (pos0 != (size_t)-1)
237 				break;
238 			return (size_t)-1;
239 		}
240 
241 		if (bitp(dict, pos)->taken
242 		    && dict->eq(getkey(dict, pos), key)) {
243 			*foundp = 1;
244 			break;
245 		}
246 	}
247 
248 	if (!*foundp && pos0 != (size_t)-1)
249 		pos = pos0;
250 
251 	/* If the hash table degraded into a linked list, request a
252 	 * rehash.  */
253 	if (should_rehash != NULL)
254 		*should_rehash = i > 10 && i > n(dict) / 10;
255 
256 	if (pi != NULL)
257 		*pi = i;
258 	return pos;
259 }
260 
261 static enum callback_status
rehash_move(void * key,void * value,void * data)262 rehash_move(void *key, void *value, void *data)
263 {
264 	if (dict_insert(data, key, value) < 0)
265 		return CBS_STOP;
266 	else
267 		return CBS_CONT;
268 }
269 
270 static int
rehash(struct dict * dict,size_t nn)271 rehash(struct dict *dict, size_t nn)
272 {
273 	assert(nn != n(dict));
274 	int ret = -1;
275 
276 	struct dict tmp;
277 	dict_init(&tmp, dict->keys.elt_size, dict->values.elt_size,
278 		  dict->hash1, dict->eq, dict->hash2);
279 
280 	/* To honor all invariants (so that we can safely call
281 	 * dict_destroy), we first make a request to _reserve_ enough
282 	 * room in all vectors.  This has no observable effect on
283 	 * contents of vectors.  */
284 	if (vect_reserve(&tmp.keys, nn) < 0
285 	    || vect_reserve(&tmp.values, nn) < 0
286 	    || vect_reserve(&tmp.status, nn) < 0)
287 		goto done;
288 
289 	/* Now that we know that there is enough size in vectors, we
290 	 * simply bump the size.  */
291 	tmp.keys.size = nn;
292 	tmp.values.size = nn;
293 	size_t old_size = tmp.status.size;
294 	tmp.status.size = nn;
295 	memset(VECT_ELEMENT(&tmp.status, struct status_bits, old_size),
296 	       0, (tmp.status.size - old_size) * tmp.status.elt_size);
297 
298 	/* At this point, TMP is once more an empty dictionary with NN
299 	 * slots.  Now move stuff from DICT to TMP.  */
300 	if (dict_each(dict, NULL, rehash_move, &tmp) != NULL)
301 		goto done;
302 
303 	/* And now swap contents of DICT and TMP, and we are done.  */
304 	{
305 		struct dict tmp2 = *dict;
306 		*dict = tmp;
307 		tmp = tmp2;
308 	}
309 
310 	ret = 0;
311 
312 done:
313 	/* We only want to release the containers, not the actual data
314 	 * that they hold, so it's fine if we don't pass any dtor.  */
315 	dict_destroy(&tmp, NULL, NULL, NULL);
316 	return ret;
317 
318 }
319 
320 static const size_t primes[] = {
321 	13, 31, 61, 127, 251, 509, 1021, 2039, 4093,
322 	8191, 16381, 32749, 65521, 130981, 0
323 };
324 
325 static size_t
larger_size(size_t current)326 larger_size(size_t current)
327 {
328 	if (current == 0)
329 		return primes[0];
330 
331 	if (current < primes[sizeof(primes)/sizeof(*primes) - 2]) {
332 		size_t i;
333 		for (i = 0; primes[i] != 0; ++i)
334 			if (primes[i] > current)
335 				return primes[i];
336 		abort();
337 	}
338 
339 	/* We ran out of primes, so invent a new one.  The following
340 	 * gives primes until about 17M elements (and then some more
341 	 * later).  */
342 	return 2 * current + 6585;
343 }
344 
345 static size_t
smaller_size(size_t current)346 smaller_size(size_t current)
347 {
348 	if (current <= primes[0])
349 		return primes[0];
350 
351 	if (current <= primes[sizeof(primes)/sizeof(*primes) - 2]) {
352 		size_t i;
353 		size_t prev = 0;
354 		for (i = 0; primes[i] != 0; ++i) {
355 			if (primes[i] >= current)
356 				return prev;
357 			prev = primes[i];
358 		}
359 		abort();
360 	}
361 
362 	return (current - 6585) / 2;
363 }
364 
365 int
dict_insert(struct dict * dict,void * key,void * value)366 dict_insert(struct dict *dict, void *key, void *value)
367 {
368 	if (n(dict) == 0 || dict->size > 0.7 * n(dict))
369 	rehash:
370 		if (rehash(dict, larger_size(n(dict))) < 0)
371 			return -1;
372 
373 	int found;
374 	int should_rehash;
375 	size_t slot_n = find_slot(dict, key, &found, &should_rehash, NULL);
376 	if (slot_n == (size_t)-1)
377 		return -1;
378 	if (found)
379 		return 1;
380 	assert(!bitp(dict, slot_n)->taken);
381 
382 	/* If rehash was requested, do that, and retry.  But just live
383 	 * with it for apparently sparse tables.  No resizing can fix
384 	 * a rubbish hash.  */
385 	if (should_rehash && dict->size > 0.3 * n(dict))
386 		goto rehash;
387 
388 	memmove(getkey(dict, slot_n), key, dict->keys.elt_size);
389 	memmove(getvalue(dict, slot_n), value, dict->values.elt_size);
390 
391 	bitp(dict, slot_n)->taken = 1;
392 	bitp(dict, slot_n)->erased = 0;
393 	++dict->size;
394 
395 	return 0;
396 }
397 
398 void *
dict_find(struct dict * dict,const void * key)399 dict_find(struct dict *dict, const void *key)
400 {
401 	if (dict->size == 0)
402 		return NULL;
403 	assert(n(dict) > 0);
404 
405 	int found;
406 	size_t slot_n = find_slot(dict, key, &found, NULL, NULL);
407 	if (found)
408 		return getvalue(dict, slot_n);
409 	else
410 		return NULL;
411 }
412 
413 int
dict_erase(struct dict * dict,const void * key,void (* dtor_key)(void * tgt,void * data),void (* dtor_value)(void * tgt,void * data),void * data)414 dict_erase(struct dict *dict, const void *key,
415 	   void (*dtor_key)(void *tgt, void *data),
416 	   void (*dtor_value)(void *tgt, void *data),
417 	   void *data)
418 {
419 	int found;
420 	size_t i;
421 	size_t slot_n = find_slot(dict, key, &found, NULL, &i);
422 	if (!found)
423 		return -1;
424 
425 	if (dtor_key != NULL)
426 		dtor_key(getkey(dict, slot_n), data);
427 	if (dtor_value != NULL)
428 		dtor_value(getvalue(dict, slot_n), data);
429 
430 	bitp(dict, slot_n)->taken = 0;
431 	bitp(dict, slot_n)->erased = 1;
432 	--dict->size;
433 
434 	if (dict->size < 0.3 * n(dict)) {
435 		size_t smaller = smaller_size(n(dict));
436 		if (smaller != n(dict))
437 			/* Don't mind if it fails when shrinking.  */
438 			rehash(dict, smaller);
439 	}
440 
441 	return 0;
442 }
443 
444 void *
dict_each(struct dict * dict,void * start_after,enum callback_status (* cb)(void *,void *,void *),void * data)445 dict_each(struct dict *dict, void *start_after,
446 	  enum callback_status (*cb)(void *, void *, void *), void *data)
447 {
448 	size_t i;
449 	if (start_after != NULL)
450 		i = ((start_after - dict->keys.data) / dict->keys.elt_size) + 1;
451 	else
452 		i = 0;
453 
454 	for (; i < dict->keys.size; ++i)
455 		if (bitp(dict, i)->taken && !bitp(dict, i)->erased) {
456 			void *key = getkey(dict, i);
457 			if (cb(key, getvalue(dict, i), data) != CBS_CONT)
458 				return key;
459 		}
460 
461 	return NULL;
462 }
463 
464 size_t
dict_hash_int(const int * key)465 dict_hash_int(const int *key)
466 {
467 	return (size_t)(*key * 2654435761U);
468 }
469 
470 int
dict_eq_int(const int * key1,const int * key2)471 dict_eq_int(const int *key1, const int *key2)
472 {
473 	return *key1 == *key2;
474 }
475 
476 size_t
dict_hash_string(const char ** key)477 dict_hash_string(const char **key)
478 {
479 	size_t h = 5381;
480 	const char *str = *key;
481 	while (*str != 0)
482 		h = h * 33 ^ *str++;
483 	return h;
484 }
485 
486 int
dict_eq_string(const char ** key1,const char ** key2)487 dict_eq_string(const char **key1, const char **key2)
488 {
489 	return strcmp(*key1, *key2) == 0;
490 }
491 
492 void
dict_dtor_string(const char ** key,void * data)493 dict_dtor_string(const char **key, void *data)
494 {
495 	free((char *)*key);
496 }
497 
498 int
dict_clone_string(const char ** tgt,const char ** src,void * data)499 dict_clone_string(const char **tgt, const char **src, void *data)
500 {
501 	*tgt = strdup(*src);
502 	return *tgt != NULL ? 0 : -1;
503 }
504 
505 #ifdef TEST
506 static enum callback_status
dump(int * key,int * value,void * data)507 dump(int *key, int *value, void *data)
508 {
509 	char *seen = data;
510 	assert(seen[*key] == 0);
511 	seen[*key] = 1;
512 	assert(*value == *key * 2 + 1);
513 	return CBS_STOP;
514 }
515 
516 static size_t
dict_hash_int_silly(const int * key)517 dict_hash_int_silly(const int *key)
518 {
519 	return *key % 10;
520 }
521 
522 static void
verify(struct dict * di,size_t len,char * seen)523 verify(struct dict *di, size_t len, char *seen)
524 {
525 	size_t ct = 0;
526 	int *it;
527 	for (it = NULL; (it = DICT_EACH(di, int, int, it, dump, seen)) != NULL;)
528 		ct++;
529 	assert(ct == len);
530 	memset(seen, 0, len);
531 }
532 
533 static enum callback_status
fill_keys(int * key,int * value,void * data)534 fill_keys(int *key, int *value, void *data)
535 {
536 	int *array = data;
537 	array[++array[0]] = *key;
538 	return CBS_CONT;
539 }
540 
541 static void
test1(void)542 test1(void)
543 {
544 	struct dict di;
545 	DICT_INIT(&di, int, int, dict_hash_int, dict_eq_int, NULL);
546 
547 	char seen[100000] = {};
548 	size_t i;
549 	for (i = 0; i < sizeof(seen); ++i) {
550 		int key = i;
551 		int value = 2 * i + 1;
552 		DICT_INSERT(&di, &key, &value);
553 		int *valp = DICT_FIND_REF(&di, &key, int);
554 		assert(valp != NULL);
555 		assert(*valp == value);
556 		assert(dict_size(&di) == i + 1);
557 	}
558 
559 	verify(&di, sizeof(seen), seen);
560 
561 	struct dict d2;
562 	DICT_CLONE(&d2, &di, int, int, NULL, NULL, NULL, NULL, NULL);
563 	DICT_DESTROY(&di, int, int, NULL, NULL, NULL);
564 	verify(&d2, sizeof(seen), seen);
565 
566 	/* Now we try to gradually erase all elements.  We can't erase
567 	 * inside a DICT_EACH call, so copy first keys to a separate
568 	 * memory area first.  */
569 	int keys[d2.size + 1];
570 	size_t ct = 0;
571 	keys[0] = 0;
572 	DICT_EACH(&d2, int, int, NULL, fill_keys, keys);
573 	for (i = 0; i < (size_t)keys[0]; ++i) {
574 		assert(DICT_ERASE(&d2, &keys[i + 1], int,
575 				  NULL, NULL, NULL) == 0);
576 		++ct;
577 	}
578 	assert(ct == sizeof(seen));
579 	DICT_DESTROY(&d2, int, int, NULL, NULL, NULL);
580 }
581 
582 static void
test_erase(void)583 test_erase(void)
584 {
585 	int i;
586 
587 	/* To test erase, we need a relatively bad hash function, so
588 	 * that there are some overlapping chains in the table.  */
589 	struct dict d2;
590 	DICT_INIT(&d2, int, int, dict_hash_int_silly, dict_eq_int, NULL);
591 	const int limit = 500;
592 	for (i = 0; i < limit; ++i) {
593 		int key = 2 * i + 1;
594 		int value = 2 * key + 1;
595 		DICT_INSERT(&d2, &key, &value);
596 	}
597 
598 	/* Now we try to delete each of the keys, and verify that none
599 	 * of the chains was broken.  */
600 	for (i = 0; i < limit; ++i) {
601 		struct dict copy;
602 		DICT_CLONE(&copy, &d2, int, int, NULL, NULL, NULL, NULL, NULL);
603 		int key = 2 * i + 1;
604 		DICT_ERASE(&copy, &key, int, NULL, NULL, NULL);
605 		assert(dict_size(&copy) == dict_size(&d2) - 1);
606 
607 		int j;
608 		for (j = 0; j < limit; ++j) {
609 			key = 2 * j + 1;
610 			int *valp = DICT_FIND_REF(&copy, &key, int);
611 			if (i != j) {
612 				assert(valp != NULL);
613 				assert(*valp == 2 * key + 1);
614 			} else {
615 				assert(valp == NULL);
616 			}
617 		}
618 
619 		DICT_DESTROY(&copy, int, int, NULL, NULL, NULL);
620 	}
621 	DICT_DESTROY(&d2, int, int, NULL, NULL, NULL);
622 }
623 
main(int argc,char * argv[])624 int main(int argc, char *argv[])
625 {
626 	test1();
627 	test_erase();
628 	return 0;
629 }
630 
631 #endif
632