1 /*
2 ** upb_table
3 **
4 ** This header is INTERNAL-ONLY!  Its interfaces are not public or stable!
5 ** This file defines very fast int->upb_value (inttable) and string->upb_value
6 ** (strtable) hash tables.
7 **
8 ** The table uses chained scatter with Brent's variation (inspired by the Lua
9 ** implementation of hash tables).  The hash function for strings is Austin
10 ** Appleby's "MurmurHash."
11 **
12 ** The inttable uses uintptr_t as its key, which guarantees it can be used to
13 ** store pointers or integers of at least 32 bits (upb isn't really useful on
14 ** systems where sizeof(void*) < 4).
15 **
16 ** The table must be homogeneous (all values of the same type).  In debug
17 ** mode, we check this on insert and lookup.
18 */
19 
20 #ifndef UPB_TABLE_H_
21 #define UPB_TABLE_H_
22 
23 #include <stdint.h>
24 #include <string.h>
25 #include "upb/upb.h"
26 
27 #include "upb/port_def.inc"
28 
29 #ifdef __cplusplus
30 extern "C" {
31 #endif
32 
33 
34 /* upb_value ******************************************************************/
35 
36 /* A tagged union (stored untagged inside the table) so that we can check that
37  * clients calling table accessors are correctly typed without having to have
38  * an explosion of accessors. */
/* Identifies the C type of the values held in a table.  Passed to the table
 * init functions; each enumerator names the C type it stands for. */
typedef enum {
  UPB_CTYPE_INT32 = 1,     /* int32_t */
  UPB_CTYPE_INT64 = 2,     /* int64_t */
  UPB_CTYPE_UINT32 = 3,    /* uint32_t */
  UPB_CTYPE_UINT64 = 4,    /* uint64_t */
  UPB_CTYPE_BOOL = 5,      /* bool */
  UPB_CTYPE_CSTR = 6,      /* char* */
  UPB_CTYPE_PTR = 7,       /* void* */
  UPB_CTYPE_CONSTPTR = 8,  /* const void* */
  UPB_CTYPE_FPTR = 9,      /* upb_func* */
  UPB_CTYPE_FLOAT = 10,    /* float */
  UPB_CTYPE_DOUBLE = 11    /* double */
} upb_ctype_t;
52 
/* Untagged 64-bit storage for a single table value.  The typed
 * upb_value_*() accessors in this header read and write "val". */
typedef struct {
  uint64_t val; /* Raw bits of the stored value. */
} upb_value;
56 
57 /* Like strdup(), which isn't always available since it's not ANSI C. */
58 char *upb_strdup(const char *s, upb_alloc *a);
59 /* Variant that works with a length-delimited rather than NULL-delimited string,
60  * as supported by strtable. */
61 char *upb_strdup2(const char *s, size_t len, upb_alloc *a);
62 
upb_gstrdup(const char * s)63 UPB_INLINE char *upb_gstrdup(const char *s) {
64   return upb_strdup(s, &upb_alloc_global);
65 }
66 
_upb_value_setval(upb_value * v,uint64_t val)67 UPB_INLINE void _upb_value_setval(upb_value *v, uint64_t val) {
68   v->val = val;
69 }
70 
_upb_value_val(uint64_t val)71 UPB_INLINE upb_value _upb_value_val(uint64_t val) {
72   upb_value ret;
73   _upb_value_setval(&ret, val);
74   return ret;
75 }
76 
77 /* For each value ctype, define the following set of functions:
78  *
79  * // Get/set an int32 from a upb_value.
80  * int32_t upb_value_getint32(upb_value val);
81  * void upb_value_setint32(upb_value *val, int32_t cval);
82  *
83  * // Construct a new upb_value from an int32.
84  * upb_value upb_value_int32(int32_t val); */
85 #define FUNCS(name, membername, type_t, converter, proto_type) \
86   UPB_INLINE void upb_value_set ## name(upb_value *val, type_t cval) { \
87     val->val = (converter)cval; \
88   } \
89   UPB_INLINE upb_value upb_value_ ## name(type_t val) { \
90     upb_value ret; \
91     upb_value_set ## name(&ret, val); \
92     return ret; \
93   } \
94   UPB_INLINE type_t upb_value_get ## name(upb_value val) { \
95     return (type_t)(converter)val.val; \
96   }
97 
FUNCS(int32,int32,int32_t,int32_t,UPB_CTYPE_INT32)98 FUNCS(int32,    int32,        int32_t,      int32_t,    UPB_CTYPE_INT32)
99 FUNCS(int64,    int64,        int64_t,      int64_t,    UPB_CTYPE_INT64)
100 FUNCS(uint32,   uint32,       uint32_t,     uint32_t,   UPB_CTYPE_UINT32)
101 FUNCS(uint64,   uint64,       uint64_t,     uint64_t,   UPB_CTYPE_UINT64)
102 FUNCS(bool,     _bool,        bool,         bool,       UPB_CTYPE_BOOL)
103 FUNCS(cstr,     cstr,         char*,        uintptr_t,  UPB_CTYPE_CSTR)
104 FUNCS(ptr,      ptr,          void*,        uintptr_t,  UPB_CTYPE_PTR)
105 FUNCS(constptr, constptr,     const void*,  uintptr_t,  UPB_CTYPE_CONSTPTR)
106 FUNCS(fptr,     fptr,         upb_func*,    uintptr_t,  UPB_CTYPE_FPTR)
107 
108 #undef FUNCS
109 
110 UPB_INLINE void upb_value_setfloat(upb_value *val, float cval) {
111   memcpy(&val->val, &cval, sizeof(cval));
112 }
113 
upb_value_setdouble(upb_value * val,double cval)114 UPB_INLINE void upb_value_setdouble(upb_value *val, double cval) {
115   memcpy(&val->val, &cval, sizeof(cval));
116 }
117 
upb_value_float(float cval)118 UPB_INLINE upb_value upb_value_float(float cval) {
119   upb_value ret;
120   upb_value_setfloat(&ret, cval);
121   return ret;
122 }
123 
upb_value_double(double cval)124 UPB_INLINE upb_value upb_value_double(double cval) {
125   upb_value ret;
126   upb_value_setdouble(&ret, cval);
127   return ret;
128 }
129 
130 #undef SET_TYPE
131 
132 
133 /* upb_tabkey *****************************************************************/
134 
/* A table key.  Its meaning depends on the kind of table:
 *   1. int table: the integer key itself, or
 *   2. string table: a pointer to a string prefixed by its uint32_t length,
 *      owned by the table.
 *
 * A union of those two types would express this more directly, but C89 cannot
 * statically initialize a non-first union member, so a single integer type
 * wide enough to hold a pointer is used instead. */
typedef uintptr_t upb_tabkey;
143 
upb_tabstr(upb_tabkey key,uint32_t * len)144 UPB_INLINE char *upb_tabstr(upb_tabkey key, uint32_t *len) {
145   char* mem = (char*)key;
146   if (len) memcpy(len, mem, sizeof(*len));
147   return mem + sizeof(*len);
148 }
149 
upb_tabstrview(upb_tabkey key)150 UPB_INLINE upb_strview upb_tabstrview(upb_tabkey key) {
151   upb_strview ret;
152   uint32_t len;
153   ret.data = upb_tabstr(key, &len);
154   ret.size = len;
155   return ret;
156 }
157 
158 /* upb_tabval *****************************************************************/
159 
/* One value as stored inside a table: raw 64 bits with no type tag. */
typedef struct upb_tabval {
  uint64_t val; /* Raw bits of the value. */
} upb_tabval;

/* Static initializer for a upb_tabval with no value.  The -1 converts to
 * all-ones in the uint64_t member, which is the pattern upb_arrhas() reports
 * as "not present". */
#define UPB_TABVALUE_EMPTY_INIT  {-1}
165 
166 /* upb_table ******************************************************************/
167 
168 typedef struct _upb_tabent {
169   upb_tabkey key;
170   upb_tabval val;
171 
172   /* Internal chaining.  This is const so we can create static initializers for
173    * tables.  We cast away const sometimes, but *only* when the containing
174    * upb_table is known to be non-const.  This requires a bit of care, but
175    * the subtlety is confined to table.c. */
176   const struct _upb_tabent *next;
177 } upb_tabent;
178 
179 typedef struct {
180   size_t count;          /* Number of entries in the hash part. */
181   uint32_t mask;         /* Mask to turn hash value -> bucket. */
182   uint32_t max_count;    /* Max count before we hit our load limit. */
183   uint8_t size_lg2;      /* Size of the hashtable part is 2^size_lg2 entries. */
184 
185   /* Hash table entries.
186    * Making this const isn't entirely accurate; what we really want is for it to
187    * have the same const-ness as the table it's inside.  But there's no way to
188    * declare that in C.  So we have to make it const so that we can statically
189    * initialize const hash tables.  Then we cast away const when we have to.
190    */
191   const upb_tabent *entries;
192 } upb_table;
193 
194 typedef struct {
195   upb_table t;
196 } upb_strtable;
197 
198 typedef struct {
199   upb_table t;              /* For entries that don't fit in the array part. */
200   const upb_tabval *array;  /* Array part of the table. See const note above. */
201   size_t array_size;        /* Array part size. */
202   size_t array_count;       /* Array part number of elements. */
203 } upb_inttable;
204 
/* Marker value -1.  Presumably the array-part counterpart of the
 * (uint64_t)-1 pattern that upb_arrhas() tests for -- confirm against
 * table.c.  Parenthesized so the expansion stays a single operand wherever
 * the macro is used. */
#define UPB_ARRAY_EMPTYENT (-1)
206 
upb_table_size(const upb_table * t)207 UPB_INLINE size_t upb_table_size(const upb_table *t) {
208   if (t->size_lg2 == 0)
209     return 0;
210   else
211     return 1 << t->size_lg2;
212 }
213 
214 /* Internal-only functions, in .h file only out of necessity. */
upb_tabent_isempty(const upb_tabent * e)215 UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) {
216   return e->key == 0;
217 }
218 
/* Generic hash over |len| bytes at |key| with the given |seed|.  Exposed
 * because some of the unit tests exercise the hashing functionality
 * directly. */
uint32_t upb_murmur_hash2(const void *key, size_t len, uint32_t seed);
221 
upb_intkey(uintptr_t key)222 UPB_INLINE uintptr_t upb_intkey(uintptr_t key) {
223   return key;
224 }
225 
upb_inthash(uintptr_t key)226 UPB_INLINE uint32_t upb_inthash(uintptr_t key) {
227   return (uint32_t)key;
228 }
229 
upb_getentry(const upb_table * t,uint32_t hash)230 static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) {
231   return t->entries + (hash & t->mask);
232 }
233 
upb_arrhas(upb_tabval key)234 UPB_INLINE bool upb_arrhas(upb_tabval key) {
235   return key.val != (uint64_t)-1;
236 }
237 
238 /* Initialize and uninitialize a table, respectively.  If memory allocation
239  * failed, false is returned that the table is uninitialized. */
240 bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a);
241 bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype,
242                         size_t expected_size, upb_alloc *a);
243 void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a);
244 void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a);
245 
upb_inttable_init(upb_inttable * table,upb_ctype_t ctype)246 UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) {
247   return upb_inttable_init2(table, ctype, &upb_alloc_global);
248 }
249 
upb_strtable_init(upb_strtable * table,upb_ctype_t ctype)250 UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) {
251   return upb_strtable_init2(table, ctype, 4, &upb_alloc_global);
252 }
253 
upb_inttable_uninit(upb_inttable * table)254 UPB_INLINE void upb_inttable_uninit(upb_inttable *table) {
255   upb_inttable_uninit2(table, &upb_alloc_global);
256 }
257 
upb_strtable_uninit(upb_strtable * table)258 UPB_INLINE void upb_strtable_uninit(upb_strtable *table) {
259   upb_strtable_uninit2(table, &upb_alloc_global);
260 }
261 
262 /* Returns the number of values in the table. */
263 size_t upb_inttable_count(const upb_inttable *t);
upb_strtable_count(const upb_strtable * t)264 UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) {
265   return t->t.count;
266 }
267 
268 void upb_inttable_packedsize(const upb_inttable *t, size_t *size);
269 void upb_strtable_packedsize(const upb_strtable *t, size_t *size);
270 upb_inttable *upb_inttable_pack(const upb_inttable *t, void *p, size_t *ofs,
271                                 size_t size);
272 upb_strtable *upb_strtable_pack(const upb_strtable *t, void *p, size_t *ofs,
273                                 size_t size);
274 void upb_strtable_clear(upb_strtable *t);
275 
276 /* Inserts the given key into the hashtable with the given value.  The key must
277  * not already exist in the hash table.  For string tables, the key must be
278  * NULL-terminated, and the table will make an internal copy of the key.
279  * Inttables must not insert a value of UINTPTR_MAX.
280  *
281  * If a table resize was required but memory allocation failed, false is
282  * returned and the table is unchanged. */
283 bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
284                           upb_alloc *a);
285 bool upb_strtable_insert3(upb_strtable *t, const char *key, size_t len,
286                           upb_value val, upb_alloc *a);
287 
upb_inttable_insert(upb_inttable * t,uintptr_t key,upb_value val)288 UPB_INLINE bool upb_inttable_insert(upb_inttable *t, uintptr_t key,
289                                     upb_value val) {
290   return upb_inttable_insert2(t, key, val, &upb_alloc_global);
291 }
292 
upb_strtable_insert2(upb_strtable * t,const char * key,size_t len,upb_value val)293 UPB_INLINE bool upb_strtable_insert2(upb_strtable *t, const char *key,
294                                      size_t len, upb_value val) {
295   return upb_strtable_insert3(t, key, len, val, &upb_alloc_global);
296 }
297 
298 /* For NULL-terminated strings. */
upb_strtable_insert(upb_strtable * t,const char * key,upb_value val)299 UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key,
300                                     upb_value val) {
301   return upb_strtable_insert2(t, key, strlen(key), val);
302 }
303 
304 /* Looks up key in this table, returning "true" if the key was found.
305  * If v is non-NULL, copies the value for this key into *v. */
306 bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v);
307 bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
308                           upb_value *v);
309 
310 /* For NULL-terminated strings. */
upb_strtable_lookup(const upb_strtable * t,const char * key,upb_value * v)311 UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key,
312                                     upb_value *v) {
313   return upb_strtable_lookup2(t, key, strlen(key), v);
314 }
315 
316 /* Removes an item from the table.  Returns true if the remove was successful,
317  * and stores the removed item in *val if non-NULL. */
318 bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
319 bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
320                           upb_value *val, upb_alloc *alloc);
321 
upb_strtable_remove2(upb_strtable * t,const char * key,size_t len,upb_value * val)322 UPB_INLINE bool upb_strtable_remove2(upb_strtable *t, const char *key,
323                                      size_t len, upb_value *val) {
324   return upb_strtable_remove3(t, key, len, val, &upb_alloc_global);
325 }
326 
327 /* For NULL-terminated strings. */
upb_strtable_remove(upb_strtable * t,const char * key,upb_value * v)328 UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key,
329                                     upb_value *v) {
330   return upb_strtable_remove2(t, key, strlen(key), v);
331 }
332 
333 /* Updates an existing entry in an inttable.  If the entry does not exist,
334  * returns false and does nothing.  Unlike insert/remove, this does not
335  * invalidate iterators. */
336 bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val);
337 
338 /* Convenience routines for inttables with pointer keys. */
339 bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
340                              upb_alloc *a);
341 bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val);
342 bool upb_inttable_lookupptr(
343     const upb_inttable *t, const void *key, upb_value *val);
344 
upb_inttable_insertptr(upb_inttable * t,const void * key,upb_value val)345 UPB_INLINE bool upb_inttable_insertptr(upb_inttable *t, const void *key,
346                                        upb_value val) {
347   return upb_inttable_insertptr2(t, key, val, &upb_alloc_global);
348 }
349 
350 /* Optimizes the table for the current set of entries, for both memory use and
351  * lookup time.  Client should call this after all entries have been inserted;
352  * inserting more entries is legal, but will likely require a table resize. */
353 void upb_inttable_compact2(upb_inttable *t, upb_alloc *a);
354 
upb_inttable_compact(upb_inttable * t)355 UPB_INLINE void upb_inttable_compact(upb_inttable *t) {
356   upb_inttable_compact2(t, &upb_alloc_global);
357 }
358 
359 /* A special-case inlinable version of the lookup routine for 32-bit
360  * integers. */
upb_inttable_lookup32(const upb_inttable * t,uint32_t key,upb_value * v)361 UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key,
362                                       upb_value *v) {
363   *v = upb_value_int32(0);  /* Silence compiler warnings. */
364   if (key < t->array_size) {
365     upb_tabval arrval = t->array[key];
366     if (upb_arrhas(arrval)) {
367       _upb_value_setval(v, arrval.val);
368       return true;
369     } else {
370       return false;
371     }
372   } else {
373     const upb_tabent *e;
374     if (t->t.entries == NULL) return false;
375     for (e = upb_getentry(&t->t, upb_inthash(key)); true; e = e->next) {
376       if ((uint32_t)e->key == key) {
377         _upb_value_setval(v, e->val.val);
378         return true;
379       }
380       if (e->next == NULL) return false;
381     }
382   }
383 }
384 
385 /* Exposed for testing only. */
386 bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a);
387 
388 /* Iterators ******************************************************************/
389 
390 /* Iterators for int and string tables.  We are subject to some kind of unusual
391  * design constraints:
392  *
393  * For high-level languages:
394  *  - we must be able to guarantee that we don't crash or corrupt memory even if
395  *    the program accesses an invalidated iterator.
396  *
397  * For C++11 range-based for:
398  *  - iterators must be copyable
399  *  - iterators must be comparable
400  *  - it must be possible to construct an "end" value.
401  *
402  * Iteration order is undefined.
403  *
404  * Modifying the table invalidates iterators.  upb_{str,int}table_done() is
405  * guaranteed to work even on an invalidated iterator, as long as the table it
406  * is iterating over has not been freed.  Calling next() or accessing data from
407  * an invalidated iterator yields unspecified elements from the table, but it is
408  * guaranteed not to crash and to return real table elements (except when done()
409  * is true). */
410 
411 
412 /* upb_strtable_iter **********************************************************/
413 
414 /*   upb_strtable_iter i;
415  *   upb_strtable_begin(&i, t);
416  *   for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
417  *     const char *key = upb_strtable_iter_key(&i);
418  *     const upb_value val = upb_strtable_iter_value(&i);
419  *     // ...
420  *   }
421  */
422 
423 typedef struct {
424   const upb_strtable *t;
425   size_t index;
426 } upb_strtable_iter;
427 
428 void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t);
429 void upb_strtable_next(upb_strtable_iter *i);
430 bool upb_strtable_done(const upb_strtable_iter *i);
431 upb_strview upb_strtable_iter_key(const upb_strtable_iter *i);
432 upb_value upb_strtable_iter_value(const upb_strtable_iter *i);
433 void upb_strtable_iter_setdone(upb_strtable_iter *i);
434 bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
435                                const upb_strtable_iter *i2);
436 
437 
438 /* upb_inttable_iter **********************************************************/
439 
440 /*   upb_inttable_iter i;
441  *   upb_inttable_begin(&i, t);
442  *   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
443  *     uintptr_t key = upb_inttable_iter_key(&i);
444  *     upb_value val = upb_inttable_iter_value(&i);
445  *     // ...
446  *   }
447  */
448 
449 typedef struct {
450   const upb_inttable *t;
451   size_t index;
452   bool array_part;
453 } upb_inttable_iter;
454 
str_tabent(const upb_strtable_iter * i)455 UPB_INLINE const upb_tabent *str_tabent(const upb_strtable_iter *i) {
456   return &i->t->t.entries[i->index];
457 }
458 
459 void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t);
460 void upb_inttable_next(upb_inttable_iter *i);
461 bool upb_inttable_done(const upb_inttable_iter *i);
462 uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i);
463 upb_value upb_inttable_iter_value(const upb_inttable_iter *i);
464 void upb_inttable_iter_setdone(upb_inttable_iter *i);
465 bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
466                                const upb_inttable_iter *i2);
467 
468 
469 #ifdef __cplusplus
470 }  /* extern "C" */
471 #endif
472 
473 #include "upb/port_undef.inc"
474 
475 #endif  /* UPB_TABLE_H_ */
476