/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_LIBARTBASE_BASE_HASH_SET_H_
#define ART_LIBARTBASE_BASE_HASH_SET_H_

#include <stdint.h>

#include <functional>
#include <iterator>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>

#include <android-base/logging.h>

#include "base/data_hash.h"
#include "bit_utils.h"
#include "macros.h"

namespace art {

template <class Elem, class HashSetType>
class HashSetIterator {
 public:
  using iterator_category = std::forward_iterator_tag;
  using value_type = Elem;
  using difference_type = std::ptrdiff_t;
  using pointer = Elem*;
  using reference = Elem&;

  HashSetIterator(const HashSetIterator&) = default;
  HashSetIterator(HashSetIterator&&) = default;
  HashSetIterator(HashSetType* hash_set, size_t index) : index_(index), hash_set_(hash_set) {}

  // Conversion from iterator to const_iterator.
  template <class OtherElem,
            class OtherHashSetType,
            typename = typename std::enable_if<
                std::is_same<Elem, const OtherElem>::value &&
                std::is_same<HashSetType, const OtherHashSetType>::value>::type>
  HashSetIterator(const HashSetIterator<OtherElem, OtherHashSetType>& other)
      : index_(other.index_), hash_set_(other.hash_set_) {}

  HashSetIterator& operator=(const HashSetIterator&) = default;
  HashSetIterator& operator=(HashSetIterator&&) = default;

  bool operator==(const HashSetIterator& other) const {
    return hash_set_ == other.hash_set_ && this->index_ == other.index_;
  }

  bool operator!=(const HashSetIterator& other) const {
    return !(*this == other);
  }

  HashSetIterator operator++() {  // Value after modification.
    this->index_ = hash_set_->NextNonEmptySlot(index_);
    return *this;
  }

  HashSetIterator operator++(int) {
    HashSetIterator temp = *this;
    ++*this;
    return temp;
  }

  Elem& operator*() const {
    DCHECK(!hash_set_->IsFreeSlot(this->index_));
    return hash_set_->ElementForIndex(this->index_);
  }

  Elem* operator->() const {
    return &**this;
  }

 private:
  size_t index_;
  HashSetType* hash_set_;

  template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
  friend bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
                         const HashSetIterator<Elem2, HashSetType2>& rhs);
  template <class T, class EmptyFn, class HashFn, class Pred, class Alloc> friend class HashSet;
  template <class OtherElem, class OtherHashSetType> friend class HashSetIterator;
};

template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
                const HashSetIterator<Elem2, HashSetType2>& rhs) {
  static_assert(
      std::is_convertible<HashSetIterator<Elem1, HashSetType1>,
                          HashSetIterator<Elem2, HashSetType2>>::value ||
      std::is_convertible<HashSetIterator<Elem2, HashSetType2>,
                          HashSetIterator<Elem1, HashSetType1>>::value, "Bad iterator types.");
  DCHECK_EQ(lhs.hash_set_, rhs.hash_set_);
  return lhs.index_ == rhs.index_;
}

template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
bool operator!=(const HashSetIterator<Elem1, HashSetType1>& lhs,
                const HashSetIterator<Elem2, HashSetType2>& rhs) {
  return !(lhs == rhs);
}

// Default functions for making an item empty (by value-initializing it) and
// for testing whether an item is empty.
template <class T>
class DefaultEmptyFn {
 public:
  void MakeEmpty(T& item) const {
    item = T();
  }
  bool IsEmpty(const T& item) const {
    return item == T();
  }
};

template <class T>
class DefaultEmptyFn<T*> {
 public:
  void MakeEmpty(T*& item) const {
    item = nullptr;
  }
  bool IsEmpty(T* const& item) const {
    return item == nullptr;
  }
};

template <class T>
using DefaultHashFn = typename std::conditional<std::is_same<T, std::string>::value,
                                                DataHash,
                                                std::hash<T>>::type;

struct DefaultStringEquals {
  // Allow comparison with anything that can be compared to std::string,
  // for example std::string_view.
  template <typename T>
  bool operator()(const std::string& lhs, const T& rhs) const {
    return lhs == rhs;
  }
};

template <class T>
using DefaultPred = typename std::conditional<std::is_same<T, std::string>::value,
                                              DefaultStringEquals,
                                              std::equal_to<T>>::type;
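
// Together, DataHash and DefaultStringEquals let a HashSet<std::string> be probed
// with a std::string_view key, avoiding a temporary std::string. A minimal sketch,
// assuming DataHash hashes any contiguous char container the same way it hashes
// std::string:
//
//   HashSet<std::string> names;
//   names.insert("hello");
//   auto it = names.find(std::string_view("hello"));  // No allocation for the key.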

// Low memory version of a hash set, uses less memory than std::unordered_multiset since elements
// aren't boxed. Uses linear probing to resolve collisions.
// EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item).
// TODO: We could get rid of this requirement by using a bitmap, though maybe this would be slower
// and more complicated.
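//
// An illustrative custom EmptyFn (a sketch, not part of this header): for an
// integer element type, any sentinel value that never occurs in live data can
// mark empty slots. IdEmptyFn and kEmptyId below are hypothetical names.
//
//   struct IdEmptyFn {
//     static constexpr uint32_t kEmptyId = 0xffffffffu;
//     void MakeEmpty(uint32_t& item) const { item = kEmptyId; }
//     bool IsEmpty(const uint32_t& item) const { return item == kEmptyId; }
//   };
//   HashSet<uint32_t, IdEmptyFn> ids;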
template <class T,
          class EmptyFn = DefaultEmptyFn<T>,
          class HashFn = DefaultHashFn<T>,
          class Pred = DefaultPred<T>,
          class Alloc = std::allocator<T>>
class HashSet {
 public:
  using value_type = T;
  using allocator_type = Alloc;
  using reference = T&;
  using const_reference = const T&;
  using pointer = T*;
  using const_pointer = const T*;
  using iterator = HashSetIterator<T, HashSet>;
  using const_iterator = HashSetIterator<const T, const HashSet>;
  using size_type = size_t;
  using difference_type = ptrdiff_t;

  static constexpr double kDefaultMinLoadFactor = 0.4;
  static constexpr double kDefaultMaxLoadFactor = 0.7;
  static constexpr size_t kMinBuckets = 1000;

  // Frees the backing storage (if owned) and resets to an empty state; the next insertion
  // allocates a fresh array which we own.
  void clear() {
    DeallocateStorage();
    num_elements_ = 0;
    elements_until_expand_ = 0;
  }

  HashSet() : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor) {}

  HashSet(double min_load_factor, double max_load_factor) noexcept
      : num_elements_(0u),
        num_buckets_(0u),
        elements_until_expand_(0u),
        owns_data_(false),
        data_(nullptr),
        min_load_factor_(min_load_factor),
        max_load_factor_(max_load_factor) {
    DCHECK_GT(min_load_factor, 0.0);
    DCHECK_LT(max_load_factor, 1.0);
  }

  explicit HashSet(const allocator_type& alloc) noexcept
      : allocfn_(alloc),
        hashfn_(),
        emptyfn_(),
        pred_(),
        num_elements_(0u),
        num_buckets_(0u),
        elements_until_expand_(0u),
        owns_data_(false),
        data_(nullptr),
        min_load_factor_(kDefaultMinLoadFactor),
        max_load_factor_(kDefaultMaxLoadFactor) {
  }

  HashSet(const HashSet& other) noexcept
      : allocfn_(other.allocfn_),
        hashfn_(other.hashfn_),
        emptyfn_(other.emptyfn_),
        pred_(other.pred_),
        num_elements_(other.num_elements_),
        num_buckets_(0),
        elements_until_expand_(other.elements_until_expand_),
        owns_data_(false),
        data_(nullptr),
        min_load_factor_(other.min_load_factor_),
        max_load_factor_(other.max_load_factor_) {
    AllocateStorage(other.NumBuckets());
    for (size_t i = 0; i < num_buckets_; ++i) {
      ElementForIndex(i) = other.data_[i];
    }
  }

  // noexcept required so that the move constructor is used instead of copy constructor.
  // b/27860101
  HashSet(HashSet&& other) noexcept
      : allocfn_(std::move(other.allocfn_)),
        hashfn_(std::move(other.hashfn_)),
        emptyfn_(std::move(other.emptyfn_)),
        pred_(std::move(other.pred_)),
        num_elements_(other.num_elements_),
        num_buckets_(other.num_buckets_),
        elements_until_expand_(other.elements_until_expand_),
        owns_data_(other.owns_data_),
        data_(other.data_),
        min_load_factor_(other.min_load_factor_),
        max_load_factor_(other.max_load_factor_) {
    other.num_elements_ = 0u;
    other.num_buckets_ = 0u;
    other.elements_until_expand_ = 0u;
    other.owns_data_ = false;
    other.data_ = nullptr;
  }

  // Construct from existing data.
  // Reads from a block of memory; if make_copy_of_data is false, then data_ points into the
  // passed-in ptr.
  HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) noexcept {
    uint64_t temp;
    size_t offset = 0;
    offset = ReadFromBytes(ptr, offset, &temp);
    num_elements_ = static_cast<uint64_t>(temp);
    offset = ReadFromBytes(ptr, offset, &temp);
    num_buckets_ = static_cast<uint64_t>(temp);
    CHECK_LE(num_elements_, num_buckets_);
    offset = ReadFromBytes(ptr, offset, &temp);
    elements_until_expand_ = static_cast<uint64_t>(temp);
    offset = ReadFromBytes(ptr, offset, &min_load_factor_);
    offset = ReadFromBytes(ptr, offset, &max_load_factor_);
    if (!make_copy_of_data) {
      owns_data_ = false;
      data_ = const_cast<T*>(reinterpret_cast<const T*>(ptr + offset));
      offset += sizeof(*data_) * num_buckets_;
    } else {
      AllocateStorage(num_buckets_);
      // Read elements; note that this may not be safe for cross compilation if the elements are
      // pointer sized.
      for (size_t i = 0; i < num_buckets_; ++i) {
        offset = ReadFromBytes(ptr, offset, &data_[i]);
      }
    }
    // Caller responsible for aligning.
    *read_count = offset;
  }

  // Returns how large the table is after being written. If ptr is null, then no writing happens
  // but the size is still returned. ptr must be 8-byte aligned.
  size_t WriteToMemory(uint8_t* ptr) const {
    size_t offset = 0;
    offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
    offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
    offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(elements_until_expand_));
    offset = WriteToBytes(ptr, offset, min_load_factor_);
    offset = WriteToBytes(ptr, offset, max_load_factor_);
    // Write elements; note that this may not be safe for cross compilation if the elements are
    // pointer sized.
    for (size_t i = 0; i < num_buckets_; ++i) {
      offset = WriteToBytes(ptr, offset, data_[i]);
    }
    // Caller responsible for aligning.
    return offset;
  }
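
  // Serialization round-trip sketch (illustrative only; assumes a trivially copyable
  // element type and a sufficiently aligned buffer):
  //
  //   HashSet<uint64_t> set;                                  // uint64_t is just an example.
  //   set.insert(42u);
  //   std::vector<uint8_t> buf(set.WriteToMemory(nullptr));   // Null ptr: query the size only.
  //   set.WriteToMemory(buf.data());
  //   size_t read_count;
  //   HashSet<uint64_t> copy(buf.data(), /*make_copy_of_data=*/true, &read_count);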

  ~HashSet() {
    DeallocateStorage();
  }

  HashSet& operator=(HashSet&& other) noexcept {
    HashSet(std::move(other)).swap(*this);  // NOLINT [runtime/explicit] [5]
    return *this;
  }

  HashSet& operator=(const HashSet& other) noexcept {
    HashSet(other).swap(*this);  // NOLINT(runtime/explicit) - a case of lint gone mad.
    return *this;
  }

  // Lower case for c++11 for each.
  iterator begin() {
    iterator ret(this, 0);
    if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
      ++ret;  // Skip all the empty slots.
    }
    return ret;
  }

  // Lower case for c++11 for each. const version.
  const_iterator begin() const {
    const_iterator ret(this, 0);
    if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
      ++ret;  // Skip all the empty slots.
    }
    return ret;
  }

  // Lower case for c++11 for each.
  iterator end() {
    return iterator(this, NumBuckets());
  }

  // Lower case for c++11 for each. const version.
  const_iterator end() const {
    return const_iterator(this, NumBuckets());
  }

  size_t size() const {
    return num_elements_;
  }

  bool empty() const {
    return size() == 0;
  }

  // Erase algorithm:
  // Make an empty slot where the iterator is pointing.
  // Scan forwards until we hit another empty slot.
  // If an element in between doesn't rehash to the range from the current empty slot to the
  // scanned slot, it must have been probed from before the empty slot; in that case we can move
  // it to the empty slot and make the slot we just moved from the new empty slot.
  // Relies on maintaining the invariant that there are no empty slots from the 'ideal' index of
  // an element to its actual location/index.
  // Note that since erase shuffles back elements, it may result in the same element being visited
  // twice during HashSet iteration. This happens when an element already visited during iteration
  // gets shuffled to the end of the bucket array.
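  //
  // A worked example (illustrative): with ideal indices in parentheses and buckets
  // [A(0)][B(0)][C(1)][_]..., erasing A empties slot 0. Scanning forward: B's ideal
  // index 0 is not in (0, 1], so B moves into slot 0 and slot 1 becomes empty; then
  // C's ideal index 1 is not in (1, 2], so C moves into slot 1 and slot 2 becomes
  // the empty slot, where the scan hits an empty bucket and stops.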
  iterator erase(iterator it) {
    // empty_index is the index that will become empty.
    size_t empty_index = it.index_;
    DCHECK(!IsFreeSlot(empty_index));
    size_t next_index = empty_index;
    bool filled = false;  // True if we filled the empty index.
    while (true) {
      next_index = NextIndex(next_index);
      T& next_element = ElementForIndex(next_index);
      // If the next element is empty, we are done. Make sure to clear the current empty index.
      if (emptyfn_.IsEmpty(next_element)) {
        emptyfn_.MakeEmpty(ElementForIndex(empty_index));
        break;
      }
      // Otherwise try to see if the next element can fill the current empty index.
      const size_t next_hash = hashfn_(next_element);
      // Calculate the ideal index; if it is within empty_index + 1 to next_index then there is
      // nothing we can do.
      size_t next_ideal_index = IndexForHash(next_hash);
      // Loop around if needed for our check.
      size_t unwrapped_next_index = next_index;
      if (unwrapped_next_index < empty_index) {
        unwrapped_next_index += NumBuckets();
      }
      // Loop around if needed for our check.
      size_t unwrapped_next_ideal_index = next_ideal_index;
      if (unwrapped_next_ideal_index < empty_index) {
        unwrapped_next_ideal_index += NumBuckets();
      }
      if (unwrapped_next_ideal_index <= empty_index ||
          unwrapped_next_ideal_index > unwrapped_next_index) {
        // If the target index isn't within our current range it must have been probed from before
        // the empty index.
        ElementForIndex(empty_index) = std::move(next_element);
        filled = true;  // TODO: Optimize
        empty_index = next_index;
      }
    }
    --num_elements_;
    // If we didn't fill the slot then we need to go to the next non-free slot.
    if (!filled) {
      ++it;
    }
    return it;
  }

  // Find an element; returns end() if not found.
  // Allows custom key (K) types. Example of when this is useful: a set of Class* hashed by name,
  // where we want to look up a class by name without having to allocate a dummy Class object on
  // the heap just for the lookup.
  template <typename K>
  iterator find(const K& key) {
    return FindWithHash(key, hashfn_(key));
  }

  template <typename K>
  const_iterator find(const K& key) const {
    return FindWithHash(key, hashfn_(key));
  }

  template <typename K>
  iterator FindWithHash(const K& key, size_t hash) {
    return iterator(this, FindIndex(key, hash));
  }

  template <typename K>
  const_iterator FindWithHash(const K& key, size_t hash) const {
    return const_iterator(this, FindIndex(key, hash));
  }
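
  // A sketch of using FindWithHash to compute a key's hash once for repeated lookups
  // (assumes the supplied hash matches what HashFn computes for the key, which the
  // DCHECK in FindIndexImpl enforces):
  //
  //   HashSet<std::string> set;
  //   std::string_view key = "name";
  //   const size_t hash = DataHash()(key);  // DataHash is the default HashFn for std::string.
  //   for (int i = 0; i < 3; ++i) {
  //     auto it = set.FindWithHash(key, hash);  // No re-hashing on each lookup.
  //   }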

  // Insert an element with a hint; returns the position and whether insertion took place.
  // Note: The hint is not very useful for a HashSet<> unless there are many hash conflicts,
  // and in that case the use of HashSet<> itself should be reconsidered.
  std::pair<iterator, bool> insert(const_iterator hint ATTRIBUTE_UNUSED, const T& element) {
    return insert(element);
  }
  std::pair<iterator, bool> insert(const_iterator hint ATTRIBUTE_UNUSED, T&& element) {
    return insert(std::move(element));
  }

  // Insert an element if no equal element is already present. Returns an iterator to the
  // inserted or pre-existing element, and a bool that is true if insertion took place.
  std::pair<iterator, bool> insert(const T& element) {
    return InsertWithHash(element, hashfn_(element));
  }
  std::pair<iterator, bool> insert(T&& element) {
    return InsertWithHash(std::move(element), hashfn_(element));
  }

  template <typename U, typename = typename std::enable_if<std::is_convertible<U, T>::value>::type>
  std::pair<iterator, bool> InsertWithHash(U&& element, size_t hash) {
    DCHECK_EQ(hash, hashfn_(element));
    if (num_elements_ >= elements_until_expand_) {
      Expand();
      DCHECK_LT(num_elements_, elements_until_expand_);
    }
    bool find_failed = false;
    auto find_fail_fn = [&](size_t index) {
      find_failed = true;
      return index;
    };
    size_t index = FindIndexImpl(element, hash, find_fail_fn);
    if (find_failed) {
      data_[index] = std::forward<U>(element);
      ++num_elements_;
    }
    return std::make_pair(iterator(this, index), find_failed);
  }
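
  // For example:
  //
  //   HashSet<std::string> set;
  //   bool added = set.insert("key").second;  // true: newly inserted.
  //   added = set.insert("key").second;       // false: an equal element already exists.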

  void swap(HashSet& other) {
    // Use argument-dependent lookup with fall-back to std::swap() for function objects.
    using std::swap;
    swap(allocfn_, other.allocfn_);
    swap(hashfn_, other.hashfn_);
    swap(emptyfn_, other.emptyfn_);
    swap(pred_, other.pred_);
    std::swap(data_, other.data_);
    std::swap(num_buckets_, other.num_buckets_);
    std::swap(num_elements_, other.num_elements_);
    std::swap(elements_until_expand_, other.elements_until_expand_);
    std::swap(min_load_factor_, other.min_load_factor_);
    std::swap(max_load_factor_, other.max_load_factor_);
    std::swap(owns_data_, other.owns_data_);
  }

  allocator_type get_allocator() const {
    return allocfn_;
  }

  void ShrinkToMaximumLoad() {
    Resize(size() / max_load_factor_);
  }

  // Reserve enough room so that size() can reach num_elements without the hash set needing to
  // grow. No-op if the hash set is already large enough for this.
  void reserve(size_t num_elements) {
    size_t num_buckets = num_elements / max_load_factor_;
    // Deal with rounding errors. Add one for rounding.
    while (static_cast<size_t>(num_buckets * max_load_factor_) <= num_elements + 1u) {
      ++num_buckets;
    }
    if (num_buckets > NumBuckets()) {
      Resize(num_buckets);
    }
  }
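
  // For example, with the default max load factor of 0.7, reserve(1000) asks for roughly
  // 1000 / 0.7, i.e. about 1430 buckets, then nudges the count up slightly so that
  // floating-point rounding cannot leave the table one bucket short.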

  // Total distance that inserted elements were probed from their ideal slot. Used for measuring
  // how good hash functions are.
  size_t TotalProbeDistance() const {
    size_t total = 0;
    for (size_t i = 0; i < NumBuckets(); ++i) {
      const T& element = ElementForIndex(i);
      if (!emptyfn_.IsEmpty(element)) {
        size_t ideal_location = IndexForHash(hashfn_(element));
        if (ideal_location > i) {
          total += i + NumBuckets() - ideal_location;
        } else {
          total += i - ideal_location;
        }
      }
    }
    return total;
  }

  // Calculate the current load factor and return it.
  double CalculateLoadFactor() const {
    return static_cast<double>(size()) / static_cast<double>(NumBuckets());
  }

  // Make sure that everything reinserts in the right spot. Returns the number of errors.
  size_t Verify() NO_THREAD_SAFETY_ANALYSIS {
    size_t errors = 0;
    for (size_t i = 0; i < num_buckets_; ++i) {
      T& element = data_[i];
      if (!emptyfn_.IsEmpty(element)) {
        T temp;
        emptyfn_.MakeEmpty(temp);
        std::swap(temp, element);
        size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp)));
        if (i != first_slot) {
          LOG(ERROR) << "Element " << i << " should be in slot " << first_slot;
          ++errors;
        }
        std::swap(temp, element);
      }
    }
    return errors;
  }

  double GetMinLoadFactor() const {
    return min_load_factor_;
  }

  double GetMaxLoadFactor() const {
    return max_load_factor_;
  }

  // Change the load factor of the hash set. If the current load factor is greater than the max
  // specified, then we resize the hash table storage.
  void SetLoadFactor(double min_load_factor, double max_load_factor) {
    DCHECK_LT(min_load_factor, max_load_factor);
    DCHECK_GT(min_load_factor, 0.0);
    DCHECK_LT(max_load_factor, 1.0);
    min_load_factor_ = min_load_factor;
    max_load_factor_ = max_load_factor;
    elements_until_expand_ = NumBuckets() * max_load_factor_;
    // If the current load factor isn't in the range, then resize to the mean of the minimum and
    // maximum load factor.
    const double load_factor = CalculateLoadFactor();
    if (load_factor > max_load_factor_) {
      Resize(size() / ((min_load_factor_ + max_load_factor_) * 0.5));
    }
  }

  // The hash set expands when size() reaches ElementsUntilExpand().
  size_t ElementsUntilExpand() const {
    return elements_until_expand_;
  }

  size_t NumBuckets() const {
    return num_buckets_;
  }

 private:
  T& ElementForIndex(size_t index) {
    DCHECK_LT(index, NumBuckets());
    DCHECK(data_ != nullptr);
    return data_[index];
  }

  const T& ElementForIndex(size_t index) const {
    DCHECK_LT(index, NumBuckets());
    DCHECK(data_ != nullptr);
    return data_[index];
  }

  size_t IndexForHash(size_t hash) const {
    // Protect against undefined behavior (division by zero).
    if (UNLIKELY(num_buckets_ == 0)) {
      return 0;
    }
    return hash % num_buckets_;
  }

  size_t NextIndex(size_t index) const {
    if (UNLIKELY(++index >= num_buckets_)) {
      DCHECK_EQ(index, NumBuckets());
      return 0;
    }
    return index;
  }

  // Find the hash table slot for an element, or return NumBuckets() if not found.
  // This value for not found is important so that iterator(this, FindIndex(...)) == end().
  template <typename K>
  size_t FindIndex(const K& element, size_t hash) const {
    // Guard against failing to get an element for a non-existing index.
    if (UNLIKELY(NumBuckets() == 0)) {
      return 0;
    }
    auto fail_fn = [&](size_t index ATTRIBUTE_UNUSED) { return NumBuckets(); };
    return FindIndexImpl(element, hash, fail_fn);
  }

  // Find the hash table slot for an element, or return an empty slot index if not found.
  template <typename K, typename FailFn>
  size_t FindIndexImpl(const K& element, size_t hash, FailFn fail_fn) const {
    DCHECK_NE(NumBuckets(), 0u);
    DCHECK_EQ(hashfn_(element), hash);
    size_t index = IndexForHash(hash);
    while (true) {
      const T& slot = ElementForIndex(index);
      if (emptyfn_.IsEmpty(slot)) {
        return fail_fn(index);
      }
      if (pred_(slot, element)) {
        return index;
      }
      index = NextIndex(index);
    }
  }

  bool IsFreeSlot(size_t index) const {
    return emptyfn_.IsEmpty(ElementForIndex(index));
  }

  // Allocate a number of buckets.
  void AllocateStorage(size_t num_buckets) {
    num_buckets_ = num_buckets;
    data_ = allocfn_.allocate(num_buckets_);
    owns_data_ = true;
    for (size_t i = 0; i < num_buckets_; ++i) {
      allocfn_.construct(allocfn_.address(data_[i]));
      emptyfn_.MakeEmpty(data_[i]);
    }
  }

  void DeallocateStorage() {
    if (owns_data_) {
      for (size_t i = 0; i < NumBuckets(); ++i) {
        allocfn_.destroy(allocfn_.address(data_[i]));
      }
      if (data_ != nullptr) {
        allocfn_.deallocate(data_, NumBuckets());
      }
      owns_data_ = false;
    }
    data_ = nullptr;
    num_buckets_ = 0;
  }

  // Expand the set based on the load factors.
  void Expand() {
    size_t min_index = static_cast<size_t>(size() / min_load_factor_);
    // Resize based on the minimum load factor.
    Resize(min_index);
  }

  // Expand / shrink the table to the new specified size.
  void Resize(size_t new_size) {
    if (new_size < kMinBuckets) {
      new_size = kMinBuckets;
    }
    DCHECK_GE(new_size, size());
    T* const old_data = data_;
    size_t old_num_buckets = num_buckets_;
    // Reinsert all of the old elements.
    const bool owned_data = owns_data_;
    AllocateStorage(new_size);
    for (size_t i = 0; i < old_num_buckets; ++i) {
      T& element = old_data[i];
      if (!emptyfn_.IsEmpty(element)) {
        data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
      }
      if (owned_data) {
        allocfn_.destroy(allocfn_.address(element));
      }
    }
    if (owned_data) {
      allocfn_.deallocate(old_data, old_num_buckets);
    }

    // When we hit elements_until_expand_, we are at the max load factor and must expand again.
    elements_until_expand_ = NumBuckets() * max_load_factor_;
  }

  ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
    DCHECK_LT(index, NumBuckets());  // Don't try to get a slot out of range.
    size_t non_empty_count = 0;
    while (!emptyfn_.IsEmpty(data_[index])) {
      index = NextIndex(index);
      non_empty_count++;
      DCHECK_LE(non_empty_count, NumBuckets());  // Don't loop forever.
    }
    return index;
  }

  size_t NextNonEmptySlot(size_t index) const {
    const size_t num_buckets = NumBuckets();
    DCHECK_LT(index, num_buckets);
    do {
      ++index;
    } while (index < num_buckets && IsFreeSlot(index));
    return index;
  }

  // Return new offset.
  template <typename Elem>
  static size_t WriteToBytes(uint8_t* ptr, size_t offset, Elem n) {
    DCHECK_ALIGNED(ptr + offset, sizeof(n));
    if (ptr != nullptr) {
      *reinterpret_cast<Elem*>(ptr + offset) = n;
    }
    return offset + sizeof(n);
  }

  template <typename Elem>
  static size_t ReadFromBytes(const uint8_t* ptr, size_t offset, Elem* out) {
    DCHECK(ptr != nullptr);
    DCHECK_ALIGNED(ptr + offset, sizeof(*out));
    *out = *reinterpret_cast<const Elem*>(ptr + offset);
    return offset + sizeof(*out);
  }

  Alloc allocfn_;  // Allocator function.
  HashFn hashfn_;  // Hashing function.
  EmptyFn emptyfn_;  // IsEmpty/SetEmpty function.
  Pred pred_;  // Equals function.
  size_t num_elements_;  // Number of inserted elements.
  size_t num_buckets_;  // Number of hash table buckets.
  size_t elements_until_expand_;  // Maximum number of elements until we expand the table.
  bool owns_data_;  // If we own data_ and are responsible for freeing it.
  T* data_;  // Backing storage.
  double min_load_factor_;
  double max_load_factor_;

  template <class Elem, class HashSetType>
  friend class HashSetIterator;

  ART_FRIEND_TEST(InternTableTest, CrossHash);
};

template <class T, class EmptyFn, class HashFn, class Pred, class Alloc>
void swap(HashSet<T, EmptyFn, HashFn, Pred, Alloc>& lhs,
          HashSet<T, EmptyFn, HashFn, Pred, Alloc>& rhs) {
  lhs.swap(rhs);
}

}  // namespace art

#endif  // ART_LIBARTBASE_BASE_HASH_SET_H_