/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17 #ifndef ART_LIBARTBASE_BASE_HASH_SET_H_
18 #define ART_LIBARTBASE_BASE_HASH_SET_H_
19
20 #include <stdint.h>
21
22 #include <functional>
23 #include <iterator>
24 #include <memory>
25 #include <string>
26 #include <type_traits>
27 #include <utility>
28
29 #include <android-base/logging.h>
30
31 #include "base/data_hash.h"
32 #include "bit_utils.h"
33 #include "macros.h"
34
35 namespace art {
36
// Forward iterator over the elements of a hash set.
//
// The iterator stores a pointer to the set and a bucket index. Advancing delegates to
// HashSetType::NextNonEmptySlot() and dereferencing to HashSetType::ElementForIndex(); both are
// private in HashSet and reached through its friend declaration for this class.
// Elem and HashSetType are const-qualified for the const_iterator flavor.
template <class Elem, class HashSetType>
class HashSetIterator {
 public:
  using iterator_category = std::forward_iterator_tag;
  using value_type = Elem;
  using difference_type = std::ptrdiff_t;
  using pointer = Elem*;
  using reference = Elem&;

  HashSetIterator(const HashSetIterator&) = default;
  HashSetIterator(HashSetIterator&&) = default;
  // Creates an iterator referring to bucket `index` of `hash_set`.
  HashSetIterator(HashSetType* hash_set, size_t index) : index_(index), hash_set_(hash_set) {}

  // Conversion from iterator to const_iterator.
  // Only enabled when this type is the const-qualified counterpart of the source iterator.
  template <class OtherElem,
            class OtherHashSetType,
            typename = typename std::enable_if<
                std::is_same<Elem, const OtherElem>::value &&
                std::is_same<HashSetType, const OtherHashSetType>::value>::type>
  HashSetIterator(const HashSetIterator<OtherElem, OtherHashSetType>& other)
      : index_(other.index_), hash_set_(other.hash_set_) {}

  HashSetIterator& operator=(const HashSetIterator&) = default;
  HashSetIterator& operator=(HashSetIterator&&) = default;

  // Two iterators of the same type are equal when they refer to the same set and bucket.
  bool operator==(const HashSetIterator& other) const {
    return hash_set_ == other.hash_set_ && index_ == other.index_;
  }

  bool operator!=(const HashSetIterator& other) const {
    return !(*this == other);
  }

  // Pre-increment: moves to the slot reported by HashSetType::NextNonEmptySlot() and returns
  // a reference to this iterator (no copy is made).
  HashSetIterator& operator++() {
    index_ = hash_set_->NextNonEmptySlot(index_);
    return *this;
  }

  // Post-increment: advances this iterator and returns its value from before the increment.
  HashSetIterator operator++(int) {
    HashSetIterator previous = *this;
    ++*this;
    return previous;
  }

  // Returns the element stored in the current bucket. The bucket must not be a free slot.
  Elem& operator*() const {
    DCHECK(!hash_set_->IsFreeSlot(index_));
    return hash_set_->ElementForIndex(index_);
  }

  Elem* operator->() const {
    return &**this;
  }

 private:
  size_t index_;           // Bucket index within *hash_set_.
  HashSetType* hash_set_;  // Set being iterated; not owned.

  template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
  friend bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
                         const HashSetIterator<Elem2, HashSetType2>& rhs);
  template <class T, class EmptyFn, class HashFn, class Pred, class Alloc> friend class HashSet;
  template <class OtherElem, class OtherHashSetType> friend class HashSetIterator;
};
100
101 template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
102 bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
103 const HashSetIterator<Elem2, HashSetType2>& rhs) {
104 static_assert(
105 std::is_convertible<HashSetIterator<Elem1, HashSetType1>,
106 HashSetIterator<Elem2, HashSetType2>>::value ||
107 std::is_convertible<HashSetIterator<Elem2, HashSetType2>,
108 HashSetIterator<Elem1, HashSetType1>>::value, "Bad iterator types.");
109 DCHECK_EQ(lhs.hash_set_, rhs.hash_set_);
110 return lhs.index_ == rhs.index_;
111 }
112
113 template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
114 bool operator!=(const HashSetIterator<Elem1, HashSetType1>& lhs,
115 const HashSetIterator<Elem2, HashSetType2>& rhs) {
116 return !(lhs == rhs);
117 }
118
// Default empty-slot policy: a slot is empty when it compares equal to a value-initialized T.
// As a consequence the value T() cannot be distinguished from an empty slot.
template <class T>
class DefaultEmptyFn {
 public:
  // Marks `item` as empty by assigning a value-initialized T to it.
  void MakeEmpty(T& item) const {
    item = T();
  }
  // Returns true if `item` is empty, i.e. equal to a value-initialized T.
  bool IsEmpty(const T& item) const {
    return item == T();
  }
};
130
131 template <class T>
132 class DefaultEmptyFn<T*> {
133 public:
MakeEmpty(T * & item)134 void MakeEmpty(T*& item) const {
135 item = nullptr;
136 }
IsEmpty(T * const & item)137 bool IsEmpty(T* const& item) const {
138 return item == nullptr;
139 }
140 };
141
142 template <class T>
143 using DefaultHashFn = typename std::conditional<std::is_same<T, std::string>::value,
144 DataHash,
145 std::hash<T>>::type;
146
// Equality predicate for std::string elements.
struct DefaultStringEquals {
  // Allow comparison with anything that can be compared to std::string,
  // for example std::string_view.
  template <typename T>
  bool operator()(const std::string& lhs, const T& rhs) const {
    return lhs == rhs;
  }
};
155
156 template <class T>
157 using DefaultPred = typename std::conditional<std::is_same<T, std::string>::value,
158 DefaultStringEquals,
159 std::equal_to<T>>::type;
160
161 // Low memory version of a hash set, uses less memory than std::unordered_multiset since elements
162 // aren't boxed. Uses linear probing to resolve collisions.
163 // EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item).
164 // TODO: We could get rid of this requirement by using a bitmap, though maybe this would be slower
165 // and more complicated.
166 template <class T,
167 class EmptyFn = DefaultEmptyFn<T>,
168 class HashFn = DefaultHashFn<T>,
169 class Pred = DefaultPred<T>,
170 class Alloc = std::allocator<T>>
171 class HashSet {
172 public:
173 using value_type = T;
174 using allocator_type = Alloc;
175 using reference = T&;
176 using const_reference = const T&;
177 using pointer = T*;
178 using const_pointer = const T*;
179 using iterator = HashSetIterator<T, HashSet>;
180 using const_iterator = HashSetIterator<const T, const HashSet>;
181 using size_type = size_t;
182 using difference_type = ptrdiff_t;
183
184 static constexpr double kDefaultMinLoadFactor = 0.4;
185 static constexpr double kDefaultMaxLoadFactor = 0.7;
186 static constexpr size_t kMinBuckets = 1000;
187
188 // If we don't own the data, this will create a new array which owns the data.
clear()189 void clear() {
190 DeallocateStorage();
191 num_elements_ = 0;
192 elements_until_expand_ = 0;
193 }
194
HashSet()195 HashSet() : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor) {}
HashSet(const allocator_type & alloc)196 explicit HashSet(const allocator_type& alloc) noexcept
197 : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, alloc) {}
198
HashSet(double min_load_factor,double max_load_factor)199 HashSet(double min_load_factor, double max_load_factor) noexcept
200 : HashSet(min_load_factor, max_load_factor, allocator_type()) {}
HashSet(double min_load_factor,double max_load_factor,const allocator_type & alloc)201 HashSet(double min_load_factor, double max_load_factor, const allocator_type& alloc) noexcept
202 : HashSet(min_load_factor, max_load_factor, HashFn(), Pred(), alloc) {}
203
HashSet(const HashFn & hashfn,const Pred & pred)204 HashSet(const HashFn& hashfn,
205 const Pred& pred) noexcept
206 : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, hashfn, pred) {}
HashSet(const HashFn & hashfn,const Pred & pred,const allocator_type & alloc)207 HashSet(const HashFn& hashfn,
208 const Pred& pred,
209 const allocator_type& alloc) noexcept
210 : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, hashfn, pred, alloc) {}
211
HashSet(double min_load_factor,double max_load_factor,const HashFn & hashfn,const Pred & pred)212 HashSet(double min_load_factor,
213 double max_load_factor,
214 const HashFn& hashfn,
215 const Pred& pred) noexcept
216 : HashSet(min_load_factor, max_load_factor, hashfn, pred, allocator_type()) {}
HashSet(double min_load_factor,double max_load_factor,const HashFn & hashfn,const Pred & pred,const allocator_type & alloc)217 HashSet(double min_load_factor,
218 double max_load_factor,
219 const HashFn& hashfn,
220 const Pred& pred,
221 const allocator_type& alloc) noexcept
222 : allocfn_(alloc),
223 hashfn_(hashfn),
224 emptyfn_(),
225 pred_(pred),
226 num_elements_(0u),
227 num_buckets_(0u),
228 elements_until_expand_(0u),
229 owns_data_(false),
230 data_(nullptr),
231 min_load_factor_(min_load_factor),
232 max_load_factor_(max_load_factor) {
233 DCHECK_GT(min_load_factor, 0.0);
234 DCHECK_LT(max_load_factor, 1.0);
235 }
236
HashSet(const HashSet & other)237 HashSet(const HashSet& other) noexcept
238 : allocfn_(other.allocfn_),
239 hashfn_(other.hashfn_),
240 emptyfn_(other.emptyfn_),
241 pred_(other.pred_),
242 num_elements_(other.num_elements_),
243 num_buckets_(0),
244 elements_until_expand_(other.elements_until_expand_),
245 owns_data_(false),
246 data_(nullptr),
247 min_load_factor_(other.min_load_factor_),
248 max_load_factor_(other.max_load_factor_) {
249 AllocateStorage(other.NumBuckets());
250 for (size_t i = 0; i < num_buckets_; ++i) {
251 ElementForIndex(i) = other.data_[i];
252 }
253 }
254
255 // noexcept required so that the move constructor is used instead of copy constructor.
256 // b/27860101
HashSet(HashSet && other)257 HashSet(HashSet&& other) noexcept
258 : allocfn_(std::move(other.allocfn_)),
259 hashfn_(std::move(other.hashfn_)),
260 emptyfn_(std::move(other.emptyfn_)),
261 pred_(std::move(other.pred_)),
262 num_elements_(other.num_elements_),
263 num_buckets_(other.num_buckets_),
264 elements_until_expand_(other.elements_until_expand_),
265 owns_data_(other.owns_data_),
266 data_(other.data_),
267 min_load_factor_(other.min_load_factor_),
268 max_load_factor_(other.max_load_factor_) {
269 other.num_elements_ = 0u;
270 other.num_buckets_ = 0u;
271 other.elements_until_expand_ = 0u;
272 other.owns_data_ = false;
273 other.data_ = nullptr;
274 }
275
276 // Construct with pre-existing buffer, usually stack-allocated,
277 // to avoid malloc/free overhead for small HashSet<>s.
HashSet(value_type * buffer,size_t buffer_size)278 HashSet(value_type* buffer, size_t buffer_size)
279 : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, buffer, buffer_size) {}
HashSet(value_type * buffer,size_t buffer_size,const allocator_type & alloc)280 HashSet(value_type* buffer, size_t buffer_size, const allocator_type& alloc)
281 : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, buffer, buffer_size, alloc) {}
HashSet(double min_load_factor,double max_load_factor,value_type * buffer,size_t buffer_size)282 HashSet(double min_load_factor, double max_load_factor, value_type* buffer, size_t buffer_size)
283 : HashSet(min_load_factor, max_load_factor, buffer, buffer_size, allocator_type()) {}
HashSet(double min_load_factor,double max_load_factor,value_type * buffer,size_t buffer_size,const allocator_type & alloc)284 HashSet(double min_load_factor,
285 double max_load_factor,
286 value_type* buffer,
287 size_t buffer_size,
288 const allocator_type& alloc)
289 : allocfn_(alloc),
290 num_elements_(0u),
291 num_buckets_(buffer_size),
292 elements_until_expand_(buffer_size * max_load_factor),
293 owns_data_(false),
294 data_(buffer),
295 min_load_factor_(min_load_factor),
296 max_load_factor_(max_load_factor) {
297 DCHECK_GT(min_load_factor, 0.0);
298 DCHECK_LT(max_load_factor, 1.0);
299 for (size_t i = 0; i != buffer_size; ++i) {
300 emptyfn_.MakeEmpty(buffer[i]);
301 }
302 }
303
304 // Construct from existing data.
305 // Read from a block of memory, if make_copy_of_data is false, then data_ points to within the
306 // passed in ptr_.
HashSet(const uint8_t * ptr,bool make_copy_of_data,size_t * read_count)307 HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) noexcept {
308 uint64_t temp;
309 size_t offset = 0;
310 offset = ReadFromBytes(ptr, offset, &temp);
311 num_elements_ = static_cast<uint64_t>(temp);
312 offset = ReadFromBytes(ptr, offset, &temp);
313 num_buckets_ = static_cast<uint64_t>(temp);
314 CHECK_LE(num_elements_, num_buckets_);
315 offset = ReadFromBytes(ptr, offset, &temp);
316 elements_until_expand_ = static_cast<uint64_t>(temp);
317 offset = ReadFromBytes(ptr, offset, &min_load_factor_);
318 offset = ReadFromBytes(ptr, offset, &max_load_factor_);
319 if (!make_copy_of_data) {
320 owns_data_ = false;
321 data_ = const_cast<T*>(reinterpret_cast<const T*>(ptr + offset));
322 offset += sizeof(*data_) * num_buckets_;
323 } else {
324 AllocateStorage(num_buckets_);
325 // Write elements, not that this may not be safe for cross compilation if the elements are
326 // pointer sized.
327 for (size_t i = 0; i < num_buckets_; ++i) {
328 offset = ReadFromBytes(ptr, offset, &data_[i]);
329 }
330 }
331 // Caller responsible for aligning.
332 *read_count = offset;
333 }
334
335 // Returns how large the table is after being written. If target is null, then no writing happens
336 // but the size is still returned. Target must be 8 byte aligned.
WriteToMemory(uint8_t * ptr)337 size_t WriteToMemory(uint8_t* ptr) const {
338 size_t offset = 0;
339 offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
340 offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
341 offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(elements_until_expand_));
342 offset = WriteToBytes(ptr, offset, min_load_factor_);
343 offset = WriteToBytes(ptr, offset, max_load_factor_);
344 // Write elements, not that this may not be safe for cross compilation if the elements are
345 // pointer sized.
346 for (size_t i = 0; i < num_buckets_; ++i) {
347 offset = WriteToBytes(ptr, offset, data_[i]);
348 }
349 // Caller responsible for aligning.
350 return offset;
351 }
352
~HashSet()353 ~HashSet() {
354 DeallocateStorage();
355 }
356
357 HashSet& operator=(HashSet&& other) noexcept {
358 HashSet(std::move(other)).swap(*this); // NOLINT [runtime/explicit] [5]
359 return *this;
360 }
361
362 HashSet& operator=(const HashSet& other) noexcept {
363 HashSet(other).swap(*this); // NOLINT(runtime/explicit) - a case of lint gone mad.
364 return *this;
365 }
366
367 // Lower case for c++11 for each.
begin()368 iterator begin() {
369 iterator ret(this, 0);
370 if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
371 ++ret; // Skip all the empty slots.
372 }
373 return ret;
374 }
375
376 // Lower case for c++11 for each. const version.
begin()377 const_iterator begin() const {
378 const_iterator ret(this, 0);
379 if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
380 ++ret; // Skip all the empty slots.
381 }
382 return ret;
383 }
384
385 // Lower case for c++11 for each.
end()386 iterator end() {
387 return iterator(this, NumBuckets());
388 }
389
390 // Lower case for c++11 for each. const version.
end()391 const_iterator end() const {
392 return const_iterator(this, NumBuckets());
393 }
394
size()395 size_t size() const {
396 return num_elements_;
397 }
398
empty()399 bool empty() const {
400 return size() == 0;
401 }
402
403 // Erase algorithm:
404 // Make an empty slot where the iterator is pointing.
405 // Scan forwards until we hit another empty slot.
406 // If an element in between doesn't rehash to the range from the current empty slot to the
407 // iterator. It must be before the empty slot, in that case we can move it to the empty slot
408 // and set the empty slot to be the location we just moved from.
409 // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an
410 // element to its actual location/index.
411 // Note that since erase shuffles back elements, it may result in the same element being visited
412 // twice during HashSet iteration. This happens when an element already visited during iteration
413 // gets shuffled to the end of the bucket array.
erase(iterator it)414 iterator erase(iterator it) {
415 // empty_index is the index that will become empty.
416 size_t empty_index = it.index_;
417 DCHECK(!IsFreeSlot(empty_index));
418 size_t next_index = empty_index;
419 bool filled = false; // True if we filled the empty index.
420 while (true) {
421 next_index = NextIndex(next_index);
422 T& next_element = ElementForIndex(next_index);
423 // If the next element is empty, we are done. Make sure to clear the current empty index.
424 if (emptyfn_.IsEmpty(next_element)) {
425 emptyfn_.MakeEmpty(ElementForIndex(empty_index));
426 break;
427 }
428 // Otherwise try to see if the next element can fill the current empty index.
429 const size_t next_hash = hashfn_(next_element);
430 // Calculate the ideal index, if it is within empty_index + 1 to next_index then there is
431 // nothing we can do.
432 size_t next_ideal_index = IndexForHash(next_hash);
433 // Loop around if needed for our check.
434 size_t unwrapped_next_index = next_index;
435 if (unwrapped_next_index < empty_index) {
436 unwrapped_next_index += NumBuckets();
437 }
438 // Loop around if needed for our check.
439 size_t unwrapped_next_ideal_index = next_ideal_index;
440 if (unwrapped_next_ideal_index < empty_index) {
441 unwrapped_next_ideal_index += NumBuckets();
442 }
443 if (unwrapped_next_ideal_index <= empty_index ||
444 unwrapped_next_ideal_index > unwrapped_next_index) {
445 // If the target index isn't within our current range it must have been probed from before
446 // the empty index.
447 ElementForIndex(empty_index) = std::move(next_element);
448 filled = true; // TODO: Optimize
449 empty_index = next_index;
450 }
451 }
452 --num_elements_;
453 // If we didn't fill the slot then we need go to the next non free slot.
454 if (!filled) {
455 ++it;
456 }
457 return it;
458 }
459
460 // Find an element, returns end() if not found.
461 // Allows custom key (K) types, example of when this is useful:
462 // Set of Class* indexed by name, want to find a class with a name but can't allocate
463 // a temporary Class object in the heap for performance solution.
464 template <typename K>
find(const K & key)465 iterator find(const K& key) {
466 return FindWithHash(key, hashfn_(key));
467 }
468
469 template <typename K>
find(const K & key)470 const_iterator find(const K& key) const {
471 return FindWithHash(key, hashfn_(key));
472 }
473
474 template <typename K>
FindWithHash(const K & key,size_t hash)475 iterator FindWithHash(const K& key, size_t hash) {
476 return iterator(this, FindIndex(key, hash));
477 }
478
479 template <typename K>
FindWithHash(const K & key,size_t hash)480 const_iterator FindWithHash(const K& key, size_t hash) const {
481 return const_iterator(this, FindIndex(key, hash));
482 }
483
484 // Insert an element with hint.
485 // Note: The hint is not very useful for a HashSet<> unless there are many hash conflicts
486 // and in that case the use of HashSet<> itself should be reconsidered.
insert(const_iterator hint ATTRIBUTE_UNUSED,const T & element)487 std::pair<iterator, bool> insert(const_iterator hint ATTRIBUTE_UNUSED, const T& element) {
488 return insert(element);
489 }
insert(const_iterator hint ATTRIBUTE_UNUSED,T && element)490 std::pair<iterator, bool> insert(const_iterator hint ATTRIBUTE_UNUSED, T&& element) {
491 return insert(std::move(element));
492 }
493
494 // Insert an element.
insert(const T & element)495 std::pair<iterator, bool> insert(const T& element) {
496 return InsertWithHash(element, hashfn_(element));
497 }
insert(T && element)498 std::pair<iterator, bool> insert(T&& element) {
499 return InsertWithHash(std::move(element), hashfn_(element));
500 }
501
502 template <typename U, typename = typename std::enable_if<std::is_convertible<U, T>::value>::type>
InsertWithHash(U && element,size_t hash)503 std::pair<iterator, bool> InsertWithHash(U&& element, size_t hash) {
504 DCHECK_EQ(hash, hashfn_(element));
505 if (num_elements_ >= elements_until_expand_) {
506 Expand();
507 DCHECK_LT(num_elements_, elements_until_expand_);
508 }
509 bool find_failed = false;
510 auto find_fail_fn = [&](size_t index) {
511 find_failed = true;
512 return index;
513 };
514 size_t index = FindIndexImpl(element, hash, find_fail_fn);
515 if (find_failed) {
516 data_[index] = std::forward<U>(element);
517 ++num_elements_;
518 }
519 return std::make_pair(iterator(this, index), find_failed);
520 }
521
swap(HashSet & other)522 void swap(HashSet& other) {
523 // Use argument-dependent lookup with fall-back to std::swap() for function objects.
524 using std::swap;
525 swap(allocfn_, other.allocfn_);
526 swap(hashfn_, other.hashfn_);
527 swap(emptyfn_, other.emptyfn_);
528 swap(pred_, other.pred_);
529 std::swap(data_, other.data_);
530 std::swap(num_buckets_, other.num_buckets_);
531 std::swap(num_elements_, other.num_elements_);
532 std::swap(elements_until_expand_, other.elements_until_expand_);
533 std::swap(min_load_factor_, other.min_load_factor_);
534 std::swap(max_load_factor_, other.max_load_factor_);
535 std::swap(owns_data_, other.owns_data_);
536 }
537
get_allocator()538 allocator_type get_allocator() const {
539 return allocfn_;
540 }
541
ShrinkToMaximumLoad()542 void ShrinkToMaximumLoad() {
543 Resize(size() / max_load_factor_);
544 }
545
546 // Reserve enough room to insert until Size() == num_elements without requiring to grow the hash
547 // set. No-op if the hash set is already large enough to do this.
reserve(size_t num_elements)548 void reserve(size_t num_elements) {
549 size_t num_buckets = num_elements / max_load_factor_;
550 // Deal with rounding errors. Add one for rounding.
551 while (static_cast<size_t>(num_buckets * max_load_factor_) <= num_elements + 1u) {
552 ++num_buckets;
553 }
554 if (num_buckets > NumBuckets()) {
555 Resize(num_buckets);
556 }
557 }
558
559 // To distance that inserted elements were probed. Used for measuring how good hash functions
560 // are.
TotalProbeDistance()561 size_t TotalProbeDistance() const {
562 size_t total = 0;
563 for (size_t i = 0; i < NumBuckets(); ++i) {
564 const T& element = ElementForIndex(i);
565 if (!emptyfn_.IsEmpty(element)) {
566 size_t ideal_location = IndexForHash(hashfn_(element));
567 if (ideal_location > i) {
568 total += i + NumBuckets() - ideal_location;
569 } else {
570 total += i - ideal_location;
571 }
572 }
573 }
574 return total;
575 }
576
577 // Calculate the current load factor and return it.
CalculateLoadFactor()578 double CalculateLoadFactor() const {
579 return static_cast<double>(size()) / static_cast<double>(NumBuckets());
580 }
581
582 // Make sure that everything reinserts in the right spot. Returns the number of errors.
Verify()583 size_t Verify() NO_THREAD_SAFETY_ANALYSIS {
584 size_t errors = 0;
585 for (size_t i = 0; i < num_buckets_; ++i) {
586 T& element = data_[i];
587 if (!emptyfn_.IsEmpty(element)) {
588 T temp;
589 emptyfn_.MakeEmpty(temp);
590 std::swap(temp, element);
591 size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp)));
592 if (i != first_slot) {
593 LOG(ERROR) << "Element " << i << " should be in slot " << first_slot;
594 ++errors;
595 }
596 std::swap(temp, element);
597 }
598 }
599 return errors;
600 }
601
GetMinLoadFactor()602 double GetMinLoadFactor() const {
603 return min_load_factor_;
604 }
605
GetMaxLoadFactor()606 double GetMaxLoadFactor() const {
607 return max_load_factor_;
608 }
609
610 // Change the load factor of the hash set. If the current load factor is greater than the max
611 // specified, then we resize the hash table storage.
SetLoadFactor(double min_load_factor,double max_load_factor)612 void SetLoadFactor(double min_load_factor, double max_load_factor) {
613 DCHECK_LT(min_load_factor, max_load_factor);
614 DCHECK_GT(min_load_factor, 0.0);
615 DCHECK_LT(max_load_factor, 1.0);
616 min_load_factor_ = min_load_factor;
617 max_load_factor_ = max_load_factor;
618 elements_until_expand_ = NumBuckets() * max_load_factor_;
619 // If the current load factor isn't in the range, then resize to the mean of the minimum and
620 // maximum load factor.
621 const double load_factor = CalculateLoadFactor();
622 if (load_factor > max_load_factor_) {
623 Resize(size() / ((min_load_factor_ + max_load_factor_) * 0.5));
624 }
625 }
626
627 // The hash set expands when Size() reaches ElementsUntilExpand().
ElementsUntilExpand()628 size_t ElementsUntilExpand() const {
629 return elements_until_expand_;
630 }
631
NumBuckets()632 size_t NumBuckets() const {
633 return num_buckets_;
634 }
635
636 private:
ElementForIndex(size_t index)637 T& ElementForIndex(size_t index) {
638 DCHECK_LT(index, NumBuckets());
639 DCHECK(data_ != nullptr);
640 return data_[index];
641 }
642
ElementForIndex(size_t index)643 const T& ElementForIndex(size_t index) const {
644 DCHECK_LT(index, NumBuckets());
645 DCHECK(data_ != nullptr);
646 return data_[index];
647 }
648
IndexForHash(size_t hash)649 size_t IndexForHash(size_t hash) const {
650 // Protect against undefined behavior (division by zero).
651 if (UNLIKELY(num_buckets_ == 0)) {
652 return 0;
653 }
654 return hash % num_buckets_;
655 }
656
NextIndex(size_t index)657 size_t NextIndex(size_t index) const {
658 if (UNLIKELY(++index >= num_buckets_)) {
659 DCHECK_EQ(index, NumBuckets());
660 return 0;
661 }
662 return index;
663 }
664
665 // Find the hash table slot for an element, or return NumBuckets() if not found.
666 // This value for not found is important so that iterator(this, FindIndex(...)) == end().
667 template <typename K>
FindIndex(const K & element,size_t hash)668 size_t FindIndex(const K& element, size_t hash) const {
669 // Guard against failing to get an element for a non-existing index.
670 if (UNLIKELY(NumBuckets() == 0)) {
671 return 0;
672 }
673 auto fail_fn = [&](size_t index ATTRIBUTE_UNUSED) { return NumBuckets(); };
674 return FindIndexImpl(element, hash, fail_fn);
675 }
676
677 // Find the hash table slot for an element, or return an empty slot index if not found.
678 template <typename K, typename FailFn>
FindIndexImpl(const K & element,size_t hash,FailFn fail_fn)679 size_t FindIndexImpl(const K& element, size_t hash, FailFn fail_fn) const {
680 DCHECK_NE(NumBuckets(), 0u);
681 DCHECK_EQ(hashfn_(element), hash);
682 size_t index = IndexForHash(hash);
683 while (true) {
684 const T& slot = ElementForIndex(index);
685 if (emptyfn_.IsEmpty(slot)) {
686 return fail_fn(index);
687 }
688 if (pred_(slot, element)) {
689 return index;
690 }
691 index = NextIndex(index);
692 }
693 }
694
IsFreeSlot(size_t index)695 bool IsFreeSlot(size_t index) const {
696 return emptyfn_.IsEmpty(ElementForIndex(index));
697 }
698
699 // Allocate a number of buckets.
AllocateStorage(size_t num_buckets)700 void AllocateStorage(size_t num_buckets) {
701 num_buckets_ = num_buckets;
702 data_ = allocfn_.allocate(num_buckets_);
703 owns_data_ = true;
704 for (size_t i = 0; i < num_buckets_; ++i) {
705 allocfn_.construct(allocfn_.address(data_[i]));
706 emptyfn_.MakeEmpty(data_[i]);
707 }
708 }
709
DeallocateStorage()710 void DeallocateStorage() {
711 if (owns_data_) {
712 for (size_t i = 0; i < NumBuckets(); ++i) {
713 allocfn_.destroy(allocfn_.address(data_[i]));
714 }
715 if (data_ != nullptr) {
716 allocfn_.deallocate(data_, NumBuckets());
717 }
718 owns_data_ = false;
719 }
720 data_ = nullptr;
721 num_buckets_ = 0;
722 }
723
724 // Expand the set based on the load factors.
Expand()725 void Expand() {
726 size_t min_index = static_cast<size_t>(size() / min_load_factor_);
727 // Resize based on the minimum load factor.
728 Resize(min_index);
729 }
730
731 // Expand / shrink the table to the new specified size.
Resize(size_t new_size)732 void Resize(size_t new_size) {
733 if (new_size < kMinBuckets) {
734 new_size = kMinBuckets;
735 }
736 DCHECK_GE(new_size, size());
737 T* const old_data = data_;
738 size_t old_num_buckets = num_buckets_;
739 // Reinsert all of the old elements.
740 const bool owned_data = owns_data_;
741 AllocateStorage(new_size);
742 for (size_t i = 0; i < old_num_buckets; ++i) {
743 T& element = old_data[i];
744 if (!emptyfn_.IsEmpty(element)) {
745 data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
746 }
747 if (owned_data) {
748 allocfn_.destroy(allocfn_.address(element));
749 }
750 }
751 if (owned_data) {
752 allocfn_.deallocate(old_data, old_num_buckets);
753 }
754
755 // When we hit elements_until_expand_, we are at the max load factor and must expand again.
756 elements_until_expand_ = NumBuckets() * max_load_factor_;
757 }
758
FirstAvailableSlot(size_t index)759 ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
760 DCHECK_LT(index, NumBuckets()); // Don't try to get a slot out of range.
761 size_t non_empty_count = 0;
762 while (!emptyfn_.IsEmpty(data_[index])) {
763 index = NextIndex(index);
764 non_empty_count++;
765 DCHECK_LE(non_empty_count, NumBuckets()); // Don't loop forever.
766 }
767 return index;
768 }
769
NextNonEmptySlot(size_t index)770 size_t NextNonEmptySlot(size_t index) const {
771 const size_t num_buckets = NumBuckets();
772 DCHECK_LT(index, num_buckets);
773 do {
774 ++index;
775 } while (index < num_buckets && IsFreeSlot(index));
776 return index;
777 }
778
779 // Return new offset.
780 template <typename Elem>
WriteToBytes(uint8_t * ptr,size_t offset,Elem n)781 static size_t WriteToBytes(uint8_t* ptr, size_t offset, Elem n) {
782 DCHECK_ALIGNED(ptr + offset, sizeof(n));
783 if (ptr != nullptr) {
784 *reinterpret_cast<Elem*>(ptr + offset) = n;
785 }
786 return offset + sizeof(n);
787 }
788
789 template <typename Elem>
ReadFromBytes(const uint8_t * ptr,size_t offset,Elem * out)790 static size_t ReadFromBytes(const uint8_t* ptr, size_t offset, Elem* out) {
791 DCHECK(ptr != nullptr);
792 DCHECK_ALIGNED(ptr + offset, sizeof(*out));
793 *out = *reinterpret_cast<const Elem*>(ptr + offset);
794 return offset + sizeof(*out);
795 }
796
797 Alloc allocfn_; // Allocator function.
798 HashFn hashfn_; // Hashing function.
799 EmptyFn emptyfn_; // IsEmpty/SetEmpty function.
800 Pred pred_; // Equals function.
801 size_t num_elements_; // Number of inserted elements.
802 size_t num_buckets_; // Number of hash table buckets.
803 size_t elements_until_expand_; // Maximum number of elements until we expand the table.
804 bool owns_data_; // If we own data_ and are responsible for freeing it.
805 T* data_; // Backing storage.
806 double min_load_factor_;
807 double max_load_factor_;
808
809 template <class Elem, class HashSetType>
810 friend class HashSetIterator;
811
812 ART_FRIEND_TEST(InternTableTest, CrossHash);
813 ART_FRIEND_TEST(HashSetTest, Preallocated);
814 };
815
816 template <class T, class EmptyFn, class HashFn, class Pred, class Alloc>
swap(HashSet<T,EmptyFn,HashFn,Pred,Alloc> & lhs,HashSet<T,EmptyFn,HashFn,Pred,Alloc> & rhs)817 void swap(HashSet<T, EmptyFn, HashFn, Pred, Alloc>& lhs,
818 HashSet<T, EmptyFn, HashFn, Pred, Alloc>& rhs) {
819 lhs.swap(rhs);
820 }
821
822 } // namespace art
823
824 #endif // ART_LIBARTBASE_BASE_HASH_SET_H_
825