1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_LIBARTBASE_BASE_HASH_SET_H_
18 #define ART_LIBARTBASE_BASE_HASH_SET_H_
19 
20 #include <stdint.h>
21 
22 #include <functional>
23 #include <iterator>
24 #include <memory>
25 #include <string>
26 #include <type_traits>
27 #include <utility>
28 
29 #include <android-base/logging.h>
30 
31 #include "base/data_hash.h"
32 #include "bit_utils.h"
33 #include "macros.h"
34 
35 namespace art {
36 
37 template <class Elem, class HashSetType>
38 class HashSetIterator {
39  public:
40   using iterator_category = std::forward_iterator_tag;
41   using value_type = Elem;
42   using difference_type = std::ptrdiff_t;
43   using pointer = Elem*;
44   using reference = Elem&;
45 
46   HashSetIterator(const HashSetIterator&) = default;
47   HashSetIterator(HashSetIterator&&) noexcept = default;
HashSetIterator(HashSetType * hash_set,size_t index)48   HashSetIterator(HashSetType* hash_set, size_t index) : index_(index), hash_set_(hash_set) {}
49 
50   // Conversion from iterator to const_iterator.
51   template <class OtherElem,
52             class OtherHashSetType,
53             typename = std::enable_if_t<
54                 std::is_same_v<Elem, const OtherElem> &&
55                 std::is_same_v<HashSetType, const OtherHashSetType>>>
HashSetIterator(const HashSetIterator<OtherElem,OtherHashSetType> & other)56   HashSetIterator(const HashSetIterator<OtherElem, OtherHashSetType>& other)
57       : index_(other.index_), hash_set_(other.hash_set_) {}
58 
59   HashSetIterator& operator=(const HashSetIterator&) = default;
60   HashSetIterator& operator=(HashSetIterator&&) noexcept = default;
61 
62   bool operator==(const HashSetIterator& other) const {
63     return hash_set_ == other.hash_set_ && this->index_ == other.index_;
64   }
65 
66   bool operator!=(const HashSetIterator& other) const {
67     return !(*this == other);
68   }
69 
70   HashSetIterator operator++() {  // Value after modification.
71     this->index_ = hash_set_->NextNonEmptySlot(index_);
72     return *this;
73   }
74 
75   HashSetIterator operator++(int) {
76     HashSetIterator temp = *this;
77     ++*this;
78     return temp;
79   }
80 
81   Elem& operator*() const {
82     DCHECK(!hash_set_->IsFreeSlot(this->index_));
83     return hash_set_->ElementForIndex(this->index_);
84   }
85 
86   Elem* operator->() const {
87     return &**this;
88   }
89 
90  private:
91   size_t index_;
92   HashSetType* hash_set_;
93 
94   template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
95   friend bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
96                          const HashSetIterator<Elem2, HashSetType2>& rhs);
97   template <class T, class EmptyFn, class HashFn, class Pred, class Alloc> friend class HashSet;
98   template <class OtherElem, class OtherHashSetType> friend class HashSetIterator;
99 };
100 
101 template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
102 bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
103                 const HashSetIterator<Elem2, HashSetType2>& rhs) {
104   static_assert(
105       std::is_convertible_v<HashSetIterator<Elem1, HashSetType1>,
106                             HashSetIterator<Elem2, HashSetType2>> ||
107       std::is_convertible_v<HashSetIterator<Elem2, HashSetType2>,
108                             HashSetIterator<Elem1, HashSetType1>>, "Bad iterator types.");
109   DCHECK_EQ(lhs.hash_set_, rhs.hash_set_);
110   return lhs.index_ == rhs.index_;
111 }
112 
113 template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
114 bool operator!=(const HashSetIterator<Elem1, HashSetType1>& lhs,
115                 const HashSetIterator<Elem2, HashSetType2>& rhs) {
116   return !(lhs == rhs);
117 }
118 
119 // Returns true if an item is empty.
120 template <class T>
121 class DefaultEmptyFn {
122  public:
MakeEmpty(T & item)123   void MakeEmpty(T& item) const {
124     item = T();
125   }
IsEmpty(const T & item)126   bool IsEmpty(const T& item) const {
127     return item == T();
128   }
129 };
130 
131 template <class T>
132 class DefaultEmptyFn<T*> {
133  public:
MakeEmpty(T * & item)134   void MakeEmpty(T*& item) const {
135     item = nullptr;
136   }
IsEmpty(T * const & item)137   bool IsEmpty(T* const& item) const {
138     return item == nullptr;
139   }
140 };
141 
142 template <>
143 class DefaultEmptyFn<std::string> {
144  public:
MakeEmpty(std::string & item)145   void MakeEmpty(std::string& item) const {
146     item = std::string();
147   }
IsEmpty(const std::string & item)148   bool IsEmpty(const std::string& item) const {
149     return item.empty();
150   }
151 };
152 
153 template <class T>
154 using DefaultHashFn = std::conditional_t<std::is_same_v<T, std::string>, DataHash, std::hash<T>>;
155 
156 struct DefaultStringEquals {
157   // Allow comparison with anything that can be compared to std::string,
158   // for example std::string_view.
159   template <typename T>
operatorDefaultStringEquals160   bool operator()(const std::string& lhs, const T& rhs) const {
161     return lhs == rhs;
162   }
163 };
164 
165 template <class T>
166 using DefaultPred =
167     std::conditional_t<std::is_same_v<T, std::string>, DefaultStringEquals, std::equal_to<T>>;
168 
169 // Low memory version of a hash set, uses less memory than std::unordered_multiset since elements
170 // aren't boxed. Uses linear probing to resolve collisions.
171 // EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item).
172 // TODO: We could get rid of this requirement by using a bitmap, though maybe this would be slower
173 // and more complicated.
174 template <class T,
175           class EmptyFn = DefaultEmptyFn<T>,
176           class HashFn = DefaultHashFn<T>,
177           class Pred = DefaultPred<T>,
178           class Alloc = std::allocator<T>>
179 class HashSet {
180  public:
181   using value_type = T;
182   using allocator_type = Alloc;
183   using reference = T&;
184   using const_reference = const T&;
185   using pointer = T*;
186   using const_pointer = const T*;
187   using iterator = HashSetIterator<T, HashSet>;
188   using const_iterator = HashSetIterator<const T, const HashSet>;
189   using size_type = size_t;
190   using difference_type = ptrdiff_t;
191 
192   static constexpr double kDefaultMinLoadFactor = 0.4;
193   static constexpr double kDefaultMaxLoadFactor = 0.7;
194   static constexpr size_t kMinBuckets = 1000;
195 
196   // If we don't own the data, this will create a new array which owns the data.
clear()197   void clear() {
198     DeallocateStorage();
199     num_elements_ = 0;
200     elements_until_expand_ = 0;
201   }
202 
HashSet()203   HashSet() : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor) {}
HashSet(const allocator_type & alloc)204   explicit HashSet(const allocator_type& alloc) noexcept
205       : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, alloc) {}
206 
HashSet(double min_load_factor,double max_load_factor)207   HashSet(double min_load_factor, double max_load_factor) noexcept
208       : HashSet(min_load_factor, max_load_factor, allocator_type()) {}
HashSet(double min_load_factor,double max_load_factor,const allocator_type & alloc)209   HashSet(double min_load_factor, double max_load_factor, const allocator_type& alloc) noexcept
210       : HashSet(min_load_factor, max_load_factor, HashFn(), Pred(), alloc) {}
211 
HashSet(const HashFn & hashfn,const Pred & pred)212   HashSet(const HashFn& hashfn,
213           const Pred& pred) noexcept
214       : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, hashfn, pred) {}
HashSet(const HashFn & hashfn,const Pred & pred,const allocator_type & alloc)215   HashSet(const HashFn& hashfn,
216           const Pred& pred,
217           const allocator_type& alloc) noexcept
218       : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, hashfn, pred, alloc) {}
219 
HashSet(double min_load_factor,double max_load_factor,const HashFn & hashfn,const Pred & pred)220   HashSet(double min_load_factor,
221           double max_load_factor,
222           const HashFn& hashfn,
223           const Pred& pred) noexcept
224       : HashSet(min_load_factor, max_load_factor, hashfn, pred, allocator_type()) {}
HashSet(double min_load_factor,double max_load_factor,const HashFn & hashfn,const Pred & pred,const allocator_type & alloc)225   HashSet(double min_load_factor,
226           double max_load_factor,
227           const HashFn& hashfn,
228           const Pred& pred,
229           const allocator_type& alloc) noexcept
230       : allocfn_(alloc),
231         hashfn_(hashfn),
232         emptyfn_(),
233         pred_(pred),
234         num_elements_(0u),
235         num_buckets_(0u),
236         elements_until_expand_(0u),
237         owns_data_(false),
238         data_(nullptr),
239         min_load_factor_(min_load_factor),
240         max_load_factor_(max_load_factor) {
241     DCHECK_GT(min_load_factor, 0.0);
242     DCHECK_LT(max_load_factor, 1.0);
243   }
244 
HashSet(const HashSet & other)245   HashSet(const HashSet& other)
246       : allocfn_(other.allocfn_),
247         hashfn_(other.hashfn_),
248         emptyfn_(other.emptyfn_),
249         pred_(other.pred_),
250         num_elements_(other.num_elements_),
251         num_buckets_(0),
252         elements_until_expand_(other.elements_until_expand_),
253         owns_data_(false),
254         data_(nullptr),
255         min_load_factor_(other.min_load_factor_),
256         max_load_factor_(other.max_load_factor_) {
257     AllocateStorage(other.NumBuckets());
258     for (size_t i = 0; i < num_buckets_; ++i) {
259       ElementForIndex(i) = other.data_[i];
260     }
261   }
262 
263   // noexcept required so that the move constructor is used instead of copy constructor.
264   // b/27860101
HashSet(HashSet && other)265   HashSet(HashSet&& other) noexcept
266       : allocfn_(std::move(other.allocfn_)),
267         hashfn_(std::move(other.hashfn_)),
268         emptyfn_(std::move(other.emptyfn_)),
269         pred_(std::move(other.pred_)),
270         num_elements_(other.num_elements_),
271         num_buckets_(other.num_buckets_),
272         elements_until_expand_(other.elements_until_expand_),
273         owns_data_(other.owns_data_),
274         data_(other.data_),
275         min_load_factor_(other.min_load_factor_),
276         max_load_factor_(other.max_load_factor_) {
277     other.num_elements_ = 0u;
278     other.num_buckets_ = 0u;
279     other.elements_until_expand_ = 0u;
280     other.owns_data_ = false;
281     other.data_ = nullptr;
282   }
283 
284   // Construct with pre-existing buffer, usually stack-allocated,
285   // to avoid malloc/free overhead for small HashSet<>s.
HashSet(value_type * buffer,size_t buffer_size)286   HashSet(value_type* buffer, size_t buffer_size)
287       : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, buffer, buffer_size) {}
HashSet(value_type * buffer,size_t buffer_size,const allocator_type & alloc)288   HashSet(value_type* buffer, size_t buffer_size, const allocator_type& alloc)
289       : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, buffer, buffer_size, alloc) {}
HashSet(double min_load_factor,double max_load_factor,value_type * buffer,size_t buffer_size)290   HashSet(double min_load_factor, double max_load_factor, value_type* buffer, size_t buffer_size)
291       : HashSet(min_load_factor, max_load_factor, buffer, buffer_size, allocator_type()) {}
HashSet(double min_load_factor,double max_load_factor,value_type * buffer,size_t buffer_size,const allocator_type & alloc)292   HashSet(double min_load_factor,
293           double max_load_factor,
294           value_type* buffer,
295           size_t buffer_size,
296           const allocator_type& alloc)
297       : HashSet(min_load_factor, max_load_factor, HashFn(), Pred(), buffer, buffer_size, alloc) {}
HashSet(double min_load_factor,double max_load_factor,const HashFn & hashfn,const Pred & pred,value_type * buffer,size_t buffer_size,const allocator_type & alloc)298   HashSet(double min_load_factor,
299           double max_load_factor,
300           const HashFn& hashfn,
301           const Pred& pred,
302           value_type* buffer,
303           size_t buffer_size,
304           const allocator_type& alloc)
305       : allocfn_(alloc),
306         hashfn_(hashfn),
307         pred_(pred),
308         num_elements_(0u),
309         num_buckets_(buffer_size),
310         elements_until_expand_(buffer_size * max_load_factor),
311         owns_data_(false),
312         data_(buffer),
313         min_load_factor_(min_load_factor),
314         max_load_factor_(max_load_factor) {
315     DCHECK_GT(min_load_factor, 0.0);
316     DCHECK_LT(max_load_factor, 1.0);
317     for (size_t i = 0; i != buffer_size; ++i) {
318       emptyfn_.MakeEmpty(buffer[i]);
319     }
320   }
321 
322   // Construct from existing data.
323   // Read from a block of memory, if make_copy_of_data is false, then data_ points to within the
324   // passed in ptr_.
HashSet(const uint8_t * ptr,bool make_copy_of_data,size_t * read_count)325   HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) noexcept {
326     uint64_t temp;
327     size_t offset = 0;
328     offset = ReadFromBytes(ptr, offset, &temp);
329     num_elements_ = static_cast<uint64_t>(temp);
330     offset = ReadFromBytes(ptr, offset, &temp);
331     num_buckets_ = static_cast<uint64_t>(temp);
332     CHECK_LE(num_elements_, num_buckets_);
333     offset = ReadFromBytes(ptr, offset, &temp);
334     elements_until_expand_ = static_cast<uint64_t>(temp);
335     offset = ReadFromBytes(ptr, offset, &min_load_factor_);
336     offset = ReadFromBytes(ptr, offset, &max_load_factor_);
337     if (!make_copy_of_data) {
338       owns_data_ = false;
339       data_ = const_cast<T*>(reinterpret_cast<const T*>(ptr + offset));
340       offset += sizeof(*data_) * num_buckets_;
341     } else {
342       AllocateStorage(num_buckets_);
343       // Write elements, not that this may not be safe for cross compilation if the elements are
344       // pointer sized.
345       for (size_t i = 0; i < num_buckets_; ++i) {
346         offset = ReadFromBytes(ptr, offset, &data_[i]);
347       }
348     }
349     // Caller responsible for aligning.
350     *read_count = offset;
351   }
352 
353   // Returns how large the table is after being written. If target is null, then no writing happens
354   // but the size is still returned. Target must be 8 byte aligned.
WriteToMemory(uint8_t * ptr)355   size_t WriteToMemory(uint8_t* ptr) const {
356     size_t offset = 0;
357     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
358     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
359     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(elements_until_expand_));
360     offset = WriteToBytes(ptr, offset, min_load_factor_);
361     offset = WriteToBytes(ptr, offset, max_load_factor_);
362     // Write elements, not that this may not be safe for cross compilation if the elements are
363     // pointer sized.
364     for (size_t i = 0; i < num_buckets_; ++i) {
365       offset = WriteToBytes(ptr, offset, data_[i]);
366     }
367     // Caller responsible for aligning.
368     return offset;
369   }
370 
~HashSet()371   ~HashSet() {
372     DeallocateStorage();
373   }
374 
375   HashSet& operator=(HashSet&& other) noexcept {
376     HashSet(std::move(other)).swap(*this);  // NOLINT [runtime/explicit] [5]
377     return *this;
378   }
379 
380   HashSet& operator=(const HashSet& other) {
381     HashSet(other).swap(*this);  // NOLINT(runtime/explicit) - a case of lint gone mad.
382     return *this;
383   }
384 
385   // Lower case for c++11 for each.
begin()386   iterator begin() {
387     iterator ret(this, 0);
388     if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
389       ++ret;  // Skip all the empty slots.
390     }
391     return ret;
392   }
393 
394   // Lower case for c++11 for each. const version.
begin()395   const_iterator begin() const {
396     const_iterator ret(this, 0);
397     if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
398       ++ret;  // Skip all the empty slots.
399     }
400     return ret;
401   }
402 
403   // Lower case for c++11 for each.
end()404   iterator end() {
405     return iterator(this, NumBuckets());
406   }
407 
408   // Lower case for c++11 for each. const version.
end()409   const_iterator end() const {
410     return const_iterator(this, NumBuckets());
411   }
412 
size()413   size_t size() const {
414     return num_elements_;
415   }
416 
empty()417   bool empty() const {
418     return size() == 0;
419   }
420 
421   // Erase algorithm:
422   // Make an empty slot where the iterator is pointing.
423   // Scan forwards until we hit another empty slot.
424   // If an element in between doesn't rehash to the range from the current empty slot to the
425   // iterator. It must be before the empty slot, in that case we can move it to the empty slot
426   // and set the empty slot to be the location we just moved from.
427   // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an
428   // element to its actual location/index.
429   // Note that since erase shuffles back elements, it may result in the same element being visited
430   // twice during HashSet iteration. This happens when an element already visited during iteration
431   // gets shuffled to the end of the bucket array.
erase(iterator it)432   iterator erase(iterator it) {
433     // empty_index is the index that will become empty.
434     size_t empty_index = it.index_;
435     DCHECK(!IsFreeSlot(empty_index));
436     size_t next_index = empty_index;
437     bool filled = false;  // True if we filled the empty index.
438     while (true) {
439       next_index = NextIndex(next_index);
440       T& next_element = ElementForIndex(next_index);
441       // If the next element is empty, we are done. Make sure to clear the current empty index.
442       if (emptyfn_.IsEmpty(next_element)) {
443         emptyfn_.MakeEmpty(ElementForIndex(empty_index));
444         break;
445       }
446       // Otherwise try to see if the next element can fill the current empty index.
447       const size_t next_hash = hashfn_(next_element);
448       // Calculate the ideal index, if it is within empty_index + 1 to next_index then there is
449       // nothing we can do.
450       size_t next_ideal_index = IndexForHash(next_hash);
451       // Loop around if needed for our check.
452       size_t unwrapped_next_index = next_index;
453       if (unwrapped_next_index < empty_index) {
454         unwrapped_next_index += NumBuckets();
455       }
456       // Loop around if needed for our check.
457       size_t unwrapped_next_ideal_index = next_ideal_index;
458       if (unwrapped_next_ideal_index < empty_index) {
459         unwrapped_next_ideal_index += NumBuckets();
460       }
461       if (unwrapped_next_ideal_index <= empty_index ||
462           unwrapped_next_ideal_index > unwrapped_next_index) {
463         // If the target index isn't within our current range it must have been probed from before
464         // the empty index.
465         ElementForIndex(empty_index) = std::move(next_element);
466         filled = true;  // TODO: Optimize
467         empty_index = next_index;
468       }
469     }
470     --num_elements_;
471     // If we didn't fill the slot then we need go to the next non free slot.
472     if (!filled) {
473       ++it;
474     }
475     return it;
476   }
477 
478   // Find an element, returns end() if not found.
479   // Allows custom key (K) types, example of when this is useful:
480   // Set of Class* indexed by name, want to find a class with a name but can't allocate
481   // a temporary Class object in the heap for performance solution.
482   template <typename K>
find(const K & key)483   iterator find(const K& key) {
484     return FindWithHash(key, hashfn_(key));
485   }
486 
487   template <typename K>
find(const K & key)488   const_iterator find(const K& key) const {
489     return FindWithHash(key, hashfn_(key));
490   }
491 
492   template <typename K>
FindWithHash(const K & key,size_t hash)493   iterator FindWithHash(const K& key, size_t hash) {
494     return iterator(this, FindIndex(key, hash));
495   }
496 
497   template <typename K>
FindWithHash(const K & key,size_t hash)498   const_iterator FindWithHash(const K& key, size_t hash) const {
499     return const_iterator(this, FindIndex(key, hash));
500   }
501 
502   // Insert an element with hint.
503   // Note: The hint is not very useful for a HashSet<> unless there are many hash conflicts
504   // and in that case the use of HashSet<> itself should be reconsidered.
insert(const_iterator hint,const T & element)505   std::pair<iterator, bool> insert([[maybe_unused]] const_iterator hint, const T& element) {
506     return insert(element);
507   }
insert(const_iterator hint,T && element)508   std::pair<iterator, bool> insert([[maybe_unused]] const_iterator hint, T&& element) {
509     return insert(std::move(element));
510   }
511 
512   // Insert an element.
insert(const T & element)513   std::pair<iterator, bool> insert(const T& element) {
514     return InsertWithHash(element, hashfn_(element));
515   }
insert(T && element)516   std::pair<iterator, bool> insert(T&& element) {
517     return InsertWithHash(std::move(element), hashfn_(element));
518   }
519 
520   template <typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
InsertWithHash(U && element,size_t hash)521   std::pair<iterator, bool> InsertWithHash(U&& element, size_t hash) {
522     DCHECK_EQ(hash, hashfn_(element));
523     if (num_elements_ >= elements_until_expand_) {
524       Expand();
525       DCHECK_LT(num_elements_, elements_until_expand_);
526     }
527     bool find_failed = false;
528     auto find_fail_fn = [&](size_t index) ALWAYS_INLINE {
529       find_failed = true;
530       return index;
531     };
532     size_t index = FindIndexImpl(element, hash, find_fail_fn);
533     if (find_failed) {
534       data_[index] = std::forward<U>(element);
535       ++num_elements_;
536     }
537     return std::make_pair(iterator(this, index), find_failed);
538   }
539 
540   // Insert an element known not to be in the `HashSet<>`.
Put(const T & element)541   void Put(const T& element) {
542     return PutWithHash(element, hashfn_(element));
543   }
Put(T && element)544   void Put(T&& element) {
545     return PutWithHash(std::move(element), hashfn_(element));
546   }
547 
548   template <typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
PutWithHash(U && element,size_t hash)549   void PutWithHash(U&& element, size_t hash) {
550     DCHECK_EQ(hash, hashfn_(element));
551     if (num_elements_ >= elements_until_expand_) {
552       Expand();
553       DCHECK_LT(num_elements_, elements_until_expand_);
554     }
555     auto find_fail_fn = [](size_t index) ALWAYS_INLINE { return index; };
556     size_t index = FindIndexImpl</*kCanFind=*/ false>(element, hash, find_fail_fn);
557     data_[index] = std::forward<U>(element);
558     ++num_elements_;
559   }
560 
swap(HashSet & other)561   void swap(HashSet& other) {
562     // Use argument-dependent lookup with fall-back to std::swap() for function objects.
563     using std::swap;
564     swap(allocfn_, other.allocfn_);
565     swap(hashfn_, other.hashfn_);
566     swap(emptyfn_, other.emptyfn_);
567     swap(pred_, other.pred_);
568     std::swap(data_, other.data_);
569     std::swap(num_buckets_, other.num_buckets_);
570     std::swap(num_elements_, other.num_elements_);
571     std::swap(elements_until_expand_, other.elements_until_expand_);
572     std::swap(min_load_factor_, other.min_load_factor_);
573     std::swap(max_load_factor_, other.max_load_factor_);
574     std::swap(owns_data_, other.owns_data_);
575   }
576 
get_allocator()577   allocator_type get_allocator() const {
578     return allocfn_;
579   }
580 
ShrinkToMaximumLoad()581   void ShrinkToMaximumLoad() {
582     Resize(size() / max_load_factor_);
583   }
584 
585   // Reserve enough room to insert until Size() == num_elements without requiring to grow the hash
586   // set. No-op if the hash set is already large enough to do this.
reserve(size_t num_elements)587   void reserve(size_t num_elements) {
588     size_t num_buckets = num_elements / max_load_factor_;
589     // Deal with rounding errors. Add one for rounding.
590     while (static_cast<size_t>(num_buckets * max_load_factor_) <= num_elements + 1u) {
591       ++num_buckets;
592     }
593     if (num_buckets > NumBuckets()) {
594       Resize(num_buckets);
595     }
596   }
597 
598   // To distance that inserted elements were probed. Used for measuring how good hash functions
599   // are.
TotalProbeDistance()600   size_t TotalProbeDistance() const {
601     size_t total = 0;
602     for (size_t i = 0; i < NumBuckets(); ++i) {
603       const T& element = ElementForIndex(i);
604       if (!emptyfn_.IsEmpty(element)) {
605         size_t ideal_location = IndexForHash(hashfn_(element));
606         if (ideal_location > i) {
607           total += i + NumBuckets() - ideal_location;
608         } else {
609           total += i - ideal_location;
610         }
611       }
612     }
613     return total;
614   }
615 
616   // Calculate the current load factor and return it.
CalculateLoadFactor()617   double CalculateLoadFactor() const {
618     return static_cast<double>(size()) / static_cast<double>(NumBuckets());
619   }
620 
621   // Make sure that everything reinserts in the right spot. Returns the number of errors.
Verify()622   size_t Verify() NO_THREAD_SAFETY_ANALYSIS {
623     size_t errors = 0;
624     for (size_t i = 0; i < num_buckets_; ++i) {
625       T& element = data_[i];
626       if (!emptyfn_.IsEmpty(element)) {
627         T temp;
628         emptyfn_.MakeEmpty(temp);
629         std::swap(temp, element);
630         size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp)));
631         if (i != first_slot) {
632           LOG(ERROR) << "Element " << i << " should be in slot " << first_slot;
633           ++errors;
634         }
635         std::swap(temp, element);
636       }
637     }
638     return errors;
639   }
640 
GetMinLoadFactor()641   double GetMinLoadFactor() const {
642     return min_load_factor_;
643   }
644 
GetMaxLoadFactor()645   double GetMaxLoadFactor() const {
646     return max_load_factor_;
647   }
648 
649   // Change the load factor of the hash set. If the current load factor is greater than the max
650   // specified, then we resize the hash table storage.
SetLoadFactor(double min_load_factor,double max_load_factor)651   void SetLoadFactor(double min_load_factor, double max_load_factor) {
652     DCHECK_LT(min_load_factor, max_load_factor);
653     DCHECK_GT(min_load_factor, 0.0);
654     DCHECK_LT(max_load_factor, 1.0);
655     min_load_factor_ = min_load_factor;
656     max_load_factor_ = max_load_factor;
657     elements_until_expand_ = NumBuckets() * max_load_factor_;
658     // If the current load factor isn't in the range, then resize to the mean of the minimum and
659     // maximum load factor.
660     const double load_factor = CalculateLoadFactor();
661     if (load_factor > max_load_factor_) {
662       Resize(size() / ((min_load_factor_ + max_load_factor_) * 0.5));
663     }
664   }
665 
666   // The hash set expands when Size() reaches ElementsUntilExpand().
ElementsUntilExpand()667   size_t ElementsUntilExpand() const {
668     return elements_until_expand_;
669   }
670 
NumBuckets()671   size_t NumBuckets() const {
672     return num_buckets_;
673   }
674 
675  private:
ElementForIndex(size_t index)676   T& ElementForIndex(size_t index) {
677     DCHECK_LT(index, NumBuckets());
678     DCHECK(data_ != nullptr);
679     return data_[index];
680   }
681 
ElementForIndex(size_t index)682   const T& ElementForIndex(size_t index) const {
683     DCHECK_LT(index, NumBuckets());
684     DCHECK(data_ != nullptr);
685     return data_[index];
686   }
687 
IndexForHash(size_t hash)688   size_t IndexForHash(size_t hash) const {
689     // Protect against undefined behavior (division by zero).
690     if (UNLIKELY(num_buckets_ == 0)) {
691       return 0;
692     }
693     return hash % num_buckets_;
694   }
695 
NextIndex(size_t index)696   size_t NextIndex(size_t index) const {
697     if (UNLIKELY(++index >= num_buckets_)) {
698       DCHECK_EQ(index, NumBuckets());
699       return 0;
700     }
701     return index;
702   }
703 
704   // Find the hash table slot for an element, or return NumBuckets() if not found.
705   // This value for not found is important so that iterator(this, FindIndex(...)) == end().
706   template <typename K>
707   ALWAYS_INLINE
FindIndex(const K & element,size_t hash)708   size_t FindIndex(const K& element, size_t hash) const {
709     // Guard against failing to get an element for a non-existing index.
710     if (UNLIKELY(NumBuckets() == 0)) {
711       return 0;
712     }
713     auto fail_fn = [&]([[maybe_unused]] size_t index) ALWAYS_INLINE { return NumBuckets(); };
714     return FindIndexImpl(element, hash, fail_fn);
715   }
716 
717   // Find the hash table slot for an element, or return an empty slot index if not found.
718   template <bool kCanFind = true, typename K, typename FailFn>
719   ALWAYS_INLINE
FindIndexImpl(const K & element,size_t hash,FailFn fail_fn)720   size_t FindIndexImpl(const K& element, size_t hash, FailFn fail_fn) const {
721     DCHECK_NE(NumBuckets(), 0u);
722     DCHECK_EQ(hashfn_(element), hash);
723     size_t index = IndexForHash(hash);
724     while (true) {
725       const T& slot = ElementForIndex(index);
726       if (emptyfn_.IsEmpty(slot)) {
727         return fail_fn(index);
728       }
729       if (!kCanFind) {
730         DCHECK(!pred_(slot, element));
731       } else if (pred_(slot, element)) {
732         return index;
733       }
734       index = NextIndex(index);
735     }
736   }
737 
IsFreeSlot(size_t index)738   bool IsFreeSlot(size_t index) const {
739     return emptyfn_.IsEmpty(ElementForIndex(index));
740   }
741 
742   // Allocate a number of buckets.
AllocateStorage(size_t num_buckets)743   void AllocateStorage(size_t num_buckets) {
744     num_buckets_ = num_buckets;
745     data_ = allocfn_.allocate(num_buckets_);
746     owns_data_ = true;
747     for (size_t i = 0; i < num_buckets_; ++i) {
748       std::allocator_traits<allocator_type>::construct(allocfn_, std::addressof(data_[i]));
749       emptyfn_.MakeEmpty(data_[i]);
750     }
751   }
752 
DeallocateStorage()753   void DeallocateStorage() {
754     if (owns_data_) {
755       for (size_t i = 0; i < NumBuckets(); ++i) {
756         std::allocator_traits<allocator_type>::destroy(allocfn_, std::addressof(data_[i]));
757       }
758       if (data_ != nullptr) {
759         allocfn_.deallocate(data_, NumBuckets());
760       }
761       owns_data_ = false;
762     }
763     data_ = nullptr;
764     num_buckets_ = 0;
765   }
766 
767   // Expand the set based on the load factors.
Expand()768   void Expand() {
769     size_t min_index = static_cast<size_t>(size() / min_load_factor_);
770     // Resize based on the minimum load factor.
771     Resize(min_index);
772   }
773 
774   // Expand / shrink the table to the new specified size.
Resize(size_t new_size)775   void Resize(size_t new_size) {
776     if (new_size < kMinBuckets) {
777       new_size = kMinBuckets;
778     }
779     DCHECK_GE(new_size, size());
780     T* const old_data = data_;
781     size_t old_num_buckets = num_buckets_;
782     // Reinsert all of the old elements.
783     const bool owned_data = owns_data_;
784     AllocateStorage(new_size);
785     for (size_t i = 0; i < old_num_buckets; ++i) {
786       T& element = old_data[i];
787       if (!emptyfn_.IsEmpty(element)) {
788         data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
789       }
790       if (owned_data) {
791         std::allocator_traits<allocator_type>::destroy(allocfn_, std::addressof(element));
792       }
793     }
794     if (owned_data) {
795       allocfn_.deallocate(old_data, old_num_buckets);
796     }
797 
798     // When we hit elements_until_expand_, we are at the max load factor and must expand again.
799     elements_until_expand_ = NumBuckets() * max_load_factor_;
800   }
801 
FirstAvailableSlot(size_t index)802   ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
803     DCHECK_LT(index, NumBuckets());  // Don't try to get a slot out of range.
804     size_t non_empty_count = 0;
805     while (!emptyfn_.IsEmpty(data_[index])) {
806       index = NextIndex(index);
807       non_empty_count++;
808       DCHECK_LE(non_empty_count, NumBuckets());  // Don't loop forever.
809     }
810     return index;
811   }
812 
NextNonEmptySlot(size_t index)813   size_t NextNonEmptySlot(size_t index) const {
814     const size_t num_buckets = NumBuckets();
815     DCHECK_LT(index, num_buckets);
816     do {
817       ++index;
818     } while (index < num_buckets && IsFreeSlot(index));
819     return index;
820   }
821 
822   // Return new offset.
823   template <typename Elem>
WriteToBytes(uint8_t * ptr,size_t offset,Elem n)824   static size_t WriteToBytes(uint8_t* ptr, size_t offset, Elem n) {
825     DCHECK_ALIGNED(ptr + offset, sizeof(n));
826     if (ptr != nullptr) {
827       *reinterpret_cast<Elem*>(ptr + offset) = n;
828     }
829     return offset + sizeof(n);
830   }
831 
832   template <typename Elem>
ReadFromBytes(const uint8_t * ptr,size_t offset,Elem * out)833   static size_t ReadFromBytes(const uint8_t* ptr, size_t offset, Elem* out) {
834     DCHECK(ptr != nullptr);
835     DCHECK_ALIGNED(ptr + offset, sizeof(*out));
836     *out = *reinterpret_cast<const Elem*>(ptr + offset);
837     return offset + sizeof(*out);
838   }
839 
840   Alloc allocfn_;  // Allocator function.
841   HashFn hashfn_;  // Hashing function.
842   EmptyFn emptyfn_;  // IsEmpty/SetEmpty function.
843   Pred pred_;  // Equals function.
844   size_t num_elements_;  // Number of inserted elements.
845   size_t num_buckets_;  // Number of hash table buckets.
846   size_t elements_until_expand_;  // Maximum number of elements until we expand the table.
847   bool owns_data_;  // If we own data_ and are responsible for freeing it.
848   T* data_;  // Backing storage.
849   double min_load_factor_;
850   double max_load_factor_;
851 
852   template <class Elem, class HashSetType>
853   friend class HashSetIterator;
854 
855   ART_FRIEND_TEST(InternTableTest, CrossHash);
856   ART_FRIEND_TEST(HashSetTest, Preallocated);
857 };
858 
859 template <class T, class EmptyFn, class HashFn, class Pred, class Alloc>
swap(HashSet<T,EmptyFn,HashFn,Pred,Alloc> & lhs,HashSet<T,EmptyFn,HashFn,Pred,Alloc> & rhs)860 void swap(HashSet<T, EmptyFn, HashFn, Pred, Alloc>& lhs,
861           HashSet<T, EmptyFn, HashFn, Pred, Alloc>& rhs) {
862   lhs.swap(rhs);
863 }
864 
865 }  // namespace art
866 
867 #endif  // ART_LIBARTBASE_BASE_HASH_SET_H_
868