1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_LIBARTBASE_BASE_HASH_SET_H_
18 #define ART_LIBARTBASE_BASE_HASH_SET_H_
19 
20 #include <stdint.h>
21 
22 #include <functional>
23 #include <iterator>
24 #include <memory>
25 #include <string>
26 #include <type_traits>
27 #include <utility>
28 
29 #include <android-base/logging.h>
30 
31 #include "base/data_hash.h"
32 #include "bit_utils.h"
33 #include "macros.h"
34 
35 namespace art {
36 
// Forward iterator over the occupied slots of a HashSet. Elem is the element
// type (const-qualified for const_iterator) and HashSetType the (possibly
// const) owning set type.
template <class Elem, class HashSetType>
class HashSetIterator {
 public:
  // Iterator traits are declared directly as member typedefs because
  // std::iterator<> is deprecated in C++17 (and removed in C++20).
  using iterator_category = std::forward_iterator_tag;
  using value_type = Elem;
  using difference_type = std::ptrdiff_t;
  using pointer = Elem*;
  using reference = Elem&;

  HashSetIterator(const HashSetIterator&) = default;
  HashSetIterator(HashSetIterator&&) = default;
  HashSetIterator(HashSetType* hash_set, size_t index) : index_(index), hash_set_(hash_set) {}

  // Conversion from iterator to const_iterator.
  template <class OtherElem,
            class OtherHashSetType,
            typename = typename std::enable_if<
                std::is_same<Elem, const OtherElem>::value &&
                std::is_same<HashSetType, const OtherHashSetType>::value>::type>
  HashSetIterator(const HashSetIterator<OtherElem, OtherHashSetType>& other)
      : index_(other.index_), hash_set_(other.hash_set_) {}

  HashSetIterator& operator=(const HashSetIterator&) = default;
  HashSetIterator& operator=(HashSetIterator&&) = default;

  bool operator==(const HashSetIterator& other) const {
    return hash_set_ == other.hash_set_ && this->index_ == other.index_;
  }

  bool operator!=(const HashSetIterator& other) const {
    return !(*this == other);
  }

  HashSetIterator operator++() {  // Value after modification.
    this->index_ = hash_set_->NextNonEmptySlot(index_);
    return *this;
  }

  HashSetIterator operator++(int) {
    HashSetIterator temp = *this;
    ++*this;
    return temp;
  }

  Elem& operator*() const {
    DCHECK(!hash_set_->IsFreeSlot(this->index_));
    return hash_set_->ElementForIndex(this->index_);
  }

  Elem* operator->() const {
    return &**this;
  }

 private:
  size_t index_;           // Current slot in the bucket array.
  HashSetType* hash_set_;  // Set being iterated over; not owned.

  template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
  friend bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
                         const HashSetIterator<Elem2, HashSetType2>& rhs);
  template <class T, class EmptyFn, class HashFn, class Pred, class Alloc> friend class HashSet;
  template <class OtherElem, class OtherHashSetType> friend class HashSetIterator;
};
94 
95 template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
96 bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
97                 const HashSetIterator<Elem2, HashSetType2>& rhs) {
98   static_assert(
99       std::is_convertible<HashSetIterator<Elem1, HashSetType1>,
100                           HashSetIterator<Elem2, HashSetType2>>::value ||
101       std::is_convertible<HashSetIterator<Elem2, HashSetType2>,
102                           HashSetIterator<Elem1, HashSetType1>>::value, "Bad iterator types.");
103   DCHECK_EQ(lhs.hash_set_, rhs.hash_set_);
104   return lhs.index_ == rhs.index_;
105 }
106 
107 template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
108 bool operator!=(const HashSetIterator<Elem1, HashSetType1>& lhs,
109                 const HashSetIterator<Elem2, HashSetType2>& rhs) {
110   return !(lhs == rhs);
111 }
112 
// Default empty-slot policy: a value-initialized T is the sentinel that
// marks a free bucket.
template <class T>
class DefaultEmptyFn {
 public:
  // Overwrite |item| with the sentinel (value-initialized) value.
  void MakeEmpty(T& item) const {
    item = T();
  }
  // True iff |item| compares equal to a value-initialized T.
  bool IsEmpty(const T& item) const {
    return item == T();
  }
};

// Specialization for pointer elements: nullptr marks a free bucket.
template <class T>
class DefaultEmptyFn<T*> {
 public:
  void MakeEmpty(T*& item) const {
    item = nullptr;
  }
  bool IsEmpty(T* const& item) const {
    return item == nullptr;
  }
};
135 
136 template <class T>
137 using DefaultHashFn = typename std::conditional<std::is_same<T, std::string>::value,
138                                                 DataHash,
139                                                 std::hash<T>>::type;
140 
// Transparent equality predicate for std::string keys.
struct DefaultStringEquals {
  // Allow comparison with anything that can be compared to std::string,
  // for example std::string_view.
  template <typename T>
  bool operator()(const std::string& lhs, const T& rhs) const {
    return lhs == rhs;
  }
};
149 
150 template <class T>
151 using DefaultPred = typename std::conditional<std::is_same<T, std::string>::value,
152                                               DefaultStringEquals,
153                                               std::equal_to<T>>::type;
154 
155 // Low memory version of a hash set, uses less memory than std::unordered_multiset since elements
156 // aren't boxed. Uses linear probing to resolve collisions.
157 // EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item).
158 // TODO: We could get rid of this requirement by using a bitmap, though maybe this would be slower
159 // and more complicated.
160 template <class T,
161           class EmptyFn = DefaultEmptyFn<T>,
162           class HashFn = DefaultHashFn<T>,
163           class Pred = DefaultPred<T>,
164           class Alloc = std::allocator<T>>
165 class HashSet {
166  public:
167   using value_type = T;
168   using allocator_type = Alloc;
169   using reference = T&;
170   using const_reference = const T&;
171   using pointer = T*;
172   using const_pointer = const T*;
173   using iterator = HashSetIterator<T, HashSet>;
174   using const_iterator = HashSetIterator<const T, const HashSet>;
175   using size_type = size_t;
176   using difference_type = ptrdiff_t;
177 
178   static constexpr double kDefaultMinLoadFactor = 0.4;
179   static constexpr double kDefaultMaxLoadFactor = 0.7;
180   static constexpr size_t kMinBuckets = 1000;
181 
182   // If we don't own the data, this will create a new array which owns the data.
clear()183   void clear() {
184     DeallocateStorage();
185     num_elements_ = 0;
186     elements_until_expand_ = 0;
187   }
188 
HashSet()189   HashSet() : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor) {}
190 
HashSet(double min_load_factor,double max_load_factor)191   HashSet(double min_load_factor, double max_load_factor) noexcept
192       : num_elements_(0u),
193         num_buckets_(0u),
194         elements_until_expand_(0u),
195         owns_data_(false),
196         data_(nullptr),
197         min_load_factor_(min_load_factor),
198         max_load_factor_(max_load_factor) {
199     DCHECK_GT(min_load_factor, 0.0);
200     DCHECK_LT(max_load_factor, 1.0);
201   }
202 
HashSet(const allocator_type & alloc)203   explicit HashSet(const allocator_type& alloc) noexcept
204       : allocfn_(alloc),
205         hashfn_(),
206         emptyfn_(),
207         pred_(),
208         num_elements_(0u),
209         num_buckets_(0u),
210         elements_until_expand_(0u),
211         owns_data_(false),
212         data_(nullptr),
213         min_load_factor_(kDefaultMinLoadFactor),
214         max_load_factor_(kDefaultMaxLoadFactor) {
215   }
216 
HashSet(const HashSet & other)217   HashSet(const HashSet& other) noexcept
218       : allocfn_(other.allocfn_),
219         hashfn_(other.hashfn_),
220         emptyfn_(other.emptyfn_),
221         pred_(other.pred_),
222         num_elements_(other.num_elements_),
223         num_buckets_(0),
224         elements_until_expand_(other.elements_until_expand_),
225         owns_data_(false),
226         data_(nullptr),
227         min_load_factor_(other.min_load_factor_),
228         max_load_factor_(other.max_load_factor_) {
229     AllocateStorage(other.NumBuckets());
230     for (size_t i = 0; i < num_buckets_; ++i) {
231       ElementForIndex(i) = other.data_[i];
232     }
233   }
234 
235   // noexcept required so that the move constructor is used instead of copy constructor.
236   // b/27860101
HashSet(HashSet && other)237   HashSet(HashSet&& other) noexcept
238       : allocfn_(std::move(other.allocfn_)),
239         hashfn_(std::move(other.hashfn_)),
240         emptyfn_(std::move(other.emptyfn_)),
241         pred_(std::move(other.pred_)),
242         num_elements_(other.num_elements_),
243         num_buckets_(other.num_buckets_),
244         elements_until_expand_(other.elements_until_expand_),
245         owns_data_(other.owns_data_),
246         data_(other.data_),
247         min_load_factor_(other.min_load_factor_),
248         max_load_factor_(other.max_load_factor_) {
249     other.num_elements_ = 0u;
250     other.num_buckets_ = 0u;
251     other.elements_until_expand_ = 0u;
252     other.owns_data_ = false;
253     other.data_ = nullptr;
254   }
255 
256   // Construct from existing data.
257   // Read from a block of memory, if make_copy_of_data is false, then data_ points to within the
258   // passed in ptr_.
HashSet(const uint8_t * ptr,bool make_copy_of_data,size_t * read_count)259   HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) noexcept {
260     uint64_t temp;
261     size_t offset = 0;
262     offset = ReadFromBytes(ptr, offset, &temp);
263     num_elements_ = static_cast<uint64_t>(temp);
264     offset = ReadFromBytes(ptr, offset, &temp);
265     num_buckets_ = static_cast<uint64_t>(temp);
266     CHECK_LE(num_elements_, num_buckets_);
267     offset = ReadFromBytes(ptr, offset, &temp);
268     elements_until_expand_ = static_cast<uint64_t>(temp);
269     offset = ReadFromBytes(ptr, offset, &min_load_factor_);
270     offset = ReadFromBytes(ptr, offset, &max_load_factor_);
271     if (!make_copy_of_data) {
272       owns_data_ = false;
273       data_ = const_cast<T*>(reinterpret_cast<const T*>(ptr + offset));
274       offset += sizeof(*data_) * num_buckets_;
275     } else {
276       AllocateStorage(num_buckets_);
277       // Write elements, not that this may not be safe for cross compilation if the elements are
278       // pointer sized.
279       for (size_t i = 0; i < num_buckets_; ++i) {
280         offset = ReadFromBytes(ptr, offset, &data_[i]);
281       }
282     }
283     // Caller responsible for aligning.
284     *read_count = offset;
285   }
286 
287   // Returns how large the table is after being written. If target is null, then no writing happens
288   // but the size is still returned. Target must be 8 byte aligned.
WriteToMemory(uint8_t * ptr)289   size_t WriteToMemory(uint8_t* ptr) const {
290     size_t offset = 0;
291     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
292     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
293     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(elements_until_expand_));
294     offset = WriteToBytes(ptr, offset, min_load_factor_);
295     offset = WriteToBytes(ptr, offset, max_load_factor_);
296     // Write elements, not that this may not be safe for cross compilation if the elements are
297     // pointer sized.
298     for (size_t i = 0; i < num_buckets_; ++i) {
299       offset = WriteToBytes(ptr, offset, data_[i]);
300     }
301     // Caller responsible for aligning.
302     return offset;
303   }
304 
~HashSet()305   ~HashSet() {
306     DeallocateStorage();
307   }
308 
309   HashSet& operator=(HashSet&& other) noexcept {
310     HashSet(std::move(other)).swap(*this);  // NOLINT [runtime/explicit] [5]
311     return *this;
312   }
313 
314   HashSet& operator=(const HashSet& other) noexcept {
315     HashSet(other).swap(*this);  // NOLINT(runtime/explicit) - a case of lint gone mad.
316     return *this;
317   }
318 
319   // Lower case for c++11 for each.
begin()320   iterator begin() {
321     iterator ret(this, 0);
322     if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
323       ++ret;  // Skip all the empty slots.
324     }
325     return ret;
326   }
327 
328   // Lower case for c++11 for each. const version.
begin()329   const_iterator begin() const {
330     const_iterator ret(this, 0);
331     if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
332       ++ret;  // Skip all the empty slots.
333     }
334     return ret;
335   }
336 
337   // Lower case for c++11 for each.
end()338   iterator end() {
339     return iterator(this, NumBuckets());
340   }
341 
342   // Lower case for c++11 for each. const version.
end()343   const_iterator end() const {
344     return const_iterator(this, NumBuckets());
345   }
346 
size()347   size_t size() const {
348     return num_elements_;
349   }
350 
empty()351   bool empty() const {
352     return size() == 0;
353   }
354 
355   // Erase algorithm:
356   // Make an empty slot where the iterator is pointing.
357   // Scan forwards until we hit another empty slot.
358   // If an element in between doesn't rehash to the range from the current empty slot to the
359   // iterator. It must be before the empty slot, in that case we can move it to the empty slot
360   // and set the empty slot to be the location we just moved from.
361   // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an
362   // element to its actual location/index.
363   // Note that since erase shuffles back elements, it may result in the same element being visited
364   // twice during HashSet iteration. This happens when an element already visited during iteration
365   // gets shuffled to the end of the bucket array.
erase(iterator it)366   iterator erase(iterator it) {
367     // empty_index is the index that will become empty.
368     size_t empty_index = it.index_;
369     DCHECK(!IsFreeSlot(empty_index));
370     size_t next_index = empty_index;
371     bool filled = false;  // True if we filled the empty index.
372     while (true) {
373       next_index = NextIndex(next_index);
374       T& next_element = ElementForIndex(next_index);
375       // If the next element is empty, we are done. Make sure to clear the current empty index.
376       if (emptyfn_.IsEmpty(next_element)) {
377         emptyfn_.MakeEmpty(ElementForIndex(empty_index));
378         break;
379       }
380       // Otherwise try to see if the next element can fill the current empty index.
381       const size_t next_hash = hashfn_(next_element);
382       // Calculate the ideal index, if it is within empty_index + 1 to next_index then there is
383       // nothing we can do.
384       size_t next_ideal_index = IndexForHash(next_hash);
385       // Loop around if needed for our check.
386       size_t unwrapped_next_index = next_index;
387       if (unwrapped_next_index < empty_index) {
388         unwrapped_next_index += NumBuckets();
389       }
390       // Loop around if needed for our check.
391       size_t unwrapped_next_ideal_index = next_ideal_index;
392       if (unwrapped_next_ideal_index < empty_index) {
393         unwrapped_next_ideal_index += NumBuckets();
394       }
395       if (unwrapped_next_ideal_index <= empty_index ||
396           unwrapped_next_ideal_index > unwrapped_next_index) {
397         // If the target index isn't within our current range it must have been probed from before
398         // the empty index.
399         ElementForIndex(empty_index) = std::move(next_element);
400         filled = true;  // TODO: Optimize
401         empty_index = next_index;
402       }
403     }
404     --num_elements_;
405     // If we didn't fill the slot then we need go to the next non free slot.
406     if (!filled) {
407       ++it;
408     }
409     return it;
410   }
411 
412   // Find an element, returns end() if not found.
413   // Allows custom key (K) types, example of when this is useful:
414   // Set of Class* sorted by name, want to find a class with a name but can't allocate a dummy
415   // object in the heap for performance solution.
416   template <typename K>
find(const K & key)417   iterator find(const K& key) {
418     return FindWithHash(key, hashfn_(key));
419   }
420 
421   template <typename K>
find(const K & key)422   const_iterator find(const K& key) const {
423     return FindWithHash(key, hashfn_(key));
424   }
425 
426   template <typename K>
FindWithHash(const K & key,size_t hash)427   iterator FindWithHash(const K& key, size_t hash) {
428     return iterator(this, FindIndex(key, hash));
429   }
430 
431   template <typename K>
FindWithHash(const K & key,size_t hash)432   const_iterator FindWithHash(const K& key, size_t hash) const {
433     return const_iterator(this, FindIndex(key, hash));
434   }
435 
436   // Insert an element with hint, allows duplicates.
437   // Note: The hint is not very useful for a HashSet<> unless there are many hash conflicts
438   // and in that case the use of HashSet<> itself should be reconsidered.
insert(const_iterator hint ATTRIBUTE_UNUSED,const T & element)439   iterator insert(const_iterator hint ATTRIBUTE_UNUSED, const T& element) {
440     return insert(element);
441   }
insert(const_iterator hint ATTRIBUTE_UNUSED,T && element)442   iterator insert(const_iterator hint ATTRIBUTE_UNUSED, T&& element) {
443     return insert(std::move(element));
444   }
445 
446   // Insert an element, allows duplicates.
insert(const T & element)447   iterator insert(const T& element) {
448     return InsertWithHash(element, hashfn_(element));
449   }
insert(T && element)450   iterator insert(T&& element) {
451     return InsertWithHash(std::move(element), hashfn_(element));
452   }
453 
454   template <typename U, typename = typename std::enable_if<std::is_convertible<U, T>::value>::type>
InsertWithHash(U && element,size_t hash)455   iterator InsertWithHash(U&& element, size_t hash) {
456     DCHECK_EQ(hash, hashfn_(element));
457     if (num_elements_ >= elements_until_expand_) {
458       Expand();
459       DCHECK_LT(num_elements_, elements_until_expand_);
460     }
461     const size_t index = FirstAvailableSlot(IndexForHash(hash));
462     data_[index] = std::forward<U>(element);
463     ++num_elements_;
464     return iterator(this, index);
465   }
466 
swap(HashSet & other)467   void swap(HashSet& other) {
468     // Use argument-dependent lookup with fall-back to std::swap() for function objects.
469     using std::swap;
470     swap(allocfn_, other.allocfn_);
471     swap(hashfn_, other.hashfn_);
472     swap(emptyfn_, other.emptyfn_);
473     swap(pred_, other.pred_);
474     std::swap(data_, other.data_);
475     std::swap(num_buckets_, other.num_buckets_);
476     std::swap(num_elements_, other.num_elements_);
477     std::swap(elements_until_expand_, other.elements_until_expand_);
478     std::swap(min_load_factor_, other.min_load_factor_);
479     std::swap(max_load_factor_, other.max_load_factor_);
480     std::swap(owns_data_, other.owns_data_);
481   }
482 
get_allocator()483   allocator_type get_allocator() const {
484     return allocfn_;
485   }
486 
ShrinkToMaximumLoad()487   void ShrinkToMaximumLoad() {
488     Resize(size() / max_load_factor_);
489   }
490 
491   // Reserve enough room to insert until Size() == num_elements without requiring to grow the hash
492   // set. No-op if the hash set is already large enough to do this.
reserve(size_t num_elements)493   void reserve(size_t num_elements) {
494     size_t num_buckets = num_elements / max_load_factor_;
495     // Deal with rounding errors. Add one for rounding.
496     while (static_cast<size_t>(num_buckets * max_load_factor_) <= num_elements + 1u) {
497       ++num_buckets;
498     }
499     if (num_buckets > NumBuckets()) {
500       Resize(num_buckets);
501     }
502   }
503 
504   // To distance that inserted elements were probed. Used for measuring how good hash functions
505   // are.
TotalProbeDistance()506   size_t TotalProbeDistance() const {
507     size_t total = 0;
508     for (size_t i = 0; i < NumBuckets(); ++i) {
509       const T& element = ElementForIndex(i);
510       if (!emptyfn_.IsEmpty(element)) {
511         size_t ideal_location = IndexForHash(hashfn_(element));
512         if (ideal_location > i) {
513           total += i + NumBuckets() - ideal_location;
514         } else {
515           total += i - ideal_location;
516         }
517       }
518     }
519     return total;
520   }
521 
522   // Calculate the current load factor and return it.
CalculateLoadFactor()523   double CalculateLoadFactor() const {
524     return static_cast<double>(size()) / static_cast<double>(NumBuckets());
525   }
526 
527   // Make sure that everything reinserts in the right spot. Returns the number of errors.
Verify()528   size_t Verify() NO_THREAD_SAFETY_ANALYSIS {
529     size_t errors = 0;
530     for (size_t i = 0; i < num_buckets_; ++i) {
531       T& element = data_[i];
532       if (!emptyfn_.IsEmpty(element)) {
533         T temp;
534         emptyfn_.MakeEmpty(temp);
535         std::swap(temp, element);
536         size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp)));
537         if (i != first_slot) {
538           LOG(ERROR) << "Element " << i << " should be in slot " << first_slot;
539           ++errors;
540         }
541         std::swap(temp, element);
542       }
543     }
544     return errors;
545   }
546 
GetMinLoadFactor()547   double GetMinLoadFactor() const {
548     return min_load_factor_;
549   }
550 
GetMaxLoadFactor()551   double GetMaxLoadFactor() const {
552     return max_load_factor_;
553   }
554 
555   // Change the load factor of the hash set. If the current load factor is greater than the max
556   // specified, then we resize the hash table storage.
SetLoadFactor(double min_load_factor,double max_load_factor)557   void SetLoadFactor(double min_load_factor, double max_load_factor) {
558     DCHECK_LT(min_load_factor, max_load_factor);
559     DCHECK_GT(min_load_factor, 0.0);
560     DCHECK_LT(max_load_factor, 1.0);
561     min_load_factor_ = min_load_factor;
562     max_load_factor_ = max_load_factor;
563     elements_until_expand_ = NumBuckets() * max_load_factor_;
564     // If the current load factor isn't in the range, then resize to the mean of the minimum and
565     // maximum load factor.
566     const double load_factor = CalculateLoadFactor();
567     if (load_factor > max_load_factor_) {
568       Resize(size() / ((min_load_factor_ + max_load_factor_) * 0.5));
569     }
570   }
571 
572   // The hash set expands when Size() reaches ElementsUntilExpand().
ElementsUntilExpand()573   size_t ElementsUntilExpand() const {
574     return elements_until_expand_;
575   }
576 
NumBuckets()577   size_t NumBuckets() const {
578     return num_buckets_;
579   }
580 
581  private:
ElementForIndex(size_t index)582   T& ElementForIndex(size_t index) {
583     DCHECK_LT(index, NumBuckets());
584     DCHECK(data_ != nullptr);
585     return data_[index];
586   }
587 
ElementForIndex(size_t index)588   const T& ElementForIndex(size_t index) const {
589     DCHECK_LT(index, NumBuckets());
590     DCHECK(data_ != nullptr);
591     return data_[index];
592   }
593 
IndexForHash(size_t hash)594   size_t IndexForHash(size_t hash) const {
595     // Protect against undefined behavior (division by zero).
596     if (UNLIKELY(num_buckets_ == 0)) {
597       return 0;
598     }
599     return hash % num_buckets_;
600   }
601 
NextIndex(size_t index)602   size_t NextIndex(size_t index) const {
603     if (UNLIKELY(++index >= num_buckets_)) {
604       DCHECK_EQ(index, NumBuckets());
605       return 0;
606     }
607     return index;
608   }
609 
610   // Find the hash table slot for an element, or return NumBuckets() if not found.
611   // This value for not found is important so that iterator(this, FindIndex(...)) == end().
612   template <typename K>
FindIndex(const K & element,size_t hash)613   size_t FindIndex(const K& element, size_t hash) const {
614     // Guard against failing to get an element for a non-existing index.
615     if (UNLIKELY(NumBuckets() == 0)) {
616       return 0;
617     }
618     DCHECK_EQ(hashfn_(element), hash);
619     size_t index = IndexForHash(hash);
620     while (true) {
621       const T& slot = ElementForIndex(index);
622       if (emptyfn_.IsEmpty(slot)) {
623         return NumBuckets();
624       }
625       if (pred_(slot, element)) {
626         return index;
627       }
628       index = NextIndex(index);
629     }
630   }
631 
IsFreeSlot(size_t index)632   bool IsFreeSlot(size_t index) const {
633     return emptyfn_.IsEmpty(ElementForIndex(index));
634   }
635 
636   // Allocate a number of buckets.
AllocateStorage(size_t num_buckets)637   void AllocateStorage(size_t num_buckets) {
638     num_buckets_ = num_buckets;
639     data_ = allocfn_.allocate(num_buckets_);
640     owns_data_ = true;
641     for (size_t i = 0; i < num_buckets_; ++i) {
642       allocfn_.construct(allocfn_.address(data_[i]));
643       emptyfn_.MakeEmpty(data_[i]);
644     }
645   }
646 
DeallocateStorage()647   void DeallocateStorage() {
648     if (owns_data_) {
649       for (size_t i = 0; i < NumBuckets(); ++i) {
650         allocfn_.destroy(allocfn_.address(data_[i]));
651       }
652       if (data_ != nullptr) {
653         allocfn_.deallocate(data_, NumBuckets());
654       }
655       owns_data_ = false;
656     }
657     data_ = nullptr;
658     num_buckets_ = 0;
659   }
660 
661   // Expand the set based on the load factors.
Expand()662   void Expand() {
663     size_t min_index = static_cast<size_t>(size() / min_load_factor_);
664     // Resize based on the minimum load factor.
665     Resize(min_index);
666   }
667 
668   // Expand / shrink the table to the new specified size.
Resize(size_t new_size)669   void Resize(size_t new_size) {
670     if (new_size < kMinBuckets) {
671       new_size = kMinBuckets;
672     }
673     DCHECK_GE(new_size, size());
674     T* const old_data = data_;
675     size_t old_num_buckets = num_buckets_;
676     // Reinsert all of the old elements.
677     const bool owned_data = owns_data_;
678     AllocateStorage(new_size);
679     for (size_t i = 0; i < old_num_buckets; ++i) {
680       T& element = old_data[i];
681       if (!emptyfn_.IsEmpty(element)) {
682         data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
683       }
684       if (owned_data) {
685         allocfn_.destroy(allocfn_.address(element));
686       }
687     }
688     if (owned_data) {
689       allocfn_.deallocate(old_data, old_num_buckets);
690     }
691 
692     // When we hit elements_until_expand_, we are at the max load factor and must expand again.
693     elements_until_expand_ = NumBuckets() * max_load_factor_;
694   }
695 
FirstAvailableSlot(size_t index)696   ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
697     DCHECK_LT(index, NumBuckets());  // Don't try to get a slot out of range.
698     size_t non_empty_count = 0;
699     while (!emptyfn_.IsEmpty(data_[index])) {
700       index = NextIndex(index);
701       non_empty_count++;
702       DCHECK_LE(non_empty_count, NumBuckets());  // Don't loop forever.
703     }
704     return index;
705   }
706 
NextNonEmptySlot(size_t index)707   size_t NextNonEmptySlot(size_t index) const {
708     const size_t num_buckets = NumBuckets();
709     DCHECK_LT(index, num_buckets);
710     do {
711       ++index;
712     } while (index < num_buckets && IsFreeSlot(index));
713     return index;
714   }
715 
716   // Return new offset.
717   template <typename Elem>
WriteToBytes(uint8_t * ptr,size_t offset,Elem n)718   static size_t WriteToBytes(uint8_t* ptr, size_t offset, Elem n) {
719     DCHECK_ALIGNED(ptr + offset, sizeof(n));
720     if (ptr != nullptr) {
721       *reinterpret_cast<Elem*>(ptr + offset) = n;
722     }
723     return offset + sizeof(n);
724   }
725 
726   template <typename Elem>
ReadFromBytes(const uint8_t * ptr,size_t offset,Elem * out)727   static size_t ReadFromBytes(const uint8_t* ptr, size_t offset, Elem* out) {
728     DCHECK(ptr != nullptr);
729     DCHECK_ALIGNED(ptr + offset, sizeof(*out));
730     *out = *reinterpret_cast<const Elem*>(ptr + offset);
731     return offset + sizeof(*out);
732   }
733 
734   Alloc allocfn_;  // Allocator function.
735   HashFn hashfn_;  // Hashing function.
736   EmptyFn emptyfn_;  // IsEmpty/SetEmpty function.
737   Pred pred_;  // Equals function.
738   size_t num_elements_;  // Number of inserted elements.
739   size_t num_buckets_;  // Number of hash table buckets.
740   size_t elements_until_expand_;  // Maximum number of elements until we expand the table.
741   bool owns_data_;  // If we own data_ and are responsible for freeing it.
742   T* data_;  // Backing storage.
743   double min_load_factor_;
744   double max_load_factor_;
745 
746   template <class Elem, class HashSetType>
747   friend class HashSetIterator;
748 
749   ART_FRIEND_TEST(InternTableTest, CrossHash);
750 };
751 
752 template <class T, class EmptyFn, class HashFn, class Pred, class Alloc>
swap(HashSet<T,EmptyFn,HashFn,Pred,Alloc> & lhs,HashSet<T,EmptyFn,HashFn,Pred,Alloc> & rhs)753 void swap(HashSet<T, EmptyFn, HashFn, Pred, Alloc>& lhs,
754           HashSet<T, EmptyFn, HashFn, Pred, Alloc>& rhs) {
755   lhs.swap(rhs);
756 }
757 
758 }  // namespace art
759 
760 #endif  // ART_LIBARTBASE_BASE_HASH_SET_H_
761