1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_RUNTIME_BASE_HASH_SET_H_
18 #define ART_RUNTIME_BASE_HASH_SET_H_
19 
20 #include <functional>
21 #include <memory>
22 #include <stdint.h>
23 #include <utility>
24 
25 #include "logging.h"
26 
27 namespace art {
28 
29 // Returns true if an item is empty.
30 template <class T>
31 class DefaultEmptyFn {
32  public:
MakeEmpty(T & item)33   void MakeEmpty(T& item) const {
34     item = T();
35   }
IsEmpty(const T & item)36   bool IsEmpty(const T& item) const {
37     return item == T();
38   }
39 };
40 
41 template <class T>
42 class DefaultEmptyFn<T*> {
43  public:
MakeEmpty(T * & item)44   void MakeEmpty(T*& item) const {
45     item = nullptr;
46   }
IsEmpty(const T * & item)47   bool IsEmpty(const T*& item) const {
48     return item == nullptr;
49   }
50 };
51 
52 // Low memory version of a hash set, uses less memory than std::unordered_set since elements aren't
53 // boxed. Uses linear probing.
54 // EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item)
55 template <class T, class EmptyFn = DefaultEmptyFn<T>, class HashFn = std::hash<T>,
56     class Pred = std::equal_to<T>, class Alloc = std::allocator<T>>
57 class HashSet {
58  public:
59   static constexpr double kDefaultMinLoadFactor = 0.5;
60   static constexpr double kDefaultMaxLoadFactor = 0.9;
61   static constexpr size_t kMinBuckets = 1000;
62 
63   class Iterator {
64    public:
65     Iterator(const Iterator&) = default;
Iterator(HashSet * hash_set,size_t index)66     Iterator(HashSet* hash_set, size_t index) : hash_set_(hash_set), index_(index) {
67     }
68     Iterator& operator=(const Iterator&) = default;
69     bool operator==(const Iterator& other) const {
70       return hash_set_ == other.hash_set_ && index_ == other.index_;
71     }
72     bool operator!=(const Iterator& other) const {
73       return !(*this == other);
74     }
75     Iterator operator++() {  // Value after modification.
76       index_ = NextNonEmptySlot(index_);
77       return *this;
78     }
79     Iterator operator++(int) {
80       Iterator temp = *this;
81       index_ = NextNonEmptySlot(index_);
82       return temp;
83     }
84     T& operator*() {
85       DCHECK(!hash_set_->IsFreeSlot(GetIndex()));
86       return hash_set_->ElementForIndex(index_);
87     }
88     const T& operator*() const {
89       DCHECK(!hash_set_->IsFreeSlot(GetIndex()));
90       return hash_set_->ElementForIndex(index_);
91     }
92     T* operator->() {
93       return &**this;
94     }
95     const T* operator->() const {
96       return &**this;
97     }
98     // TODO: Operator -- --(int)
99 
100    private:
101     HashSet* hash_set_;
102     size_t index_;
103 
GetIndex()104     size_t GetIndex() const {
105       return index_;
106     }
NextNonEmptySlot(size_t index)107     size_t NextNonEmptySlot(size_t index) const {
108       const size_t num_buckets = hash_set_->NumBuckets();
109       DCHECK_LT(index, num_buckets);
110       do {
111         ++index;
112       } while (index < num_buckets && hash_set_->IsFreeSlot(index));
113       return index;
114     }
115 
116     friend class HashSet;
117   };
118 
Clear()119   void Clear() {
120     DeallocateStorage();
121     AllocateStorage(1);
122     num_elements_ = 0;
123     elements_until_expand_ = 0;
124   }
HashSet()125   HashSet() : num_elements_(0), num_buckets_(0), data_(nullptr),
126       min_load_factor_(kDefaultMinLoadFactor), max_load_factor_(kDefaultMaxLoadFactor) {
127     Clear();
128   }
HashSet(const HashSet & other)129   HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), data_(nullptr) {
130     *this = other;
131   }
HashSet(HashSet && other)132   HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), data_(nullptr) {
133     *this = std::move(other);
134   }
~HashSet()135   ~HashSet() {
136     DeallocateStorage();
137   }
138   HashSet& operator=(HashSet&& other) {
139     std::swap(data_, other.data_);
140     std::swap(num_buckets_, other.num_buckets_);
141     std::swap(num_elements_, other.num_elements_);
142     std::swap(elements_until_expand_, other.elements_until_expand_);
143     std::swap(min_load_factor_, other.min_load_factor_);
144     std::swap(max_load_factor_, other.max_load_factor_);
145     return *this;
146   }
147   HashSet& operator=(const HashSet& other) {
148     DeallocateStorage();
149     AllocateStorage(other.NumBuckets());
150     for (size_t i = 0; i < num_buckets_; ++i) {
151       ElementForIndex(i) = other.data_[i];
152     }
153     num_elements_ = other.num_elements_;
154     elements_until_expand_ = other.elements_until_expand_;
155     min_load_factor_ = other.min_load_factor_;
156     max_load_factor_ = other.max_load_factor_;
157     return *this;
158   }
159   // Lower case for c++11 for each.
begin()160   Iterator begin() {
161     Iterator ret(this, 0);
162     if (num_buckets_ != 0 && IsFreeSlot(ret.GetIndex())) {
163       ++ret;  // Skip all the empty slots.
164     }
165     return ret;
166   }
167   // Lower case for c++11 for each.
end()168   Iterator end() {
169     return Iterator(this, NumBuckets());
170   }
Empty()171   bool Empty() {
172     return begin() == end();
173   }
174   // Erase algorithm:
175   // Make an empty slot where the iterator is pointing.
176   // Scan fowards until we hit another empty slot.
177   // If an element inbetween doesn't rehash to the range from the current empty slot to the
178   // iterator. It must be before the empty slot, in that case we can move it to the empty slot
179   // and set the empty slot to be the location we just moved from.
180   // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an
181   // element to its actual location/index.
Erase(Iterator it)182   Iterator Erase(Iterator it) {
183     // empty_index is the index that will become empty.
184     size_t empty_index = it.GetIndex();
185     DCHECK(!IsFreeSlot(empty_index));
186     size_t next_index = empty_index;
187     bool filled = false;  // True if we filled the empty index.
188     while (true) {
189       next_index = NextIndex(next_index);
190       T& next_element = ElementForIndex(next_index);
191       // If the next element is empty, we are done. Make sure to clear the current empty index.
192       if (emptyfn_.IsEmpty(next_element)) {
193         emptyfn_.MakeEmpty(ElementForIndex(empty_index));
194         break;
195       }
196       // Otherwise try to see if the next element can fill the current empty index.
197       const size_t next_hash = hashfn_(next_element);
198       // Calculate the ideal index, if it is within empty_index + 1 to next_index then there is
199       // nothing we can do.
200       size_t next_ideal_index = IndexForHash(next_hash);
201       // Loop around if needed for our check.
202       size_t unwrapped_next_index = next_index;
203       if (unwrapped_next_index < empty_index) {
204         unwrapped_next_index += NumBuckets();
205       }
206       // Loop around if needed for our check.
207       size_t unwrapped_next_ideal_index = next_ideal_index;
208       if (unwrapped_next_ideal_index < empty_index) {
209         unwrapped_next_ideal_index += NumBuckets();
210       }
211       if (unwrapped_next_ideal_index <= empty_index ||
212           unwrapped_next_ideal_index > unwrapped_next_index) {
213         // If the target index isn't within our current range it must have been probed from before
214         // the empty index.
215         ElementForIndex(empty_index) = std::move(next_element);
216         filled = true;  // TODO: Optimize
217         empty_index = next_index;
218       }
219     }
220     --num_elements_;
221     // If we didn't fill the slot then we need go to the next non free slot.
222     if (!filled) {
223       ++it;
224     }
225     return it;
226   }
227   // Find an element, returns end() if not found.
228   // Allows custom K types, example of when this is useful.
229   // Set of Class* sorted by name, want to find a class with a name but can't allocate a dummy
230   // object in the heap for performance solution.
231   template <typename K>
Find(const K & element)232   Iterator Find(const K& element) {
233     return FindWithHash(element, hashfn_(element));
234   }
235   template <typename K>
FindWithHash(const K & element,size_t hash)236   Iterator FindWithHash(const K& element, size_t hash) {
237     DCHECK_EQ(hashfn_(element), hash);
238     size_t index = IndexForHash(hash);
239     while (true) {
240       T& slot = ElementForIndex(index);
241       if (emptyfn_.IsEmpty(slot)) {
242         return end();
243       }
244       if (pred_(slot, element)) {
245         return Iterator(this, index);
246       }
247       index = NextIndex(index);
248     }
249   }
250   // Insert an element, allows duplicates.
Insert(const T & element)251   void Insert(const T& element) {
252     InsertWithHash(element, hashfn_(element));
253   }
InsertWithHash(const T & element,size_t hash)254   void InsertWithHash(const T& element, size_t hash) {
255     DCHECK_EQ(hash, hashfn_(element));
256     if (num_elements_ >= elements_until_expand_) {
257       Expand();
258       DCHECK_LT(num_elements_, elements_until_expand_);
259     }
260     const size_t index = FirstAvailableSlot(IndexForHash(hash));
261     data_[index] = element;
262     ++num_elements_;
263   }
Size()264   size_t Size() const {
265     return num_elements_;
266   }
ShrinkToMaximumLoad()267   void ShrinkToMaximumLoad() {
268     Resize(Size() / max_load_factor_);
269   }
270   // To distance that inserted elements were probed. Used for measuring how good hash functions
271   // are.
TotalProbeDistance()272   size_t TotalProbeDistance() const {
273     size_t total = 0;
274     for (size_t i = 0; i < NumBuckets(); ++i) {
275       const T& element = ElementForIndex(i);
276       if (!emptyfn_.IsEmpty(element)) {
277         size_t ideal_location = IndexForHash(hashfn_(element));
278         if (ideal_location > i) {
279           total += i + NumBuckets() - ideal_location;
280         } else {
281           total += i - ideal_location;
282         }
283       }
284     }
285     return total;
286   }
287   // Calculate the current load factor and return it.
CalculateLoadFactor()288   double CalculateLoadFactor() const {
289     return static_cast<double>(Size()) / static_cast<double>(NumBuckets());
290   }
291   // Make sure that everything reinserts in the right spot. Returns the number of errors.
Verify()292   size_t Verify() {
293     size_t errors = 0;
294     for (size_t i = 0; i < num_buckets_; ++i) {
295       T& element = data_[i];
296       if (!emptyfn_.IsEmpty(element)) {
297         T temp;
298         emptyfn_.MakeEmpty(temp);
299         std::swap(temp, element);
300         size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp)));
301         if (i != first_slot) {
302           LOG(ERROR) << "Element " << i << " should be in slot " << first_slot;
303           ++errors;
304         }
305         std::swap(temp, element);
306       }
307     }
308     return errors;
309   }
310 
311  private:
ElementForIndex(size_t index)312   T& ElementForIndex(size_t index) {
313     DCHECK_LT(index, NumBuckets());
314     DCHECK(data_ != nullptr);
315     return data_[index];
316   }
ElementForIndex(size_t index)317   const T& ElementForIndex(size_t index) const {
318     DCHECK_LT(index, NumBuckets());
319     DCHECK(data_ != nullptr);
320     return data_[index];
321   }
IndexForHash(size_t hash)322   size_t IndexForHash(size_t hash) const {
323     return hash % num_buckets_;
324   }
NextIndex(size_t index)325   size_t NextIndex(size_t index) const {
326     if (UNLIKELY(++index >= num_buckets_)) {
327       DCHECK_EQ(index, NumBuckets());
328       return 0;
329     }
330     return index;
331   }
IsFreeSlot(size_t index)332   bool IsFreeSlot(size_t index) const {
333     return emptyfn_.IsEmpty(ElementForIndex(index));
334   }
NumBuckets()335   size_t NumBuckets() const {
336     return num_buckets_;
337   }
338   // Allocate a number of buckets.
AllocateStorage(size_t num_buckets)339   void AllocateStorage(size_t num_buckets) {
340     num_buckets_ = num_buckets;
341     data_ = allocfn_.allocate(num_buckets_);
342     for (size_t i = 0; i < num_buckets_; ++i) {
343       allocfn_.construct(allocfn_.address(data_[i]));
344       emptyfn_.MakeEmpty(data_[i]);
345     }
346   }
DeallocateStorage()347   void DeallocateStorage() {
348     if (num_buckets_ != 0) {
349       for (size_t i = 0; i < NumBuckets(); ++i) {
350         allocfn_.destroy(allocfn_.address(data_[i]));
351       }
352       allocfn_.deallocate(data_, NumBuckets());
353       data_ = nullptr;
354       num_buckets_ = 0;
355     }
356   }
357   // Expand the set based on the load factors.
Expand()358   void Expand() {
359     size_t min_index = static_cast<size_t>(Size() / min_load_factor_);
360     if (min_index < kMinBuckets) {
361       min_index = kMinBuckets;
362     }
363     // Resize based on the minimum load factor.
364     Resize(min_index);
365     // When we hit elements_until_expand_, we are at the max load factor and must expand again.
366     elements_until_expand_ = NumBuckets() * max_load_factor_;
367   }
368   // Expand / shrink the table to the new specified size.
Resize(size_t new_size)369   void Resize(size_t new_size) {
370     DCHECK_GE(new_size, Size());
371     T* old_data = data_;
372     size_t old_num_buckets = num_buckets_;
373     // Reinsert all of the old elements.
374     AllocateStorage(new_size);
375     for (size_t i = 0; i < old_num_buckets; ++i) {
376       T& element = old_data[i];
377       if (!emptyfn_.IsEmpty(element)) {
378         data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
379       }
380       allocfn_.destroy(allocfn_.address(element));
381     }
382     allocfn_.deallocate(old_data, old_num_buckets);
383   }
FirstAvailableSlot(size_t index)384   ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
385     while (!emptyfn_.IsEmpty(data_[index])) {
386       index = NextIndex(index);
387     }
388     return index;
389   }
390 
391   Alloc allocfn_;  // Allocator function.
392   HashFn hashfn_;  // Hashing function.
393   EmptyFn emptyfn_;  // IsEmpty/SetEmpty function.
394   Pred pred_;  // Equals function.
395   size_t num_elements_;  // Number of inserted elements.
396   size_t num_buckets_;  // Number of hash table buckets.
397   size_t elements_until_expand_;  // Maxmimum number of elements until we expand the table.
398   T* data_;  // Backing storage.
399   double min_load_factor_;
400   double max_load_factor_;
401 
402   friend class Iterator;
403 };
404 
405 }  // namespace art
406 
407 #endif  // ART_RUNTIME_BASE_HASH_SET_H_
408