1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "hash_set.h"
18 
19 #include <forward_list>
20 #include <map>
21 #include <sstream>
22 #include <string>
23 #include <string_view>
24 #include <unordered_set>
25 #include <vector>
26 
27 #include <gtest/gtest.h>
28 
29 #include "hash_map.h"
30 
31 namespace art {
32 
33 struct IsEmptyFnString {
MakeEmptyart::IsEmptyFnString34   void MakeEmpty(std::string& item) const {
35     item.clear();
36   }
IsEmptyart::IsEmptyFnString37   bool IsEmpty(const std::string& item) const {
38     return item.empty();
39   }
40 };
41 
42 class HashSetTest : public testing::Test {
43  public:
HashSetTest()44   HashSetTest() : seed_(97421), unique_number_(0) {
45   }
RandomString(size_t len)46   std::string RandomString(size_t len) {
47     std::ostringstream oss;
48     for (size_t i = 0; i < len; ++i) {
49       oss << static_cast<char>('A' + PRand() % 64);
50     }
51     static_assert(' ' < 'A', "space must be less than a");
52     oss << " " << unique_number_++;  // Relies on ' ' < 'A'
53     return oss.str();
54   }
SetSeed(size_t seed)55   void SetSeed(size_t seed) {
56     seed_ = seed;
57   }
PRand()58   size_t PRand() {  // Pseudo random.
59     seed_ = seed_ * 1103515245 + 12345;
60     return seed_;
61   }
62 
63  private:
64   size_t seed_;
65   size_t unique_number_;
66 };
67 
TEST_F(HashSetTest,TestSmoke)68 TEST_F(HashSetTest, TestSmoke) {
69   HashSet<std::string, IsEmptyFnString> hash_set;
70   const std::string test_string = "hello world 1234";
71   ASSERT_TRUE(hash_set.empty());
72   ASSERT_EQ(hash_set.size(), 0U);
73   hash_set.insert(test_string);
74   auto it = hash_set.find(test_string);
75   ASSERT_EQ(*it, test_string);
76   auto after_it = hash_set.erase(it);
77   ASSERT_TRUE(after_it == hash_set.end());
78   ASSERT_TRUE(hash_set.empty());
79   ASSERT_EQ(hash_set.size(), 0U);
80   it = hash_set.find(test_string);
81   ASSERT_TRUE(it == hash_set.end());
82 }
83 
TEST_F(HashSetTest,TestInsertAndErase)84 TEST_F(HashSetTest, TestInsertAndErase) {
85   HashSet<std::string, IsEmptyFnString> hash_set;
86   static constexpr size_t count = 1000;
87   std::vector<std::string> strings;
88   for (size_t i = 0; i < count; ++i) {
89     // Insert a bunch of elements and make sure we can find them.
90     strings.push_back(RandomString(10));
91     hash_set.insert(strings[i]);
92     auto it = hash_set.find(strings[i]);
93     ASSERT_TRUE(it != hash_set.end());
94     ASSERT_EQ(*it, strings[i]);
95   }
96   ASSERT_EQ(strings.size(), hash_set.size());
97   // Try to erase the odd strings.
98   for (size_t i = 1; i < count; i += 2) {
99     auto it = hash_set.find(strings[i]);
100     ASSERT_TRUE(it != hash_set.end());
101     ASSERT_EQ(*it, strings[i]);
102     hash_set.erase(it);
103   }
104   // Test removed.
105   for (size_t i = 1; i < count; i += 2) {
106     auto it = hash_set.find(strings[i]);
107     ASSERT_TRUE(it == hash_set.end());
108   }
109   for (size_t i = 0; i < count; i += 2) {
110     auto it = hash_set.find(strings[i]);
111     ASSERT_TRUE(it != hash_set.end());
112     ASSERT_EQ(*it, strings[i]);
113   }
114 }
115 
TEST_F(HashSetTest,TestIterator)116 TEST_F(HashSetTest, TestIterator) {
117   HashSet<std::string, IsEmptyFnString> hash_set;
118   ASSERT_TRUE(hash_set.begin() == hash_set.end());
119   static constexpr size_t count = 1000;
120   std::vector<std::string> strings;
121   for (size_t i = 0; i < count; ++i) {
122     // Insert a bunch of elements and make sure we can find them.
123     strings.push_back(RandomString(10));
124     hash_set.insert(strings[i]);
125   }
126   // Make sure we visit each string exactly once.
127   std::map<std::string, size_t> found_count;
128   for (const std::string& s : hash_set) {
129     ++found_count[s];
130   }
131   for (size_t i = 0; i < count; ++i) {
132     ASSERT_EQ(found_count[strings[i]], 1U);
133   }
134   found_count.clear();
135   // Remove all the elements with iterator erase.
136   for (auto it = hash_set.begin(); it != hash_set.end();) {
137     ++found_count[*it];
138     it = hash_set.erase(it);
139     ASSERT_EQ(hash_set.Verify(), 0U);
140   }
141   for (size_t i = 0; i < count; ++i) {
142     ASSERT_EQ(found_count[strings[i]], 1U);
143   }
144 }
145 
TEST_F(HashSetTest,TestSwap)146 TEST_F(HashSetTest, TestSwap) {
147   HashSet<std::string, IsEmptyFnString> hash_seta, hash_setb;
148   std::vector<std::string> strings;
149   static constexpr size_t count = 1000;
150   for (size_t i = 0; i < count; ++i) {
151     strings.push_back(RandomString(10));
152     hash_seta.insert(strings[i]);
153   }
154   std::swap(hash_seta, hash_setb);
155   hash_seta.insert("TEST");
156   hash_setb.insert("TEST2");
157   for (size_t i = 0; i < count; ++i) {
158     strings.push_back(RandomString(10));
159     hash_seta.insert(strings[i]);
160   }
161 }
162 
TEST_F(HashSetTest,TestShrink)163 TEST_F(HashSetTest, TestShrink) {
164   HashSet<std::string, IsEmptyFnString> hash_set;
165   std::vector<std::string> strings = {"a", "b", "c", "d", "e", "f", "g"};
166   for (size_t i = 0; i < strings.size(); ++i) {
167     // Insert some strings into the beginning of our hash set to establish an initial size
168     hash_set.insert(strings[i]);
169   }
170 
171   hash_set.ShrinkToMaximumLoad();
172   const double initial_load = hash_set.CalculateLoadFactor();
173 
174   // Insert a bunch of random strings to guarantee that we grow the capacity.
175   std::vector<std::string> random_strings;
176   static constexpr size_t count = 1000;
177   for (size_t i = 0; i < count; ++i) {
178     random_strings.push_back(RandomString(10));
179     hash_set.insert(random_strings[i]);
180   }
181 
182   // Erase all the extra strings which guarantees that our load factor will be really bad.
183   for (size_t i = 0; i < count; ++i) {
184     hash_set.erase(hash_set.find(random_strings[i]));
185   }
186 
187   const double bad_load = hash_set.CalculateLoadFactor();
188   EXPECT_GT(initial_load, bad_load);
189 
190   // Shrink again, the load factor should be good again.
191   hash_set.ShrinkToMaximumLoad();
192   EXPECT_DOUBLE_EQ(initial_load, hash_set.CalculateLoadFactor());
193 
194   // Make sure all the initial elements we had are still there
195   for (const std::string& initial_string : strings) {
196     EXPECT_NE(hash_set.end(), hash_set.find(initial_string))
197         << "expected to find " << initial_string;
198   }
199 }
200 
TEST_F(HashSetTest,TestLoadFactor)201 TEST_F(HashSetTest, TestLoadFactor) {
202   HashSet<std::string, IsEmptyFnString> hash_set;
203   static constexpr size_t kStringCount = 1000;
204   static constexpr double kEpsilon = 0.01;
205   for (size_t i = 0; i < kStringCount; ++i) {
206     hash_set.insert(RandomString(i % 10 + 1));
207   }
208   // Check that changing the load factor resizes the table to be within the target range.
209   EXPECT_GE(hash_set.CalculateLoadFactor() + kEpsilon, hash_set.GetMinLoadFactor());
210   EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
211   hash_set.SetLoadFactor(0.1, 0.3);
212   EXPECT_DOUBLE_EQ(0.1, hash_set.GetMinLoadFactor());
213   EXPECT_DOUBLE_EQ(0.3, hash_set.GetMaxLoadFactor());
214   EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
215   hash_set.SetLoadFactor(0.6, 0.8);
216   EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
217 }
218 
TEST_F(HashSetTest,TestStress)219 TEST_F(HashSetTest, TestStress) {
220   HashSet<std::string, IsEmptyFnString> hash_set;
221   std::unordered_set<std::string> std_set;
222   std::vector<std::string> strings;
223   static constexpr size_t string_count = 2000;
224   static constexpr size_t operations = 100000;
225   static constexpr size_t target_size = 5000;
226   for (size_t i = 0; i < string_count; ++i) {
227     strings.push_back(RandomString(i % 10 + 1));
228   }
229   const size_t seed = time(nullptr);
230   SetSeed(seed);
231   LOG(INFO) << "Starting stress test with seed " << seed;
232   for (size_t i = 0; i < operations; ++i) {
233     ASSERT_EQ(hash_set.size(), std_set.size());
234     size_t delta = std::abs(static_cast<ssize_t>(target_size) -
235                             static_cast<ssize_t>(hash_set.size()));
236     size_t n = PRand();
237     if (n % target_size == 0) {
238       hash_set.clear();
239       std_set.clear();
240       ASSERT_TRUE(hash_set.empty());
241       ASSERT_TRUE(std_set.empty());
242     } else  if (n % target_size < delta) {
243       // Skew towards adding elements until we are at the desired size.
244       const std::string& s = strings[PRand() % string_count];
245       hash_set.insert(s);
246       std_set.insert(s);
247       ASSERT_EQ(*hash_set.find(s), *std_set.find(s));
248     } else {
249       const std::string& s = strings[PRand() % string_count];
250       auto it1 = hash_set.find(s);
251       auto it2 = std_set.find(s);
252       ASSERT_EQ(it1 == hash_set.end(), it2 == std_set.end());
253       if (it1 != hash_set.end()) {
254         ASSERT_EQ(*it1, *it2);
255         hash_set.erase(it1);
256         std_set.erase(it2);
257       }
258     }
259   }
260 }
261 
262 struct IsEmptyStringPair {
MakeEmptyart::IsEmptyStringPair263   void MakeEmpty(std::pair<std::string, int>& pair) const {
264     pair.first.clear();
265   }
IsEmptyart::IsEmptyStringPair266   bool IsEmpty(const std::pair<std::string, int>& pair) const {
267     return pair.first.empty();
268   }
269 };
270 
TEST_F(HashSetTest,TestHashMap)271 TEST_F(HashSetTest, TestHashMap) {
272   HashMap<std::string, int, IsEmptyStringPair> hash_map;
273   hash_map.insert(std::make_pair(std::string("abcd"), 123));
274   hash_map.insert(std::make_pair(std::string("abcd"), 124));
275   hash_map.insert(std::make_pair(std::string("bags"), 444));
276   auto it = hash_map.find(std::string("abcd"));
277   ASSERT_EQ(it->second, 123);
278   hash_map.erase(it);
279   it = hash_map.find(std::string("abcd"));
280   ASSERT_EQ(it, hash_map.end());
281 }
282 
283 struct IsEmptyFnVectorInt {
MakeEmptyart::IsEmptyFnVectorInt284   void MakeEmpty(std::vector<int>& item) const {
285     item.clear();
286   }
IsEmptyart::IsEmptyFnVectorInt287   bool IsEmpty(const std::vector<int>& item) const {
288     return item.empty();
289   }
290 };
291 
292 template <typename T>
HashIntSequence(T begin,T end)293 size_t HashIntSequence(T begin, T end) {
294   size_t hash = 0;
295   for (auto iter = begin; iter != end; ++iter) {
296     hash = hash * 2 + *iter;
297   }
298   return hash;
299 }
300 
301 struct VectorIntHashEquals {
operator ()art::VectorIntHashEquals302   std::size_t operator()(const std::vector<int>& item) const {
303     return HashIntSequence(item.begin(), item.end());
304   }
305 
operator ()art::VectorIntHashEquals306   std::size_t operator()(const std::forward_list<int>& item) const {
307     return HashIntSequence(item.begin(), item.end());
308   }
309 
operator ()art::VectorIntHashEquals310   bool operator()(const std::vector<int>& a, const std::vector<int>& b) const {
311     return a == b;
312   }
313 
operator ()art::VectorIntHashEquals314   bool operator()(const std::vector<int>& a, const std::forward_list<int>& b) const {
315     auto aiter = a.begin();
316     auto biter = b.begin();
317     while (aiter != a.end() && biter != b.end()) {
318       if (*aiter != *biter) {
319         return false;
320       }
321       aiter++;
322       biter++;
323     }
324     return (aiter == a.end() && biter == b.end());
325   }
326 };
327 
TEST_F(HashSetTest,TestLookupByAlternateKeyType)328 TEST_F(HashSetTest, TestLookupByAlternateKeyType) {
329   HashSet<std::vector<int>, IsEmptyFnVectorInt, VectorIntHashEquals, VectorIntHashEquals> hash_set;
330   hash_set.insert(std::vector<int>({1, 2, 3, 4}));
331   hash_set.insert(std::vector<int>({4, 2}));
332   ASSERT_EQ(hash_set.end(), hash_set.find(std::vector<int>({1, 1, 1, 1})));
333   ASSERT_NE(hash_set.end(), hash_set.find(std::vector<int>({1, 2, 3, 4})));
334   ASSERT_EQ(hash_set.end(), hash_set.find(std::forward_list<int>({1, 1, 1, 1})));
335   ASSERT_NE(hash_set.end(), hash_set.find(std::forward_list<int>({1, 2, 3, 4})));
336 }
337 
TEST_F(HashSetTest,TestReserve)338 TEST_F(HashSetTest, TestReserve) {
339   HashSet<std::string, IsEmptyFnString> hash_set;
340   std::vector<size_t> sizes = {1, 10, 25, 55, 128, 1024, 4096};
341   for (size_t size : sizes) {
342     hash_set.reserve(size);
343     const size_t buckets_before = hash_set.NumBuckets();
344     // Check that we expanded enough.
345     CHECK_GE(hash_set.ElementsUntilExpand(), size);
346     // Try inserting elements until we are at our reserve size and ensure the hash set did not
347     // expand.
348     while (hash_set.size() < size) {
349       hash_set.insert(std::to_string(hash_set.size()));
350     }
351     CHECK_EQ(hash_set.NumBuckets(), buckets_before);
352   }
353   // Check the behaviour for shrinking, it does not necessarily resize down.
354   constexpr size_t size = 100;
355   hash_set.reserve(size);
356   CHECK_GE(hash_set.ElementsUntilExpand(), size);
357 }
358 
TEST_F(HashSetTest,IteratorConversion)359 TEST_F(HashSetTest, IteratorConversion) {
360   const char* test_string = "test string";
361   HashSet<std::string> hash_set;
362   HashSet<std::string>::iterator it = hash_set.insert(test_string).first;
363   HashSet<std::string>::const_iterator cit = it;
364   ASSERT_TRUE(it == cit);
365   ASSERT_EQ(*it, *cit);
366 }
367 
TEST_F(HashSetTest,StringSearchStringView)368 TEST_F(HashSetTest, StringSearchStringView) {
369   const char* test_string = "test string";
370   HashSet<std::string> hash_set;
371   HashSet<std::string>::iterator insert_pos = hash_set.insert(test_string).first;
372   HashSet<std::string>::iterator it = hash_set.find(std::string_view(test_string));
373   ASSERT_TRUE(it == insert_pos);
374 }
375 
TEST_F(HashSetTest,DoubleInsert)376 TEST_F(HashSetTest, DoubleInsert) {
377   const char* test_string = "test string";
378   HashSet<std::string> hash_set;
379   hash_set.insert(test_string);
380   hash_set.insert(test_string);
381   ASSERT_EQ(1u, hash_set.size());
382 }
383 
TEST_F(HashSetTest,Preallocated)384 TEST_F(HashSetTest, Preallocated) {
385   static const size_t kBufferSize = 64;
386   uint32_t buffer[kBufferSize];
387   HashSet<uint32_t> hash_set(buffer, kBufferSize);
388   size_t max_without_resize = kBufferSize * hash_set.GetMaxLoadFactor();
389   for (size_t i = 0; i != max_without_resize; ++i) {
390     hash_set.insert(i);
391   }
392   ASSERT_FALSE(hash_set.owns_data_);
393   hash_set.insert(max_without_resize);
394   ASSERT_TRUE(hash_set.owns_data_);
395 }
396 
397 class SmallIndexEmptyFn {
398  public:
MakeEmpty(uint16_t & item) const399   void MakeEmpty(uint16_t& item) const {
400     item = std::numeric_limits<uint16_t>::max();
401   }
IsEmpty(const uint16_t & item) const402   bool IsEmpty(const uint16_t& item) const {
403     return item == std::numeric_limits<uint16_t>::max();
404   }
405 };
406 
407 class StatefulHashFn {
408  public:
StatefulHashFn(const std::vector<std::string> * strings)409   explicit StatefulHashFn(const std::vector<std::string>* strings)
410       : strings_(strings) {}
411 
operator ()(const uint16_t & index) const412   size_t operator() (const uint16_t& index) const {
413     CHECK_LT(index, strings_->size());
414     return (*this)((*strings_)[index]);
415   }
416 
operator ()(std::string_view s) const417   size_t operator() (std::string_view s) const {
418     return DataHash()(s);
419   }
420 
421  private:
422   const std::vector<std::string>* strings_;
423 };
424 
425 class StatefulPred {
426  public:
StatefulPred(const std::vector<std::string> * strings)427   explicit StatefulPred(const std::vector<std::string>* strings)
428       : strings_(strings) {}
429 
operator ()(const uint16_t & lhs,const uint16_t & rhs) const430   bool operator() (const uint16_t& lhs, const uint16_t& rhs) const {
431     CHECK_LT(rhs, strings_->size());
432     return (*this)(lhs, (*strings_)[rhs]);
433   }
434 
operator ()(const uint16_t & lhs,std::string_view rhs) const435   bool operator() (const uint16_t& lhs, std::string_view rhs) const {
436     CHECK_LT(lhs, strings_->size());
437     return (*strings_)[lhs] == rhs;
438   }
439 
440  private:
441   const std::vector<std::string>* strings_;
442 };
443 
TEST_F(HashSetTest,StatefulHashSet)444 TEST_F(HashSetTest, StatefulHashSet) {
445   std::vector<std::string> strings{
446       "duplicate",
447       "a",
448       "b",
449       "xyz",
450       "___",
451       "123",
452       "placeholder",
453       "duplicate"
454   };
455   const size_t duplicateFirstIndex = 0;
456   const size_t duplicateSecondIndex = strings.size() - 1u;
457   const size_t otherIndex = 1u;
458 
459   StatefulHashFn hashfn(&strings);
460   StatefulPred pred(&strings);
461   HashSet<uint16_t, SmallIndexEmptyFn, StatefulHashFn, StatefulPred> hash_set(hashfn, pred);
462   for (size_t index = 0, size = strings.size(); index != size; ++index) {
463     bool inserted = hash_set.insert(index).second;
464     ASSERT_EQ(index != duplicateSecondIndex, inserted) << index;
465   }
466 
467   // Check search by string.
468   for (size_t index = 0, size = strings.size(); index != size; ++index) {
469     auto it = hash_set.find(strings[index]);
470     ASSERT_FALSE(it == hash_set.end());
471     ASSERT_EQ(index == duplicateSecondIndex ? duplicateFirstIndex : index, *it) << index;
472   }
473   ASSERT_TRUE(hash_set.find("missing") == hash_set.end());
474 
475   // Check search by index.
476   for (size_t index = 0, size = strings.size(); index != size; ++index) {
477     auto it = hash_set.find(index);
478     ASSERT_FALSE(it == hash_set.end());
479     ASSERT_EQ(index == duplicateSecondIndex ? duplicateFirstIndex : index, *it) << index;
480   }
481   // Note: Searching for index >= strings.size() is not supported by Stateful{HashFn,Pred}.
482 
483   // Test removal and search by missing index.
484   auto remove_it = hash_set.find(otherIndex);
485   ASSERT_FALSE(remove_it == hash_set.end());
486   hash_set.erase(remove_it);
487   auto search_it = hash_set.find(otherIndex);
488   ASSERT_TRUE(search_it == hash_set.end());
489 }
490 
491 }  // namespace art
492