1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
18 #define ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
19 
20 #include "dedupe_set.h"
21 
22 #include <inttypes.h>
23 
24 #include <algorithm>
25 #include <unordered_map>
26 
27 #include "android-base/stringprintf.h"
28 
29 #include "base/hash_set.h"
30 #include "base/mutex.h"
31 #include "base/stl_util.h"
32 #include "base/time_utils.h"
33 
34 namespace art {
35 
36 template <typename InKey,
37           typename StoreKey,
38           typename Alloc,
39           typename HashType,
40           typename HashFunc,
41           HashType kShard>
42 struct DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Stats {
43   size_t collision_sum = 0u;
44   size_t collision_max = 0u;
45   size_t total_probe_distance = 0u;
46   size_t total_size = 0u;
47 };
48 
49 template <typename InKey,
50           typename StoreKey,
51           typename Alloc,
52           typename HashType,
53           typename HashFunc,
54           HashType kShard>
55 class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard {
56  public:
57   Shard(const Alloc& alloc, const std::string& lock_name)
58       : alloc_(alloc),
59         lock_name_(lock_name),
60         lock_(lock_name_.c_str()),
61         keys_() {
62   }
63 
64   ~Shard() {
65     for (const HashedKey<StoreKey>& key : keys_) {
66       DCHECK(key.Key() != nullptr);
67       alloc_.Destroy(key.Key());
68     }
69   }
70 
71   const StoreKey* Add(Thread* self, size_t hash, const InKey& in_key) REQUIRES(!lock_) {
72     MutexLock lock(self, lock_);
73     HashedKey<InKey> hashed_in_key(hash, &in_key);
74     auto it = keys_.find(hashed_in_key);
75     if (it != keys_.end()) {
76       DCHECK(it->Key() != nullptr);
77       return it->Key();
78     }
79     const StoreKey* store_key = alloc_.Copy(in_key);
80     keys_.insert(HashedKey<StoreKey> { hash, store_key });
81     return store_key;
82   }
83 
84   void UpdateStats(Thread* self, Stats* global_stats) REQUIRES(!lock_) {
85     // HashSet<> doesn't keep entries ordered by hash, so we actually allocate memory
86     // for bookkeeping while collecting the stats.
87     std::unordered_map<HashType, size_t> stats;
88     {
89       MutexLock lock(self, lock_);
90       // Note: The total_probe_distance will be updated with the current state.
91       // It may have been higher before a re-hash.
92       global_stats->total_probe_distance += keys_.TotalProbeDistance();
93       global_stats->total_size += keys_.size();
94       for (const HashedKey<StoreKey>& key : keys_) {
95         auto it = stats.find(key.Hash());
96         if (it == stats.end()) {
97           stats.insert({key.Hash(), 1u});
98         } else {
99           ++it->second;
100         }
101       }
102     }
103     for (const auto& entry : stats) {
104       size_t number_of_entries = entry.second;
105       if (number_of_entries > 1u) {
106         global_stats->collision_sum += number_of_entries - 1u;
107         global_stats->collision_max = std::max(global_stats->collision_max, number_of_entries);
108       }
109     }
110   }
111 
112  private:
113   template <typename T>
114   class HashedKey {
115    public:
116     HashedKey() : hash_(0u), key_(nullptr) { }
117     HashedKey(size_t hash, const T* key) : hash_(hash), key_(key) { }
118 
119     size_t Hash() const {
120       return hash_;
121     }
122 
123     const T* Key() const {
124       return key_;
125     }
126 
127     bool IsEmpty() const {
128       return Key() == nullptr;
129     }
130 
131     void MakeEmpty() {
132       key_ = nullptr;
133     }
134 
135    private:
136     size_t hash_;
137     const T* key_;
138   };
139 
140   class ShardEmptyFn {
141    public:
142     bool IsEmpty(const HashedKey<StoreKey>& key) const {
143       return key.IsEmpty();
144     }
145 
146     void MakeEmpty(HashedKey<StoreKey>& key) {
147       key.MakeEmpty();
148     }
149   };
150 
151   struct ShardHashFn {
152     template <typename T>
153     size_t operator()(const HashedKey<T>& key) const {
154       return key.Hash();
155     }
156   };
157 
158   struct ShardPred {
159     typename std::enable_if<!std::is_same<StoreKey, InKey>::value, bool>::type
160     operator()(const HashedKey<StoreKey>& lhs, const HashedKey<StoreKey>& rhs) const {
161       DCHECK(lhs.Key() != nullptr);
162       DCHECK(rhs.Key() != nullptr);
163       // Rehashing: stored keys are already deduplicated, so we can simply compare key pointers.
164       return lhs.Key() == rhs.Key();
165     }
166 
167     template <typename LeftT, typename RightT>
168     bool operator()(const HashedKey<LeftT>& lhs, const HashedKey<RightT>& rhs) const {
169       DCHECK(lhs.Key() != nullptr);
170       DCHECK(rhs.Key() != nullptr);
171       return lhs.Hash() == rhs.Hash() &&
172           lhs.Key()->size() == rhs.Key()->size() &&
173           std::equal(lhs.Key()->begin(), lhs.Key()->end(), rhs.Key()->begin());
174     }
175   };
176 
177   Alloc alloc_;
178   const std::string lock_name_;
179   Mutex lock_;
180   HashSet<HashedKey<StoreKey>, ShardEmptyFn, ShardHashFn, ShardPred> keys_ GUARDED_BY(lock_);
181 };
182 
183 template <typename InKey,
184           typename StoreKey,
185           typename Alloc,
186           typename HashType,
187           typename HashFunc,
188           HashType kShard>
189 const StoreKey* DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Add(
190     Thread* self, const InKey& key) {
191   uint64_t hash_start;
192   if (kIsDebugBuild) {
193     hash_start = NanoTime();
194   }
195   HashType raw_hash = HashFunc()(key);
196   if (kIsDebugBuild) {
197     uint64_t hash_end = NanoTime();
198     hash_time_ += hash_end - hash_start;
199   }
200   HashType shard_hash = raw_hash / kShard;
201   HashType shard_bin = raw_hash % kShard;
202   return shards_[shard_bin]->Add(self, shard_hash, key);
203 }
204 
205 template <typename InKey,
206           typename StoreKey,
207           typename Alloc,
208           typename HashType,
209           typename HashFunc,
210           HashType kShard>
211 DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DedupeSet(const char* set_name,
212                                                                          const Alloc& alloc)
213     : hash_time_(0) {
214   for (HashType i = 0; i < kShard; ++i) {
215     std::ostringstream oss;
216     oss << set_name << " lock " << i;
217     shards_[i].reset(new Shard(alloc, oss.str()));
218   }
219 }
220 
221 template <typename InKey,
222           typename StoreKey,
223           typename Alloc,
224           typename HashType,
225           typename HashFunc,
226           HashType kShard>
227 DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::~DedupeSet() {
228   // Everything done by member destructors.
229 }
230 
231 template <typename InKey,
232           typename StoreKey,
233           typename Alloc,
234           typename HashType,
235           typename HashFunc,
236           HashType kShard>
237 std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpStats(
238     Thread* self) const {
239   Stats stats;
240   for (HashType shard = 0; shard < kShard; ++shard) {
241     shards_[shard]->UpdateStats(self, &stats);
242   }
243   return android::base::StringPrintf("%zu collisions, %zu max hash collisions, "
244                                      "%zu/%zu probe distance, %" PRIu64 " ns hash time",
245                                      stats.collision_sum,
246                                      stats.collision_max,
247                                      stats.total_probe_distance,
248                                      stats.total_size,
249                                      hash_time_);
250 }
251 
252 
253 }  // namespace art
254 
255 #endif  // ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
256