1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
18 #define ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
19 
20 #include "dedupe_set.h"
21 
22 #include <algorithm>
23 #include <inttypes.h>
24 #include <unordered_map>
25 
26 #include "base/mutex.h"
27 #include "base/hash_set.h"
28 #include "base/stl_util.h"
29 #include "base/stringprintf.h"
30 #include "base/time_utils.h"
31 
32 namespace art {
33 
34 template <typename InKey,
35           typename StoreKey,
36           typename Alloc,
37           typename HashType,
38           typename HashFunc,
39           HashType kShard>
40 struct DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Stats {
41   size_t collision_sum = 0u;
42   size_t collision_max = 0u;
43   size_t total_probe_distance = 0u;
44   size_t total_size = 0u;
45 };
46 
47 template <typename InKey,
48           typename StoreKey,
49           typename Alloc,
50           typename HashType,
51           typename HashFunc,
52           HashType kShard>
53 class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard {
54  public:
55   Shard(const Alloc& alloc, const std::string& lock_name)
56       : alloc_(alloc),
57         lock_name_(lock_name),
58         lock_(lock_name_.c_str()),
59         keys_() {
60   }
61 
62   ~Shard() {
63     for (const HashedKey<StoreKey>& key : keys_) {
64       DCHECK(key.Key() != nullptr);
65       alloc_.Destroy(key.Key());
66     }
67   }
68 
69   const StoreKey* Add(Thread* self, size_t hash, const InKey& in_key) REQUIRES(!lock_) {
70     MutexLock lock(self, lock_);
71     HashedKey<InKey> hashed_in_key(hash, &in_key);
72     auto it = keys_.Find(hashed_in_key);
73     if (it != keys_.end()) {
74       DCHECK(it->Key() != nullptr);
75       return it->Key();
76     }
77     const StoreKey* store_key = alloc_.Copy(in_key);
78     keys_.Insert(HashedKey<StoreKey> { hash, store_key });
79     return store_key;
80   }
81 
82   void UpdateStats(Thread* self, Stats* global_stats) REQUIRES(!lock_) {
83     // HashSet<> doesn't keep entries ordered by hash, so we actually allocate memory
84     // for bookkeeping while collecting the stats.
85     std::unordered_map<HashType, size_t> stats;
86     {
87       MutexLock lock(self, lock_);
88       // Note: The total_probe_distance will be updated with the current state.
89       // It may have been higher before a re-hash.
90       global_stats->total_probe_distance += keys_.TotalProbeDistance();
91       global_stats->total_size += keys_.Size();
92       for (const HashedKey<StoreKey>& key : keys_) {
93         auto it = stats.find(key.Hash());
94         if (it == stats.end()) {
95           stats.insert({key.Hash(), 1u});
96         } else {
97           ++it->second;
98         }
99       }
100     }
101     for (const auto& entry : stats) {
102       size_t number_of_entries = entry.second;
103       if (number_of_entries > 1u) {
104         global_stats->collision_sum += number_of_entries - 1u;
105         global_stats->collision_max = std::max(global_stats->collision_max, number_of_entries);
106       }
107     }
108   }
109 
110  private:
111   template <typename T>
112   class HashedKey {
113    public:
114     HashedKey() : hash_(0u), key_(nullptr) { }
115     HashedKey(size_t hash, const T* key) : hash_(hash), key_(key) { }
116 
117     size_t Hash() const {
118       return hash_;
119     }
120 
121     const T* Key() const {
122       return key_;
123     }
124 
125     bool IsEmpty() const {
126       return Key() == nullptr;
127     }
128 
129     void MakeEmpty() {
130       key_ = nullptr;
131     }
132 
133    private:
134     size_t hash_;
135     const T* key_;
136   };
137 
138   class ShardEmptyFn {
139    public:
140     bool IsEmpty(const HashedKey<StoreKey>& key) const {
141       return key.IsEmpty();
142     }
143 
144     void MakeEmpty(HashedKey<StoreKey>& key) {
145       key.MakeEmpty();
146     }
147   };
148 
149   struct ShardHashFn {
150     template <typename T>
151     size_t operator()(const HashedKey<T>& key) const {
152       return key.Hash();
153     }
154   };
155 
156   struct ShardPred {
157     typename std::enable_if<!std::is_same<StoreKey, InKey>::value, bool>::type
158     operator()(const HashedKey<StoreKey>& lhs, const HashedKey<StoreKey>& rhs) const {
159       DCHECK(lhs.Key() != nullptr);
160       DCHECK(rhs.Key() != nullptr);
161       // Rehashing: stored keys are already deduplicated, so we can simply compare key pointers.
162       return lhs.Key() == rhs.Key();
163     }
164 
165     template <typename LeftT, typename RightT>
166     bool operator()(const HashedKey<LeftT>& lhs, const HashedKey<RightT>& rhs) const {
167       DCHECK(lhs.Key() != nullptr);
168       DCHECK(rhs.Key() != nullptr);
169       return lhs.Hash() == rhs.Hash() &&
170           lhs.Key()->size() == rhs.Key()->size() &&
171           std::equal(lhs.Key()->begin(), lhs.Key()->end(), rhs.Key()->begin());
172     }
173   };
174 
175   Alloc alloc_;
176   const std::string lock_name_;
177   Mutex lock_;
178   HashSet<HashedKey<StoreKey>, ShardEmptyFn, ShardHashFn, ShardPred> keys_ GUARDED_BY(lock_);
179 };
180 
181 template <typename InKey,
182           typename StoreKey,
183           typename Alloc,
184           typename HashType,
185           typename HashFunc,
186           HashType kShard>
187 const StoreKey* DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Add(
188     Thread* self, const InKey& key) {
189   uint64_t hash_start;
190   if (kIsDebugBuild) {
191     hash_start = NanoTime();
192   }
193   HashType raw_hash = HashFunc()(key);
194   if (kIsDebugBuild) {
195     uint64_t hash_end = NanoTime();
196     hash_time_ += hash_end - hash_start;
197   }
198   HashType shard_hash = raw_hash / kShard;
199   HashType shard_bin = raw_hash % kShard;
200   return shards_[shard_bin]->Add(self, shard_hash, key);
201 }
202 
203 template <typename InKey,
204           typename StoreKey,
205           typename Alloc,
206           typename HashType,
207           typename HashFunc,
208           HashType kShard>
209 DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DedupeSet(const char* set_name,
210                                                                          const Alloc& alloc)
211     : hash_time_(0) {
212   for (HashType i = 0; i < kShard; ++i) {
213     std::ostringstream oss;
214     oss << set_name << " lock " << i;
215     shards_[i].reset(new Shard(alloc, oss.str()));
216   }
217 }
218 
219 template <typename InKey,
220           typename StoreKey,
221           typename Alloc,
222           typename HashType,
223           typename HashFunc,
224           HashType kShard>
225 DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::~DedupeSet() {
226   // Everything done by member destructors.
227 }
228 
229 template <typename InKey,
230           typename StoreKey,
231           typename Alloc,
232           typename HashType,
233           typename HashFunc,
234           HashType kShard>
235 std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpStats(
236     Thread* self) const {
237   Stats stats;
238   for (HashType shard = 0; shard < kShard; ++shard) {
239     shards_[shard]->UpdateStats(self, &stats);
240   }
241   return StringPrintf("%zu collisions, %zu max hash collisions, "
242                       "%zu/%zu probe distance, %" PRIu64 " ns hash time",
243                       stats.collision_sum,
244                       stats.collision_max,
245                       stats.total_probe_distance,
246                       stats.total_size,
247                       hash_time_);
248 }
249 
250 
251 }  // namespace art
252 
253 #endif  // ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
254