1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_UTILS_DEDUPE_SET_H_
18 #define ART_COMPILER_UTILS_DEDUPE_SET_H_
19 
20 #include <algorithm>
21 #include <inttypes.h>
22 #include <memory>
23 #include <set>
24 #include <string>
25 
26 #include "base/mutex.h"
27 #include "base/stl_util.h"
28 #include "base/stringprintf.h"
29 #include "utils/swap_space.h"
30 
31 namespace art {
32 
33 // A set of Keys that support a HashFunc returning HashType. Used to find duplicates of Key in the
34 // Add method. The data-structure is thread-safe through the use of internal locks, it also
35 // supports the lock being sharded.
36 template <typename InKey, typename StoreKey, typename HashType, typename HashFunc,
37           HashType kShard = 1>
38 class DedupeSet {
39   typedef std::pair<HashType, const InKey*> HashedInKey;
40   struct HashedKey {
41     StoreKey* store_ptr;
42     union {
43       HashType store_hash;        // Valid if store_ptr != nullptr.
44       const HashedInKey* in_key;  // Valid if store_ptr == nullptr.
45     };
46   };
47 
48   class Comparator {
49    public:
operator()50     bool operator()(const HashedKey& a, const HashedKey& b) const {
51       HashType a_hash = (a.store_ptr != nullptr) ? a.store_hash : a.in_key->first;
52       HashType b_hash = (b.store_ptr != nullptr) ? b.store_hash : b.in_key->first;
53       if (a_hash != b_hash) {
54         return a_hash < b_hash;
55       }
56       if (a.store_ptr != nullptr && b.store_ptr != nullptr) {
57         return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(),
58                                             b.store_ptr->begin(), b.store_ptr->end());
59       } else if (a.store_ptr != nullptr && b.store_ptr == nullptr) {
60         return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(),
61                                             b.in_key->second->begin(), b.in_key->second->end());
62       } else if (a.store_ptr == nullptr && b.store_ptr != nullptr) {
63         return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(),
64                                             b.store_ptr->begin(), b.store_ptr->end());
65       } else {
66         return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(),
67                                             b.in_key->second->begin(), b.in_key->second->end());
68       }
69     }
70   };
71 
72  public:
Add(Thread * self,const InKey & key)73   StoreKey* Add(Thread* self, const InKey& key) {
74     uint64_t hash_start;
75     if (kIsDebugBuild) {
76       hash_start = NanoTime();
77     }
78     HashType raw_hash = HashFunc()(key);
79     if (kIsDebugBuild) {
80       uint64_t hash_end = NanoTime();
81       hash_time_ += hash_end - hash_start;
82     }
83     HashType shard_hash = raw_hash / kShard;
84     HashType shard_bin = raw_hash % kShard;
85     HashedInKey hashed_in_key(shard_hash, &key);
86     HashedKey hashed_key;
87     hashed_key.store_ptr = nullptr;
88     hashed_key.in_key = &hashed_in_key;
89     MutexLock lock(self, *lock_[shard_bin]);
90     auto it = keys_[shard_bin].find(hashed_key);
91     if (it != keys_[shard_bin].end()) {
92       DCHECK(it->store_ptr != nullptr);
93       return it->store_ptr;
94     }
95     hashed_key.store_ptr = CreateStoreKey(key);
96     hashed_key.store_hash = shard_hash;
97     keys_[shard_bin].insert(hashed_key);
98     return hashed_key.store_ptr;
99   }
100 
DedupeSet(const char * set_name,SwapAllocator<void> & alloc)101   explicit DedupeSet(const char* set_name, SwapAllocator<void>& alloc)
102       : allocator_(alloc), hash_time_(0) {
103     for (HashType i = 0; i < kShard; ++i) {
104       std::ostringstream oss;
105       oss << set_name << " lock " << i;
106       lock_name_[i] = oss.str();
107       lock_[i].reset(new Mutex(lock_name_[i].c_str()));
108     }
109   }
110 
~DedupeSet()111   ~DedupeSet() {
112     // Have to manually free all pointers.
113     for (auto& shard : keys_) {
114       for (const auto& hashed_key : shard) {
115         DCHECK(hashed_key.store_ptr != nullptr);
116         DeleteStoreKey(hashed_key.store_ptr);
117       }
118     }
119   }
120 
DumpStats()121   std::string DumpStats() const {
122     size_t collision_sum = 0;
123     size_t collision_max = 0;
124     for (HashType shard = 0; shard < kShard; ++shard) {
125       HashType last_hash = 0;
126       size_t collision_cur_max = 0;
127       for (const HashedKey& key : keys_[shard]) {
128         DCHECK(key.store_ptr != nullptr);
129         if (key.store_hash == last_hash) {
130           collision_cur_max++;
131           if (collision_cur_max > 1) {
132             collision_sum++;
133             if (collision_cur_max > collision_max) {
134               collision_max = collision_cur_max;
135             }
136           }
137         } else {
138           collision_cur_max = 1;
139           last_hash = key.store_hash;
140         }
141       }
142     }
143     return StringPrintf("%zu collisions, %zu max bucket size, %" PRIu64 " ns hash time",
144                         collision_sum, collision_max, hash_time_);
145   }
146 
147  private:
CreateStoreKey(const InKey & key)148   StoreKey* CreateStoreKey(const InKey& key) {
149     StoreKey* ret = allocator_.allocate(1);
150     allocator_.construct(ret, key.begin(), key.end(), allocator_);
151     return ret;
152   }
153 
DeleteStoreKey(StoreKey * key)154   void DeleteStoreKey(StoreKey* key) {
155     SwapAllocator<StoreKey> alloc(allocator_);
156     alloc.destroy(key);
157     alloc.deallocate(key, 1);
158   }
159 
160   std::string lock_name_[kShard];
161   std::unique_ptr<Mutex> lock_[kShard];
162   std::set<HashedKey, Comparator> keys_[kShard];
163   SwapAllocator<StoreKey> allocator_;
164   uint64_t hash_time_;
165 
166   DISALLOW_COPY_AND_ASSIGN(DedupeSet);
167 };
168 
169 }  // namespace art
170 
171 #endif  // ART_COMPILER_UTILS_DEDUPE_SET_H_
172