1 /*
2  * Copyright 2014 Google Inc. All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SEMISTATIC_MAP_TEMPLATES_H
18 #define SEMISTATIC_MAP_TEMPLATES_H
19 
20 #if !IN_FRUIT_CPP_FILE
21 #error "Fruit .template.h file included in non-cpp file."
22 #endif
23 
24 #include <algorithm>
25 #include <cassert>
26 #include <chrono>
27 #include <random>
28 #include <utility>
29 // This include is not necessary for GCC/Clang, but it's necessary for MSVC.
30 #include <numeric>
31 
32 #include <fruit/impl/data_structures/semistatic_map.h>
33 
34 #include <fruit/impl/data_structures/arena_allocator.h>
35 #include <fruit/impl/data_structures/fixed_size_vector.templates.h>
36 #include <fruit/impl/fruit_assert.h>
37 
38 namespace fruit {
39 namespace impl {
40 
41 template <typename Key, typename Value>
42 template <typename Iter>
SemistaticMap(Iter values_begin,Iter values_end,std::size_t num_values,MemoryPool & memory_pool)43 SemistaticMap<Key, Value>::SemistaticMap(
44     Iter values_begin, Iter values_end, std::size_t num_values, MemoryPool& memory_pool) {
45   NumBits num_bits = pickNumBits(num_values);
46   std::size_t num_buckets = size_t(1) << num_bits;
47 
48   FixedSizeVector<Unsigned, ArenaAllocator<Unsigned>> count(num_buckets, 0, ArenaAllocator<Unsigned>(memory_pool));
49 
50   hash_function.shift = (sizeof(Unsigned) * CHAR_BIT - num_bits);
51 
52   // The cast is a no-op in some systems (e.g. GCC and Clang under Linux 64bit) but it's needed in other systems (e.g.
53   // MSVC).
54   unsigned seed = (unsigned)std::chrono::system_clock::now().time_since_epoch().count();
55   std::default_random_engine random_generator(seed);
56   std::uniform_int_distribution<Unsigned> random_distribution;
57 
58   while (1) {
59     hash_function.a = random_distribution(random_generator);
60 
61     for (Iter itr = values_begin; !(itr == values_end); ++itr) {
62       Unsigned& this_count = count[hash((*itr).first)];
63       ++this_count;
64       if (this_count == beta) {
65         goto pick_another;
66       }
67     }
68     break;
69 
70   pick_another:
71     std::memset(count.data(), 0, num_buckets * sizeof(Unsigned));
72   }
73 
74   values = FixedSizeVector<value_type>(num_values, value_type());
75 
76   std::partial_sum(count.begin(), count.end(), count.begin());
77   lookup_table = FixedSizeVector<CandidateValuesRange>(count.size());
78   for (Unsigned n : count) {
79     lookup_table.push_back(CandidateValuesRange{values.data() + n, values.data() + n});
80   }
81 
82   // At this point lookup_table[h] is the number of keys in [first, last) that have a hash <=h.
83   // Note that even though we ensure this after construction, it is not maintained by insert() so it's not an invariant.
84 
85   Iter itr = values_begin;
86   for (std::size_t i = 0; i < num_values; ++i, ++itr) {
87     value_type*& first_value_ptr = lookup_table[hash((*itr).first)].begin;
88     --first_value_ptr;
89     FruitAssert(values.data() <= first_value_ptr);
90     FruitAssert(first_value_ptr < values.data() + values.size());
91     *first_value_ptr = *itr;
92   }
93 }
94 
95 template <typename Key, typename Value>
SemistaticMap(const SemistaticMap<Key,Value> & map,std::vector<value_type,ArenaAllocator<value_type>> && new_elements)96 SemistaticMap<Key, Value>::SemistaticMap(const SemistaticMap<Key, Value>& map,
97                                          std::vector<value_type, ArenaAllocator<value_type>>&& new_elements)
98     : hash_function(map.hash_function), lookup_table(map.lookup_table, map.lookup_table.size()) {
99 
100   // Sort by hash.
101   std::sort(new_elements.begin(), new_elements.end(),
102             [this](const value_type& x, const value_type& y) { return hash(x.first) < hash(y.first); });
103 
104   std::size_t num_additional_values = new_elements.size();
105   // Add the space needed to store copies of the old buckets.
106   for (auto itr = new_elements.begin(), itr_end = new_elements.end(); itr != itr_end; /* no increment */) {
107     Unsigned h = hash(itr->first);
108     auto p = map.lookup_table[h];
109     num_additional_values += (p.end - p.begin);
110     for (; itr != itr_end && hash(itr->first) == h; ++itr) {
111     }
112   }
113 
114   values = FixedSizeVector<value_type>(num_additional_values);
115 
116   // Now actually perform the insertions.
117 
118   if (new_elements.empty()) {
119     // This is to workaround a bug in the STL shipped with GCC <4.8.2, where calling data() on an
120     // empty vector causes undefined behavior (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59829).
121     return;
122   }
123   for (value_type *itr = new_elements.data(), *itr_end = new_elements.data() + new_elements.size(); itr != itr_end;
124        /* no increment */) {
125     Unsigned h = hash(itr->first);
126     auto p = map.lookup_table[h];
127     num_additional_values += (p.end - p.begin);
128     value_type* first = itr;
129     for (; itr != itr_end && hash(itr->first) == h; ++itr) {
130     }
131     value_type* last = itr;
132     insert(h, first, last);
133   }
134 }
135 
136 template <typename Key, typename Value>
insert(std::size_t h,const value_type * elems_begin,const value_type * elems_end)137 void SemistaticMap<Key, Value>::insert(std::size_t h, const value_type* elems_begin, const value_type* elems_end) {
138 
139   value_type* old_bucket_begin = lookup_table[h].begin;
140   value_type* old_bucket_end = lookup_table[h].end;
141 
142   lookup_table[h].begin = values.data() + values.size();
143 
144   // Step 1: re-insert all keys with the same hash at the end (if any).
145   for (value_type* p = old_bucket_begin; p != old_bucket_end; ++p) {
146     values.push_back(*p);
147   }
148 
149   // Step 2: also insert the new keys and values
150   for (auto itr = elems_begin; itr != elems_end; ++itr) {
151     values.push_back(*itr);
152   }
153 
154   lookup_table[h].end = values.data() + values.size();
155 
156   // The old sequence is no longer pointed to by any index in the lookup table, but recompacting the vectors would be
157   // too slow.
158 }
159 
160 template <typename Key, typename Value>
at(Key key)161 const Value& SemistaticMap<Key, Value>::at(Key key) const {
162   Unsigned h = hash(key);
163   for (const value_type* p = lookup_table[h].begin; /* p!=lookup_table[h].end but no need to check */; ++p) {
164     FruitAssert(p != lookup_table[h].end);
165     if (p->first == key) {
166       return p->second;
167     }
168   }
169 }
170 
171 template <typename Key, typename Value>
find(Key key)172 const Value* SemistaticMap<Key, Value>::find(Key key) const {
173   Unsigned h = hash(key);
174   for (const value_type *p = lookup_table[h].begin, *p_end = lookup_table[h].end; p != p_end; ++p) {
175     if (p->first == key) {
176       return &(p->second);
177     }
178   }
179   return nullptr;
180 }
181 
182 template <typename Key, typename Value>
pickNumBits(std::size_t n)183 typename SemistaticMap<Key, Value>::NumBits SemistaticMap<Key, Value>::pickNumBits(std::size_t n) {
184   NumBits result = 1;
185   while ((std::size_t(1) << result) < n) {
186     ++result;
187   }
188   return result + 1;
189 }
190 
191 // This is here so that we don't have to include fixed_size_vector.templates.h in fruit.h.
192 template <typename Key, typename Value>
~SemistaticMap()193 SemistaticMap<Key, Value>::~SemistaticMap() {}
194 
195 } // namespace impl
196 } // namespace fruit
197 
198 #endif // SEMISTATIC_MAP_TEMPLATES_H
199