1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_COMPILER_PERSISTENT_MAP_H_
6 #define V8_COMPILER_PERSISTENT_MAP_H_
7 
8 #include <array>
9 #include <tuple>
10 
11 #include "src/base/functional.h"
12 #include "src/zone/zone-containers.h"
13 
14 namespace v8 {
15 namespace internal {
16 namespace compiler {
17 
18 // PersistentMap is a persistent map datastructure based on hash trees (a binary
19 // tree using the bits of a hash value as addresses). The map is a conceptually
20 // infinite: All keys are initially mapped to a default value, values are
21 // deleted by overwriting them with the default value. The iterators produce
22 // exactly the keys that are not the default value. The hash values should have
23 // high variance in their high bits, so dense integers are a bad choice.
24 // Complexity:
25 // - Copy and assignment: O(1)
26 // - access: O(log n)
27 // - update: O(log n) time and space
28 // - iteration: amortized O(1) per step
29 // - Zip: O(n)
30 // - equality check: O(n)
31 // TODO(tebbi): Cache map transitions to avoid re-allocation of the same map.
32 // TODO(tebbi): Implement an O(1) equality check based on hash consing or
33 //              something similar.
34 template <class Key, class Value, class Hasher = base::hash<Key>>
35 class PersistentMap {
36  public:
37   using key_type = Key;
38   using mapped_type = Value;
39   using value_type = std::pair<Key, Value>;
40 
41  private:
42   static constexpr size_t kHashBits = 32;
43   enum Bit : int { kLeft = 0, kRight = 1 };
44 
45   // Access hash bits starting from the high bits and compare them according to
46   // their unsigned value. This way, the order in the hash tree is compatible
47   // with numeric hash comparisons.
48   class HashValue;
49 
50   struct KeyValue : std::pair<Key, Value> {
keyKeyValue51     const Key& key() const { return this->first; }
valueKeyValue52     const Value& value() const { return this->second; }
53     using std::pair<Key, Value>::pair;
54   };
55 
56   struct FocusedTree;
57 
58  public:
59   // Depth of the last added element. This is a cheap estimate for the size of
60   // the hash tree.
last_depth()61   size_t last_depth() const {
62     if (tree_) {
63       return tree_->length;
64     } else {
65       return 0;
66     }
67   }
68 
Get(const Key & key)69   const Value& Get(const Key& key) const {
70     HashValue key_hash = HashValue(Hasher()(key));
71     const FocusedTree* tree = FindHash(key_hash);
72     return GetFocusedValue(tree, key);
73   }
74 
75   // Add or overwrite an existing key-value pair.
76   void Set(Key key, Value value);
77 
78   bool operator==(const PersistentMap& other) const {
79     if (tree_ == other.tree_) return true;
80     if (def_value_ != other.def_value_) return false;
81     for (const std::tuple<Key, Value, Value>& triple : Zip(other)) {
82       if (std::get<1>(triple) != std::get<2>(triple)) return false;
83     }
84     return true;
85   }
86 
87   bool operator!=(const PersistentMap& other) const {
88     return !(*this == other);
89   }
90 
91   // The iterator produces key-value pairs in the lexicographical order of
92   // hash value and key. It produces exactly the key-value pairs where the value
93   // is not the default value.
94   class iterator;
95 
begin()96   iterator begin() const {
97     if (!tree_) return end();
98     return iterator::begin(tree_, def_value_);
99   }
end()100   iterator end() const { return iterator::end(def_value_); }
101 
102   // Iterator to traverse two maps in lockstep, producing matching value pairs
103   // for each key where at least one value is different from the respective
104   // default.
105   class double_iterator;
106 
107   // An iterable to iterate over the two maps in lockstep.
108   struct ZipIterable {
109     PersistentMap a;
110     PersistentMap b;
beginZipIterable111     double_iterator begin() { return double_iterator(a.begin(), b.begin()); }
endZipIterable112     double_iterator end() { return double_iterator(a.end(), b.end()); }
113   };
114 
Zip(const PersistentMap & other)115   ZipIterable Zip(const PersistentMap& other) const { return {*this, other}; }
116 
117   explicit PersistentMap(Zone* zone, Value def_value = Value())
PersistentMap(nullptr,zone,def_value)118       : PersistentMap(nullptr, zone, def_value) {}
119 
120  private:
121   // Find the {FocusedTree} that contains a key-value pair with key hash {hash}.
122   const FocusedTree* FindHash(HashValue hash) const;
123 
124   // Find the {FocusedTree} that contains a key-value pair with key hash {hash}.
125   // Output the path to this {FocusedTree} and its length. If no such
126   // {FocusedTree} exists, return {nullptr} and output the path to the last node
127   // with a matching hash prefix. Note that {length} is the length of the found
128   // path and may be less than the length of the found {FocusedTree}.
129   const FocusedTree* FindHash(HashValue hash,
130                               std::array<const FocusedTree*, kHashBits>* path,
131                               int* length) const;
132 
133   // Load value from the leaf node on the focused path of {tree}.
134   const Value& GetFocusedValue(const FocusedTree* tree, const Key& key) const;
135 
136   // Return the {FocusedTree} representing the left (bit==kLeft) or right
137   // (bit==kRight) child of the node on the path of {tree} at tree level
138   // {level}.
139   static const FocusedTree* GetChild(const FocusedTree* tree, int level,
140                                      Bit bit);
141 
142   // Find the leftmost path in the tree, starting at the node at tree level
143   // {level} on the path of {start}. Output the level of the leaf to {level} and
144   // the path to {path}.
145   static const FocusedTree* FindLeftmost(
146       const FocusedTree* start, int* level,
147       std::array<const FocusedTree*, kHashBits>* path);
148 
PersistentMap(const FocusedTree * tree,Zone * zone,Value def_value)149   PersistentMap(const FocusedTree* tree, Zone* zone, Value def_value)
150       : tree_(tree), def_value_(def_value), zone_(zone) {}
151 
152   const FocusedTree* tree_;
153   Value def_value_;
154   Zone* zone_;
155 };
156 
157 // This structure represents a hash tree with one focused path to a specific
158 // leaf. For the focused leaf, it stores key, value and key hash. The path is
159 // defined by the hash bits of the focused leaf. In a traditional tree
160 // datastructure, the nodes of a path form a linked list with the values being
161 // the pointers outside of this path. Instead of storing all of these nodes,
162 // we store an array of the pointers pointing outside of the path. This is
163 // similar to the stack used when doing DFS traversal of a tree. The hash of
164 // the leaf is used to know if the pointers point to the left or the
165 // right of the path. As there is no explicit representation of a tree node,
166 // this structure also represents all the nodes on its path. The intended node
167 // depends on the tree depth, which is always clear from the referencing
168 // context. So the pointer to a {FocusedTree} stored in the
169 // {PersistentMap.tree_} always references the root, while a pointer from a
170 // focused node of another {FocusedTree} always references to one tree level
171 // lower than before.
172 template <class Key, class Value, class Hasher>
173 struct PersistentMap<Key, Value, Hasher>::FocusedTree {
174   KeyValue key_value;
175   // The depth of the focused path, that is, the number of pointers stored in
176   // this structure.
177   int8_t length;
178   HashValue key_hash;
179   // Out-of-line storage for hash collisions.
180   const ZoneMap<Key, Value>* more;
181   using more_iterator = typename ZoneMap<Key, Value>::const_iterator;
182   // {path_array} has to be the last member: To store an array inline, we
183   // over-allocate memory for this structure and access memory beyond
184   // {path_array}. This corresponds to a flexible array member as defined in
185   // C99.
186   const FocusedTree* path_array[1];
187   const FocusedTree*& path(int i) {
188     DCHECK(i < length);
189     return reinterpret_cast<const FocusedTree**>(
190         reinterpret_cast<byte*>(this) + offsetof(FocusedTree, path_array))[i];
191   }
192   const FocusedTree* path(int i) const {
193     DCHECK(i < length);
194     return reinterpret_cast<const FocusedTree* const*>(
195         reinterpret_cast<const byte*>(this) +
196         offsetof(FocusedTree, path_array))[i];
197   }
198 };
199 
200 template <class Key, class Value, class Hasher>
201 class PersistentMap<Key, Value, Hasher>::HashValue {
202  public:
203   explicit HashValue(size_t hash) : bits_(static_cast<uint32_t>(hash)) {}
204 
205   Bit operator[](int pos) const {
206     DCHECK_LT(pos, kHashBits);
207     return bits_ & (static_cast<decltype(bits_)>(1) << (kHashBits - pos - 1))
208                ? kRight
209                : kLeft;
210   }
211 
212   bool operator<(HashValue other) const { return bits_ < other.bits_; }
213   bool operator==(HashValue other) const { return bits_ == other.bits_; }
214   bool operator!=(HashValue other) const { return bits_ != other.bits_; }
215   HashValue operator^(HashValue other) const {
216     return HashValue(bits_ ^ other.bits_);
217   }
218 
219  private:
220   static_assert(sizeof(uint32_t) * 8 == kHashBits, "wrong type for bits_");
221   uint32_t bits_;
222 };
223 
224 template <class Key, class Value, class Hasher>
225 class PersistentMap<Key, Value, Hasher>::iterator {
226  public:
227   const value_type operator*() const {
228     if (current_->more) {
229       return *more_iter_;
230     } else {
231       return current_->key_value;
232     }
233   }
234 
235   iterator& operator++() {
236     do {
237       if (!current_) {
238         // Iterator is past the end.
239         return *this;
240       }
241       if (current_->more) {
242         DCHECK(more_iter_ != current_->more->end());
243         ++more_iter_;
244         if (more_iter_ != current_->more->end()) return *this;
245       }
246       if (level_ == 0) {
247         *this = end(def_value_);
248         return *this;
249       }
250       --level_;
251       while (current_->key_hash[level_] == kRight || path_[level_] == nullptr) {
252         if (level_ == 0) {
253           *this = end(def_value_);
254           return *this;
255         }
256         --level_;
257       }
258       const FocusedTree* first_right_alternative = path_[level_];
259       level_++;
260       current_ = FindLeftmost(first_right_alternative, &level_, &path_);
261       if (current_->more) {
262         more_iter_ = current_->more->begin();
263       }
264     } while (!((**this).second != def_value()));
265     return *this;
266   }
267 
268   bool operator==(const iterator& other) const {
269     if (is_end()) return other.is_end();
270     if (other.is_end()) return false;
271     if (current_->key_hash != other.current_->key_hash) {
272       return false;
273     } else {
274       return (**this).first == (*other).first;
275     }
276   }
277   bool operator!=(const iterator& other) const { return !(*this == other); }
278 
279   bool operator<(const iterator& other) const {
280     if (is_end()) return false;
281     if (other.is_end()) return true;
282     if (current_->key_hash == other.current_->key_hash) {
283       return (**this).first < (*other).first;
284     } else {
285       return current_->key_hash < other.current_->key_hash;
286     }
287   }
288 
289   bool is_end() const { return current_ == nullptr; }
290 
291   const Value& def_value() { return def_value_; }
292 
293   static iterator begin(const FocusedTree* tree, Value def_value) {
294     iterator i(def_value);
295     i.current_ = FindLeftmost(tree, &i.level_, &i.path_);
296     if (i.current_->more) {
297       i.more_iter_ = i.current_->more->begin();
298     }
299     // Skip entries with default value. PersistentMap iterators must never point
300     // to a default value.
301     while (!i.is_end() && !((*i).second != def_value)) ++i;
302     return i;
303   }
304 
305   static iterator end(Value def_value) { return iterator(def_value); }
306 
307  private:
308   int level_;
309   typename FocusedTree::more_iterator more_iter_;
310   const FocusedTree* current_;
311   std::array<const FocusedTree*, kHashBits> path_;
312   Value def_value_;
313 
314   explicit iterator(Value def_value)
315       : level_(0), current_(nullptr), def_value_(def_value) {}
316 };
317 
318 template <class Key, class Value, class Hasher>
319 class PersistentMap<Key, Value, Hasher>::double_iterator {
320  public:
321   std::tuple<Key, Value, Value> operator*() {
322     if (first_current_) {
323       auto pair = *first_;
324       return std::make_tuple(
325           pair.first, pair.second,
326           second_current_ ? (*second_).second : second_.def_value());
327     } else {
328       DCHECK(second_current_);
329       auto pair = *second_;
330       return std::make_tuple(pair.first, first_.def_value(), pair.second);
331     }
332   }
333 
334   double_iterator& operator++() {
335 #ifdef DEBUG
336     iterator old_first = first_;
337     iterator old_second = second_;
338 #endif
339     if (first_current_) {
340       ++first_;
341       DCHECK(old_first < first_);
342     }
343     if (second_current_) {
344       ++second_;
345       DCHECK(old_second < second_);
346     }
347     return *this = double_iterator(first_, second_);
348   }
349 
350   double_iterator(iterator first, iterator second)
351       : first_(first), second_(second) {
352     if (first_ == second_) {
353       first_current_ = second_current_ = true;
354     } else if (first_ < second_) {
355       first_current_ = true;
356       second_current_ = false;
357     } else {
358       DCHECK(second_ < first_);
359       first_current_ = false;
360       second_current_ = true;
361     }
362   }
363 
364   bool operator!=(const double_iterator& other) {
365     return first_ != other.first_ || second_ != other.second_;
366   }
367 
368   bool is_end() const { return first_.is_end() && second_.is_end(); }
369 
370  private:
371   iterator first_;
372   iterator second_;
373   bool first_current_;
374   bool second_current_;
375 };
376 
377 template <class Key, class Value, class Hasher>
378 void PersistentMap<Key, Value, Hasher>::Set(Key key, Value value) {
379   HashValue key_hash = HashValue(Hasher()(key));
380   std::array<const FocusedTree*, kHashBits> path;
381   int length = 0;
382   const FocusedTree* old = FindHash(key_hash, &path, &length);
383   ZoneMap<Key, Value>* more = nullptr;
384   if (!(GetFocusedValue(old, key) != value)) return;
385   if (old && !(old->more == nullptr && old->key_value.key() == key)) {
386     more = new (zone_->New(sizeof(*more))) ZoneMap<Key, Value>(zone_);
387     if (old->more) {
388       *more = *old->more;
389     } else {
390       (*more)[old->key_value.key()] = old->key_value.value();
391     }
392     (*more)[key] = value;
393   }
394   FocusedTree* tree =
395       new (zone_->New(sizeof(FocusedTree) +
396                       std::max(0, length - 1) * sizeof(const FocusedTree*)))
397           FocusedTree{KeyValue(std::move(key), std::move(value)),
398                       static_cast<int8_t>(length),
399                       key_hash,
400                       more,
401                       {}};
402   for (int i = 0; i < length; ++i) {
403     tree->path(i) = path[i];
404   }
405   *this = PersistentMap(tree, zone_, def_value_);
406 }
407 
408 template <class Key, class Value, class Hasher>
409 const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
410 PersistentMap<Key, Value, Hasher>::FindHash(HashValue hash) const {
411   const FocusedTree* tree = tree_;
412   int level = 0;
413   while (tree && hash != tree->key_hash) {
414     while ((hash ^ tree->key_hash)[level] == 0) {
415       ++level;
416     }
417     tree = level < tree->length ? tree->path(level) : nullptr;
418     ++level;
419   }
420   return tree;
421 }
422 
423 template <class Key, class Value, class Hasher>
424 const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
425 PersistentMap<Key, Value, Hasher>::FindHash(
426     HashValue hash, std::array<const FocusedTree*, kHashBits>* path,
427     int* length) const {
428   const FocusedTree* tree = tree_;
429   int level = 0;
430   while (tree && hash != tree->key_hash) {
431     int map_length = tree->length;
432     while ((hash ^ tree->key_hash)[level] == 0) {
433       (*path)[level] = level < map_length ? tree->path(level) : nullptr;
434       ++level;
435     }
436     (*path)[level] = tree;
437     tree = level < tree->length ? tree->path(level) : nullptr;
438     ++level;
439   }
440   if (tree) {
441     while (level < tree->length) {
442       (*path)[level] = tree->path(level);
443       ++level;
444     }
445   }
446   *length = level;
447   return tree;
448 }
449 
450 template <class Key, class Value, class Hasher>
451 const Value& PersistentMap<Key, Value, Hasher>::GetFocusedValue(
452     const FocusedTree* tree, const Key& key) const {
453   if (!tree) {
454     return def_value_;
455   }
456   if (tree->more) {
457     auto it = tree->more->find(key);
458     if (it == tree->more->end())
459       return def_value_;
460     else
461       return it->second;
462   } else {
463     if (key == tree->key_value.key()) {
464       return tree->key_value.value();
465     } else {
466       return def_value_;
467     }
468   }
469 }
470 
471 template <class Key, class Value, class Hasher>
472 const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
473 PersistentMap<Key, Value, Hasher>::GetChild(const FocusedTree* tree, int level,
474                                             Bit bit) {
475   if (tree->key_hash[level] == bit) {
476     return tree;
477   } else if (level < tree->length) {
478     return tree->path(level);
479   } else {
480     return nullptr;
481   }
482 }
483 
484 template <class Key, class Value, class Hasher>
485 const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
486 PersistentMap<Key, Value, Hasher>::FindLeftmost(
487     const FocusedTree* start, int* level,
488     std::array<const FocusedTree*, kHashBits>* path) {
489   const FocusedTree* current = start;
490   while (*level < current->length) {
491     if (const FocusedTree* child = GetChild(current, *level, kLeft)) {
492       (*path)[*level] = GetChild(current, *level, kRight);
493       current = child;
494       ++*level;
495     } else if (const FocusedTree* child = GetChild(current, *level, kRight)) {
496       (*path)[*level] = GetChild(current, *level, kLeft);
497       current = child;
498       ++*level;
499     } else {
500       UNREACHABLE();
501     }
502   }
503   return current;
504 }
505 
506 template <class Key, class Value, class Hasher>
507 std::ostream& operator<<(std::ostream& os,
508                          const PersistentMap<Key, Value, Hasher>& map) {
509   os << "{";
510   bool first = true;
511   for (auto pair : map) {
512     if (!first) os << ", ";
513     first = false;
514     os << pair.first << ": " << pair.second;
515   }
516   return os << "}";
517 }
518 
519 }  // namespace compiler
520 }  // namespace internal
521 }  // namespace v8
522 
523 #endif  // V8_COMPILER_PERSISTENT_MAP_H_
524