1 // Copyright (c) 2017 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef SOURCE_COMP_MOVE_TO_FRONT_H_
16 #define SOURCE_COMP_MOVE_TO_FRONT_H_
17 
18 #include <cassert>
19 #include <cstdint>
20 #include <map>
21 #include <set>
22 #include <unordered_map>
23 #include <vector>
24 
25 namespace spvtools {
26 namespace comp {
27 
28 // Log(n) move-to-front implementation. Implements the following functions:
29 // Insert - pushes value to the front of the mtf sequence
30 //          (only unique values allowed).
31 // Remove - remove value from the sequence.
32 // ValueFromRank - access value by its 1-indexed rank in the sequence.
33 // RankFromValue - get the rank of the given value in the sequence.
34 // Accessing a value with ValueFromRank or RankFromValue moves the value to the
35 // front of the sequence (rank of 1).
36 //
37 // The implementation is based on an AVL-based order statistic tree. The tree
38 // is ordered by timestamps issued when values are inserted or accessed (recent
39 // values go to the left side of the tree, old values are gradually rotated to
40 // the right side).
41 //
42 // Terminology
43 // rank: 1-indexed rank showing how recently the value was inserted or accessed.
44 // node: handle used internally to access node data.
45 // size: size of the subtree of a node (including the node).
46 // height: distance from a node to the farthest leaf.
47 class MoveToFront {
48  public:
49   explicit MoveToFront(size_t reserve_capacity = 4) {
50     nodes_.reserve(reserve_capacity);
51 
52     // Create NIL node.
53     nodes_.emplace_back(Node());
54   }
55 
56   virtual ~MoveToFront() = default;
57 
58   // Inserts value in the move-to-front sequence. Does nothing if the value is
59   // already in the sequence. Returns true if insertion was successful.
60   // The inserted value is placed at the front of the sequence (rank 1).
61   bool Insert(uint32_t value);
62 
63   // Removes value from move-to-front sequence. Returns false iff the value
64   // was not found.
65   bool Remove(uint32_t value);
66 
67   // Computes 1-indexed rank of value in the move-to-front sequence and moves
68   // the value to the front. Example:
69   // Before the call: 4 8 2 1 7
70   // RankFromValue(8) returns 2
71   // After the call: 8 4 2 1 7
72   // Returns true iff the value was found in the sequence.
73   bool RankFromValue(uint32_t value, uint32_t* rank);
74 
75   // Returns value corresponding to a 1-indexed rank in the move-to-front
76   // sequence and moves the value to the front. Example:
77   // Before the call: 4 8 2 1 7
78   // ValueFromRank(2) returns 8
79   // After the call: 8 4 2 1 7
80   // Returns true iff the rank is within bounds [1, GetSize()].
81   bool ValueFromRank(uint32_t rank, uint32_t* value);
82 
83   // Moves the value to the front of the sequence.
84   // Returns false iff value is not in the sequence.
85   bool Promote(uint32_t value);
86 
87   // Returns true iff the move-to-front sequence contains the value.
88   bool HasValue(uint32_t value) const;
89 
90   // Returns the number of elements in the move-to-front sequence.
GetSize()91   uint32_t GetSize() const { return SizeOf(root_); }
92 
93  protected:
94   // Internal tree data structure uses handles instead of pointers. Leaves and
95   // root parent reference a singleton under handle 0. Although dereferencing
96   // a null pointer is not possible, inappropriate access to handle 0 would
97   // cause an assertion. Handles are not garbage collected if value was
98   // deprecated
99   // with DeprecateValue(). But handles are recycled when a node is
100   // repositioned.
101 
102   // Internal tree data structure node.
103   struct Node {
104     // Timestamp from a logical clock which updates every time the element is
105     // accessed through ValueFromRank or RankFromValue.
106     uint32_t timestamp = 0;
107     // The size of the node's subtree, including the node.
108     // SizeOf(LeftOf(node)) + SizeOf(RightOf(node)) + 1.
109     uint32_t size = 0;
110     // Handles to connected nodes.
111     uint32_t left = 0;
112     uint32_t right = 0;
113     uint32_t parent = 0;
114     // Distance to the farthest leaf.
115     // Leaves have height 0, real nodes at least 1.
116     uint32_t height = 0;
117     // Stored value.
118     uint32_t value = 0;
119   };
120 
121   // Creates node and sets correct values. Non-NIL nodes should be created only
122   // through this function. If the node with this value has been created
123   // previously
124   // and since orphaned, reuses the old node instead of creating a new one.
125   uint32_t CreateNode(uint32_t timestamp, uint32_t value);
126 
127   // Node accessor methods. Naming is designed to be similar to natural
128   // language as these functions tend to be used in sequences, for example:
129   // ParentOf(LeftestDescendentOf(RightOf(node)))
130 
131   // Returns value of the node referenced by |handle|.
ValueOf(uint32_t node)132   uint32_t ValueOf(uint32_t node) const { return nodes_.at(node).value; }
133 
134   // Returns left child of |node|.
LeftOf(uint32_t node)135   uint32_t LeftOf(uint32_t node) const { return nodes_.at(node).left; }
136 
137   // Returns right child of |node|.
RightOf(uint32_t node)138   uint32_t RightOf(uint32_t node) const { return nodes_.at(node).right; }
139 
140   // Returns parent of |node|.
ParentOf(uint32_t node)141   uint32_t ParentOf(uint32_t node) const { return nodes_.at(node).parent; }
142 
143   // Returns timestamp of |node|.
TimestampOf(uint32_t node)144   uint32_t TimestampOf(uint32_t node) const {
145     assert(node);
146     return nodes_.at(node).timestamp;
147   }
148 
149   // Returns size of |node|.
SizeOf(uint32_t node)150   uint32_t SizeOf(uint32_t node) const { return nodes_.at(node).size; }
151 
152   // Returns height of |node|.
HeightOf(uint32_t node)153   uint32_t HeightOf(uint32_t node) const { return nodes_.at(node).height; }
154 
155   // Returns mutable reference to value of |node|.
MutableValueOf(uint32_t node)156   uint32_t& MutableValueOf(uint32_t node) {
157     assert(node);
158     return nodes_.at(node).value;
159   }
160 
161   // Returns mutable reference to handle of left child of |node|.
MutableLeftOf(uint32_t node)162   uint32_t& MutableLeftOf(uint32_t node) {
163     assert(node);
164     return nodes_.at(node).left;
165   }
166 
167   // Returns mutable reference to handle of right child of |node|.
MutableRightOf(uint32_t node)168   uint32_t& MutableRightOf(uint32_t node) {
169     assert(node);
170     return nodes_.at(node).right;
171   }
172 
173   // Returns mutable reference to handle of parent of |node|.
MutableParentOf(uint32_t node)174   uint32_t& MutableParentOf(uint32_t node) {
175     assert(node);
176     return nodes_.at(node).parent;
177   }
178 
179   // Returns mutable reference to timestamp of |node|.
MutableTimestampOf(uint32_t node)180   uint32_t& MutableTimestampOf(uint32_t node) {
181     assert(node);
182     return nodes_.at(node).timestamp;
183   }
184 
185   // Returns mutable reference to size of |node|.
MutableSizeOf(uint32_t node)186   uint32_t& MutableSizeOf(uint32_t node) {
187     assert(node);
188     return nodes_.at(node).size;
189   }
190 
191   // Returns mutable reference to height of |node|.
MutableHeightOf(uint32_t node)192   uint32_t& MutableHeightOf(uint32_t node) {
193     assert(node);
194     return nodes_.at(node).height;
195   }
196 
197   // Returns true iff |node| is left child of its parent.
IsLeftChild(uint32_t node)198   bool IsLeftChild(uint32_t node) const {
199     assert(node);
200     return LeftOf(ParentOf(node)) == node;
201   }
202 
203   // Returns true iff |node| is right child of its parent.
IsRightChild(uint32_t node)204   bool IsRightChild(uint32_t node) const {
205     assert(node);
206     return RightOf(ParentOf(node)) == node;
207   }
208 
209   // Returns true iff |node| has no relatives.
IsOrphan(uint32_t node)210   bool IsOrphan(uint32_t node) const {
211     assert(node);
212     return !ParentOf(node) && !LeftOf(node) && !RightOf(node);
213   }
214 
215   // Returns true iff |node| is in the tree.
IsInTree(uint32_t node)216   bool IsInTree(uint32_t node) const {
217     assert(node);
218     return node == root_ || !IsOrphan(node);
219   }
220 
221   // Returns the height difference between right and left subtrees.
BalanceOf(uint32_t node)222   int BalanceOf(uint32_t node) const {
223     return int(HeightOf(RightOf(node))) - int(HeightOf(LeftOf(node)));
224   }
225 
226   // Updates size and height of the node, assuming that the children have
227   // correct values.
228   void UpdateNode(uint32_t node);
229 
230   // Returns the most LeftOf(LeftOf(... descendent which is not leaf.
LeftestDescendantOf(uint32_t node)231   uint32_t LeftestDescendantOf(uint32_t node) const {
232     uint32_t parent = 0;
233     while (node) {
234       parent = node;
235       node = LeftOf(node);
236     }
237     return parent;
238   }
239 
240   // Returns the most RightOf(RightOf(... descendent which is not leaf.
RightestDescendantOf(uint32_t node)241   uint32_t RightestDescendantOf(uint32_t node) const {
242     uint32_t parent = 0;
243     while (node) {
244       parent = node;
245       node = RightOf(node);
246     }
247     return parent;
248   }
249 
250   // Inserts node in the tree. The node must be an orphan.
251   void InsertNode(uint32_t node);
252 
253   // Removes node from the tree. May change value_to_node_ if removal uses a
254   // scapegoat. Returns the removed (orphaned) handle for recycling. The
255   // returned handle may not be equal to |node| if scapegoat was used.
256   uint32_t RemoveNode(uint32_t node);
257 
258   // Rotates |node| left, reassigns all connections and returns the node
259   // which takes place of the |node|.
260   uint32_t RotateLeft(const uint32_t node);
261 
262   // Rotates |node| right, reassigns all connections and returns the node
263   // which takes place of the |node|.
264   uint32_t RotateRight(const uint32_t node);
265 
266   // Root node handle. The tree is empty if root_ is 0.
267   uint32_t root_ = 0;
268 
269   // Incremented counters for next timestamp and value.
270   uint32_t next_timestamp_ = 1;
271 
272   // Holds all tree nodes. Indices of this vector are node handles.
273   std::vector<Node> nodes_;
274 
275   // Maps ids to node handles.
276   std::unordered_map<uint32_t, uint32_t> value_to_node_;
277 
278   // Cache for the last accessed value in the sequence.
279   uint32_t last_accessed_value_ = 0;
280   bool last_accessed_value_valid_ = false;
281 };
282 
283 class MultiMoveToFront {
284  public:
285   // Inserts |value| to sequence with handle |mtf|.
286   // Returns false if |mtf| already has |value|.
Insert(uint64_t mtf,uint32_t value)287   bool Insert(uint64_t mtf, uint32_t value) {
288     if (GetMtf(mtf).Insert(value)) {
289       val_to_mtfs_[value].insert(mtf);
290       return true;
291     }
292     return false;
293   }
294 
295   // Removes |value| from sequence with handle |mtf|.
296   // Returns false if |mtf| doesn't have |value|.
Remove(uint64_t mtf,uint32_t value)297   bool Remove(uint64_t mtf, uint32_t value) {
298     if (GetMtf(mtf).Remove(value)) {
299       val_to_mtfs_[value].erase(mtf);
300       return true;
301     }
302     assert(val_to_mtfs_[value].count(mtf) == 0);
303     return false;
304   }
305 
306   // Removes |value| from all sequences which have it.
RemoveFromAll(uint32_t value)307   void RemoveFromAll(uint32_t value) {
308     auto it = val_to_mtfs_.find(value);
309     if (it == val_to_mtfs_.end()) return;
310 
311     auto& mtfs_containing_value = it->second;
312     for (uint64_t mtf : mtfs_containing_value) {
313       GetMtf(mtf).Remove(value);
314     }
315 
316     val_to_mtfs_.erase(value);
317   }
318 
319   // Computes rank of |value| in sequence |mtf|.
320   // Returns false if |mtf| doesn't have |value|.
RankFromValue(uint64_t mtf,uint32_t value,uint32_t * rank)321   bool RankFromValue(uint64_t mtf, uint32_t value, uint32_t* rank) {
322     return GetMtf(mtf).RankFromValue(value, rank);
323   }
324 
325   // Finds |value| with |rank| in sequence |mtf|.
326   // Returns false if |rank| is out of bounds.
ValueFromRank(uint64_t mtf,uint32_t rank,uint32_t * value)327   bool ValueFromRank(uint64_t mtf, uint32_t rank, uint32_t* value) {
328     return GetMtf(mtf).ValueFromRank(rank, value);
329   }
330 
331   // Returns size of |mtf| sequence.
GetSize(uint64_t mtf)332   uint32_t GetSize(uint64_t mtf) { return GetMtf(mtf).GetSize(); }
333 
334   // Promotes |value| in all sequences which have it.
Promote(uint32_t value)335   void Promote(uint32_t value) {
336     const auto it = val_to_mtfs_.find(value);
337     if (it == val_to_mtfs_.end()) return;
338 
339     const auto& mtfs_containing_value = it->second;
340     for (uint64_t mtf : mtfs_containing_value) {
341       GetMtf(mtf).Promote(value);
342     }
343   }
344 
345   // Inserts |value| in sequence |mtf| or promotes if it's already there.
InsertOrPromote(uint64_t mtf,uint32_t value)346   void InsertOrPromote(uint64_t mtf, uint32_t value) {
347     if (!Insert(mtf, value)) {
348       GetMtf(mtf).Promote(value);
349     }
350   }
351 
352   // Returns if |mtf| sequence has |value|.
HasValue(uint64_t mtf,uint32_t value)353   bool HasValue(uint64_t mtf, uint32_t value) {
354     return GetMtf(mtf).HasValue(value);
355   }
356 
357  private:
358   // Returns actual MoveToFront object corresponding to |handle|.
359   // As multiple operations are often performed consecutively for the same
360   // sequence, the last returned value is cached.
GetMtf(uint64_t handle)361   MoveToFront& GetMtf(uint64_t handle) {
362     if (!cached_mtf_ || cached_handle_ != handle) {
363       cached_handle_ = handle;
364       cached_mtf_ = &mtfs_[handle];
365     }
366 
367     return *cached_mtf_;
368   }
369 
370   // Container holding MoveToFront objects. Map key is sequence handle.
371   std::map<uint64_t, MoveToFront> mtfs_;
372 
373   // Container mapping value to sequences which contain that value.
374   std::unordered_map<uint32_t, std::set<uint64_t>> val_to_mtfs_;
375 
376   // Cache for the last accessed sequence.
377   uint64_t cached_handle_ = 0;
378   MoveToFront* cached_mtf_ = nullptr;
379 };
380 
381 }  // namespace comp
382 }  // namespace spvtools
383 
384 #endif  // SOURCE_COMP_MOVE_TO_FRONT_H_
385