1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "gvn.h"
18 
19 #include "base/arena_bit_vector.h"
20 #include "base/arena_containers.h"
21 #include "base/bit_vector-inl.h"
22 #include "side_effects_analysis.h"
23 #include "utils.h"
24 
25 namespace art {
26 
27 /**
28  * A ValueSet holds instructions that can replace other instructions. It is updated
29  * through the `Add` method, and the `Kill` method. The `Kill` method removes
30  * instructions that are affected by the given side effect.
31  *
32  * The `Lookup` method returns an equivalent instruction to the given instruction
33  * if there is one in the set. In GVN, we would say those instructions have the
34  * same "number".
35  */
36 class ValueSet : public ArenaObject<kArenaAllocGvn> {
37  public:
38   // Constructs an empty ValueSet which owns all its buckets.
ValueSet(ArenaAllocator * allocator)39   explicit ValueSet(ArenaAllocator* allocator)
40       : allocator_(allocator),
41         num_buckets_(kMinimumNumberOfBuckets),
42         buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
43         buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
44         num_entries_(0u) {
45     // ArenaAllocator returns zeroed memory, so no need to set buckets to null.
46     DCHECK(IsPowerOfTwo(num_buckets_));
47     buckets_owned_.SetInitialBits(num_buckets_);
48   }
49 
50   // Copy constructor. Depending on the load factor, it will either make a deep
51   // copy (all buckets owned) or a shallow one (buckets pointing to the parent).
ValueSet(ArenaAllocator * allocator,const ValueSet & other)52   ValueSet(ArenaAllocator* allocator, const ValueSet& other)
53       : allocator_(allocator),
54         num_buckets_(other.IdealBucketCount()),
55         buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
56         buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
57         num_entries_(0u) {
58     // ArenaAllocator returns zeroed memory, so entries of buckets_ and
59     // buckets_owned_ are initialized to null and false, respectively.
60     DCHECK(IsPowerOfTwo(num_buckets_));
61     PopulateFromInternal(other, /* is_dirty */ false);
62   }
63 
64   // Erases all values in this set and populates it with values from `other`.
PopulateFrom(const ValueSet & other)65   void PopulateFrom(const ValueSet& other) {
66     if (this == &other) {
67       return;
68     }
69     PopulateFromInternal(other, /* is_dirty */ true);
70   }
71 
72   // Returns true if `this` has enough buckets so that if `other` is copied into
73   // it, the load factor will not cross the upper threshold.
74   // If `exact_match` is set, true is returned only if `this` has the ideal
75   // number of buckets. Larger number of buckets is allowed otherwise.
CanHoldCopyOf(const ValueSet & other,bool exact_match)76   bool CanHoldCopyOf(const ValueSet& other, bool exact_match) {
77     if (exact_match) {
78       return other.IdealBucketCount() == num_buckets_;
79     } else {
80       return other.IdealBucketCount() <= num_buckets_;
81     }
82   }
83 
84   // Adds an instruction in the set.
Add(HInstruction * instruction)85   void Add(HInstruction* instruction) {
86     DCHECK(Lookup(instruction) == nullptr);
87     size_t hash_code = HashCode(instruction);
88     size_t index = BucketIndex(hash_code);
89 
90     if (!buckets_owned_.IsBitSet(index)) {
91       CloneBucket(index);
92     }
93     buckets_[index] = new (allocator_) Node(instruction, hash_code, buckets_[index]);
94     ++num_entries_;
95   }
96 
97   // If in the set, returns an equivalent instruction to the given instruction.
98   // Returns null otherwise.
Lookup(HInstruction * instruction) const99   HInstruction* Lookup(HInstruction* instruction) const {
100     size_t hash_code = HashCode(instruction);
101     size_t index = BucketIndex(hash_code);
102 
103     for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
104       if (node->GetHashCode() == hash_code) {
105         HInstruction* existing = node->GetInstruction();
106         if (existing->Equals(instruction)) {
107           return existing;
108         }
109       }
110     }
111     return nullptr;
112   }
113 
114   // Returns whether instruction is in the set.
Contains(HInstruction * instruction) const115   bool Contains(HInstruction* instruction) const {
116     size_t hash_code = HashCode(instruction);
117     size_t index = BucketIndex(hash_code);
118 
119     for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
120       if (node->GetInstruction() == instruction) {
121         return true;
122       }
123     }
124     return false;
125   }
126 
127   // Removes all instructions in the set affected by the given side effects.
Kill(SideEffects side_effects)128   void Kill(SideEffects side_effects) {
129     DeleteAllImpureWhich([side_effects](Node* node) {
130       return node->GetInstruction()->GetSideEffects().MayDependOn(side_effects);
131     });
132   }
133 
Clear()134   void Clear() {
135     num_entries_ = 0;
136     for (size_t i = 0; i < num_buckets_; ++i) {
137       buckets_[i] = nullptr;
138     }
139     buckets_owned_.SetInitialBits(num_buckets_);
140   }
141 
142   // Updates this set by intersecting with instructions in a predecessor's set.
IntersectWith(ValueSet * predecessor)143   void IntersectWith(ValueSet* predecessor) {
144     if (IsEmpty()) {
145       return;
146     } else if (predecessor->IsEmpty()) {
147       Clear();
148     } else {
149       // Pure instructions do not need to be tested because only impure
150       // instructions can be killed.
151       DeleteAllImpureWhich([predecessor](Node* node) {
152         return !predecessor->Contains(node->GetInstruction());
153       });
154     }
155   }
156 
IsEmpty() const157   bool IsEmpty() const { return num_entries_ == 0; }
GetNumberOfEntries() const158   size_t GetNumberOfEntries() const { return num_entries_; }
159 
160  private:
161   // Copies all entries from `other` to `this`.
162   // If `is_dirty` is set to true, existing data will be wiped first. It is
163   // assumed that `buckets_` and `buckets_owned_` are zero-allocated otherwise.
PopulateFromInternal(const ValueSet & other,bool is_dirty)164   void PopulateFromInternal(const ValueSet& other, bool is_dirty) {
165     DCHECK_NE(this, &other);
166     DCHECK_GE(num_buckets_, other.IdealBucketCount());
167 
168     if (num_buckets_ == other.num_buckets_) {
169       // Hash table remains the same size. We copy the bucket pointers and leave
170       // all buckets_owned_ bits false.
171       if (is_dirty) {
172         buckets_owned_.ClearAllBits();
173       } else {
174         DCHECK_EQ(buckets_owned_.NumSetBits(), 0u);
175       }
176       memcpy(buckets_, other.buckets_, num_buckets_ * sizeof(Node*));
177     } else {
178       // Hash table size changes. We copy and rehash all entries, and set all
179       // buckets_owned_ bits to true.
180       if (is_dirty) {
181         memset(buckets_, 0, num_buckets_ * sizeof(Node*));
182       } else {
183         if (kIsDebugBuild) {
184           for (size_t i = 0; i < num_buckets_; ++i) {
185             DCHECK(buckets_[i] == nullptr) << i;
186           }
187         }
188       }
189       for (size_t i = 0; i < other.num_buckets_; ++i) {
190         for (Node* node = other.buckets_[i]; node != nullptr; node = node->GetNext()) {
191           size_t new_index = BucketIndex(node->GetHashCode());
192           buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
193         }
194       }
195       buckets_owned_.SetInitialBits(num_buckets_);
196     }
197 
198     num_entries_ = other.num_entries_;
199   }
200 
201   class Node : public ArenaObject<kArenaAllocGvn> {
202    public:
Node(HInstruction * instruction,size_t hash_code,Node * next)203     Node(HInstruction* instruction, size_t hash_code, Node* next)
204         : instruction_(instruction), hash_code_(hash_code), next_(next) {}
205 
GetHashCode() const206     size_t GetHashCode() const { return hash_code_; }
GetInstruction() const207     HInstruction* GetInstruction() const { return instruction_; }
GetNext() const208     Node* GetNext() const { return next_; }
SetNext(Node * node)209     void SetNext(Node* node) { next_ = node; }
210 
Dup(ArenaAllocator * allocator,Node * new_next=nullptr)211     Node* Dup(ArenaAllocator* allocator, Node* new_next = nullptr) {
212       return new (allocator) Node(instruction_, hash_code_, new_next);
213     }
214 
215    private:
216     HInstruction* const instruction_;
217     const size_t hash_code_;
218     Node* next_;
219 
220     DISALLOW_COPY_AND_ASSIGN(Node);
221   };
222 
223   // Creates our own copy of a bucket that is currently pointing to a parent.
224   // This algorithm can be called while iterating over the bucket because it
225   // preserves the order of entries in the bucket and will return the clone of
226   // the given 'iterator'.
CloneBucket(size_t index,Node * iterator=nullptr)227   Node* CloneBucket(size_t index, Node* iterator = nullptr) {
228     DCHECK(!buckets_owned_.IsBitSet(index));
229     Node* clone_current = nullptr;
230     Node* clone_previous = nullptr;
231     Node* clone_iterator = nullptr;
232     for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
233       clone_current = node->Dup(allocator_, nullptr);
234       if (node == iterator) {
235         clone_iterator = clone_current;
236       }
237       if (clone_previous == nullptr) {
238         buckets_[index] = clone_current;
239       } else {
240         clone_previous->SetNext(clone_current);
241       }
242       clone_previous = clone_current;
243     }
244     buckets_owned_.SetBit(index);
245     return clone_iterator;
246   }
247 
248   // Iterates over buckets with impure instructions (even indices) and deletes
249   // the ones on which 'cond' returns true.
250   template<typename Functor>
DeleteAllImpureWhich(Functor cond)251   void DeleteAllImpureWhich(Functor cond) {
252     for (size_t i = 0; i < num_buckets_; i += 2) {
253       Node* node = buckets_[i];
254       Node* previous = nullptr;
255 
256       if (node == nullptr) {
257         continue;
258       }
259 
260       if (!buckets_owned_.IsBitSet(i)) {
261         // Bucket is not owned but maybe we won't need to change it at all.
262         // Iterate as long as the entries don't satisfy 'cond'.
263         while (node != nullptr) {
264           if (cond(node)) {
265             // We do need to delete an entry but we do not own the bucket.
266             // Clone the bucket, make sure 'previous' and 'node' point to
267             // the cloned entries and break.
268             previous = CloneBucket(i, previous);
269             node = (previous == nullptr) ? buckets_[i] : previous->GetNext();
270             break;
271           }
272           previous = node;
273           node = node->GetNext();
274         }
275       }
276 
277       // By this point we either own the bucket and can start deleting entries,
278       // or we do not own it but no entries matched 'cond'.
279       DCHECK(buckets_owned_.IsBitSet(i) || node == nullptr);
280 
281       // We iterate over the remainder of entries and delete those that match
282       // the given condition.
283       while (node != nullptr) {
284         Node* next = node->GetNext();
285         if (cond(node)) {
286           if (previous == nullptr) {
287             buckets_[i] = next;
288           } else {
289             previous->SetNext(next);
290           }
291         } else {
292           previous = node;
293         }
294         node = next;
295       }
296     }
297   }
298 
299   // Computes a bucket count such that the load factor is reasonable.
300   // This is estimated as (num_entries_ * 1.5) and rounded up to nearest pow2.
IdealBucketCount() const301   size_t IdealBucketCount() const {
302     size_t bucket_count = RoundUpToPowerOfTwo(num_entries_ + (num_entries_ >> 1));
303     if (bucket_count > kMinimumNumberOfBuckets) {
304       return bucket_count;
305     } else {
306       return kMinimumNumberOfBuckets;
307     }
308   }
309 
310   // Generates a hash code for an instruction.
HashCode(HInstruction * instruction) const311   size_t HashCode(HInstruction* instruction) const {
312     size_t hash_code = instruction->ComputeHashCode();
313     // Pure instructions are put into odd buckets to speed up deletion. Note that in the
314     // case of irreducible loops, we don't put pure instructions in odd buckets, as we
315     // need to delete them when entering the loop.
316     if (instruction->GetSideEffects().HasDependencies() ||
317         instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
318       return (hash_code << 1) | 0;
319     } else {
320       return (hash_code << 1) | 1;
321     }
322   }
323 
324   // Converts a hash code to a bucket index.
BucketIndex(size_t hash_code) const325   size_t BucketIndex(size_t hash_code) const {
326     return hash_code & (num_buckets_ - 1);
327   }
328 
329   ArenaAllocator* const allocator_;
330 
331   // The internal bucket implementation of the set.
332   size_t const num_buckets_;
333   Node** const buckets_;
334 
335   // Flags specifying which buckets were copied into the set from its parent.
336   // If a flag is not set, the corresponding bucket points to entries in the
337   // parent and must be cloned prior to making changes.
338   ArenaBitVector buckets_owned_;
339 
340   // The number of entries in the set.
341   size_t num_entries_;
342 
343   static constexpr size_t kMinimumNumberOfBuckets = 8;
344 
345   DISALLOW_COPY_AND_ASSIGN(ValueSet);
346 };
347 
348 /**
349  * Optimization phase that removes redundant instruction.
350  */
351 class GlobalValueNumberer : public ValueObject {
352  public:
GlobalValueNumberer(ArenaAllocator * allocator,HGraph * graph,const SideEffectsAnalysis & side_effects)353   GlobalValueNumberer(ArenaAllocator* allocator,
354                       HGraph* graph,
355                       const SideEffectsAnalysis& side_effects)
356       : graph_(graph),
357         allocator_(allocator),
358         side_effects_(side_effects),
359         sets_(graph->GetBlocks().size(), nullptr, allocator->Adapter(kArenaAllocGvn)),
360         visited_blocks_(
361             allocator, graph->GetBlocks().size(), /* expandable */ false, kArenaAllocGvn) {}
362 
363   void Run();
364 
365  private:
366   // Per-block GVN. Will also update the ValueSet of the dominated and
367   // successor blocks.
368   void VisitBasicBlock(HBasicBlock* block);
369 
370   HGraph* graph_;
371   ArenaAllocator* const allocator_;
372   const SideEffectsAnalysis& side_effects_;
373 
FindSetFor(HBasicBlock * block) const374   ValueSet* FindSetFor(HBasicBlock* block) const {
375     ValueSet* result = sets_[block->GetBlockId()];
376     DCHECK(result != nullptr) << "Could not find set for block B" << block->GetBlockId();
377     return result;
378   }
379 
AbandonSetFor(HBasicBlock * block)380   void AbandonSetFor(HBasicBlock* block) {
381     DCHECK(sets_[block->GetBlockId()] != nullptr)
382         << "Block B" << block->GetBlockId() << " expected to have a set";
383     sets_[block->GetBlockId()] = nullptr;
384   }
385 
386   // Returns false if the GlobalValueNumberer has already visited all blocks
387   // which may reference `block`.
388   bool WillBeReferencedAgain(HBasicBlock* block) const;
389 
390   // Iterates over visited blocks and finds one which has a ValueSet such that:
391   // (a) it will not be referenced in the future, and
392   // (b) it can hold a copy of `reference_set` with a reasonable load factor.
393   HBasicBlock* FindVisitedBlockWithRecyclableSet(HBasicBlock* block,
394                                                  const ValueSet& reference_set) const;
395 
396   // ValueSet for blocks. Initially null, but for an individual block they
397   // are allocated and populated by the dominator, and updated by all blocks
398   // in the path from the dominator to the block.
399   ArenaVector<ValueSet*> sets_;
400 
401   // BitVector which serves as a fast-access map from block id to
402   // visited/unvisited boolean.
403   ArenaBitVector visited_blocks_;
404 
405   DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
406 };
407 
Run()408 void GlobalValueNumberer::Run() {
409   DCHECK(side_effects_.HasRun());
410   sets_[graph_->GetEntryBlock()->GetBlockId()] = new (allocator_) ValueSet(allocator_);
411 
412   // Use the reverse post order to ensure the non back-edge predecessors of a block are
413   // visited before the block itself.
414   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
415     VisitBasicBlock(it.Current());
416   }
417 }
418 
VisitBasicBlock(HBasicBlock * block)419 void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
420   ValueSet* set = nullptr;
421 
422   const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
423   if (predecessors.size() == 0 || predecessors[0]->IsEntryBlock()) {
424     // The entry block should only accumulate constant instructions, and
425     // the builder puts constants only in the entry block.
426     // Therefore, there is no need to propagate the value set to the next block.
427     set = new (allocator_) ValueSet(allocator_);
428   } else {
429     HBasicBlock* dominator = block->GetDominator();
430     ValueSet* dominator_set = FindSetFor(dominator);
431 
432     if (dominator->GetSuccessors().size() == 1) {
433       // `block` is a direct successor of its dominator. No need to clone the
434       // dominator's set, `block` can take over its ownership including its buckets.
435       DCHECK_EQ(dominator->GetSingleSuccessor(), block);
436       AbandonSetFor(dominator);
437       set = dominator_set;
438     } else {
439       // Try to find a basic block which will never be referenced again and whose
440       // ValueSet can therefore be recycled. We will need to copy `dominator_set`
441       // into the recycled set, so we pass `dominator_set` as a reference for size.
442       HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(block, *dominator_set);
443       if (recyclable == nullptr) {
444         // No block with a suitable ValueSet found. Allocate a new one and
445         // copy `dominator_set` into it.
446         set = new (allocator_) ValueSet(allocator_, *dominator_set);
447       } else {
448         // Block with a recyclable ValueSet found. Clone `dominator_set` into it.
449         set = FindSetFor(recyclable);
450         AbandonSetFor(recyclable);
451         set->PopulateFrom(*dominator_set);
452       }
453     }
454 
455     if (!set->IsEmpty()) {
456       if (block->IsLoopHeader()) {
457         if (block->GetLoopInformation()->ContainsIrreducibleLoop()) {
458           // To satisfy our linear scan algorithm, no instruction should flow in an irreducible
459           // loop header. We clear the set at entry of irreducible loops and any loop containing
460           // an irreducible loop, as in both cases, GVN can extend the liveness of an instruction
461           // across the irreducible loop.
462           // Note that, if we're not compiling OSR, we could still do GVN and introduce
463           // phis at irreducible loop headers. We decided it was not worth the complexity.
464           set->Clear();
465         } else {
466           DCHECK(!block->GetLoopInformation()->IsIrreducible());
467           DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
468           set->Kill(side_effects_.GetLoopEffects(block));
469         }
470       } else if (predecessors.size() > 1) {
471         for (HBasicBlock* predecessor : predecessors) {
472           set->IntersectWith(FindSetFor(predecessor));
473           if (set->IsEmpty()) {
474             break;
475           }
476         }
477       }
478     }
479   }
480 
481   sets_[block->GetBlockId()] = set;
482 
483   HInstruction* current = block->GetFirstInstruction();
484   while (current != nullptr) {
485     // Save the next instruction in case `current` is removed from the graph.
486     HInstruction* next = current->GetNext();
487     // Do not kill the set with the side effects of the instruction just now: if
488     // the instruction is GVN'ed, we don't need to kill.
489     if (current->CanBeMoved()) {
490       if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
491         // For commutative ops, (x op y) will be treated the same as (y op x)
492         // after fixed ordering.
493         current->AsBinaryOperation()->OrderInputs();
494       }
495       HInstruction* existing = set->Lookup(current);
496       if (existing != nullptr) {
497         // This replacement doesn't make more OrderInputs() necessary since
498         // current is either used by an instruction that it dominates,
499         // which hasn't been visited yet due to the order we visit instructions.
500         // Or current is used by a phi, and we don't do OrderInputs() on a phi anyway.
501         current->ReplaceWith(existing);
502         current->GetBlock()->RemoveInstruction(current);
503       } else {
504         set->Kill(current->GetSideEffects());
505         set->Add(current);
506       }
507     } else {
508       set->Kill(current->GetSideEffects());
509     }
510     current = next;
511   }
512 
513   visited_blocks_.SetBit(block->GetBlockId());
514 }
515 
WillBeReferencedAgain(HBasicBlock * block) const516 bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
517   DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));
518 
519   for (auto dominated_block : block->GetDominatedBlocks()) {
520     if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
521       return true;
522     }
523   }
524 
525   for (auto successor : block->GetSuccessors()) {
526     if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
527       return true;
528     }
529   }
530 
531   return false;
532 }
533 
FindVisitedBlockWithRecyclableSet(HBasicBlock * block,const ValueSet & reference_set) const534 HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet(
535     HBasicBlock* block, const ValueSet& reference_set) const {
536   HBasicBlock* secondary_match = nullptr;
537 
538   for (size_t block_id : visited_blocks_.Indexes()) {
539     ValueSet* current_set = sets_[block_id];
540     if (current_set == nullptr) {
541       // Set was already recycled.
542       continue;
543     }
544 
545     HBasicBlock* current_block = block->GetGraph()->GetBlocks()[block_id];
546 
547     // We test if `current_set` has enough buckets to store a copy of
548     // `reference_set` with a reasonable load factor. If we find a set whose
549     // number of buckets matches perfectly, we return right away. If we find one
550     // that is larger, we return it if no perfectly-matching set is found.
551     // Note that we defer testing WillBeReferencedAgain until all other criteria
552     // have been satisfied because it might be expensive.
553     if (current_set->CanHoldCopyOf(reference_set, /* exact_match */ true)) {
554       if (!WillBeReferencedAgain(current_block)) {
555         return current_block;
556       }
557     } else if (secondary_match == nullptr &&
558                current_set->CanHoldCopyOf(reference_set, /* exact_match */ false)) {
559       if (!WillBeReferencedAgain(current_block)) {
560         secondary_match = current_block;
561       }
562     }
563   }
564 
565   return secondary_match;
566 }
567 
Run()568 void GVNOptimization::Run() {
569   GlobalValueNumberer gvn(graph_->GetArena(), graph_, side_effects_);
570   gvn.Run();
571 }
572 
573 }  // namespace art
574