/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "load_store_elimination.h"

#include <algorithm>
#include <optional>
#include <sstream>
#include <variant>

#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
#include "base/bit_vector.h"
#include "base/globals.h"
#include "base/indenter.h"
#include "base/iteration_range.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/transform_iterator.h"
#include "escape.h"
#include "handle.h"
#include "load_store_analysis.h"
#include "mirror/class_loader.h"
#include "mirror/dex_cache.h"
#include "nodes.h"
#include "oat/stack_map.h"
#include "optimizing_compiler_stats.h"
#include "reference_type_propagation.h"
#include "side_effects_analysis.h"

/**
 * The general algorithm of load-store elimination (LSE).
 *
 * We use load-store analysis to collect a list of heap locations and perform
 * alias analysis of those heap locations. LSE then keeps track of a list of
 * heap values corresponding to the heap locations and stores that put those
 * values in these locations.
 *  - In phase 1, we visit basic blocks in reverse post order and for each basic
 *    block, visit instructions sequentially, recording heap values and looking
 *    for loads and stores to eliminate without relying on loop Phis.
 *  - In phase 2, we look for loads that can be replaced by creating loop Phis
 *    or using a loop-invariant value.
 *  - In phase 3, we determine which stores are dead and can be eliminated and
 *    based on that information we re-evaluate whether some kept stores are
 *    storing the same value as the value in the heap location; such stores are
 *    also marked for elimination.
 *  - In phase 4, we commit the changes, replacing loads marked for elimination
 *    in previous processing and removing stores not marked for keeping. We also
 *    remove allocations that are no longer needed.
 *  - In phase 5, we move allocations which only escape along some executions
 *    closer to their escape points and fix up non-escaping paths with their
 *    actual values, creating PHIs when needed.
 *
 * 1. Walk over blocks and their instructions.
 *
 * The initial set of heap values for a basic block is
 *  - For a loop header of an irreducible loop, all heap values are unknown.
 *  - For a loop header of a normal loop, all values unknown at the end of the
 *    preheader are initialized to unknown, other heap values are set to Phi
 *    placeholders as we cannot determine yet whether these values are known on
 *    all back-edges. We use Phi placeholders also for array heap locations with
 *    index defined inside the loop, but this helps only when the value remains
 *    zero from the array allocation throughout the loop.
 *  - For catch blocks, we clear all assumptions since we arrived due to an
 *    instruction throwing.
 *  - For other basic blocks, we merge incoming values from the end of all
 *    predecessors. If any incoming value is unknown, the start value for this
 *    block is also unknown. Otherwise, if all the incoming values are the same
 *    (including the case of a single predecessor), the incoming value is used.
 *    Otherwise, we use a Phi placeholder to indicate different incoming values.
 *    We record whether such a Phi placeholder depends on a loop Phi placeholder
 *    (see the example after this list).
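 *
 * For example (a simplified illustration, not tied to any particular method;
 * `o`, `cond` and `x` are hypothetical):
 *
 *     if (cond) { o.f = 1; } else { o.f = 2; }
 *     x = o.f;
 *
 * At the merge point the incoming values for the location `o.f` differ (1 vs 2),
 * so the location gets a Phi placeholder; if the load of `o.f` is eliminated,
 * the placeholder is materialized as `Phi(1, 2)` and substitutes `x`.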
 *
 * For each instruction in the block
 *  - If the instruction is a load from a heap location with a known value not
 *    dependent on a loop Phi placeholder, the load can be eliminated, either by
 *    using an existing instruction or by creating new Phi(s) instead (see the
 *    example after this list). In order to maintain the validity of all heap
 *    locations during the optimization phase, we only record substitutes at
 *    this phase and the real elimination is delayed until the end of LSE. Loads
 *    that require a loop Phi placeholder replacement are recorded for
 *    processing later.
 *  - If the instruction is a store, it updates the heap value for the heap
 *    location with the stored value and records the store itself so that we can
 *    mark it for keeping if the value becomes observable. Heap values are
 *    invalidated for heap locations that may alias with the store instruction's
 *    heap location and their recorded stores are marked for keeping as they are
 *    now potentially observable. The store instruction can be eliminated unless
 *    the value stored is later needed, e.g. by a load from the same/aliased heap
 *    location, or the heap location persists at method return/deoptimization.
 *  - A store that stores the same value as the heap value is eliminated.
 *  - For newly instantiated instances, their heap values are initialized to
 *    language defined default values.
 *  - Finalizable objects are considered as persisting at method
 *    return/deoptimization.
 *  - Some instructions such as invokes are treated as loading and invalidating
 *    all the heap values, depending on the instruction's side effects.
 *  - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any
 *    partially overlapping access among ArrayGet/ArraySet/VecLoad/VecStore is
 *    treated as aliasing and no load/store is eliminated in such a case.
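 *
 * For example (a simplified illustration; `o`, `a` and `x` are hypothetical):
 *
 *     o.f = a;   // Records heap value `a` for the location `o.f`.
 *     x = o.f;   // Load eliminated: `x` is substituted with `a`.
 *     o.f = a;   // Stores the same value as the heap value: eliminated.
 *
 * The first store is kept only if the value may still be observed, e.g. if `o`
 * escapes or the heap location persists at method return/deoptimization.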
 *
 * The time complexity of the initial phase has several components. The total
 * time for the initialization of heap values for all blocks is
 *    O(heap_locations * edges)
 * and the time complexity for simple instruction processing is
 *    O(instructions).
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 2. Process loads that depend on loop Phi placeholders.
 *
 * We go over these loads to determine whether they can be eliminated. We look
 * for the set of all Phi placeholders that feed the load and depend on a loop
 * Phi placeholder and, if we find no unknown value, we construct the necessary
 * Phi(s) or, if all other inputs are identical, i.e. the location does not
 * change in the loop, just use that input. If we do find an unknown input, this
 * must be from a loop back-edge and we replace the loop Phi placeholder with an
 * unknown value and re-process loads and stores that previously depended on
 * loop Phi placeholders. This shall find at least one load of an unknown value
 * which is now known to be unreplaceable or a new unknown value on a back-edge
 * and we repeat this process until each load is either marked for replacement
 * or found to be unreplaceable. As we mark at least one additional loop Phi
 * placeholder as unreplaceable in each iteration, this process shall terminate.
 * A small example of the possible outcomes follows this paragraph.
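 *
 * For example (a simplified illustration; `o`, `cond` and `x` are hypothetical):
 *
 *     o.f = 1;
 *     while (cond) { x = o.f; }                 // Loop-invariant location:
 *                                               // replace `x` with the constant 1.
 *     o.f = 1;
 *     while (cond) { x = o.f; o.f = x + 1; }    // Changing location: materialize
 *                                               // a loop Phi(1, x + 1) for `x`.
 *
 * If the back-edge value were unknown, e.g. clobbered by an opaque call in the
 * loop, the placeholder would be marked unknown and the load would be kept.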
 *
 * The depth-first search for Phi placeholders in FindLoopPhisToMaterialize()
 * is limited by the number of Phi placeholders and their dependencies we need
 * to search with worst-case time complexity
 *    O(phi_placeholder_dependencies) .
 * The dependencies are usually just the Phi placeholders' potential inputs,
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault() for default value
 * replacement search, there are additional dependencies to consider, see below.
 *
 * In the successful case (no unknown inputs found) we use the Floyd-Warshall
 * algorithm to determine transitive closures for each found Phi placeholder,
 * and then match or materialize Phis from the smallest transitive closure,
 * so that we can determine if such a subset has a single other input. This has
 * time complexity
 *    O(phi_placeholders_found^3) .
 * Note that a successful TryReplacingLoopPhiPlaceholderWithDefault() does not
 * contribute to this as such Phi placeholders are replaced immediately.
 * The total time of all such successful cases has time complexity
 *    O(phi_placeholders^3)
 * because the found sets are disjoint and `Sum(n_i^3) <= Sum(n_i)^3`. A similar
 * argument applies to the searches used to find all successful cases, so their
 * total contribution is also just an insignificant
 *    O(phi_placeholder_dependencies) .
 * The materialization of Phis has an insignificant total time complexity
 *    O(phi_placeholders * edges) .
 *
 * If we find an unknown input, we re-process heap values and loads with a time
 * complexity that's the same as the phase 1 in the worst case. Adding this to
 * the depth-first search time complexity yields
 *    O(phi_placeholder_dependencies + heap_locations * edges + instructions)
 * for a single iteration. We can ignore the middle term as it's proportional
 * to the number of Phi placeholder inputs included in the first term. Using
 * the upper limit on the number of such iterations, the total time complexity is
 *    O((phi_placeholder_dependencies + instructions) * phi_placeholders) .
 *
 * The upper bound of Phi placeholder inputs is
 *    heap_locations * edges
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault(), the dependencies
 * include other heap locations in predecessor blocks with the upper bound of
 *    heap_locations^2 * edges .
 * Using the estimates
 *    edges <= blocks^2
 * and
 *    phi_placeholders <= heap_locations * blocks ,
 * the worst-case time complexity of the
 *    O(phi_placeholder_dependencies * phi_placeholders)
 * term from unknown input cases is actually
 *    O(heap_locations^3 * blocks^3) ,
 * exactly as the estimate for the Floyd-Warshall parts of successful cases.
 * Adding the other term from the unknown input cases (to account for the case
 * with significantly more instructions than blocks and heap locations), the
 * phase 2 time complexity is
 *    O(heap_locations^3 * blocks^3 + heap_locations * blocks * instructions) .
 *
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 3. Determine which stores to keep and which to eliminate.
 *
 * During instruction processing in phase 1 and re-processing in phase 2, we are
 * keeping a record of the stores and Phi placeholders that become observable
 * and now propagate the observable Phi placeholders to all actual stores that
 * feed them. Having determined observable stores, we look for stores that just
 * overwrite the old value with the same value. Since ignoring non-observable
 * stores actually changes the old values in heap locations, we need to
 * recalculate Phi placeholder replacements but we proceed similarly to the
 * previous phase. We look for the set of all Phis that feed the old value
 * replaced by the store (but ignoring whether they depend on a loop Phi) and,
 * if we find no unknown value, we try to match existing Phis (we do not create
 * new Phis anymore) or, if all other inputs are identical, i.e. the location
 * does not change in the loop, just use that input. If this succeeds and the
 * old value is identical to the value we're storing, such a store shall be
 * eliminated.
 *
 * The work is similar to the phase 2, except that we're not re-processing loads
 * and stores anymore, so the time complexity of phase 3 is
 *    O(heap_locations^3 * blocks^3) .
 *
 * There is additional complexity in matching existing Phis shared between the
 * phases 1, 2 and 3. We are never trying to match two or more Phis at the same
 * time (this could be difficult and slow), so each matching attempt is just
 * looking at Phis in the block (both old Phis and newly created Phis) and their
 * inputs. As we create at most `heap_locations` Phis in each block, the upper
 * bound on the number of Phis we look at is
 *    heap_locations * (old_phis + heap_locations)
 * and the worst-case time complexity is
 *    O(heap_locations^2 * edges + heap_locations * old_phis * edges) .
 * The first term is lower than one term in phase 2, so the relevant part is
 *    O(heap_locations * old_phis * edges) .
 *
 * 4. Replace loads and remove unnecessary stores and singleton allocations.
 *
 * A special type of objects called singletons are instantiated in the method
 * and have a single name, i.e. no aliases. Singletons have exclusive heap
 * locations since they have no aliases. Singletons are helpful in narrowing
 * down the life span of a heap location such that they do not always need to
 * participate in merging heap values. Allocation of a singleton can be
 * eliminated if that singleton is not used and does not persist at method
 * return/deoptimization, as in the example below.
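 *
 * For example (a simplified illustration; `Point` and `p` are hypothetical):
 *
 *     Point p = new Point();   // Singleton: `p` never escapes the method.
 *     p.x = 1;                 // Dead once the load below is replaced.
 *     return p.x;              // Replaced by the constant 1.
 *
 * Here the load, the store and, if the allocation needs no checks, the
 * allocation itself can all be removed.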
 *
 * The time complexity of this phase is
 *    O(instructions + instruction_uses) .
 *
 * FIXME: The time complexities described above assume that the
 * HeapLocationCollector finds a heap location for an instruction in O(1)
 * time but it is currently O(heap_locations); this can be fixed by adding
 * a hash map to the HeapLocationCollector.
 */

namespace art HIDDEN {

#define LSE_VLOG \
  if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO)

class HeapRefHolder;

// Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke().
class LSEVisitor final : private HGraphDelegateVisitor {
 public:
  LSEVisitor(HGraph* graph,
             const HeapLocationCollector& heap_location_collector,
             OptimizingCompilerStats* stats);

  void Run();

 private:
  class PhiPlaceholder {
   public:
    constexpr PhiPlaceholder() : block_id_(-1), heap_location_(-1) {}
    constexpr PhiPlaceholder(uint32_t block_id, size_t heap_location)
        : block_id_(block_id), heap_location_(dchecked_integral_cast<uint32_t>(heap_location)) {}

    constexpr PhiPlaceholder(const PhiPlaceholder& p) = default;
    constexpr PhiPlaceholder(PhiPlaceholder&& p) = default;
    constexpr PhiPlaceholder& operator=(const PhiPlaceholder& p) = default;
    constexpr PhiPlaceholder& operator=(PhiPlaceholder&& p) = default;

    constexpr uint32_t GetBlockId() const {
      return block_id_;
    }

    constexpr size_t GetHeapLocation() const {
      return heap_location_;
    }

    constexpr bool Equals(const PhiPlaceholder& p2) const {
      return block_id_ == p2.block_id_ && heap_location_ == p2.heap_location_;
    }

    void Dump(std::ostream& oss) const {
      oss << "PhiPlaceholder[blk: " << block_id_ << ", heap_location_: " << heap_location_ << "]";
    }

   private:
    uint32_t block_id_;
    uint32_t heap_location_;
  };

  struct Marker {};

  class Value;

  class PriorValueHolder {
   public:
    constexpr explicit PriorValueHolder(Value prior);

    constexpr bool IsInstruction() const {
      return std::holds_alternative<HInstruction*>(value_);
    }
    constexpr bool IsPhi() const {
      return std::holds_alternative<PhiPlaceholder>(value_);
    }
    constexpr bool IsDefault() const {
      return std::holds_alternative<Marker>(value_);
    }
    constexpr PhiPlaceholder GetPhiPlaceholder() const {
      DCHECK(IsPhi());
      return std::get<PhiPlaceholder>(value_);
    }
    constexpr HInstruction* GetInstruction() const {
      DCHECK(IsInstruction());
      return std::get<HInstruction*>(value_);
    }

    Value ToValue() const;
    void Dump(std::ostream& oss) const;

    constexpr bool Equals(PriorValueHolder other) const {
      return value_ == other.value_;
    }

   private:
    std::variant<Marker, HInstruction*, PhiPlaceholder> value_;
  };

  friend constexpr bool operator==(const Marker&, const Marker&);
  friend constexpr bool operator==(const PriorValueHolder& p1, const PriorValueHolder& p2);
  friend constexpr bool operator==(const PhiPlaceholder& p1, const PhiPlaceholder& p2);
  friend std::ostream& operator<<(std::ostream& oss, const PhiPlaceholder& p2);

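  // A heap value tracked for a heap location. Implemented as a tagged union over:
  // no value (invalid), a pure unknown, the default (zero/null) value, a known
  // HInstruction, a merged unknown, a needed (loop or non-loop) Phi placeholder,
  // and a partial unknown that remembers the prior value (see PriorValueHolder).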
  class Value {
   public:
    enum class ValuelessType {
      kInvalid,
      kPureUnknown,
      kDefault,
    };
    struct MergedUnknownMarker {
      PhiPlaceholder phi_;
    };
    struct NeedsNonLoopPhiMarker {
      PhiPlaceholder phi_;
    };
    struct NeedsLoopPhiMarker {
      PhiPlaceholder phi_;
    };

    static constexpr Value Invalid() {
      return Value(ValuelessType::kInvalid);
    }

    // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
    // A heap location can be set to an unknown heap value when:
    // - it is coming from outside the method, or
    // - it is killed due to aliasing, side effects, or merging with an unknown value.
    static constexpr Value PureUnknown() {
      return Value(ValuelessType::kPureUnknown);
    }

    static constexpr Value PartialUnknown(Value old_value) {
      if (old_value.IsInvalid() || old_value.IsPureUnknown()) {
        return PureUnknown();
      } else {
        return Value(PriorValueHolder(old_value));
      }
    }

    static constexpr Value MergedUnknown(PhiPlaceholder phi_placeholder) {
      return Value(MergedUnknownMarker{phi_placeholder});
    }

    // Default heap value after an allocation.
    // A heap location can be set to that value right after an allocation.
    static constexpr Value Default() {
      return Value(ValuelessType::kDefault);
    }

    static constexpr Value ForInstruction(HInstruction* instruction) {
      return Value(instruction);
    }

    static constexpr Value ForNonLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
      return Value(NeedsNonLoopPhiMarker{phi_placeholder});
    }

    static constexpr Value ForLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
      return Value(NeedsLoopPhiMarker{phi_placeholder});
    }

    static constexpr Value ForPhiPlaceholder(PhiPlaceholder phi_placeholder, bool needs_loop_phi) {
      return needs_loop_phi ? ForLoopPhiPlaceholder(phi_placeholder)
                            : ForNonLoopPhiPlaceholder(phi_placeholder);
    }

    constexpr bool IsValid() const {
      return !IsInvalid();
    }

    constexpr bool IsInvalid() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kInvalid;
    }

    bool IsPartialUnknown() const {
      return std::holds_alternative<PriorValueHolder>(value_);
    }

    bool IsMergedUnknown() const {
      return std::holds_alternative<MergedUnknownMarker>(value_);
    }

    bool IsPureUnknown() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kPureUnknown;
    }

    bool IsUnknown() const {
      return IsPureUnknown() || IsMergedUnknown() || IsPartialUnknown();
    }

    bool IsDefault() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kDefault;
    }

    bool IsInstruction() const {
      return std::holds_alternative<HInstruction*>(value_);
    }

    bool NeedsNonLoopPhi() const {
      return std::holds_alternative<NeedsNonLoopPhiMarker>(value_);
    }

    bool NeedsLoopPhi() const {
      return std::holds_alternative<NeedsLoopPhiMarker>(value_);
    }

    bool NeedsPhi() const {
      return NeedsNonLoopPhi() || NeedsLoopPhi();
    }

    HInstruction* GetInstruction() const {
      DCHECK(IsInstruction()) << *this;
      return std::get<HInstruction*>(value_);
    }

    PriorValueHolder GetPriorValue() const {
      DCHECK(IsPartialUnknown());
      return std::get<PriorValueHolder>(value_);
    }

    PhiPlaceholder GetPhiPlaceholder() const {
      DCHECK(NeedsPhi() || IsMergedUnknown());
      if (NeedsNonLoopPhi()) {
        return std::get<NeedsNonLoopPhiMarker>(value_).phi_;
      } else if (NeedsLoopPhi()) {
        return std::get<NeedsLoopPhiMarker>(value_).phi_;
      } else {
        return std::get<MergedUnknownMarker>(value_).phi_;
      }
    }

    uint32_t GetMergeBlockId() const {
      DCHECK(IsMergedUnknown()) << *this;
      return std::get<MergedUnknownMarker>(value_).phi_.GetBlockId();
    }

    HBasicBlock* GetMergeBlock(const HGraph* graph) const {
      DCHECK(IsMergedUnknown()) << *this;
      return graph->GetBlocks()[GetMergeBlockId()];
    }

    size_t GetHeapLocation() const {
      DCHECK(IsMergedUnknown() || NeedsPhi()) << *this;
      return GetPhiPlaceholder().GetHeapLocation();
    }

    constexpr bool ExactEquals(Value other) const;

    constexpr bool Equals(Value other) const;

    constexpr bool Equals(HInstruction* instruction) const {
      return Equals(ForInstruction(instruction));
    }

    std::ostream& Dump(std::ostream& os) const;

    // Public for use with lists.
    constexpr Value() : value_(ValuelessType::kInvalid) {}

   private:
    using ValueHolder = std::variant<ValuelessType,
                                     HInstruction*,
                                     MergedUnknownMarker,
                                     NeedsNonLoopPhiMarker,
                                     NeedsLoopPhiMarker,
                                     PriorValueHolder>;
    constexpr ValuelessType GetValuelessType() const {
      return std::get<ValuelessType>(value_);
    }

    constexpr explicit Value(ValueHolder v) : value_(v) {}

    friend std::ostream& operator<<(std::ostream& os, const Value& v);

    ValueHolder value_;

    static_assert(std::is_move_assignable<PhiPlaceholder>::value);
  };

  friend constexpr bool operator==(const Value::NeedsLoopPhiMarker& p1,
                                   const Value::NeedsLoopPhiMarker& p2);
  friend constexpr bool operator==(const Value::NeedsNonLoopPhiMarker& p1,
                                   const Value::NeedsNonLoopPhiMarker& p2);
  friend constexpr bool operator==(const Value::MergedUnknownMarker& p1,
                                   const Value::MergedUnknownMarker& p2);

  // Get the Phi placeholder index for access to `phi_placeholder_replacements_`
  // and "visited" bit vectors during depth-first searches.
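  // For example, with 3 heap locations, the placeholder for block 2 and heap
  // location 1 gets index 2 * 3 + 1 = 7, and GetPhiPlaceholderAt(7) inverts
  // this mapping.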
  size_t PhiPlaceholderIndex(PhiPlaceholder phi_placeholder) const {
    size_t res =
        phi_placeholder.GetBlockId() * heap_location_collector_.GetNumberOfHeapLocations() +
        phi_placeholder.GetHeapLocation();
    DCHECK_EQ(phi_placeholder, GetPhiPlaceholderAt(res))
        << res << " blks: " << GetGraph()->GetBlocks().size()
        << " hls: " << heap_location_collector_.GetNumberOfHeapLocations();
    return res;
  }

  size_t PhiPlaceholderIndex(Value phi_placeholder) const {
    return PhiPlaceholderIndex(phi_placeholder.GetPhiPlaceholder());
  }

  bool IsEscapingObject(ReferenceInfo* info) { return !info->IsSingletonAndRemovable(); }

  PhiPlaceholder GetPhiPlaceholderAt(size_t off) const {
    DCHECK_LT(off, num_phi_placeholders_);
    size_t id = off % heap_location_collector_.GetNumberOfHeapLocations();
    // Technically this should be (off - id) / NumberOfHeapLocations
    // but due to truncation it's all the same.
    size_t blk_id = off / heap_location_collector_.GetNumberOfHeapLocations();
    return GetPhiPlaceholder(blk_id, id);
  }

  PhiPlaceholder GetPhiPlaceholder(uint32_t block_id, size_t idx) const {
    DCHECK(GetGraph()->GetBlocks()[block_id] != nullptr) << block_id;
    return PhiPlaceholder(block_id, idx);
  }

  Value Replacement(Value value) const {
    DCHECK(value.NeedsPhi()) << value << " phase: " << current_phase_;
    Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
    DCHECK(replacement.IsUnknown() || replacement.IsInstruction());
    DCHECK(replacement.IsUnknown() ||
           FindSubstitute(replacement.GetInstruction()) == replacement.GetInstruction());
    return replacement;
  }

  Value ReplacementOrValue(Value value) const {
    if (value.NeedsPhi() && phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
      return Replacement(value);
    } else {
      DCHECK_IMPLIES(value.IsInstruction(),
                     FindSubstitute(value.GetInstruction()) == value.GetInstruction());
      return value;
    }
  }

  // The record of a heap value and instruction(s) that feed that value.
  struct ValueRecord {
    Value value;
    Value stored_by;
  };

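  // Find a HTypeConversion of `value` to `expected_type` that dominates
  // `instruction`, moving it earlier in the block if necessary, or create and
  // insert a new one before `instruction`. Return null if no conversion is
  // needed (e.g. the conversion is implicit).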
  HTypeConversion* FindOrAddTypeConversionIfNecessary(HInstruction* instruction,
                                                      HInstruction* value,
                                                      DataType::Type expected_type) {
    // We never add a type conversion to a boolean value.
    if (expected_type == DataType::Type::kBool ||
        DataType::IsTypeConversionImplicit(value->GetType(), expected_type) ||
        // TODO: This prevents type conversion of default values but we can still insert
        // type conversion of other constants and there is no constant folding pass after LSE.
        IsZeroBitPattern(value)) {
      return nullptr;
    }

    // Check if there is already a suitable TypeConversion we can reuse.
    for (const HUseListNode<HInstruction*>& use : value->GetUses()) {
      if (use.GetUser()->IsTypeConversion() &&
          use.GetUser()->GetType() == expected_type &&
          // TODO: We could move the TypeConversion to a common dominator
          // if it does not cross an irreducible loop header.
          use.GetUser()->GetBlock()->Dominates(instruction->GetBlock()) &&
          // Don't share across irreducible loop headers.
          // TODO: can be more fine-grained than this by testing each dominator.
          (use.GetUser()->GetBlock() == instruction->GetBlock() ||
           !GetGraph()->HasIrreducibleLoops())) {
        if (use.GetUser()->GetBlock() == instruction->GetBlock() &&
            use.GetUser()->GetBlock()->GetInstructions().FoundBefore(instruction, use.GetUser())) {
          // Move the TypeConversion before the instruction.
          use.GetUser()->MoveBefore(instruction);
        }
        DCHECK(use.GetUser()->StrictlyDominates(instruction));
        return use.GetUser()->AsTypeConversion();
      }
    }

    // We must create a new TypeConversion instruction.
    HTypeConversion* type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
          expected_type, value, instruction->GetDexPc());
    instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
    return type_conversion;
  }

  // Find an instruction's substitute if it's a removed load.
  // Return the same instruction if it should not be removed.
  HInstruction* FindSubstitute(HInstruction* instruction) const {
    size_t id = static_cast<size_t>(instruction->GetId());
    if (id >= substitute_instructions_for_loads_.size()) {
      // New Phi (may not be in the graph yet), or default value.
      DCHECK(!IsLoad(instruction));
      return instruction;
    }
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    DCHECK(substitute == nullptr || IsLoad(instruction));
    return (substitute != nullptr) ? substitute : instruction;
  }

  void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
    DCHECK(IsLoad(load));
    DCHECK_EQ(FindSubstitute(load), load);
    DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
        "Unexpected heap_value that has a substitute " << heap_value->DebugName();

    // The load expects to load the heap value as type load->GetType().
    // However the tracked heap value may not be of that type. An explicit
    // type conversion may be needed.
    // There are actually three types involved here:
    // (1) tracked heap value's type (type A)
    // (2) heap location (field or element)'s type (type B)
    // (3) load's type (type C)
    // We guarantee that type A stored as type B and then fetched out as
    // type C is the same as casting from type A to type C directly, since
    // type B and type C will have the same size which is guaranteed in
    // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
    // So we only need one type conversion from type A to type C.
    HTypeConversion* type_conversion = FindOrAddTypeConversionIfNecessary(
        load, heap_value, load->GetType());

    substitute_instructions_for_loads_[load->GetId()] =
        type_conversion != nullptr ? type_conversion : heap_value;
  }

  static bool IsLoad(HInstruction* instruction) {
    // An unresolved load is not treated as a load.
    return instruction->IsInstanceFieldGet() ||
           instruction->IsStaticFieldGet() ||
           instruction->IsVecLoad() ||
           instruction->IsArrayGet();
  }

  static bool IsStore(HInstruction* instruction) {
    // An unresolved store is not treated as a store.
    return instruction->IsInstanceFieldSet() ||
           instruction->IsArraySet() ||
           instruction->IsVecStore() ||
           instruction->IsStaticFieldSet();
  }

  // Check if it is allowed to use default values or Phis for the specified load.
  static bool IsDefaultOrPhiAllowedForLoad(HInstruction* instruction) {
    DCHECK(IsLoad(instruction));
    // Using defaults for VecLoads requires creating additional vector operations.
    // As there are some issues with scheduling vector operations, it is better
    // to avoid creating them.
    return !instruction->IsVecOperation();
  }

  // Keep the store referenced by the instruction, or all stores that feed a Phi placeholder.
  // This is necessary if the stored heap value can be observed.
  void KeepStores(Value value) {
    if (value.IsPureUnknown() || value.IsPartialUnknown()) {
      return;
    }
    if (value.IsMergedUnknown() || value.NeedsPhi()) {
      phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
    } else {
      HInstruction* instruction = value.GetInstruction();
      DCHECK(IsStore(instruction));
      kept_stores_.SetBit(instruction->GetId());
    }
  }

  // If a heap location X may alias with the heap location at `loc_index`
  // and the heap value of that heap location X holds a store, keep that store.
  // It's needed for a dependent load that's not eliminated since any store
  // that may put a value into the load's heap location needs to be kept.
  void KeepStoresIfAliasedToLocation(ScopedArenaVector<ValueRecord>& heap_values,
                                     size_t loc_index) {
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      if (i == loc_index) {
        // We use this function when reading a location with an unknown value and
        // therefore we cannot know what exact store wrote that unknown value.
        // But we can have a phi placeholder here marking multiple stores to keep.
        DCHECK(!heap_values[i].stored_by.IsInstruction());
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::PureUnknown();
      } else if (heap_location_collector_.MayAlias(i, loc_index)) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::PureUnknown();
      }
    }
  }

  HInstruction* GetDefaultValue(DataType::Type type) {
    switch (type) {
      case DataType::Type::kReference:
        return GetGraph()->GetNullConstant();
      case DataType::Type::kBool:
      case DataType::Type::kUint8:
      case DataType::Type::kInt8:
      case DataType::Type::kUint16:
      case DataType::Type::kInt16:
      case DataType::Type::kInt32:
        return GetGraph()->GetIntConstant(0);
      case DataType::Type::kInt64:
        return GetGraph()->GetLongConstant(0);
      case DataType::Type::kFloat32:
        return GetGraph()->GetFloatConstant(0);
      case DataType::Type::kFloat64:
        return GetGraph()->GetDoubleConstant(0);
      default:
        UNREACHABLE();
    }
  }

  bool CanValueBeKeptIfSameAsNew(Value value,
                                 HInstruction* new_value,
                                 HInstruction* new_value_set_instr) {
    // For field/array set operations, if the value is the same as the new_value,
    // the store can be kept even if aliasing happens, as all aliased operations
    // will access the same memory range.
    // For vector values, this is not true. For example:
    //  packed_data = [0xA, 0xB, 0xC, 0xD];            <-- Different values in each lane.
    //  VecStore array[i  ,i+1,i+2,i+3] = packed_data;
    //  VecStore array[i+1,i+2,i+3,i+4] = packed_data; <-- We are here (partial overlap).
    //  VecLoad  vx = array[i,i+1,i+2,i+3];            <-- Cannot be eliminated because the value
    //                                                     here is not packed_data anymore.
    //
    // TODO: to allow such 'same value' optimization on vector data,
    // LSA needs to report finer-grained may-alias information:
    // (1) May alias due to partial overlap of two vector accesses,
    //     e.g. a[i..i+3] and a[i+1,..,i+4].
    // (2) May alias due to complete overlap of two vector accesses,
    //     e.g. a[i..i+3] and b[i..i+3].
    // (3) May alias but the exact relationship between the two locations is unknown,
    //     e.g. a[i..i+3] and b[j..j+3], where the values of a, b, i, j are all unknown.
    // This 'same value' optimization can apply only in case (2).
    if (new_value_set_instr->IsVecOperation()) {
      return false;
    }

    return value.Equals(new_value);
  }

  Value PrepareLoopValue(HBasicBlock* block, size_t idx);
  Value PrepareLoopStoredBy(HBasicBlock* block, size_t idx);
  void PrepareLoopRecords(HBasicBlock* block);
  Value MergePredecessorValues(HBasicBlock* block, size_t idx);
  void MergePredecessorRecords(HBasicBlock* block);

  void MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type);

  void VisitGetLocation(HInstruction* instruction, size_t idx);
  void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value);
  void RecordFieldInfo(const FieldInfo* info, size_t heap_loc) {
    field_infos_[heap_loc] = info;
  }

  void VisitBasicBlock(HBasicBlock* block) override;

  enum class Phase {
    kLoadElimination,
    kStoreElimination,
  };

  bool MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const;

  bool TryReplacingLoopPhiPlaceholderWithDefault(
      PhiPlaceholder phi_placeholder,
      DataType::Type type,
      /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
  bool TryReplacingLoopPhiPlaceholderWithSingleInput(
      PhiPlaceholder phi_placeholder,
      /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
  std::optional<PhiPlaceholder> FindLoopPhisToMaterialize(
      PhiPlaceholder phi_placeholder,
      /*out*/ ArenaBitVector* phi_placeholders_to_materialize,
      DataType::Type type,
      bool can_use_default_or_phi);
  bool MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
                           DataType::Type type);
  bool MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes, DataType::Type type);
  bool MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
                           DataType::Type type);
  bool FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type);
  std::optional<PhiPlaceholder> TryToMaterializeLoopPhis(PhiPlaceholder phi_placeholder,
                                                         HInstruction* load);
  void ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input);
  void ProcessLoadsRequiringLoopPhis();

  void SearchPhiPlaceholdersForKeptStores();
  void UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record);
  void FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder, DataType::Type type);
  void FindStoresWritingOldValues();
  void FinishFullLSE();

  void HandleAcquireLoad(HInstruction* instruction) {
    DCHECK((instruction->IsInstanceFieldGet() && instruction->AsInstanceFieldGet()->IsVolatile()) ||
           (instruction->IsStaticFieldGet() && instruction->AsStaticFieldGet()->IsVolatile()) ||
           (instruction->IsMonitorOperation() && instruction->AsMonitorOperation()->IsEnter()))
        << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();

    // Acquire operations, e.g. MONITOR_ENTER, change the thread's view of the memory, so we must
    // invalidate all current values.
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      KeepStores(heap_values[i].stored_by);
      heap_values[i].stored_by = Value::PureUnknown();
      heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
    }

    // Note that there's no need to record the load as subsequent acquire loads shouldn't be
    // eliminated either.
  }

  void HandleReleaseStore(HInstruction* instruction) {
    DCHECK((instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->IsVolatile()) ||
           (instruction->IsStaticFieldSet() && instruction->AsStaticFieldSet()->IsVolatile()) ||
           (instruction->IsMonitorOperation() && !instruction->AsMonitorOperation()->IsEnter()))
        << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();

    // Release operations, e.g. MONITOR_EXIT, do not affect this thread's view of the memory, but
    // they will push the modifications for other threads to see. Therefore, we must keep the
    // stores but there's no need to clobber the values.
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      KeepStores(heap_values[i].stored_by);
      heap_values[i].stored_by = Value::PureUnknown();
    }

    // Note that there's no need to record the store as subsequent release stores shouldn't be
    // eliminated either.
  }

  void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    if (instruction->IsVolatile()) {
      ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
          heap_location_collector_.HuntForOriginalReference(object));
      if (!ref_info->IsSingletonAndRemovable()) {
        HandleAcquireLoad(instruction);
        return;
      }
      // Treat it as a normal load if it is a removable singleton.
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field));
  }

  void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    if (instruction->IsVolatile()) {
      ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
          heap_location_collector_.HuntForOriginalReference(object));
      if (!ref_info->IsSingletonAndRemovable()) {
        HandleReleaseStore(instruction);
        return;
      }
      // Treat it as a normal store if it is a removable singleton.
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
    if (instruction->IsVolatile()) {
      HandleAcquireLoad(instruction);
      return;
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* cls = instruction->InputAt(0);
    VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field));
  }

  void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
    if (instruction->IsVolatile()) {
      HandleReleaseStore(instruction);
      return;
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* cls = instruction->InputAt(0);
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitMonitorOperation(HMonitorOperation* monitor_op) override {
    HInstruction* object = monitor_op->InputAt(0);
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
        heap_location_collector_.HuntForOriginalReference(object));
    if (ref_info->IsSingletonAndRemovable()) {
      // If the object is a removable singleton, we know that no other threads will have
      // access to it, and we can remove the MonitorOperation instruction.
      // MONITOR_ENTER throws when encountering a null object. If `object` is a removable
      // singleton, it is guaranteed to be non-null so we don't have to worry about the NullCheck.
      DCHECK(!object->CanBeNull());
      monitor_op->GetBlock()->RemoveInstruction(monitor_op);
      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedMonitorOp);
      return;
    }

    // We detected a monitor operation that we couldn't remove. See also LSEVisitor::Run().
    monitor_op->GetBlock()->GetGraph()->SetHasMonitorOperations(true);
    if (monitor_op->IsEnter()) {
      HandleAcquireLoad(monitor_op);
    } else {
      HandleReleaseStore(monitor_op);
    }
  }

  void VisitArrayGet(HArrayGet* instruction) override {
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitArraySet(HArraySet* instruction) override {
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitVecLoad(HVecLoad* instruction) override {
    DCHECK(!instruction->IsPredicated());
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitVecStore(HVecStore* instruction) override {
    DCHECK(!instruction->IsPredicated());
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitDeoptimize(HDeoptimize* instruction) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = instruction->GetBlock()->IsTryBlock();
    HBasicBlock* block = instruction->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      Value* stored_by = &heap_values[i].stored_by;
      if (stored_by->IsUnknown()) {
        continue;
      }
      // Stores are generally observable after deoptimization, except
      // for singletons that don't escape in the deoptimization environment.
      bool observable = true;
      ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (!inside_a_try && info->IsSingleton()) {
        HInstruction* reference = info->GetReference();
        // Finalizable objects always escape.
        const bool finalizable_object =
            reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
        if (!finalizable_object && !IsEscapingObject(info)) {
          // Check whether the reference for a store is used by an environment local of
          // the HDeoptimize. If not, the singleton is not observed after deoptimization.
          const HUseList<HEnvironment*>& env_uses = reference->GetEnvUses();
          observable = std::any_of(
              env_uses.begin(),
              env_uses.end(),
              [instruction](const HUseListNode<HEnvironment*>& use) {
                return use.GetUser()->GetHolder() == instruction;
              });
        }
      }
      if (observable) {
        KeepStores(*stored_by);
        *stored_by = Value::PureUnknown();
      }
    }
  }
1002 
1003   // Keep necessary stores before exiting a method via return/throw.
HandleExit(HBasicBlock * block,bool must_keep_stores=false)1004   void HandleExit(HBasicBlock* block, bool must_keep_stores = false) {
1005     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1006     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1007       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1008       if (must_keep_stores || IsEscapingObject(ref_info)) {
1009         KeepStores(heap_values[i].stored_by);
1010         heap_values[i].stored_by = Value::PureUnknown();
1011       }
1012     }
1013   }
1014 
VisitReturn(HReturn * instruction)1015   void VisitReturn(HReturn* instruction) override {
1016     HandleExit(instruction->GetBlock());
1017   }
1018 
VisitReturnVoid(HReturnVoid * return_void)1019   void VisitReturnVoid(HReturnVoid* return_void) override {
1020     HandleExit(return_void->GetBlock());
1021   }
1022 
HandleThrowingInstruction(HInstruction * instruction)1023   void HandleThrowingInstruction(HInstruction* instruction) {
1024     DCHECK(instruction->CanThrow());
1025     // If we are inside of a try, singletons can become visible since we may not exit the method.
1026     HandleExit(instruction->GetBlock(), instruction->GetBlock()->IsTryBlock());
1027   }
1028 
VisitMethodEntryHook(HMethodEntryHook * method_entry)1029   void VisitMethodEntryHook(HMethodEntryHook* method_entry) override {
1030     HandleThrowingInstruction(method_entry);
1031   }
1032 
VisitMethodExitHook(HMethodExitHook * method_exit)1033   void VisitMethodExitHook(HMethodExitHook* method_exit) override {
1034     HandleThrowingInstruction(method_exit);
1035   }
1036 
VisitDivZeroCheck(HDivZeroCheck * div_zero_check)1037   void VisitDivZeroCheck(HDivZeroCheck* div_zero_check) override {
1038     HandleThrowingInstruction(div_zero_check);
1039   }
1040 
VisitNullCheck(HNullCheck * null_check)1041   void VisitNullCheck(HNullCheck* null_check) override {
1042     HandleThrowingInstruction(null_check);
1043   }
1044 
VisitBoundsCheck(HBoundsCheck * bounds_check)1045   void VisitBoundsCheck(HBoundsCheck* bounds_check) override {
1046     HandleThrowingInstruction(bounds_check);
1047   }
1048 
VisitLoadClass(HLoadClass * load_class)1049   void VisitLoadClass(HLoadClass* load_class) override {
1050     if (load_class->CanThrow()) {
1051       HandleThrowingInstruction(load_class);
1052     }
1053   }
1054 
VisitLoadString(HLoadString * load_string)1055   void VisitLoadString(HLoadString* load_string) override {
1056     if (load_string->CanThrow()) {
1057       HandleThrowingInstruction(load_string);
1058     }
1059   }
1060 
VisitLoadMethodHandle(HLoadMethodHandle * load_method_handle)1061   void VisitLoadMethodHandle(HLoadMethodHandle* load_method_handle) override {
1062     HandleThrowingInstruction(load_method_handle);
1063   }
1064 
VisitLoadMethodType(HLoadMethodType * load_method_type)1065   void VisitLoadMethodType(HLoadMethodType* load_method_type) override {
1066     HandleThrowingInstruction(load_method_type);
1067   }
1068 
VisitStringBuilderAppend(HStringBuilderAppend * sb_append)1069   void VisitStringBuilderAppend(HStringBuilderAppend* sb_append) override {
1070     HandleThrowingInstruction(sb_append);
1071   }
1072 
VisitThrow(HThrow * throw_instruction)1073   void VisitThrow(HThrow* throw_instruction) override {
1074     HandleThrowingInstruction(throw_instruction);
1075   }
1076 
VisitCheckCast(HCheckCast * check_cast)1077   void VisitCheckCast(HCheckCast* check_cast) override {
1078     HandleThrowingInstruction(check_cast);
1079   }
1080 
HandleInvoke(HInstruction * instruction)1081   void HandleInvoke(HInstruction* instruction) {
1082     // If `instruction` can throw we have to presume all stores are visible.
1083     const bool can_throw = instruction->CanThrow();
1084     // If we are in a try, even singletons are observable.
1085     const bool can_throw_inside_a_try = can_throw && instruction->GetBlock()->IsTryBlock();
1086     SideEffects side_effects = instruction->GetSideEffects();
1087     ScopedArenaVector<ValueRecord>& heap_values =
1088         heap_values_for_[instruction->GetBlock()->GetBlockId()];
1089     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1090       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1091       // We don't need to do anything if the reference has not escaped at this point.
1092       // This is true if we never escape.
1093       if (!can_throw_inside_a_try && ref_info->IsSingleton()) {
1094         // Singleton references cannot be seen by the callee.
1095       } else {
1096         if (can_throw || side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) {
1097           // Previous stores may become visible (read) and/or impossible for LSE to track (write).
1098           KeepStores(heap_values[i].stored_by);
1099           heap_values[i].stored_by = Value::PureUnknown();
1100         }
1101         if (side_effects.DoesAnyWrite()) {
1102           // The value may be clobbered.
1103           heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
1104         }
1105       }
1106     }
1107   }
1108 
VisitInvoke(HInvoke * invoke)1109   void VisitInvoke(HInvoke* invoke) override {
1110     HandleInvoke(invoke);
1111   }
1112 
VisitClinitCheck(HClinitCheck * clinit)1113   void VisitClinitCheck(HClinitCheck* clinit) override {
1114     // Class initialization check can result in class initializer calling arbitrary methods.
1115     HandleInvoke(clinit);
1116   }
1117 
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)1118   void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override {
1119     // Conservatively treat it as an invocation.
1120     HandleInvoke(instruction);
1121   }
1122 
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)1123   void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override {
1124     // Conservatively treat it as an invocation.
1125     HandleInvoke(instruction);
1126   }
1127 
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)1128   void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override {
1129     // Conservatively treat it as an invocation.
1130     HandleInvoke(instruction);
1131   }
1132 
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)1133   void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override {
1134     // Conservatively treat it as an invocation.
1135     HandleInvoke(instruction);
1136   }
1137 
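  // Illustrative sketch (hypothetical Java input): fields of a new instance
  // start out at their default values, so
  //
  //   Point p = new Point();
  //   int a = p.x;        // the location holds the default value, so this
  //                       //   load can be replaced by the constant 0.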
1138   void VisitNewInstance(HNewInstance* new_instance) override {
1139     // If we are in a try, even singletons are observable.
1140     const bool inside_a_try = new_instance->GetBlock()->IsTryBlock();
1141     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
1142     if (ref_info == nullptr) {
1143       // new_instance isn't used for field accesses. No need to process it.
1144       return;
1145     }
1146     if (ref_info->IsSingletonAndRemovable() && !new_instance->NeedsChecks()) {
1147       DCHECK(!new_instance->IsFinalizable());
1148       // new_instance can potentially be eliminated.
1149       singleton_new_instances_.push_back(new_instance);
1150     }
1151     HBasicBlock* block = new_instance->GetBlock();
1152     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1153     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1154       ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1155       HInstruction* ref = info->GetReference();
1156       size_t offset = heap_location_collector_.GetHeapLocation(i)->GetOffset();
1157       if (ref == new_instance) {
1158         if (offset >= mirror::kObjectHeaderSize ||
1159             MemberOffset(offset) == mirror::Object::MonitorOffset()) {
1160           // Instance fields other than the header fields are set to default heap values.
1161           // The shadow$_monitor_ header field also starts at the default value.
1162           heap_values[i].value = Value::Default();
1163           heap_values[i].stored_by = Value::PureUnknown();
1164         } else if (MemberOffset(offset) == mirror::Object::ClassOffset()) {
1165           // The shadow$_klass_ field is special and has an actual value however.
1166           heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass());
1167           heap_values[i].stored_by = Value::PureUnknown();
1168         }
1169       } else if (inside_a_try || IsEscapingObject(info)) {
1170         // Since NewInstance can throw, we presume all previous stores could be visible.
1171         KeepStores(heap_values[i].stored_by);
1172         heap_values[i].stored_by = Value::PureUnknown();
1173       }
1174     }
1175   }
1176 
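  // Illustrative sketch (hypothetical Java input): array elements likewise
  // start out at their default values, so
  //
  //   int[] a = new int[2];
  //   int b = a[0];       // can be replaced by the constant 0.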
1177   void VisitNewArray(HNewArray* new_array) override {
1178     // If we are in a try, even singletons are observable.
1179     const bool inside_a_try = new_array->GetBlock()->IsTryBlock();
1180     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
1181     if (ref_info == nullptr) {
1182       // new_array isn't used for array accesses. No need to process it.
1183       return;
1184     }
1185     if (ref_info->IsSingletonAndRemovable()) {
1186       if (new_array->GetLength()->IsIntConstant() &&
1187           new_array->GetLength()->AsIntConstant()->GetValue() >= 0) {
1188         // new_array can potentially be eliminated.
1189         singleton_new_instances_.push_back(new_array);
1190       } else {
1191         // new_array may throw NegativeArraySizeException. Keep it.
1192       }
1193     }
1194     HBasicBlock* block = new_array->GetBlock();
1195     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1196     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1197       HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
1198       ReferenceInfo* info = location->GetReferenceInfo();
1199       HInstruction* ref = info->GetReference();
1200       if (ref == new_array && location->GetIndex() != nullptr) {
1201         // Array elements are set to default heap values.
1202         heap_values[i].value = Value::Default();
1203         heap_values[i].stored_by = Value::PureUnknown();
1204       } else if (inside_a_try || IsEscapingObject(info)) {
1205         // Since NewArray can throw, we presume all previous stores could be visible.
1206         KeepStores(heap_values[i].stored_by);
1207         heap_values[i].stored_by = Value::PureUnknown();
1208       }
1209     }
1210   }
1211 
1212   void VisitInstruction(HInstruction* instruction) override {
1213     // Throwing instructions must be handled specially.
1214     DCHECK(!instruction->CanThrow());
1215   }
1216 
1217   const HeapLocationCollector& heap_location_collector_;
1218 
1219   // Use local allocator for allocating memory.
1220   ScopedArenaAllocator allocator_;
1221 
1222   // The maximum number of unique Phi placeholders that may be needed.
1223   size_t num_phi_placeholders_;
1224 
1225   // One array of heap value records for each block.
1226   ScopedArenaVector<ScopedArenaVector<ValueRecord>> heap_values_for_;
1227 
1228   // We record loads and stores for re-processing when we find a loop Phi placeholder
1229   // with unknown value from a predecessor, and also for removing stores that are
1230   // found to be dead, i.e. not marked in `kept_stores_` at the end.
1231   struct LoadStoreRecord {
1232     HInstruction* load_or_store;
1233     size_t heap_location_index;
1234   };
1235   ScopedArenaVector<LoadStoreRecord> loads_and_stores_;
1236 
1237   // We record the substitute instructions for loads that should be
1238   // eliminated but may be used by heap locations. They'll be removed
1239   // in the end. These are indexed by the load's id.
1240   ScopedArenaVector<HInstruction*> substitute_instructions_for_loads_;
1241 
1242   // Record stores to keep in a bit vector indexed by instruction ID.
1243   ArenaBitVector kept_stores_;
1244   // When we need to keep all stores that feed a Phi placeholder, we just record the
1245   // index of that placeholder for processing after graph traversal.
1246   ArenaBitVector phi_placeholders_to_search_for_kept_stores_;
1247 
1248   // Loads that would require a loop Phi to replace are recorded for processing
1249   // later as we do not have enough information from back-edges to determine if
1250   // a suitable Phi can be found or created when we visit these loads.
1251   // This is a flat "map" indexed by the load instruction id.
1252   ScopedArenaVector<ValueRecord*> loads_requiring_loop_phi_;
1253 
1254   // For stores, record the old value records that were replaced and the stored values.
1255   struct StoreRecord {
1256     StoreRecord(ValueRecord old_value_record_in, HInstruction* stored_value_in)
1257         : old_value_record(old_value_record_in), stored_value(stored_value_in) {}
1258     ValueRecord old_value_record;
1259     HInstruction* stored_value;
1260   };
1261   // This is a flat "map" indexed by the store instruction id.
1262   ScopedArenaVector<StoreRecord*> store_records_;
1263 
1264   // Replacements for Phi placeholders.
1265   // The invalid heap value is used to mark Phi placeholders that cannot be replaced.
1266   ScopedArenaVector<Value> phi_placeholder_replacements_;
1267 
1268   ScopedArenaVector<HInstruction*> singleton_new_instances_;
1269 
1270   // The field infos for each heap location (if relevant).
1271   ScopedArenaVector<const FieldInfo*> field_infos_;
1272 
1273   Phase current_phase_;
1274 
1275   friend struct ScopedRestoreHeapValues;
1276 
1277   friend std::ostream& operator<<(std::ostream& os, const Value& v);
1278   friend std::ostream& operator<<(std::ostream& os, const PriorValueHolder& v);
1279   friend std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase);
1280 
1281   DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
1282 };
1283 
1284 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PriorValueHolder& p) {
1285   p.Dump(oss);
1286   return oss;
1287 }
1288 
1289 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase) {
1290   switch (phase) {
1291     case LSEVisitor::Phase::kLoadElimination:
1292       return oss << "kLoadElimination";
1293     case LSEVisitor::Phase::kStoreElimination:
1294       return oss << "kStoreElimination";
1295   }
1296 }
1297 
1298 void LSEVisitor::PriorValueHolder::Dump(std::ostream& oss) const {
1299   if (IsDefault()) {
1300     oss << "Default";
1301   } else if (IsPhi()) {
1302     oss << "Phi: " << GetPhiPlaceholder();
1303   } else {
1304     oss << "Instruction: " << *GetInstruction();
1305   }
1306 }
1307 
1308 constexpr LSEVisitor::PriorValueHolder::PriorValueHolder(Value val)
1309     : value_(Marker{}) {
1310   DCHECK(!val.IsInvalid() && !val.IsPureUnknown());
1311   if (val.IsPartialUnknown()) {
1312     value_ = val.GetPriorValue().value_;
1313   } else if (val.IsMergedUnknown() || val.NeedsPhi()) {
1314     value_ = val.GetPhiPlaceholder();
1315   } else if (val.IsInstruction()) {
1316     value_ = val.GetInstruction();
1317   } else {
1318     DCHECK(val.IsDefault());
1319   }
1320 }
1321 
1322 constexpr bool operator==(const LSEVisitor::Marker&, const LSEVisitor::Marker&) {
1323   return true;
1324 }
1325 
1326 constexpr bool operator==(const LSEVisitor::PriorValueHolder& p1,
1327                           const LSEVisitor::PriorValueHolder& p2) {
1328   return p1.Equals(p2);
1329 }
1330 
1331 constexpr bool operator==(const LSEVisitor::PhiPlaceholder& p1,
1332                           const LSEVisitor::PhiPlaceholder& p2) {
1333   return p1.Equals(p2);
1334 }
1335 
1336 constexpr bool operator==(const LSEVisitor::Value::NeedsLoopPhiMarker& p1,
1337                           const LSEVisitor::Value::NeedsLoopPhiMarker& p2) {
1338   return p1.phi_ == p2.phi_;
1339 }
1340 
1341 constexpr bool operator==(const LSEVisitor::Value::NeedsNonLoopPhiMarker& p1,
1342                           const LSEVisitor::Value::NeedsNonLoopPhiMarker& p2) {
1343   return p1.phi_ == p2.phi_;
1344 }
1345 
1346 constexpr bool operator==(const LSEVisitor::Value::MergedUnknownMarker& p1,
1347                           const LSEVisitor::Value::MergedUnknownMarker& p2) {
1348   return p1.phi_ == p2.phi_;
1349 }
1350 
1351 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PhiPlaceholder& p) {
1352   p.Dump(oss);
1353   return oss;
1354 }
1355 
1356 LSEVisitor::Value LSEVisitor::PriorValueHolder::ToValue() const {
1357   if (IsDefault()) {
1358     return Value::Default();
1359   } else if (IsPhi()) {
1360     return Value::ForLoopPhiPlaceholder(GetPhiPlaceholder());
1361   } else {
1362     return Value::ForInstruction(GetInstruction());
1363   }
1364 }
1365 
1366 constexpr bool LSEVisitor::Value::ExactEquals(LSEVisitor::Value other) const {
1367   return value_ == other.value_;
1368 }
1369 
1370 constexpr bool LSEVisitor::Value::Equals(LSEVisitor::Value other) const {
1371   // Only valid values can be compared.
1372   DCHECK(IsValid());
1373   DCHECK(other.IsValid());
1374   if (value_ == other.value_) {
1375     // Note: Two unknown values are considered different.
1376     return !IsUnknown();
1377   } else {
1378     // Default is considered equal to zero-bit-pattern instructions.
1379     return (IsDefault() && other.IsInstruction() && IsZeroBitPattern(other.GetInstruction())) ||
1380             (other.IsDefault() && IsInstruction() && IsZeroBitPattern(GetInstruction()));
1381   }
1382 }
1383 
1384 std::ostream& LSEVisitor::Value::Dump(std::ostream& os) const {
1385   if (std::holds_alternative<LSEVisitor::Value::ValuelessType>(value_)) {
1386     switch (GetValuelessType()) {
1387       case ValuelessType::kDefault:
1388         return os << "Default";
1389       case ValuelessType::kPureUnknown:
1390         return os << "PureUnknown";
1391       case ValuelessType::kInvalid:
1392         return os << "Invalid";
1393     }
1394   } else if (IsPartialUnknown()) {
1395     return os << "PartialUnknown[" << GetPriorValue() << "]";
1396   } else if (IsInstruction()) {
1397     return os << "Instruction[id: " << GetInstruction()->GetId()
1398               << ", block: " << GetInstruction()->GetBlock()->GetBlockId() << "]";
1399   } else if (IsMergedUnknown()) {
1400     return os << "MergedUnknown[block: " << GetPhiPlaceholder().GetBlockId()
1401               << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1402 
1403   } else if (NeedsLoopPhi()) {
1404     return os << "NeedsLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
1405               << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1406   } else {
1407     return os << "NeedsNonLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
1408               << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1409   }
1410 }
1411 
1412 std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) {
1413   return v.Dump(os);
1414 }
1415 
1416 LSEVisitor::LSEVisitor(HGraph* graph,
1417                        const HeapLocationCollector& heap_location_collector,
1418                        OptimizingCompilerStats* stats)
1419     : HGraphDelegateVisitor(graph, stats),
1420       heap_location_collector_(heap_location_collector),
1421       allocator_(graph->GetArenaStack()),
1422       num_phi_placeholders_(GetGraph()->GetBlocks().size() *
1423                             heap_location_collector_.GetNumberOfHeapLocations()),
1424       heap_values_for_(graph->GetBlocks().size(),
1425                        ScopedArenaVector<ValueRecord>(allocator_.Adapter(kArenaAllocLSE)),
1426                        allocator_.Adapter(kArenaAllocLSE)),
1427       loads_and_stores_(allocator_.Adapter(kArenaAllocLSE)),
1428       // We may add new instructions (default values, Phis) but we're not adding loads
1429       // or stores, so we shall not need to resize following vector and BitVector.
1430       substitute_instructions_for_loads_(
1431           graph->GetCurrentInstructionId(), nullptr, allocator_.Adapter(kArenaAllocLSE)),
1432       kept_stores_(&allocator_,
1433                    /*start_bits=*/graph->GetCurrentInstructionId(),
1434                    /*expandable=*/false,
1435                    kArenaAllocLSE),
1436       phi_placeholders_to_search_for_kept_stores_(&allocator_,
1437                                                   num_phi_placeholders_,
1438                                                   /*expandable=*/false,
1439                                                   kArenaAllocLSE),
1440       loads_requiring_loop_phi_(allocator_.Adapter(kArenaAllocLSE)),
1441       store_records_(allocator_.Adapter(kArenaAllocLSE)),
1442       phi_placeholder_replacements_(
1443           num_phi_placeholders_, Value::Invalid(), allocator_.Adapter(kArenaAllocLSE)),
1444       singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)),
1445       field_infos_(heap_location_collector_.GetNumberOfHeapLocations(),
1446                    allocator_.Adapter(kArenaAllocLSE)),
1447       current_phase_(Phase::kLoadElimination) {}
1448 
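// Illustrative sketch of the loop pre-header handling below (hypothetical Java
// input, not from the original sources):
//
//   obj.f = 1;            // known value in the pre-header
//   for (...) {
//     int x = obj.f;      // tracked as a loop Phi placeholder; if all
//     obj.f = x + 1;      //   back-edge values turn out to be known, the
//   }                     //   load is replaced by a real loop Phi.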
1449 LSEVisitor::Value LSEVisitor::PrepareLoopValue(HBasicBlock* block, size_t idx) {
1450   // If the pre-header value is known (which implies that the reference dominates this
1451   // block), use a Phi placeholder for the value in the loop header. If all predecessors
1452   // are later found to have a known value, we can replace loads from this location,
1453   // either with the pre-header value or with a new Phi. For array locations, the index
1454   // may be defined inside the loop but the only known value in that case should be the
1455   // default value or a Phi placeholder that can be replaced only with the default value.
1456   HLoopInformation* loop_info = block->GetLoopInformation();
1457   uint32_t pre_header_block_id = loop_info->GetPreHeader()->GetBlockId();
1458   Value pre_header_value = ReplacementOrValue(heap_values_for_[pre_header_block_id][idx].value);
1459   if (pre_header_value.IsUnknown()) {
1460     return pre_header_value;
1461   }
1462   if (kIsDebugBuild) {
1463     // Check that the reference indeed dominates this loop.
1464     HeapLocation* location = heap_location_collector_.GetHeapLocation(idx);
1465     HInstruction* ref = location->GetReferenceInfo()->GetReference();
1466     CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block))
1467         << GetGraph()->PrettyMethod();
1468     // Check that the index, if defined inside the loop, tracks a default value
1469     // or a Phi placeholder requiring a loop Phi.
1470     HInstruction* index = location->GetIndex();
1471     if (index != nullptr && loop_info->Contains(*index->GetBlock())) {
1472       CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default()))
1473           << GetGraph()->PrettyMethod() << " blk: " << block->GetBlockId() << " "
1474           << pre_header_value;
1475     }
1476   }
1477   PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1478   return ReplacementOrValue(Value::ForLoopPhiPlaceholder(phi_placeholder));
1479 }
1480 
1481 LSEVisitor::Value LSEVisitor::PrepareLoopStoredBy(HBasicBlock* block, size_t idx) {
1482   // Use the Phi placeholder for `stored_by` to make sure all incoming stores are kept
1483   // if the value in the location escapes. This is not applicable to singletons that are
1484   // defined inside the loop as they shall be dead in the loop header.
1485   const ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
1486   const HInstruction* reference = ref_info->GetReference();
1487   // Finalizable objects always escape.
1488   const bool is_finalizable =
1489       reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
1490   if (ref_info->IsSingleton() &&
1491       block->GetLoopInformation()->Contains(*reference->GetBlock()) &&
1492       !is_finalizable) {
1493     return Value::PureUnknown();
1494   }
1495   PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1496   return Value::ForLoopPhiPlaceholder(phi_placeholder);
1497 }
1498 
1499 void LSEVisitor::PrepareLoopRecords(HBasicBlock* block) {
1500   DCHECK(block->IsLoopHeader());
1501   int block_id = block->GetBlockId();
1502   HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
1503   ScopedArenaVector<ValueRecord>& pre_header_heap_values =
1504       heap_values_for_[pre_header->GetBlockId()];
1505   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1506   DCHECK_EQ(num_heap_locations, pre_header_heap_values.size());
1507   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1508   DCHECK(heap_values.empty());
1509 
1510   // Don't eliminate loads in irreducible loops.
1511   if (block->GetLoopInformation()->IsIrreducible()) {
1512     heap_values.resize(num_heap_locations,
1513                        {/*value=*/Value::Invalid(), /*stored_by=*/Value::PureUnknown()});
1514     // Also keep the stores before the loop header, including in blocks that were not visited yet.
1515     bool is_osr = GetGraph()->IsCompilingOsr();
1516     for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1517       heap_values[idx].value =
1518           is_osr ? Value::PureUnknown()
1519                  : Value::MergedUnknown(GetPhiPlaceholder(block->GetBlockId(), idx));
1520       KeepStores(Value::ForLoopPhiPlaceholder(GetPhiPlaceholder(block->GetBlockId(), idx)));
1521     }
1522     return;
1523   }
1524 
1525   // Fill `heap_values` based on values from pre-header.
1526   heap_values.reserve(num_heap_locations);
1527   for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1528     heap_values.push_back({ PrepareLoopValue(block, idx), PrepareLoopStoredBy(block, idx) });
1529   }
1530 }
1531 
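// Illustrative sketch of merging predecessor values (hypothetical Java input):
//
//   if (c) { obj.f = 1; } else { obj.f = 2; }
//   int x = obj.f;        // conflicting known values merge into a Phi
//                         //   placeholder; the load may later become Phi(1, 2).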
1532 LSEVisitor::Value LSEVisitor::MergePredecessorValues(HBasicBlock* block, size_t idx) {
1533   ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1534   DCHECK(!predecessors.empty());
1535   Value merged_value =
1536       ReplacementOrValue(heap_values_for_[predecessors[0]->GetBlockId()][idx].value);
1537   for (size_t i = 1u, size = predecessors.size(); i != size; ++i) {
1538     Value pred_value =
1539         ReplacementOrValue(heap_values_for_[predecessors[i]->GetBlockId()][idx].value);
1540     if (pred_value.Equals(merged_value)) {
1541       // Value is the same. No need to update our merged value.
1542       continue;
1543     } else if (pred_value.IsUnknown() || merged_value.IsUnknown()) {
1544       // At least one of the values is unknown and the two values differ.
1545       PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1546       merged_value = Value::MergedUnknown(phi_placeholder);
1547       // We know that at least one of the merge points is unknown (and both are
1548       // not pure-unknowns since that's captured above). This means that the
1549       // overall value needs to be a MergedUnknown. Just return that.
1550       break;
1551     } else {
1552       // There are conflicting known values. We may still be able to replace loads with a Phi.
1553       PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1554       // Propagate the need for a new loop Phi from all predecessors.
1555       bool needs_loop_phi = merged_value.NeedsLoopPhi() || pred_value.NeedsLoopPhi();
1556       merged_value = ReplacementOrValue(Value::ForPhiPlaceholder(phi_placeholder, needs_loop_phi));
1557     }
1558   }
1559   DCHECK_IMPLIES(merged_value.IsPureUnknown(), block->GetPredecessors().size() <= 1)
1560       << merged_value << " in " << GetGraph()->PrettyMethod();
1561   return merged_value;
1562 }
1563 
1564 void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) {
1565   if (block->IsExitBlock()) {
1566     // Exit block doesn't really merge values since the control flow ends in
1567     // its predecessors. Each predecessor needs to make sure stores are kept
1568     // if necessary.
1569     return;
1570   }
1571 
1572   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1573   DCHECK(heap_values.empty());
1574   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1575   if (block->GetPredecessors().empty() || block->IsCatchBlock()) {
1576     DCHECK_IMPLIES(block->GetPredecessors().empty(), block->IsEntryBlock());
1577     heap_values.resize(num_heap_locations,
1578                        {/*value=*/Value::PureUnknown(), /*stored_by=*/Value::PureUnknown()});
1579     return;
1580   }
1581 
1582   heap_values.reserve(num_heap_locations);
1583   for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1584     Value merged_value = MergePredecessorValues(block, idx);
1585     if (kIsDebugBuild) {
1586       if (merged_value.NeedsPhi()) {
1587         uint32_t block_id = merged_value.GetPhiPlaceholder().GetBlockId();
1588         CHECK(GetGraph()->GetBlocks()[block_id]->Dominates(block));
1589       } else if (merged_value.IsInstruction()) {
1590         CHECK(merged_value.GetInstruction()->GetBlock()->Dominates(block));
1591       }
1592     }
1593     ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1594     Value merged_stored_by = heap_values_for_[predecessors[0]->GetBlockId()][idx].stored_by;
1595     for (size_t predecessor_idx = 1u; predecessor_idx != predecessors.size(); ++predecessor_idx) {
1596       uint32_t predecessor_block_id = predecessors[predecessor_idx]->GetBlockId();
1597       Value stored_by = heap_values_for_[predecessor_block_id][idx].stored_by;
1598       if ((!stored_by.IsUnknown() || !merged_stored_by.IsUnknown()) &&
1599           !merged_stored_by.Equals(stored_by)) {
1600         // Use the Phi placeholder to track that we need to keep stores from all predecessors.
1601         PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1602         merged_stored_by = Value::ForNonLoopPhiPlaceholder(phi_placeholder);
1603         break;
1604       }
1605     }
1606     heap_values.push_back({ merged_value, merged_stored_by });
1607   }
1608 }
1609 
1610 static HInstruction* FindOrConstructNonLoopPhi(
1611     HBasicBlock* block,
1612     const ScopedArenaVector<HInstruction*>& phi_inputs,
1613     DataType::Type type) {
1614   for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
1615     HInstruction* phi = phi_it.Current();
1616     DCHECK_EQ(phi->InputCount(), phi_inputs.size());
1617     auto cmp = [](HInstruction* lhs, const HUserRecord<HInstruction*>& rhs) {
1618       return lhs == rhs.GetInstruction();
1619     };
1620     if (std::equal(phi_inputs.begin(), phi_inputs.end(), phi->GetInputRecords().begin(), cmp)) {
1621       return phi;
1622     }
1623   }
1624   ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
1625   HPhi* phi = new (allocator) HPhi(allocator, kNoRegNumber, phi_inputs.size(), type);
1626   for (size_t i = 0, size = phi_inputs.size(); i != size; ++i) {
1627     DCHECK_NE(phi_inputs[i]->GetType(), DataType::Type::kVoid) << phi_inputs[i]->DebugName();
1628     phi->SetRawInputAt(i, phi_inputs[i]);
1629   }
1630   block->AddPhi(phi);
1631   if (type == DataType::Type::kReference) {
1632     // Update reference type information. Pass invalid handles, these are not used for Phis.
1633     ReferenceTypePropagation rtp_fixup(block->GetGraph(),
1634                                        Handle<mirror::DexCache>(),
1635                                        /* is_first_run= */ false);
1636     rtp_fixup.Visit(phi);
1637   }
1638   return phi;
1639 }
1640 
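// Continuing the diamond sketch above (still a hypothetical example):
// materialization turns the recorded placeholder into an actual `HPhi`,
// reusing an existing Phi with identical inputs when one is present:
//
//   int x = obj.f;        // the placeholder at the merge block becomes
//                         //   HPhi(1, 2) and the load is replaced by it.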
1641 void LSEVisitor::MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type) {
1642   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
1643   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1644   size_t idx = phi_placeholder.GetHeapLocation();
1645 
1646   // Use local allocator to reduce peak memory usage.
1647   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1648   // Reuse the same vector for collecting phi inputs.
1649   ScopedArenaVector<HInstruction*> phi_inputs(allocator.Adapter(kArenaAllocLSE));
1650 
1651   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
1652   work_queue.push_back(phi_placeholder);
1653   while (!work_queue.empty()) {
1654     PhiPlaceholder current_phi_placeholder = work_queue.back();
1655     if (phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].IsValid()) {
1656       // This Phi placeholder was pushed to the `work_queue` followed by another Phi placeholder
1657       // that directly or indirectly depends on it, so it was already processed as part of the
1658       // other Phi placeholder's dependencies before this one got back to the top of the stack.
1659       work_queue.pop_back();
1660       continue;
1661     }
1662     uint32_t current_block_id = current_phi_placeholder.GetBlockId();
1663     HBasicBlock* current_block = blocks[current_block_id];
1664     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1665 
1666     // Non-loop Phis cannot depend on a loop Phi, so we should not see any loop header here.
1667     // And the only way for such merged value to reach a different heap location is through
1668     // a load at which point we materialize the Phi. Therefore all non-loop Phi placeholders
1669     // seen here are tied to one heap location.
1670     DCHECK(!current_block->IsLoopHeader())
1671         << current_phi_placeholder << " phase: " << current_phase_;
1672     DCHECK_EQ(current_phi_placeholder.GetHeapLocation(), idx);
1673 
1674     phi_inputs.clear();
1675     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1676       Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1677       DCHECK(!pred_value.IsPureUnknown()) << pred_value << " block " << current_block->GetBlockId()
1678                                           << " pred: " << predecessor->GetBlockId();
1679       if (pred_value.NeedsNonLoopPhi()) {
1680         // We need to process the Phi placeholder first.
1681         work_queue.push_back(pred_value.GetPhiPlaceholder());
1682       } else if (pred_value.IsDefault()) {
1683         phi_inputs.push_back(GetDefaultValue(type));
1684       } else {
1685         DCHECK(pred_value.IsInstruction()) << pred_value << " block " << current_block->GetBlockId()
1686                                            << " pred: " << predecessor->GetBlockId();
1687         phi_inputs.push_back(pred_value.GetInstruction());
1688       }
1689     }
1690     if (phi_inputs.size() == current_block->GetPredecessors().size()) {
1691       // All inputs are available. Find or construct the Phi replacement.
1692       phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)] =
1693           Value::ForInstruction(FindOrConstructNonLoopPhi(current_block, phi_inputs, type));
1694       // Remove the block from the queue.
1695       DCHECK_EQ(current_phi_placeholder, work_queue.back());
1696       work_queue.pop_back();
1697     }
1698   }
1699 }
1700 
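// Illustrative sketch of load elimination (hypothetical Java input):
//
//   obj.f = x;
//   int a = obj.f;        // record.value is `x`: the load is eliminated and
//   int b = obj.f;        //   uses of `a` and `b` are redirected to `x`.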
1701 void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
1702   DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1703   uint32_t block_id = instruction->GetBlock()->GetBlockId();
1704   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1705   ValueRecord& record = heap_values[idx];
1706   if (instruction->IsFieldAccess()) {
1707     RecordFieldInfo(&instruction->GetFieldInfo(), idx);
1708   }
1709   DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value)));
1710   loads_and_stores_.push_back({ instruction, idx });
1711   if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) &&
1712       !IsDefaultOrPhiAllowedForLoad(instruction)) {
1713     record.value = Value::PureUnknown();
1714   }
1715   if (record.value.IsDefault()) {
1716     KeepStores(record.stored_by);
1717     HInstruction* constant = GetDefaultValue(instruction->GetType());
1718     AddRemovedLoad(instruction, constant);
1719     record.value = Value::ForInstruction(constant);
1720   } else if (record.value.IsUnknown()) {
1721     // Load isn't eliminated. Put the load as the value into the HeapLocation.
1722     // This acts like GVN but with better aliasing analysis.
1723     Value old_value = record.value;
1724     record.value = Value::ForInstruction(instruction);
1725     KeepStoresIfAliasedToLocation(heap_values, idx);
1726     KeepStores(old_value);
1727   } else if (record.value.NeedsLoopPhi()) {
1728     // We do not know yet if the value is known for all back edges. Record for future processing.
1729     if (loads_requiring_loop_phi_.empty()) {
1730       loads_requiring_loop_phi_.resize(GetGraph()->GetCurrentInstructionId(), nullptr);
1731     }
1732     DCHECK_EQ(loads_requiring_loop_phi_[instruction->GetId()], nullptr);
1733     loads_requiring_loop_phi_[instruction->GetId()] =
1734         new (allocator_.Alloc<ValueRecord>(kArenaAllocLSE)) ValueRecord(record);
1735   } else {
1736     // This load can be eliminated but we may need to construct non-loop Phis.
1737     if (record.value.NeedsNonLoopPhi()) {
1738       MaterializeNonLoopPhis(record.value.GetPhiPlaceholder(), instruction->GetType());
1739       record.value = Replacement(record.value);
1740     }
1741     HInstruction* heap_value = FindSubstitute(record.value.GetInstruction());
1742     AddRemovedLoad(instruction, heap_value);
1743   }
1744 }
1745 
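// Illustrative sketch of a store of the same value (hypothetical Java input):
//
//   int x = obj.f;
//   obj.f = x;            // stores the value already tracked for the
//                         //   location, so the Equals() check below removes
//                         //   the store immediately.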
1746 void LSEVisitor::VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
1747   DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1748   DCHECK(!IsStore(value)) << value->DebugName();
1749   if (instruction->IsFieldAccess()) {
1750     RecordFieldInfo(&instruction->GetFieldInfo(), idx);
1751   }
1752   // value may already have a substitute.
1753   value = FindSubstitute(value);
1754   HBasicBlock* block = instruction->GetBlock();
1755   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1756   ValueRecord& record = heap_values[idx];
1757   DCHECK_IMPLIES(record.value.IsInstruction(),
1758                  FindSubstitute(record.value.GetInstruction()) == record.value.GetInstruction());
1759 
1760   if (record.value.Equals(value)) {
1761     // Store into the heap location with the same value.
1762     // This store can be eliminated right away.
1763     block->RemoveInstruction(instruction);
1764     return;
1765   }
1766 
1767   if (store_records_.empty()) {
1768     store_records_.resize(GetGraph()->GetCurrentInstructionId(), nullptr);
1769   }
1770   DCHECK_EQ(store_records_[instruction->GetId()], nullptr);
1771   store_records_[instruction->GetId()] =
1772       new (allocator_.Alloc<StoreRecord>(kArenaAllocLSE)) StoreRecord(record, value);
1773   loads_and_stores_.push_back({ instruction, idx });
1774 
1775   // If the `record.stored_by` specified a store from this block, it shall be removed
1776   // at the end, except for throwing ArraySet; it cannot be marked for keeping in
1777   // `kept_stores_` anymore after we update the `record.stored_by` below.
1778   DCHECK(!record.stored_by.IsInstruction() ||
1779          record.stored_by.GetInstruction()->GetBlock() != block ||
1780          record.stored_by.GetInstruction()->CanThrow() ||
1781          !kept_stores_.IsBitSet(record.stored_by.GetInstruction()->GetId()));
1782 
1783   if (instruction->CanThrow()) {
1784     // Previous stores can become visible.
1785     HandleThrowingInstruction(instruction);
1786     // We cannot remove a possibly throwing store.
1787     // After marking it as kept, it does not matter if we track it in `stored_by` or not.
1788     kept_stores_.SetBit(instruction->GetId());
1789   }
1790 
1791   // Update the record.
1792   // Note that the `value` can be a newly created `Phi` with an id that falls outside
1793   // the allocated `loads_requiring_loop_phi_` range.
1794   DCHECK_IMPLIES(IsLoad(value) && !loads_requiring_loop_phi_.empty(),
1795                  static_cast<size_t>(value->GetId()) < loads_requiring_loop_phi_.size());
1796   if (static_cast<size_t>(value->GetId()) < loads_requiring_loop_phi_.size() &&
1797       loads_requiring_loop_phi_[value->GetId()] != nullptr) {
1798     // Propagate the Phi placeholder to the record.
1799     record.value = loads_requiring_loop_phi_[value->GetId()]->value;
1800     DCHECK(record.value.NeedsLoopPhi());
1801   } else {
1802     record.value = Value::ForInstruction(value);
1803   }
1804   // Track the store in the value record. If the value is loaded or needed after
1805   // return/deoptimization later, this store isn't really redundant.
1806   record.stored_by = Value::ForInstruction(instruction);
1807 
1808   // This store may kill values in other heap locations due to aliasing.
1809   for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1810     if (i == idx ||
1811         heap_values[i].value.IsUnknown() ||
1812         CanValueBeKeptIfSameAsNew(heap_values[i].value, value, instruction) ||
1813         !heap_location_collector_.MayAlias(i, idx)) {
1814       continue;
1815     }
1816     // Kill heap locations that may alias and keep previous stores to these locations.
1817     KeepStores(heap_values[i].stored_by);
1818     heap_values[i].stored_by = Value::PureUnknown();
1819     heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
1820   }
1821 }
1822 
1823 void LSEVisitor::VisitBasicBlock(HBasicBlock* block) {
1824   // Populate the heap_values array for this block.
1825   // TODO: try to reuse the heap_values array from one predecessor if possible.
1826   if (block->IsLoopHeader()) {
1827     PrepareLoopRecords(block);
1828   } else {
1829     MergePredecessorRecords(block);
1830   }
1831   // Visit non-Phi instructions.
1832   VisitNonPhiInstructions(block);
1833 }
1834 
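// Illustrative sketch of back-edge aliasing (hypothetical Java input):
//
//   for (int i = 0; i < n; ++i) {
//     a[i] = 1;           // LSA treats a[i] and a[i + 1] as non-aliasing, but
//     use(a[i + 1]);      //   across iterations they can name the same
//   }                     //   element, so they are treated as aliasing here.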
1835 bool LSEVisitor::MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const {
1836   DCHECK_NE(idx1, idx2);
1837   DCHECK(loop_header->IsLoopHeader());
1838   if (heap_location_collector_.MayAlias(idx1, idx2)) {
1839     return true;
1840   }
1841   // For array locations with index defined inside the loop, include
1842   // all other locations in the array, even those that LSA declares
1843   // non-aliasing, such as `a[i]` and `a[i + 1]`, as they may actually
1844   // refer to the same locations for different iterations. (LSA's
1845   // `ComputeMayAlias()` does not consider different loop iterations.)
1846   HeapLocation* loc1 = heap_location_collector_.GetHeapLocation(idx1);
1847   HeapLocation* loc2 = heap_location_collector_.GetHeapLocation(idx2);
1848   if (loc1->IsArray() &&
1849       loc2->IsArray() &&
1850       HeapLocationCollector::CanReferencesAlias(loc1->GetReferenceInfo(),
1851                                                 loc2->GetReferenceInfo())) {
1852     HLoopInformation* loop_info = loop_header->GetLoopInformation();
1853     if (loop_info->Contains(*loc1->GetIndex()->GetBlock()) ||
1854         loop_info->Contains(*loc2->GetIndex()->GetBlock())) {
1855       // Consider the locations aliasing. Do not optimize the case where both indexes
1856       // are loop invariants defined inside the loop, rely on LICM to pull them out.
1857       return true;
1858     }
1859   }
1860   return false;
1861 }
1862 
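// Illustrative sketch (hypothetical Java input): with no writes to the array
// anywhere in the loop, every Phi placeholder reached by the search below
// collapses to the default value:
//
//   int[] a = new int[n];
//   for (int i = 0; i < n; ++i) {
//     sum += a[i];        // a[i] can be replaced by the constant 0.
//   }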
1863 bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault(
1864     PhiPlaceholder phi_placeholder,
1865     DataType::Type type,
1866     /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
1867   // Use local allocator to reduce peak memory usage.
1868   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1869   ArenaBitVector visited(&allocator,
1870                          /*start_bits=*/ num_phi_placeholders_,
1871                          /*expandable=*/ false,
1872                          kArenaAllocLSE);
1873   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
1874 
1875   // Use depth first search to check if any non-Phi input is unknown.
1876   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1877   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1878   visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
1879   work_queue.push_back(phi_placeholder);
1880   while (!work_queue.empty()) {
1881     PhiPlaceholder current_phi_placeholder = work_queue.back();
1882     work_queue.pop_back();
1883     HBasicBlock* block = blocks[current_phi_placeholder.GetBlockId()];
1884     DCHECK_GE(block->GetPredecessors().size(), 2u);
1885     size_t idx = current_phi_placeholder.GetHeapLocation();
1886     for (HBasicBlock* predecessor : block->GetPredecessors()) {
1887       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1888       if (value.NeedsPhi()) {
1889         // Visit the predecessor Phi placeholder if it's not visited yet.
1890         if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
1891           visited.SetBit(PhiPlaceholderIndex(value));
1892           work_queue.push_back(value.GetPhiPlaceholder());
1893         }
1894       } else if (!value.Equals(Value::Default())) {
1895         return false;  // Report failure.
1896       }
1897     }
1898     if (block->IsLoopHeader()) {
1899       // For back-edges we need to check all locations that write to the same array,
1900       // even those that LSA declares non-aliasing, such as `a[i]` and `a[i + 1]`
1901       // as they may actually refer to the same locations for different iterations.
1902       for (size_t i = 0; i != num_heap_locations; ++i) {
1903         if (i == idx ||
1904             heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo() !=
1905                 heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()) {
1906           continue;
1907         }
1908         for (HBasicBlock* predecessor : block->GetPredecessors()) {
1909           // Check if there were any writes to this location.
1910           // Note: We could simply process the values but due to the vector operation
1911           // carve-out (see `IsDefaultOrPhiAllowedForLoad()`), a vector load can cause
1912           // the value to change and not be equal to default. To work around this and
1913           // allow replacing the non-vector load of loop-invariant default values
1914           // anyway, skip over paths that do not have any writes.
1915           ValueRecord record = heap_values_for_[predecessor->GetBlockId()][i];
1916           while (record.stored_by.NeedsLoopPhi() &&
1917                  blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->IsLoopHeader()) {
1918             HLoopInformation* loop_info =
1919                 blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->GetLoopInformation();
1920             record = heap_values_for_[loop_info->GetPreHeader()->GetBlockId()][i];
1921           }
1922           Value value = ReplacementOrValue(record.value);
1923           if (value.NeedsPhi()) {
1924             // Visit the predecessor Phi placeholder if it's not visited yet.
1925             if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
1926               visited.SetBit(PhiPlaceholderIndex(value));
1927               work_queue.push_back(value.GetPhiPlaceholder());
1928             }
1929           } else if (!value.Equals(Value::Default())) {
1930             return false;  // Report failure.
1931           }
1932         }
1933       }
1934     }
1935   }
1936 
1937   // Record replacement and report success.
1938   HInstruction* replacement = GetDefaultValue(type);
1939   for (uint32_t phi_placeholder_index : visited.Indexes()) {
1940     DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
1941     PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
1942     HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
1943     // We use both vector and non-vector operations to analyze the information. However, we
1944     // replace only non-vector operations in this code path.
1945     if (!hl->IsVecOp()) {
1946       phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
1947       phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
1948     }
1949   }
1950   return true;
1951 }
1952 
1953 bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput(
1954     PhiPlaceholder phi_placeholder,
1955     /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
1956   // Use local allocator to reduce peak memory usage.
1957   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1958   ArenaBitVector visited(&allocator,
1959                          /*start_bits=*/ num_phi_placeholders_,
1960                          /*expandable=*/ false,
1961                          kArenaAllocLSE);
1962   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
1963 
1964   // Use depth first search to check if any non-Phi input is unknown.
1965   HInstruction* replacement = nullptr;
1966   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1967   visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
1968   work_queue.push_back(phi_placeholder);
1969   while (!work_queue.empty()) {
1970     PhiPlaceholder current_phi_placeholder = work_queue.back();
1971     work_queue.pop_back();
1972     HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
1973     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1974     size_t idx = current_phi_placeholder.GetHeapLocation();
1975     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1976       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1977       if (value.NeedsPhi()) {
1978         // Visit the predecessor Phi placeholder if it's not visited yet.
1979         if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
1980           visited.SetBit(PhiPlaceholderIndex(value));
1981           work_queue.push_back(value.GetPhiPlaceholder());
1982         }
1983       } else {
1984         if (!value.IsInstruction() ||
1985             (replacement != nullptr && replacement != value.GetInstruction())) {
1986           return false;  // Report failure.
1987         }
1988         replacement = value.GetInstruction();
1989       }
1990     }
1991     // While `TryReplacingLoopPhiPlaceholderWithDefault()` has special treatment
1992     // for back-edges, it is not needed here. When looking for a single input
1993     // instruction coming from before the loop, the array index must also be
1994     // defined before the loop and the aliasing analysis done by LSA is sufficient.
1995     // Any writes of a different value with an index that is not loop invariant
1996     // would invalidate the heap location in `VisitSetLocation()`.
1997   }
1998 
1999   // Record replacement and report success.
2000   DCHECK(replacement != nullptr);
2001   for (uint32_t phi_placeholder_index : visited.Indexes()) {
2002     DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
2003     PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
2004     HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
2005     // We use both vector and non-vector operations to analyze the information. However, we
2006     // replace only vector operations in this code path.
2007     if (hl->IsVecOp()) {
2008       phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2009       phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
2010     }
2011   }
2012   return true;
2013 }
2014 
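// Illustrative sketch (hypothetical Java input): the search below succeeds
// when every non-Phi input along the dependency chain is known:
//
//   obj.f = 0;
//   for (...) {
//     if (c) { obj.f = 1; }
//     int x = obj.f;      // all inputs known: the search returns nullopt and
//   }                     //   the collected placeholders can be materialized.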
2015 std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::FindLoopPhisToMaterialize(
2016     PhiPlaceholder phi_placeholder,
2017     /*inout*/ ArenaBitVector* phi_placeholders_to_materialize,
2018     DataType::Type type,
2019     bool can_use_default_or_phi) {
2020   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2021 
2022   // Use local allocator to reduce peak memory usage.
2023   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2024   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
2025 
2026   // Use depth first search to check if any non-Phi input is unknown.
2027   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2028   phi_placeholders_to_materialize->ClearAllBits();
2029   phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(phi_placeholder));
2030   work_queue.push_back(phi_placeholder);
2031   while (!work_queue.empty()) {
2032     PhiPlaceholder current_phi_placeholder = work_queue.back();
2033     work_queue.pop_back();
2034     if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(current_phi_placeholder))) {
2035       // Replaced by `TryReplacingLoopPhiPlaceholderWith{Default,SingleInput}()`.
2036       DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].Equals(
2037                  Value::Default()));
2038       continue;
2039     }
2040     HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2041     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2042     size_t idx = current_phi_placeholder.GetHeapLocation();
2043     if (current_block->IsLoopHeader()) {
2044       // If the index is defined inside the loop, it may reference different elements of the
2045       // array on each iteration. Since we do not track if all elements of an array are set
2046       // to the same value explicitly, the only known value in pre-header can be the default
2047       // value from NewArray or a Phi placeholder depending on a default value from some outer
2048       // loop pre-header. This Phi placeholder can be replaced only by the default value.
2049       HInstruction* index = heap_location_collector_.GetHeapLocation(idx)->GetIndex();
2050       if (index != nullptr && current_block->GetLoopInformation()->Contains(*index->GetBlock())) {
2051         if (can_use_default_or_phi &&
2052             TryReplacingLoopPhiPlaceholderWithDefault(current_phi_placeholder,
2053                                                       type,
2054                                                       phi_placeholders_to_materialize)) {
2055           continue;
2056         } else {
2057           return current_phi_placeholder;  // Report the loop Phi placeholder.
2058         }
2059       }
2060       // A similar situation arises with the index defined outside the loop if we cannot use
2061       // default values or Phis, i.e. for vector loads, as we can only replace the Phi
2062       // placeholder with a single instruction defined before the loop.
2063       if (!can_use_default_or_phi) {
2064         DCHECK(index != nullptr);  // Vector operations are array operations.
2065         if (TryReplacingLoopPhiPlaceholderWithSingleInput(current_phi_placeholder,
2066                                                           phi_placeholders_to_materialize)) {
2067           continue;
2068         } else {
2069           return current_phi_placeholder;  // Report the loop Phi placeholder.
2070         }
2071       }
2072     }
2073     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2074       ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
2075       Value value = ReplacementOrValue(heap_values[idx].value);
2076       if (value.IsUnknown()) {
2077         // We cannot create a Phi for this loop Phi placeholder.
2078         return current_phi_placeholder;  // Report the loop Phi placeholder.
2079       }
2080       // For arrays, the location may have been clobbered by writes to other locations
2081       // in a loop that LSA does not consider aliasing, such as `a[i]` and `a[i + 1]`.
2082       if (current_block->IsLoopHeader() &&
2083           predecessor != current_block->GetLoopInformation()->GetPreHeader() &&
2084           heap_location_collector_.GetHeapLocation(idx)->GetIndex() != nullptr) {
2085         for (size_t i = 0, size = heap_values.size(); i != size; ++i) {
2086           if (i != idx &&
2087               !heap_values[i].stored_by.IsUnknown() &&
2088               MayAliasOnBackEdge(current_block, idx, i)) {
2089             // We cannot create a Phi for this loop Phi placeholder.
2090             return current_phi_placeholder;
2091           }
2092         }
2093       }
2094       if (value.NeedsLoopPhi()) {
2095         // Visit the predecessor Phi placeholder if it's not visited yet.
2096         if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(value))) {
2097           phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(value));
2098           work_queue.push_back(value.GetPhiPlaceholder());
2099           LSE_VLOG << "For materialization of " << phi_placeholder
2100                    << " we need to materialize " << value;
2101         }
2102       }
2103     }
2104   }
2105 
2106   // There are no unknown values feeding this Phi, so we can construct the Phis if needed.
2107   return std::nullopt;
2108 }
2109 
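// Illustrative sketch (hypothetical Java input): when the only non-loop-Phi
// input along the chain is a single known value, no Phi is created at all:
//
//   obj.f = 1;
//   for (...) {
//     int x = obj.f;      // every placeholder input reduces to the constant 1,
//   }                     //   so the placeholders are replaced by 1 directly.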
2110 bool LSEVisitor::MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
2111                                      DataType::Type type) {
2112   return MaterializeLoopPhis(ArrayRef<const size_t>(phi_placeholder_indexes), type);
2113 }
2114 
2115 bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes,
2116                                      DataType::Type type) {
2117   // Materialize all predecessors that do not need a loop Phi and determine if all inputs
2118   // other than loop Phis are the same.
2119   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2120   std::optional<Value> other_value = std::nullopt;
2121   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2122     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2123     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2124     DCHECK_GE(block->GetPredecessors().size(), 2u);
2125     size_t idx = phi_placeholder.GetHeapLocation();
2126     for (HBasicBlock* predecessor : block->GetPredecessors()) {
2127       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2128       if (value.NeedsNonLoopPhi()) {
2129         DCHECK(current_phase_ == Phase::kLoadElimination) << current_phase_;
2130         MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type);
2131         value = Replacement(value);
2132       }
2133       if (!value.NeedsLoopPhi()) {
2134         if (!other_value) {
2135           // The first other value we found.
2136           other_value = value;
2137         } else if (!other_value->IsInvalid()) {
2138           // Check if the current `value` differs from the previous `other_value`.
2139           if (!value.Equals(*other_value)) {
2140             other_value = Value::Invalid();
2141           }
2142         }
2143       }
2144     }
2145   }
2146 
2147   DCHECK(other_value.has_value());
2148   if (!other_value->IsInvalid()) {
2149     HInstruction* replacement =
2150         (other_value->IsDefault()) ? GetDefaultValue(type) : other_value->GetInstruction();
2151     for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2152       phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2153     }
2154     return true;
2155   }
2156 
2157   // If we're materializing only a single Phi, try to match it with an existing Phi.
2158   // (Matching multiple Phis would need investigation. It may be prohibitively slow.)
2159   // This also covers the case when after replacing a previous set of Phi placeholders,
2160   // we continue with a Phi placeholder that does not really need a loop Phi anymore.
  if (phi_placeholder_indexes.size() == 1u) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_indexes[0]);
    size_t idx = phi_placeholder.GetHeapLocation();
    HBasicBlock* block = GetGraph()->GetBlocks()[phi_placeholder.GetBlockId()];
    ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
    for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
      HInstruction* phi = phi_it.Current();
      DCHECK_EQ(phi->InputCount(), predecessors.size());
      ArrayRef<HUserRecord<HInstruction*>> phi_inputs = phi->GetInputRecords();
      auto cmp = [=](const HUserRecord<HInstruction*>& lhs, HBasicBlock* rhs) {
        Value value = ReplacementOrValue(heap_values_for_[rhs->GetBlockId()][idx].value);
        if (value.NeedsPhi()) {
          DCHECK(value.GetPhiPlaceholder() == phi_placeholder);
          return lhs.GetInstruction() == phi;
        } else {
          DCHECK(value.IsDefault() || value.IsInstruction());
          return value.Equals(lhs.GetInstruction());
        }
      };
      if (std::equal(phi_inputs.begin(), phi_inputs.end(), predecessors.begin(), cmp)) {
        phi_placeholder_replacements_[phi_placeholder_indexes[0]] = Value::ForInstruction(phi);
        return true;
      }
    }
  }

  if (current_phase_ == Phase::kStoreElimination) {
    // We're not creating Phis during the final store elimination phase.
    return false;
  }

  // There are different inputs to the Phi chain. Create the Phis.
  ArenaAllocator* allocator = GetGraph()->GetAllocator();
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    CHECK_GE(block->GetPredecessors().size(), 2u);
    phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(
        new (allocator) HPhi(allocator, kNoRegNumber, block->GetPredecessors().size(), type));
  }
  // Fill the Phi inputs.
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    size_t idx = phi_placeholder.GetHeapLocation();
    HInstruction* phi = phi_placeholder_replacements_[phi_placeholder_index].GetInstruction();
    DCHECK(DataType::IsTypeConversionImplicit(type, phi->GetType()))
        << "type=" << type << " vs phi-type=" << phi->GetType();
    for (size_t i = 0, size = block->GetPredecessors().size(); i != size; ++i) {
      HBasicBlock* predecessor = block->GetPredecessors()[i];
      Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      HInstruction* input = value.IsDefault() ? GetDefaultValue(type) : value.GetInstruction();
      DCHECK_NE(input->GetType(), DataType::Type::kVoid);
      phi->SetRawInputAt(i, input);
      DCHECK(DataType::IsTypeConversionImplicit(input->GetType(), phi->GetType()))
          << " input: " << input->GetType() << value << " phi: " << phi->GetType()
          << " request: " << type;
    }
  }
  // Add the Phis to their blocks.
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    block->AddPhi(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction()->AsPhi());
  }
  if (type == DataType::Type::kReference) {
    ScopedArenaAllocator local_allocator(allocator_.GetArenaStack());
    ScopedArenaVector<HInstruction*> phis(local_allocator.Adapter(kArenaAllocLSE));
    for (size_t phi_placeholder_index : phi_placeholder_indexes) {
      phis.push_back(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction());
    }
    // Update reference type information. Pass invalid handles; they are not used for Phis.
    ReferenceTypePropagation rtp_fixup(GetGraph(),
                                       Handle<mirror::DexCache>(),
                                       /* is_first_run= */ false);
    rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis));
  }

  return true;
}

bool LSEVisitor::MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
                                     DataType::Type type) {
  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());

  // We want to recognize when a subset of these loop Phis that does not depend on
  // any other loop Phi (i.e. a transitive closure) has only one other instruction
  // as an input, i.e. that instruction can be used instead of each Phi in the set.
  // See for example Main.testLoop{5,6,7,8}() in the test 530-checker-lse. To do
  // that, we shall materialize these loop Phis starting from the smallest
  // transitive closure.
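  //
  // Illustrative sketch (hypothetical placeholders, not from any test): suppose
  // placeholders P0 and P1 feed each other across back-edges and the only
  // non-Phi input of either one is the instruction i3. The closure {P0, P1}
  // then has the single outside input i3, so both placeholders are replaced
  // directly with i3 and no HPhi needs to be created.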

  // Construct a matrix of loop phi placeholder dependencies. To reduce the memory usage,
  // assign new indexes to the Phi placeholders, making the matrix dense.
  ScopedArenaVector<size_t> matrix_indexes(num_phi_placeholders_,
                                           static_cast<size_t>(-1),  // Invalid.
                                           allocator.Adapter(kArenaAllocLSE));
  ScopedArenaVector<size_t> phi_placeholder_indexes(allocator.Adapter(kArenaAllocLSE));
  size_t num_phi_placeholders = phi_placeholders_to_materialize.NumSetBits();
  phi_placeholder_indexes.reserve(num_phi_placeholders);
  for (uint32_t marker_index : phi_placeholders_to_materialize.Indexes()) {
    matrix_indexes[marker_index] = phi_placeholder_indexes.size();
    phi_placeholder_indexes.push_back(marker_index);
  }
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  ScopedArenaVector<ArenaBitVector*> dependencies(allocator.Adapter(kArenaAllocLSE));
  dependencies.reserve(num_phi_placeholders);
  for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
    static constexpr bool kExpandable = false;
    dependencies.push_back(
        ArenaBitVector::Create(&allocator, num_phi_placeholders, kExpandable, kArenaAllocLSE));
    ArenaBitVector* current_dependencies = dependencies.back();
    current_dependencies->SetBit(matrix_index);  // Count the Phi placeholder as its own dependency.
    PhiPlaceholder current_phi_placeholder =
        GetPhiPlaceholderAt(phi_placeholder_indexes[matrix_index]);
    HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      if (pred_value.NeedsLoopPhi()) {
        size_t pred_value_index = PhiPlaceholderIndex(pred_value);
        DCHECK(phi_placeholder_replacements_[pred_value_index].IsInvalid());
        DCHECK_NE(matrix_indexes[pred_value_index], static_cast<size_t>(-1));
        current_dependencies->SetBit(matrix_indexes[PhiPlaceholderIndex(pred_value)]);
      }
    }
  }

  // Use the Floyd-Warshall algorithm to determine all transitive dependencies.
  for (size_t k = 0; k != num_phi_placeholders; ++k) {
    for (size_t i = 0; i != num_phi_placeholders; ++i) {
      for (size_t j = 0; j != num_phi_placeholders; ++j) {
        if (dependencies[i]->IsBitSet(k) && dependencies[k]->IsBitSet(j)) {
          dependencies[i]->SetBit(j);
        }
      }
    }
  }
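  // This is Warshall's boolean transitive closure over dense bit vector rows,
  // O(n^3) in the number of placeholders. Illustrative sketch (hypothetical
  // 3x3 matrix): with direct dependencies 0 -> {0, 1}, 1 -> {1, 2}, 2 -> {2},
  // the k-loop adds the missing transitive bit, yielding 0 -> {0, 1, 2} while
  // the other rows are unchanged.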

  // Count the number of transitive dependencies for each replaceable Phi placeholder.
  ScopedArenaVector<size_t> num_dependencies(allocator.Adapter(kArenaAllocLSE));
  num_dependencies.reserve(num_phi_placeholders);
  for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
    num_dependencies.push_back(dependencies[matrix_index]->NumSetBits());
  }

  // Pick a Phi placeholder with the smallest number of transitive dependencies and
  // materialize it and its dependencies. Repeat until we have materialized all.
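  // Continuing the hypothetical example above: the closure sizes are {3, 2, 1},
  // so the greedy loop below materializes the subset {2} first, then {1} (its
  // dependency on 2 has already been subtracted), and finally {0}.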
  ScopedArenaVector<size_t> current_subset(allocator.Adapter(kArenaAllocLSE));
  current_subset.reserve(num_phi_placeholders);
  size_t remaining_phi_placeholders = num_phi_placeholders;
  while (remaining_phi_placeholders != 0u) {
    auto it = std::min_element(num_dependencies.begin(), num_dependencies.end());
    DCHECK_LE(*it, remaining_phi_placeholders);
    size_t current_matrix_index = std::distance(num_dependencies.begin(), it);
    ArenaBitVector* current_dependencies = dependencies[current_matrix_index];
    size_t current_num_dependencies = num_dependencies[current_matrix_index];
    current_subset.clear();
    for (uint32_t matrix_index : current_dependencies->Indexes()) {
      current_subset.push_back(phi_placeholder_indexes[matrix_index]);
    }
    if (!MaterializeLoopPhis(current_subset, type)) {
      DCHECK_EQ(current_phase_, Phase::kStoreElimination);
      // This is the final store elimination phase and we shall not be able to eliminate any
      // stores that depend on the current subset, so mark these Phi placeholders unreplaceable.
      for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
        if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
          DCHECK(phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]].IsInvalid());
          phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]] =
              Value::PureUnknown();
        }
      }
      return false;
    }
    for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
      if (current_dependencies->IsBitSet(matrix_index)) {
        // Mark all dependencies as done by setting their `num_dependencies[.]` to the
        // maximum, so that they shall never be the minimum again.
        num_dependencies[matrix_index] = num_phi_placeholders;
      } else if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
        // Remove dependencies from other Phi placeholders.
        dependencies[matrix_index]->Subtract(current_dependencies);
        num_dependencies[matrix_index] -= current_num_dependencies;
      }
    }
    remaining_phi_placeholders -= current_num_dependencies;
  }
  return true;
}

bool LSEVisitor::FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type) {
  ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
  ArenaBitVector abv(
      &saa, num_phi_placeholders_, /*expandable=*/ false, ArenaAllocKind::kArenaAllocLSE);
  auto res =
      FindLoopPhisToMaterialize(phi_placeholder, &abv, type, /*can_use_default_or_phi=*/true);
  CHECK(!res.has_value()) << *res;
  return MaterializeLoopPhis(abv, type);
}

std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::TryToMaterializeLoopPhis(
    PhiPlaceholder phi_placeholder, HInstruction* load) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());

  // Find Phi placeholders to materialize.
  ArenaBitVector phi_placeholders_to_materialize(
      &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
  DataType::Type type = load->GetType();
  bool can_use_default_or_phi = IsDefaultOrPhiAllowedForLoad(load);
  std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
      phi_placeholder, &phi_placeholders_to_materialize, type, can_use_default_or_phi);
  if (loop_phi_with_unknown_input) {
    DCHECK_GE(GetGraph()
                  ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
                  ->GetPredecessors()
                  .size(),
              2u);
    return loop_phi_with_unknown_input;  // Return failure.
  }

  DCHECK_EQ(current_phase_, Phase::kLoadElimination);
  bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
  DCHECK(success);

  // Report success.
  return std::nullopt;
}

// Re-process loads and stores in successors from the `loop_phi_with_unknown_input`. This may
// find one or more loads from `loads_requiring_loop_phi_` which cannot be replaced by Phis and
// propagate the load(s) as the new value(s) to successors; this may uncover new elimination
// opportunities. If we find no such load, we shall at least propagate an unknown value to some
// heap location that is needed by another loop Phi placeholder.
void LSEVisitor::ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input) {
  DCHECK(!loads_requiring_loop_phi_.empty());
  size_t loop_phi_with_unknown_input_index = PhiPlaceholderIndex(loop_phi_with_unknown_input);
  DCHECK(phi_placeholder_replacements_[loop_phi_with_unknown_input_index].IsInvalid());
  phi_placeholder_replacements_[loop_phi_with_unknown_input_index] =
      Value::MergedUnknown(loop_phi_with_unknown_input);

  uint32_t block_id = loop_phi_with_unknown_input.GetBlockId();
  const ArenaVector<HBasicBlock*>& reverse_post_order = GetGraph()->GetReversePostOrder();
  size_t reverse_post_order_index = 0;
  size_t reverse_post_order_size = reverse_post_order.size();
  size_t loads_and_stores_index = 0u;
  size_t loads_and_stores_size = loads_and_stores_.size();

  // Skip blocks and instructions before the block containing the loop phi with unknown input.
  DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
  while (reverse_post_order[reverse_post_order_index]->GetBlockId() != block_id) {
    HBasicBlock* block = reverse_post_order[reverse_post_order_index];
    while (loads_and_stores_index != loads_and_stores_size &&
           loads_and_stores_[loads_and_stores_index].load_or_store->GetBlock() == block) {
      ++loads_and_stores_index;
    }
    ++reverse_post_order_index;
    DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
  }

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  // Reuse one temporary vector for all remaining blocks.
  size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
  ScopedArenaVector<Value> local_heap_values(allocator.Adapter(kArenaAllocLSE));

  auto get_initial_value = [this](HBasicBlock* block, size_t idx) {
    Value value;
    if (block->IsLoopHeader()) {
      if (block->GetLoopInformation()->IsIrreducible()) {
        PhiPlaceholder placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
        value = Value::MergedUnknown(placeholder);
      } else {
        value = PrepareLoopValue(block, idx);
      }
    } else {
      value = MergePredecessorValues(block, idx);
    }
    DCHECK(value.IsUnknown() || ReplacementOrValue(value).Equals(value));
    return value;
  };

  // Process remaining blocks and instructions.
  bool found_unreplaceable_load = false;
  bool replaced_heap_value_with_unknown = false;
  for (; reverse_post_order_index != reverse_post_order_size; ++reverse_post_order_index) {
    HBasicBlock* block = reverse_post_order[reverse_post_order_index];
    if (block->IsExitBlock()) {
      continue;
    }

    // We shall reconstruct only the heap values that we need for processing loads and stores.
    local_heap_values.clear();
    local_heap_values.resize(num_heap_locations, Value::Invalid());

    for (; loads_and_stores_index != loads_and_stores_size; ++loads_and_stores_index) {
      HInstruction* load_or_store = loads_and_stores_[loads_and_stores_index].load_or_store;
      size_t idx = loads_and_stores_[loads_and_stores_index].heap_location_index;
      if (load_or_store->GetBlock() != block) {
        break;  // End of instructions from the current block.
      }
      if (IsStore(load_or_store)) {
        StoreRecord* store_record = store_records_[load_or_store->GetId()];
        DCHECK(store_record != nullptr);
        HInstruction* stored_value = store_record->stored_value;
        DCHECK(stored_value != nullptr);
        // Note that the `stored_value` can be a newly created `Phi` with an id that falls
        // outside the allocated `loads_requiring_loop_phi_` range.
        DCHECK_IMPLIES(
            IsLoad(stored_value),
            static_cast<size_t>(stored_value->GetId()) < loads_requiring_loop_phi_.size());
        if (static_cast<size_t>(stored_value->GetId()) >= loads_requiring_loop_phi_.size() ||
            loads_requiring_loop_phi_[stored_value->GetId()] == nullptr) {
          continue;  // This store never needed a loop Phi.
        }
        ValueRecord* record = loads_requiring_loop_phi_[stored_value->GetId()];
        // Process the store by updating `local_heap_values[idx]`. The last update shall
        // be propagated to the `heap_values[idx].value` if it previously needed a loop Phi
        // at the end of the block.
        Value replacement = ReplacementOrValue(record->value);
        if (replacement.NeedsLoopPhi()) {
          // No replacement yet, use the Phi placeholder from the load.
          DCHECK(record->value.NeedsLoopPhi());
          local_heap_values[idx] = record->value;
        } else {
          // If the load fetched a known value, use it, otherwise use the load.
          local_heap_values[idx] = Value::ForInstruction(
              replacement.IsUnknown() ? stored_value : replacement.GetInstruction());
        }
      } else {
        // Process the load unless it has previously been marked unreplaceable.
        DCHECK(IsLoad(load_or_store));
        ValueRecord* record = loads_requiring_loop_phi_[load_or_store->GetId()];
        if (record == nullptr) {
          continue;  // This load never needed a loop Phi.
        }
        if (record->value.NeedsLoopPhi()) {
          if (local_heap_values[idx].IsInvalid()) {
            local_heap_values[idx] = get_initial_value(block, idx);
          }
          if (local_heap_values[idx].IsUnknown()) {
            // This load cannot be replaced. Keep stores that feed the Phi placeholder
            // (no aliasing since then, otherwise the Phi placeholder would not have been
            // propagated as a value to this load) and store the load as the new heap value.
            found_unreplaceable_load = true;
            KeepStores(record->value);
            record->value = Value::MergedUnknown(record->value.GetPhiPlaceholder());
            local_heap_values[idx] = Value::ForInstruction(load_or_store);
          } else if (local_heap_values[idx].NeedsLoopPhi()) {
            // The load may still be replaced with a Phi later.
            DCHECK(local_heap_values[idx].Equals(record->value));
          } else {
            // This load can be eliminated but we may need to construct non-loop Phis.
            if (local_heap_values[idx].NeedsNonLoopPhi()) {
              MaterializeNonLoopPhis(local_heap_values[idx].GetPhiPlaceholder(),
                                     load_or_store->GetType());
              local_heap_values[idx] = Replacement(local_heap_values[idx]);
            }
            record->value = local_heap_values[idx];
            DCHECK(local_heap_values[idx].IsDefault() || local_heap_values[idx].IsInstruction())
                << "The replacement heap value can be an HIR instruction or the default value.";
            HInstruction* heap_value = local_heap_values[idx].IsDefault() ?
                                           GetDefaultValue(load_or_store->GetType()) :
                                           local_heap_values[idx].GetInstruction();
            AddRemovedLoad(load_or_store, heap_value);
          }
        }
      }
    }

    // All heap values that previously needed a loop Phi at the end of the block
    // need to be updated for processing successors.
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t idx = 0; idx != num_heap_locations; ++idx) {
      if (heap_values[idx].value.NeedsLoopPhi()) {
        if (local_heap_values[idx].IsValid()) {
          heap_values[idx].value = local_heap_values[idx];
        } else {
          heap_values[idx].value = get_initial_value(block, idx);
        }
        if (heap_values[idx].value.IsUnknown()) {
          replaced_heap_value_with_unknown = true;
        }
      }
    }
  }
  DCHECK(found_unreplaceable_load || replaced_heap_value_with_unknown);
}

void LSEVisitor::ProcessLoadsRequiringLoopPhis() {
  // Note: The vector operations carve-out (see `IsDefaultOrPhiAllowedForLoad()`) can possibly
  // make the result of the processing depend on the order in which we process these loads.
  // To make sure the result is deterministic, iterate over `loads_and_stores_` instead of the
  // `loads_requiring_loop_phi_` indexed by non-deterministic pointers.
  if (loads_requiring_loop_phi_.empty()) {
    return;  // No loads to process.
  }
  for (const LoadStoreRecord& load_store_record : loads_and_stores_) {
    ValueRecord* record = loads_requiring_loop_phi_[load_store_record.load_or_store->GetId()];
    if (record == nullptr) {
      continue;
    }
    HInstruction* load = load_store_record.load_or_store;
    while (record->value.NeedsLoopPhi() &&
           phi_placeholder_replacements_[PhiPlaceholderIndex(record->value)].IsInvalid()) {
      std::optional<PhiPlaceholder> loop_phi_with_unknown_input =
          TryToMaterializeLoopPhis(record->value.GetPhiPlaceholder(), load);
      DCHECK_EQ(loop_phi_with_unknown_input.has_value(),
                phi_placeholder_replacements_[PhiPlaceholderIndex(record->value)].IsInvalid());
      if (loop_phi_with_unknown_input) {
        DCHECK_GE(GetGraph()
                      ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
                      ->GetPredecessors()
                      .size(),
                  2u);
        ProcessLoopPhiWithUnknownInput(*loop_phi_with_unknown_input);
      }
    }
    // The load could have been marked as unreplaceable (and stores marked for keeping)
    // or marked for replacement with an instruction in ProcessLoopPhiWithUnknownInput().
    DCHECK(record->value.IsUnknown() ||
           record->value.IsInstruction() ||
           record->value.NeedsLoopPhi());
    if (record->value.NeedsLoopPhi()) {
      record->value = Replacement(record->value);
      HInstruction* heap_value = record->value.GetInstruction();
      AddRemovedLoad(load, heap_value);
    }
  }
}

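// SearchPhiPlaceholdersForKeptStores() below is a worklist fixpoint: starting
// from the Phi placeholders already marked in
// `phi_placeholders_to_search_for_kept_stores_`, it walks backwards through
// predecessor heap values, marks concrete stores as kept and queues any newly
// discovered Phi placeholder until the work queue is empty.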
void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
  ScopedArenaVector<uint32_t> work_queue(allocator_.Adapter(kArenaAllocLSE));
  size_t start_size = phi_placeholders_to_search_for_kept_stores_.NumSetBits();
  work_queue.reserve(((start_size * 3u) + 1u) / 2u);  // Reserve 1.5x start size, rounded up.
  for (uint32_t index : phi_placeholders_to_search_for_kept_stores_.Indexes()) {
    work_queue.push_back(index);
  }
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  while (!work_queue.empty()) {
    uint32_t cur_phi_idx = work_queue.back();
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(cur_phi_idx);
    work_queue.pop_back();
    size_t idx = phi_placeholder.GetHeapLocation();
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    DCHECK(block != nullptr) << cur_phi_idx << " phi: " << phi_placeholder
                             << " (blocks: " << blocks.size() << ")";
    for (HBasicBlock* predecessor : block->GetPredecessors()) {
      ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
      // For loop back-edges we must also preserve all stores to locations that
      // may alias with the location `idx`.
      // TODO: Add test cases around this.
      bool is_back_edge =
          block->IsLoopHeader() && predecessor != block->GetLoopInformation()->GetPreHeader();
      size_t start = is_back_edge ? 0u : idx;
      size_t end = is_back_edge ? heap_values.size() : idx + 1u;
      for (size_t i = start; i != end; ++i) {
        Value stored_by = heap_values[i].stored_by;
        if (!stored_by.IsUnknown() && (i == idx || MayAliasOnBackEdge(block, idx, i))) {
          if (stored_by.NeedsPhi()) {
            size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
            if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
              phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index);
              work_queue.push_back(phi_placeholder_index);
            }
          } else {
            DCHECK(IsStore(stored_by.GetInstruction()));
            ReferenceInfo* ri = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
            DCHECK(ri != nullptr) << "No heap value for " << stored_by.GetInstruction()->DebugName()
                                  << " id: " << stored_by.GetInstruction()->GetId() << " block: "
                                  << stored_by.GetInstruction()->GetBlock()->GetBlockId();
            kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
          }
        }
      }
    }
  }
}

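// UpdateValueRecordForStoreElimination() below unwinds eliminated stores:
// while the record's `stored_by` is a store that is not in `kept_stores_`, the
// record is rewound to the value present before that store. A sketch of the
// effect with hypothetical stores s1 and s2, where only s1 is kept: a record
// {value: v2, stored_by: s2} becomes s1's old record {value: v1, stored_by: s1}
// and stops there.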
void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record) {
  while (value_record->stored_by.IsInstruction() &&
         !kept_stores_.IsBitSet(value_record->stored_by.GetInstruction()->GetId())) {
    StoreRecord* store_record = store_records_[value_record->stored_by.GetInstruction()->GetId()];
    DCHECK(store_record != nullptr);
    *value_record = store_record->old_value_record;
  }
  if (value_record->stored_by.NeedsPhi() &&
      !phi_placeholders_to_search_for_kept_stores_.IsBitSet(
           PhiPlaceholderIndex(value_record->stored_by))) {
    // Some stores feeding this heap location may have been eliminated. Use the `stored_by`
    // Phi placeholder to recalculate the actual value.
    value_record->value = value_record->stored_by;
  }
  value_record->value = ReplacementOrValue(value_record->value);
  if (value_record->value.NeedsNonLoopPhi()) {
    // Treat all Phi placeholders as requiring loop Phis at this point.
    // We do not want MaterializeLoopPhis() to call MaterializeNonLoopPhis().
    value_record->value = Value::ForLoopPhiPlaceholder(value_record->value.GetPhiPlaceholder());
  }
}

void LSEVisitor::FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder,
                                               DataType::Type type) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ArenaBitVector visited(&allocator,
                         /*start_bits=*/ num_phi_placeholders_,
                         /*expandable=*/ false,
                         kArenaAllocLSE);

  // Find Phi placeholders to try and match against existing Phis or other replacement values.
  ArenaBitVector phi_placeholders_to_materialize(
      &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
  std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
      phi_placeholder, &phi_placeholders_to_materialize, type, /*can_use_default_or_phi=*/true);
  if (loop_phi_with_unknown_input) {
    DCHECK_GE(GetGraph()
                  ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
                  ->GetPredecessors()
                  .size(),
              2u);
    // Mark both this Phi placeholder and the input Phi placeholder as unreplaceable.
    phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)] = Value::PureUnknown();
    phi_placeholder_replacements_[PhiPlaceholderIndex(*loop_phi_with_unknown_input)] =
        Value::PureUnknown();
    return;
  }

  DCHECK_EQ(current_phase_, Phase::kStoreElimination);
  bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsValid());
  DCHECK_EQ(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsUnknown(),
            !success);
}

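// ScopedRestoreHeapValues below is an RAII helper: mutations made through
// ForEachRecord() are journaled and automatically rolled back in the
// destructor. A minimal usage sketch (mirroring FindStoresWritingOldValues()):
//
//   ScopedRestoreHeapValues heap_vals(allocator_.GetArenaStack(),
//                                     heap_location_collector_.GetNumberOfHeapLocations(),
//                                     heap_values_for_);
//   heap_vals.ForEachRecord([&](LSEVisitor::ValueRecord* rec) {
//     UpdateValueRecordForStoreElimination(rec);  // Changes are recorded...
//   });
//   // ...and restored when `heap_vals` goes out of scope.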
struct ScopedRestoreHeapValues {
 public:
  ScopedRestoreHeapValues(ArenaStack* alloc,
                          size_t num_heap_locs,
                          ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore)
      : alloc_(alloc),
        updated_values_(alloc_.Adapter(kArenaAllocLSE)),
        to_restore_(to_restore) {
    updated_values_.reserve(num_heap_locs * to_restore_.size());
  }

  ~ScopedRestoreHeapValues() {
    for (const auto& rec : updated_values_) {
      to_restore_[rec.blk_id][rec.heap_loc].value = rec.val_;
    }
  }

  template<typename Func>
  void ForEachRecord(Func&& func) {
    for (size_t blk_id : Range(to_restore_.size())) {
      for (size_t heap_loc : Range(to_restore_[blk_id].size())) {
        LSEVisitor::ValueRecord* vr = &to_restore_[blk_id][heap_loc];
        LSEVisitor::Value initial = vr->value;
        func(vr);
        if (!vr->value.ExactEquals(initial)) {
          updated_values_.push_back({blk_id, heap_loc, initial});
        }
      }
    }
  }

 private:
  struct UpdateRecord {
    size_t blk_id;
    size_t heap_loc;
    LSEVisitor::Value val_;
  };
  ScopedArenaAllocator alloc_;
  ScopedArenaVector<UpdateRecord> updated_values_;
  ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore_;

  DISALLOW_COPY_AND_ASSIGN(ScopedRestoreHeapValues);
};

void LSEVisitor::FindStoresWritingOldValues() {
  // Partial LSE relies on knowing the real heap values, not the store-replacement
  // versions, so we need to restore the map after removing stores.
  ScopedRestoreHeapValues heap_vals(allocator_.GetArenaStack(),
                                    heap_location_collector_.GetNumberOfHeapLocations(),
                                    heap_values_for_);
  // The Phi placeholder replacements have so far been used for eliminating loads,
  // tracking values that would be stored if all stores were kept. As we want to
  // compare actual old values after removing unmarked stores, prune the Phi
  // placeholder replacements that can be fed by values we may not actually store.
  // Replacements marked as unknown can be kept as they are fed by some unknown
  // value and would end up as unknown again if we recalculated them.
  for (size_t i = 0, size = phi_placeholder_replacements_.size(); i != size; ++i) {
    if (!phi_placeholder_replacements_[i].IsUnknown() &&
        !phi_placeholders_to_search_for_kept_stores_.IsBitSet(i)) {
      phi_placeholder_replacements_[i] = Value::Invalid();
    }
  }

  // Update heap values at end of blocks.
  heap_vals.ForEachRecord([&](ValueRecord* rec) {
    UpdateValueRecordForStoreElimination(rec);
  });

  if (kIsDebugBuild) {
    heap_vals.ForEachRecord([](ValueRecord* rec) {
      DCHECK(!rec->value.NeedsNonLoopPhi()) << rec->value;
    });
  }

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  // Mark the stores we want to eliminate in a separate bit vector.
  ArenaBitVector eliminated_stores(&allocator,
                                   /*start_bits=*/ GetGraph()->GetCurrentInstructionId(),
                                   /*expandable=*/ false,
                                   kArenaAllocLSE);

  for (uint32_t store_id : kept_stores_.Indexes()) {
    DCHECK(kept_stores_.IsBitSet(store_id));
    StoreRecord* store_record = store_records_[store_id];
    DCHECK(store_record != nullptr);
    UpdateValueRecordForStoreElimination(&store_record->old_value_record);
    if (store_record->old_value_record.value.NeedsPhi()) {
      DataType::Type type = store_record->stored_value->GetType();
      FindOldValueForPhiPlaceholder(store_record->old_value_record.value.GetPhiPlaceholder(), type);
      store_record->old_value_record.value =
          ReplacementOrValue(store_record->old_value_record.value);
    }
    DCHECK(!store_record->old_value_record.value.NeedsPhi());
    HInstruction* stored_value = FindSubstitute(store_record->stored_value);
    if (store_record->old_value_record.value.Equals(stored_value)) {
      eliminated_stores.SetBit(store_id);
    }
  }

  // Commit the stores to eliminate by removing them from `kept_stores_`.
  kept_stores_.Subtract(&eliminated_stores);
}

void LSEVisitor::Run() {
  // 0. Set HasMonitorOperations to false. If we encounter some MonitorOperations that we can't
  // remove, we will set it to true in VisitMonitorOperation.
  GetGraph()->SetHasMonitorOperations(false);

  // 1. Process blocks and instructions in reverse post order.
  for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
    VisitBasicBlock(block);
  }

  // 2. Process loads that require loop Phis, trying to find/create replacements.
  current_phase_ = Phase::kLoadElimination;
  ProcessLoadsRequiringLoopPhis();

  // 3. Determine which stores to keep and which to eliminate.
  current_phase_ = Phase::kStoreElimination;
  // Finish marking stores for keeping.
  SearchPhiPlaceholdersForKeptStores();

  // Find stores that write the same value as is already present in the location.
  FindStoresWritingOldValues();

  // 4. Replace loads and remove unnecessary stores and singleton allocations.
  FinishFullLSE();
}

void LSEVisitor::FinishFullLSE() {
  // Remove recorded load instructions that should be eliminated.
  for (const LoadStoreRecord& record : loads_and_stores_) {
    size_t id = dchecked_integral_cast<size_t>(record.load_or_store->GetId());
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    if (substitute == nullptr) {
      continue;
    }
    HInstruction* load = record.load_or_store;
    DCHECK(load != nullptr);
    DCHECK(IsLoad(load));
    DCHECK(load->GetBlock() != nullptr) << load->DebugName() << "@" << load->GetDexPc();
    // We proactively retrieve the substitute for a removed load, so
    // a load that has a substitute should not be observed as a heap
    // location value.
    DCHECK_EQ(FindSubstitute(substitute), substitute);

    load->ReplaceWith(substitute);
    load->GetBlock()->RemoveInstruction(load);
    if ((load->IsInstanceFieldGet() && load->AsInstanceFieldGet()->IsVolatile()) ||
        (load->IsStaticFieldGet() && load->AsStaticFieldGet()->IsVolatile())) {
      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedVolatileLoad);
    }
  }

  // Remove all the stores we can.
  for (const LoadStoreRecord& record : loads_and_stores_) {
    if (IsStore(record.load_or_store) && !kept_stores_.IsBitSet(record.load_or_store->GetId())) {
      record.load_or_store->GetBlock()->RemoveInstruction(record.load_or_store);
      if ((record.load_or_store->IsInstanceFieldSet() &&
           record.load_or_store->AsInstanceFieldSet()->IsVolatile()) ||
          (record.load_or_store->IsStaticFieldSet() &&
           record.load_or_store->AsStaticFieldSet()->IsVolatile())) {
        MaybeRecordStat(stats_, MethodCompilationStat::kRemovedVolatileStore);
      }
    }
  }

  // Eliminate singleton-classified instructions:
  //   - Constructor fences (they never escape this thread).
  //   - Allocations (if they are unused).
  for (HInstruction* new_instance : singleton_new_instances_) {
    size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
    MaybeRecordStat(stats_,
                    MethodCompilationStat::kConstructorFenceRemovedLSE,
                    removed);

    if (!new_instance->HasNonEnvironmentUses()) {
      new_instance->RemoveEnvironmentUsers();
      new_instance->GetBlock()->RemoveInstruction(new_instance);
      MaybeRecordStat(stats_, MethodCompilationStat::kFullLSEAllocationRemoved);
    }
  }
}

// The LSEVisitor is a ValueObject (indirectly through base classes) and therefore
// cannot be directly allocated with an arena allocator, so we need to wrap it.
class LSEVisitorWrapper : public DeletableArenaObject<kArenaAllocLSE> {
 public:
  LSEVisitorWrapper(HGraph* graph,
                    const HeapLocationCollector& heap_location_collector,
                    OptimizingCompilerStats* stats)
      : lse_visitor_(graph, heap_location_collector, stats) {}

  void Run() {
    lse_visitor_.Run();
  }

 private:
  LSEVisitor lse_visitor_;
};

bool LoadStoreElimination::Run() {
  if (graph_->IsDebuggable()) {
    // Debugger may set heap values or trigger deoptimization of callers.
    // Skip this optimization.
    return false;
  }
  ScopedArenaAllocator allocator(graph_->GetArenaStack());
  LoadStoreAnalysis lsa(graph_, stats_, &allocator);
  lsa.Run();
  const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
  if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
    // No HeapLocation information from LSA, skip this optimization.
    return false;
  }

  // Currently load-store analysis can't handle predicated loads/stores, specifically
  // pairs of memory operations with different predicates.
  // TODO: support predicated SIMD.
  if (graph_->HasPredicatedSIMD()) {
    return false;
  }

  std::unique_ptr<LSEVisitorWrapper> lse_visitor(
      new (&allocator) LSEVisitorWrapper(graph_, heap_location_collector, stats_));
  lse_visitor->Run();
  return true;
}

#undef LSE_VLOG

}  // namespace art