1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "load_store_elimination.h"
18 
19 #include <algorithm>
20 #include <optional>
21 #include <sstream>
22 #include <variant>
23 
24 #include "base/arena_allocator.h"
25 #include "base/arena_bit_vector.h"
26 #include "base/array_ref.h"
27 #include "base/bit_vector-inl.h"
28 #include "base/bit_vector.h"
29 #include "base/globals.h"
30 #include "base/indenter.h"
31 #include "base/iteration_range.h"
32 #include "base/scoped_arena_allocator.h"
33 #include "base/scoped_arena_containers.h"
34 #include "base/transform_iterator.h"
35 #include "escape.h"
36 #include "execution_subgraph.h"
37 #include "handle.h"
38 #include "load_store_analysis.h"
39 #include "mirror/class_loader.h"
40 #include "mirror/dex_cache.h"
41 #include "nodes.h"
42 #include "optimizing/execution_subgraph.h"
43 #include "optimizing_compiler_stats.h"
44 #include "reference_type_propagation.h"
45 #include "side_effects_analysis.h"
46 #include "stack_map.h"
47 
48 /**
49  * The general algorithm of load-store elimination (LSE).
50  *
51  * We use load-store analysis to collect a list of heap locations and perform
52  * alias analysis of those heap locations. LSE then keeps track of a list of
53  * heap values corresponding to the heap locations and stores that put those
54  * values in these locations.
55  *  - In phase 1, we visit basic blocks in reverse post order and for each basic
56  *    block, visit instructions sequentially, recording heap values and looking
57  *    for loads and stores to eliminate without relying on loop Phis.
58  *  - In phase 2, we look for loads that can be replaced by creating loop Phis
59  *    or using a loop-invariant value.
60  *  - In phase 3, we determine which stores are dead and can be eliminated and
61  *    based on that information we re-evaluate whether some kept stores are
62  *    storing the same value as the value in the heap location; such stores are
63  *    also marked for elimination.
64  *  - In phase 4, we commit the changes, replacing loads marked for elimination
65  *    in previous processing and removing stores not marked for keeping. We also
66  *    remove allocations that are no longer needed.
67  *  - In phase 5, we move allocations which only escape along some executions
68  *    closer to their escape points and fixup non-escaping paths with their actual
69  *    values, creating PHIs when needed.
70  *
71  * 1. Walk over blocks and their instructions.
72  *
73  * The initial set of heap values for a basic block is
74  *  - For a loop header of an irreducible loop, all heap values are unknown.
75  *  - For a loop header of a normal loop, all values unknown at the end of the
76  *    preheader are initialized to unknown, other heap values are set to Phi
77  *    placeholders as we cannot determine yet whether these values are known on
78  *    all back-edges. We use Phi placeholders also for array heap locations with
79  *    index defined inside the loop but this helps only when the value remains
80  *    zero from the array allocation throughout the loop.
81  *  - For other basic blocks, we merge incoming values from the end of all
82  *    predecessors. If any incoming value is unknown, the start value for this
83  *    block is also unknown. Otherwise, if all the incoming values are the same
84  *    (including the case of a single predecessor), the incoming value is used.
85  *    Otherwise, we use a Phi placeholder to indicate different incoming values.
86  *    We record whether such Phi placeholder depends on a loop Phi placeholder.
87  *
88  * For each instruction in the block
89  *  - If the instruction is a load from a heap location with a known value not
90  *    dependent on a loop Phi placeholder, the load can be eliminated, either by
91  *    using an existing instruction or by creating new Phi(s) instead. In order
92  *    to maintain the validity of all heap locations during the optimization
93  *    phase, we only record substitutes at this phase and the real elimination
94  *    is delayed till the end of LSE. Loads that require a loop Phi placeholder
95  *    replacement are recorded for processing later. We also keep track of the
96  *    heap-value at the start load so that later partial-LSE can predicate the
97  *    load.
98  *  - If the instruction is a store, it updates the heap value for the heap
99  *    location with the stored value and records the store itself so that we can
100  *    mark it for keeping if the value becomes observable. Heap values are
101  *    invalidated for heap locations that may alias with the store instruction's
102  *    heap location and their recorded stores are marked for keeping as they are
103  *    now potentially observable. The store instruction can be eliminated unless
104  *    the value stored is later needed e.g. by a load from the same/aliased heap
105  *    location or the heap location persists at method return/deoptimization.
106  *  - A store that stores the same value as the heap value is eliminated.
107  *  - For newly instantiated instances, their heap values are initialized to
108  *    language defined default values.
109  *  - Finalizable objects are considered as persisting at method
110  *    return/deoptimization.
111  *  - Some instructions such as invokes are treated as loading and invalidating
112  *    all the heap values, depending on the instruction's side effects.
113  *  - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any
114  *    partial overlap access among ArrayGet/ArraySet/VecLoad/Store is seen as
115  *    alias and no load/store is eliminated in such case.
116  *  - Currently this LSE algorithm doesn't handle graph with try-catch, due to
117  *    the special block merging structure.
118  *
119  * The time complexity of the initial phase has several components. The total
120  * time for the initialization of heap values for all blocks is
121  *    O(heap_locations * edges)
122  * and the time complexity for simple instruction processing is
123  *    O(instructions).
124  * See the description of phase 3 for additional complexity due to matching of
125  * existing Phis for replacing loads.
126  *
127  * 2. Process loads that depend on loop Phi placeholders.
128  *
129  * We go over these loads to determine whether they can be eliminated. We look
130  * for the set of all Phi placeholders that feed the load and depend on a loop
131  * Phi placeholder and, if we find no unknown value, we construct the necessary
132  * Phi(s) or, if all other inputs are identical, i.e. the location does not
133  * change in the loop, just use that input. If we do find an unknown input, this
134  * must be from a loop back-edge and we replace the loop Phi placeholder with
135  * unknown value and re-process loads and stores that previously depended on
136  * loop Phi placeholders. This shall find at least one load of an unknown value
137  * which is now known to be unreplaceable or a new unknown value on a back-edge
138  * and we repeat this process until each load is either marked for replacement
139  * or found to be unreplaceable. As we mark at least one additional loop Phi
140  * placeholder as unreplacable in each iteration, this process shall terminate.
141  *
142  * The depth-first search for Phi placeholders in FindLoopPhisToMaterialize()
143  * is limited by the number of Phi placeholders and their dependencies we need
144  * to search with worst-case time complexity
145  *    O(phi_placeholder_dependencies) .
146  * The dependencies are usually just the Phi placeholders' potential inputs,
147  * but if we use TryReplacingLoopPhiPlaceholderWithDefault() for default value
148  * replacement search, there are additional dependencies to consider, see below.
149  *
150  * In the successful case (no unknown inputs found) we use the Floyd-Warshall
151  * algorithm to determine transitive closures for each found Phi placeholder,
152  * and then match or materialize Phis from the smallest transitive closure,
153  * so that we can determine if such subset has a single other input. This has
154  * time complexity
155  *    O(phi_placeholders_found^3) .
156  * Note that successful TryReplacingLoopPhiPlaceholderWithDefault() does not
157  * contribute to this as such Phi placeholders are replaced immediately.
158  * The total time of all such successful cases has time complexity
159  *    O(phi_placeholders^3)
160  * because the found sets are disjoint and `Sum(n_i^3) <= Sum(n_i)^3`. Similar
161  * argument applies to the searches used to find all successful cases, so their
162  * total contribution is also just an insignificant
163  *    O(phi_placeholder_dependencies) .
164  * The materialization of Phis has an insignificant total time complexity
165  *    O(phi_placeholders * edges) .
166  *
167  * If we find an unknown input, we re-process heap values and loads with a time
168  * complexity that's the same as the phase 1 in the worst case. Adding this to
169  * the depth-first search time complexity yields
170  *    O(phi_placeholder_dependencies + heap_locations * edges + instructions)
171  * for a single iteration. We can ignore the middle term as it's proprotional
172  * to the number of Phi placeholder inputs included in the first term. Using
173  * the upper limit of number of such iterations, the total time complexity is
174  *    O((phi_placeholder_dependencies + instructions) * phi_placeholders) .
175  *
176  * The upper bound of Phi placeholder inputs is
177  *    heap_locations * edges
178  * but if we use TryReplacingLoopPhiPlaceholderWithDefault(), the dependencies
179  * include other heap locations in predecessor blocks with the upper bound of
180  *    heap_locations^2 * edges .
181  * Using the estimate
182  *    edges <= blocks^2
183  * and
184  *    phi_placeholders <= heap_locations * blocks ,
185  * the worst-case time complexity of the
186  *    O(phi_placeholder_dependencies * phi_placeholders)
187  * term from unknown input cases is actually
188  *    O(heap_locations^3 * blocks^3) ,
189  * exactly as the estimate for the Floyd-Warshall parts of successful cases.
190  * Adding the other term from the unknown input cases (to account for the case
191  * with significantly more instructions than blocks and heap locations), the
192  * phase 2 time complexity is
193  *    O(heap_locations^3 * blocks^3 + heap_locations * blocks * instructions) .
194  *
195  * See the description of phase 3 for additional complexity due to matching of
196  * existing Phis for replacing loads.
197  *
198  * 3. Determine which stores to keep and which to eliminate.
199  *
200  * During instruction processing in phase 1 and re-processing in phase 2, we are
201  * keeping a record of the stores and Phi placeholders that become observable
202  * and now propagate the observable Phi placeholders to all actual stores that
203  * feed them. Having determined observable stores, we look for stores that just
204  * overwrite the old value with the same. Since ignoring non-observable stores
205  * actually changes the old values in heap locations, we need to recalculate
206  * Phi placeholder replacements but we proceed similarly to the previous phase.
207  * We look for the set of all Phis that feed the old value replaced by the store
208  * (but ignoring whether they depend on a loop Phi) and, if we find no unknown
209  * value, we try to match existing Phis (we do not create new Phis anymore) or,
210  * if all other inputs are identical, i.e. the location does not change in the
211  * loop, just use that input. If this succeeds and the old value is identical to
212  * the value we're storing, such store shall be eliminated.
213  *
214  * The work is similar to the phase 2, except that we're not re-processing loads
215  * and stores anymore, so the time complexity of phase 3 is
216  *    O(heap_locations^3 * blocks^3) .
217  *
218  * There is additional complexity in matching existing Phis shared between the
219  * phases 1, 2 and 3. We are never trying to match two or more Phis at the same
220  * time (this could be difficult and slow), so each matching attempt is just
221  * looking at Phis in the block (both old Phis and newly created Phis) and their
222  * inputs. As we create at most `heap_locations` Phis in each block, the upper
223  * bound on the number of Phis we look at is
224  *    heap_locations * (old_phis + heap_locations)
225  * and the worst-case time complexity is
226  *    O(heap_locations^2 * edges + heap_locations * old_phis * edges) .
227  * The first term is lower than one term in phase 2, so the relevant part is
228  *    O(heap_locations * old_phis * edges) .
229  *
230  * 4. Replace loads and remove unnecessary stores and singleton allocations.
231  *
232  * A special type of objects called singletons are instantiated in the method
233  * and have a single name, i.e. no aliases. Singletons have exclusive heap
234  * locations since they have no aliases. Singletons are helpful in narrowing
235  * down the life span of a heap location such that they do not always need to
236  * participate in merging heap values. Allocation of a singleton can be
237  * eliminated if that singleton is not used and does not persist at method
238  * return/deoptimization.
239  *
240  * The time complexity of this phase is
241  *    O(instructions + instruction_uses) .
242  *
243  * 5. Partial LSE
244  *
245  * Move allocations closer to their escapes and remove/predicate loads and
246  * stores as required.
247  *
248  * Partial singletons are objects which only escape from the function or have
249  * multiple names along certain execution paths. In cases where we recognize
250  * these partial singletons we can move the allocation and initialization
251  * closer to the actual escape(s). We can then perform a simplified version of
252  * LSE step 2 to determine the unescaped value of any reads performed after the
253  * object may have escaped. These are used to replace these reads with
254  * 'predicated-read' instructions where the value is only read if the object
255  * has actually escaped. We use the existence of the object itself as the
256  * marker of whether escape has occurred.
257  *
258  * There are several steps in this sub-pass
259  *
260  * 5.1 Group references
261  *
262  * Since all heap-locations for a single reference escape at the same time, we
263  * need to group the heap-locations by reference and process them at the same
264  * time.
265  *
266  *    O(heap_locations).
267  *
268  * FIXME: The time complexity above assumes we can bucket the heap-locations in
269  * O(1) which is not true since we just perform a linear-scan of the heap-ref
270  * list. Since there are generally only a small number of heap-references which
271  * are partial-singletons this is fine and lower real overhead than a hash map.
272  *
273  * 5.2 Generate materializations
274  *
275  * Once we have the references we add new 'materialization blocks' on the edges
276  * where escape becomes inevitable. This information is calculated by the
277  * execution-subgraphs created during load-store-analysis. We create new
278  * 'materialization's in these blocks and initialize them with the value of
279  * each heap-location ignoring side effects (since the object hasn't escaped
280  * yet). Worst case this is the same time-complexity as step 3 since we may
281  * need to materialize phis.
282  *
283  *    O(heap_locations^2 * materialization_edges)
284  *
285  * 5.3 Propagate materializations
286  *
287  * Since we use the materialization as the marker for escape we need to
288  * propagate it throughout the graph. Since the subgraph analysis considers any
289  * lifetime that escapes a loop (and hence would require a loop-phi) to be
290  * escaping at the loop-header we do not need to create any loop-phis to do
291  * this.
292  *
293  *    O(edges)
294  *
295  * NB: Currently the subgraph analysis considers all objects to have their
296  * lifetimes start at the entry block. This simplifies that analysis enormously
297  * but means that we cannot distinguish between an escape in a loop where the
298  * lifetime does not escape the loop (in which case this pass could optimize)
299  * and one where it does escape the loop (in which case the whole loop is
300  * escaping). This is a shortcoming that would be good to fix at some point.
301  *
302  * 5.4 Propagate partial values
303  *
304  * We need to replace loads and stores to the partial reference with predicated
305  * ones that have default non-escaping values. Again this is the same as step 3.
306  *
307  *   O(heap_locations^2 * edges)
308  *
309  * 5.5 Final fixup
310  *
311  * Now all we need to do is replace and remove uses of the old reference with the
312  * appropriate materialization.
313  *
314  *   O(instructions + uses)
315  *
316  * FIXME: The time complexities described above assumes that the
317  * HeapLocationCollector finds a heap location for an instruction in O(1)
318  * time but it is currently O(heap_locations); this can be fixed by adding
319  * a hash map to the HeapLocationCollector.
320  */
321 
322 namespace art {
323 
324 #define LSE_VLOG \
325   if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO)
326 
327 class PartialLoadStoreEliminationHelper;
328 class HeapRefHolder;
329 
330 // Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke().
331 class LSEVisitor final : private HGraphDelegateVisitor {
332  public:
333   LSEVisitor(HGraph* graph,
334              const HeapLocationCollector& heap_location_collector,
335              bool perform_partial_lse,
336              OptimizingCompilerStats* stats);
337 
338   void Run();
339 
340  private:
341   class PhiPlaceholder {
342    public:
PhiPlaceholder()343     constexpr PhiPlaceholder() : block_id_(-1), heap_location_(-1) {}
PhiPlaceholder(uint32_t block_id,size_t heap_location)344     constexpr PhiPlaceholder(uint32_t block_id, size_t heap_location)
345         : block_id_(block_id), heap_location_(dchecked_integral_cast<uint32_t>(heap_location)) {}
346 
347     constexpr PhiPlaceholder(const PhiPlaceholder& p) = default;
348     constexpr PhiPlaceholder(PhiPlaceholder&& p) = default;
349     constexpr PhiPlaceholder& operator=(const PhiPlaceholder& p) = default;
350     constexpr PhiPlaceholder& operator=(PhiPlaceholder&& p) = default;
351 
GetBlockId() const352     constexpr uint32_t GetBlockId() const {
353       return block_id_;
354     }
355 
GetHeapLocation() const356     constexpr size_t GetHeapLocation() const {
357       return heap_location_;
358     }
359 
Equals(const PhiPlaceholder & p2) const360     constexpr bool Equals(const PhiPlaceholder& p2) const {
361       return block_id_ == p2.block_id_ && heap_location_ == p2.heap_location_;
362     }
363 
Dump(std::ostream & oss) const364     void Dump(std::ostream& oss) const {
365       oss << "PhiPlaceholder[blk: " << block_id_ << ", heap_location_: " << heap_location_ << "]";
366     }
367 
368    private:
369     uint32_t block_id_;
370     uint32_t heap_location_;
371   };
372 
373   struct Marker {};
374 
375   class Value;
376 
377   class PriorValueHolder {
378    public:
379     constexpr explicit PriorValueHolder(Value prior);
380 
IsInstruction() const381     constexpr bool IsInstruction() const {
382       return std::holds_alternative<HInstruction*>(value_);
383     }
IsPhi() const384     constexpr bool IsPhi() const {
385       return std::holds_alternative<PhiPlaceholder>(value_);
386     }
IsDefault() const387     constexpr bool IsDefault() const {
388       return std::holds_alternative<Marker>(value_);
389     }
GetPhiPlaceholder() const390     constexpr PhiPlaceholder GetPhiPlaceholder() const {
391       DCHECK(IsPhi());
392       return std::get<PhiPlaceholder>(value_);
393     }
GetInstruction() const394     constexpr HInstruction* GetInstruction() const {
395       DCHECK(IsInstruction());
396       return std::get<HInstruction*>(value_);
397     }
398 
399     Value ToValue() const;
400     void Dump(std::ostream& oss) const;
401 
Equals(PriorValueHolder other) const402     constexpr bool Equals(PriorValueHolder other) const {
403       return value_ == other.value_;
404     }
405 
406    private:
407     std::variant<Marker, HInstruction*, PhiPlaceholder> value_;
408   };
409 
410   friend constexpr bool operator==(const Marker&, const Marker&);
411   friend constexpr bool operator==(const PriorValueHolder& p1, const PriorValueHolder& p2);
412   friend constexpr bool operator==(const PhiPlaceholder& p1, const PhiPlaceholder& p2);
413   friend std::ostream& operator<<(std::ostream& oss, const PhiPlaceholder& p2);
414 
415   class Value {
416    public:
417     enum class ValuelessType {
418       kInvalid,
419       kPureUnknown,
420       kDefault,
421     };
422     struct MergedUnknownMarker {
423       PhiPlaceholder phi_;
424     };
425     struct NeedsNonLoopPhiMarker {
426       PhiPlaceholder phi_;
427     };
428     struct NeedsLoopPhiMarker {
429       PhiPlaceholder phi_;
430     };
431 
Invalid()432     static constexpr Value Invalid() {
433       return Value(ValuelessType::kInvalid);
434     }
435 
436     // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
437     // A heap location can be set to an unknown heap value when:
438     // - it is coming from outside the method,
439     // - it is killed due to aliasing, or side effects, or merging with an unknown value.
PureUnknown()440     static constexpr Value PureUnknown() {
441       return Value(ValuelessType::kPureUnknown);
442     }
443 
PartialUnknown(Value old_value)444     static constexpr Value PartialUnknown(Value old_value) {
445       if (old_value.IsInvalid() || old_value.IsPureUnknown()) {
446         return PureUnknown();
447       } else {
448         return Value(PriorValueHolder(old_value));
449       }
450     }
451 
MergedUnknown(PhiPlaceholder phi_placeholder)452     static constexpr Value MergedUnknown(PhiPlaceholder phi_placeholder) {
453       return Value(MergedUnknownMarker{phi_placeholder});
454     }
455 
456     // Default heap value after an allocation.
457     // A heap location can be set to that value right after an allocation.
Default()458     static constexpr Value Default() {
459       return Value(ValuelessType::kDefault);
460     }
461 
ForInstruction(HInstruction * instruction)462     static constexpr Value ForInstruction(HInstruction* instruction) {
463       return Value(instruction);
464     }
465 
ForNonLoopPhiPlaceholder(PhiPlaceholder phi_placeholder)466     static constexpr Value ForNonLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
467       return Value(NeedsNonLoopPhiMarker{phi_placeholder});
468     }
469 
ForLoopPhiPlaceholder(PhiPlaceholder phi_placeholder)470     static constexpr Value ForLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
471       return Value(NeedsLoopPhiMarker{phi_placeholder});
472     }
473 
ForPhiPlaceholder(PhiPlaceholder phi_placeholder,bool needs_loop_phi)474     static constexpr Value ForPhiPlaceholder(PhiPlaceholder phi_placeholder, bool needs_loop_phi) {
475       return needs_loop_phi ? ForLoopPhiPlaceholder(phi_placeholder)
476                             : ForNonLoopPhiPlaceholder(phi_placeholder);
477     }
478 
IsValid() const479     constexpr bool IsValid() const {
480       return !IsInvalid();
481     }
482 
IsInvalid() const483     constexpr bool IsInvalid() const {
484       return std::holds_alternative<ValuelessType>(value_) &&
485              GetValuelessType() == ValuelessType::kInvalid;
486     }
487 
IsPartialUnknown() const488     bool IsPartialUnknown() const {
489       return std::holds_alternative<PriorValueHolder>(value_);
490     }
491 
IsMergedUnknown() const492     bool IsMergedUnknown() const {
493       return std::holds_alternative<MergedUnknownMarker>(value_);
494     }
495 
IsPureUnknown() const496     bool IsPureUnknown() const {
497       return std::holds_alternative<ValuelessType>(value_) &&
498              GetValuelessType() == ValuelessType::kPureUnknown;
499     }
500 
IsUnknown() const501     bool IsUnknown() const {
502       return IsPureUnknown() || IsMergedUnknown() || IsPartialUnknown();
503     }
504 
IsDefault() const505     bool IsDefault() const {
506       return std::holds_alternative<ValuelessType>(value_) &&
507              GetValuelessType() == ValuelessType::kDefault;
508     }
509 
IsInstruction() const510     bool IsInstruction() const {
511       return std::holds_alternative<HInstruction*>(value_);
512     }
513 
NeedsNonLoopPhi() const514     bool NeedsNonLoopPhi() const {
515       return std::holds_alternative<NeedsNonLoopPhiMarker>(value_);
516     }
517 
NeedsLoopPhi() const518     bool NeedsLoopPhi() const {
519       return std::holds_alternative<NeedsLoopPhiMarker>(value_);
520     }
521 
NeedsPhi() const522     bool NeedsPhi() const {
523       return NeedsNonLoopPhi() || NeedsLoopPhi();
524     }
525 
GetInstruction() const526     HInstruction* GetInstruction() const {
527       DCHECK(IsInstruction()) << *this;
528       return std::get<HInstruction*>(value_);
529     }
530 
GetPriorValue() const531     PriorValueHolder GetPriorValue() const {
532       DCHECK(IsPartialUnknown());
533       return std::get<PriorValueHolder>(value_);
534     }
535 
GetPhiPlaceholder() const536     PhiPlaceholder GetPhiPlaceholder() const {
537       DCHECK(NeedsPhi() || IsMergedUnknown());
538       if (NeedsNonLoopPhi()) {
539         return std::get<NeedsNonLoopPhiMarker>(value_).phi_;
540       } else if (NeedsLoopPhi()) {
541         return std::get<NeedsLoopPhiMarker>(value_).phi_;
542       } else {
543         return std::get<MergedUnknownMarker>(value_).phi_;
544       }
545     }
546 
GetMergeBlockId() const547     uint32_t GetMergeBlockId() const {
548       DCHECK(IsMergedUnknown()) << this;
549       return std::get<MergedUnknownMarker>(value_).phi_.GetBlockId();
550     }
551 
GetMergeBlock(const HGraph * graph) const552     HBasicBlock* GetMergeBlock(const HGraph* graph) const {
553       DCHECK(IsMergedUnknown()) << *this;
554       return graph->GetBlocks()[GetMergeBlockId()];
555     }
556 
GetHeapLocation() const557     size_t GetHeapLocation() const {
558       DCHECK(IsMergedUnknown() || NeedsPhi()) << this;
559       return GetPhiPlaceholder().GetHeapLocation();
560     }
561 
562     constexpr bool ExactEquals(Value other) const;
563 
564     constexpr bool Equals(Value other) const;
565 
Equals(HInstruction * instruction) const566     constexpr bool Equals(HInstruction* instruction) const {
567       return Equals(ForInstruction(instruction));
568     }
569 
570     std::ostream& Dump(std::ostream& os) const;
571 
572     // Public for use with lists.
Value()573     constexpr Value() : value_(ValuelessType::kInvalid) {}
574 
575    private:
576     using ValueHolder = std::variant<ValuelessType,
577                                      HInstruction*,
578                                      MergedUnknownMarker,
579                                      NeedsNonLoopPhiMarker,
580                                      NeedsLoopPhiMarker,
581                                      PriorValueHolder>;
GetValuelessType() const582     constexpr ValuelessType GetValuelessType() const {
583       return std::get<ValuelessType>(value_);
584     }
585 
Value(ValueHolder v)586     constexpr explicit Value(ValueHolder v) : value_(v) {}
587 
588     friend std::ostream& operator<<(std::ostream& os, const Value& v);
589 
590     ValueHolder value_;
591 
592     static_assert(std::is_move_assignable<PhiPlaceholder>::value);
593   };
594 
595   friend constexpr bool operator==(const Value::NeedsLoopPhiMarker& p1,
596                                    const Value::NeedsLoopPhiMarker& p2);
597   friend constexpr bool operator==(const Value::NeedsNonLoopPhiMarker& p1,
598                                    const Value::NeedsNonLoopPhiMarker& p2);
599   friend constexpr bool operator==(const Value::MergedUnknownMarker& p1,
600                                    const Value::MergedUnknownMarker& p2);
601 
602   // Get Phi placeholder index for access to `phi_placeholder_replacements_`
603   // and "visited" bit vectors during depth-first searches.
PhiPlaceholderIndex(PhiPlaceholder phi_placeholder) const604   size_t PhiPlaceholderIndex(PhiPlaceholder phi_placeholder) const {
605     size_t res =
606         phi_placeholder.GetBlockId() * heap_location_collector_.GetNumberOfHeapLocations() +
607         phi_placeholder.GetHeapLocation();
608     DCHECK_EQ(phi_placeholder, GetPhiPlaceholderAt(res))
609         << res << "blks: " << GetGraph()->GetBlocks().size()
610         << " hls: " << heap_location_collector_.GetNumberOfHeapLocations();
611     return res;
612   }
613 
PhiPlaceholderIndex(Value phi_placeholder) const614   size_t PhiPlaceholderIndex(Value phi_placeholder) const {
615     return PhiPlaceholderIndex(phi_placeholder.GetPhiPlaceholder());
616   }
617 
IsPartialNoEscape(HBasicBlock * blk,size_t idx)618   bool IsPartialNoEscape(HBasicBlock* blk, size_t idx) {
619     auto* ri = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
620     if (!ri->IsPartialSingleton()) {
621       return false;
622     }
623     ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts =
624         ri->GetNoEscapeSubgraph()->GetExcludedCohorts();
625     return std::none_of(cohorts.cbegin(),
626                         cohorts.cend(),
627                         [&](const ExecutionSubgraph::ExcludedCohort& ex) -> bool {
628                           // Make sure we haven't yet and never will escape.
629                           return ex.PrecedesBlock(blk) ||
630                                  ex.ContainsBlock(blk) ||
631                                  ex.SucceedsBlock(blk);
632                         });
633   }
634 
GetPhiPlaceholderAt(size_t off) const635   PhiPlaceholder GetPhiPlaceholderAt(size_t off) const {
636     DCHECK_LT(off, num_phi_placeholders_);
637     size_t id = off % heap_location_collector_.GetNumberOfHeapLocations();
638     // Technically this should be (off - id) / NumberOfHeapLocations
639     // but due to truncation it's all the same.
640     size_t blk_id = off / heap_location_collector_.GetNumberOfHeapLocations();
641     return GetPhiPlaceholder(blk_id, id);
642   }
643 
GetPhiPlaceholder(uint32_t block_id,size_t idx) const644   PhiPlaceholder GetPhiPlaceholder(uint32_t block_id, size_t idx) const {
645     DCHECK(GetGraph()->GetBlocks()[block_id] != nullptr) << block_id;
646     return PhiPlaceholder(block_id, idx);
647   }
648 
Replacement(Value value) const649   Value Replacement(Value value) const {
650     DCHECK(value.NeedsPhi() ||
651            (current_phase_ == Phase::kPartialElimination && value.IsMergedUnknown()))
652         << value << " phase: " << current_phase_;
653     Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
654     DCHECK(replacement.IsUnknown() || replacement.IsInstruction());
655     DCHECK(replacement.IsUnknown() ||
656            FindSubstitute(replacement.GetInstruction()) == replacement.GetInstruction());
657     return replacement;
658   }
659 
ReplacementOrValue(Value value) const660   Value ReplacementOrValue(Value value) const {
661     if (current_phase_ == Phase::kPartialElimination) {
662       // In this phase we are materializing the default values which are used
663       // only if the partial singleton did not escape, so we can replace
664       // a partial unknown with the prior value.
665       if (value.IsPartialUnknown()) {
666         value = value.GetPriorValue().ToValue();
667       }
668       if ((value.IsMergedUnknown() || value.NeedsPhi()) &&
669           phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
670         value = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
671         DCHECK(!value.IsMergedUnknown());
672         DCHECK(!value.NeedsPhi());
673       } else if (value.IsMergedUnknown()) {
674         return Value::ForLoopPhiPlaceholder(value.GetPhiPlaceholder());
675       }
676       if (value.IsInstruction() && value.GetInstruction()->IsInstanceFieldGet()) {
677         DCHECK_LT(static_cast<size_t>(value.GetInstruction()->GetId()),
678                   substitute_instructions_for_loads_.size());
679         HInstruction* substitute =
680             substitute_instructions_for_loads_[value.GetInstruction()->GetId()];
681         if (substitute != nullptr) {
682           DCHECK(substitute->IsPredicatedInstanceFieldGet());
683           return Value::ForInstruction(substitute);
684         }
685       }
686       DCHECK(!value.IsInstruction() ||
687              FindSubstitute(value.GetInstruction()) == value.GetInstruction());
688       return value;
689     }
690     if (value.NeedsPhi() && phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
691       return Replacement(value);
692     } else {
693       DCHECK(!value.IsInstruction() ||
694              FindSubstitute(value.GetInstruction()) == value.GetInstruction());
695       return value;
696     }
697   }
698 
699   // The record of a heap value and instruction(s) that feed that value.
700   struct ValueRecord {
701     Value value;
702     Value stored_by;
703   };
704 
FindOrAddTypeConversionIfNecessary(HInstruction * instruction,HInstruction * value,DataType::Type expected_type)705   HTypeConversion* FindOrAddTypeConversionIfNecessary(HInstruction* instruction,
706                                                       HInstruction* value,
707                                                       DataType::Type expected_type) {
708     // Should never add type conversion into boolean value.
709     if (expected_type == DataType::Type::kBool ||
710         DataType::IsTypeConversionImplicit(value->GetType(), expected_type) ||
711         // TODO: This prevents type conversion of default values but we can still insert
712         // type conversion of other constants and there is no constant folding pass after LSE.
713         IsZeroBitPattern(value)) {
714       return nullptr;
715     }
716 
717     // Check if there is already a suitable TypeConversion we can reuse.
718     for (const HUseListNode<HInstruction*>& use : value->GetUses()) {
719       if (use.GetUser()->IsTypeConversion() &&
720           use.GetUser()->GetType() == expected_type &&
721           // TODO: We could move the TypeConversion to a common dominator
722           // if it does not cross irreducible loop header.
723           use.GetUser()->GetBlock()->Dominates(instruction->GetBlock()) &&
724           // Don't share across irreducible loop headers.
725           // TODO: can be more fine-grained than this by testing each dominator.
726           (use.GetUser()->GetBlock() == instruction->GetBlock() ||
727            !GetGraph()->HasIrreducibleLoops())) {
728         if (use.GetUser()->GetBlock() == instruction->GetBlock() &&
729             use.GetUser()->GetBlock()->GetInstructions().FoundBefore(instruction, use.GetUser())) {
730           // Move the TypeConversion before the instruction.
731           use.GetUser()->MoveBefore(instruction);
732         }
733         DCHECK(use.GetUser()->StrictlyDominates(instruction));
734         return use.GetUser()->AsTypeConversion();
735       }
736     }
737 
738     // We must create a new TypeConversion instruction.
739     HTypeConversion* type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
740           expected_type, value, instruction->GetDexPc());
741     instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
742     return type_conversion;
743   }
744 
745   // Find an instruction's substitute if it's a removed load.
746   // Return the same instruction if it should not be removed.
FindSubstitute(HInstruction * instruction) const747   HInstruction* FindSubstitute(HInstruction* instruction) const {
748     size_t id = static_cast<size_t>(instruction->GetId());
749     if (id >= substitute_instructions_for_loads_.size()) {
750       // New Phi (may not be in the graph yet), default value or PredicatedInstanceFieldGet.
751       DCHECK(!IsLoad(instruction) || instruction->IsPredicatedInstanceFieldGet());
752       return instruction;
753     }
754     HInstruction* substitute = substitute_instructions_for_loads_[id];
755     DCHECK(substitute == nullptr || IsLoad(instruction));
756     return (substitute != nullptr) ? substitute : instruction;
757   }
758 
AddRemovedLoad(HInstruction * load,HInstruction * heap_value)759   void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
760     DCHECK(IsLoad(load));
761     DCHECK_EQ(FindSubstitute(load), load);
762     DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
763         "Unexpected heap_value that has a substitute " << heap_value->DebugName();
764 
765     // The load expects to load the heap value as type load->GetType().
766     // However the tracked heap value may not be of that type. An explicit
767     // type conversion may be needed.
768     // There are actually three types involved here:
769     // (1) tracked heap value's type (type A)
770     // (2) heap location (field or element)'s type (type B)
771     // (3) load's type (type C)
772     // We guarantee that type A stored as type B and then fetched out as
773     // type C is the same as casting from type A to type C directly, since
774     // type B and type C will have the same size which is guaranteed in
775     // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
776     // So we only need one type conversion from type A to type C.
777     HTypeConversion* type_conversion = FindOrAddTypeConversionIfNecessary(
778         load, heap_value, load->GetType());
779 
780     substitute_instructions_for_loads_[load->GetId()] =
781         type_conversion != nullptr ? type_conversion : heap_value;
782   }
783 
IsLoad(HInstruction * instruction)784   static bool IsLoad(HInstruction* instruction) {
785     // Unresolved load is not treated as a load.
786     return instruction->IsInstanceFieldGet() ||
787            instruction->IsPredicatedInstanceFieldGet() ||
788            instruction->IsStaticFieldGet() ||
789            instruction->IsVecLoad() ||
790            instruction->IsArrayGet();
791   }
792 
IsStore(HInstruction * instruction)793   static bool IsStore(HInstruction* instruction) {
794     // Unresolved store is not treated as a store.
795     return instruction->IsInstanceFieldSet() ||
796            instruction->IsArraySet() ||
797            instruction->IsVecStore() ||
798            instruction->IsStaticFieldSet();
799   }
800 
801   // Check if it is allowed to use default values or Phis for the specified load.
IsDefaultOrPhiAllowedForLoad(HInstruction * instruction)802   static bool IsDefaultOrPhiAllowedForLoad(HInstruction* instruction) {
803     DCHECK(IsLoad(instruction));
804     // Using defaults for VecLoads requires to create additional vector operations.
805     // As there are some issues with scheduling vector operations it is better to avoid creating
806     // them.
807     return !instruction->IsVecOperation();
808   }
809 
810   // Keep the store referenced by the instruction, or all stores that feed a Phi placeholder.
811   // This is necessary if the stored heap value can be observed.
KeepStores(Value value)812   void KeepStores(Value value) {
813     if (value.IsPureUnknown() || value.IsPartialUnknown()) {
814       return;
815     }
816     if (value.IsMergedUnknown()) {
817       kept_merged_unknowns_.SetBit(PhiPlaceholderIndex(value));
818       phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
819       return;
820     }
821     if (value.NeedsPhi()) {
822       phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
823     } else {
824       HInstruction* instruction = value.GetInstruction();
825       DCHECK(IsStore(instruction));
826       kept_stores_.SetBit(instruction->GetId());
827     }
828   }
829 
830   // If a heap location X may alias with heap location at `loc_index`
831   // and heap_values of that heap location X holds a store, keep that store.
832   // It's needed for a dependent load that's not eliminated since any store
833   // that may put value into the load's heap location needs to be kept.
KeepStoresIfAliasedToLocation(ScopedArenaVector<ValueRecord> & heap_values,size_t loc_index)834   void KeepStoresIfAliasedToLocation(ScopedArenaVector<ValueRecord>& heap_values,
835                                      size_t loc_index) {
836     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
837       if (i == loc_index) {
838         // We use this function when reading a location with unknown value and
839         // therefore we cannot know what exact store wrote that unknown value.
840         // But we can have a phi placeholder here marking multiple stores to keep.
841         DCHECK(
842             !heap_values[i].stored_by.IsInstruction() ||
843             heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo()->IsPartialSingleton());
844         KeepStores(heap_values[i].stored_by);
845         heap_values[i].stored_by = Value::PureUnknown();
846       } else if (heap_location_collector_.MayAlias(i, loc_index)) {
847         KeepStores(heap_values[i].stored_by);
848         heap_values[i].stored_by = Value::PureUnknown();
849       }
850     }
851   }
852 
853   // `instruction` is being removed. Try to see if the null check on it
854   // can be removed. This can happen if the same value is set in two branches
855   // but not in dominators. Such as:
856   //   int[] a = foo();
857   //   if () {
858   //     a[0] = 2;
859   //   } else {
860   //     a[0] = 2;
861   //   }
862   //   // a[0] can now be replaced with constant 2, and the null check on it can be removed.
TryRemovingNullCheck(HInstruction * instruction)863   void TryRemovingNullCheck(HInstruction* instruction) {
864     HInstruction* prev = instruction->GetPrevious();
865     if ((prev != nullptr) && prev->IsNullCheck() && (prev == instruction->InputAt(0))) {
866       // Previous instruction is a null check for this instruction. Remove the null check.
867       prev->ReplaceWith(prev->InputAt(0));
868       prev->GetBlock()->RemoveInstruction(prev);
869     }
870   }
871 
GetDefaultValue(DataType::Type type)872   HInstruction* GetDefaultValue(DataType::Type type) {
873     switch (type) {
874       case DataType::Type::kReference:
875         return GetGraph()->GetNullConstant();
876       case DataType::Type::kBool:
877       case DataType::Type::kUint8:
878       case DataType::Type::kInt8:
879       case DataType::Type::kUint16:
880       case DataType::Type::kInt16:
881       case DataType::Type::kInt32:
882         return GetGraph()->GetIntConstant(0);
883       case DataType::Type::kInt64:
884         return GetGraph()->GetLongConstant(0);
885       case DataType::Type::kFloat32:
886         return GetGraph()->GetFloatConstant(0);
887       case DataType::Type::kFloat64:
888         return GetGraph()->GetDoubleConstant(0);
889       default:
890         UNREACHABLE();
891     }
892   }
893 
CanValueBeKeptIfSameAsNew(Value value,HInstruction * new_value,HInstruction * new_value_set_instr)894   bool CanValueBeKeptIfSameAsNew(Value value,
895                                  HInstruction* new_value,
896                                  HInstruction* new_value_set_instr) {
897     // For field/array set location operations, if the value is the same as the new_value
898     // it can be kept even if aliasing happens. All aliased operations will access the same memory
899     // range.
900     // For vector values, this is not true. For example:
901     //  packed_data = [0xA, 0xB, 0xC, 0xD];            <-- Different values in each lane.
902     //  VecStore array[i  ,i+1,i+2,i+3] = packed_data;
903     //  VecStore array[i+1,i+2,i+3,i+4] = packed_data; <-- We are here (partial overlap).
904     //  VecLoad  vx = array[i,i+1,i+2,i+3];            <-- Cannot be eliminated because the value
905     //                                                     here is not packed_data anymore.
906     //
907     // TODO: to allow such 'same value' optimization on vector data,
908     // LSA needs to report more fine-grain MAY alias information:
909     // (1) May alias due to two vector data partial overlap.
910     //     e.g. a[i..i+3] and a[i+1,..,i+4].
911     // (2) May alias due to two vector data may complete overlap each other.
912     //     e.g. a[i..i+3] and b[i..i+3].
913     // (3) May alias but the exact relationship between two locations is unknown.
914     //     e.g. a[i..i+3] and b[j..j+3], where values of a,b,i,j are all unknown.
915     // This 'same value' optimization can apply only on case (2).
916     if (new_value_set_instr->IsVecOperation()) {
917       return false;
918     }
919 
920     return value.Equals(new_value);
921   }
922 
923   Value PrepareLoopValue(HBasicBlock* block, size_t idx);
924   Value PrepareLoopStoredBy(HBasicBlock* block, size_t idx);
925   void PrepareLoopRecords(HBasicBlock* block);
926   Value MergePredecessorValues(HBasicBlock* block, size_t idx);
927   void MergePredecessorRecords(HBasicBlock* block);
928 
929   void MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type);
930 
931   void VisitGetLocation(HInstruction* instruction, size_t idx);
932   void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value);
RecordFieldInfo(const FieldInfo * info,size_t heap_loc)933   void RecordFieldInfo(const FieldInfo* info, size_t heap_loc) {
934     field_infos_[heap_loc] = info;
935   }
936 
937   void VisitBasicBlock(HBasicBlock* block) override;
938 
939   enum class Phase {
940     kLoadElimination,
941     kStoreElimination,
942     kPartialElimination,
943   };
944 
945   bool MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const;
946 
947   bool TryReplacingLoopPhiPlaceholderWithDefault(
948       PhiPlaceholder phi_placeholder,
949       DataType::Type type,
950       /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
951   bool TryReplacingLoopPhiPlaceholderWithSingleInput(
952       PhiPlaceholder phi_placeholder,
953       /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
954   std::optional<PhiPlaceholder> FindLoopPhisToMaterialize(
955       PhiPlaceholder phi_placeholder,
956       /*out*/ ArenaBitVector* phi_placeholders_to_materialize,
957       DataType::Type type,
958       bool can_use_default_or_phi);
959   bool MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
960                            DataType::Type type);
961   bool MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes, DataType::Type type);
962   bool MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
963                            DataType::Type type);
964   bool FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type);
965   std::optional<PhiPlaceholder> TryToMaterializeLoopPhis(PhiPlaceholder phi_placeholder,
966                                                          HInstruction* load);
967   void ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input);
968   void ProcessLoadsRequiringLoopPhis();
969 
970   void SearchPhiPlaceholdersForKeptStores();
971   void UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record);
972   void FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder, DataType::Type type);
973   void FindStoresWritingOldValues();
974   void FinishFullLSE();
975   void PrepareForPartialPhiComputation();
976   // Create materialization block and materialization object for the given predecessor of entry.
977   HInstruction* SetupPartialMaterialization(PartialLoadStoreEliminationHelper& helper,
978                                             HeapRefHolder&& holder,
979                                             size_t pred_idx,
980                                             HBasicBlock* blk);
981   // Returns the value that would be read by the 'read' instruction on
982   // 'orig_new_inst' if 'orig_new_inst' has not escaped.
983   HInstruction* GetPartialValueAt(HNewInstance* orig_new_inst, HInstruction* read);
984   void MovePartialEscapes();
985 
VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet * instruction)986   void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instruction) override {
987     LOG(FATAL) << "Visited instruction " << instruction->DumpWithoutArgs()
988                << " but LSE should be the only source of predicated-ifield-gets!";
989   }
990 
VisitInstanceFieldGet(HInstanceFieldGet * instruction)991   void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
992     HInstruction* object = instruction->InputAt(0);
993     const FieldInfo& field = instruction->GetFieldInfo();
994     VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field));
995   }
996 
VisitInstanceFieldSet(HInstanceFieldSet * instruction)997   void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
998     HInstruction* object = instruction->InputAt(0);
999     const FieldInfo& field = instruction->GetFieldInfo();
1000     HInstruction* value = instruction->InputAt(1);
1001     size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field);
1002     VisitSetLocation(instruction, idx, value);
1003   }
1004 
VisitStaticFieldGet(HStaticFieldGet * instruction)1005   void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
1006     HInstruction* cls = instruction->InputAt(0);
1007     const FieldInfo& field = instruction->GetFieldInfo();
1008     VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field));
1009   }
1010 
VisitStaticFieldSet(HStaticFieldSet * instruction)1011   void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
1012     HInstruction* cls = instruction->InputAt(0);
1013     const FieldInfo& field = instruction->GetFieldInfo();
1014     HInstruction* value = instruction->InputAt(1);
1015     size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field);
1016     VisitSetLocation(instruction, idx, value);
1017   }
1018 
VisitArrayGet(HArrayGet * instruction)1019   void VisitArrayGet(HArrayGet* instruction) override {
1020     VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
1021   }
1022 
VisitArraySet(HArraySet * instruction)1023   void VisitArraySet(HArraySet* instruction) override {
1024     size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
1025     VisitSetLocation(instruction, idx, instruction->GetValue());
1026   }
1027 
VisitVecLoad(HVecLoad * instruction)1028   void VisitVecLoad(HVecLoad* instruction) override {
1029     VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
1030   }
1031 
VisitVecStore(HVecStore * instruction)1032   void VisitVecStore(HVecStore* instruction) override {
1033     size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
1034     VisitSetLocation(instruction, idx, instruction->GetValue());
1035   }
1036 
VisitDeoptimize(HDeoptimize * instruction)1037   void VisitDeoptimize(HDeoptimize* instruction) override {
1038     ScopedArenaVector<ValueRecord>& heap_values =
1039         heap_values_for_[instruction->GetBlock()->GetBlockId()];
1040     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1041       Value* stored_by = &heap_values[i].stored_by;
1042       if (stored_by->IsUnknown()) {
1043         continue;
1044       }
1045       // Stores are generally observeable after deoptimization, except
1046       // for singletons that don't escape in the deoptimization environment.
1047       bool observable = true;
1048       ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1049       if (info->IsSingleton()) {
1050         HInstruction* reference = info->GetReference();
1051         // Finalizable objects always escape.
1052         if (!reference->IsNewInstance() || !reference->AsNewInstance()->IsFinalizable()) {
1053           // Check whether the reference for a store is used by an environment local of
1054           // the HDeoptimize. If not, the singleton is not observed after deoptimization.
1055           const HUseList<HEnvironment*>& env_uses = reference->GetEnvUses();
1056           observable = std::any_of(
1057               env_uses.begin(),
1058               env_uses.end(),
1059               [instruction](const HUseListNode<HEnvironment*>& use) {
1060                 return use.GetUser()->GetHolder() == instruction;
1061               });
1062         }
1063       }
1064       if (observable) {
1065         KeepStores(*stored_by);
1066         *stored_by = Value::PureUnknown();
1067       }
1068     }
1069   }
1070 
1071   // Keep necessary stores before exiting a method via return/throw.
HandleExit(HBasicBlock * block)1072   void HandleExit(HBasicBlock* block) {
1073     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1074     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1075       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1076       if (!ref_info->IsSingletonAndRemovable() &&
1077           !(ref_info->IsPartialSingleton() && IsPartialNoEscape(block, i))) {
1078         KeepStores(heap_values[i].stored_by);
1079         heap_values[i].stored_by = Value::PureUnknown();
1080       }
1081     }
1082   }
1083 
VisitReturn(HReturn * instruction)1084   void VisitReturn(HReturn* instruction) override {
1085     HandleExit(instruction->GetBlock());
1086   }
1087 
VisitReturnVoid(HReturnVoid * return_void)1088   void VisitReturnVoid(HReturnVoid* return_void) override {
1089     HandleExit(return_void->GetBlock());
1090   }
1091 
VisitThrow(HThrow * throw_instruction)1092   void VisitThrow(HThrow* throw_instruction) override {
1093     HandleExit(throw_instruction->GetBlock());
1094   }
1095 
HandleInvoke(HInstruction * instruction)1096   void HandleInvoke(HInstruction* instruction) {
1097     SideEffects side_effects = instruction->GetSideEffects();
1098     ScopedArenaVector<ValueRecord>& heap_values =
1099         heap_values_for_[instruction->GetBlock()->GetBlockId()];
1100     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1101       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
1102       HBasicBlock* blk = instruction->GetBlock();
1103       // We don't need to do anything if the reference has not escaped at this point.
1104       // This is true if either we (1) never escape or (2) sometimes escape but
1105       // there is no possible execution where we have done so at this time. NB
1106       // We count being in the excluded cohort as escaping. Technically, this is
1107       // a bit over-conservative (since we can have multiple non-escaping calls
1108       // before a single escaping one) but this simplifies everything greatly.
1109       auto partial_singleton_did_not_escape = [](ReferenceInfo* ref_info, HBasicBlock* blk) {
1110         DCHECK(ref_info->IsPartialSingleton());
1111         if (!ref_info->GetNoEscapeSubgraph()->ContainsBlock(blk)) {
1112           return false;
1113         }
1114         ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts =
1115             ref_info->GetNoEscapeSubgraph()->GetExcludedCohorts();
1116         return std::none_of(cohorts.begin(),
1117                             cohorts.end(),
1118                             [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
1119                               return cohort.PrecedesBlock(blk);
1120                             });
1121       };
1122       if (ref_info->IsSingleton() ||
1123           // partial and we aren't currently escaping and we haven't escaped yet.
1124           (ref_info->IsPartialSingleton() && partial_singleton_did_not_escape(ref_info, blk))) {
1125         // Singleton references cannot be seen by the callee.
1126       } else {
1127         if (side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) {
1128           // Previous stores may become visible (read) and/or impossible for LSE to track (write).
1129           KeepStores(heap_values[i].stored_by);
1130           heap_values[i].stored_by = Value::PureUnknown();
1131         }
1132         if (side_effects.DoesAnyWrite()) {
1133           // The value may be clobbered.
1134           heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
1135         }
1136       }
1137     }
1138   }
1139 
1140   void VisitInvoke(HInvoke* invoke) override {
1141     HandleInvoke(invoke);
1142   }
1143 
1144   void VisitClinitCheck(HClinitCheck* clinit) override {
1145     // Class initialization check can result in class initializer calling arbitrary methods.
1146     HandleInvoke(clinit);
1147   }
1148 
1149   void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override {
1150     // Conservatively treat it as an invocation.
1151     HandleInvoke(instruction);
1152   }
1153 
1154   void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override {
1155     // Conservatively treat it as an invocation.
1156     HandleInvoke(instruction);
1157   }
1158 
1159   void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override {
1160     // Conservatively treat it as an invocation.
1161     HandleInvoke(instruction);
1162   }
1163 
1164   void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override {
1165     // Conservatively treat it as an invocation.
1166     HandleInvoke(instruction);
1167   }
1168 
1169   void VisitNewInstance(HNewInstance* new_instance) override {
1170     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
1171     if (ref_info == nullptr) {
1172       // new_instance isn't used for field accesses. No need to process it.
1173       return;
1174     }
1175     if (ref_info->IsSingletonAndRemovable() && !new_instance->NeedsChecks()) {
1176       DCHECK(!new_instance->IsFinalizable());
1177       // new_instance can potentially be eliminated.
1178       singleton_new_instances_.push_back(new_instance);
1179     }
1180     ScopedArenaVector<ValueRecord>& heap_values =
1181         heap_values_for_[new_instance->GetBlock()->GetBlockId()];
1182     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1183       HInstruction* ref =
1184           heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo()->GetReference();
1185       size_t offset = heap_location_collector_.GetHeapLocation(i)->GetOffset();
1186       if (ref == new_instance) {
1187         if (offset >= mirror::kObjectHeaderSize ||
1188             MemberOffset(offset) == mirror::Object::MonitorOffset()) {
1189           // Instance fields other than the header fields are set to default heap values.
1190           // The shadow$_monitor_ header field is also set to the default value.
1191           heap_values[i].value = Value::Default();
1192           heap_values[i].stored_by = Value::PureUnknown();
1193         } else if (MemberOffset(offset) == mirror::Object::ClassOffset()) {
1194           // The shadow$_klass_ header field is special: it holds an actual value, the load class of the new instance.
1195           heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass());
1196           heap_values[i].stored_by = Value::PureUnknown();
1197         }
1198       }
1199     }
1200   }
1201 
1202   void VisitNewArray(HNewArray* new_array) override {
1203     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
1204     if (ref_info == nullptr) {
1205       // new_array isn't used for array accesses. No need to process it.
1206       return;
1207     }
1208     if (ref_info->IsSingletonAndRemovable()) {
1209       if (new_array->GetLength()->IsIntConstant() &&
1210           new_array->GetLength()->AsIntConstant()->GetValue() >= 0) {
1211         // new_array can potentially be eliminated.
1212         singleton_new_instances_.push_back(new_array);
1213       } else {
1214         // new_array may throw NegativeArraySizeException. Keep it.
1215       }
1216     }
1217     ScopedArenaVector<ValueRecord>& heap_values =
1218         heap_values_for_[new_array->GetBlock()->GetBlockId()];
1219     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1220       HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
1221       HInstruction* ref = location->GetReferenceInfo()->GetReference();
1222       if (ref == new_array && location->GetIndex() != nullptr) {
1223         // Array elements are set to default heap values.
1224         heap_values[i].value = Value::Default();
1225         heap_values[i].stored_by = Value::PureUnknown();
1226       }
1227     }
1228   }
1229 
1230   bool ShouldPerformPartialLSE() const {
1231     return perform_partial_lse_ && !GetGraph()->IsCompilingOsr();
1232   }
1233 
1234   bool perform_partial_lse_;
1235 
1236   const HeapLocationCollector& heap_location_collector_;
1237 
1238   // Use local allocator for allocating memory.
1239   ScopedArenaAllocator allocator_;
1240 
1241   // The maximum number of distinct Phi placeholders that may be needed (number of blocks * number of heap locations).
1242   size_t num_phi_placeholders_;
1243 
1244   // One array of heap value records for each block.
1245   ScopedArenaVector<ScopedArenaVector<ValueRecord>> heap_values_for_;
1246 
1247   // We record loads and stores for re-processing when we find a loop Phi placeholder
1248   // with unknown value from a predecessor, and also for removing stores that are
1249   // found to be dead, i.e. not marked in `kept_stores_` at the end.
1250   struct LoadStoreRecord {
1251     HInstruction* load_or_store;
1252     size_t heap_location_index;
1253   };
1254   ScopedArenaVector<LoadStoreRecord> loads_and_stores_;
1255 
1256   // We record the substitute instructions for loads that should be
1257   // eliminated but may be used by heap locations. They'll be removed
1258   // in the end. These are indexed by the load's id.
1259   ScopedArenaVector<HInstruction*> substitute_instructions_for_loads_;
1260 
1261   // Value at the start of the given instruction for instructions which directly
1262   // read from a heap-location (i.e. FieldGet). The mapping to heap-location is
1263   // implicit through the fact that each instruction can only directly refer to
1264   // a single heap-location.
1265   ScopedArenaHashMap<HInstruction*, Value> intermediate_values_;
1266 
1267   // Record stores to keep in a bit vector indexed by instruction ID.
1268   ArenaBitVector kept_stores_;
1269   // When we need to keep all stores that feed a Phi placeholder, we just record the
1270   // index of that placeholder for processing after graph traversal.
1271   ArenaBitVector phi_placeholders_to_search_for_kept_stores_;
1272 
1273   // Loads that would require a loop Phi to replace are recorded for processing
1274   // later as we do not have enough information from back-edges to determine if
1275   // a suitable Phi can be found or created when we visit these loads.
1276   ScopedArenaHashMap<HInstruction*, ValueRecord> loads_requiring_loop_phi_;
1277 
1278   // For stores, record the old value records that were replaced and the stored values.
1279   struct StoreRecord {
1280     ValueRecord old_value_record;
1281     HInstruction* stored_value;
1282   };
1283   // Small pre-allocated initial buffer avoids initializing a large one until it's really needed.
1284   static constexpr size_t kStoreRecordsInitialBufferSize = 16;
1285   std::pair<HInstruction*, StoreRecord> store_records_buffer_[kStoreRecordsInitialBufferSize];
1286   ScopedArenaHashMap<HInstruction*, StoreRecord> store_records_;
1287 
1288   // Replacements for Phi placeholders.
1289   // The invalid heap value is used to mark Phi placeholders that cannot be replaced.
1290   ScopedArenaVector<Value> phi_placeholder_replacements_;
1291 
1292   // Merged-unknowns that must have their predecessor values kept to ensure
1293   // partially escaped values are written
1294   ArenaBitVector kept_merged_unknowns_;
1295 
1296   ScopedArenaVector<HInstruction*> singleton_new_instances_;
1297 
1298   // The field infos for each heap location (if relevant).
1299   ScopedArenaVector<const FieldInfo*> field_infos_;
1300 
1301   Phase current_phase_;
1302 
1303   friend class PartialLoadStoreEliminationHelper;
1304   friend struct ScopedRestoreHeapValues;
1305 
1306   friend std::ostream& operator<<(std::ostream& os, const Value& v);
1307   friend std::ostream& operator<<(std::ostream& os, const PriorValueHolder& v);
1308   friend std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase);
1309 
1310   DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
1311 };
1312 
1313 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PriorValueHolder& p) {
1314   p.Dump(oss);
1315   return oss;
1316 }
1317 
1318 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase) {
1319   switch (phase) {
1320     case LSEVisitor::Phase::kLoadElimination:
1321       return oss << "kLoadElimination";
1322     case LSEVisitor::Phase::kStoreElimination:
1323       return oss << "kStoreElimination";
1324     case LSEVisitor::Phase::kPartialElimination:
1325       return oss << "kPartialElimination";
1326   }
1327 }
1328 
1329 void LSEVisitor::PriorValueHolder::Dump(std::ostream& oss) const {
1330   if (IsDefault()) {
1331     oss << "Default";
1332   } else if (IsPhi()) {
1333     oss << "Phi: " << GetPhiPlaceholder();
1334   } else {
1335     oss << "Instruction: " << *GetInstruction();
1336   }
1337 }
1338 
1339 constexpr LSEVisitor::PriorValueHolder::PriorValueHolder(Value val)
1340     : value_(Marker{}) {
1341   DCHECK(!val.IsInvalid() && !val.IsPureUnknown());
1342   if (val.IsPartialUnknown()) {
1343     value_ = val.GetPriorValue().value_;
1344   } else if (val.IsMergedUnknown() || val.NeedsPhi()) {
1345     value_ = val.GetPhiPlaceholder();
1346   } else if (val.IsInstruction()) {
1347     value_ = val.GetInstruction();
1348   } else {
1349     DCHECK(val.IsDefault());
1350   }
1351 }
1352 
1353 constexpr bool operator==(const LSEVisitor::Marker&, const LSEVisitor::Marker&) {
1354   return true;
1355 }
1356 
1357 constexpr bool operator==(const LSEVisitor::PriorValueHolder& p1,
1358                           const LSEVisitor::PriorValueHolder& p2) {
1359   return p1.Equals(p2);
1360 }
1361 
1362 constexpr bool operator==(const LSEVisitor::PhiPlaceholder& p1,
1363                           const LSEVisitor::PhiPlaceholder& p2) {
1364   return p1.Equals(p2);
1365 }
1366 
1367 constexpr bool operator==(const LSEVisitor::Value::NeedsLoopPhiMarker& p1,
1368                           const LSEVisitor::Value::NeedsLoopPhiMarker& p2) {
1369   return p1.phi_ == p2.phi_;
1370 }
1371 
1372 constexpr bool operator==(const LSEVisitor::Value::NeedsNonLoopPhiMarker& p1,
1373                           const LSEVisitor::Value::NeedsNonLoopPhiMarker& p2) {
1374   return p1.phi_ == p2.phi_;
1375 }
1376 
1377 constexpr bool operator==(const LSEVisitor::Value::MergedUnknownMarker& p1,
1378                           const LSEVisitor::Value::MergedUnknownMarker& p2) {
1379   return p1.phi_ == p2.phi_;
1380 }
1381 
1382 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PhiPlaceholder& p) {
1383   p.Dump(oss);
1384   return oss;
1385 }
1386 
1387 LSEVisitor::Value LSEVisitor::PriorValueHolder::ToValue() const {
1388   if (IsDefault()) {
1389     return Value::Default();
1390   } else if (IsPhi()) {
1391     return Value::ForLoopPhiPlaceholder(GetPhiPlaceholder());
1392   } else {
1393     return Value::ForInstruction(GetInstruction());
1394   }
1395 }
1396 
1397 constexpr bool LSEVisitor::Value::ExactEquals(LSEVisitor::Value other) const {
1398   return value_ == other.value_;
1399 }
1400 
1401 constexpr bool LSEVisitor::Value::Equals(LSEVisitor::Value other) const {
1402   // Only valid values can be compared.
1403   DCHECK(IsValid());
1404   DCHECK(other.IsValid());
1405   if (value_ == other.value_) {
1406     // Note: Two unknown values are considered different.
1407     return !IsUnknown();
1408   } else {
1409     // Default is considered equal to zero-bit-pattern instructions.
1410     return (IsDefault() && other.IsInstruction() && IsZeroBitPattern(other.GetInstruction())) ||
1411             (other.IsDefault() && IsInstruction() && IsZeroBitPattern(GetInstruction()));
1412   }
1413 }
1414 
1415 std::ostream& LSEVisitor::Value::Dump(std::ostream& os) const {
1416   if (std::holds_alternative<LSEVisitor::Value::ValuelessType>(value_)) {
1417     switch (GetValuelessType()) {
1418       case ValuelessType::kDefault:
1419         return os << "Default";
1420       case ValuelessType::kPureUnknown:
1421         return os << "PureUnknown";
1422       case ValuelessType::kInvalid:
1423         return os << "Invalid";
1424     }
1425   } else if (IsPartialUnknown()) {
1426     return os << "PartialUnknown[" << GetPriorValue() << "]";
1427   } else if (IsInstruction()) {
1428     return os << "Instruction[id: " << GetInstruction()->GetId()
1429               << ", block: " << GetInstruction()->GetBlock()->GetBlockId() << "]";
1430   } else if (IsMergedUnknown()) {
1431     return os << "MergedUnknown[block: " << GetPhiPlaceholder().GetBlockId()
1432               << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1433 
1434   } else if (NeedsLoopPhi()) {
1435     return os << "NeedsLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
1436               << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1437   } else {
1438     return os << "NeedsNonLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
1439               << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1440   }
1441 }
1442 
1443 std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) {
1444   return v.Dump(os);
1445 }
1446 
1447 LSEVisitor::LSEVisitor(HGraph* graph,
1448                        const HeapLocationCollector& heap_location_collector,
1449                        bool perform_partial_lse,
1450                        OptimizingCompilerStats* stats)
1451     : HGraphDelegateVisitor(graph, stats),
1452       perform_partial_lse_(perform_partial_lse),
1453       heap_location_collector_(heap_location_collector),
1454       allocator_(graph->GetArenaStack()),
1455       num_phi_placeholders_(GetGraph()->GetBlocks().size() *
1456                             heap_location_collector_.GetNumberOfHeapLocations()),
1457       heap_values_for_(graph->GetBlocks().size(),
1458                        ScopedArenaVector<ValueRecord>(allocator_.Adapter(kArenaAllocLSE)),
1459                        allocator_.Adapter(kArenaAllocLSE)),
1460       loads_and_stores_(allocator_.Adapter(kArenaAllocLSE)),
1461       // We may add new instructions (default values, Phis) but we're not adding loads
1462       // or stores, so we shall not need to resize the following vector and BitVector.
1463       substitute_instructions_for_loads_(graph->GetCurrentInstructionId(),
1464                                          nullptr,
1465                                          allocator_.Adapter(kArenaAllocLSE)),
1466       intermediate_values_(allocator_.Adapter(kArenaAllocLSE)),
1467       kept_stores_(&allocator_,
1468                    /*start_bits=*/graph->GetCurrentInstructionId(),
1469                    /*expandable=*/false,
1470                    kArenaAllocLSE),
1471       phi_placeholders_to_search_for_kept_stores_(&allocator_,
1472                                                   num_phi_placeholders_,
1473                                                   /*expandable=*/false,
1474                                                   kArenaAllocLSE),
1475       loads_requiring_loop_phi_(allocator_.Adapter(kArenaAllocLSE)),
1476       store_records_(store_records_buffer_,
1477                      kStoreRecordsInitialBufferSize,
1478                      allocator_.Adapter(kArenaAllocLSE)),
1479       phi_placeholder_replacements_(num_phi_placeholders_,
1480                                     Value::Invalid(),
1481                                     allocator_.Adapter(kArenaAllocLSE)),
1482       kept_merged_unknowns_(&allocator_,
1483                             /*start_bits=*/num_phi_placeholders_,
1484                             /*expandable=*/false,
1485                             kArenaAllocLSE),
1486       singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)),
1487       field_infos_(heap_location_collector_.GetNumberOfHeapLocations(),
1488                    allocator_.Adapter(kArenaAllocLSE)),
1489       current_phase_(Phase::kLoadElimination) {
1490   // Clear bit vectors.
1491   phi_placeholders_to_search_for_kept_stores_.ClearAllBits();
1492   kept_stores_.ClearAllBits();
1493 }
1494 
1495 LSEVisitor::Value LSEVisitor::PrepareLoopValue(HBasicBlock* block, size_t idx) {
1496   // If the pre-header value is known (which implies that the reference dominates this
1497   // block), use a Phi placeholder for the value in the loop header. If all predecessors
1498   // are later found to have a known value, we can replace loads from this location,
1499   // either with the pre-header value or with a new Phi. For array locations, the index
1500   // may be defined inside the loop but the only known value in that case should be the
1501   // default value or a Phi placeholder that can be replaced only with the default value.
1502   HLoopInformation* loop_info = block->GetLoopInformation();
1503   uint32_t pre_header_block_id = loop_info->GetPreHeader()->GetBlockId();
1504   Value pre_header_value = ReplacementOrValue(heap_values_for_[pre_header_block_id][idx].value);
1505   if (pre_header_value.IsUnknown()) {
1506     return pre_header_value;
1507   }
1508   if (kIsDebugBuild) {
1509     // Check that the reference indeed dominates this loop.
1510     HeapLocation* location = heap_location_collector_.GetHeapLocation(idx);
1511     HInstruction* ref = location->GetReferenceInfo()->GetReference();
1512     CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block))
1513         << GetGraph()->PrettyMethod();
1514     // Check that the index, if defined inside the loop, tracks a default value
1515     // or a Phi placeholder requiring a loop Phi.
1516     HInstruction* index = location->GetIndex();
1517     if (index != nullptr && loop_info->Contains(*index->GetBlock())) {
1518       CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default()))
1519           << GetGraph()->PrettyMethod() << " blk: " << block->GetBlockId() << " "
1520           << pre_header_value;
1521     }
1522   }
1523   PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1524   return ReplacementOrValue(Value::ForLoopPhiPlaceholder(phi_placeholder));
1525 }
1526 
1527 LSEVisitor::Value LSEVisitor::PrepareLoopStoredBy(HBasicBlock* block, size_t idx) {
1528   // Use the Phi placeholder for `stored_by` to make sure all incoming stores are kept
1529   // if the value in the location escapes. This is not applicable to singletons that are
1530   // defined inside the loop as they shall be dead in the loop header.
1531   ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
1532   if (ref_info->IsSingleton() &&
1533       block->GetLoopInformation()->Contains(*ref_info->GetReference()->GetBlock())) {
1534     return Value::PureUnknown();
1535   }
1536   PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1537   return Value::ForLoopPhiPlaceholder(phi_placeholder);
1538 }
1539 
1540 void LSEVisitor::PrepareLoopRecords(HBasicBlock* block) {
1541   DCHECK(block->IsLoopHeader());
1542   int block_id = block->GetBlockId();
1543   HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
1544   ScopedArenaVector<ValueRecord>& pre_header_heap_values =
1545       heap_values_for_[pre_header->GetBlockId()];
1546   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1547   DCHECK_EQ(num_heap_locations, pre_header_heap_values.size());
1548   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1549   DCHECK(heap_values.empty());
1550 
1551   // Don't eliminate loads in irreducible loops.
1552   if (block->GetLoopInformation()->IsIrreducible()) {
1553     heap_values.resize(num_heap_locations,
1554                        {/*value=*/Value::Invalid(), /*stored_by=*/Value::PureUnknown()});
1555     // Also keep the stores before the loop header, including in blocks that were not visited yet.
1556     bool is_osr = GetGraph()->IsCompilingOsr();
1557     for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1558       heap_values[idx].value =
1559           is_osr ? Value::PureUnknown()
1560                  : Value::MergedUnknown(GetPhiPlaceholder(block->GetBlockId(), idx));
1561       KeepStores(Value::ForLoopPhiPlaceholder(GetPhiPlaceholder(block->GetBlockId(), idx)));
1562     }
1563     return;
1564   }
1565 
1566   // Fill `heap_values` based on values from pre-header.
1567   heap_values.reserve(num_heap_locations);
1568   for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1569     heap_values.push_back({ PrepareLoopValue(block, idx), PrepareLoopStoredBy(block, idx) });
1570   }
1571 }
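// Merge the value of heap location `idx` from all predecessors of `block`: the common
// value if they all agree, a MergedUnknown placeholder if any predecessor's value is
// unknown, or a Phi placeholder when the known values conflict and a Phi may still
// allow the loads to be eliminated.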
1572 
1573 LSEVisitor::Value LSEVisitor::MergePredecessorValues(HBasicBlock* block, size_t idx) {
1574   ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1575   DCHECK(!predecessors.empty());
1576   Value merged_value =
1577       ReplacementOrValue(heap_values_for_[predecessors[0]->GetBlockId()][idx].value);
1578   for (size_t i = 1u, size = predecessors.size(); i != size; ++i) {
1579     Value pred_value =
1580         ReplacementOrValue(heap_values_for_[predecessors[i]->GetBlockId()][idx].value);
1581     if (pred_value.Equals(merged_value)) {
1582       // Value is the same. No need to update our merged value.
1583       continue;
1584     } else if (pred_value.IsUnknown() || merged_value.IsUnknown()) {
1585       // At least one of the values is unknown and the values differ, so merge into a MergedUnknown.
1586       PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1587       merged_value = Value::MergedUnknown(phi_placeholder);
1588       // We know that at least one of the merge points is unknown (and both are
1589       // not pure-unknowns since that's captured above). This means that the
1590       // overall value needs to be a MergedUnknown. Just return that.
1591       break;
1592     } else {
1593       // There are conflicting known values. We may still be able to replace loads with a Phi.
1594       PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1595       // Propagate the need for a new loop Phi from all predecessors.
1596       bool needs_loop_phi = merged_value.NeedsLoopPhi() || pred_value.NeedsLoopPhi();
1597       merged_value = ReplacementOrValue(Value::ForPhiPlaceholder(phi_placeholder, needs_loop_phi));
1598     }
1599   }
1600   DCHECK(!merged_value.IsPureUnknown() || block->GetPredecessors().size() <= 1)
1601       << merged_value << " in " << GetGraph()->PrettyMethod();
1602   return merged_value;
1603 }
1604 
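// Initialize the heap value records of `block` by merging the records of all its
// predecessors. The `stored_by` values are merged as well; if they disagree, a non-loop
// Phi placeholder is recorded so that stores from all predecessors can be kept.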
1605 void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) {
1606   if (block->IsExitBlock()) {
1607     // Exit block doesn't really merge values since the control flow ends in
1608     // its predecessors. Each predecessor needs to make sure stores are kept
1609     // if necessary.
1610     return;
1611   }
1612 
1613   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1614   DCHECK(heap_values.empty());
1615   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1616   if (block->GetPredecessors().empty()) {
1617     DCHECK(block->IsEntryBlock());
1618     heap_values.resize(num_heap_locations,
1619                        {/*value=*/Value::PureUnknown(), /*stored_by=*/Value::PureUnknown()});
1620     return;
1621   }
1622 
1623   heap_values.reserve(num_heap_locations);
1624   for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1625     Value merged_value = MergePredecessorValues(block, idx);
1626     if (kIsDebugBuild) {
1627       if (merged_value.NeedsPhi()) {
1628         uint32_t block_id = merged_value.GetPhiPlaceholder().GetBlockId();
1629         CHECK(GetGraph()->GetBlocks()[block_id]->Dominates(block));
1630       } else if (merged_value.IsInstruction()) {
1631         CHECK(merged_value.GetInstruction()->GetBlock()->Dominates(block));
1632       }
1633     }
1634     ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1635     Value merged_stored_by = heap_values_for_[predecessors[0]->GetBlockId()][idx].stored_by;
1636     for (size_t predecessor_idx = 1u; predecessor_idx != predecessors.size(); ++predecessor_idx) {
1637       uint32_t predecessor_block_id = predecessors[predecessor_idx]->GetBlockId();
1638       Value stored_by = heap_values_for_[predecessor_block_id][idx].stored_by;
1639       if ((!stored_by.IsUnknown() || !merged_stored_by.IsUnknown()) &&
1640           !merged_stored_by.Equals(stored_by)) {
1641         // Use the Phi placeholder to track that we need to keep stores from all predecessors.
1642         PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1643         merged_stored_by = Value::ForNonLoopPhiPlaceholder(phi_placeholder);
1644         break;
1645       }
1646     }
1647     heap_values.push_back({ merged_value, merged_stored_by });
1648   }
1649 }
1650 
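// Return an existing Phi in `block` whose inputs match `phi_inputs` exactly, or create
// a new HPhi with those inputs. Reference-typed Phis get their type information fixed
// up with ReferenceTypePropagation.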
1651 static HInstruction* FindOrConstructNonLoopPhi(
1652     HBasicBlock* block,
1653     const ScopedArenaVector<HInstruction*>& phi_inputs,
1654     DataType::Type type) {
1655   for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
1656     HInstruction* phi = phi_it.Current();
1657     DCHECK_EQ(phi->InputCount(), phi_inputs.size());
1658     auto cmp = [](HInstruction* lhs, const HUserRecord<HInstruction*>& rhs) {
1659       return lhs == rhs.GetInstruction();
1660     };
1661     if (std::equal(phi_inputs.begin(), phi_inputs.end(), phi->GetInputRecords().begin(), cmp)) {
1662       return phi;
1663     }
1664   }
1665   ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
1666   HPhi* phi = new (allocator) HPhi(allocator, kNoRegNumber, phi_inputs.size(), type);
1667   for (size_t i = 0, size = phi_inputs.size(); i != size; ++i) {
1668     DCHECK_NE(phi_inputs[i]->GetType(), DataType::Type::kVoid) << phi_inputs[i]->DebugName();
1669     phi->SetRawInputAt(i, phi_inputs[i]);
1670   }
1671   block->AddPhi(phi);
1672   if (type == DataType::Type::kReference) {
1673     // Update reference type information. Pass invalid handles, these are not used for Phis.
1674     ReferenceTypePropagation rtp_fixup(block->GetGraph(),
1675                                        Handle<mirror::ClassLoader>(),
1676                                        Handle<mirror::DexCache>(),
1677                                        /* is_first_run= */ false);
1678     rtp_fixup.Visit(phi);
1679   }
1680   return phi;
1681 }
1682 
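// Materialize the replacement for a non-loop Phi placeholder. A work queue is used to
// first materialize any predecessor placeholders the Phi depends on; each placeholder
// is then replaced by a Phi found or constructed from its predecessors' values.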
1683 void LSEVisitor::MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type) {
1684   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
1685   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1686   size_t idx = phi_placeholder.GetHeapLocation();
1687 
1688   // Use local allocator to reduce peak memory usage.
1689   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1690   // Reuse the same vector for collecting phi inputs.
1691   ScopedArenaVector<HInstruction*> phi_inputs(allocator.Adapter(kArenaAllocLSE));
1692 
1693   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
1694   work_queue.push_back(phi_placeholder);
1695   while (!work_queue.empty()) {
1696     PhiPlaceholder current_phi_placeholder = work_queue.back();
1697     if (phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].IsValid()) {
1698       // This Phi placeholder was pushed to the `work_queue` followed by another Phi placeholder
1699       // that directly or indirectly depends on it, so it was already processed as part of the
1700       // other Phi placeholder's dependencies before this one got back to the top of the stack.
1701       work_queue.pop_back();
1702       continue;
1703     }
1704     uint32_t current_block_id = current_phi_placeholder.GetBlockId();
1705     HBasicBlock* current_block = blocks[current_block_id];
1706     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1707 
1708     // Non-loop Phis cannot depend on a loop Phi, so we should not see any loop header here.
1709     // And the only way for such merged value to reach a different heap location is through
1710     // a load at which point we materialize the Phi. Therefore all non-loop Phi placeholders
1711     // seen here are tied to one heap location.
1712     DCHECK(!current_block->IsLoopHeader())
1713         << current_phi_placeholder << " phase: " << current_phase_;
1714     DCHECK_EQ(current_phi_placeholder.GetHeapLocation(), idx);
1715 
1716     phi_inputs.clear();
1717     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1718       Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1719       DCHECK(!pred_value.IsPureUnknown()) << pred_value << " block " << current_block->GetBlockId()
1720                                           << " pred: " << predecessor->GetBlockId();
1721       if (pred_value.NeedsNonLoopPhi() ||
1722           (current_phase_ == Phase::kPartialElimination && pred_value.IsMergedUnknown())) {
1723         // We need to process the Phi placeholder first.
1724         work_queue.push_back(pred_value.GetPhiPlaceholder());
1725       } else if (pred_value.IsDefault()) {
1726         phi_inputs.push_back(GetDefaultValue(type));
1727       } else {
1728         DCHECK(pred_value.IsInstruction()) << pred_value << " block " << current_block->GetBlockId()
1729                                            << " pred: " << predecessor->GetBlockId();
1730         phi_inputs.push_back(pred_value.GetInstruction());
1731       }
1732     }
1733     if (phi_inputs.size() == current_block->GetPredecessors().size()) {
1734       // All inputs are available. Find or construct the Phi replacement.
1735       phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)] =
1736           Value::ForInstruction(FindOrConstructNonLoopPhi(current_block, phi_inputs, type));
1737       // Remove the block from the queue.
1738       DCHECK_EQ(current_phi_placeholder, work_queue.back());
1739       work_queue.pop_back();
1740     }
1741   }
1742 }
1743 
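// Process a load from heap location `idx`. The load is either replaced by the tracked
// value (a default constant, a previously stored or loaded instruction, or a materialized
// non-loop Phi), recorded for later loop-Phi processing, or kept and used as the new
// tracked value when the current value is unknown.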
1744 void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
1745   DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1746   uint32_t block_id = instruction->GetBlock()->GetBlockId();
1747   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1748   ValueRecord& record = heap_values[idx];
1749   if (instruction->IsFieldAccess()) {
1750     RecordFieldInfo(&instruction->GetFieldInfo(), idx);
1751   }
1752   DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value)));
1753   // If we are unknown, we either come from somewhere untracked or we can reconstruct the partial
1754   // value.
1755   DCHECK(!record.value.IsPureUnknown() ||
1756          heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo() == nullptr ||
1757          !heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()->IsPartialSingleton())
1758          << "In " << GetGraph()->PrettyMethod() << ": " << record.value << " for " << *instruction;
1759   intermediate_values_.insert({instruction, record.value});
1760   loads_and_stores_.push_back({ instruction, idx });
1761   if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) &&
1762       !IsDefaultOrPhiAllowedForLoad(instruction)) {
1763     record.value = Value::PureUnknown();
1764   }
1765   if (record.value.IsDefault()) {
1766     KeepStores(record.stored_by);
1767     HInstruction* constant = GetDefaultValue(instruction->GetType());
1768     AddRemovedLoad(instruction, constant);
1769     record.value = Value::ForInstruction(constant);
1770   } else if (record.value.IsUnknown()) {
1771     // Load isn't eliminated. Put the load as the value into the HeapLocation.
1772     // This acts like GVN but with better aliasing analysis.
1773     Value old_value = record.value;
1774     record.value = Value::ForInstruction(instruction);
1775     KeepStoresIfAliasedToLocation(heap_values, idx);
1776     KeepStores(old_value);
1777   } else if (record.value.NeedsLoopPhi()) {
1778     // We do not know yet if the value is known for all back edges. Record for future processing.
1779     loads_requiring_loop_phi_.insert(std::make_pair(instruction, record));
1780   } else {
1781     // This load can be eliminated but we may need to construct non-loop Phis.
1782     if (record.value.NeedsNonLoopPhi()) {
1783       MaterializeNonLoopPhis(record.value.GetPhiPlaceholder(), instruction->GetType());
1784       record.value = Replacement(record.value);
1785     }
1786     HInstruction* heap_value = FindSubstitute(record.value.GetInstruction());
1787     AddRemovedLoad(instruction, heap_value);
1788     TryRemovingNullCheck(instruction);
1789   }
1790 }
1791 
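// Process a store to heap location `idx`. A store of the value already tracked for the
// location is removed immediately; for example, in a sequence like `x = o.f; o.f = x;`
// the second statement stores the value the location is known to hold. Other stores are
// recorded so that the store elimination phase can later decide whether they are dead.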
1792 void LSEVisitor::VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
1793   DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1794   DCHECK(!IsStore(value)) << value->DebugName();
1795   if (instruction->IsFieldAccess()) {
1796     RecordFieldInfo(&instruction->GetFieldInfo(), idx);
1797   }
1798   // value may already have a substitute.
1799   value = FindSubstitute(value);
1800   HBasicBlock* block = instruction->GetBlock();
1801   ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1802   ValueRecord& record = heap_values[idx];
1803   DCHECK(!record.value.IsInstruction() ||
1804          FindSubstitute(record.value.GetInstruction()) == record.value.GetInstruction());
1805 
1806   if (record.value.Equals(value)) {
1807     // Store into the heap location with the same value.
1808     // This store can be eliminated right away.
1809     block->RemoveInstruction(instruction);
1810     return;
1811   }
1812 
1813   store_records_.insert(std::make_pair(instruction, StoreRecord{record, value}));
1814   loads_and_stores_.push_back({ instruction, idx });
1815 
1816   // If the `record.stored_by` specified a store from this block, it shall be removed
1817   // at the end, except for throwing ArraySet; it cannot be marked for keeping in
1818   // `kept_stores_` anymore after we update the `record.stored_by` below.
1819   DCHECK(!record.stored_by.IsInstruction() ||
1820          record.stored_by.GetInstruction()->GetBlock() != block ||
1821          record.stored_by.GetInstruction()->CanThrow() ||
1822          !kept_stores_.IsBitSet(record.stored_by.GetInstruction()->GetId()));
1823 
1824   if (instruction->CanThrow()) {
1825     // Previous stores can become visible.
1826     HandleExit(instruction->GetBlock());
1827     // We cannot remove a possibly throwing store.
1828     // After marking it as kept, it does not matter if we track it in `stored_by` or not.
1829     kept_stores_.SetBit(instruction->GetId());
1830   }
1831 
1832   // Update the record.
1833   auto it = loads_requiring_loop_phi_.find(value);
1834   if (it != loads_requiring_loop_phi_.end()) {
1835     // Propagate the Phi placeholder to the record.
1836     record.value = it->second.value;
1837     DCHECK(record.value.NeedsLoopPhi());
1838   } else {
1839     record.value = Value::ForInstruction(value);
1840   }
1841   // Track the store in the value record. If the value is loaded or needed after
1842   // return/deoptimization later, this store isn't really redundant.
1843   record.stored_by = Value::ForInstruction(instruction);
1844 
1845   // This store may kill values in other heap locations due to aliasing.
1846   for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1847     if (i == idx ||
1848         heap_values[i].value.IsUnknown() ||
1849         CanValueBeKeptIfSameAsNew(heap_values[i].value, value, instruction) ||
1850         !heap_location_collector_.MayAlias(i, idx)) {
1851       continue;
1852     }
1853     // Kill heap locations that may alias and keep previous stores to these locations.
1854     KeepStores(heap_values[i].stored_by);
1855     heap_values[i].stored_by = Value::PureUnknown();
1856     heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
1857   }
1858 }
1859 
1860 void LSEVisitor::VisitBasicBlock(HBasicBlock* block) {
1861   // Populate the heap_values array for this block.
1862   // TODO: try to reuse the heap_values array from one predecessor if possible.
1863   if (block->IsLoopHeader()) {
1864     PrepareLoopRecords(block);
1865   } else {
1866     MergePredecessorRecords(block);
1867   }
1868   // Visit instructions.
1869   HGraphVisitor::VisitBasicBlock(block);
1870 }
1871 
1872 bool LSEVisitor::MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const {
1873   DCHECK_NE(idx1, idx2);
1874   DCHECK(loop_header->IsLoopHeader());
1875   if (heap_location_collector_.MayAlias(idx1, idx2)) {
1876     return true;
1877   }
1878   // For array locations with index defined inside the loop, include
1879   // all other locations in the array, even those that LSA declares
1880   // non-aliasing, such as `a[i]` and `a[i + 1]`, as they may actually
1881   // refer to the same locations for different iterations. (LSA's
1882   // `ComputeMayAlias()` does not consider different loop iterations.)
1883   HeapLocation* loc1 = heap_location_collector_.GetHeapLocation(idx1);
1884   HeapLocation* loc2 = heap_location_collector_.GetHeapLocation(idx2);
1885   if (loc1->IsArray() &&
1886       loc2->IsArray() &&
1887       HeapLocationCollector::CanReferencesAlias(loc1->GetReferenceInfo(),
1888                                                 loc2->GetReferenceInfo())) {
1889     HLoopInformation* loop_info = loop_header->GetLoopInformation();
1890     if (loop_info->Contains(*loc1->GetIndex()->GetBlock()) ||
1891         loop_info->Contains(*loc2->GetIndex()->GetBlock())) {
1892       // Consider the locations aliasing. Do not optimize the case where both indexes
1893       // are loop invariants defined inside the loop, rely on LICM to pull them out.
1894       return true;
1895     }
1896   }
1897   return false;
1898 }
1899 
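// Try to replace the loop Phi placeholder, and all placeholders it transitively depends
// on, with the default value of `type`. This succeeds only if every non-Phi input found
// by the depth-first search is the default value; successfully replaced placeholders are
// removed from `phi_placeholders_to_materialize`.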
1900 bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault(
1901     PhiPlaceholder phi_placeholder,
1902     DataType::Type type,
1903     /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
1904   // Use local allocator to reduce peak memory usage.
1905   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1906   ArenaBitVector visited(&allocator,
1907                          /*start_bits=*/ num_phi_placeholders_,
1908                          /*expandable=*/ false,
1909                          kArenaAllocLSE);
1910   visited.ClearAllBits();
1911   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
1912 
1913   // Use depth first search to check if any non-Phi input is unknown.
1914   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1915   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1916   visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
1917   work_queue.push_back(phi_placeholder);
1918   while (!work_queue.empty()) {
1919     PhiPlaceholder current_phi_placeholder = work_queue.back();
1920     work_queue.pop_back();
1921     HBasicBlock* block = blocks[current_phi_placeholder.GetBlockId()];
1922     DCHECK_GE(block->GetPredecessors().size(), 2u);
1923     size_t idx = current_phi_placeholder.GetHeapLocation();
1924     for (HBasicBlock* predecessor : block->GetPredecessors()) {
1925       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1926       if (value.NeedsPhi()) {
1927         // Visit the predecessor Phi placeholder if it's not visited yet.
1928         if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
1929           visited.SetBit(PhiPlaceholderIndex(value));
1930           work_queue.push_back(value.GetPhiPlaceholder());
1931         }
1932       } else if (!value.Equals(Value::Default())) {
1933         return false;  // Report failure.
1934       }
1935     }
1936     if (block->IsLoopHeader()) {
1937       // For back-edges we need to check all locations that write to the same array,
1938       // even those that LSA declares non-aliasing, such as `a[i]` and `a[i + 1]`
1939       // as they may actually refer to the same locations for different iterations.
1940       for (size_t i = 0; i != num_heap_locations; ++i) {
1941         if (i == idx ||
1942             heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo() !=
1943                 heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()) {
1944           continue;
1945         }
1946         for (HBasicBlock* predecessor : block->GetPredecessors()) {
1947           // Check if there were any writes to this location.
1948           // Note: We could simply process the values but due to the vector operation
1949           // carve-out (see `IsDefaultOrPhiAllowedForLoad()`), a vector load can cause
1950           // the value to change and not be equal to default. To work around this and
1951           // allow replacing the non-vector load of loop-invariant default values
1952           // anyway, skip over paths that do not have any writes.
1953           ValueRecord record = heap_values_for_[predecessor->GetBlockId()][i];
1954           while (record.stored_by.NeedsLoopPhi() &&
1955                  blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->IsLoopHeader()) {
1956             HLoopInformation* loop_info =
1957                 blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->GetLoopInformation();
1958             record = heap_values_for_[loop_info->GetPreHeader()->GetBlockId()][i];
1959           }
1960           Value value = ReplacementOrValue(record.value);
1961           if (value.NeedsPhi()) {
1962             // Visit the predecessor Phi placeholder if it's not visited yet.
1963             if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
1964               visited.SetBit(PhiPlaceholderIndex(value));
1965               work_queue.push_back(value.GetPhiPlaceholder());
1966             }
1967           } else if (!value.Equals(Value::Default())) {
1968             return false;  // Report failure.
1969           }
1970         }
1971       }
1972     }
1973   }
1974 
1975   // Record replacement and report success.
1976   HInstruction* replacement = GetDefaultValue(type);
1977   for (uint32_t phi_placeholder_index : visited.Indexes()) {
1978     DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
1979     phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
1980   }
1981   phi_placeholders_to_materialize->Subtract(&visited);
1982   return true;
1983 }
1984 
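// Try to replace the loop Phi placeholder, and all placeholders it transitively depends
// on, with a single instruction defined before the loop. This succeeds only if every
// non-Phi input found by the depth-first search is that same instruction.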
1985 bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput(
1986     PhiPlaceholder phi_placeholder,
1987     /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
1988   // Use local allocator to reduce peak memory usage.
1989   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1990   ArenaBitVector visited(&allocator,
1991                          /*start_bits=*/ num_phi_placeholders_,
1992                          /*expandable=*/ false,
1993                          kArenaAllocLSE);
1994   visited.ClearAllBits();
1995   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
1996 
1997   // Use depth first search to check if any non-Phi input is unknown.
1998   HInstruction* replacement = nullptr;
1999   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2000   visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
2001   work_queue.push_back(phi_placeholder);
2002   while (!work_queue.empty()) {
2003     PhiPlaceholder current_phi_placeholder = work_queue.back();
2004     work_queue.pop_back();
2005     HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2006     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2007     size_t idx = current_phi_placeholder.GetHeapLocation();
2008     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2009       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2010       if (value.NeedsPhi()) {
2011         // Visit the predecessor Phi placeholder if it's not visited yet.
2012         if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
2013           visited.SetBit(PhiPlaceholderIndex(value));
2014           work_queue.push_back(value.GetPhiPlaceholder());
2015         }
2016       } else {
2017         if (!value.IsInstruction() ||
2018             (replacement != nullptr && replacement != value.GetInstruction())) {
2019           return false;  // Report failure.
2020         }
2021         replacement = value.GetInstruction();
2022       }
2023     }
2024     // While `TryReplacingLoopPhiPlaceholderWithDefault()` has special treatment
2025     // for back-edges, it is not needed here. When looking for a single input
2026     // instruction coming from before the loop, the array index must also be
2027     // defined before the loop and the aliasing analysis done by LSA is sufficient.
2028     // Any writes of a different value with an index that is not loop invariant
2029     // would invalidate the heap location in `VisitSetLocation()`.
2030   }
2031 
2032   // Record replacement and report success.
2033   DCHECK(replacement != nullptr);
2034   for (uint32_t phi_placeholder_index : visited.Indexes()) {
2035     DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
2036     phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2037   }
2038   phi_placeholders_to_materialize->Subtract(&visited);
2039   return true;
2040 }
2041 
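// Collect into `phi_placeholders_to_materialize` all loop Phi placeholders that must be
// materialized together with `phi_placeholder`. Returns a placeholder that cannot be
// materialized (an unknown input, or possible aliasing on a back-edge), or std::nullopt
// if materialization can proceed.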
2042 std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::FindLoopPhisToMaterialize(
2043     PhiPlaceholder phi_placeholder,
2044     /*inout*/ ArenaBitVector* phi_placeholders_to_materialize,
2045     DataType::Type type,
2046     bool can_use_default_or_phi) {
2047   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2048 
2049   // Use local allocator to reduce peak memory usage.
2050   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2051   ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
2052 
2053   // Use depth first search to check if any non-Phi input is unknown.
2054   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2055   phi_placeholders_to_materialize->ClearAllBits();
2056   phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(phi_placeholder));
2057   work_queue.push_back(phi_placeholder);
2058   while (!work_queue.empty()) {
2059     PhiPlaceholder current_phi_placeholder = work_queue.back();
2060     work_queue.pop_back();
2061     if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(current_phi_placeholder))) {
2062       // Replaced by `TryReplacingLoopPhiPlaceholderWith{Default,SingleInput}()`.
2063       DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].Equals(
2064                  Value::Default()));
2065       continue;
2066     }
2067     HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2068     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2069     size_t idx = current_phi_placeholder.GetHeapLocation();
2070     if (current_block->IsLoopHeader()) {
2071       // If the index is defined inside the loop, it may reference different elements of the
2072       // array on each iteration. Since we do not track if all elements of an array are set
2073       // to the same value explicitly, the only known value in pre-header can be the default
2074       // value from NewArray or a Phi placeholder depending on a default value from some outer
2075       // loop pre-header. This Phi placeholder can be replaced only by the default value.
2076       HInstruction* index = heap_location_collector_.GetHeapLocation(idx)->GetIndex();
2077       if (index != nullptr && current_block->GetLoopInformation()->Contains(*index->GetBlock())) {
2078         if (can_use_default_or_phi &&
2079             TryReplacingLoopPhiPlaceholderWithDefault(current_phi_placeholder,
2080                                                       type,
2081                                                       phi_placeholders_to_materialize)) {
2082           continue;
2083         } else {
2084           return current_phi_placeholder;  // Report the loop Phi placeholder.
2085         }
2086       }
2087       // A similar situation arises with the index defined outside the loop if we cannot use
2088       // default values or Phis, i.e. for vector loads, as we can only replace the Phi
2089       // placeholder with a single instruction defined before the loop.
2090       if (!can_use_default_or_phi) {
2091         DCHECK(index != nullptr);  // Vector operations are array operations.
2092         if (TryReplacingLoopPhiPlaceholderWithSingleInput(current_phi_placeholder,
2093                                                           phi_placeholders_to_materialize)) {
2094           continue;
2095         } else {
2096           return current_phi_placeholder;  // Report the loop Phi placeholder.
2097         }
2098       }
2099     }
2100     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2101       ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
2102       Value value = ReplacementOrValue(heap_values[idx].value);
2103       if (value.IsUnknown()) {
2104         // We cannot create a Phi for this loop Phi placeholder.
2105         return current_phi_placeholder;  // Report the loop Phi placeholder.
2106       }
2107       // For arrays, the location may have been clobbered by writes to other locations
2108       // in a loop that LSA does not consider aliasing, such as `a[i]` and `a[i + 1]`.
2109       if (current_block->IsLoopHeader() &&
2110           predecessor != current_block->GetLoopInformation()->GetPreHeader() &&
2111           heap_location_collector_.GetHeapLocation(idx)->GetIndex() != nullptr) {
2112         for (size_t i = 0, size = heap_values.size(); i != size; ++i) {
2113           if (i != idx &&
2114               !heap_values[i].stored_by.IsUnknown() &&
2115               MayAliasOnBackEdge(current_block, idx, i)) {
2116             // We cannot create a Phi for this loop Phi placeholder.
2117             return current_phi_placeholder;
2118           }
2119         }
2120       }
2121       if (value.NeedsLoopPhi()) {
2122         // Visit the predecessor Phi placeholder if it's not visited yet.
2123         if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(value))) {
2124           phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(value));
2125           work_queue.push_back(value.GetPhiPlaceholder());
2126           LSE_VLOG << "For materialization of " << phi_placeholder
2127                    << " we need to materialize " << value;
2128         }
2129       }
2130     }
2131   }
2132 
2133   // There are no unknown values feeding this Phi, so we can construct the Phis if needed.
2134   return std::nullopt;
2135 }
2136 
2137 bool LSEVisitor::MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
2138                                      DataType::Type type) {
2139   return MaterializeLoopPhis(ArrayRef<const size_t>(phi_placeholder_indexes), type);
2140 }
2141 
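// Materialize a group of loop Phi placeholders that must be replaced together. If all
// non-loop-Phi inputs agree, the whole group is replaced by that single value; a single
// placeholder may also be matched against an existing Phi. Otherwise new Phis are
// created, except during the store elimination phase, where this reports failure.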
2142 bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes,
2143                                      DataType::Type type) {
2144   // Materialize all predecessors that do not need a loop Phi and determine if all inputs
2145   // other than loop Phis are the same.
2146   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2147   std::optional<Value> other_value = std::nullopt;
2148   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2149     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2150     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2151     DCHECK_GE(block->GetPredecessors().size(), 2u);
2152     size_t idx = phi_placeholder.GetHeapLocation();
2153     for (HBasicBlock* predecessor : block->GetPredecessors()) {
2154       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2155       if (value.NeedsNonLoopPhi()) {
2156         DCHECK(current_phase_ == Phase::kLoadElimination ||
2157                current_phase_ == Phase::kPartialElimination)
2158             << current_phase_;
2159         MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type);
2160         value = Replacement(value);
2161       }
2162       if (!value.NeedsLoopPhi()) {
2163         if (!other_value) {
2164           // The first other value we found.
2165           other_value = value;
2166         } else if (!other_value->IsInvalid()) {
2167           // Check if the current `value` differs from the previous `other_value`.
2168           if (!value.Equals(*other_value)) {
2169             other_value = Value::Invalid();
2170           }
2171         }
2172       }
2173     }
2174   }
2175 
2176   DCHECK(other_value.has_value());
2177   if (!other_value->IsInvalid()) {
2178     HInstruction* replacement =
2179         (other_value->IsDefault()) ? GetDefaultValue(type) : other_value->GetInstruction();
2180     for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2181       phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2182     }
2183     return true;
2184   }
2185 
2186   // If we're materializing only a single Phi, try to match it with an existing Phi.
2187   // (Matching multiple Phis would need investigation. It may be prohibitively slow.)
2188   // This also covers the case when, after replacing a previous set of Phi placeholders,
2189   // we continue with a Phi placeholder that does not really need a loop Phi anymore.
2190   if (phi_placeholder_indexes.size() == 1u) {
2191     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_indexes[0]);
2192     size_t idx = phi_placeholder.GetHeapLocation();
2193     HBasicBlock* block = GetGraph()->GetBlocks()[phi_placeholder.GetBlockId()];
2194     ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
2195     for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
2196       HInstruction* phi = phi_it.Current();
2197       DCHECK_EQ(phi->InputCount(), predecessors.size());
2198       ArrayRef<HUserRecord<HInstruction*>> phi_inputs = phi->GetInputRecords();
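      // The comparator checks each Phi input against the heap value from the corresponding
      // predecessor: where that value is still this Phi placeholder, the input must be the
      // candidate Phi itself; otherwise the input must equal the known value.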
2199       auto cmp = [=](const HUserRecord<HInstruction*>& lhs, HBasicBlock* rhs) {
2200         Value value = ReplacementOrValue(heap_values_for_[rhs->GetBlockId()][idx].value);
2201         if (value.NeedsPhi()) {
2202           DCHECK(value.GetPhiPlaceholder() == phi_placeholder);
2203           return lhs.GetInstruction() == phi;
2204         } else {
2205           DCHECK(value.IsDefault() || value.IsInstruction());
2206           return value.Equals(lhs.GetInstruction());
2207         }
2208       };
2209       if (std::equal(phi_inputs.begin(), phi_inputs.end(), predecessors.begin(), cmp)) {
2210         phi_placeholder_replacements_[phi_placeholder_indexes[0]] = Value::ForInstruction(phi);
2211         return true;
2212       }
2213     }
2214   }
2215 
2216   if (current_phase_ == Phase::kStoreElimination) {
2217     // We're not creating Phis during the final store elimination phase.
2218     return false;
2219   }
2220 
2221   // There are different inputs to the Phi chain. Create the Phis.
2222   ArenaAllocator* allocator = GetGraph()->GetAllocator();
2223   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2224     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2225     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2226     CHECK_GE(block->GetPredecessors().size(), 2u);
2227     phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(
2228         new (allocator) HPhi(allocator, kNoRegNumber, block->GetPredecessors().size(), type));
2229   }
2230   // Fill the Phi inputs.
2231   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2232     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2233     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2234     size_t idx = phi_placeholder.GetHeapLocation();
2235     HInstruction* phi = phi_placeholder_replacements_[phi_placeholder_index].GetInstruction();
2236     DCHECK(DataType::IsTypeConversionImplicit(type, phi->GetType()))
2237         << "type=" << type << " vs phi-type=" << phi->GetType();
2238     for (size_t i = 0, size = block->GetPredecessors().size(); i != size; ++i) {
2239       HBasicBlock* predecessor = block->GetPredecessors()[i];
2240       Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2241       HInstruction* input = value.IsDefault() ? GetDefaultValue(type) : value.GetInstruction();
2242       DCHECK_NE(input->GetType(), DataType::Type::kVoid);
2243       phi->SetRawInputAt(i, input);
2244       DCHECK(DataType::IsTypeConversionImplicit(input->GetType(), phi->GetType()))
2245           << " input: " << input->GetType() << value << " phi: " << phi->GetType()
2246           << " request: " << type;
2247     }
2248   }
2249   // Add the Phis to their blocks.
2250   for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2251     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2252     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2253     block->AddPhi(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction()->AsPhi());
2254   }
2255   if (type == DataType::Type::kReference) {
2256     ScopedArenaAllocator local_allocator(allocator_.GetArenaStack());
2257     ScopedArenaVector<HInstruction*> phis(local_allocator.Adapter(kArenaAllocLSE));
2258     for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2259       phis.push_back(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction());
2260     }
2261     // Update reference type information. Pass invalid handles, these are not used for Phis.
2262     ReferenceTypePropagation rtp_fixup(GetGraph(),
2263                                        Handle<mirror::ClassLoader>(),
2264                                        Handle<mirror::DexCache>(),
2265                                        /* is_first_run= */ false);
2266     rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis));
2267   }
2268 
2269   return true;
2270 }
2271 
2272 bool LSEVisitor::MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
2273                                      DataType::Type type) {
2274   // Use local allocator to reduce peak memory usage.
2275   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2276 
2277   // We want to recognize when a subset of these loop Phis that does not need any other
2278   // loop Phis, i.e. a transitive closure, has only one other instruction as an input,
2279   // so that this instruction can be used instead of each Phi in the set. See for example
2280   // Main.testLoop{5,6,7,8}() in the test 530-checker-lse. To do that, we shall
2281   // materialize these loop Phis from the smallest transitive closure.
2282 
2283   // Construct a matrix of loop phi placeholder dependencies. To reduce the memory usage,
2284   // assign new indexes to the Phi placeholders, making the matrix dense.
2285   ScopedArenaVector<size_t> matrix_indexes(num_phi_placeholders_,
2286                                            static_cast<size_t>(-1),  // Invalid.
2287                                            allocator.Adapter(kArenaAllocLSE));
2288   ScopedArenaVector<size_t> phi_placeholder_indexes(allocator.Adapter(kArenaAllocLSE));
2289   size_t num_phi_placeholders = phi_placeholders_to_materialize.NumSetBits();
2290   phi_placeholder_indexes.reserve(num_phi_placeholders);
2291   for (uint32_t marker_index : phi_placeholders_to_materialize.Indexes()) {
2292     matrix_indexes[marker_index] = phi_placeholder_indexes.size();
2293     phi_placeholder_indexes.push_back(marker_index);
2294   }
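  // Illustrative example (hypothetical indexes): if the placeholders to materialize have
  // global indexes {5, 9, 17}, they receive dense matrix indexes {0, 1, 2}, i.e.
  // matrix_indexes[5] == 0, matrix_indexes[9] == 1, matrix_indexes[17] == 2, while
  // phi_placeholder_indexes == {5, 9, 17} maps dense indexes back to global ones.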
2295   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2296   ScopedArenaVector<ArenaBitVector*> dependencies(allocator.Adapter(kArenaAllocLSE));
2297   dependencies.reserve(num_phi_placeholders);
2298   for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2299     static constexpr bool kExpandable = false;
2300     dependencies.push_back(
2301         ArenaBitVector::Create(&allocator, num_phi_placeholders, kExpandable, kArenaAllocLSE));
2302     ArenaBitVector* current_dependencies = dependencies.back();
2303     current_dependencies->ClearAllBits();
2304     current_dependencies->SetBit(matrix_index);  // Count the Phi placeholder as its own dependency.
2305     PhiPlaceholder current_phi_placeholder =
2306         GetPhiPlaceholderAt(phi_placeholder_indexes[matrix_index]);
2307     HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2308     DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2309     size_t idx = current_phi_placeholder.GetHeapLocation();
2310     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2311       Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2312       if (pred_value.NeedsLoopPhi()) {
2313         size_t pred_value_index = PhiPlaceholderIndex(pred_value);
2314         DCHECK(phi_placeholder_replacements_[pred_value_index].IsInvalid());
2315         DCHECK_NE(matrix_indexes[pred_value_index], static_cast<size_t>(-1));
2316         current_dependencies->SetBit(matrix_indexes[PhiPlaceholderIndex(pred_value)]);
2317       }
2318     }
2319   }
2320 
2321   // Use the Floyd-Warshall algorithm to determine all transitive dependencies.
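  // Illustrative example: if dense index 0 depends on 1 and 1 depends on 2 (each row also
  // contains its own bit, set above), the closure below additionally records 0 -> 2.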
2322   for (size_t k = 0; k != num_phi_placeholders; ++k) {
2323     for (size_t i = 0; i != num_phi_placeholders; ++i) {
2324       for (size_t j = 0; j != num_phi_placeholders; ++j) {
2325         if (dependencies[i]->IsBitSet(k) && dependencies[k]->IsBitSet(j)) {
2326           dependencies[i]->SetBit(j);
2327         }
2328       }
2329     }
2330   }
2331 
2332   // Count the number of transitive dependencies for each replaceable Phi placeholder.
2333   ScopedArenaVector<size_t> num_dependencies(allocator.Adapter(kArenaAllocLSE));
2334   num_dependencies.reserve(num_phi_placeholders);
2335   for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2336     num_dependencies.push_back(dependencies[matrix_index]->NumSetBits());
2337   }
2338 
2339   // Pick a Phi placeholder with the smallest number of transitive dependencies and
2340   // materialize it and its dependencies. Repeat until we have materialized all.
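  // Continuing the example above, the singleton closure {2} is materialized first; its bits
  // are then subtracted from the remaining rows, so {1} and finally {0} follow. Starting
  // from the smallest closure maximizes the chance that a subset has a single common input.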
2341   ScopedArenaVector<size_t> current_subset(allocator.Adapter(kArenaAllocLSE));
2342   current_subset.reserve(num_phi_placeholders);
2343   size_t remaining_phi_placeholders = num_phi_placeholders;
2344   while (remaining_phi_placeholders != 0u) {
2345     auto it = std::min_element(num_dependencies.begin(), num_dependencies.end());
2346     DCHECK_LE(*it, remaining_phi_placeholders);
2347     size_t current_matrix_index = std::distance(num_dependencies.begin(), it);
2348     ArenaBitVector* current_dependencies = dependencies[current_matrix_index];
2349     size_t current_num_dependencies = num_dependencies[current_matrix_index];
2350     current_subset.clear();
2351     for (uint32_t matrix_index : current_dependencies->Indexes()) {
2352       current_subset.push_back(phi_placeholder_indexes[matrix_index]);
2353     }
2354     if (!MaterializeLoopPhis(current_subset, type)) {
2355       DCHECK_EQ(current_phase_, Phase::kStoreElimination);
2356       // This is the final store elimination phase and we shall not be able to eliminate any
2357       // stores that depend on the current subset, so mark these Phi placeholders unreplaceable.
2358       for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2359         if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
2360           DCHECK(phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]].IsInvalid());
2361           phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]] =
2362               Value::PureUnknown();
2363         }
2364       }
2365       return false;
2366     }
2367     for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2368       if (current_dependencies->IsBitSet(matrix_index)) {
2369         // Mark all dependencies as done by incrementing their `num_dependencies[.]`,
2370         // so that they shall never be the minimum again.
2371         num_dependencies[matrix_index] = num_phi_placeholders;
2372       } else if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
2373         // Remove dependencies from other Phi placeholders.
2374         dependencies[matrix_index]->Subtract(current_dependencies);
2375         num_dependencies[matrix_index] -= current_num_dependencies;
2376       }
2377     }
2378     remaining_phi_placeholders -= current_num_dependencies;
2379   }
2380   return true;
2381 }
2382 
2383 bool LSEVisitor::FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type) {
2384   ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
2385   ArenaBitVector abv(&saa, num_phi_placeholders_, false, ArenaAllocKind::kArenaAllocLSE);
2386   auto res =
2387       FindLoopPhisToMaterialize(phi_placeholder, &abv, type, /* can_use_default_or_phi=*/true);
2388   CHECK(!res.has_value()) << *res;
2389   return MaterializeLoopPhis(abv, type);
2390 }
2391 
2392 std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::TryToMaterializeLoopPhis(
2393     PhiPlaceholder phi_placeholder, HInstruction* load) {
2394   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2395 
2396   // Use local allocator to reduce peak memory usage.
2397   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2398 
2399   // Find Phi placeholders to materialize.
2400   ArenaBitVector phi_placeholders_to_materialize(
2401       &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
2402   phi_placeholders_to_materialize.ClearAllBits();
2403   DataType::Type type = load->GetType();
2404   bool can_use_default_or_phi = IsDefaultOrPhiAllowedForLoad(load);
2405   std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
2406       phi_placeholder, &phi_placeholders_to_materialize, type, can_use_default_or_phi);
2407   if (loop_phi_with_unknown_input) {
2408     DCHECK_GE(GetGraph()
2409                   ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2410                   ->GetPredecessors()
2411                   .size(),
2412               2u);
2413     return loop_phi_with_unknown_input;  // Return failure.
2414   }
2415 
2416   DCHECK_EQ(current_phase_, Phase::kLoadElimination);
2417   bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
2418   DCHECK(success);
2419 
2420   // Report success.
2421   return std::nullopt;
2422 }
2423 
2424 // Re-process loads and stores in successors from the `loop_phi_with_unknown_input`. This may
2425 // find one or more loads from `loads_requiring_loop_phi_` which cannot be replaced by Phis and
2426 // propagate the load(s) as the new value(s) to successors; this may uncover new elimination
2427 // opportunities. If we find no such load, we shall at least propagate an unknown value to some
2428 // heap location that is needed by another loop Phi placeholder.
2429 void LSEVisitor::ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input) {
2430   size_t loop_phi_with_unknown_input_index = PhiPlaceholderIndex(loop_phi_with_unknown_input);
2431   DCHECK(phi_placeholder_replacements_[loop_phi_with_unknown_input_index].IsInvalid());
2432   phi_placeholder_replacements_[loop_phi_with_unknown_input_index] =
2433       Value::MergedUnknown(loop_phi_with_unknown_input);
2434 
2435   uint32_t block_id = loop_phi_with_unknown_input.GetBlockId();
2436   const ArenaVector<HBasicBlock*>& reverse_post_order = GetGraph()->GetReversePostOrder();
2437   size_t reverse_post_order_index = 0;
2438   size_t reverse_post_order_size = reverse_post_order.size();
2439   size_t loads_and_stores_index = 0u;
2440   size_t loads_and_stores_size = loads_and_stores_.size();
2441 
2442   // Skip blocks and instructions before the block containing the loop phi with unknown input.
2443   DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2444   while (reverse_post_order[reverse_post_order_index]->GetBlockId() != block_id) {
2445     HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2446     while (loads_and_stores_index != loads_and_stores_size &&
2447            loads_and_stores_[loads_and_stores_index].load_or_store->GetBlock() == block) {
2448       ++loads_and_stores_index;
2449     }
2450     ++reverse_post_order_index;
2451     DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2452   }
2453 
2454   // Use local allocator to reduce peak memory usage.
2455   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2456   // Reuse one temporary vector for all remaining blocks.
2457   size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
2458   ScopedArenaVector<Value> local_heap_values(allocator.Adapter(kArenaAllocLSE));
2459 
2460   auto get_initial_value = [this](HBasicBlock* block, size_t idx) {
2461     Value value;
2462     if (block->IsLoopHeader()) {
2463       if (block->GetLoopInformation()->IsIrreducible()) {
2464         PhiPlaceholder placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
2465         value = Value::MergedUnknown(placeholder);
2466       } else {
2467         value = PrepareLoopValue(block, idx);
2468       }
2469     } else {
2470       value = MergePredecessorValues(block, idx);
2471     }
2472     DCHECK(value.IsUnknown() || ReplacementOrValue(value).Equals(value));
2473     return value;
2474   };
2475 
2476   // Process remaining blocks and instructions.
2477   bool found_unreplaceable_load = false;
2478   bool replaced_heap_value_with_unknown = false;
2479   for (; reverse_post_order_index != reverse_post_order_size; ++reverse_post_order_index) {
2480     HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2481     if (block->IsExitBlock()) {
2482       continue;
2483     }
2484 
2485     // We shall reconstruct only the heap values that we need for processing loads and stores.
2486     local_heap_values.clear();
2487     local_heap_values.resize(num_heap_locations, Value::Invalid());
2488 
2489     for (; loads_and_stores_index != loads_and_stores_size; ++loads_and_stores_index) {
2490       HInstruction* load_or_store = loads_and_stores_[loads_and_stores_index].load_or_store;
2491       size_t idx = loads_and_stores_[loads_and_stores_index].heap_location_index;
2492       if (load_or_store->GetBlock() != block) {
2493         break;  // End of instructions from the current block.
2494       }
2495       bool is_store = load_or_store->GetSideEffects().DoesAnyWrite();
2496       DCHECK_EQ(is_store, IsStore(load_or_store));
2497       HInstruction* stored_value = nullptr;
2498       if (is_store) {
2499         auto it = store_records_.find(load_or_store);
2500         DCHECK(it != store_records_.end());
2501         stored_value = it->second.stored_value;
2502       }
2503       auto it = loads_requiring_loop_phi_.find(
2504           stored_value != nullptr ? stored_value : load_or_store);
2505       if (it == loads_requiring_loop_phi_.end()) {
2506         continue;  // This load or store never needed a loop Phi.
2507       }
2508       ValueRecord& record = it->second;
2509       if (is_store) {
2510         // Process the store by updating `local_heap_values[idx]`. The last update shall
2511         // be propagated to `heap_values[idx].value` if it previously needed a loop Phi
2512         // at the end of the block.
2513         Value replacement = ReplacementOrValue(record.value);
2514         if (replacement.NeedsLoopPhi()) {
2515           // No replacement yet, use the Phi placeholder from the load.
2516           DCHECK(record.value.NeedsLoopPhi());
2517           local_heap_values[idx] = record.value;
2518         } else {
2519           // If the load fetched a known value, use it, otherwise use the load.
2520           local_heap_values[idx] = Value::ForInstruction(
2521               replacement.IsUnknown() ? stored_value : replacement.GetInstruction());
2522         }
2523       } else {
2524         // Process the load unless it has previously been marked unreplaceable.
2525         if (record.value.NeedsLoopPhi()) {
2526           if (local_heap_values[idx].IsInvalid()) {
2527             local_heap_values[idx] = get_initial_value(block, idx);
2528           }
2529           if (local_heap_values[idx].IsUnknown()) {
2530             // This load cannot be replaced. Keep stores that feed the Phi placeholder
2531             // (no aliasing since then, otherwise the Phi placeholder would not have been
2532             // propagated as a value to this load) and store the load as the new heap value.
2533             found_unreplaceable_load = true;
2534             KeepStores(record.value);
2535             record.value = Value::MergedUnknown(record.value.GetPhiPlaceholder());
2536             local_heap_values[idx] = Value::ForInstruction(load_or_store);
2537           } else if (local_heap_values[idx].NeedsLoopPhi()) {
2538             // The load may still be replaced with a Phi later.
2539             DCHECK(local_heap_values[idx].Equals(record.value));
2540           } else {
2541             // This load can be eliminated but we may need to construct non-loop Phis.
2542             if (local_heap_values[idx].NeedsNonLoopPhi()) {
2543               MaterializeNonLoopPhis(local_heap_values[idx].GetPhiPlaceholder(),
2544                                      load_or_store->GetType());
2545               local_heap_values[idx] = Replacement(local_heap_values[idx]);
2546             }
2547             record.value = local_heap_values[idx];
2548             HInstruction* heap_value = local_heap_values[idx].GetInstruction();
2549             AddRemovedLoad(load_or_store, heap_value);
2550             TryRemovingNullCheck(load_or_store);
2551           }
2552         }
2553       }
2554     }
2555 
2556     // All heap values that previously needed a loop Phi at the end of the block
2557     // need to be updated for processing successors.
2558     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
2559     for (size_t idx = 0; idx != num_heap_locations; ++idx) {
2560       if (heap_values[idx].value.NeedsLoopPhi()) {
2561         if (local_heap_values[idx].IsValid()) {
2562           heap_values[idx].value = local_heap_values[idx];
2563         } else {
2564           heap_values[idx].value = get_initial_value(block, idx);
2565         }
2566         if (heap_values[idx].value.IsUnknown()) {
2567           replaced_heap_value_with_unknown = true;
2568         }
2569       }
2570     }
2571   }
2572   DCHECK(found_unreplaceable_load || replaced_heap_value_with_unknown);
2573 }
2574 
2575 void LSEVisitor::ProcessLoadsRequiringLoopPhis() {
2576   // Note: The vector operations carve-out (see `IsDefaultOrPhiAllowedForLoad()`) can possibly
2577   // make the result of the processing depend on the order in which we process these loads.
2578   // To make sure the result is deterministic, iterate over `loads_and_stores_` instead of the
2579   // `loads_requiring_loop_phi_` indexed by non-deterministic pointers.
2580   for (const LoadStoreRecord& load_store_record : loads_and_stores_) {
2581     auto it = loads_requiring_loop_phi_.find(load_store_record.load_or_store);
2582     if (it == loads_requiring_loop_phi_.end()) {
2583       continue;
2584     }
2585     HInstruction* load = it->first;
2586     ValueRecord& record = it->second;
2587     while (record.value.NeedsLoopPhi() &&
2588            phi_placeholder_replacements_[PhiPlaceholderIndex(record.value)].IsInvalid()) {
2589       std::optional<PhiPlaceholder> loop_phi_with_unknown_input =
2590           TryToMaterializeLoopPhis(record.value.GetPhiPlaceholder(), load);
2591       DCHECK_EQ(loop_phi_with_unknown_input.has_value(),
2592                 phi_placeholder_replacements_[PhiPlaceholderIndex(record.value)].IsInvalid());
2593       if (loop_phi_with_unknown_input) {
2594         DCHECK_GE(GetGraph()
2595                       ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2596                       ->GetPredecessors()
2597                       .size(),
2598                   2u);
2599         ProcessLoopPhiWithUnknownInput(*loop_phi_with_unknown_input);
2600       }
2601     }
2602     // The load could have been marked as unreplaceable (and stores marked for keeping)
2603     // or marked for replacement with an instruction in ProcessLoopPhiWithUnknownInput().
2604     DCHECK(record.value.IsUnknown() || record.value.IsInstruction() || record.value.NeedsLoopPhi());
2605     if (record.value.NeedsLoopPhi()) {
2606       record.value = Replacement(record.value);
2607       HInstruction* heap_value = record.value.GetInstruction();
2608       AddRemovedLoad(load, heap_value);
2609       TryRemovingNullCheck(load);
2610     }
2611   }
2612 }
2613 
2614 void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
2615   ScopedArenaVector<uint32_t> work_queue(allocator_.Adapter(kArenaAllocLSE));
2616   size_t start_size = phi_placeholders_to_search_for_kept_stores_.NumSetBits();
2617   work_queue.reserve(((start_size * 3u) + 1u) / 2u);  // Reserve 1.5x start size, rounded up.
2618   for (uint32_t index : phi_placeholders_to_search_for_kept_stores_.Indexes()) {
2619     work_queue.push_back(index);
2620   }
2621   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2622   std::optional<ArenaBitVector> not_kept_stores;
2623   if (stats_) {
2624     not_kept_stores.emplace(GetGraph()->GetAllocator(),
2625                             kept_stores_.GetBitSizeOf(),
2626                             false,
2627                             ArenaAllocKind::kArenaAllocLSE);
2628   }
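  // Worklist processing: for each Phi placeholder reachable from a kept store, examine the
  // heap values at the end of every predecessor block. Stores found there are marked as kept
  // (except stores into non-escaping partial singletons, which are only counted for
  // statistics), and any Phi placeholders found there are queued so that the stores feeding
  // them are preserved transitively.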
2629   while (!work_queue.empty()) {
2630     uint32_t cur_phi_idx = work_queue.back();
2631     PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(cur_phi_idx);
2632     // Only writes to partial-escapes need to be specifically kept.
2633     bool is_partial_kept_merged_unknown =
2634         kept_merged_unknowns_.IsBitSet(cur_phi_idx) &&
2635         heap_location_collector_.GetHeapLocation(phi_placeholder.GetHeapLocation())
2636             ->GetReferenceInfo()
2637             ->IsPartialSingleton();
2638     work_queue.pop_back();
2639     size_t idx = phi_placeholder.GetHeapLocation();
2640     HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2641     DCHECK(block != nullptr) << cur_phi_idx << " phi: " << phi_placeholder
2642                              << " (blocks: " << blocks.size() << ")";
2643     for (HBasicBlock* predecessor : block->GetPredecessors()) {
2644       ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
2645       // For loop back-edges we must also preserve all stores to locations that
2646       // may alias with the location `idx`.
2647       // TODO: Add test cases around this.
2648       bool is_back_edge =
2649           block->IsLoopHeader() && predecessor != block->GetLoopInformation()->GetPreHeader();
2650       size_t start = is_back_edge ? 0u : idx;
2651       size_t end = is_back_edge ? heap_values.size() : idx + 1u;
2652       for (size_t i = start; i != end; ++i) {
2653         Value stored_by = heap_values[i].stored_by;
2654         if (!stored_by.IsUnknown() && (i == idx || MayAliasOnBackEdge(block, idx, i))) {
2655           if (stored_by.NeedsPhi()) {
2656             size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
2657             if (is_partial_kept_merged_unknown) {
2658               // Propagate merged-unknown keep since otherwise this might look
2659               // like a partial escape we can remove.
2660               kept_merged_unknowns_.SetBit(phi_placeholder_index);
2661             }
2662             if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
2663               phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index);
2664               work_queue.push_back(phi_placeholder_index);
2665             }
2666           } else {
2667             DCHECK(IsStore(stored_by.GetInstruction()));
2668             ReferenceInfo* ri = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
2669             DCHECK(ri != nullptr) << "No heap value for " << stored_by.GetInstruction()->DebugName()
2670                                   << " id: " << stored_by.GetInstruction()->GetId() << " block: "
2671                                   << stored_by.GetInstruction()->GetBlock()->GetBlockId();
2672             if (!is_partial_kept_merged_unknown && IsPartialNoEscape(predecessor, idx)) {
2673               if (not_kept_stores) {
2674                 not_kept_stores->SetBit(stored_by.GetInstruction()->GetId());
2675               }
2676             } else {
2677               kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
2678             }
2679           }
2680         }
2681       }
2682     }
2683   }
2684   if (not_kept_stores) {
2685     // a - b := (a & ~b)
2686     not_kept_stores->Subtract(&kept_stores_);
2687     auto num_removed = not_kept_stores->NumSetBits();
2688     MaybeRecordStat(stats_, MethodCompilationStat::kPartialStoreRemoved, num_removed);
2689   }
2690 }
2691 
2692 void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record) {
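  // Walk back through stores that are not being kept: the value in this location is whatever
  // was there before the eliminated store, i.e. the store's recorded old value record.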
2693   while (value_record->stored_by.IsInstruction() &&
2694          !kept_stores_.IsBitSet(value_record->stored_by.GetInstruction()->GetId())) {
2695     auto it = store_records_.find(value_record->stored_by.GetInstruction());
2696     DCHECK(it != store_records_.end());
2697     *value_record = it->second.old_value_record;
2698   }
2699   if (value_record->stored_by.NeedsPhi() &&
2700       !phi_placeholders_to_search_for_kept_stores_.IsBitSet(
2701            PhiPlaceholderIndex(value_record->stored_by))) {
2702     // Some stores feeding this heap location may have been eliminated. Use the `stored_by`
2703     // Phi placeholder to recalculate the actual value.
2704     value_record->value = value_record->stored_by;
2705   }
2706   value_record->value = ReplacementOrValue(value_record->value);
2707   if (value_record->value.NeedsNonLoopPhi()) {
2708     // Treat all Phi placeholders as requiring loop Phis at this point.
2709     // We do not want MaterializeLoopPhis() to call MaterializeNonLoopPhis().
2710     value_record->value = Value::ForLoopPhiPlaceholder(value_record->value.GetPhiPlaceholder());
2711   }
2712 }
2713 
2714 void LSEVisitor::FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder,
2715                                                DataType::Type type) {
2716   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2717 
2718   // Use local allocator to reduce peak memory usage.
2719   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2720   ArenaBitVector visited(&allocator,
2721                          /*start_bits=*/ num_phi_placeholders_,
2722                          /*expandable=*/ false,
2723                          kArenaAllocLSE);
2724   visited.ClearAllBits();
2725 
2726   // Find Phi placeholders to try and match against existing Phis or other replacement values.
2727   ArenaBitVector phi_placeholders_to_materialize(
2728       &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
2729   phi_placeholders_to_materialize.ClearAllBits();
2730   std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
2731       phi_placeholder, &phi_placeholders_to_materialize, type, /*can_use_default_or_phi=*/true);
2732   if (loop_phi_with_unknown_input) {
2733     DCHECK_GE(GetGraph()
2734                   ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2735                   ->GetPredecessors()
2736                   .size(),
2737               2u);
2738     // Mark both this placeholder and the input Phi placeholder as unreplaceable.
2739     phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)] = Value::PureUnknown();
2740     phi_placeholder_replacements_[PhiPlaceholderIndex(*loop_phi_with_unknown_input)] =
2741         Value::PureUnknown();
2742     return;
2743   }
2744 
2745   DCHECK_EQ(current_phase_, Phase::kStoreElimination);
2746   bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
2747   DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsValid());
2748   DCHECK_EQ(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsUnknown(),
2749             !success);
2750 }
2751 
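// RAII helper that records every heap value modified through `ForEachRecord()` and restores
// the original values on destruction, so that later partial-LSE processing sees the
// unmodified heap value map.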
2752 struct ScopedRestoreHeapValues {
2753  public:
2754   ScopedRestoreHeapValues(ArenaStack* alloc,
2755                           size_t num_heap_locs,
2756                           ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore)
2757       : alloc_(alloc),
2758         updated_values_(alloc_.Adapter(kArenaAllocLSE)),
2759         to_restore_(to_restore) {
2760     updated_values_.reserve(num_heap_locs * to_restore_.size());
2761   }
2762 
2763   ~ScopedRestoreHeapValues() {
2764     for (const auto& rec : updated_values_) {
2765       to_restore_[rec.blk_id][rec.heap_loc].value = rec.val_;
2766     }
2767   }
2768 
2769   template<typename Func>
2770   void ForEachRecord(Func func) {
2771     for (size_t blk_id : Range(to_restore_.size())) {
2772       for (size_t heap_loc : Range(to_restore_[blk_id].size())) {
2773         LSEVisitor::ValueRecord* vr = &to_restore_[blk_id][heap_loc];
2774         LSEVisitor::Value initial = vr->value;
2775         func(vr);
2776         if (!vr->value.ExactEquals(initial)) {
2777           updated_values_.push_back({blk_id, heap_loc, initial});
2778         }
2779       }
2780     }
2781   }
2782 
2783  private:
2784   struct UpdateRecord {
2785     size_t blk_id;
2786     size_t heap_loc;
2787     LSEVisitor::Value val_;
2788   };
2789   ScopedArenaAllocator alloc_;
2790   ScopedArenaVector<UpdateRecord> updated_values_;
2791   ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore_;
2792 
2793   DISALLOW_COPY_AND_ASSIGN(ScopedRestoreHeapValues);
2794 };
2795 
2796 void LSEVisitor::FindStoresWritingOldValues() {
2797   // Partial LSE relies on knowing the real heap values, not the
2798   // store-replacement versions, so we need to restore the map after
2799   // removing stores.
2800   ScopedRestoreHeapValues heap_vals(allocator_.GetArenaStack(),
2801                                     heap_location_collector_.GetNumberOfHeapLocations(),
2802                                     heap_values_for_);
2803   // The Phi placeholder replacements have so far been used for eliminating loads,
2804   // tracking values that would be stored if all stores were kept. As we want to
2805   // compare actual old values after removing unmarked stores, prune the Phi
2806   // placeholder replacements that can be fed by values we may not actually store.
2807   // Replacements marked as unknown can be kept as they are fed by some unknown
2808   // value and would end up as unknown again if we recalculated them.
2809   for (size_t i = 0, size = phi_placeholder_replacements_.size(); i != size; ++i) {
2810     if (!phi_placeholder_replacements_[i].IsUnknown() &&
2811         !phi_placeholders_to_search_for_kept_stores_.IsBitSet(i)) {
2812       phi_placeholder_replacements_[i] = Value::Invalid();
2813     }
2814   }
2815 
2816   // Update heap values at end of blocks.
2817   heap_vals.ForEachRecord([&](ValueRecord* rec) {
2818     UpdateValueRecordForStoreElimination(rec);
2819   });
2820 
2821   if (kIsDebugBuild) {
2822     heap_vals.ForEachRecord([](ValueRecord* rec) {
2823       DCHECK(!rec->value.NeedsNonLoopPhi()) << rec->value;
2824     });
2825   }
2826 
2827   // Use local allocator to reduce peak memory usage.
2828   ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2829   // Mark the stores we want to eliminate in a separate bit vector.
2830   ArenaBitVector eliminated_stores(&allocator,
2831                                    /*start_bits=*/ GetGraph()->GetCurrentInstructionId(),
2832                                    /*expandable=*/ false,
2833                                    kArenaAllocLSE);
2834   eliminated_stores.ClearAllBits();
2835 
2836   for (auto& entry : store_records_) {
2837     HInstruction* store = entry.first;
2838     StoreRecord& store_record = entry.second;
2839     if (!kept_stores_.IsBitSet(store->GetId())) {
2840       continue;  // Ignore stores that are not kept.
2841     }
2842     UpdateValueRecordForStoreElimination(&store_record.old_value_record);
2843     if (store_record.old_value_record.value.NeedsPhi()) {
2844       DataType::Type type = store_record.stored_value->GetType();
2845       FindOldValueForPhiPlaceholder(store_record.old_value_record.value.GetPhiPlaceholder(), type);
2846       store_record.old_value_record.value = ReplacementOrValue(store_record.old_value_record.value);
2847     }
2848     DCHECK(!store_record.old_value_record.value.NeedsPhi());
2849     HInstruction* stored_value = FindSubstitute(store_record.stored_value);
2850     if (store_record.old_value_record.value.Equals(stored_value)) {
2851       eliminated_stores.SetBit(store->GetId());
2852     }
2853   }
2854 
2855   // Commit the stores to eliminate by removing them from `kept_stores_`.
2856   kept_stores_.Subtract(&eliminated_stores);
2857 }
2858 
2859 void LSEVisitor::Run() {
2860   // 1. Process blocks and instructions in reverse post order.
2861   for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
2862     VisitBasicBlock(block);
2863   }
2864 
2865   // 2. Process loads that require loop Phis, trying to find/create replacements.
2866   current_phase_ = Phase::kLoadElimination;
2867   ProcessLoadsRequiringLoopPhis();
2868 
2869   // 3. Determine which stores to keep and which to eliminate.
2870   current_phase_ = Phase::kStoreElimination;
2871   // Finish marking stores for keeping.
2872   SearchPhiPlaceholdersForKeptStores();
2873 
2874   // Find stores that write the same value as is already present in the location.
2875   FindStoresWritingOldValues();
2876 
2877   // 4. Replace loads and remove unnecessary stores and singleton allocations.
2878   FinishFullLSE();
2879 
2880   // 5. Move partial escapes down and fixup with PHIs.
2881   current_phase_ = Phase::kPartialElimination;
2882   MovePartialEscapes();
2883 }
2884 
2885 // Clear unknown loop-phi results. Here we'll be able to use partial-unknowns so we need to
2886 // retry all of them with more information about where they come from.
2887 void LSEVisitor::PrepareForPartialPhiComputation() {
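  // Keep only replacements that are a default value or a concrete instruction; anything else
  // (unknown values and unmaterialized Phi placeholders) is reset to `Value::Invalid()` so
  // that partial elimination recomputes it.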
2888   std::replace_if(
2889       phi_placeholder_replacements_.begin(),
2890       phi_placeholder_replacements_.end(),
2891       [](const Value& val) { return !val.IsDefault() && !val.IsInstruction(); },
2892       Value::Invalid());
2893 }
2894 
2895 class PartialLoadStoreEliminationHelper {
2896  public:
2897   PartialLoadStoreEliminationHelper(LSEVisitor* lse, ScopedArenaAllocator* alloc)
2898       : lse_(lse),
2899         alloc_(alloc),
2900         new_ref_phis_(alloc_->Adapter(kArenaAllocLSE)),
2901         heap_refs_(alloc_->Adapter(kArenaAllocLSE)),
2902         max_preds_per_block_((*std::max_element(GetGraph()->GetActiveBlocks().begin(),
2903                                                 GetGraph()->GetActiveBlocks().end(),
2904                                                 [](HBasicBlock* a, HBasicBlock* b) {
2905                                                   return a->GetNumberOfPredecessors() <
2906                                                          b->GetNumberOfPredecessors();
2907                                                 }))
2908                                  ->GetNumberOfPredecessors()),
2909         materialization_blocks_(GetGraph()->GetBlocks().size() * max_preds_per_block_,
2910                                 nullptr,
2911                                 alloc_->Adapter(kArenaAllocLSE)),
2912         first_materialization_block_id_(GetGraph()->GetBlocks().size()) {
2913     size_t num_partial_singletons = lse_->heap_location_collector_.CountPartialSingletons();
2914     heap_refs_.reserve(num_partial_singletons);
2915     new_ref_phis_.reserve(num_partial_singletons * GetGraph()->GetBlocks().size());
2916     CollectInterestingHeapRefs();
2917   }
2918 
2919   ~PartialLoadStoreEliminationHelper() {
2920     if (heap_refs_.empty()) {
2921       return;
2922     }
2923     ReferenceTypePropagation rtp_fixup(GetGraph(),
2924                                        Handle<mirror::ClassLoader>(),
2925                                        Handle<mirror::DexCache>(),
2926                                        /* is_first_run= */ false);
2927     rtp_fixup.Visit(ArrayRef<HInstruction* const>(new_ref_phis_));
2928     GetGraph()->ClearLoopInformation();
2929     GetGraph()->ClearDominanceInformation();
2930     GetGraph()->ClearReachabilityInformation();
2931     GetGraph()->BuildDominatorTree();
2932     GetGraph()->ComputeReachabilityInformation();
2933   }
2934 
2935   class IdxToHeapLoc {
2936    public:
2937     explicit IdxToHeapLoc(const HeapLocationCollector* hlc) : collector_(hlc) {}
2938     HeapLocation* operator()(size_t idx) const {
2939       return collector_->GetHeapLocation(idx);
2940     }
2941 
2942    private:
2943     const HeapLocationCollector* collector_;
2944   };
2945 
2946 
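  // Per-reference bookkeeping for partial LSE: the original allocation, the set of heap
  // locations rooted at it, and the materialized replacement object chosen for each block.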
2947   class HeapReferenceData {
2948    public:
2949     using LocIterator = IterationRange<TransformIterator<BitVector::IndexIterator, IdxToHeapLoc>>;
2950     HeapReferenceData(PartialLoadStoreEliminationHelper* helper,
2951                       HNewInstance* new_inst,
2952                       const ExecutionSubgraph* subgraph,
2953                       ScopedArenaAllocator* alloc)
2954         : new_instance_(new_inst),
2955           helper_(helper),
2956           heap_locs_(alloc,
2957                      helper->lse_->heap_location_collector_.GetNumberOfHeapLocations(),
2958                      /* expandable= */ false,
2959                      kArenaAllocLSE),
2960           materializations_(
2961               // We generally won't need to create too many materialization blocks and we can expand
2962               // this as needed so just start off with 2x.
2963               2 * helper->lse_->GetGraph()->GetBlocks().size(),
2964               nullptr,
2965               alloc->Adapter(kArenaAllocLSE)),
2966           collector_(helper->lse_->heap_location_collector_),
2967           subgraph_(subgraph) {}
2968 
2969     LocIterator IterateLocations() {
2970       auto idxs = heap_locs_.Indexes();
2971       return MakeTransformRange(idxs, IdxToHeapLoc(&collector_));
2972     }
2973 
2974     void AddHeapLocation(size_t idx) {
2975       heap_locs_.SetBit(idx);
2976     }
2977 
2978     const ExecutionSubgraph* GetNoEscapeSubgraph() const {
2979       return subgraph_;
2980     }
2981 
2982     bool IsPostEscape(HBasicBlock* blk) {
2983       return std::any_of(
2984           subgraph_->GetExcludedCohorts().cbegin(),
2985           subgraph_->GetExcludedCohorts().cend(),
2986           [&](const ExecutionSubgraph::ExcludedCohort& ec) { return ec.PrecedesBlock(blk); });
2987     }
2988 
2989     bool InEscapeCohort(HBasicBlock* blk) {
2990       return std::any_of(
2991           subgraph_->GetExcludedCohorts().cbegin(),
2992           subgraph_->GetExcludedCohorts().cend(),
2993           [&](const ExecutionSubgraph::ExcludedCohort& ec) { return ec.ContainsBlock(blk); });
2994     }
2995 
2996     bool BeforeAllEscapes(HBasicBlock* b) {
2997       return std::none_of(subgraph_->GetExcludedCohorts().cbegin(),
2998                           subgraph_->GetExcludedCohorts().cend(),
2999                           [&](const ExecutionSubgraph::ExcludedCohort& ec) {
3000                             return ec.PrecedesBlock(b) || ec.ContainsBlock(b);
3001                           });
3002     }
3003 
3004     HNewInstance* OriginalNewInstance() const {
3005       return new_instance_;
3006     }
3007 
3008     // Collect and replace all uses. We need to perform this twice since we will
3009     // generate PHIs and additional uses as we create the default-values for
3010     // pred-gets. These values might be other references that are also being
3011     // partially eliminated. By running just the replacement part again we are
3012     // able to avoid having to keep another whole in-progress partial map
3013     // around. Since we will have already handled all the other uses in the
3014     // first pass the second one will be quite fast.
3015     void FixupUses(bool first_pass) {
3016       ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
3017       // Replace uses with materialized values.
3018       ScopedArenaVector<InstructionUse<HInstruction>> to_replace(saa.Adapter(kArenaAllocLSE));
3019       ScopedArenaVector<HInstruction*> to_remove(saa.Adapter(kArenaAllocLSE));
3020       // Whether we need to add a constructor fence.
3021       ScopedArenaVector<InstructionUse<HConstructorFence>> constructor_fences(
3022           saa.Adapter(kArenaAllocLSE));
3023       ScopedArenaVector<InstructionUse<HInstruction>> to_predicate(saa.Adapter(kArenaAllocLSE));
3024 
3025       CollectReplacements(to_replace, to_remove, constructor_fences, to_predicate);
3026 
3027       if (!first_pass) {
3028         // If another partial creates new references they can only be in Phis or pred-get defaults
3029         // so they must be in the to_replace group.
3030         DCHECK(to_predicate.empty());
3031         DCHECK(constructor_fences.empty());
3032         DCHECK(to_remove.empty());
3033       }
3034 
3035       ReplaceInput(to_replace);
3036       RemoveAndReplaceInputs(to_remove);
3037       CreateConstructorFences(constructor_fences);
3038       PredicateInstructions(to_predicate);
3039 
3040       CHECK(OriginalNewInstance()->GetUses().empty())
3041           << OriginalNewInstance()->GetUses() << ", " << OriginalNewInstance()->GetEnvUses();
3042     }
3043 
3044     void AddMaterialization(HBasicBlock* blk, HInstruction* ins) {
3045       if (blk->GetBlockId() >= materializations_.size()) {
3046         // Make sure the materialization array is large enough; try to avoid
3047         // resizing too many times by giving extra space.
3048         materializations_.resize(blk->GetBlockId() * 2, nullptr);
3049       }
3050       DCHECK(materializations_[blk->GetBlockId()] == nullptr)
3051           << "Already have a materialization in block " << blk->GetBlockId() << ": "
3052           << *materializations_[blk->GetBlockId()] << " when trying to set materialization to "
3053           << *ins;
3054       materializations_[blk->GetBlockId()] = ins;
3055       LSE_VLOG << "In  block " << blk->GetBlockId() << " materialization is " << *ins;
3056       helper_->NotifyNewMaterialization(ins);
3057     }
3058 
3059     bool HasMaterialization(HBasicBlock* blk) const {
3060       return blk->GetBlockId() < materializations_.size() &&
3061              materializations_[blk->GetBlockId()] != nullptr;
3062     }
3063 
3064     HInstruction* GetMaterialization(HBasicBlock* blk) const {
3065       if (materializations_.size() <= blk->GetBlockId() ||
3066           materializations_[blk->GetBlockId()] == nullptr) {
3067         // This must be a materialization block added after the partial LSE of
3068         // the current reference finished. Since every edge can only have at
3069         // most one materialization block added to it we can just check the
3070         // block's predecessor.
3071         DCHECK(helper_->IsMaterializationBlock(blk));
3072         blk = helper_->FindDominatingNonMaterializationBlock(blk);
3073         DCHECK(!helper_->IsMaterializationBlock(blk));
3074       }
3075       DCHECK_GT(materializations_.size(), blk->GetBlockId());
3076       DCHECK(materializations_[blk->GetBlockId()] != nullptr);
3077       return materializations_[blk->GetBlockId()];
3078     }
3079 
3080     void GenerateMaterializationValueFromPredecessors(HBasicBlock* blk) {
3081       DCHECK(std::none_of(GetNoEscapeSubgraph()->GetExcludedCohorts().begin(),
3082                           GetNoEscapeSubgraph()->GetExcludedCohorts().end(),
3083                           [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
3084                             return cohort.IsEntryBlock(blk);
3085                           }));
3086       DCHECK(!HasMaterialization(blk));
3087       if (blk->IsExitBlock()) {
3088         return;
3089       } else if (blk->IsLoopHeader()) {
3090         // See comment in execution_subgraph.h. Currently we act as though every
3091         // allocation for partial elimination takes place in the entry block.
3092         // This simplifies the analysis by making it so any escape cohort
3093         // expands to contain any loops it is a part of. This is something that
3094         // we should rectify at some point. In either case however we can still
3095         // special case the loop-header since (1) currently the loop can't have
3096         // any merges between different cohort entries since the pre-header will
3097         // be the earliest place entry can happen and (2) even if the analysis
3098         // is improved to consider lifetime of the object WRT loops any values
3099         // which would require loop-phis would have to make the whole loop
3100         // escape anyway.
3101         // This all means we can always use value from the pre-header when the
3102         // block is the loop-header and we didn't already create a
3103         // materialization block. (NB when we do improve the analysis we will
3104         // need to modify the materialization creation code to deal with this
3105         // correctly.)
3106         HInstruction* pre_header_val =
3107             GetMaterialization(blk->GetLoopInformation()->GetPreHeader());
3108         AddMaterialization(blk, pre_header_val);
3109         return;
3110       }
3111       ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
3112       ScopedArenaVector<HInstruction*> pred_vals(saa.Adapter(kArenaAllocLSE));
3113       pred_vals.reserve(blk->GetNumberOfPredecessors());
3114       for (HBasicBlock* pred : blk->GetPredecessors()) {
3115         DCHECK(HasMaterialization(pred));
3116         pred_vals.push_back(GetMaterialization(pred));
3117       }
3118       GenerateMaterializationValueFromPredecessorsDirect(blk, pred_vals);
3119     }
3120 
3121     void GenerateMaterializationValueFromPredecessorsForEntry(
3122         HBasicBlock* entry, const ScopedArenaVector<HInstruction*>& pred_vals) {
3123       DCHECK(std::any_of(GetNoEscapeSubgraph()->GetExcludedCohorts().begin(),
3124                          GetNoEscapeSubgraph()->GetExcludedCohorts().end(),
3125                          [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
3126                            return cohort.IsEntryBlock(entry);
3127                          }));
3128       GenerateMaterializationValueFromPredecessorsDirect(entry, pred_vals);
3129     }
3130 
3131    private:
3132     template <typename InstructionType>
3133     struct InstructionUse {
3134       InstructionType* instruction_;
3135       size_t index_;
3136     };
3137 
3138     void ReplaceInput(const ScopedArenaVector<InstructionUse<HInstruction>>& to_replace) {
3139       for (auto& [ins, idx] : to_replace) {
3140         HInstruction* merged_inst = GetMaterialization(ins->GetBlock());
3141         if (ins->IsPhi() && merged_inst->IsPhi() && ins->GetBlock() == merged_inst->GetBlock()) {
3142           // Phis we just pass through the appropriate inputs.
3143           ins->ReplaceInput(merged_inst->InputAt(idx), idx);
3144         } else {
3145           ins->ReplaceInput(merged_inst, idx);
3146         }
3147       }
3148     }
3149 
3150     void RemoveAndReplaceInputs(const ScopedArenaVector<HInstruction*>& to_remove) {
3151       for (HInstruction* ins : to_remove) {
3152         if (ins->GetBlock() == nullptr) {
3153           // Already dealt with.
3154           continue;
3155         }
3156         DCHECK(BeforeAllEscapes(ins->GetBlock())) << *ins;
3157         if (ins->IsInstanceFieldGet() || ins->IsInstanceFieldSet()) {
3158           bool instruction_has_users =
3159               ins->IsInstanceFieldGet() && (!ins->GetUses().empty() || !ins->GetEnvUses().empty());
3160           if (instruction_has_users) {
3161             // Make sure any remaining users of the read are replaced.
3162             HInstruction* replacement =
3163                 helper_->lse_->GetPartialValueAt(OriginalNewInstance(), ins);
3164             // NB ReplaceInput will remove a use from the list so this is
3165             // guaranteed to finish eventually.
3166             while (!ins->GetUses().empty()) {
3167               const HUseListNode<HInstruction*>& use = ins->GetUses().front();
3168               use.GetUser()->ReplaceInput(replacement, use.GetIndex());
3169             }
3170             while (!ins->GetEnvUses().empty()) {
3171               const HUseListNode<HEnvironment*>& use = ins->GetEnvUses().front();
3172               use.GetUser()->ReplaceInput(replacement, use.GetIndex());
3173             }
3174           } else {
3175             DCHECK(ins->GetUses().empty())
3176                 << "Instruction has users!\n"
3177                 << ins->DumpWithArgs() << "\nUsers are " << ins->GetUses();
3178             DCHECK(ins->GetEnvUses().empty())
3179                 << "Instruction has users!\n"
3180                 << ins->DumpWithArgs() << "\nUsers are " << ins->GetEnvUses();
3181           }
3182           ins->GetBlock()->RemoveInstruction(ins);
3183         } else {
3184           // Can only be obj == other, obj != other, obj == obj (!?) or, obj != obj (!?)
3185           // Since PHIs are escapes as far as LSE is concerned and we are before
3186           // any escapes these are the only 4 options.
3187           DCHECK(ins->IsEqual() || ins->IsNotEqual()) << *ins;
3188           HInstruction* replacement;
3189           if (UNLIKELY(ins->InputAt(0) == ins->InputAt(1))) {
3190             replacement = ins->IsEqual() ? GetGraph()->GetIntConstant(1)
3191                                          : GetGraph()->GetIntConstant(0);
3192           } else {
3193             replacement = ins->IsEqual() ? GetGraph()->GetIntConstant(0)
3194                                          : GetGraph()->GetIntConstant(1);
3195           }
3196           ins->ReplaceWith(replacement);
3197           ins->GetBlock()->RemoveInstruction(ins);
3198         }
3199       }
3200     }
3201 
3202     void CreateConstructorFences(
3203         const ScopedArenaVector<InstructionUse<HConstructorFence>>& constructor_fences) {
3204       if (!constructor_fences.empty()) {
3205         uint32_t pc = constructor_fences.front().instruction_->GetDexPc();
3206         for (auto& [cf, idx] : constructor_fences) {
3207           if (cf->GetInputs().size() == 1) {
3208             cf->GetBlock()->RemoveInstruction(cf);
3209           } else {
3210             cf->RemoveInputAt(idx);
3211           }
3212         }
3213         for (const ExecutionSubgraph::ExcludedCohort& ec :
3214             GetNoEscapeSubgraph()->GetExcludedCohorts()) {
3215           for (HBasicBlock* blk : ec.EntryBlocks()) {
3216             for (HBasicBlock* materializer :
3217                 Filter(MakeIterationRange(blk->GetPredecessors()),
3218                         [&](HBasicBlock* blk) { return helper_->IsMaterializationBlock(blk); })) {
3219               HInstruction* new_cf = new (GetGraph()->GetAllocator()) HConstructorFence(
3220                   GetMaterialization(materializer), pc, GetGraph()->GetAllocator());
3221               materializer->InsertInstructionBefore(new_cf, materializer->GetLastInstruction());
3222             }
3223           }
3224         }
3225       }
3226     }
3227 
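    // Rewrites uses that are only reached after a potential escape so that they
    // are conditional on whether the object actually materialized: gets become
    // predicated gets, sets become predicated sets, and reference comparisons are
    // combined with a null-check of the materialization.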
3228     void PredicateInstructions(
3229         const ScopedArenaVector<InstructionUse<HInstruction>>& to_predicate) {
3230       for (auto& [ins, idx] : to_predicate) {
3231         if (UNLIKELY(ins->GetBlock() == nullptr)) {
3232           // Already handled due to obj == obj.
3233           continue;
3234         } else if (ins->IsInstanceFieldGet()) {
3235           // IFieldGet[obj] => PredicatedIFieldGet[PartialValue, obj]
3236           HInstruction* new_fget = new (GetGraph()->GetAllocator()) HPredicatedInstanceFieldGet(
3237               ins->AsInstanceFieldGet(),
3238               GetMaterialization(ins->GetBlock()),
3239               helper_->lse_->GetPartialValueAt(OriginalNewInstance(), ins));
3240           MaybeRecordStat(helper_->lse_->stats_, MethodCompilationStat::kPredicatedLoadAdded);
3241           ins->GetBlock()->InsertInstructionBefore(new_fget, ins);
3242           if (ins->GetType() == DataType::Type::kReference) {
3243             // Reference info is the same
3244             new_fget->SetReferenceTypeInfo(ins->GetReferenceTypeInfo());
3245           }
3246           // In this phase, substitute instructions are used only for the predicated get
3247           // default values which are used only if the partial singleton did not escape,
3248           // so the out value of the `new_fget` for the relevant cases is the same as
3249           // the default value.
3250           // TODO: Use the default value for materializing default values used by
3251           // other predicated loads to avoid some unnecessary Phis. (This shall
3252           // complicate the search for replacement in `ReplacementOrValue()`.)
3253           DCHECK(helper_->lse_->substitute_instructions_for_loads_[ins->GetId()] == nullptr);
3254           helper_->lse_->substitute_instructions_for_loads_[ins->GetId()] = new_fget;
3255           ins->ReplaceWith(new_fget);
3256           ins->ReplaceEnvUsesDominatedBy(ins, new_fget);
3257           CHECK(ins->GetEnvUses().empty() && ins->GetUses().empty())
3258               << "Instruction: " << *ins << " uses: " << ins->GetUses()
3259               << ", env: " << ins->GetEnvUses();
3260           ins->GetBlock()->RemoveInstruction(ins);
3261         } else if (ins->IsInstanceFieldSet()) {
3262           // Any predicated sets shouldn't require movement.
3263           ins->AsInstanceFieldSet()->SetIsPredicatedSet();
3264           MaybeRecordStat(helper_->lse_->stats_, MethodCompilationStat::kPredicatedStoreAdded);
3265           HInstruction* merged_inst = GetMaterialization(ins->GetBlock());
3266           ins->ReplaceInput(merged_inst, idx);
3267         } else {
3268           // Comparisons need to be split into 2.
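          // Roughly: obj == other becomes (mat != null) & (mat == other) and
          // obj != other becomes (mat != null) | (mat != other), where mat is the
          // materialization (null on paths where the object never escaped).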
3269           DCHECK(ins->IsEqual() || ins->IsNotEqual()) << "bad instruction " << *ins;
3270           bool this_is_first = idx == 0;
3271           if (ins->InputAt(0) == ins->InputAt(1)) {
3272             // This is an obj == obj or obj != obj.
3273             // No idea why anyone would do this, but whatever.
3274             ins->ReplaceWith(GetGraph()->GetIntConstant(ins->IsEqual() ? 1 : 0));
3275             ins->GetBlock()->RemoveInstruction(ins);
3276             continue;
3277           } else {
3278             HInstruction* is_escaped = new (GetGraph()->GetAllocator())
3279                 HNotEqual(GetMaterialization(ins->GetBlock()), GetGraph()->GetNullConstant());
3280             HInstruction* combine_inst =
3281                 ins->IsEqual() ? static_cast<HInstruction*>(new (GetGraph()->GetAllocator()) HAnd(
3282                                      DataType::Type::kBool, is_escaped, ins))
3283                                : static_cast<HInstruction*>(new (GetGraph()->GetAllocator()) HOr(
3284                                      DataType::Type::kBool, is_escaped, ins));
3285             ins->ReplaceInput(GetMaterialization(ins->GetBlock()), this_is_first ? 0 : 1);
3286             ins->GetBlock()->InsertInstructionBefore(is_escaped, ins);
3287             ins->GetBlock()->InsertInstructionAfter(combine_inst, ins);
3288             ins->ReplaceWith(combine_inst);
3289             combine_inst->ReplaceInput(ins, 1);
3290           }
3291         }
3292       }
3293     }
3294 
3295     // Figure out all the instructions we need to
3296     // fix up/replace/remove/duplicate. Since this requires iterating an
3297     // intrusive linked list, we want to do it only once and collect all the
3298     // data here.
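    // Uses fall into four buckets: uses inside the escape cohort are replaced
    // with the materialization, post-escape field accesses and comparisons are
    // predicated, constructor fences are moved to the materialization blocks, and
    // pre-escape uses are removed.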
3299     void CollectReplacements(
3300         ScopedArenaVector<InstructionUse<HInstruction>>& to_replace,
3301         ScopedArenaVector<HInstruction*>& to_remove,
3302         ScopedArenaVector<InstructionUse<HConstructorFence>>& constructor_fences,
3303         ScopedArenaVector<InstructionUse<HInstruction>>& to_predicate) {
3304       size_t size = new_instance_->GetUses().SizeSlow();
3305       to_replace.reserve(size);
3306       to_remove.reserve(size);
3307       constructor_fences.reserve(size);
3308       to_predicate.reserve(size);
3309       for (auto& use : new_instance_->GetUses()) {
3310         HBasicBlock* blk =
3311             helper_->FindDominatingNonMaterializationBlock(use.GetUser()->GetBlock());
3312         if (InEscapeCohort(blk)) {
3313           LSE_VLOG << "Replacing " << *new_instance_ << " use in " << *use.GetUser() << " with "
3314                    << *GetMaterialization(blk);
3315           to_replace.push_back({use.GetUser(), use.GetIndex()});
3316         } else if (IsPostEscape(blk)) {
3317           LSE_VLOG << "User " << *use.GetUser() << " after escapes!";
3318           // The fields + cmp are normal uses. Phi can only be here if it was
3319           // generated by full LSE so whatever store+load that created the phi
3320           // is the escape.
3321           if (use.GetUser()->IsPhi()) {
3322             to_replace.push_back({use.GetUser(), use.GetIndex()});
3323           } else {
3324             DCHECK(use.GetUser()->IsFieldAccess() ||
3325                    use.GetUser()->IsEqual() ||
3326                    use.GetUser()->IsNotEqual())
3327                 << *use.GetUser() << "@" << use.GetIndex();
3328             to_predicate.push_back({use.GetUser(), use.GetIndex()});
3329           }
3330         } else if (use.GetUser()->IsConstructorFence()) {
3331           LSE_VLOG << "User " << *use.GetUser() << " being moved to materialization!";
3332           constructor_fences.push_back({use.GetUser()->AsConstructorFence(), use.GetIndex()});
3333         } else {
3334           LSE_VLOG << "User " << *use.GetUser() << " not contained in cohort!";
3335           to_remove.push_back(use.GetUser());
3336         }
3337       }
3338       DCHECK_EQ(
3339           to_replace.size() + to_remove.size() + constructor_fences.size() + to_predicate.size(),
3340           size);
3341     }
3342 
3343     void GenerateMaterializationValueFromPredecessorsDirect(
3344         HBasicBlock* blk, const ScopedArenaVector<HInstruction*>& pred_vals) {
3345       DCHECK(!pred_vals.empty());
3346       bool all_equal = std::all_of(pred_vals.begin() + 1, pred_vals.end(), [&](HInstruction* val) {
3347         return val == pred_vals.front();
3348       });
3349       if (LIKELY(all_equal)) {
3350         AddMaterialization(blk, pred_vals.front());
3351       } else {
3352         // Make a PHI for the predecessors.
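        // E.g. a block reachable both from an escaping path (a materialized
        // instance) and from a non-escaping path (null) gets
        // Phi(materialization, null) as its value here.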
3353         HPhi* phi = new (GetGraph()->GetAllocator()) HPhi(
3354             GetGraph()->GetAllocator(), kNoRegNumber, pred_vals.size(), DataType::Type::kReference);
3355         for (const auto& [ins, off] : ZipCount(MakeIterationRange(pred_vals))) {
3356           phi->SetRawInputAt(off, ins);
3357         }
3358         blk->AddPhi(phi);
3359         AddMaterialization(blk, phi);
3360       }
3361     }
3362 
3363     HGraph* GetGraph() const {
3364       return helper_->GetGraph();
3365     }
3366 
3367     HNewInstance* new_instance_;
3368     PartialLoadStoreEliminationHelper* helper_;
3369     ArenaBitVector heap_locs_;
3370     ScopedArenaVector<HInstruction*> materializations_;
3371     const HeapLocationCollector& collector_;
3372     const ExecutionSubgraph* subgraph_;
3373   };
3374 
3375   ArrayRef<HeapReferenceData> GetHeapRefs() {
3376     return ArrayRef<HeapReferenceData>(heap_refs_);
3377   }
3378 
3379   bool IsMaterializationBlock(HBasicBlock* blk) const {
3380     return blk->GetBlockId() >= first_materialization_block_id_;
3381   }
3382 
3383   HBasicBlock* GetOrCreateMaterializationBlock(HBasicBlock* entry, size_t pred_num) {
3384     size_t idx = GetMaterializationBlockIndex(entry, pred_num);
3385     HBasicBlock* blk = materialization_blocks_[idx];
3386     if (blk == nullptr) {
3387       blk = new (GetGraph()->GetAllocator()) HBasicBlock(GetGraph());
3388       GetGraph()->AddBlock(blk);
3389       LSE_VLOG << "creating materialization block " << blk->GetBlockId() << " on edge "
3390                << entry->GetPredecessors()[pred_num]->GetBlockId() << "->" << entry->GetBlockId();
3391       blk->AddInstruction(new (GetGraph()->GetAllocator()) HGoto());
3392       materialization_blocks_[idx] = blk;
3393     }
3394     return blk;
3395   }
3396 
3397   HBasicBlock* GetMaterializationBlock(HBasicBlock* entry, size_t pred_num) {
3398     HBasicBlock* out = materialization_blocks_[GetMaterializationBlockIndex(entry, pred_num)];
3399     DCHECK(out != nullptr) << "No materialization block for edge " << entry->GetBlockId() << "->"
3400                            << entry->GetPredecessors()[pred_num]->GetBlockId();
3401     return out;
3402   }
3403 
3404   IterationRange<ArenaVector<HBasicBlock*>::const_iterator> IterateMaterializationBlocks() {
3405     return MakeIterationRange(GetGraph()->GetBlocks().begin() + first_materialization_block_id_,
3406                               GetGraph()->GetBlocks().end());
3407   }
3408 
3409   void FixupPartialObjectUsers() {
3410     for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : GetHeapRefs()) {
3411       // Use the materialized instances to replace the original instance.
3412       ref_data.FixupUses(/*first_pass=*/true);
3413       CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
3414           << ref_data.OriginalNewInstance()->GetUses() << ", "
3415           << ref_data.OriginalNewInstance()->GetEnvUses();
3416     }
3417     // This can cause new uses to be created due to the creation of phis/pred-get defaults
3418     for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : GetHeapRefs()) {
3419       // Only need to handle new phis/pred-get defaults. DCHECK that's all we find.
3420       ref_data.FixupUses(/*first_pass=*/false);
3421       CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
3422           << ref_data.OriginalNewInstance()->GetUses() << ", "
3423           << ref_data.OriginalNewInstance()->GetEnvUses();
3424     }
3425   }
3426 
3427   // Finds the first non-materialization block which either is, or dominates,
3428   // the given block.
3429   HBasicBlock* FindDominatingNonMaterializationBlock(HBasicBlock* blk) {
3430     if (LIKELY(!IsMaterializationBlock(blk))) {
3431       // Not a materialization block so itself.
3432       return blk;
3433     } else if (blk->GetNumberOfPredecessors() != 0) {
3434       // We're far enough along that the materialization blocks have been
3435       // inserted into the graph so no need to go searching.
3436       return blk->GetSinglePredecessor();
3437     }
3438     // Search through the materialization blocks to find where it will be
3439     // inserted.
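    // The flat index encodes (entry block id, predecessor number) as
    // block_id * max_preds_per_block_ + pred_num, so decode it with / and %.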
3440     for (auto [mat, idx] : ZipCount(MakeIterationRange(materialization_blocks_))) {
3441       if (mat == blk) {
3442         size_t cur_pred_idx = idx % max_preds_per_block_;
3443         HBasicBlock* entry = GetGraph()->GetBlocks()[idx / max_preds_per_block_];
3444         return entry->GetPredecessors()[cur_pred_idx];
3445       }
3446     }
3447     LOG(FATAL) << "Unable to find materialization block position for " << blk->GetBlockId() << "!";
3448     return nullptr;
3449   }
3450 
3451   void InsertMaterializationBlocks() {
3452     for (auto [mat, idx] : ZipCount(MakeIterationRange(materialization_blocks_))) {
3453       if (mat == nullptr) {
3454         continue;
3455       }
3456       size_t cur_pred_idx = idx % max_preds_per_block_;
3457       HBasicBlock* entry = GetGraph()->GetBlocks()[idx / max_preds_per_block_];
3458       HBasicBlock* pred = entry->GetPredecessors()[cur_pred_idx];
3459       mat->InsertBetween(pred, entry);
3460       LSE_VLOG << "Adding materialization block " << mat->GetBlockId() << " on edge "
3461                << pred->GetBlockId() << "->" << entry->GetBlockId();
3462     }
3463   }
3464 
3465   // Replace any remaining env-uses of the partial singletons with the
3466   // appropriate phis and remove the instructions.
3467   void RemoveReplacedInstructions() {
3468     for (HeapReferenceData& ref_data : GetHeapRefs()) {
3469       CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
3470           << ref_data.OriginalNewInstance()->GetUses() << ", "
3471           << ref_data.OriginalNewInstance()->GetEnvUses()
3472           << " inst is: " << ref_data.OriginalNewInstance();
3473       const auto& env_uses = ref_data.OriginalNewInstance()->GetEnvUses();
3474       while (!env_uses.empty()) {
3475         const HUseListNode<HEnvironment*>& use = env_uses.front();
3476         HInstruction* merged_inst =
3477             ref_data.GetMaterialization(use.GetUser()->GetHolder()->GetBlock());
3478         LSE_VLOG << "Replacing env use of " << *use.GetUser()->GetHolder() << "@" << use.GetIndex()
3479                  << " with " << *merged_inst;
3480         use.GetUser()->ReplaceInput(merged_inst, use.GetIndex());
3481       }
3482       ref_data.OriginalNewInstance()->GetBlock()->RemoveInstruction(ref_data.OriginalNewInstance());
3483     }
3484   }
3485 
3486   // We need to make sure any allocations dominate their environment uses.
3487   // Technically we could probably remove the env-uses and be fine but this is easy.
3488   void ReorderMaterializationsForEnvDominance() {
3489     for (HBasicBlock* blk : IterateMaterializationBlocks()) {
3490       ScopedArenaAllocator alloc(alloc_->GetArenaStack());
3491       ArenaBitVector still_unsorted(
3492           &alloc, GetGraph()->GetCurrentInstructionId(), false, kArenaAllocLSE);
3493       // This is guaranteed to be very short (since we will abandon LSE if there
3494       // are >= kMaxNumberOfHeapLocations (32) heap locations so that is the
3495       // absolute maximum size this list can be) so doing a selection sort is
3496       // fine. This avoids the need to do a complicated recursive check to
3497       // ensure transitivity for std::sort.
3498       ScopedArenaVector<HNewInstance*> materializations(alloc.Adapter(kArenaAllocLSE));
3499       materializations.reserve(GetHeapRefs().size());
3500       for (HInstruction* ins :
3501            MakeSTLInstructionIteratorRange(HInstructionIterator(blk->GetInstructions()))) {
3502         if (ins->IsNewInstance()) {
3503           materializations.push_back(ins->AsNewInstance());
3504           still_unsorted.SetBit(ins->GetId());
3505         }
3506       }
3507       using Iter = ScopedArenaVector<HNewInstance*>::iterator;
3508       Iter unsorted_start = materializations.begin();
3509       Iter unsorted_end = materializations.end();
3510       // Selection sort. Required since the only check we can easily perform is
3511       // an is-before-all-unsorted check.
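      // E.g. if materialization B's environment references materialization A,
      // then A must be selected (and therefore placed) before B.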
3512       while (unsorted_start != unsorted_end) {
3513         bool found_instruction = false;
3514         for (Iter candidate = unsorted_start; candidate != unsorted_end; ++candidate) {
3515           HNewInstance* ni = *candidate;
3516           if (std::none_of(ni->GetAllEnvironments().cbegin(),
3517                            ni->GetAllEnvironments().cend(),
3518                            [&](const HEnvironment* env) {
3519                              return std::any_of(
3520                                  env->GetEnvInputs().cbegin(),
3521                                  env->GetEnvInputs().cend(),
3522                                  [&](const HInstruction* env_element) {
3523                                    return env_element != nullptr &&
3524                                           still_unsorted.IsBitSet(env_element->GetId());
3525                                  });
3526                            })) {
3527             still_unsorted.ClearBit(ni->GetId());
3528             std::swap(*unsorted_start, *candidate);
3529             ++unsorted_start;
3530             found_instruction = true;
3531             break;
3532           }
3533         }
3534         CHECK(found_instruction) << "Unable to select next materialization instruction."
3535                                  << " Environments have a dependency loop!";
3536       }
3537       // Reverse so that as we prepend them we end up with the correct order.
3538       auto reverse_iter = MakeIterationRange(materializations.rbegin(), materializations.rend());
3539       for (HNewInstance* ins : reverse_iter) {
3540         if (blk->GetFirstInstruction() != ins) {
3541           // Don't do checks since those would verify the move is safe WRT
3542           // ins->CanBeMoved(), which for NewInstance is false.
3543           ins->MoveBefore(blk->GetFirstInstruction(), /*do_checks=*/false);
3544         }
3545       }
3546     }
3547   }
3548 
3549  private:
3550   void CollectInterestingHeapRefs() {
3551     // Get all the partials we need to move around.
3552     for (size_t i = 0; i < lse_->heap_location_collector_.GetNumberOfHeapLocations(); ++i) {
3553       ReferenceInfo* ri = lse_->heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
3554       if (ri->IsPartialSingleton() &&
3555           ri->GetReference()->GetBlock() != nullptr &&
3556           ri->GetNoEscapeSubgraph()->ContainsBlock(ri->GetReference()->GetBlock())) {
3557         RecordHeapRefField(ri->GetReference()->AsNewInstance(), i);
3558       }
3559     }
3560   }
3561 
3562   void RecordHeapRefField(HNewInstance* ni, size_t loc) {
3563     DCHECK(ni != nullptr);
3564     // This is likely to be very short so just do a linear search.
3565     auto it = std::find_if(heap_refs_.begin(), heap_refs_.end(), [&](HeapReferenceData& data) {
3566       return data.OriginalNewInstance() == ni;
3567     });
3568     HeapReferenceData& cur_ref =
3569         (it == heap_refs_.end())
3570             ? heap_refs_.emplace_back(this,
3571                                       ni,
3572                                       lse_->heap_location_collector_.GetHeapLocation(loc)
3573                                           ->GetReferenceInfo()
3574                                           ->GetNoEscapeSubgraph(),
3575                                       alloc_)
3576             : *it;
3577     cur_ref.AddHeapLocation(loc);
3578   }
3579 
3580 
3581   void NotifyNewMaterialization(HInstruction* ins) {
3582     if (ins->IsPhi()) {
3583       new_ref_phis_.push_back(ins->AsPhi());
3584     }
3585   }
3586 
3587   size_t GetMaterializationBlockIndex(HBasicBlock* blk, size_t pred_num) const {
3588     DCHECK_LT(blk->GetBlockId(), first_materialization_block_id_)
3589         << "block is a materialization block!";
3590     DCHECK_LT(pred_num, max_preds_per_block_);
3591     return blk->GetBlockId() * max_preds_per_block_ + pred_num;
3592   }
3593 
3594   HGraph* GetGraph() const {
3595     return lse_->GetGraph();
3596   }
3597 
3598   LSEVisitor* lse_;
3599   ScopedArenaAllocator* alloc_;
3600   ScopedArenaVector<HInstruction*> new_ref_phis_;
3601   ScopedArenaVector<HeapReferenceData> heap_refs_;
3602   size_t max_preds_per_block_;
3603   // An array of (# of non-materialization blocks) * max_preds_per_block
3604   // arranged in block-id major order. Since we can only have at most one
3605   // materialization block on each edge this is the maximum possible number of
3606   // materialization blocks.
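  // E.g. with max_preds_per_block_ == 3, the slot for predecessor 1 of block 5
  // is at index 5 * 3 + 1 == 16.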
3607   ScopedArenaVector<HBasicBlock*> materialization_blocks_;
3608   size_t first_materialization_block_id_;
3609 
3610   friend void LSEVisitor::MovePartialEscapes();
3611 };
3612 
3613 // Work around C++ type-checking annoyances with not being able to forward-declare inner types.
3614 class HeapRefHolder
3615     : public std::reference_wrapper<PartialLoadStoreEliminationHelper::HeapReferenceData> {};
3616 
3617 HInstruction* LSEVisitor::SetupPartialMaterialization(PartialLoadStoreEliminationHelper& helper,
3618                                                       HeapRefHolder&& holder,
3619                                                       size_t pred_idx,
3620                                                       HBasicBlock* entry) {
3621   PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data = holder.get();
3622   HBasicBlock* old_pred = entry->GetPredecessors()[pred_idx];
3623   HInstruction* new_inst = ref_data.OriginalNewInstance();
3624   if (UNLIKELY(!new_inst->GetBlock()->Dominates(entry))) {
3625     LSE_VLOG << "Initial materialization in non-dominating block " << entry->GetBlockId()
3626              << " is null!";
3627     return GetGraph()->GetNullConstant();
3628   }
3629   HBasicBlock* bb = helper.GetOrCreateMaterializationBlock(entry, pred_idx);
3630   CHECK(bb != nullptr) << "entry " << entry->GetBlockId() << " -> " << old_pred->GetBlockId();
3631   HNewInstance* repl_create = new_inst->Clone(GetGraph()->GetAllocator())->AsNewInstance();
3632   repl_create->SetPartialMaterialization();
3633   bb->InsertInstructionBefore(repl_create, bb->GetLastInstruction());
3634   repl_create->CopyEnvironmentFrom(new_inst->GetEnvironment());
3635   MaybeRecordStat(stats_, MethodCompilationStat::kPartialAllocationMoved);
3636   LSE_VLOG << "In blk " << bb->GetBlockId() << " initial materialization is " << *repl_create;
3637   ref_data.AddMaterialization(bb, repl_create);
3638   const FieldInfo* info = nullptr;
3639   for (const HeapLocation* loc : ref_data.IterateLocations()) {
3640     size_t loc_off = heap_location_collector_.GetHeapLocationIndex(loc);
3641     info = field_infos_[loc_off];
3642     DCHECK(loc->GetIndex() == nullptr);
3643     Value value = ReplacementOrValue(heap_values_for_[old_pred->GetBlockId()][loc_off].value);
3644     if (value.NeedsLoopPhi() || value.IsMergedUnknown()) {
3645       Value repl = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
3646       DCHECK(repl.IsDefault() || repl.IsInvalid() || repl.IsInstruction())
3647           << repl << " from " << value << " pred is " << old_pred->GetBlockId();
3648       if (!repl.IsInvalid()) {
3649         value = repl;
3650       } else {
3651         FullyMaterializePhi(value.GetPhiPlaceholder(), info->GetFieldType());
3652         value = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
3653       }
3654     } else if (value.NeedsNonLoopPhi()) {
3655       Value repl = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
3656       DCHECK(repl.IsDefault() || repl.IsInvalid() || repl.IsInstruction())
3657           << repl << " from " << value << " pred is " << old_pred->GetBlockId();
3658       if (!repl.IsInvalid()) {
3659         value = repl;
3660       } else {
3661         MaterializeNonLoopPhis(value.GetPhiPlaceholder(), info->GetFieldType());
3662         value = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
3663       }
3664     }
3665     DCHECK(value.IsDefault() || value.IsInstruction())
3666         << GetGraph()->PrettyMethod() << ": " << value;
3667 
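    // Store the value this field would have held at the end of `old_pred` into
    // the freshly materialized instance. Default values are skipped since a new
    // allocation already provides them.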
3668     if (!value.IsDefault() &&
3669         // shadow$_klass_ doesn't need to be manually initialized.
3670         MemberOffset(loc->GetOffset()) != mirror::Object::ClassOffset()) {
3671       CHECK(info != nullptr);
3672       HInstruction* set_value =
3673           new (GetGraph()->GetAllocator()) HInstanceFieldSet(repl_create,
3674                                                              value.GetInstruction(),
3675                                                              field_infos_[loc_off]->GetField(),
3676                                                              loc->GetType(),
3677                                                              MemberOffset(loc->GetOffset()),
3678                                                              false,
3679                                                              field_infos_[loc_off]->GetFieldIndex(),
3680                                                              loc->GetDeclaringClassDefIndex(),
3681                                                              field_infos_[loc_off]->GetDexFile(),
3682                                                              0u);
3683       bb->InsertInstructionAfter(set_value, repl_create);
3684       LSE_VLOG << "Adding " << *set_value << " for materialization setup!";
3685     }
3686   }
3687   return repl_create;
3688 }
3689 
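// Computes the value a read of the partial singleton observes on the paths where
// it has not escaped. Used to replace pre-escape reads and as the default value
// of the predicated gets created in PredicateInstructions().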
3690 HInstruction* LSEVisitor::GetPartialValueAt(HNewInstance* orig_new_inst, HInstruction* read) {
3691   size_t loc = heap_location_collector_.GetFieldHeapLocation(orig_new_inst, &read->GetFieldInfo());
3692   Value pred = ReplacementOrValue(intermediate_values_.find(read)->second);
3693   LSE_VLOG << "using " << pred << " as default value for " << *read;
3694   if (pred.IsInstruction()) {
3695     return pred.GetInstruction();
3696   } else if (pred.IsMergedUnknown() || pred.NeedsPhi()) {
3697     FullyMaterializePhi(pred.GetPhiPlaceholder(),
3698                         heap_location_collector_.GetHeapLocation(loc)->GetType());
3699     HInstruction* res = Replacement(pred).GetInstruction();
3700     LSE_VLOG << pred << " materialized to " << res->DumpWithArgs();
3701     return res;
3702   } else if (pred.IsDefault()) {
3703     HInstruction* res = GetDefaultValue(read->GetType());
3704     LSE_VLOG << pred << " materialized to " << res->DumpWithArgs();
3705     return res;
3706   }
3707   LOG(FATAL) << "Unable to find unescaped value at " << read->DumpWithArgs()
3708              << "! This should be impossible! Value is " << pred;
3709   UNREACHABLE();
3710 }
3711 
3712 void LSEVisitor::MovePartialEscapes() {
3713   if (!ShouldPerformPartialLSE()) {
3714     return;
3715   }
3716 
3717   ScopedArenaAllocator saa(allocator_.GetArenaStack());
3718   PartialLoadStoreEliminationHelper helper(this, &saa);
3719 
3720   // Since for PHIs we will now have more information (we know the object
3721   // hasn't escaped), we need to clear the old phi-replacements where we
3722   // weren't able to find the value.
3723   PrepareForPartialPhiComputation();
3724 
3725   for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : helper.GetHeapRefs()) {
3726     LSE_VLOG << "Creating materializations for " << *ref_data.OriginalNewInstance();
3727     // Setup entry and exit blocks.
3728     for (const auto& excluded_cohort : ref_data.GetNoEscapeSubgraph()->GetExcludedCohorts()) {
3729       // Setup materialization blocks.
3730       for (HBasicBlock* entry : excluded_cohort.EntryBlocksReversePostOrder()) {
3731         // Setup entries.
3732         // TODO: Assuming we correctly break critical edges, every entry block
3733         // must have only a single predecessor, so we could just put all this
3734         // stuff in there. OTOH the simplifier can do it for us and this is
3735         // simpler to implement, giving clean separation between the original
3736         // graph and materialization blocks, so for now we might as well have
3737         // these new blocks.
3738         ScopedArenaAllocator pred_alloc(saa.GetArenaStack());
3739         ScopedArenaVector<HInstruction*> pred_vals(pred_alloc.Adapter(kArenaAllocLSE));
3740         pred_vals.reserve(entry->GetNumberOfPredecessors());
3741         for (const auto& [pred, pred_idx] :
3742              ZipCount(MakeIterationRange(entry->GetPredecessors()))) {
3743           DCHECK(!helper.IsMaterializationBlock(pred));
3744           if (excluded_cohort.IsEntryBlock(pred)) {
3745             pred_vals.push_back(ref_data.GetMaterialization(pred));
3746             continue;
3747           } else {
3748             pred_vals.push_back(SetupPartialMaterialization(helper, {ref_data}, pred_idx, entry));
3749           }
3750         }
3751         ref_data.GenerateMaterializationValueFromPredecessorsForEntry(entry, pred_vals);
3752       }
3753 
3754       // Setup exit block heap-values for later phi-generation.
3755       for (HBasicBlock* exit : excluded_cohort.ExitBlocks()) {
3756         // Mark every exit of the cohorts as having a value so we can easily
3757         // materialize the PHIs.
3758         // TODO By setting this we can easily use the normal MaterializeLoopPhis
3759         //      (via FullyMaterializePhis) in order to generate the default-values
3760         //      for predicated-gets. This has the unfortunate side effect of creating
3761         //      somewhat more phis than are really needed (in some cases). We really
3762         //      should try to eventually know that we can lower these PHIs to only
3763         //      the non-escaping value in cases where it is possible. Currently this
3764         //      is done to some extent in instruction_simplifier but we have more
3765         //      information here to do the right thing.
3766         for (const HeapLocation* loc : ref_data.IterateLocations()) {
3767           size_t loc_off = heap_location_collector_.GetHeapLocationIndex(loc);
3768           // This Value::Default() is only used to fill in PHIs used as the
3769           // default value for PredicatedInstanceFieldGets. The actual value
3770           // stored there is meaningless since the Predicated-iget will use the
3771           // actual field value instead on these paths.
3772           heap_values_for_[exit->GetBlockId()][loc_off].value = Value::Default();
3773         }
3774       }
3775     }
3776 
3777     // String the materialization through the graph.
3778     // Visit RPO to PHI the materialized object through the cohort.
3779     for (HBasicBlock* blk : GetGraph()->GetReversePostOrder()) {
3780       // NB This doesn't include materialization blocks.
3781       DCHECK(!helper.IsMaterializationBlock(blk))
3782           << "Materialization blocks should not be in RPO yet.";
3783       if (ref_data.HasMaterialization(blk)) {
3784         continue;
3785       } else if (ref_data.BeforeAllEscapes(blk)) {
3786         ref_data.AddMaterialization(blk, GetGraph()->GetNullConstant());
3787         continue;
3788       } else {
3789         ref_data.GenerateMaterializationValueFromPredecessors(blk);
3790       }
3791     }
3792   }
3793 
3794   // Once we've generated all the materializations we can update the users.
3795   helper.FixupPartialObjectUsers();
3796 
3797   // Actually put the materialization blocks into the graph.
3798   helper.InsertMaterializationBlocks();
3799 
3800   // Get rid of the original instructions.
3801   helper.RemoveReplacedInstructions();
3802 
3803   // Ensure everything is ordered correctly in the materialization blocks. This
3804   // involves moving every NewInstance to the top and ordering them so that any
3805   // required env-uses are correctly ordered.
3806   helper.ReorderMaterializationsForEnvDominance();
3807 }
3808 
3809 void LSEVisitor::FinishFullLSE() {
3810   // Remove recorded load instructions that should be eliminated.
3811   for (const LoadStoreRecord& record : loads_and_stores_) {
3812     size_t id = dchecked_integral_cast<size_t>(record.load_or_store->GetId());
3813     HInstruction* substitute = substitute_instructions_for_loads_[id];
3814     if (substitute == nullptr) {
3815       continue;
3816     }
3817     HInstruction* load = record.load_or_store;
3818     DCHECK(load != nullptr);
3819     DCHECK(IsLoad(load));
3820     DCHECK(load->GetBlock() != nullptr) << load->DebugName() << "@" << load->GetDexPc();
3821     // We proactively retrieve the substitute for a removed load, so
3822     // a load that has a substitute should not be observed as a heap
3823     // location value.
3824     DCHECK_EQ(FindSubstitute(substitute), substitute);
3825 
3826     load->ReplaceWith(substitute);
3827     load->GetBlock()->RemoveInstruction(load);
3828   }
3829 
3830   // Remove all the stores we can.
3831   for (const LoadStoreRecord& record : loads_and_stores_) {
3832     bool is_store = record.load_or_store->GetSideEffects().DoesAnyWrite();
3833     DCHECK_EQ(is_store, IsStore(record.load_or_store));
3834     if (is_store && !kept_stores_.IsBitSet(record.load_or_store->GetId())) {
3835       record.load_or_store->GetBlock()->RemoveInstruction(record.load_or_store);
3836     }
3837   }
3838 
3839   // Eliminate singleton-classified instructions:
3840   //   - Constructor fences (they never escape this thread).
3841   //   - Allocations (if they are unused).
3842   for (HInstruction* new_instance : singleton_new_instances_) {
3843     size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
3844     MaybeRecordStat(stats_,
3845                     MethodCompilationStat::kConstructorFenceRemovedLSE,
3846                     removed);
3847 
3848     if (!new_instance->HasNonEnvironmentUses()) {
3849       new_instance->RemoveEnvironmentUsers();
3850       new_instance->GetBlock()->RemoveInstruction(new_instance);
3851       MaybeRecordStat(stats_, MethodCompilationStat::kFullLSEAllocationRemoved);
3852     }
3853   }
3854 }
3855 
3856 // The LSEVisitor is a ValueObject (indirectly through base classes) and therefore
3857 // cannot be directly allocated with an arena allocator, so we need to wrap it.
3858 class LSEVisitorWrapper : public DeletableArenaObject<kArenaAllocLSE> {
3859  public:
3860   LSEVisitorWrapper(HGraph* graph,
3861                     const HeapLocationCollector& heap_location_collector,
3862                     bool perform_partial_lse,
3863                     OptimizingCompilerStats* stats)
3864       : lse_visitor_(graph, heap_location_collector, perform_partial_lse, stats) {}
3865 
3866   void Run() {
3867     lse_visitor_.Run();
3868   }
3869 
3870  private:
3871   LSEVisitor lse_visitor_;
3872 };
3873 
3874 bool LoadStoreElimination::Run(bool enable_partial_lse) {
3875   if (graph_->IsDebuggable() || graph_->HasTryCatch()) {
3876     // Debugger may set heap values or trigger deoptimization of callers.
3877     // Try/catch support not implemented yet.
3878     // Skip this optimization.
3879     return false;
3880   }
3881   // We need to be able to determine reachability. Clear it just to be safe but
3882   // this should initially be empty.
3883   graph_->ClearReachabilityInformation();
3884   // Computing this is O(blocks^3) time, but it means we can then query
3885   // reachability in O(1).
3886   graph_->ComputeReachabilityInformation();
3887   ScopedArenaAllocator allocator(graph_->GetArenaStack());
3888   LoadStoreAnalysis lsa(graph_,
3889                         stats_,
3890                         &allocator,
3891                         enable_partial_lse ? LoadStoreAnalysisType::kFull
3892                                            : LoadStoreAnalysisType::kNoPredicatedInstructions);
3893   lsa.Run();
3894   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
3895   if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
3896     // No HeapLocation information from LSA, skip this optimization.
3897     return false;
3898   }
3899 
3900   std::unique_ptr<LSEVisitorWrapper> lse_visitor(new (&allocator) LSEVisitorWrapper(
3901       graph_, heap_location_collector, enable_partial_lse, stats_));
3902   lse_visitor->Run();
3903   return true;
3904 }
3905 
3906 #undef LSE_VLOG
3907 
3908 }  // namespace art
3909