/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "load_store_elimination.h"

#include <algorithm>
#include <optional>
#include <sstream>
#include <variant>

#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
#include "base/bit_vector.h"
#include "base/globals.h"
#include "base/indenter.h"
#include "base/iteration_range.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/transform_iterator.h"
#include "escape.h"
#include "handle.h"
#include "load_store_analysis.h"
#include "mirror/class_loader.h"
#include "mirror/dex_cache.h"
#include "nodes.h"
#include "oat/stack_map.h"
#include "optimizing_compiler_stats.h"
#include "reference_type_propagation.h"
#include "side_effects_analysis.h"

/**
 * The general algorithm of load-store elimination (LSE).
 *
 * We use load-store analysis to collect a list of heap locations and perform
 * alias analysis of those heap locations. LSE then keeps track of a list of
 * heap values corresponding to the heap locations and stores that put those
 * values in these locations.
 * - In phase 1, we visit basic blocks in reverse post order and for each basic
 *   block, visit instructions sequentially, recording heap values and looking
 *   for loads and stores to eliminate without relying on loop Phis.
 * - In phase 2, we look for loads that can be replaced by creating loop Phis
 *   or using a loop-invariant value.
 * - In phase 3, we determine which stores are dead and can be eliminated and
 *   based on that information we re-evaluate whether some kept stores are
 *   storing the same value as the value in the heap location; such stores are
 *   also marked for elimination.
 * - In phase 4, we commit the changes, replacing loads marked for elimination
 *   in previous processing and removing stores not marked for keeping. We also
 *   remove allocations that are no longer needed.
 * - In phase 5, we move allocations which only escape along some executions
 *   closer to their escape points and fix up non-escaping paths with their
 *   actual values, creating PHIs when needed.
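 *
 * For illustration, consider this hypothetical Java-like snippet (the names
 * are ours, purely for exposition; `obj` is assumed to escape, e.g. as a
 * parameter):
 *   obj.x = 1;      // Never observed; removed in phase 4 as a dead store.
 *   obj.x = 2;      // Kept store; the value is observable after return.
 *   int a = obj.x;  // Load eliminated; `a` is replaced by the constant 2.
 *   return a;
 * Phase 1 records the heap values and the load substitute, phase 3 confirms
 * the first store is never needed, and phase 4 commits both changes.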
 *
 * 1. Walk over blocks and their instructions.
 *
 * The initial set of heap values for a basic block is
 * - For a loop header of an irreducible loop, all heap values are unknown.
 * - For a loop header of a normal loop, all values unknown at the end of the
 *   preheader are initialized to unknown, other heap values are set to Phi
 *   placeholders as we cannot determine yet whether these values are known on
 *   all back-edges. We use Phi placeholders also for array heap locations with
 *   index defined inside the loop, but this helps only when the value remains
 *   zero from the array allocation throughout the loop.
 * - For catch blocks, we clear all assumptions since we arrived due to an
 *   instruction throwing.
 * - For other basic blocks, we merge incoming values from the end of all
 *   predecessors. If any incoming value is unknown, the start value for this
 *   block is also unknown. Otherwise, if all the incoming values are the same
 *   (including the case of a single predecessor), the incoming value is used.
 *   Otherwise, we use a Phi placeholder to indicate different incoming values.
 *   We record whether such Phi placeholder depends on a loop Phi placeholder.
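 *   For example (hypothetical): if one predecessor block stores 1 to a field
 *   and the other stores 2, the merged start value for that field's location
 *   is a Phi placeholder; a load in the merge block can then be replaced by a
 *   real HPhi(1, 2) instead of reading memory.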
 *
 * For each instruction in the block
 * - If the instruction is a load from a heap location with a known value not
 *   dependent on a loop Phi placeholder, the load can be eliminated, either by
 *   using an existing instruction or by creating new Phi(s) instead. In order
 *   to maintain the validity of all heap locations during the optimization
 *   phase, we only record substitutes at this phase and the real elimination
 *   is delayed till the end of LSE. Loads that require a loop Phi placeholder
 *   replacement are recorded for processing later.
 * - If the instruction is a store, it updates the heap value for the heap
 *   location with the stored value and records the store itself so that we can
 *   mark it for keeping if the value becomes observable. Heap values are
 *   invalidated for heap locations that may alias with the store instruction's
 *   heap location and their recorded stores are marked for keeping as they are
 *   now potentially observable. The store instruction can be eliminated unless
 *   the value stored is later needed, e.g. by a load from the same or an
 *   aliased heap location, or the heap location persists at method
 *   return/deoptimization.
 * - A store that stores the same value as the heap value is eliminated.
 * - For newly instantiated instances, their heap values are initialized to
 *   language-defined default values.
 * - Finalizable objects are considered to persist at method
 *   return/deoptimization.
 * - Some instructions such as invokes are treated as loading and invalidating
 *   all the heap values, depending on the instruction's side effects.
 * - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any
 *   partially overlapping access among ArrayGet/ArraySet/VecLoad/VecStore is
 *   treated as aliasing and no load/store is eliminated in such a case.
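 *
 * A short hypothetical example of the rules above (names are ours):
 *   Foo f = new Foo();  // Heap values of f's fields initialized to defaults.
 *   int a = f.x;        // Load eliminated, replaced by the constant 0.
 *   f.x = 0;            // Stores the same value as the heap value; eliminated.
 *   escape(f);          // Invoke: f's heap values are invalidated.
 *   int b = f.x;        // Not eliminated; the value is unknown after the call.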
 *
 * The time complexity of the initial phase has several components. The total
 * time for the initialization of heap values for all blocks is
 *   O(heap_locations * edges)
 * and the time complexity for simple instruction processing is
 *   O(instructions).
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 2. Process loads that depend on loop Phi placeholders.
 *
 * We go over these loads to determine whether they can be eliminated. We look
 * for the set of all Phi placeholders that feed the load and depend on a loop
 * Phi placeholder and, if we find no unknown value, we construct the necessary
 * Phi(s) or, if all other inputs are identical, i.e. the location does not
 * change in the loop, just use that input. If we do find an unknown input, this
 * must be from a loop back-edge and we replace the loop Phi placeholder with an
 * unknown value and re-process loads and stores that previously depended on
 * loop Phi placeholders. This shall find at least one load of an unknown value
 * which is now known to be unreplaceable, or a new unknown value on a back-edge,
 * and we repeat this process until each load is either marked for replacement
 * or found to be unreplaceable. As we mark at least one additional loop Phi
 * placeholder as unreplaceable in each iteration, this process shall terminate.
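 *
 * For example (hypothetical, assuming `f` does not escape):
 *   f.x = 0;
 *   for (int i = 0; i < n; ++i) {
 *     f.x = f.x + 1;
 *   }
 * The load of f.x inside the loop depends on a loop Phi placeholder whose
 * inputs are the preheader value 0 and the back-edge value f.x + 1; we
 * materialize an HPhi(0, add) in the loop header and replace the load with it.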
 *
 * The depth-first search for Phi placeholders in FindLoopPhisToMaterialize()
 * is limited by the number of Phi placeholders and their dependencies we need
 * to search with worst-case time complexity
 *   O(phi_placeholder_dependencies) .
 * The dependencies are usually just the Phi placeholders' potential inputs,
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault() for default value
 * replacement search, there are additional dependencies to consider, see below.
 *
 * In the successful case (no unknown inputs found) we use the Floyd-Warshall
 * algorithm to determine transitive closures for each found Phi placeholder,
 * and then match or materialize Phis from the smallest transitive closure,
 * so that we can determine if such a subset has a single other input. This has
 * time complexity
 *   O(phi_placeholders_found^3) .
 * Note that successful TryReplacingLoopPhiPlaceholderWithDefault() does not
 * contribute to this as such Phi placeholders are replaced immediately.
 * The total time of all such successful cases has time complexity
 *   O(phi_placeholders^3)
 * because the found sets are disjoint and `Sum(n_i^3) <= Sum(n_i)^3`. A similar
 * argument applies to the searches used to find all successful cases, so their
 * total contribution is also just an insignificant
 *   O(phi_placeholder_dependencies) .
 * The materialization of Phis has an insignificant total time complexity
 *   O(phi_placeholders * edges) .
 *
 * If we find an unknown input, we re-process heap values and loads with a time
 * complexity that's the same as phase 1 in the worst case. Adding this to
 * the depth-first search time complexity yields
 *   O(phi_placeholder_dependencies + heap_locations * edges + instructions)
 * for a single iteration. We can ignore the middle term as it's proportional
 * to the number of Phi placeholder inputs included in the first term. Using
 * the upper limit of the number of such iterations, the total time complexity is
 *   O((phi_placeholder_dependencies + instructions) * phi_placeholders) .
 *
 * The upper bound of Phi placeholder inputs is
 *   heap_locations * edges
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault(), the dependencies
 * include other heap locations in predecessor blocks with the upper bound of
 *   heap_locations^2 * edges .
 * Using the estimate
 *   edges <= blocks^2
 * and
 *   phi_placeholders <= heap_locations * blocks ,
 * the worst-case time complexity of the
 *   O(phi_placeholder_dependencies * phi_placeholders)
 * term from unknown input cases is actually
 *   O(heap_locations^3 * blocks^3) ,
 * exactly as the estimate for the Floyd-Warshall parts of successful cases.
 * Adding the other term from the unknown input cases (to account for the case
 * with significantly more instructions than blocks and heap locations), the
 * phase 2 time complexity is
 *   O(heap_locations^3 * blocks^3 + heap_locations * blocks * instructions) .
 *
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 3. Determine which stores to keep and which to eliminate.
 *
 * During instruction processing in phase 1 and re-processing in phase 2, we are
 * keeping a record of the stores and Phi placeholders that become observable
 * and now propagate the observable Phi placeholders to all actual stores that
 * feed them. Having determined observable stores, we look for stores that just
 * overwrite the old value with the same value. Since ignoring non-observable
 * stores actually changes the old values in heap locations, we need to
 * recalculate Phi placeholder replacements, but we proceed similarly to the
 * previous phase. We look for the set of all Phis that feed the old value
 * replaced by the store (but ignoring whether they depend on a loop Phi) and,
 * if we find no unknown value, we try to match existing Phis (we do not create
 * new Phis anymore) or, if all other inputs are identical, i.e. the location
 * does not change in the loop, just use that input. If this succeeds and the
 * old value is identical to the value we're storing, such a store shall be
 * eliminated.
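 *
 * For example (hypothetical):
 *   f.x = 42;
 *   for (int i = 0; i < n; ++i) {
 *     f.x = 42;  // The old value is a loop Phi placeholder; all of its
 *   }            // inputs turn out to be 42, so this store is eliminated.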
 *
 * The work is similar to phase 2, except that we're not re-processing loads
 * and stores anymore, so the time complexity of phase 3 is
 *   O(heap_locations^3 * blocks^3) .
 *
 * There is additional complexity in matching existing Phis shared between the
 * phases 1, 2 and 3. We are never trying to match two or more Phis at the same
 * time (this could be difficult and slow), so each matching attempt is just
 * looking at Phis in the block (both old Phis and newly created Phis) and their
 * inputs. As we create at most `heap_locations` Phis in each block, the upper
 * bound on the number of Phis we look at is
 *   heap_locations * (old_phis + heap_locations)
 * and the worst-case time complexity is
 *   O(heap_locations^2 * edges + heap_locations * old_phis * edges) .
 * The first term is lower than one term in phase 2, so the relevant part is
 *   O(heap_locations * old_phis * edges) .
 *
 * 4. Replace loads and remove unnecessary stores and singleton allocations.
 *
 * Singletons are a special type of object: they are instantiated in the method
 * and have a single name, i.e. no aliases, and therefore exclusive heap
 * locations. Singletons are helpful in narrowing down the life span of a heap
 * location such that they do not always need to participate in merging heap
 * values. The allocation of a singleton can be eliminated if that singleton is
 * not used and does not persist at method return/deoptimization.
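 *
 * For example (hypothetical, assuming Point's constructor is trivial and
 * inlined):
 *   Point p = new Point();  // Singleton: never escapes this method.
 *   p.x = 4;
 *   p.y = 2;
 *   return p.x + p.y;       // Loads replaced by the constants 4 and 2; the
 *                           // stores and the allocation itself are removed.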
 *
 * The time complexity of this phase is
 *   O(instructions + instruction_uses) .
 *
 * FIXME: The time complexities described above assume that the
 * HeapLocationCollector finds a heap location for an instruction in O(1)
 * time but it is currently O(heap_locations); this can be fixed by adding
 * a hash map to the HeapLocationCollector.
 */

namespace art HIDDEN {

#define LSE_VLOG \
  if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO)

class HeapRefHolder;

// Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke().
class LSEVisitor final : private HGraphDelegateVisitor {
 public:
  LSEVisitor(HGraph* graph,
             const HeapLocationCollector& heap_location_collector,
             OptimizingCompilerStats* stats);

  void Run();

 private:
  class PhiPlaceholder {
   public:
    constexpr PhiPlaceholder() : block_id_(-1), heap_location_(-1) {}
    constexpr PhiPlaceholder(uint32_t block_id, size_t heap_location)
        : block_id_(block_id), heap_location_(dchecked_integral_cast<uint32_t>(heap_location)) {}

    constexpr PhiPlaceholder(const PhiPlaceholder& p) = default;
    constexpr PhiPlaceholder(PhiPlaceholder&& p) = default;
    constexpr PhiPlaceholder& operator=(const PhiPlaceholder& p) = default;
    constexpr PhiPlaceholder& operator=(PhiPlaceholder&& p) = default;

    constexpr uint32_t GetBlockId() const {
      return block_id_;
    }

    constexpr size_t GetHeapLocation() const {
      return heap_location_;
    }

    constexpr bool Equals(const PhiPlaceholder& p2) const {
      return block_id_ == p2.block_id_ && heap_location_ == p2.heap_location_;
    }

    void Dump(std::ostream& oss) const {
      oss << "PhiPlaceholder[blk: " << block_id_ << ", heap_location_: " << heap_location_ << "]";
    }

   private:
    uint32_t block_id_;
    uint32_t heap_location_;
  };

  struct Marker {};

  class Value;

  class PriorValueHolder {
   public:
    constexpr explicit PriorValueHolder(Value prior);

    constexpr bool IsInstruction() const {
      return std::holds_alternative<HInstruction*>(value_);
    }
    constexpr bool IsPhi() const {
      return std::holds_alternative<PhiPlaceholder>(value_);
    }
    constexpr bool IsDefault() const {
      return std::holds_alternative<Marker>(value_);
    }
    constexpr PhiPlaceholder GetPhiPlaceholder() const {
      DCHECK(IsPhi());
      return std::get<PhiPlaceholder>(value_);
    }
    constexpr HInstruction* GetInstruction() const {
      DCHECK(IsInstruction());
      return std::get<HInstruction*>(value_);
    }

    Value ToValue() const;
    void Dump(std::ostream& oss) const;

    constexpr bool Equals(PriorValueHolder other) const {
      return value_ == other.value_;
    }

   private:
    std::variant<Marker, HInstruction*, PhiPlaceholder> value_;
  };

  friend constexpr bool operator==(const Marker&, const Marker&);
  friend constexpr bool operator==(const PriorValueHolder& p1, const PriorValueHolder& p2);
  friend constexpr bool operator==(const PhiPlaceholder& p1, const PhiPlaceholder& p2);
  friend std::ostream& operator<<(std::ostream& oss, const PhiPlaceholder& p2);

  class Value {
   public:
    enum class ValuelessType {
      kInvalid,
      kPureUnknown,
      kDefault,
    };
    struct MergedUnknownMarker {
      PhiPlaceholder phi_;
    };
    struct NeedsNonLoopPhiMarker {
      PhiPlaceholder phi_;
    };
    struct NeedsLoopPhiMarker {
      PhiPlaceholder phi_;
    };

    static constexpr Value Invalid() {
      return Value(ValuelessType::kInvalid);
    }

    // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
    // A heap location can be set to an unknown heap value when:
    // - it is coming from outside the method,
    // - it is killed due to aliasing, or side effects, or merging with an unknown value.
    static constexpr Value PureUnknown() {
      return Value(ValuelessType::kPureUnknown);
    }

    static constexpr Value PartialUnknown(Value old_value) {
      if (old_value.IsInvalid() || old_value.IsPureUnknown()) {
        return PureUnknown();
      } else {
        return Value(PriorValueHolder(old_value));
      }
    }

    static constexpr Value MergedUnknown(PhiPlaceholder phi_placeholder) {
      return Value(MergedUnknownMarker{phi_placeholder});
    }

    // Default heap value after an allocation.
    // A heap location can be set to that value right after an allocation.
    static constexpr Value Default() {
      return Value(ValuelessType::kDefault);
    }

    static constexpr Value ForInstruction(HInstruction* instruction) {
      return Value(instruction);
    }

    static constexpr Value ForNonLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
      return Value(NeedsNonLoopPhiMarker{phi_placeholder});
    }

    static constexpr Value ForLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
      return Value(NeedsLoopPhiMarker{phi_placeholder});
    }

    static constexpr Value ForPhiPlaceholder(PhiPlaceholder phi_placeholder, bool needs_loop_phi) {
      return needs_loop_phi ? ForLoopPhiPlaceholder(phi_placeholder)
                            : ForNonLoopPhiPlaceholder(phi_placeholder);
    }

    constexpr bool IsValid() const {
      return !IsInvalid();
    }

    constexpr bool IsInvalid() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kInvalid;
    }

    bool IsPartialUnknown() const {
      return std::holds_alternative<PriorValueHolder>(value_);
    }

    bool IsMergedUnknown() const {
      return std::holds_alternative<MergedUnknownMarker>(value_);
    }

    bool IsPureUnknown() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kPureUnknown;
    }

    bool IsUnknown() const {
      return IsPureUnknown() || IsMergedUnknown() || IsPartialUnknown();
    }

    bool IsDefault() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kDefault;
    }

    bool IsInstruction() const {
      return std::holds_alternative<HInstruction*>(value_);
    }

    bool NeedsNonLoopPhi() const {
      return std::holds_alternative<NeedsNonLoopPhiMarker>(value_);
    }

    bool NeedsLoopPhi() const {
      return std::holds_alternative<NeedsLoopPhiMarker>(value_);
    }

    bool NeedsPhi() const {
      return NeedsNonLoopPhi() || NeedsLoopPhi();
    }

    HInstruction* GetInstruction() const {
      DCHECK(IsInstruction()) << *this;
      return std::get<HInstruction*>(value_);
    }

    PriorValueHolder GetPriorValue() const {
      DCHECK(IsPartialUnknown());
      return std::get<PriorValueHolder>(value_);
    }

    PhiPlaceholder GetPhiPlaceholder() const {
      DCHECK(NeedsPhi() || IsMergedUnknown());
      if (NeedsNonLoopPhi()) {
        return std::get<NeedsNonLoopPhiMarker>(value_).phi_;
      } else if (NeedsLoopPhi()) {
        return std::get<NeedsLoopPhiMarker>(value_).phi_;
      } else {
        return std::get<MergedUnknownMarker>(value_).phi_;
      }
    }

    uint32_t GetMergeBlockId() const {
      DCHECK(IsMergedUnknown()) << this;
      return std::get<MergedUnknownMarker>(value_).phi_.GetBlockId();
    }

    HBasicBlock* GetMergeBlock(const HGraph* graph) const {
      DCHECK(IsMergedUnknown()) << *this;
      return graph->GetBlocks()[GetMergeBlockId()];
    }

    size_t GetHeapLocation() const {
      DCHECK(IsMergedUnknown() || NeedsPhi()) << this;
      return GetPhiPlaceholder().GetHeapLocation();
    }

    constexpr bool ExactEquals(Value other) const;

    constexpr bool Equals(Value other) const;

    constexpr bool Equals(HInstruction* instruction) const {
      return Equals(ForInstruction(instruction));
    }

    std::ostream& Dump(std::ostream& os) const;

    // Public for use with lists.
    constexpr Value() : value_(ValuelessType::kInvalid) {}

   private:
    using ValueHolder = std::variant<ValuelessType,
                                     HInstruction*,
                                     MergedUnknownMarker,
                                     NeedsNonLoopPhiMarker,
                                     NeedsLoopPhiMarker,
                                     PriorValueHolder>;
    constexpr ValuelessType GetValuelessType() const {
      return std::get<ValuelessType>(value_);
    }

    constexpr explicit Value(ValueHolder v) : value_(v) {}

    friend std::ostream& operator<<(std::ostream& os, const Value& v);

    ValueHolder value_;

    static_assert(std::is_move_assignable<PhiPlaceholder>::value);
  };

  friend constexpr bool operator==(const Value::NeedsLoopPhiMarker& p1,
                                   const Value::NeedsLoopPhiMarker& p2);
  friend constexpr bool operator==(const Value::NeedsNonLoopPhiMarker& p1,
                                   const Value::NeedsNonLoopPhiMarker& p2);
  friend constexpr bool operator==(const Value::MergedUnknownMarker& p1,
                                   const Value::MergedUnknownMarker& p2);

  // Get Phi placeholder index for access to `phi_placeholder_replacements_`
  // and "visited" bit vectors during depth-first searches.
  size_t PhiPlaceholderIndex(PhiPlaceholder phi_placeholder) const {
    size_t res =
        phi_placeholder.GetBlockId() * heap_location_collector_.GetNumberOfHeapLocations() +
        phi_placeholder.GetHeapLocation();
    DCHECK_EQ(phi_placeholder, GetPhiPlaceholderAt(res))
        << res << "blks: " << GetGraph()->GetBlocks().size()
        << " hls: " << heap_location_collector_.GetNumberOfHeapLocations();
    return res;
  }

  size_t PhiPlaceholderIndex(Value phi_placeholder) const {
    return PhiPlaceholderIndex(phi_placeholder.GetPhiPlaceholder());
  }

  bool IsEscapingObject(ReferenceInfo* info) { return !info->IsSingletonAndRemovable(); }

  PhiPlaceholder GetPhiPlaceholderAt(size_t off) const {
    DCHECK_LT(off, num_phi_placeholders_);
    size_t id = off % heap_location_collector_.GetNumberOfHeapLocations();
    // Technically this should be (off - id) / NumberOfHeapLocations
    // but due to truncation it's all the same.
    size_t blk_id = off / heap_location_collector_.GetNumberOfHeapLocations();
    return GetPhiPlaceholder(blk_id, id);
  }

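  // A worked example of the index encoding (the numbers are ours, purely for
  // exposition): with 4 heap locations, PhiPlaceholderIndex() maps the
  // placeholder for block 3 and heap location 2 to index 3 * 4 + 2 = 14, and
  // GetPhiPlaceholderAt(14) decodes it back as blk_id = 14 / 4 = 3 and
  // id = 14 % 4 = 2.
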
  PhiPlaceholder GetPhiPlaceholder(uint32_t block_id, size_t idx) const {
    DCHECK(GetGraph()->GetBlocks()[block_id] != nullptr) << block_id;
    return PhiPlaceholder(block_id, idx);
  }

  Value Replacement(Value value) const {
    DCHECK(value.NeedsPhi()) << value << " phase: " << current_phase_;
    Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
    DCHECK(replacement.IsUnknown() || replacement.IsInstruction());
    DCHECK(replacement.IsUnknown() ||
           FindSubstitute(replacement.GetInstruction()) == replacement.GetInstruction());
    return replacement;
  }

  Value ReplacementOrValue(Value value) const {
    if (value.NeedsPhi() && phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
      return Replacement(value);
    } else {
      DCHECK_IMPLIES(value.IsInstruction(),
                     FindSubstitute(value.GetInstruction()) == value.GetInstruction());
      return value;
    }
  }

  // The record of a heap value and instruction(s) that feed that value.
  struct ValueRecord {
    Value value;
    Value stored_by;
  };

  HTypeConversion* FindOrAddTypeConversionIfNecessary(HInstruction* instruction,
                                                      HInstruction* value,
                                                      DataType::Type expected_type) {
    // Should never add type conversion into boolean value.
    if (expected_type == DataType::Type::kBool ||
        DataType::IsTypeConversionImplicit(value->GetType(), expected_type) ||
        // TODO: This prevents type conversion of default values but we can still insert
        // type conversion of other constants and there is no constant folding pass after LSE.
        IsZeroBitPattern(value)) {
      return nullptr;
    }

    // Check if there is already a suitable TypeConversion we can reuse.
    for (const HUseListNode<HInstruction*>& use : value->GetUses()) {
      if (use.GetUser()->IsTypeConversion() &&
          use.GetUser()->GetType() == expected_type &&
          // TODO: We could move the TypeConversion to a common dominator
          // if it does not cross irreducible loop header.
          use.GetUser()->GetBlock()->Dominates(instruction->GetBlock()) &&
          // Don't share across irreducible loop headers.
          // TODO: can be more fine-grained than this by testing each dominator.
          (use.GetUser()->GetBlock() == instruction->GetBlock() ||
           !GetGraph()->HasIrreducibleLoops())) {
        if (use.GetUser()->GetBlock() == instruction->GetBlock() &&
            use.GetUser()->GetBlock()->GetInstructions().FoundBefore(instruction, use.GetUser())) {
          // Move the TypeConversion before the instruction.
          use.GetUser()->MoveBefore(instruction);
        }
        DCHECK(use.GetUser()->StrictlyDominates(instruction));
        return use.GetUser()->AsTypeConversion();
      }
    }

    // We must create a new TypeConversion instruction.
    HTypeConversion* type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
        expected_type, value, instruction->GetDexPc());
    instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
    return type_conversion;
  }

  // Find an instruction's substitute if it's a removed load.
  // Return the same instruction if it should not be removed.
  HInstruction* FindSubstitute(HInstruction* instruction) const {
    size_t id = static_cast<size_t>(instruction->GetId());
    if (id >= substitute_instructions_for_loads_.size()) {
      // New Phi (may not be in the graph yet), or default value.
      DCHECK(!IsLoad(instruction));
      return instruction;
    }
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    DCHECK(substitute == nullptr || IsLoad(instruction));
    return (substitute != nullptr) ? substitute : instruction;
  }

  void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
    DCHECK(IsLoad(load));
    DCHECK_EQ(FindSubstitute(load), load);
    DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
        "Unexpected heap_value that has a substitute " << heap_value->DebugName();

    // The load expects to load the heap value as type load->GetType().
    // However the tracked heap value may not be of that type. An explicit
    // type conversion may be needed.
    // There are actually three types involved here:
    // (1) tracked heap value's type (type A)
    // (2) heap location (field or element)'s type (type B)
    // (3) load's type (type C)
    // We guarantee that type A stored as type B and then fetched out as
    // type C is the same as casting from type A to type C directly, since
    // type B and type C will have the same size which is guaranteed in
    // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
    // So we only need one type conversion from type A to type C.
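    // For example (a hypothetical case): if an int-typed value was stored to a
    // short field and a subsequent load reads it back as a short, we replace
    // the load with a single HTypeConversion(kInt16, value) instead of keeping
    // the store/load round-trip.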
    HTypeConversion* type_conversion = FindOrAddTypeConversionIfNecessary(
        load, heap_value, load->GetType());

    substitute_instructions_for_loads_[load->GetId()] =
        type_conversion != nullptr ? type_conversion : heap_value;
  }

  static bool IsLoad(HInstruction* instruction) {
    // Unresolved load is not treated as a load.
    return instruction->IsInstanceFieldGet() ||
           instruction->IsStaticFieldGet() ||
           instruction->IsVecLoad() ||
           instruction->IsArrayGet();
  }

  static bool IsStore(HInstruction* instruction) {
    // Unresolved store is not treated as a store.
    return instruction->IsInstanceFieldSet() ||
           instruction->IsArraySet() ||
           instruction->IsVecStore() ||
           instruction->IsStaticFieldSet();
  }

  // Check if it is allowed to use default values or Phis for the specified load.
  static bool IsDefaultOrPhiAllowedForLoad(HInstruction* instruction) {
    DCHECK(IsLoad(instruction));
    // Using defaults for VecLoads requires creating additional vector operations.
    // As there are some issues with scheduling vector operations it is better to avoid
    // creating them.
    return !instruction->IsVecOperation();
  }

  // Keep the store referenced by the instruction, or all stores that feed a Phi placeholder.
  // This is necessary if the stored heap value can be observed.
  void KeepStores(Value value) {
    if (value.IsPureUnknown() || value.IsPartialUnknown()) {
      return;
    }
    if (value.IsMergedUnknown() || value.NeedsPhi()) {
      phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
    } else {
      HInstruction* instruction = value.GetInstruction();
      DCHECK(IsStore(instruction));
      kept_stores_.SetBit(instruction->GetId());
    }
  }

  // If a heap location X may alias with the heap location at `loc_index`
  // and the heap value of that heap location X holds a store, keep that store.
  // It's needed for a dependent load that's not eliminated since any store
  // that may put a value into the load's heap location needs to be kept.
  void KeepStoresIfAliasedToLocation(ScopedArenaVector<ValueRecord>& heap_values,
                                     size_t loc_index) {
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      if (i == loc_index) {
        // We use this function when reading a location with unknown value and
        // therefore we cannot know what exact store wrote that unknown value.
        // But we can have a phi placeholder here marking multiple stores to keep.
        DCHECK(!heap_values[i].stored_by.IsInstruction());
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::PureUnknown();
      } else if (heap_location_collector_.MayAlias(i, loc_index)) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::PureUnknown();
      }
    }
  }

  HInstruction* GetDefaultValue(DataType::Type type) {
    switch (type) {
      case DataType::Type::kReference:
        return GetGraph()->GetNullConstant();
      case DataType::Type::kBool:
      case DataType::Type::kUint8:
      case DataType::Type::kInt8:
      case DataType::Type::kUint16:
      case DataType::Type::kInt16:
      case DataType::Type::kInt32:
        return GetGraph()->GetIntConstant(0);
      case DataType::Type::kInt64:
        return GetGraph()->GetLongConstant(0);
      case DataType::Type::kFloat32:
        return GetGraph()->GetFloatConstant(0);
      case DataType::Type::kFloat64:
        return GetGraph()->GetDoubleConstant(0);
      default:
        UNREACHABLE();
    }
  }

  bool CanValueBeKeptIfSameAsNew(Value value,
                                 HInstruction* new_value,
                                 HInstruction* new_value_set_instr) {
    // For field/array set location operations, if the value is the same as the new_value
    // it can be kept even if aliasing happens. All aliased operations will access the same
    // memory range.
    // For vector values, this is not true. For example:
    //   packed_data = [0xA, 0xB, 0xC, 0xD];            <-- Different values in each lane.
    //   VecStore array[i ,i+1,i+2,i+3] = packed_data;
    //   VecStore array[i+1,i+2,i+3,i+4] = packed_data; <-- We are here (partial overlap).
    //   VecLoad  vx = array[i,i+1,i+2,i+3];            <-- Cannot be eliminated because the
    //                                                      value here is not packed_data anymore.
    //
    // TODO: to allow such 'same value' optimization on vector data,
    // LSA needs to report more fine-grained MAY alias information:
    // (1) May alias due to partial overlap of two vector accesses,
    //     e.g. a[i..i+3] and a[i+1,..,i+4].
    // (2) May alias because two vector accesses may completely overlap each other,
    //     e.g. a[i..i+3] and b[i..i+3].
    // (3) May alias but the exact relationship between the two locations is unknown,
    //     e.g. a[i..i+3] and b[j..j+3], where the values of a, b, i, j are all unknown.
    // This 'same value' optimization can apply only in case (2).
    if (new_value_set_instr->IsVecOperation()) {
      return false;
    }

    return value.Equals(new_value);
  }

  Value PrepareLoopValue(HBasicBlock* block, size_t idx);
  Value PrepareLoopStoredBy(HBasicBlock* block, size_t idx);
  void PrepareLoopRecords(HBasicBlock* block);
  Value MergePredecessorValues(HBasicBlock* block, size_t idx);
  void MergePredecessorRecords(HBasicBlock* block);

  void MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type);

  void VisitGetLocation(HInstruction* instruction, size_t idx);
  void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value);
  void RecordFieldInfo(const FieldInfo* info, size_t heap_loc) {
    field_infos_[heap_loc] = info;
  }

  void VisitBasicBlock(HBasicBlock* block) override;

  enum class Phase {
    kLoadElimination,
    kStoreElimination,
  };

  bool MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const;

  bool TryReplacingLoopPhiPlaceholderWithDefault(
      PhiPlaceholder phi_placeholder,
      DataType::Type type,
      /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
  bool TryReplacingLoopPhiPlaceholderWithSingleInput(
      PhiPlaceholder phi_placeholder,
      /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
  std::optional<PhiPlaceholder> FindLoopPhisToMaterialize(
      PhiPlaceholder phi_placeholder,
      /*out*/ ArenaBitVector* phi_placeholders_to_materialize,
      DataType::Type type,
      bool can_use_default_or_phi);
  bool MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
                           DataType::Type type);
  bool MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes, DataType::Type type);
  bool MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
                           DataType::Type type);
  bool FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type);
  std::optional<PhiPlaceholder> TryToMaterializeLoopPhis(PhiPlaceholder phi_placeholder,
                                                         HInstruction* load);
  void ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input);
  void ProcessLoadsRequiringLoopPhis();

  void SearchPhiPlaceholdersForKeptStores();
  void UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record);
  void FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder, DataType::Type type);
  void FindStoresWritingOldValues();
  void FinishFullLSE();

  void HandleAcquireLoad(HInstruction* instruction) {
    DCHECK((instruction->IsInstanceFieldGet() && instruction->AsInstanceFieldGet()->IsVolatile()) ||
           (instruction->IsStaticFieldGet() && instruction->AsStaticFieldGet()->IsVolatile()) ||
           (instruction->IsMonitorOperation() && instruction->AsMonitorOperation()->IsEnter()))
        << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();

    // Acquire operations, e.g. MONITOR_ENTER, change the thread's view of the memory, so we
    // must invalidate all current values.
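    // For example (hypothetical):
    //   int a = obj.f;
    //   synchronized (m) {
    //     int b = obj.f;  // Not replaced by `a`: another thread may have
    //   }                 // written obj.f before we acquired the monitor.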
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      KeepStores(heap_values[i].stored_by);
      heap_values[i].stored_by = Value::PureUnknown();
      heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
    }

    // Note that there's no need to record the load as subsequent acquire loads shouldn't be
    // eliminated either.
  }

  void HandleReleaseStore(HInstruction* instruction) {
    DCHECK((instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->IsVolatile()) ||
           (instruction->IsStaticFieldSet() && instruction->AsStaticFieldSet()->IsVolatile()) ||
           (instruction->IsMonitorOperation() && !instruction->AsMonitorOperation()->IsEnter()))
        << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();

    // Release operations, e.g. MONITOR_EXIT, do not affect this thread's view of the memory, but
    // they will push the modifications for other threads to see. Therefore, we must keep the
    // stores but there's no need to clobber the value.
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      KeepStores(heap_values[i].stored_by);
      heap_values[i].stored_by = Value::PureUnknown();
    }

    // Note that there's no need to record the store as subsequent release stores shouldn't be
    // eliminated either.
  }

  void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    if (instruction->IsVolatile()) {
      ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
          heap_location_collector_.HuntForOriginalReference(object));
      if (!ref_info->IsSingletonAndRemovable()) {
        HandleAcquireLoad(instruction);
        return;
      }
      // Treat it as a normal load if it is a removable singleton.
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field));
  }

  void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    if (instruction->IsVolatile()) {
      ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
          heap_location_collector_.HuntForOriginalReference(object));
      if (!ref_info->IsSingletonAndRemovable()) {
        HandleReleaseStore(instruction);
        return;
      }
      // Treat it as a normal store if it is a removable singleton.
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
    if (instruction->IsVolatile()) {
      HandleAcquireLoad(instruction);
      return;
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* cls = instruction->InputAt(0);
    VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field));
  }

  void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
    if (instruction->IsVolatile()) {
      HandleReleaseStore(instruction);
      return;
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* cls = instruction->InputAt(0);
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitMonitorOperation(HMonitorOperation* monitor_op) override {
    HInstruction* object = monitor_op->InputAt(0);
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
        heap_location_collector_.HuntForOriginalReference(object));
    if (ref_info->IsSingletonAndRemovable()) {
      // If the object is a removable singleton, we know that no other threads will have
      // access to it, and we can remove the MonitorOperation instruction.
      // MONITOR_ENTER throws when encountering a null object. If `object` is a removable
      // singleton, it is guaranteed to be non-null so we don't have to worry about the NullCheck.
      DCHECK(!object->CanBeNull());
      monitor_op->GetBlock()->RemoveInstruction(monitor_op);
      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedMonitorOp);
      return;
    }

    // We detected a monitor operation that we couldn't remove. See also LSEVisitor::Run().
    monitor_op->GetBlock()->GetGraph()->SetHasMonitorOperations(true);
    if (monitor_op->IsEnter()) {
      HandleAcquireLoad(monitor_op);
    } else {
      HandleReleaseStore(monitor_op);
    }
  }

  void VisitArrayGet(HArrayGet* instruction) override {
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitArraySet(HArraySet* instruction) override {
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitVecLoad(HVecLoad* instruction) override {
    DCHECK(!instruction->IsPredicated());
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitVecStore(HVecStore* instruction) override {
    DCHECK(!instruction->IsPredicated());
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitDeoptimize(HDeoptimize* instruction) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = instruction->GetBlock()->IsTryBlock();
    HBasicBlock* block = instruction->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      Value* stored_by = &heap_values[i].stored_by;
      if (stored_by->IsUnknown()) {
        continue;
      }
      // Stores are generally observable after deoptimization, except
      // for singletons that don't escape in the deoptimization environment.
      bool observable = true;
      ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (!inside_a_try && info->IsSingleton()) {
        HInstruction* reference = info->GetReference();
        // Finalizable objects always escape.
        const bool finalizable_object =
            reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
        if (!finalizable_object && !IsEscapingObject(info)) {
          // Check whether the reference for a store is used by an environment local of
          // the HDeoptimize. If not, the singleton is not observed after deoptimization.
          const HUseList<HEnvironment*>& env_uses = reference->GetEnvUses();
          observable = std::any_of(
              env_uses.begin(),
              env_uses.end(),
              [instruction](const HUseListNode<HEnvironment*>& use) {
                return use.GetUser()->GetHolder() == instruction;
              });
        }
      }
      if (observable) {
        KeepStores(*stored_by);
        *stored_by = Value::PureUnknown();
      }
    }
  }

  // Keep necessary stores before exiting a method via return/throw.
  void HandleExit(HBasicBlock* block, bool must_keep_stores = false) {
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (must_keep_stores || IsEscapingObject(ref_info)) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::PureUnknown();
      }
    }
  }

  void VisitReturn(HReturn* instruction) override {
    HandleExit(instruction->GetBlock());
  }

  void VisitReturnVoid(HReturnVoid* return_void) override {
    HandleExit(return_void->GetBlock());
  }

  void HandleThrowingInstruction(HInstruction* instruction) {
    DCHECK(instruction->CanThrow());
    // If we are inside of a try, singletons can become visible since we may not exit the method.
    HandleExit(instruction->GetBlock(), instruction->GetBlock()->IsTryBlock());
  }

  void VisitMethodEntryHook(HMethodEntryHook* method_entry) override {
    HandleThrowingInstruction(method_entry);
  }

  void VisitMethodExitHook(HMethodExitHook* method_exit) override {
    HandleThrowingInstruction(method_exit);
  }

  void VisitDivZeroCheck(HDivZeroCheck* div_zero_check) override {
    HandleThrowingInstruction(div_zero_check);
  }

  void VisitNullCheck(HNullCheck* null_check) override {
    HandleThrowingInstruction(null_check);
  }

  void VisitBoundsCheck(HBoundsCheck* bounds_check) override {
    HandleThrowingInstruction(bounds_check);
  }

  void VisitLoadClass(HLoadClass* load_class) override {
    if (load_class->CanThrow()) {
      HandleThrowingInstruction(load_class);
    }
  }

  void VisitLoadString(HLoadString* load_string) override {
    if (load_string->CanThrow()) {
      HandleThrowingInstruction(load_string);
    }
  }

  void VisitLoadMethodHandle(HLoadMethodHandle* load_method_handle) override {
    HandleThrowingInstruction(load_method_handle);
  }

  void VisitLoadMethodType(HLoadMethodType* load_method_type) override {
    HandleThrowingInstruction(load_method_type);
  }

  void VisitStringBuilderAppend(HStringBuilderAppend* sb_append) override {
    HandleThrowingInstruction(sb_append);
  }

  void VisitThrow(HThrow* throw_instruction) override {
    HandleThrowingInstruction(throw_instruction);
  }

  void VisitCheckCast(HCheckCast* check_cast) override {
    HandleThrowingInstruction(check_cast);
  }

  void HandleInvoke(HInstruction* instruction) {
    // If `instruction` can throw we have to presume all stores are visible.
    const bool can_throw = instruction->CanThrow();
    // If we are in a try, even singletons are observable.
    const bool can_throw_inside_a_try = can_throw && instruction->GetBlock()->IsTryBlock();
    SideEffects side_effects = instruction->GetSideEffects();
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      // We don't need to do anything if the reference has not escaped at this point.
      // This is true if we never escape.
      if (!can_throw_inside_a_try && ref_info->IsSingleton()) {
        // Singleton references cannot be seen by the callee.
      } else {
        if (can_throw || side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) {
          // Previous stores may become visible (read) and/or impossible for LSE to track (write).
          KeepStores(heap_values[i].stored_by);
          heap_values[i].stored_by = Value::PureUnknown();
        }
        if (side_effects.DoesAnyWrite()) {
          // The value may be clobbered.
          heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
        }
      }
    }
  }

  void VisitInvoke(HInvoke* invoke) override {
    HandleInvoke(invoke);
  }

  void VisitClinitCheck(HClinitCheck* clinit) override {
    // Class initialization check can result in class initializer calling arbitrary methods.
    HandleInvoke(clinit);
  }

  void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitNewInstance(HNewInstance* new_instance) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = new_instance->GetBlock()->IsTryBlock();
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
    if (ref_info == nullptr) {
      // new_instance isn't used for field accesses. No need to process it.
      return;
    }
    if (ref_info->IsSingletonAndRemovable() && !new_instance->NeedsChecks()) {
      DCHECK(!new_instance->IsFinalizable());
      // new_instance can potentially be eliminated.
      singleton_new_instances_.push_back(new_instance);
    }
    HBasicBlock* block = new_instance->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      HInstruction* ref = info->GetReference();
      size_t offset = heap_location_collector_.GetHeapLocation(i)->GetOffset();
      if (ref == new_instance) {
        if (offset >= mirror::kObjectHeaderSize ||
            MemberOffset(offset) == mirror::Object::MonitorOffset()) {
          // Instance fields except the header fields are set to default heap values.
          // The shadow$_monitor_ header field is also set to the default value.
          heap_values[i].value = Value::Default();
          heap_values[i].stored_by = Value::PureUnknown();
        } else if (MemberOffset(offset) == mirror::Object::ClassOffset()) {
          // However, the shadow$_klass_ header field is special and has an actual value.
          heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass());
          heap_values[i].stored_by = Value::PureUnknown();
        }
      } else if (inside_a_try || IsEscapingObject(info)) {
        // Since NewInstance can throw, we presume all previous stores could be visible.
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::PureUnknown();
      }
    }
  }

  void VisitNewArray(HNewArray* new_array) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = new_array->GetBlock()->IsTryBlock();
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
    if (ref_info == nullptr) {
      // new_array isn't used for array accesses. No need to process it.
      return;
    }
    if (ref_info->IsSingletonAndRemovable()) {
      if (new_array->GetLength()->IsIntConstant() &&
          new_array->GetLength()->AsIntConstant()->GetValue() >= 0) {
        // new_array can potentially be eliminated.
        singleton_new_instances_.push_back(new_array);
      } else {
        // new_array may throw NegativeArraySizeException. Keep it.
      }
    }
    HBasicBlock* block = new_array->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
      ReferenceInfo* info = location->GetReferenceInfo();
      HInstruction* ref = info->GetReference();
      if (ref == new_array && location->GetIndex() != nullptr) {
        // Array elements are set to default heap values.
        heap_values[i].value = Value::Default();
        heap_values[i].stored_by = Value::PureUnknown();
      } else if (inside_a_try || IsEscapingObject(info)) {
        // Since NewArray can throw, we presume all previous stores could be visible.
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::PureUnknown();
      }
    }
  }

  void VisitInstruction(HInstruction* instruction) override {
    // Throwing instructions must be handled specially.
    DCHECK(!instruction->CanThrow());
  }

1217 const HeapLocationCollector& heap_location_collector_;
1218
1219 // Use local allocator for allocating memory.
1220 ScopedArenaAllocator allocator_;
1221
1222 // The number of unique phi_placeholders there possibly are
1223 size_t num_phi_placeholders_;
1224
1225 // One array of heap value records for each block.
1226 ScopedArenaVector<ScopedArenaVector<ValueRecord>> heap_values_for_;
1227
1228 // We record loads and stores for re-processing when we find a loop Phi placeholder
1229 // with unknown value from a predecessor, and also for removing stores that are
1230 // found to be dead, i.e. not marked in `kept_stores_` at the end.
1231 struct LoadStoreRecord {
1232 HInstruction* load_or_store;
1233 size_t heap_location_index;
1234 };
1235 ScopedArenaVector<LoadStoreRecord> loads_and_stores_;
1236
1237 // We record the substitute instructions for loads that should be
1238 // eliminated but may be used by heap locations. They'll be removed
1239 // in the end. These are indexed by the load's id.
1240 ScopedArenaVector<HInstruction*> substitute_instructions_for_loads_;
1241
1242 // Record stores to keep in a bit vector indexed by instruction ID.
1243 ArenaBitVector kept_stores_;
1244 // When we need to keep all stores that feed a Phi placeholder, we just record the
1245 // index of that placeholder for processing after graph traversal.
1246 ArenaBitVector phi_placeholders_to_search_for_kept_stores_;
1247
1248 // Loads that would require a loop Phi to replace are recorded for processing
1249 // later as we do not have enough information from back-edges to determine if
1250 // a suitable Phi can be found or created when we visit these loads.
1251 // This is a flat "map" indexed by the load instruction id.
1252 ScopedArenaVector<ValueRecord*> loads_requiring_loop_phi_;
1253
1254 // For stores, record the old value records that were replaced and the stored values.
1255 struct StoreRecord {
1256 StoreRecord(ValueRecord old_value_record_in, HInstruction* stored_value_in)
1257 : old_value_record(old_value_record_in), stored_value(stored_value_in) {}
1258 ValueRecord old_value_record;
1259 HInstruction* stored_value;
1260 };
1261 // This is a flat "map" indexed by the store instruction id.
1262 ScopedArenaVector<StoreRecord*> store_records_;
1263
1264 // Replacements for Phi placeholders.
1265 // The invalid heap value is used to mark Phi placeholders that cannot be replaced.
1266 ScopedArenaVector<Value> phi_placeholder_replacements_;
1267
1268 ScopedArenaVector<HInstruction*> singleton_new_instances_;
1269
1270 // The field infos for each heap location (if relevant).
1271 ScopedArenaVector<const FieldInfo*> field_infos_;
1272
1273 Phase current_phase_;
1274
1275 friend struct ScopedRestoreHeapValues;
1276
1277 friend std::ostream& operator<<(std::ostream& os, const Value& v);
1278 friend std::ostream& operator<<(std::ostream& os, const PriorValueHolder& v);
1279 friend std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase);
1280
1281 DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
1282 };
1283
1284 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PriorValueHolder& p) {
1285 p.Dump(oss);
1286 return oss;
1287 }
1288
1289 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase) {
1290 switch (phase) {
1291 case LSEVisitor::Phase::kLoadElimination:
1292 return oss << "kLoadElimination";
1293 case LSEVisitor::Phase::kStoreElimination:
1294 return oss << "kStoreElimination";
1295 }
1296 }
1297
1298 void LSEVisitor::PriorValueHolder::Dump(std::ostream& oss) const {
1299 if (IsDefault()) {
1300 oss << "Default";
1301 } else if (IsPhi()) {
1302 oss << "Phi: " << GetPhiPlaceholder();
1303 } else {
1304 oss << "Instruction: " << *GetInstruction();
1305 }
1306 }
1307
1308 constexpr LSEVisitor::PriorValueHolder::PriorValueHolder(Value val)
1309 : value_(Marker{}) {
1310 DCHECK(!val.IsInvalid() && !val.IsPureUnknown());
1311 if (val.IsPartialUnknown()) {
1312 value_ = val.GetPriorValue().value_;
1313 } else if (val.IsMergedUnknown() || val.NeedsPhi()) {
1314 value_ = val.GetPhiPlaceholder();
1315 } else if (val.IsInstruction()) {
1316 value_ = val.GetInstruction();
1317 } else {
1318 DCHECK(val.IsDefault());
1319 }
1320 }
1321
1322 constexpr bool operator==(const LSEVisitor::Marker&, const LSEVisitor::Marker&) {
1323 return true;
1324 }
1325
1326 constexpr bool operator==(const LSEVisitor::PriorValueHolder& p1,
1327 const LSEVisitor::PriorValueHolder& p2) {
1328 return p1.Equals(p2);
1329 }
1330
1331 constexpr bool operator==(const LSEVisitor::PhiPlaceholder& p1,
1332 const LSEVisitor::PhiPlaceholder& p2) {
1333 return p1.Equals(p2);
1334 }
1335
1336 constexpr bool operator==(const LSEVisitor::Value::NeedsLoopPhiMarker& p1,
1337 const LSEVisitor::Value::NeedsLoopPhiMarker& p2) {
1338 return p1.phi_ == p2.phi_;
1339 }
1340
1341 constexpr bool operator==(const LSEVisitor::Value::NeedsNonLoopPhiMarker& p1,
1342 const LSEVisitor::Value::NeedsNonLoopPhiMarker& p2) {
1343 return p1.phi_ == p2.phi_;
1344 }
1345
1346 constexpr bool operator==(const LSEVisitor::Value::MergedUnknownMarker& p1,
1347 const LSEVisitor::Value::MergedUnknownMarker& p2) {
1348 return p1.phi_ == p2.phi_;
1349 }
1350
1351 std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PhiPlaceholder& p) {
1352 p.Dump(oss);
1353 return oss;
1354 }
1355
1356 LSEVisitor::Value LSEVisitor::PriorValueHolder::ToValue() const {
1357 if (IsDefault()) {
1358 return Value::Default();
1359 } else if (IsPhi()) {
1360 return Value::ForLoopPhiPlaceholder(GetPhiPlaceholder());
1361 } else {
1362 return Value::ForInstruction(GetInstruction());
1363 }
1364 }
1365
1366 constexpr bool LSEVisitor::Value::ExactEquals(LSEVisitor::Value other) const {
1367 return value_ == other.value_;
1368 }
1369
1370 constexpr bool LSEVisitor::Value::Equals(LSEVisitor::Value other) const {
1371 // Only valid values can be compared.
1372 DCHECK(IsValid());
1373 DCHECK(other.IsValid());
1374 if (value_ == other.value_) {
1375 // Note: Two unknown values are considered different.
1376 return !IsUnknown();
1377 } else {
1378 // Default is considered equal to zero-bit-pattern instructions.
1379 return (IsDefault() && other.IsInstruction() && IsZeroBitPattern(other.GetInstruction())) ||
1380 (other.IsDefault() && IsInstruction() && IsZeroBitPattern(GetInstruction()));
1381 }
1382 }
1383
1384 std::ostream& LSEVisitor::Value::Dump(std::ostream& os) const {
1385 if (std::holds_alternative<LSEVisitor::Value::ValuelessType>(value_)) {
1386 switch (GetValuelessType()) {
1387 case ValuelessType::kDefault:
1388 return os << "Default";
1389 case ValuelessType::kPureUnknown:
1390 return os << "PureUnknown";
1391 case ValuelessType::kInvalid:
1392 return os << "Invalid";
1393 }
1394 } else if (IsPartialUnknown()) {
1395 return os << "PartialUnknown[" << GetPriorValue() << "]";
1396 } else if (IsInstruction()) {
1397 return os << "Instruction[id: " << GetInstruction()->GetId()
1398 << ", block: " << GetInstruction()->GetBlock()->GetBlockId() << "]";
1399 } else if (IsMergedUnknown()) {
1400 return os << "MergedUnknown[block: " << GetPhiPlaceholder().GetBlockId()
1401 << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1402
1403 } else if (NeedsLoopPhi()) {
1404 return os << "NeedsLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
1405 << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1406 } else {
1407 return os << "NeedsNonLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
1408 << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
1409 }
1410 }
1411
1412 std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) {
1413 return v.Dump(os);
1414 }
1415
1416 LSEVisitor::LSEVisitor(HGraph* graph,
1417 const HeapLocationCollector& heap_location_collector,
1418 OptimizingCompilerStats* stats)
1419 : HGraphDelegateVisitor(graph, stats),
1420 heap_location_collector_(heap_location_collector),
1421 allocator_(graph->GetArenaStack()),
1422 num_phi_placeholders_(GetGraph()->GetBlocks().size() *
1423 heap_location_collector_.GetNumberOfHeapLocations()),
1424 heap_values_for_(graph->GetBlocks().size(),
1425 ScopedArenaVector<ValueRecord>(allocator_.Adapter(kArenaAllocLSE)),
1426 allocator_.Adapter(kArenaAllocLSE)),
1427 loads_and_stores_(allocator_.Adapter(kArenaAllocLSE)),
1428 // We may add new instructions (default values, Phis) but we're not adding loads
1429 // or stores, so we shall not need to resize the following vector and BitVector.
1430 substitute_instructions_for_loads_(
1431 graph->GetCurrentInstructionId(), nullptr, allocator_.Adapter(kArenaAllocLSE)),
1432 kept_stores_(&allocator_,
1433 /*start_bits=*/graph->GetCurrentInstructionId(),
1434 /*expandable=*/false,
1435 kArenaAllocLSE),
1436 phi_placeholders_to_search_for_kept_stores_(&allocator_,
1437 num_phi_placeholders_,
1438 /*expandable=*/false,
1439 kArenaAllocLSE),
1440 loads_requiring_loop_phi_(allocator_.Adapter(kArenaAllocLSE)),
1441 store_records_(allocator_.Adapter(kArenaAllocLSE)),
1442 phi_placeholder_replacements_(
1443 num_phi_placeholders_, Value::Invalid(), allocator_.Adapter(kArenaAllocLSE)),
1444 singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)),
1445 field_infos_(heap_location_collector_.GetNumberOfHeapLocations(),
1446 allocator_.Adapter(kArenaAllocLSE)),
1447 current_phase_(Phase::kLoadElimination) {}
1448
1449 LSEVisitor::Value LSEVisitor::PrepareLoopValue(HBasicBlock* block, size_t idx) {
1450 // If the pre-header value is known (which implies that the reference dominates this
1451 // block), use a Phi placeholder for the value in the loop header. If all predecessors
1452 // are later found to have a known value, we can replace loads from this location,
1453 // either with the pre-header value or with a new Phi. For array locations, the index
1454 // may be defined inside the loop but the only known value in that case should be the
1455 // default value or a Phi placeholder that can be replaced only with the default value.
1456 HLoopInformation* loop_info = block->GetLoopInformation();
1457 uint32_t pre_header_block_id = loop_info->GetPreHeader()->GetBlockId();
1458 Value pre_header_value = ReplacementOrValue(heap_values_for_[pre_header_block_id][idx].value);
1459 if (pre_header_value.IsUnknown()) {
1460 return pre_header_value;
1461 }
1462 if (kIsDebugBuild) {
1463 // Check that the reference indeed dominates this loop.
1464 HeapLocation* location = heap_location_collector_.GetHeapLocation(idx);
1465 HInstruction* ref = location->GetReferenceInfo()->GetReference();
1466 CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block))
1467 << GetGraph()->PrettyMethod();
1468 // Check that the index, if defined inside the loop, tracks a default value
1469 // or a Phi placeholder requiring a loop Phi.
1470 HInstruction* index = location->GetIndex();
1471 if (index != nullptr && loop_info->Contains(*index->GetBlock())) {
1472 CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default()))
1473 << GetGraph()->PrettyMethod() << " blk: " << block->GetBlockId() << " "
1474 << pre_header_value;
1475 }
1476 }
1477 PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1478 return ReplacementOrValue(Value::ForLoopPhiPlaceholder(phi_placeholder));
1479 }
1480
1481 LSEVisitor::Value LSEVisitor::PrepareLoopStoredBy(HBasicBlock* block, size_t idx) {
1482 // Use the Phi placeholder for `stored_by` to make sure all incoming stores are kept
1483 // if the value in the location escapes. This is not applicable to singletons that are
1484 // defined inside the loop as they shall be dead in the loop header.
1485 const ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
1486 const HInstruction* reference = ref_info->GetReference();
1487 // Finalizable objects always escape.
1488 const bool is_finalizable =
1489 reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
1490 if (ref_info->IsSingleton() &&
1491 block->GetLoopInformation()->Contains(*reference->GetBlock()) &&
1492 !is_finalizable) {
1493 return Value::PureUnknown();
1494 }
1495 PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1496 return Value::ForLoopPhiPlaceholder(phi_placeholder);
1497 }
1498
1499 void LSEVisitor::PrepareLoopRecords(HBasicBlock* block) {
1500 DCHECK(block->IsLoopHeader());
1501 int block_id = block->GetBlockId();
1502 HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
1503 ScopedArenaVector<ValueRecord>& pre_header_heap_values =
1504 heap_values_for_[pre_header->GetBlockId()];
1505 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1506 DCHECK_EQ(num_heap_locations, pre_header_heap_values.size());
1507 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1508 DCHECK(heap_values.empty());
1509
1510 // Don't eliminate loads in irreducible loops.
1511 if (block->GetLoopInformation()->IsIrreducible()) {
1512 heap_values.resize(num_heap_locations,
1513 {/*value=*/Value::Invalid(), /*stored_by=*/Value::PureUnknown()});
1514 // Also keep the stores before the loop header, including in blocks that were not visited yet.
1515 bool is_osr = GetGraph()->IsCompilingOsr();
1516 for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1517 heap_values[idx].value =
1518 is_osr ? Value::PureUnknown()
1519 : Value::MergedUnknown(GetPhiPlaceholder(block->GetBlockId(), idx));
1520 KeepStores(Value::ForLoopPhiPlaceholder(GetPhiPlaceholder(block->GetBlockId(), idx)));
1521 }
1522 return;
1523 }
1524
1525 // Fill `heap_values` based on values from pre-header.
1526 heap_values.reserve(num_heap_locations);
1527 for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1528 heap_values.push_back({ PrepareLoopValue(block, idx), PrepareLoopStoredBy(block, idx) });
1529 }
1530 }
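// Illustrative sketch (hypothetical example): in a normal (reducible) loop,
// a value known in the pre-header becomes a Phi placeholder in the header,
// resolved once the back-edge values are known:
//
//   obj.f = 1;                  // pre-header value: 1
//   while (cond) {
//     use(obj.f);               // sees Phi placeholder (header, loc of obj.f)
//     obj.f = 2;                // back-edge value: 2 -> loop Phi [1, 2]
//   }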
1531
1532 LSEVisitor::Value LSEVisitor::MergePredecessorValues(HBasicBlock* block, size_t idx) {
1533 ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1534 DCHECK(!predecessors.empty());
1535 Value merged_value =
1536 ReplacementOrValue(heap_values_for_[predecessors[0]->GetBlockId()][idx].value);
1537 for (size_t i = 1u, size = predecessors.size(); i != size; ++i) {
1538 Value pred_value =
1539 ReplacementOrValue(heap_values_for_[predecessors[i]->GetBlockId()][idx].value);
1540 if (pred_value.Equals(merged_value)) {
1541 // Value is the same. No need to update our merged value.
1542 continue;
1543 } else if (pred_value.IsUnknown() || merged_value.IsUnknown()) {
1544 // One value is unknown and the other is a different kind of unknown or a known value.
1545 PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1546 merged_value = Value::MergedUnknown(phi_placeholder);
1547 // We know that at least one of the merge points is unknown (and both are
1548 // not pure-unknowns since that's captured above). This means that the
1549 // overall value needs to be a MergedUnknown. Just return that.
1550 break;
1551 } else {
1552 // There are conflicting known values. We may still be able to replace loads with a Phi.
1553 PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1554 // Propagate the need for a new loop Phi from all predecessors.
1555 bool needs_loop_phi = merged_value.NeedsLoopPhi() || pred_value.NeedsLoopPhi();
1556 merged_value = ReplacementOrValue(Value::ForPhiPlaceholder(phi_placeholder, needs_loop_phi));
1557 }
1558 }
1559 DCHECK_IMPLIES(merged_value.IsPureUnknown(), block->GetPredecessors().size() <= 1)
1560 << merged_value << " in " << GetGraph()->PrettyMethod();
1561 return merged_value;
1562 }
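// Illustrative sketch (hypothetical example): conflicting known values from
// predecessors merge to a Phi placeholder, which a later load may turn into
// a real Phi:
//
//   if (cond) { obj.f = 1; } else { obj.f = 2; }
//   int x = obj.f;              // merged value -> Phi placeholder -> Phi(1, 2)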
1563
1564 void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) {
1565 if (block->IsExitBlock()) {
1566 // Exit block doesn't really merge values since the control flow ends in
1567 // its predecessors. Each predecessor needs to make sure stores are kept
1568 // if necessary.
1569 return;
1570 }
1571
1572 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1573 DCHECK(heap_values.empty());
1574 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1575 if (block->GetPredecessors().empty() || block->IsCatchBlock()) {
1576 DCHECK_IMPLIES(block->GetPredecessors().empty(), block->IsEntryBlock());
1577 heap_values.resize(num_heap_locations,
1578 {/*value=*/Value::PureUnknown(), /*stored_by=*/Value::PureUnknown()});
1579 return;
1580 }
1581
1582 heap_values.reserve(num_heap_locations);
1583 for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1584 Value merged_value = MergePredecessorValues(block, idx);
1585 if (kIsDebugBuild) {
1586 if (merged_value.NeedsPhi()) {
1587 uint32_t block_id = merged_value.GetPhiPlaceholder().GetBlockId();
1588 CHECK(GetGraph()->GetBlocks()[block_id]->Dominates(block));
1589 } else if (merged_value.IsInstruction()) {
1590 CHECK(merged_value.GetInstruction()->GetBlock()->Dominates(block));
1591 }
1592 }
1593 ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1594 Value merged_stored_by = heap_values_for_[predecessors[0]->GetBlockId()][idx].stored_by;
1595 for (size_t predecessor_idx = 1u; predecessor_idx != predecessors.size(); ++predecessor_idx) {
1596 uint32_t predecessor_block_id = predecessors[predecessor_idx]->GetBlockId();
1597 Value stored_by = heap_values_for_[predecessor_block_id][idx].stored_by;
1598 if ((!stored_by.IsUnknown() || !merged_stored_by.IsUnknown()) &&
1599 !merged_stored_by.Equals(stored_by)) {
1600 // Use the Phi placeholder to track that we need to keep stores from all predecessors.
1601 PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1602 merged_stored_by = Value::ForNonLoopPhiPlaceholder(phi_placeholder);
1603 break;
1604 }
1605 }
1606 heap_values.push_back({ merged_value, merged_stored_by });
1607 }
1608 }
1609
1610 static HInstruction* FindOrConstructNonLoopPhi(
1611 HBasicBlock* block,
1612 const ScopedArenaVector<HInstruction*>& phi_inputs,
1613 DataType::Type type) {
1614 for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
1615 HInstruction* phi = phi_it.Current();
1616 DCHECK_EQ(phi->InputCount(), phi_inputs.size());
1617 auto cmp = [](HInstruction* lhs, const HUserRecord<HInstruction*>& rhs) {
1618 return lhs == rhs.GetInstruction();
1619 };
1620 if (std::equal(phi_inputs.begin(), phi_inputs.end(), phi->GetInputRecords().begin(), cmp)) {
1621 return phi;
1622 }
1623 }
1624 ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
1625 HPhi* phi = new (allocator) HPhi(allocator, kNoRegNumber, phi_inputs.size(), type);
1626 for (size_t i = 0, size = phi_inputs.size(); i != size; ++i) {
1627 DCHECK_NE(phi_inputs[i]->GetType(), DataType::Type::kVoid) << phi_inputs[i]->DebugName();
1628 phi->SetRawInputAt(i, phi_inputs[i]);
1629 }
1630 block->AddPhi(phi);
1631 if (type == DataType::Type::kReference) {
1632 // Update reference type information. Pass invalid handles, these are not used for Phis.
1633 ReferenceTypePropagation rtp_fixup(block->GetGraph(),
1634 Handle<mirror::DexCache>(),
1635 /* is_first_run= */ false);
1636 rtp_fixup.Visit(phi);
1637 }
1638 return phi;
1639 }
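// Design note: searching for an existing Phi with identical inputs before
// creating a new one avoids duplicate Phis when several eliminated loads
// merge the same set of values at the same block.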
1640
1641 void LSEVisitor::MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type) {
1642 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
1643 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1644 size_t idx = phi_placeholder.GetHeapLocation();
1645
1646 // Use local allocator to reduce peak memory usage.
1647 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1648 // Reuse the same vector for collecting phi inputs.
1649 ScopedArenaVector<HInstruction*> phi_inputs(allocator.Adapter(kArenaAllocLSE));
1650
1651 ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
1652 work_queue.push_back(phi_placeholder);
1653 while (!work_queue.empty()) {
1654 PhiPlaceholder current_phi_placeholder = work_queue.back();
1655 if (phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].IsValid()) {
1656 // This Phi placeholder was pushed to the `work_queue` followed by another Phi placeholder
1657 // that directly or indirectly depends on it, so it was already processed as part of the
1658 // other Phi placeholder's dependencies before this one got back to the top of the stack.
1659 work_queue.pop_back();
1660 continue;
1661 }
1662 uint32_t current_block_id = current_phi_placeholder.GetBlockId();
1663 HBasicBlock* current_block = blocks[current_block_id];
1664 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1665
1666 // Non-loop Phis cannot depend on a loop Phi, so we should not see any loop header here.
1667 // And the only way for such merged value to reach a different heap location is through
1668 // a load at which point we materialize the Phi. Therefore all non-loop Phi placeholders
1669 // seen here are tied to one heap location.
1670 DCHECK(!current_block->IsLoopHeader())
1671 << current_phi_placeholder << " phase: " << current_phase_;
1672 DCHECK_EQ(current_phi_placeholder.GetHeapLocation(), idx);
1673
1674 phi_inputs.clear();
1675 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1676 Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1677 DCHECK(!pred_value.IsPureUnknown()) << pred_value << " block " << current_block->GetBlockId()
1678 << " pred: " << predecessor->GetBlockId();
1679 if (pred_value.NeedsNonLoopPhi()) {
1680 // We need to process the Phi placeholder first.
1681 work_queue.push_back(pred_value.GetPhiPlaceholder());
1682 } else if (pred_value.IsDefault()) {
1683 phi_inputs.push_back(GetDefaultValue(type));
1684 } else {
1685 DCHECK(pred_value.IsInstruction()) << pred_value << " block " << current_block->GetBlockId()
1686 << " pred: " << predecessor->GetBlockId();
1687 phi_inputs.push_back(pred_value.GetInstruction());
1688 }
1689 }
1690 if (phi_inputs.size() == current_block->GetPredecessors().size()) {
1691 // All inputs are available. Find or construct the Phi replacement.
1692 phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)] =
1693 Value::ForInstruction(FindOrConstructNonLoopPhi(current_block, phi_inputs, type));
1694 // Remove the block from the queue.
1695 DCHECK_EQ(current_phi_placeholder, work_queue.back());
1696 work_queue.pop_back();
1697 }
1698 }
1699 }
1700
1701 void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
1702 DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1703 uint32_t block_id = instruction->GetBlock()->GetBlockId();
1704 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1705 ValueRecord& record = heap_values[idx];
1706 if (instruction->IsFieldAccess()) {
1707 RecordFieldInfo(&instruction->GetFieldInfo(), idx);
1708 }
1709 DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value)));
1710 loads_and_stores_.push_back({ instruction, idx });
1711 if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) &&
1712 !IsDefaultOrPhiAllowedForLoad(instruction)) {
1713 record.value = Value::PureUnknown();
1714 }
1715 if (record.value.IsDefault()) {
1716 KeepStores(record.stored_by);
1717 HInstruction* constant = GetDefaultValue(instruction->GetType());
1718 AddRemovedLoad(instruction, constant);
1719 record.value = Value::ForInstruction(constant);
1720 } else if (record.value.IsUnknown()) {
1721 // Load isn't eliminated. Put the load as the value into the HeapLocation.
1722 // This acts like GVN but with better aliasing analysis.
1723 Value old_value = record.value;
1724 record.value = Value::ForInstruction(instruction);
1725 KeepStoresIfAliasedToLocation(heap_values, idx);
1726 KeepStores(old_value);
1727 } else if (record.value.NeedsLoopPhi()) {
1728 // We do not know yet if the value is known for all back edges. Record for future processing.
1729 if (loads_requiring_loop_phi_.empty()) {
1730 loads_requiring_loop_phi_.resize(GetGraph()->GetCurrentInstructionId(), nullptr);
1731 }
1732 DCHECK_EQ(loads_requiring_loop_phi_[instruction->GetId()], nullptr);
1733 loads_requiring_loop_phi_[instruction->GetId()] =
1734 new (allocator_.Alloc<ValueRecord>(kArenaAllocLSE)) ValueRecord(record);
1735 } else {
1736 // This load can be eliminated but we may need to construct non-loop Phis.
1737 if (record.value.NeedsNonLoopPhi()) {
1738 MaterializeNonLoopPhis(record.value.GetPhiPlaceholder(), instruction->GetType());
1739 record.value = Replacement(record.value);
1740 }
1741 HInstruction* heap_value = FindSubstitute(record.value.GetInstruction());
1742 AddRemovedLoad(instruction, heap_value);
1743 }
1744 }
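// Illustrative sketch (hypothetical example) of the GVN-like case above:
// a load that cannot be eliminated becomes the heap value of its location,
// so a subsequent load of the same location is replaced by the first one:
//
//   int x = obj.f;              // kept; heap value of obj.f is now `x`
//   int y = obj.f;              // eliminated; uses of `y` are replaced by `x`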
1745
1746 void LSEVisitor::VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
1747 DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1748 DCHECK(!IsStore(value)) << value->DebugName();
1749 if (instruction->IsFieldAccess()) {
1750 RecordFieldInfo(&instruction->GetFieldInfo(), idx);
1751 }
1752 // value may already have a substitute.
1753 value = FindSubstitute(value);
1754 HBasicBlock* block = instruction->GetBlock();
1755 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1756 ValueRecord& record = heap_values[idx];
1757 DCHECK_IMPLIES(record.value.IsInstruction(),
1758 FindSubstitute(record.value.GetInstruction()) == record.value.GetInstruction());
1759
1760 if (record.value.Equals(value)) {
1761 // Store into the heap location with the same value.
1762 // This store can be eliminated right away.
1763 block->RemoveInstruction(instruction);
1764 return;
1765 }
1766
1767 if (store_records_.empty()) {
1768 store_records_.resize(GetGraph()->GetCurrentInstructionId(), nullptr);
1769 }
1770 DCHECK_EQ(store_records_[instruction->GetId()], nullptr);
1771 store_records_[instruction->GetId()] =
1772 new (allocator_.Alloc<StoreRecord>(kArenaAllocLSE)) StoreRecord(record, value);
1773 loads_and_stores_.push_back({ instruction, idx });
1774
1775 // If the `record.stored_by` specified a store from this block, it shall be removed
1776 // at the end, except for throwing ArraySet; it cannot be marked for keeping in
1777 // `kept_stores_` anymore after we update the `record.stored_by` below.
1778 DCHECK(!record.stored_by.IsInstruction() ||
1779 record.stored_by.GetInstruction()->GetBlock() != block ||
1780 record.stored_by.GetInstruction()->CanThrow() ||
1781 !kept_stores_.IsBitSet(record.stored_by.GetInstruction()->GetId()));
1782
1783 if (instruction->CanThrow()) {
1784 // Previous stores can become visible.
1785 HandleThrowingInstruction(instruction);
1786 // We cannot remove a possibly throwing store.
1787 // After marking it as kept, it does not matter if we track it in `stored_by` or not.
1788 kept_stores_.SetBit(instruction->GetId());
1789 }
1790
1791 // Update the record.
1792 // Note that the `value` can be a newly created `Phi` with an id that falls outside
1793 // the allocated `loads_requiring_loop_phi_` range.
1794 DCHECK_IMPLIES(IsLoad(value) && !loads_requiring_loop_phi_.empty(),
1795 static_cast<size_t>(value->GetId()) < loads_requiring_loop_phi_.size());
1796 if (static_cast<size_t>(value->GetId()) < loads_requiring_loop_phi_.size() &&
1797 loads_requiring_loop_phi_[value->GetId()] != nullptr) {
1798 // Propagate the Phi placeholder to the record.
1799 record.value = loads_requiring_loop_phi_[value->GetId()]->value;
1800 DCHECK(record.value.NeedsLoopPhi());
1801 } else {
1802 record.value = Value::ForInstruction(value);
1803 }
1804 // Track the store in the value record. If the value is loaded or needed after
1805 // return/deoptimization later, this store isn't really redundant.
1806 record.stored_by = Value::ForInstruction(instruction);
1807
1808 // This store may kill values in other heap locations due to aliasing.
1809 for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1810 if (i == idx ||
1811 heap_values[i].value.IsUnknown() ||
1812 CanValueBeKeptIfSameAsNew(heap_values[i].value, value, instruction) ||
1813 !heap_location_collector_.MayAlias(i, idx)) {
1814 continue;
1815 }
1816 // Kill heap locations that may alias and keep previous stores to these locations.
1817 KeepStores(heap_values[i].stored_by);
1818 heap_values[i].stored_by = Value::PureUnknown();
1819 heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
1820 }
1821 }
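// Illustrative sketch (hypothetical example): a store of the value already
// recorded for the location is removed immediately; other stores may kill
// aliased locations:
//
//   obj.f = x;                  // recorded: heap value of obj.f is `x`
//   obj.f = x;                  // removed right away (stores the same value)
//   other.f = y;                // kills obj.f's value if `other` may alias `obj`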
1822
1823 void LSEVisitor::VisitBasicBlock(HBasicBlock* block) {
1824 // Populate the heap_values array for this block.
1825 // TODO: try to reuse the heap_values array from one predecessor if possible.
1826 if (block->IsLoopHeader()) {
1827 PrepareLoopRecords(block);
1828 } else {
1829 MergePredecessorRecords(block);
1830 }
1831 // Visit non-Phi instructions.
1832 VisitNonPhiInstructions(block);
1833 }
1834
1835 bool LSEVisitor::MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const {
1836 DCHECK_NE(idx1, idx2);
1837 DCHECK(loop_header->IsLoopHeader());
1838 if (heap_location_collector_.MayAlias(idx1, idx2)) {
1839 return true;
1840 }
1841 // For array locations with index defined inside the loop, include
1842 // all other locations in the array, even those that LSA declares
1843 // non-aliasing, such as `a[i]` and `a[i + 1]`, as they may actually
1844 // refer to the same locations for different iterations. (LSA's
1845 // `ComputeMayAlias()` does not consider different loop iterations.)
1846 HeapLocation* loc1 = heap_location_collector_.GetHeapLocation(idx1);
1847 HeapLocation* loc2 = heap_location_collector_.GetHeapLocation(idx2);
1848 if (loc1->IsArray() &&
1849 loc2->IsArray() &&
1850 HeapLocationCollector::CanReferencesAlias(loc1->GetReferenceInfo(),
1851 loc2->GetReferenceInfo())) {
1852 HLoopInformation* loop_info = loop_header->GetLoopInformation();
1853 if (loop_info->Contains(*loc1->GetIndex()->GetBlock()) ||
1854 loop_info->Contains(*loc2->GetIndex()->GetBlock())) {
1855 // Consider the locations aliasing. Do not optimize the case where both indexes
1856 // are loop invariants defined inside the loop, rely on LICM to pull them out.
1857 return true;
1858 }
1859 }
1860 return false;
1861 }
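// Illustrative sketch (hypothetical example): `a[i]` and `a[i + 1]` do not
// alias within a single iteration, but across the back-edge they can name the
// same element; iteration k writes a[k + 1] and iteration k + 1 reads it:
//
//   for (int i = 0; i < n - 1; ++i) {
//     a[i + 1] = a[i] + 1;      // a[i] of the next iteration is this a[i + 1]
//   }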
1862
1863 bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault(
1864 PhiPlaceholder phi_placeholder,
1865 DataType::Type type,
1866 /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
1867 // Use local allocator to reduce peak memory usage.
1868 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1869 ArenaBitVector visited(&allocator,
1870 /*start_bits=*/ num_phi_placeholders_,
1871 /*expandable=*/ false,
1872 kArenaAllocLSE);
1873 ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
1874
1875 // Use depth first search to check if any non-Phi input is unknown.
1876 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1877 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1878 visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
1879 work_queue.push_back(phi_placeholder);
1880 while (!work_queue.empty()) {
1881 PhiPlaceholder current_phi_placeholder = work_queue.back();
1882 work_queue.pop_back();
1883 HBasicBlock* block = blocks[current_phi_placeholder.GetBlockId()];
1884 DCHECK_GE(block->GetPredecessors().size(), 2u);
1885 size_t idx = current_phi_placeholder.GetHeapLocation();
1886 for (HBasicBlock* predecessor : block->GetPredecessors()) {
1887 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1888 if (value.NeedsPhi()) {
1889 // Visit the predecessor Phi placeholder if it's not visited yet.
1890 if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
1891 visited.SetBit(PhiPlaceholderIndex(value));
1892 work_queue.push_back(value.GetPhiPlaceholder());
1893 }
1894 } else if (!value.Equals(Value::Default())) {
1895 return false; // Report failure.
1896 }
1897 }
1898 if (block->IsLoopHeader()) {
1899 // For back-edges we need to check all locations that write to the same array,
1900 // even those that LSA declares non-aliasing, such as `a[i]` and `a[i + 1]`
1901 // as they may actually refer to the same locations for different iterations.
1902 for (size_t i = 0; i != num_heap_locations; ++i) {
1903 if (i == idx ||
1904 heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo() !=
1905 heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()) {
1906 continue;
1907 }
1908 for (HBasicBlock* predecessor : block->GetPredecessors()) {
1909 // Check if there were any writes to this location.
1910 // Note: We could simply process the values but due to the vector operation
1911 // carve-out (see `IsDefaultOrPhiAllowedForLoad()`), a vector load can cause
1912 // the value to change and not be equal to default. To work around this and
1913 // allow replacing the non-vector load of loop-invariant default values
1914 // anyway, skip over paths that do not have any writes.
1915 ValueRecord record = heap_values_for_[predecessor->GetBlockId()][i];
1916 while (record.stored_by.NeedsLoopPhi() &&
1917 blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->IsLoopHeader()) {
1918 HLoopInformation* loop_info =
1919 blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->GetLoopInformation();
1920 record = heap_values_for_[loop_info->GetPreHeader()->GetBlockId()][i];
1921 }
1922 Value value = ReplacementOrValue(record.value);
1923 if (value.NeedsPhi()) {
1924 // Visit the predecessor Phi placeholder if it's not visited yet.
1925 if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
1926 visited.SetBit(PhiPlaceholderIndex(value));
1927 work_queue.push_back(value.GetPhiPlaceholder());
1928 }
1929 } else if (!value.Equals(Value::Default())) {
1930 return false; // Report failure.
1931 }
1932 }
1933 }
1934 }
1935 }
1936
1937 // Record replacement and report success.
1938 HInstruction* replacement = GetDefaultValue(type);
1939 for (uint32_t phi_placeholder_index : visited.Indexes()) {
1940 DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
1941 PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
1942 HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
1943 // We use both vector and non vector operations to analyze the information. However, we replace
1944 // only non vector operations in this code path.
1945 if (!hl->IsVecOp()) {
1946 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
1947 phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
1948 }
1949 }
1950 return true;
1951 }
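// Illustrative sketch (hypothetical example): the zero-filled array pattern
// this targets; the elements stay default-valued throughout the loop, so the
// loads can be replaced by the constant 0:
//
//   int[] a = new int[n];
//   for (int i = 0; i < n; ++i) {
//     sum += a[i];              // no writes to `a` in the loop -> always 0
//   }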
1952
1953 bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput(
1954 PhiPlaceholder phi_placeholder,
1955 /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
1956 // Use local allocator to reduce peak memory usage.
1957 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1958 ArenaBitVector visited(&allocator,
1959 /*start_bits=*/ num_phi_placeholders_,
1960 /*expandable=*/ false,
1961 kArenaAllocLSE);
1962 ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
1963
1964 // Use depth first search to check if any non-Phi input is unknown.
1965 HInstruction* replacement = nullptr;
1966 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1967 visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
1968 work_queue.push_back(phi_placeholder);
1969 while (!work_queue.empty()) {
1970 PhiPlaceholder current_phi_placeholder = work_queue.back();
1971 work_queue.pop_back();
1972 HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
1973 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1974 size_t idx = current_phi_placeholder.GetHeapLocation();
1975 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1976 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1977 if (value.NeedsPhi()) {
1978 // Visit the predecessor Phi placeholder if it's not visited yet.
1979 if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
1980 visited.SetBit(PhiPlaceholderIndex(value));
1981 work_queue.push_back(value.GetPhiPlaceholder());
1982 }
1983 } else {
1984 if (!value.IsInstruction() ||
1985 (replacement != nullptr && replacement != value.GetInstruction())) {
1986 return false; // Report failure.
1987 }
1988 replacement = value.GetInstruction();
1989 }
1990 }
1991 // While `TryReplacingLoopPhiPlaceholderWithDefault()` has special treatment
1992 // for back-edges, it is not needed here. When looking for a single input
1993 // instruction coming from before the loop, the array index must also be
1994 // defined before the loop and the aliasing analysis done by LSA is sufficient.
1995 // Any writes of a different value with an index that is not loop invariant
1996 // would invalidate the heap location in `VisitSetLocation()`.
1997 }
1998
1999 // Record replacement and report success.
2000 DCHECK(replacement != nullptr);
2001 for (uint32_t phi_placeholder_index : visited.Indexes()) {
2002 DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
2003 PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
2004 HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
2005 // We use both vector and non vector operations to analyze the information. However, we replace
2006 // only vector operations in this code path.
2007 if (hl->IsVecOp()) {
2008 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2009 phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
2010 }
2011 }
2012 return true;
2013 }
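// Illustrative sketch (hypothetical example): a load that may not use default
// values or Phis (the vector-op carve-out) can still have its loop Phi
// placeholder replaced by a single instruction stored before the loop:
//
//   a[j] = v;                   // `j` defined before the loop; only write to a[j]
//   for (int i = 0; i < n; ++i) {
//     use(a[j]);                // replaceable by `v`
//   }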
2014
2015 std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::FindLoopPhisToMaterialize(
2016 PhiPlaceholder phi_placeholder,
2017 /*inout*/ ArenaBitVector* phi_placeholders_to_materialize,
2018 DataType::Type type,
2019 bool can_use_default_or_phi) {
2020 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2021
2022 // Use local allocator to reduce peak memory usage.
2023 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2024 ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
2025
2026 // Use depth first search to check if any non-Phi input is unknown.
2027 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2028 phi_placeholders_to_materialize->ClearAllBits();
2029 phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(phi_placeholder));
2030 work_queue.push_back(phi_placeholder);
2031 while (!work_queue.empty()) {
2032 PhiPlaceholder current_phi_placeholder = work_queue.back();
2033 work_queue.pop_back();
2034 if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(current_phi_placeholder))) {
2035 // Replaced by `TryReplacingLoopPhiPlaceholderWith{Default,SingleInput}()`.
2036 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].Equals(
2037 Value::Default()));
2038 continue;
2039 }
2040 HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2041 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2042 size_t idx = current_phi_placeholder.GetHeapLocation();
2043 if (current_block->IsLoopHeader()) {
2044 // If the index is defined inside the loop, it may reference different elements of the
2045 // array on each iteration. Since we do not track if all elements of an array are set
2046 // to the same value explicitly, the only known value in pre-header can be the default
2047 // value from NewArray or a Phi placeholder depending on a default value from some outer
2048 // loop pre-header. This Phi placeholder can be replaced only by the default value.
2049 HInstruction* index = heap_location_collector_.GetHeapLocation(idx)->GetIndex();
2050 if (index != nullptr && current_block->GetLoopInformation()->Contains(*index->GetBlock())) {
2051 if (can_use_default_or_phi &&
2052 TryReplacingLoopPhiPlaceholderWithDefault(current_phi_placeholder,
2053 type,
2054 phi_placeholders_to_materialize)) {
2055 continue;
2056 } else {
2057 return current_phi_placeholder; // Report the loop Phi placeholder.
2058 }
2059 }
2060 // A similar situation arises with the index defined outside the loop if we cannot use
2061 // default values or Phis, i.e. for vector loads, as we can only replace the Phi
2062 // placeholder with a single instruction defined before the loop.
2063 if (!can_use_default_or_phi) {
2064 DCHECK(index != nullptr); // Vector operations are array operations.
2065 if (TryReplacingLoopPhiPlaceholderWithSingleInput(current_phi_placeholder,
2066 phi_placeholders_to_materialize)) {
2067 continue;
2068 } else {
2069 return current_phi_placeholder; // Report the loop Phi placeholder.
2070 }
2071 }
2072 }
2073 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2074 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
2075 Value value = ReplacementOrValue(heap_values[idx].value);
2076 if (value.IsUnknown()) {
2077 // We cannot create a Phi for this loop Phi placeholder.
2078 return current_phi_placeholder; // Report the loop Phi placeholder.
2079 }
2080 // For arrays, the location may have been clobbered by writes to other locations
2081 // in a loop that LSA does not consider aliasing, such as `a[i]` and `a[i + 1]`.
2082 if (current_block->IsLoopHeader() &&
2083 predecessor != current_block->GetLoopInformation()->GetPreHeader() &&
2084 heap_location_collector_.GetHeapLocation(idx)->GetIndex() != nullptr) {
2085 for (size_t i = 0, size = heap_values.size(); i != size; ++i) {
2086 if (i != idx &&
2087 !heap_values[i].stored_by.IsUnknown() &&
2088 MayAliasOnBackEdge(current_block, idx, i)) {
2089 // We cannot create a Phi for this loop Phi placeholder.
2090 return current_phi_placeholder;
2091 }
2092 }
2093 }
2094 if (value.NeedsLoopPhi()) {
2095 // Visit the predecessor Phi placeholder if it's not visited yet.
2096 if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(value))) {
2097 phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(value));
2098 work_queue.push_back(value.GetPhiPlaceholder());
2099 LSE_VLOG << "For materialization of " << phi_placeholder
2100 << " we need to materialize " << value;
2101 }
2102 }
2103 }
2104 }
2105
2106 // There are no unknown values feeding this Phi, so we can construct the Phis if needed.
2107 return std::nullopt;
2108 }
2109
2110 bool LSEVisitor::MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
2111 DataType::Type type) {
2112 return MaterializeLoopPhis(ArrayRef<const size_t>(phi_placeholder_indexes), type);
2113 }
2114
2115 bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes,
2116 DataType::Type type) {
2117 // Materialize all predecessors that do not need a loop Phi and determine if all inputs
2118 // other than loop Phis are the same.
2119 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2120 std::optional<Value> other_value = std::nullopt;
2121 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2122 PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2123 HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2124 DCHECK_GE(block->GetPredecessors().size(), 2u);
2125 size_t idx = phi_placeholder.GetHeapLocation();
2126 for (HBasicBlock* predecessor : block->GetPredecessors()) {
2127 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2128 if (value.NeedsNonLoopPhi()) {
2129 DCHECK(current_phase_ == Phase::kLoadElimination) << current_phase_;
2130 MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type);
2131 value = Replacement(value);
2132 }
2133 if (!value.NeedsLoopPhi()) {
2134 if (!other_value) {
2135 // The first other value we found.
2136 other_value = value;
2137 } else if (!other_value->IsInvalid()) {
2138 // Check if the current `value` differs from the previous `other_value`.
2139 if (!value.Equals(*other_value)) {
2140 other_value = Value::Invalid();
2141 }
2142 }
2143 }
2144 }
2145 }
2146
2147 DCHECK(other_value.has_value());
2148 if (!other_value->IsInvalid()) {
2149 HInstruction* replacement =
2150 (other_value->IsDefault()) ? GetDefaultValue(type) : other_value->GetInstruction();
2151 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2152 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
2153 }
2154 return true;
2155 }
2156
2157 // If we're materializing only a single Phi, try to match it with an existing Phi.
2158 // (Matching multiple Phis would need investigation. It may be prohibitively slow.)
2159 // This also covers the case when after replacing a previous set of Phi placeholders,
2160 // we continue with a Phi placeholder that does not really need a loop Phi anymore.
2161 if (phi_placeholder_indexes.size() == 1u) {
2162 PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_indexes[0]);
2163 size_t idx = phi_placeholder.GetHeapLocation();
2164 HBasicBlock* block = GetGraph()->GetBlocks()[phi_placeholder.GetBlockId()];
2165 ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
2166 for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
2167 HInstruction* phi = phi_it.Current();
2168 DCHECK_EQ(phi->InputCount(), predecessors.size());
2169 ArrayRef<HUserRecord<HInstruction*>> phi_inputs = phi->GetInputRecords();
2170 auto cmp = [=](const HUserRecord<HInstruction*>& lhs, HBasicBlock* rhs) {
2171 Value value = ReplacementOrValue(heap_values_for_[rhs->GetBlockId()][idx].value);
2172 if (value.NeedsPhi()) {
2173 DCHECK(value.GetPhiPlaceholder() == phi_placeholder);
2174 return lhs.GetInstruction() == phi;
2175 } else {
2176 DCHECK(value.IsDefault() || value.IsInstruction());
2177 return value.Equals(lhs.GetInstruction());
2178 }
2179 };
2180 if (std::equal(phi_inputs.begin(), phi_inputs.end(), predecessors.begin(), cmp)) {
2181 phi_placeholder_replacements_[phi_placeholder_indexes[0]] = Value::ForInstruction(phi);
2182 return true;
2183 }
2184 }
2185 }
2186
2187 if (current_phase_ == Phase::kStoreElimination) {
2188 // We're not creating Phis during the final store elimination phase.
2189 return false;
2190 }
2191
2192 // There are different inputs to the Phi chain. Create the Phis.
2193 ArenaAllocator* allocator = GetGraph()->GetAllocator();
2194 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2195 PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2196 HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2197 CHECK_GE(block->GetPredecessors().size(), 2u);
2198 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(
2199 new (allocator) HPhi(allocator, kNoRegNumber, block->GetPredecessors().size(), type));
2200 }
2201 // Fill the Phi inputs.
2202 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2203 PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2204 HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2205 size_t idx = phi_placeholder.GetHeapLocation();
2206 HInstruction* phi = phi_placeholder_replacements_[phi_placeholder_index].GetInstruction();
2207 DCHECK(DataType::IsTypeConversionImplicit(type, phi->GetType()))
2208 << "type=" << type << " vs phi-type=" << phi->GetType();
2209 for (size_t i = 0, size = block->GetPredecessors().size(); i != size; ++i) {
2210 HBasicBlock* predecessor = block->GetPredecessors()[i];
2211 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2212 HInstruction* input = value.IsDefault() ? GetDefaultValue(type) : value.GetInstruction();
2213 DCHECK_NE(input->GetType(), DataType::Type::kVoid);
2214 phi->SetRawInputAt(i, input);
2215 DCHECK(DataType::IsTypeConversionImplicit(input->GetType(), phi->GetType()))
2216 << " input: " << input->GetType() << value << " phi: " << phi->GetType()
2217 << " request: " << type;
2218 }
2219 }
2220 // Add the Phis to their blocks.
2221 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2222 PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
2223 HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2224 block->AddPhi(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction()->AsPhi());
2225 }
2226 if (type == DataType::Type::kReference) {
2227 ScopedArenaAllocator local_allocator(allocator_.GetArenaStack());
2228 ScopedArenaVector<HInstruction*> phis(local_allocator.Adapter(kArenaAllocLSE));
2229 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
2230 phis.push_back(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction());
2231 }
2232 // Update reference type information. Pass invalid handles, these are not used for Phis.
2233 ReferenceTypePropagation rtp_fixup(GetGraph(),
2234 Handle<mirror::DexCache>(),
2235 /* is_first_run= */ false);
2236 rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis));
2237 }
2238
2239 return true;
2240 }
2241
2242 bool LSEVisitor::MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
2243 DataType::Type type) {
2244 // Use local allocator to reduce peak memory usage.
2245 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2246
2247 // We want to recognize when a subset of these loop Phis that do not need other
2248 // loop Phis, i.e. a transitive closure, has only one other instruction as an input,
2249 // i.e. that instruction can be used instead of each Phi in the set. See for example
2250 // Main.testLoop{5,6,7,8}() in the test 530-checker-lse. To do that, we shall
2251 // materialize these loop Phis from the smallest transitive closure.
2252
2253 // Construct a matrix of loop phi placeholder dependencies. To reduce the memory usage,
2254 // assign new indexes to the Phi placeholders, making the matrix dense.
2255 ScopedArenaVector<size_t> matrix_indexes(num_phi_placeholders_,
2256 static_cast<size_t>(-1), // Invalid.
2257 allocator.Adapter(kArenaAllocLSE));
2258 ScopedArenaVector<size_t> phi_placeholder_indexes(allocator.Adapter(kArenaAllocLSE));
2259 size_t num_phi_placeholders = phi_placeholders_to_materialize.NumSetBits();
2260 phi_placeholder_indexes.reserve(num_phi_placeholders);
2261 for (uint32_t marker_index : phi_placeholders_to_materialize.Indexes()) {
2262 matrix_indexes[marker_index] = phi_placeholder_indexes.size();
2263 phi_placeholder_indexes.push_back(marker_index);
2264 }
2265 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2266 ScopedArenaVector<ArenaBitVector*> dependencies(allocator.Adapter(kArenaAllocLSE));
2267 dependencies.reserve(num_phi_placeholders);
2268 for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2269 static constexpr bool kExpandable = false;
2270 dependencies.push_back(
2271 ArenaBitVector::Create(&allocator, num_phi_placeholders, kExpandable, kArenaAllocLSE));
2272 ArenaBitVector* current_dependencies = dependencies.back();
2273 current_dependencies->SetBit(matrix_index); // Count the Phi placeholder as its own dependency.
2274 PhiPlaceholder current_phi_placeholder =
2275 GetPhiPlaceholderAt(phi_placeholder_indexes[matrix_index]);
2276 HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
2277 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
2278 size_t idx = current_phi_placeholder.GetHeapLocation();
2279 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
2280 Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
2281 if (pred_value.NeedsLoopPhi()) {
2282 size_t pred_value_index = PhiPlaceholderIndex(pred_value);
2283 DCHECK(phi_placeholder_replacements_[pred_value_index].IsInvalid());
2284 DCHECK_NE(matrix_indexes[pred_value_index], static_cast<size_t>(-1));
2285 current_dependencies->SetBit(matrix_indexes[PhiPlaceholderIndex(pred_value)]);
2286 }
2287 }
2288 }
2289
2290 // Use the Floyd-Warshall algorithm to determine all transitive dependencies.
2291 for (size_t k = 0; k != num_phi_placeholders; ++k) {
2292 for (size_t i = 0; i != num_phi_placeholders; ++i) {
2293 for (size_t j = 0; j != num_phi_placeholders; ++j) {
2294 if (dependencies[i]->IsBitSet(k) && dependencies[k]->IsBitSet(j)) {
2295 dependencies[i]->SetBit(j);
2296 }
2297 }
2298 }
2299 }
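// Illustrative sketch (hypothetical numbers): with direct dependencies
// 0 -> {0, 1} and 1 -> {1, 2}, the closure adds 0 -> 2, so placeholder 0 can
// only be materialized together with (or after) placeholders 1 and 2.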
2300
2301 // Count the number of transitive dependencies for each replaceable Phi placeholder.
2302 ScopedArenaVector<size_t> num_dependencies(allocator.Adapter(kArenaAllocLSE));
2303 num_dependencies.reserve(num_phi_placeholders);
2304 for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2305 num_dependencies.push_back(dependencies[matrix_index]->NumSetBits());
2306 }
2307
2308 // Pick a Phi placeholder with the smallest number of transitive dependencies and
2309 // materialize it and its dependencies. Repeat until we have materialized all.
2310 ScopedArenaVector<size_t> current_subset(allocator.Adapter(kArenaAllocLSE));
2311 current_subset.reserve(num_phi_placeholders);
2312 size_t remaining_phi_placeholders = num_phi_placeholders;
2313 while (remaining_phi_placeholders != 0u) {
2314 auto it = std::min_element(num_dependencies.begin(), num_dependencies.end());
2315 DCHECK_LE(*it, remaining_phi_placeholders);
2316 size_t current_matrix_index = std::distance(num_dependencies.begin(), it);
2317 ArenaBitVector* current_dependencies = dependencies[current_matrix_index];
2318 size_t current_num_dependencies = num_dependencies[current_matrix_index];
2319 current_subset.clear();
2320 for (uint32_t matrix_index : current_dependencies->Indexes()) {
2321 current_subset.push_back(phi_placeholder_indexes[matrix_index]);
2322 }
2323 if (!MaterializeLoopPhis(current_subset, type)) {
2324 DCHECK_EQ(current_phase_, Phase::kStoreElimination);
2325 // This is the final store elimination phase and we shall not be able to eliminate any
2326 // stores that depend on the current subset, so mark these Phi placeholders unreplaceable.
2327 for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2328 if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
2329 DCHECK(phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]].IsInvalid());
2330 phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]] =
2331 Value::PureUnknown();
2332 }
2333 }
2334 return false;
2335 }
2336 for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
2337 if (current_dependencies->IsBitSet(matrix_index)) {
2338 // Mark all dependencies as done by incrementing their `num_dependencies[.]`,
2339 // so that they shall never be the minimum again.
2340 num_dependencies[matrix_index] = num_phi_placeholders;
2341 } else if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
2342 // Remove dependencies from other Phi placeholders.
2343 dependencies[matrix_index]->Subtract(current_dependencies);
2344 num_dependencies[matrix_index] -= current_num_dependencies;
2345 }
2346 }
2347 remaining_phi_placeholders -= current_num_dependencies;
2348 }
2349 return true;
2350 }
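  // The loop above amounts to a greedy schedule over transitively closed
  // dependency sets: the smallest set is always self-contained, so it can be
  // materialized as a unit and subtracted from every set that contains it.
  // A hypothetical stand-alone version of the bookkeeping (assumes `dep` is
  // reflexive and transitively closed):
  //
  //   #include <algorithm>
  //   #include <vector>
  //
  //   std::vector<size_t> GreedyMaterializationOrder(
  //       std::vector<std::vector<bool>> dep) {
  //     const size_t n = dep.size();
  //     std::vector<size_t> num_deps(n);
  //     for (size_t i = 0; i != n; ++i) {
  //       num_deps[i] = static_cast<size_t>(
  //           std::count(dep[i].begin(), dep[i].end(), true));
  //     }
  //     std::vector<size_t> order;
  //     size_t remaining = n;
  //     while (remaining != 0u) {
  //       size_t cur = static_cast<size_t>(std::distance(
  //           num_deps.begin(),
  //           std::min_element(num_deps.begin(), num_deps.end())));
  //       order.push_back(cur);
  //       const size_t cur_size = num_deps[cur];
  //       for (size_t i = 0; i != n; ++i) {
  //         if (dep[cur][i]) {
  //           num_deps[i] = n;  // Materialized now; never the minimum again.
  //         } else if (dep[i][cur]) {
  //           for (size_t j = 0; j != n; ++j) {
  //             if (dep[cur][j]) {
  //               dep[i][j] = false;  // Dependency satisfied by this step.
  //             }
  //           }
  //           num_deps[i] -= cur_size;
  //         }
  //       }
  //       remaining -= cur_size;
  //     }
  //     return order;
  //   }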
2351
2352 bool LSEVisitor::FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type) {
2353 ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
2354 ArenaBitVector abv(&saa, num_phi_placeholders_, false, ArenaAllocKind::kArenaAllocLSE);
2355 auto res =
2356 FindLoopPhisToMaterialize(phi_placeholder, &abv, type, /* can_use_default_or_phi=*/true);
2357 CHECK(!res.has_value()) << *res;
2358 return MaterializeLoopPhis(abv, type);
2359 }
2360
2361 std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::TryToMaterializeLoopPhis(
2362 PhiPlaceholder phi_placeholder, HInstruction* load) {
2363 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2364
2365 // Use local allocator to reduce peak memory usage.
2366 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2367
2368 // Find Phi placeholders to materialize.
2369 ArenaBitVector phi_placeholders_to_materialize(
2370 &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
2371 DataType::Type type = load->GetType();
2372 bool can_use_default_or_phi = IsDefaultOrPhiAllowedForLoad(load);
2373 std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
2374 phi_placeholder, &phi_placeholders_to_materialize, type, can_use_default_or_phi);
2375 if (loop_phi_with_unknown_input) {
2376 DCHECK_GE(GetGraph()
2377 ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2378 ->GetPredecessors()
2379 .size(),
2380 2u);
2381 return loop_phi_with_unknown_input; // Return failure.
2382 }
2383
2384 DCHECK_EQ(current_phase_, Phase::kLoadElimination);
2385 bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
2386 DCHECK(success);
2387
2388 // Report success.
2389 return std::nullopt;
2390 }
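// Caller's view of the convention above: std::nullopt means the Phis were
// materialized, while an engaged optional returns the loop Phi placeholder
// with an unknown input so the caller can reprocess it (see
// ProcessLoadsRequiringLoopPhis() below). Illustrative shape:
//
//   if (std::optional<PhiPlaceholder> failure =
//           TryToMaterializeLoopPhis(placeholder, load)) {
//     ProcessLoopPhiWithUnknownInput(*failure);
//   }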
2391
2392 // Re-process loads and stores in successors from the `loop_phi_with_unknown_input`. This may
2393 // find one or more loads from `loads_requiring_loop_phi_` which cannot be replaced by Phis and
2394 // propagate the load(s) as the new value(s) to successors; this may uncover new elimination
2395 // opportunities. If we find no such load, we shall at least propagate an unknown value to some
2396 // heap location that is needed by another loop Phi placeholder.
2397 void LSEVisitor::ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input) {
2398 DCHECK(!loads_requiring_loop_phi_.empty());
2399 size_t loop_phi_with_unknown_input_index = PhiPlaceholderIndex(loop_phi_with_unknown_input);
2400 DCHECK(phi_placeholder_replacements_[loop_phi_with_unknown_input_index].IsInvalid());
2401 phi_placeholder_replacements_[loop_phi_with_unknown_input_index] =
2402 Value::MergedUnknown(loop_phi_with_unknown_input);
2403
2404 uint32_t block_id = loop_phi_with_unknown_input.GetBlockId();
2405   const ArenaVector<HBasicBlock*>& reverse_post_order = GetGraph()->GetReversePostOrder();
2406 size_t reverse_post_order_index = 0;
2407 size_t reverse_post_order_size = reverse_post_order.size();
2408 size_t loads_and_stores_index = 0u;
2409 size_t loads_and_stores_size = loads_and_stores_.size();
2410
2411 // Skip blocks and instructions before the block containing the loop phi with unknown input.
2412 DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2413 while (reverse_post_order[reverse_post_order_index]->GetBlockId() != block_id) {
2414 HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2415 while (loads_and_stores_index != loads_and_stores_size &&
2416 loads_and_stores_[loads_and_stores_index].load_or_store->GetBlock() == block) {
2417 ++loads_and_stores_index;
2418 }
2419 ++reverse_post_order_index;
2420 DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2421 }
2422
2423 // Use local allocator to reduce peak memory usage.
2424 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2425 // Reuse one temporary vector for all remaining blocks.
2426 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
2427 ScopedArenaVector<Value> local_heap_values(allocator.Adapter(kArenaAllocLSE));
2428
2429 auto get_initial_value = [this](HBasicBlock* block, size_t idx) {
2430 Value value;
2431 if (block->IsLoopHeader()) {
2432 if (block->GetLoopInformation()->IsIrreducible()) {
2433 PhiPlaceholder placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
2434 value = Value::MergedUnknown(placeholder);
2435 } else {
2436 value = PrepareLoopValue(block, idx);
2437 }
2438 } else {
2439 value = MergePredecessorValues(block, idx);
2440 }
2441 DCHECK(value.IsUnknown() || ReplacementOrValue(value).Equals(value));
2442 return value;
2443 };
2444
2445 // Process remaining blocks and instructions.
2446 bool found_unreplaceable_load = false;
2447 bool replaced_heap_value_with_unknown = false;
2448 for (; reverse_post_order_index != reverse_post_order_size; ++reverse_post_order_index) {
2449 HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2450 if (block->IsExitBlock()) {
2451 continue;
2452 }
2453
2454 // We shall reconstruct only the heap values that we need for processing loads and stores.
2455 local_heap_values.clear();
2456 local_heap_values.resize(num_heap_locations, Value::Invalid());
2457
2458 for (; loads_and_stores_index != loads_and_stores_size; ++loads_and_stores_index) {
2459 HInstruction* load_or_store = loads_and_stores_[loads_and_stores_index].load_or_store;
2460 size_t idx = loads_and_stores_[loads_and_stores_index].heap_location_index;
2461 if (load_or_store->GetBlock() != block) {
2462 break; // End of instructions from the current block.
2463 }
2464 if (IsStore(load_or_store)) {
2465 StoreRecord* store_record = store_records_[load_or_store->GetId()];
2466 DCHECK(store_record != nullptr);
2467 HInstruction* stored_value = store_record->stored_value;
2468 DCHECK(stored_value != nullptr);
2469 // Note that the `stored_value` can be a newly created `Phi` with an id that falls
2470 // outside the allocated `loads_requiring_loop_phi_` range.
2471 DCHECK_IMPLIES(
2472 IsLoad(stored_value),
2473 static_cast<size_t>(stored_value->GetId()) < loads_requiring_loop_phi_.size());
2474 if (static_cast<size_t>(stored_value->GetId()) >= loads_requiring_loop_phi_.size() ||
2475 loads_requiring_loop_phi_[stored_value->GetId()] == nullptr) {
2476 continue; // This store never needed a loop Phi.
2477 }
2478 ValueRecord* record = loads_requiring_loop_phi_[stored_value->GetId()];
2479 // Process the store by updating `local_heap_values[idx]`. The last update shall
2480 // be propagated to the `heap_values[idx].value` if it previously needed a loop Phi
2481 // at the end of the block.
2482 Value replacement = ReplacementOrValue(record->value);
2483 if (replacement.NeedsLoopPhi()) {
2484 // No replacement yet, use the Phi placeholder from the load.
2485 DCHECK(record->value.NeedsLoopPhi());
2486 local_heap_values[idx] = record->value;
2487 } else {
2488 // If the load fetched a known value, use it, otherwise use the load.
2489 local_heap_values[idx] = Value::ForInstruction(
2490 replacement.IsUnknown() ? stored_value : replacement.GetInstruction());
2491 }
2492 } else {
2493 // Process the load unless it has previously been marked unreplaceable.
2494 DCHECK(IsLoad(load_or_store));
2495 ValueRecord* record = loads_requiring_loop_phi_[load_or_store->GetId()];
2496 if (record == nullptr) {
2497 continue; // This load never needed a loop Phi.
2498 }
2499 if (record->value.NeedsLoopPhi()) {
2500 if (local_heap_values[idx].IsInvalid()) {
2501 local_heap_values[idx] = get_initial_value(block, idx);
2502 }
2503 if (local_heap_values[idx].IsUnknown()) {
2504 // This load cannot be replaced. Keep stores that feed the Phi placeholder
2505 // (no aliasing since then, otherwise the Phi placeholder would not have been
2506 // propagated as a value to this load) and store the load as the new heap value.
2507 found_unreplaceable_load = true;
2508 KeepStores(record->value);
2509 record->value = Value::MergedUnknown(record->value.GetPhiPlaceholder());
2510 local_heap_values[idx] = Value::ForInstruction(load_or_store);
2511 } else if (local_heap_values[idx].NeedsLoopPhi()) {
2512 // The load may still be replaced with a Phi later.
2513 DCHECK(local_heap_values[idx].Equals(record->value));
2514 } else {
2515 // This load can be eliminated but we may need to construct non-loop Phis.
2516 if (local_heap_values[idx].NeedsNonLoopPhi()) {
2517 MaterializeNonLoopPhis(local_heap_values[idx].GetPhiPlaceholder(),
2518 load_or_store->GetType());
2519 local_heap_values[idx] = Replacement(local_heap_values[idx]);
2520 }
2521 record->value = local_heap_values[idx];
2522 DCHECK(local_heap_values[idx].IsDefault() || local_heap_values[idx].IsInstruction())
2523 << "The replacement heap value can be an HIR instruction or the default value.";
2524 HInstruction* heap_value = local_heap_values[idx].IsDefault() ?
2525 GetDefaultValue(load_or_store->GetType()) :
2526 local_heap_values[idx].GetInstruction();
2527 AddRemovedLoad(load_or_store, heap_value);
2528 }
2529 }
2530 }
2531 }
2532
2533 // All heap values that previously needed a loop Phi at the end of the block
2534 // need to be updated for processing successors.
2535 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
2536 for (size_t idx = 0; idx != num_heap_locations; ++idx) {
2537 if (heap_values[idx].value.NeedsLoopPhi()) {
2538 if (local_heap_values[idx].IsValid()) {
2539 heap_values[idx].value = local_heap_values[idx];
2540 } else {
2541 heap_values[idx].value = get_initial_value(block, idx);
2542 }
2543 if (heap_values[idx].value.IsUnknown()) {
2544 replaced_heap_value_with_unknown = true;
2545 }
2546 }
2547 }
2548 }
2549 DCHECK(found_unreplaceable_load || replaced_heap_value_with_unknown);
2550 }
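// The function above advances two cursors in lock step: one over the blocks
// in reverse post order and one over `loads_and_stores_`, which was recorded
// in the same order. A minimal sketch of this two-cursor pattern with
// hypothetical types:
//
//   size_t cursor = 0u;
//   for (HBasicBlock* block : reverse_post_order) {
//     while (cursor != records.size() && records[cursor].block == block) {
//       Process(records[cursor]);  // Every record lands in its own block.
//       ++cursor;
//     }
//   }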
2551
2552 void LSEVisitor::ProcessLoadsRequiringLoopPhis() {
2553 // Note: The vector operations carve-out (see `IsDefaultOrPhiAllowedForLoad()`) can possibly
2554 // make the result of the processing depend on the order in which we process these loads.
2555 // To make sure the result is deterministic, iterate over `loads_and_stores_` instead of the
2556 // `loads_requiring_loop_phi_` indexed by non-deterministic pointers.
2557 if (loads_requiring_loop_phi_.empty()) {
2558 return; // No loads to process.
2559 }
2560 for (const LoadStoreRecord& load_store_record : loads_and_stores_) {
2561 ValueRecord* record = loads_requiring_loop_phi_[load_store_record.load_or_store->GetId()];
2562 if (record == nullptr) {
2563 continue;
2564 }
2565 HInstruction* load = load_store_record.load_or_store;
2566 while (record->value.NeedsLoopPhi() &&
2567 phi_placeholder_replacements_[PhiPlaceholderIndex(record->value)].IsInvalid()) {
2568 std::optional<PhiPlaceholder> loop_phi_with_unknown_input =
2569 TryToMaterializeLoopPhis(record->value.GetPhiPlaceholder(), load);
2570 DCHECK_EQ(loop_phi_with_unknown_input.has_value(),
2571 phi_placeholder_replacements_[PhiPlaceholderIndex(record->value)].IsInvalid());
2572 if (loop_phi_with_unknown_input) {
2573 DCHECK_GE(GetGraph()
2574 ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2575 ->GetPredecessors()
2576 .size(),
2577 2u);
2578 ProcessLoopPhiWithUnknownInput(*loop_phi_with_unknown_input);
2579 }
2580 }
2581 // The load could have been marked as unreplaceable (and stores marked for keeping)
2582 // or marked for replacement with an instruction in ProcessLoopPhiWithUnknownInput().
2583 DCHECK(record->value.IsUnknown() ||
2584 record->value.IsInstruction() ||
2585 record->value.NeedsLoopPhi());
2586 if (record->value.NeedsLoopPhi()) {
2587 record->value = Replacement(record->value);
2588 HInstruction* heap_value = record->value.GetInstruction();
2589 AddRemovedLoad(load, heap_value);
2590 }
2591 }
2592 }
2593
2594 void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
2595 ScopedArenaVector<uint32_t> work_queue(allocator_.Adapter(kArenaAllocLSE));
2596 size_t start_size = phi_placeholders_to_search_for_kept_stores_.NumSetBits();
2597 work_queue.reserve(((start_size * 3u) + 1u) / 2u); // Reserve 1.5x start size, rounded up.
2598 for (uint32_t index : phi_placeholders_to_search_for_kept_stores_.Indexes()) {
2599 work_queue.push_back(index);
2600 }
2601 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2602 while (!work_queue.empty()) {
2603 uint32_t cur_phi_idx = work_queue.back();
2604 PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(cur_phi_idx);
2605 work_queue.pop_back();
2606 size_t idx = phi_placeholder.GetHeapLocation();
2607 HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
2608 DCHECK(block != nullptr) << cur_phi_idx << " phi: " << phi_placeholder
2609 << " (blocks: " << blocks.size() << ")";
2610 for (HBasicBlock* predecessor : block->GetPredecessors()) {
2611 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
2612 // For loop back-edges we must also preserve all stores to locations that
2613 // may alias with the location `idx`.
2614       // TODO: Add test cases around this.
2615 bool is_back_edge =
2616 block->IsLoopHeader() && predecessor != block->GetLoopInformation()->GetPreHeader();
2617 size_t start = is_back_edge ? 0u : idx;
2618 size_t end = is_back_edge ? heap_values.size() : idx + 1u;
2619 for (size_t i = start; i != end; ++i) {
2620 Value stored_by = heap_values[i].stored_by;
2621 if (!stored_by.IsUnknown() && (i == idx || MayAliasOnBackEdge(block, idx, i))) {
2622 if (stored_by.NeedsPhi()) {
2623 size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
2624 if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
2625 phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index);
2626 work_queue.push_back(phi_placeholder_index);
2627 }
2628 } else {
2629 DCHECK(IsStore(stored_by.GetInstruction()));
2630 ReferenceInfo* ri = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
2631 DCHECK(ri != nullptr) << "No heap value for " << stored_by.GetInstruction()->DebugName()
2632 << " id: " << stored_by.GetInstruction()->GetId() << " block: "
2633 << stored_by.GetInstruction()->GetBlock()->GetBlockId();
2634 kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
2635 }
2636 }
2637 }
2638 }
2639 }
2640 }
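// The search above is a worklist fixed point where
// `phi_placeholders_to_search_for_kept_stores_` doubles as the visited set:
// a placeholder is pushed only when its bit flips from 0 to 1, so each one is
// processed at most once and the loop terminates. Generic shape of the
// pattern (illustrative only; `DependenciesOf` is a hypothetical helper):
//
//   while (!work_queue.empty()) {
//     uint32_t item = work_queue.back();
//     work_queue.pop_back();
//     for (uint32_t dep : DependenciesOf(item)) {
//       if (!visited.IsBitSet(dep)) {
//         visited.SetBit(dep);
//         work_queue.push_back(dep);
//       }
//     }
//   }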
2641
2642 void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ ValueRecord* value_record) {
2643 while (value_record->stored_by.IsInstruction() &&
2644 !kept_stores_.IsBitSet(value_record->stored_by.GetInstruction()->GetId())) {
2645 StoreRecord* store_record = store_records_[value_record->stored_by.GetInstruction()->GetId()];
2646 DCHECK(store_record != nullptr);
2647 *value_record = store_record->old_value_record;
2648 }
2649 if (value_record->stored_by.NeedsPhi() &&
2650 !phi_placeholders_to_search_for_kept_stores_.IsBitSet(
2651 PhiPlaceholderIndex(value_record->stored_by))) {
2652 // Some stores feeding this heap location may have been eliminated. Use the `stored_by`
2653 // Phi placeholder to recalculate the actual value.
2654 value_record->value = value_record->stored_by;
2655 }
2656 value_record->value = ReplacementOrValue(value_record->value);
2657 if (value_record->value.NeedsNonLoopPhi()) {
2658 // Treat all Phi placeholders as requiring loop Phis at this point.
2659 // We do not want MaterializeLoopPhis() to call MaterializeNonLoopPhis().
2660 value_record->value = Value::ForLoopPhiPlaceholder(value_record->value.GetPhiPlaceholder());
2661 }
2662 }
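// Worked example for the rewind loop above (illustrative):
//   s1: a.f = x;   // Eliminated; its old_value_record holds the value before s1.
//   s2: a.f = y;   // Eliminated; its old_value_record points at s1's value.
//   s3: a.f = z;   // Kept.
// A record whose `stored_by` is s2 rewinds through s2 and s1 to the value in
// `a.f` before s1, i.e. the value actually observed once the eliminated
// stores are removed.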
2663
2664 void LSEVisitor::FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder,
2665 DataType::Type type) {
2666 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2667
2668 // Use local allocator to reduce peak memory usage.
2669 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2670 ArenaBitVector visited(&allocator,
2671 /*start_bits=*/ num_phi_placeholders_,
2672 /*expandable=*/ false,
2673 kArenaAllocLSE);
2674
2675 // Find Phi placeholders to try and match against existing Phis or other replacement values.
2676 ArenaBitVector phi_placeholders_to_materialize(
2677 &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
2678 std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
2679 phi_placeholder, &phi_placeholders_to_materialize, type, /*can_use_default_or_phi=*/true);
2680 if (loop_phi_with_unknown_input) {
2681 DCHECK_GE(GetGraph()
2682 ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
2683 ->GetPredecessors()
2684 .size(),
2685 2u);
2686     // Mark this Phi placeholder as well as the input Phi placeholder as unreplaceable.
2687 phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)] = Value::PureUnknown();
2688 phi_placeholder_replacements_[PhiPlaceholderIndex(*loop_phi_with_unknown_input)] =
2689 Value::PureUnknown();
2690 return;
2691 }
2692
2693 DCHECK_EQ(current_phase_, Phase::kStoreElimination);
2694 bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
2695 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsValid());
2696 DCHECK_EQ(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsUnknown(),
2697 !success);
2698 }
2699
2700 struct ScopedRestoreHeapValues {
2701 public:
2702   ScopedRestoreHeapValues(ArenaStack* alloc,
2703 size_t num_heap_locs,
2704 ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore)
2705 : alloc_(alloc),
2706 updated_values_(alloc_.Adapter(kArenaAllocLSE)),
2707 to_restore_(to_restore) {
2708 updated_values_.reserve(num_heap_locs * to_restore_.size());
2709 }
2710
2711   ~ScopedRestoreHeapValues() {
2712 for (const auto& rec : updated_values_) {
2713 to_restore_[rec.blk_id][rec.heap_loc].value = rec.val_;
2714 }
2715 }
2716
2717 template<typename Func>
2718   void ForEachRecord(Func&& func) {
2719 for (size_t blk_id : Range(to_restore_.size())) {
2720 for (size_t heap_loc : Range(to_restore_[blk_id].size())) {
2721 LSEVisitor::ValueRecord* vr = &to_restore_[blk_id][heap_loc];
2722 LSEVisitor::Value initial = vr->value;
2723 func(vr);
2724 if (!vr->value.ExactEquals(initial)) {
2725 updated_values_.push_back({blk_id, heap_loc, initial});
2726 }
2727 }
2728 }
2729 }
2730
2731 private:
2732 struct UpdateRecord {
2733 size_t blk_id;
2734 size_t heap_loc;
2735 LSEVisitor::Value val_;
2736 };
2737 ScopedArenaAllocator alloc_;
2738 ScopedArenaVector<UpdateRecord> updated_values_;
2739 ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore_;
2740
2741 DISALLOW_COPY_AND_ASSIGN(ScopedRestoreHeapValues);
2742 };
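// ScopedRestoreHeapValues is a snapshot-on-write RAII helper: ForEachRecord()
// records the previous value of every entry its callback changes, and the
// destructor replays those records to restore `to_restore_`. Usage shape
// (mirroring FindStoresWritingOldValues() below):
//
//   {
//     ScopedRestoreHeapValues heap_vals(arena_stack, num_locs, heap_values_for_);
//     heap_vals.ForEachRecord([](LSEVisitor::ValueRecord* rec) {
//       // Mutate `rec->value` freely here.
//     });
//   }  // All mutated values are rolled back when `heap_vals` goes out of scope.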
2743
2744 void LSEVisitor::FindStoresWritingOldValues() {
2745   // Partial LSE relies on knowing the real heap values, not the
2746   // store-replacement versions, so we need to restore the map after
2747   // removing stores.
2748 ScopedRestoreHeapValues heap_vals(allocator_.GetArenaStack(),
2749 heap_location_collector_.GetNumberOfHeapLocations(),
2750 heap_values_for_);
2751 // The Phi placeholder replacements have so far been used for eliminating loads,
2752 // tracking values that would be stored if all stores were kept. As we want to
2753 // compare actual old values after removing unmarked stores, prune the Phi
2754 // placeholder replacements that can be fed by values we may not actually store.
2755 // Replacements marked as unknown can be kept as they are fed by some unknown
2756 // value and would end up as unknown again if we recalculated them.
2757 for (size_t i = 0, size = phi_placeholder_replacements_.size(); i != size; ++i) {
2758 if (!phi_placeholder_replacements_[i].IsUnknown() &&
2759 !phi_placeholders_to_search_for_kept_stores_.IsBitSet(i)) {
2760 phi_placeholder_replacements_[i] = Value::Invalid();
2761 }
2762 }
2763
2764 // Update heap values at end of blocks.
2765 heap_vals.ForEachRecord([&](ValueRecord* rec) {
2766 UpdateValueRecordForStoreElimination(rec);
2767 });
2768
2769 if (kIsDebugBuild) {
2770 heap_vals.ForEachRecord([](ValueRecord* rec) {
2771 DCHECK(!rec->value.NeedsNonLoopPhi()) << rec->value;
2772 });
2773 }
2774
2775 // Use local allocator to reduce peak memory usage.
2776 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2777 // Mark the stores we want to eliminate in a separate bit vector.
2778 ArenaBitVector eliminated_stores(&allocator,
2779 /*start_bits=*/ GetGraph()->GetCurrentInstructionId(),
2780 /*expandable=*/ false,
2781 kArenaAllocLSE);
2782
2783 for (uint32_t store_id : kept_stores_.Indexes()) {
2784 DCHECK(kept_stores_.IsBitSet(store_id));
2785 StoreRecord* store_record = store_records_[store_id];
2786 DCHECK(store_record != nullptr);
2787 UpdateValueRecordForStoreElimination(&store_record->old_value_record);
2788 if (store_record->old_value_record.value.NeedsPhi()) {
2789 DataType::Type type = store_record->stored_value->GetType();
2790 FindOldValueForPhiPlaceholder(store_record->old_value_record.value.GetPhiPlaceholder(), type);
2791 store_record->old_value_record.value =
2792 ReplacementOrValue(store_record->old_value_record.value);
2793 }
2794 DCHECK(!store_record->old_value_record.value.NeedsPhi());
2795 HInstruction* stored_value = FindSubstitute(store_record->stored_value);
2796 if (store_record->old_value_record.value.Equals(stored_value)) {
2797 eliminated_stores.SetBit(store_id);
2798 }
2799 }
2800
2801 // Commit the stores to eliminate by removing them from `kept_stores_`.
2802 kept_stores_.Subtract(&eliminated_stores);
2803 }
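// Example of a store caught by the value comparison above (illustrative):
//
//   int tmp = obj.field;  // Load; heap value of `obj.field` becomes `tmp`.
//   obj.field = tmp;      // Stores the value already present: eliminable.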
2804
2805 void LSEVisitor::Run() {
2806 // 0. Set HasMonitorOperations to false. If we encounter some MonitorOperations that we can't
2807 // remove, we will set it to true in VisitMonitorOperation.
2808 GetGraph()->SetHasMonitorOperations(false);
2809
2810 // 1. Process blocks and instructions in reverse post order.
2811 for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
2812 VisitBasicBlock(block);
2813 }
2814
2815 // 2. Process loads that require loop Phis, trying to find/create replacements.
2816 current_phase_ = Phase::kLoadElimination;
2817 ProcessLoadsRequiringLoopPhis();
2818
2819 // 3. Determine which stores to keep and which to eliminate.
2820 current_phase_ = Phase::kStoreElimination;
2821 // Finish marking stores for keeping.
2822 SearchPhiPlaceholdersForKeptStores();
2823
2824 // Find stores that write the same value as is already present in the location.
2825 FindStoresWritingOldValues();
2826
2827 // 4. Replace loads and remove unnecessary stores and singleton allocations.
2828 FinishFullLSE();
2829 }
2830
2831
2832 void LSEVisitor::FinishFullLSE() {
2833 // Remove recorded load instructions that should be eliminated.
2834 for (const LoadStoreRecord& record : loads_and_stores_) {
2835 size_t id = dchecked_integral_cast<size_t>(record.load_or_store->GetId());
2836 HInstruction* substitute = substitute_instructions_for_loads_[id];
2837 if (substitute == nullptr) {
2838 continue;
2839 }
2840 HInstruction* load = record.load_or_store;
2841 DCHECK(load != nullptr);
2842 DCHECK(IsLoad(load));
2843 DCHECK(load->GetBlock() != nullptr) << load->DebugName() << "@" << load->GetDexPc();
2844 // We proactively retrieve the substitute for a removed load, so
2845 // a load that has a substitute should not be observed as a heap
2846 // location value.
2847 DCHECK_EQ(FindSubstitute(substitute), substitute);
2848
2849 load->ReplaceWith(substitute);
2850 load->GetBlock()->RemoveInstruction(load);
2851 if ((load->IsInstanceFieldGet() && load->AsInstanceFieldGet()->IsVolatile()) ||
2852 (load->IsStaticFieldGet() && load->AsStaticFieldGet()->IsVolatile())) {
2853 MaybeRecordStat(stats_, MethodCompilationStat::kRemovedVolatileLoad);
2854 }
2855 }
2856
2857 // Remove all the stores we can.
2858 for (const LoadStoreRecord& record : loads_and_stores_) {
2859 if (IsStore(record.load_or_store) && !kept_stores_.IsBitSet(record.load_or_store->GetId())) {
2860 record.load_or_store->GetBlock()->RemoveInstruction(record.load_or_store);
2861 if ((record.load_or_store->IsInstanceFieldSet() &&
2862 record.load_or_store->AsInstanceFieldSet()->IsVolatile()) ||
2863 (record.load_or_store->IsStaticFieldSet() &&
2864 record.load_or_store->AsStaticFieldSet()->IsVolatile())) {
2865 MaybeRecordStat(stats_, MethodCompilationStat::kRemovedVolatileStore);
2866 }
2867 }
2868 }
2869
2870 // Eliminate singleton-classified instructions:
2871 // * - Constructor fences (they never escape this thread).
2872 // * - Allocations (if they are unused).
2873 for (HInstruction* new_instance : singleton_new_instances_) {
2874 size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
2875 MaybeRecordStat(stats_,
2876 MethodCompilationStat::kConstructorFenceRemovedLSE,
2877 removed);
2878
2879 if (!new_instance->HasNonEnvironmentUses()) {
2880 new_instance->RemoveEnvironmentUsers();
2881 new_instance->GetBlock()->RemoveInstruction(new_instance);
2882 MaybeRecordStat(stats_, MethodCompilationStat::kFullLSEAllocationRemoved);
2883 }
2884 }
2885 }
2886
2887 // The LSEVisitor is a ValueObject (indirectly through base classes) and therefore
2888 // cannot be directly allocated with an arena allocator, so we need to wrap it.
2889 class LSEVisitorWrapper : public DeletableArenaObject<kArenaAllocLSE> {
2890 public:
2891   LSEVisitorWrapper(HGraph* graph,
2892 const HeapLocationCollector& heap_location_collector,
2893 OptimizingCompilerStats* stats)
2894 : lse_visitor_(graph, heap_location_collector, stats) {}
2895
2896   void Run() {
2897 lse_visitor_.Run();
2898 }
2899
2900 private:
2901 LSEVisitor lse_visitor_;
2902 };
2903
2904 bool LoadStoreElimination::Run() {
2905 if (graph_->IsDebuggable()) {
2906 // Debugger may set heap values or trigger deoptimization of callers.
2907 // Skip this optimization.
2908 return false;
2909 }
2910 ScopedArenaAllocator allocator(graph_->GetArenaStack());
2911 LoadStoreAnalysis lsa(graph_, stats_, &allocator);
2912 lsa.Run();
2913 const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
2914 if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
2915 // No HeapLocation information from LSA, skip this optimization.
2916 return false;
2917 }
2918
2919   // Currently load-store analysis can't handle predicated loads/stores; specifically,
2920   // pairs of memory operations with different predicates.
2921   // TODO: support predicated SIMD.
2922 if (graph_->HasPredicatedSIMD()) {
2923 return false;
2924 }
2925
2926 std::unique_ptr<LSEVisitorWrapper> lse_visitor(
2927 new (&allocator) LSEVisitorWrapper(graph_, heap_location_collector, stats_));
2928 lse_visitor->Run();
2929 return true;
2930 }
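// Hedged usage sketch; the optimizing compiler's pass pipeline normally
// drives this, and the constructor signature here is assumed:
//
//   LoadStoreElimination lse(graph, stats);
//   bool changed = lse.Run();  // False when the pass bails out early.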
2931
2932 #undef LSE_VLOG
2933
2934 } // namespace art
2935