1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "scheduler.h"
18 
19 #include <string>
20 
21 #include "base/scoped_arena_allocator.h"
22 #include "base/scoped_arena_containers.h"
23 #include "data_type-inl.h"
24 #include "optimizing/load_store_analysis.h"
25 #include "prepare_for_register_allocation.h"
26 
27 #ifdef ART_ENABLE_CODEGEN_arm64
28 #include "scheduler_arm64.h"
29 #endif
30 
31 #ifdef ART_ENABLE_CODEGEN_arm
32 #include "scheduler_arm.h"
33 #endif
34 
35 namespace art {
36 
AddDependency(SchedulingNode * node,SchedulingNode * dependency,bool is_data_dependency)37 void SchedulingGraph::AddDependency(SchedulingNode* node,
38                                     SchedulingNode* dependency,
39                                     bool is_data_dependency) {
40   if (node == nullptr || dependency == nullptr) {
41     // A `nullptr` node indicates an instruction out of scheduling range (eg. in
42     // an other block), so we do not need to add a dependency edge to the graph.
43     return;
44   }
45 
46   if (is_data_dependency) {
47     node->AddDataPredecessor(dependency);
48   } else {
49     node->AddOtherPredecessor(dependency);
50   }
51 }
52 
HasReorderingDependency(const HInstruction * instr1,const HInstruction * instr2)53 bool SideEffectDependencyAnalysis::HasReorderingDependency(const HInstruction* instr1,
54                                                            const HInstruction* instr2) {
55   SideEffects instr1_side_effects = instr1->GetSideEffects();
56   SideEffects instr2_side_effects = instr2->GetSideEffects();
57 
58   // Read after write.
59   if (instr1_side_effects.MayDependOn(instr2_side_effects)) {
60     return true;
61   }
62 
63   // Write after read.
64   if (instr2_side_effects.MayDependOn(instr1_side_effects)) {
65     return true;
66   }
67 
68   // Memory write after write.
69   if (instr1_side_effects.DoesAnyWrite() && instr2_side_effects.DoesAnyWrite()) {
70     return true;
71   }
72 
73   return false;
74 }
75 
ArrayAccessHeapLocation(HInstruction * instruction) const76 size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::ArrayAccessHeapLocation(
77     HInstruction* instruction) const {
78   DCHECK(heap_location_collector_ != nullptr);
79   size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(instruction);
80   // This array access should be analyzed and added to HeapLocationCollector before.
81   DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);
82   return heap_loc;
83 }
84 
ArrayAccessMayAlias(HInstruction * instr1,HInstruction * instr2) const85 bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::ArrayAccessMayAlias(
86     HInstruction* instr1, HInstruction* instr2) const {
87   DCHECK(heap_location_collector_ != nullptr);
88   size_t instr1_heap_loc = ArrayAccessHeapLocation(instr1);
89   size_t instr2_heap_loc = ArrayAccessHeapLocation(instr2);
90 
91   // For example: arr[0] and arr[0]
92   if (instr1_heap_loc == instr2_heap_loc) {
93     return true;
94   }
95 
96   // For example: arr[0] and arr[i]
97   if (heap_location_collector_->MayAlias(instr1_heap_loc, instr2_heap_loc)) {
98     return true;
99   }
100 
101   return false;
102 }
103 
IsArrayAccess(const HInstruction * instruction)104 static bool IsArrayAccess(const HInstruction* instruction) {
105   return instruction->IsArrayGet() || instruction->IsArraySet();
106 }
107 
IsInstanceFieldAccess(const HInstruction * instruction)108 static bool IsInstanceFieldAccess(const HInstruction* instruction) {
109   return instruction->IsInstanceFieldGet() ||
110          instruction->IsInstanceFieldSet() ||
111          instruction->IsPredicatedInstanceFieldGet() ||
112          instruction->IsUnresolvedInstanceFieldGet() ||
113          instruction->IsUnresolvedInstanceFieldSet();
114 }
115 
IsStaticFieldAccess(const HInstruction * instruction)116 static bool IsStaticFieldAccess(const HInstruction* instruction) {
117   return instruction->IsStaticFieldGet() ||
118          instruction->IsStaticFieldSet() ||
119          instruction->IsUnresolvedStaticFieldGet() ||
120          instruction->IsUnresolvedStaticFieldSet();
121 }
122 
IsResolvedFieldAccess(const HInstruction * instruction)123 static bool IsResolvedFieldAccess(const HInstruction* instruction) {
124   return instruction->IsInstanceFieldGet() ||
125          instruction->IsInstanceFieldSet() ||
126          instruction->IsPredicatedInstanceFieldGet() ||
127          instruction->IsStaticFieldGet() ||
128          instruction->IsStaticFieldSet();
129 }
130 
IsUnresolvedFieldAccess(const HInstruction * instruction)131 static bool IsUnresolvedFieldAccess(const HInstruction* instruction) {
132   return instruction->IsUnresolvedInstanceFieldGet() ||
133          instruction->IsUnresolvedInstanceFieldSet() ||
134          instruction->IsUnresolvedStaticFieldGet() ||
135          instruction->IsUnresolvedStaticFieldSet();
136 }
137 
IsFieldAccess(const HInstruction * instruction)138 static bool IsFieldAccess(const HInstruction* instruction) {
139   return IsResolvedFieldAccess(instruction) || IsUnresolvedFieldAccess(instruction);
140 }
141 
GetFieldInfo(const HInstruction * instruction)142 static const FieldInfo* GetFieldInfo(const HInstruction* instruction) {
143   return &instruction->GetFieldInfo();
144 }
145 
FieldAccessHeapLocation(const HInstruction * instr) const146 size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessHeapLocation(
147     const HInstruction* instr) const {
148   DCHECK(instr != nullptr);
149   DCHECK(GetFieldInfo(instr) != nullptr);
150   DCHECK(heap_location_collector_ != nullptr);
151 
152   HInstruction* ref = instr->IsPredicatedInstanceFieldGet()
153       ? instr->AsPredicatedInstanceFieldGet()->GetTarget()
154       : instr->InputAt(0);
155   size_t heap_loc = heap_location_collector_->GetFieldHeapLocation(ref, GetFieldInfo(instr));
156   // This field access should be analyzed and added to HeapLocationCollector before.
157   DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);
158 
159   return heap_loc;
160 }
161 
FieldAccessMayAlias(const HInstruction * instr1,const HInstruction * instr2) const162 bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessMayAlias(
163     const HInstruction* instr1, const HInstruction* instr2) const {
164   DCHECK(heap_location_collector_ != nullptr);
165 
166   // Static and instance field accesses should not alias.
167   if ((IsInstanceFieldAccess(instr1) && IsStaticFieldAccess(instr2)) ||
168       (IsStaticFieldAccess(instr1) && IsInstanceFieldAccess(instr2))) {
169     return false;
170   }
171 
172   // If either of the field accesses is unresolved.
173   if (IsUnresolvedFieldAccess(instr1) || IsUnresolvedFieldAccess(instr2)) {
174     // Conservatively treat these two accesses may alias.
175     return true;
176   }
177 
178   // If both fields accesses are resolved.
179   size_t instr1_field_access_heap_loc = FieldAccessHeapLocation(instr1);
180   size_t instr2_field_access_heap_loc = FieldAccessHeapLocation(instr2);
181 
182   if (instr1_field_access_heap_loc == instr2_field_access_heap_loc) {
183     return true;
184   }
185 
186   if (!heap_location_collector_->MayAlias(instr1_field_access_heap_loc,
187                                           instr2_field_access_heap_loc)) {
188     return false;
189   }
190 
191   return true;
192 }
193 
HasMemoryDependency(HInstruction * instr1,HInstruction * instr2) const194 bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::HasMemoryDependency(
195     HInstruction* instr1, HInstruction* instr2) const {
196   if (!HasReorderingDependency(instr1, instr2)) {
197     return false;
198   }
199 
200   if (heap_location_collector_ == nullptr ||
201       heap_location_collector_->GetNumberOfHeapLocations() == 0) {
202     // Without HeapLocation information from load store analysis,
203     // we cannot do further disambiguation analysis on these two instructions.
204     // Just simply say that those two instructions have memory dependency.
205     return true;
206   }
207 
208   if (IsArrayAccess(instr1) && IsArrayAccess(instr2)) {
209     return ArrayAccessMayAlias(instr1, instr2);
210   }
211   if (IsFieldAccess(instr1) && IsFieldAccess(instr2)) {
212     return FieldAccessMayAlias(instr1, instr2);
213   }
214 
215   // TODO(xueliang): LSA to support alias analysis among HVecLoad, HVecStore and ArrayAccess
216   if (instr1->IsVecMemoryOperation() && instr2->IsVecMemoryOperation()) {
217     return true;
218   }
219   if (instr1->IsVecMemoryOperation() && IsArrayAccess(instr2)) {
220     return true;
221   }
222   if (IsArrayAccess(instr1) && instr2->IsVecMemoryOperation()) {
223     return true;
224   }
225 
226   // Heap accesses of different kinds should not alias.
227   if (IsArrayAccess(instr1) && IsFieldAccess(instr2)) {
228     return false;
229   }
230   if (IsFieldAccess(instr1) && IsArrayAccess(instr2)) {
231     return false;
232   }
233   if (instr1->IsVecMemoryOperation() && IsFieldAccess(instr2)) {
234     return false;
235   }
236   if (IsFieldAccess(instr1) && instr2->IsVecMemoryOperation()) {
237     return false;
238   }
239 
240   // We conservatively treat all other cases having dependency,
241   // for example, Invoke and ArrayGet.
242   return true;
243 }
244 
HasExceptionDependency(const HInstruction * instr1,const HInstruction * instr2)245 bool SideEffectDependencyAnalysis::HasExceptionDependency(const HInstruction* instr1,
246                                                           const HInstruction* instr2) {
247   if (instr2->CanThrow() && instr1->GetSideEffects().DoesAnyWrite()) {
248     return true;
249   }
250   if (instr2->GetSideEffects().DoesAnyWrite() && instr1->CanThrow()) {
251     return true;
252   }
253   if (instr2->CanThrow() && instr1->CanThrow()) {
254     return true;
255   }
256 
257   // Above checks should cover all cases where we cannot reorder two
258   // instructions which may throw exception.
259   return false;
260 }
261 
262 // Check if the specified instruction is a better candidate which more likely will
263 // have other instructions depending on it.
IsBetterCandidateWithMoreLikelyDependencies(HInstruction * new_candidate,HInstruction * old_candidate)264 static bool IsBetterCandidateWithMoreLikelyDependencies(HInstruction* new_candidate,
265                                                         HInstruction* old_candidate) {
266   if (!new_candidate->GetSideEffects().Includes(old_candidate->GetSideEffects())) {
267     // Weaker side effects.
268     return false;
269   }
270   if (old_candidate->GetSideEffects().Includes(new_candidate->GetSideEffects())) {
271     // Same side effects, check if `new_candidate` has stronger `CanThrow()`.
272     return new_candidate->CanThrow() && !old_candidate->CanThrow();
273   } else {
274     // Stronger side effects, check if `new_candidate` has at least as strong `CanThrow()`.
275     return new_candidate->CanThrow() || !old_candidate->CanThrow();
276   }
277 }
278 
AddCrossIterationDependencies(SchedulingNode * node)279 void SchedulingGraph::AddCrossIterationDependencies(SchedulingNode* node) {
280   for (HInstruction* instruction : node->GetInstruction()->GetInputs()) {
281     // Having a phi-function from a loop header as an input means the current node of the
282     // scheduling graph has a cross-iteration dependency because such phi-functions bring values
283     // from the previous iteration to the current iteration.
284     if (!instruction->IsLoopHeaderPhi()) {
285       continue;
286     }
287     for (HInstruction* phi_input : instruction->GetInputs()) {
288       // As a scheduling graph of the current basic block is built by
289       // processing instructions bottom-up, nullptr returned by GetNode means
290       // an instruction defining a value for the phi is either before the
291       // instruction represented by node or it is in a different basic block.
292       SchedulingNode* def_node = GetNode(phi_input);
293 
294       // We don't create a dependency if there are uses besides the use in phi.
295       // In such cases a register to hold phi_input is usually allocated and
296       // a MOV instruction is generated. In cases with multiple uses and no MOV
297       // instruction, reordering creating a MOV instruction can improve
298       // performance more than an attempt to avoid a MOV instruction.
299       if (def_node != nullptr && def_node != node && phi_input->GetUses().HasExactlyOneElement()) {
300         // We have an implicit data dependency between node and def_node.
301         // AddAddDataDependency cannot be used because it is for explicit data dependencies.
302         // So AddOtherDependency is used.
303         AddOtherDependency(def_node, node);
304       }
305     }
306   }
307 }
308 
// Builds all dependency edges for `instruction_node`: define-use (data)
// dependencies, scheduling-barrier dependencies, side-effect dependencies,
// environment dependencies and cross-iteration dependencies.
void SchedulingGraph::AddDependencies(SchedulingNode* instruction_node,
                                      bool is_scheduling_barrier) {
  HInstruction* instruction = instruction_node->GetInstruction();

  // Define-use dependencies.
  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
    AddDataDependency(GetNode(use.GetUser()), instruction_node);
  }

  // Scheduling barrier dependencies.
  DCHECK(!is_scheduling_barrier || contains_scheduling_barrier_);
  if (contains_scheduling_barrier_) {
    // A barrier depends on instructions after it. And instructions before the
    // barrier depend on it.
    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
      SchedulingNode* other_node = GetNode(other);
      CHECK(other_node != nullptr)
          << other->DebugName()
          << " is in block " << other->GetBlock()->GetBlockId()
          << ", and expected in block " << instruction->GetBlock()->GetBlockId();
      bool other_is_barrier = other_node->IsSchedulingBarrier();
      if (is_scheduling_barrier || other_is_barrier) {
        AddOtherDependency(other_node, instruction_node);
      }
      if (other_is_barrier) {
        // This other scheduling barrier guarantees ordering of instructions after
        // it, so avoid creating additional useless dependencies in the graph.
        // For example if we have
        //     instr_1
        //     barrier_2
        //     instr_3
        //     barrier_4
        //     instr_5
        // we only create the following non-data dependencies
        //     1 -> 2
        //     2 -> 3
        //     2 -> 4
        //     3 -> 4
        //     4 -> 5
        // and do not create
        //     1 -> 4
        //     2 -> 5
        // Note that in this example we could also avoid creating the dependency
        // `2 -> 4`.  But if we remove `instr_3` that dependency is required to
        // order the barriers. So we generate it to avoid a special case.
        break;
      }
    }
  }

  // Side effect dependencies.
  if (!instruction->GetSideEffects().DoesNothing() || instruction->CanThrow()) {
    // `dep_chain_candidate` tracks the instruction most likely to carry
    // transitive dependencies, so that redundant explicit edges can be
    // skipped below.
    HInstruction* dep_chain_candidate = nullptr;
    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
      SchedulingNode* other_node = GetNode(other);
      if (other_node->IsSchedulingBarrier()) {
        // We have reached a scheduling barrier so we can stop further
        // processing.
        //
        // As a "other" dependency is not set up if a data dependency exists, we need to check that
        // one of them must exist.
        DCHECK(other_node->HasOtherDependency(instruction_node)
               || other_node->HasDataDependency(instruction_node));
        break;
      }
      if (side_effect_dependency_analysis_.HasSideEffectDependency(other, instruction)) {
        if (dep_chain_candidate != nullptr &&
            side_effect_dependency_analysis_.HasSideEffectDependency(other, dep_chain_candidate)) {
          // Skip an explicit dependency to reduce memory usage, rely on the transitive dependency.
        } else {
          AddOtherDependency(other_node, instruction_node);
        }
        // Check if `other` is a better candidate which more likely will have other instructions
        // depending on it.
        if (dep_chain_candidate == nullptr ||
            IsBetterCandidateWithMoreLikelyDependencies(other, dep_chain_candidate)) {
          dep_chain_candidate = other;
        }
      }
    }
  }

  // Environment dependencies.
  // We do not need to process those if the instruction is a scheduling barrier,
  // since the barrier already has non-data dependencies on all following
  // instructions.
  if (!is_scheduling_barrier) {
    for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
      // Note that here we could stop processing if the environment holder is
      // across a scheduling barrier. But checking this would likely require
      // more work than simply iterating through environment uses.
      AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node);
    }
  }

  AddCrossIterationDependencies(instruction_node);
}
406 
InstructionTypeId(const HInstruction * instruction)407 static const std::string InstructionTypeId(const HInstruction* instruction) {
408   return DataType::TypeId(instruction->GetType()) + std::to_string(instruction->GetId());
409 }
410 
411 // Ideally we would reuse the graph visualizer code, but it is not available
412 // from here and it is not worth moving all that code only for our use.
DumpAsDotNode(std::ostream & output,const SchedulingNode * node)413 static void DumpAsDotNode(std::ostream& output, const SchedulingNode* node) {
414   const HInstruction* instruction = node->GetInstruction();
415   // Use the instruction typed id as the node identifier.
416   std::string instruction_id = InstructionTypeId(instruction);
417   output << instruction_id << "[shape=record, label=\""
418       << instruction_id << ' ' << instruction->DebugName() << " [";
419   // List the instruction's inputs in its description. When visualizing the
420   // graph this helps differentiating data inputs from other dependencies.
421   const char* seperator = "";
422   for (const HInstruction* input : instruction->GetInputs()) {
423     output << seperator << InstructionTypeId(input);
424     seperator = ",";
425   }
426   output << "]";
427   // Other properties of the node.
428   output << "\\ninternal_latency: " << node->GetInternalLatency();
429   output << "\\ncritical_path: " << node->GetCriticalPath();
430   if (node->IsSchedulingBarrier()) {
431     output << "\\n(barrier)";
432   }
433   output << "\"];\n";
434   // We want program order to go from top to bottom in the graph output, so we
435   // reverse the edges and specify `dir=back`.
436   for (const SchedulingNode* predecessor : node->GetDataPredecessors()) {
437     const HInstruction* predecessor_instruction = predecessor->GetInstruction();
438     output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
439         << "[label=\"" << predecessor->GetLatency() << "\",dir=back]\n";
440   }
441   for (const SchedulingNode* predecessor : node->GetOtherPredecessors()) {
442     const HInstruction* predecessor_instruction = predecessor->GetInstruction();
443     output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
444         << "[dir=back,color=blue]\n";
445   }
446 }
447 
DumpAsDotGraph(const std::string & description,const ScopedArenaVector<SchedulingNode * > & initial_candidates)448 void SchedulingGraph::DumpAsDotGraph(const std::string& description,
449                                      const ScopedArenaVector<SchedulingNode*>& initial_candidates) {
450   // TODO(xueliang): ideally we should move scheduling information into HInstruction, after that
451   // we should move this dotty graph dump feature to visualizer, and have a compiler option for it.
452   std::ofstream output("scheduling_graphs.dot", std::ofstream::out | std::ofstream::app);
453   // Description of this graph, as a comment.
454   output << "// " << description << "\n";
455   // Start the dot graph. Use an increasing index for easier differentiation.
456   output << "digraph G {\n";
457   for (const auto& entry : nodes_map_) {
458     SchedulingNode* node = entry.second.get();
459     DumpAsDotNode(output, node);
460   }
461   // Create a fake 'end_of_scheduling' node to help visualization of critical_paths.
462   for (SchedulingNode* node : initial_candidates) {
463     const HInstruction* instruction = node->GetInstruction();
464     output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n "
465       << "[label=\"" << node->GetLatency() << "\",dir=back]\n";
466   }
467   // End of the dot graph.
468   output << "}\n";
469   output.close();
470 }
471 
SelectMaterializedCondition(ScopedArenaVector<SchedulingNode * > * nodes,const SchedulingGraph & graph) const472 SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition(
473     ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) const {
474   // Schedule condition inputs that can be materialized immediately before their use.
475   // In following example, after we've scheduled HSelect, we want LessThan to be scheduled
476   // immediately, because it is a materialized condition, and will be emitted right before HSelect
477   // in codegen phase.
478   //
479   // i20 HLessThan [...]                  HLessThan    HAdd      HAdd
480   // i21 HAdd [...]                ===>      |          |         |
481   // i22 HAdd [...]                          +----------+---------+
482   // i23 HSelect [i21, i22, i20]                     HSelect
483 
484   if (prev_select_ == nullptr) {
485     return nullptr;
486   }
487 
488   const HInstruction* instruction = prev_select_->GetInstruction();
489   const HCondition* condition = nullptr;
490   DCHECK(instruction != nullptr);
491 
492   if (instruction->IsIf()) {
493     condition = instruction->AsIf()->InputAt(0)->AsCondition();
494   } else if (instruction->IsSelect()) {
495     condition = instruction->AsSelect()->GetCondition()->AsCondition();
496   }
497 
498   SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr;
499 
500   if ((condition_node != nullptr) &&
501       condition->HasOnlyOneNonEnvironmentUse() &&
502       ContainsElement(*nodes, condition_node)) {
503     DCHECK(!condition_node->HasUnscheduledSuccessors());
504     // Remove the condition from the list of candidates and schedule it.
505     RemoveElement(*nodes, condition_node);
506     return condition_node;
507   }
508 
509   return nullptr;
510 }
511 
PopHighestPriorityNode(ScopedArenaVector<SchedulingNode * > * nodes,const SchedulingGraph & graph)512 SchedulingNode* CriticalPathSchedulingNodeSelector::PopHighestPriorityNode(
513     ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) {
514   DCHECK(!nodes->empty());
515   SchedulingNode* select_node = nullptr;
516 
517   // Optimize for materialized condition and its emit before use scenario.
518   select_node = SelectMaterializedCondition(nodes, graph);
519 
520   if (select_node == nullptr) {
521     // Get highest priority node based on critical path information.
522     select_node = (*nodes)[0];
523     size_t select = 0;
524     for (size_t i = 1, e = nodes->size(); i < e; i++) {
525       SchedulingNode* check = (*nodes)[i];
526       SchedulingNode* candidate = (*nodes)[select];
527       select_node = GetHigherPrioritySchedulingNode(candidate, check);
528       if (select_node == check) {
529         select = i;
530       }
531     }
532     DeleteNodeAtIndex(nodes, select);
533   }
534 
535   prev_select_ = select_node;
536   return select_node;
537 }
538 
GetHigherPrioritySchedulingNode(SchedulingNode * candidate,SchedulingNode * check) const539 SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingNode(
540     SchedulingNode* candidate, SchedulingNode* check) const {
541   uint32_t candidate_path = candidate->GetCriticalPath();
542   uint32_t check_path = check->GetCriticalPath();
543   // First look at the critical_path.
544   if (check_path != candidate_path) {
545     return check_path < candidate_path ? check : candidate;
546   }
547   // If both critical paths are equal, schedule instructions with a higher latency
548   // first in program order.
549   return check->GetLatency() < candidate->GetLatency() ? check : candidate;
550 }
551 
Schedule(HGraph * graph)552 void HScheduler::Schedule(HGraph* graph) {
553   // We run lsa here instead of in a separate pass to better control whether we
554   // should run the analysis or not.
555   const HeapLocationCollector* heap_location_collector = nullptr;
556   ScopedArenaAllocator allocator(graph->GetArenaStack());
557   LoadStoreAnalysis lsa(graph, /*stats=*/nullptr, &allocator, LoadStoreAnalysisType::kBasic);
558   if (!only_optimize_loop_blocks_ || graph->HasLoops()) {
559     lsa.Run();
560     heap_location_collector = &lsa.GetHeapLocationCollector();
561   }
562 
563   for (HBasicBlock* block : graph->GetReversePostOrder()) {
564     if (IsSchedulable(block)) {
565       Schedule(block, heap_location_collector);
566     }
567   }
568 }
569 
// Schedules a single basic block: builds the scheduling graph bottom-up,
// computes latencies, then repeatedly pops the highest priority candidate and
// places it at the cursor position.
void HScheduler::Schedule(HBasicBlock* block,
                          const HeapLocationCollector* heap_location_collector) {
  ScopedArenaAllocator allocator(block->GetGraph()->GetArenaStack());
  ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator.Adapter(kArenaAllocScheduler));

  // Build the scheduling graph.
  SchedulingGraph scheduling_graph(&allocator, heap_location_collector);
  for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    HInstruction* instruction = it.Current();
    CHECK_EQ(instruction->GetBlock(), block)
        << instruction->DebugName()
        << " is in block " << instruction->GetBlock()->GetBlockId()
        << ", and expected in block " << block->GetBlockId();
    SchedulingNode* node = scheduling_graph.AddNode(instruction, IsSchedulingBarrier(instruction));
    CalculateLatency(node);
    scheduling_nodes.push_back(node);
  }

  // With fewer than two nodes there is nothing to reorder.
  if (scheduling_graph.Size() <= 1) {
    return;
  }

  // Scheduled instructions are placed (in reverse) starting from the end of
  // the block.
  cursor_ = block->GetLastInstruction();

  // The list of candidates for scheduling. A node becomes a candidate when all
  // its predecessors have been scheduled.
  ScopedArenaVector<SchedulingNode*> candidates(allocator.Adapter(kArenaAllocScheduler));

  // Find the initial candidates for scheduling.
  for (SchedulingNode* node : scheduling_nodes) {
    if (!node->HasUnscheduledSuccessors()) {
      node->MaybeUpdateCriticalPath(node->GetLatency());
      candidates.push_back(node);
    }
  }

  ScopedArenaVector<SchedulingNode*> initial_candidates(allocator.Adapter(kArenaAllocScheduler));
  if (kDumpDotSchedulingGraphs) {
    // Remember the list of initial candidates for debug output purposes.
    initial_candidates.assign(candidates.begin(), candidates.end());
  }

  // Schedule all nodes.
  selector_->Reset();
  while (!candidates.empty()) {
    SchedulingNode* node = selector_->PopHighestPriorityNode(&candidates, scheduling_graph);
    Schedule(node, &candidates);
  }

  if (kDumpDotSchedulingGraphs) {
    // Dump the graph in `dot` format.
    HGraph* graph = block->GetGraph();
    std::stringstream description;
    description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx())
        << " B" << block->GetBlockId();
    scheduling_graph.DumpAsDotGraph(description.str(), initial_candidates);
  }
}
628 
Schedule(SchedulingNode * scheduling_node,ScopedArenaVector<SchedulingNode * > * candidates)629 void HScheduler::Schedule(SchedulingNode* scheduling_node,
630                           /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates) {
631   // Check whether any of the node's predecessors will be valid candidates after
632   // this node is scheduled.
633   uint32_t path_to_node = scheduling_node->GetCriticalPath();
634   for (SchedulingNode* predecessor : scheduling_node->GetDataPredecessors()) {
635     predecessor->MaybeUpdateCriticalPath(
636         path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency());
637     predecessor->DecrementNumberOfUnscheduledSuccessors();
638     if (!predecessor->HasUnscheduledSuccessors()) {
639       candidates->push_back(predecessor);
640     }
641   }
642   for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) {
643     // Do not update the critical path.
644     // The 'other' (so 'non-data') dependencies (usually) do not represent a
645     // 'material' dependency of nodes on others. They exist for program
646     // correctness. So we do not use them to compute the critical path.
647     predecessor->DecrementNumberOfUnscheduledSuccessors();
648     if (!predecessor->HasUnscheduledSuccessors()) {
649       candidates->push_back(predecessor);
650     }
651   }
652 
653   Schedule(scheduling_node->GetInstruction());
654 }
655 
656 // Move an instruction after cursor instruction inside one basic block.
MoveAfterInBlock(HInstruction * instruction,HInstruction * cursor)657 static void MoveAfterInBlock(HInstruction* instruction, HInstruction* cursor) {
658   DCHECK_EQ(instruction->GetBlock(), cursor->GetBlock());
659   DCHECK_NE(cursor, cursor->GetBlock()->GetLastInstruction());
660   DCHECK(!instruction->IsControlFlow());
661   DCHECK(!cursor->IsControlFlow());
662   instruction->MoveBefore(cursor->GetNext(), /* do_checks= */ false);
663 }
664 
Schedule(HInstruction * instruction)665 void HScheduler::Schedule(HInstruction* instruction) {
666   if (instruction == cursor_) {
667     cursor_ = cursor_->GetPrevious();
668   } else {
669     MoveAfterInBlock(instruction, cursor_);
670   }
671 }
672 
// Whitelist of instruction kinds the scheduler knows how to handle. Returns
// true if `instruction` may appear in a scheduled block; a single
// non-schedulable instruction disqualifies its whole block (see
// IsSchedulable(const HBasicBlock*)). Note that "schedulable" does not imply
// "movable": some of the accepted kinds are treated as scheduling barriers
// (see IsSchedulingBarrier).
bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
  // We want to avoid exhaustively listing all instructions, so we first check
  // for instruction categories that we know are safe.
  if (instruction->IsControlFlow() ||
      instruction->IsConstant()) {
    return true;
  }
  // Currently all unary and binary operations are safe to schedule, so avoid
  // checking for each of them individually.
  // Since nothing prevents a new scheduling-unsafe HInstruction to subclass
  // HUnaryOperation (or HBinaryOperation), check in debug mode that we have
  // the exhaustive lists here.
  if (instruction->IsUnaryOperation()) {
    DCHECK(instruction->IsAbs() ||
           instruction->IsBooleanNot() ||
           instruction->IsNot() ||
           instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName();
    return true;
  }
  if (instruction->IsBinaryOperation()) {
    DCHECK(instruction->IsAdd() ||
           instruction->IsAnd() ||
           instruction->IsCompare() ||
           instruction->IsCondition() ||
           instruction->IsDiv() ||
           instruction->IsMin() ||
           instruction->IsMax() ||
           instruction->IsMul() ||
           instruction->IsOr() ||
           instruction->IsRem() ||
           instruction->IsRor() ||
           instruction->IsShl() ||
           instruction->IsShr() ||
           instruction->IsSub() ||
           instruction->IsUShr() ||
           instruction->IsXor()) << "unexpected instruction " << instruction->DebugName();
    return true;
  }
  // The scheduler should not see any of these.
  DCHECK(!instruction->IsParallelMove()) << "unexpected instruction " << instruction->DebugName();
  // List of instructions explicitly excluded:
  //    HClearException
  //    HClinitCheck
  //    HDeoptimize
  //    HLoadClass
  //    HLoadException
  //    HMemoryBarrier
  //    HMonitorOperation
  //    HNativeDebugInfo
  //    HThrow
  //    HTryBoundary
  // TODO: Some of the instructions above may be safe to schedule (maybe as
  // scheduling barriers).
  // Explicit whitelist of the remaining schedulable kinds. Field accesses are
  // only accepted when non-volatile — volatile accesses are excluded here,
  // presumably because of their ordering requirements (kept out of the
  // scheduler's reach rather than modeled as dependencies).
  return instruction->IsArrayGet() ||
         instruction->IsArraySet() ||
         instruction->IsArrayLength() ||
         instruction->IsBoundType() ||
         instruction->IsBoundsCheck() ||
         instruction->IsCheckCast() ||
         instruction->IsClassTableGet() ||
         instruction->IsCurrentMethod() ||
         instruction->IsDivZeroCheck() ||
         (instruction->IsInstanceFieldGet() && !instruction->AsInstanceFieldGet()->IsVolatile()) ||
         (instruction->IsPredicatedInstanceFieldGet() &&
          !instruction->AsPredicatedInstanceFieldGet()->IsVolatile()) ||
         (instruction->IsInstanceFieldSet() && !instruction->AsInstanceFieldSet()->IsVolatile()) ||
         instruction->IsInstanceOf() ||
         instruction->IsInvokeInterface() ||
         instruction->IsInvokeStaticOrDirect() ||
         instruction->IsInvokeUnresolved() ||
         instruction->IsInvokeVirtual() ||
         instruction->IsLoadString() ||
         instruction->IsNewArray() ||
         instruction->IsNewInstance() ||
         instruction->IsNullCheck() ||
         instruction->IsPackedSwitch() ||
         instruction->IsParameterValue() ||
         instruction->IsPhi() ||
         instruction->IsReturn() ||
         instruction->IsReturnVoid() ||
         instruction->IsSelect() ||
         (instruction->IsStaticFieldGet() && !instruction->AsStaticFieldGet()->IsVolatile()) ||
         (instruction->IsStaticFieldSet() && !instruction->AsStaticFieldSet()->IsVolatile()) ||
         instruction->IsSuspendCheck() ||
         instruction->IsTypeConversion();
}
759 
IsSchedulable(const HBasicBlock * block) const760 bool HScheduler::IsSchedulable(const HBasicBlock* block) const {
761   // We may be only interested in loop blocks.
762   if (only_optimize_loop_blocks_ && !block->IsInLoop()) {
763     return false;
764   }
765   if (block->GetTryCatchInformation() != nullptr) {
766     // Do not schedule blocks that are part of try-catch.
767     // Because scheduler cannot see if catch block has assumptions on the instruction order in
768     // the try block. In following example, if we enable scheduler for the try block,
769     // MulitiplyAccumulate may be scheduled before DivZeroCheck,
770     // which can result in an incorrect value in the catch block.
771     //   try {
772     //     a = a/b;    // DivZeroCheck
773     //                 // Div
774     //     c = c*d+e;  // MulitiplyAccumulate
775     //   } catch {System.out.print(c); }
776     return false;
777   }
778   // Check whether all instructions in this block are schedulable.
779   for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
780     if (!IsSchedulable(it.Current())) {
781       return false;
782     }
783   }
784   return true;
785 }
786 
IsSchedulingBarrier(const HInstruction * instr) const787 bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
788   return instr->IsControlFlow() ||
789       // Don't break calling convention.
790       instr->IsParameterValue() ||
791       // Code generation of goto relies on SuspendCheck's position.
792       instr->IsSuspendCheck();
793 }
794 
Run(bool only_optimize_loop_blocks,bool schedule_randomly)795 bool HInstructionScheduling::Run(bool only_optimize_loop_blocks,
796                                  bool schedule_randomly) {
797 #if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm)
798   // Phase-local allocator that allocates scheduler internal data structures like
799   // scheduling nodes, internel nodes map, dependencies, etc.
800   CriticalPathSchedulingNodeSelector critical_path_selector;
801   RandomSchedulingNodeSelector random_selector;
802   SchedulingNodeSelector* selector = schedule_randomly
803       ? static_cast<SchedulingNodeSelector*>(&random_selector)
804       : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
805 #else
806   // Avoid compilation error when compiling for unsupported instruction set.
807   UNUSED(only_optimize_loop_blocks);
808   UNUSED(schedule_randomly);
809   UNUSED(codegen_);
810 #endif
811 
812   switch (instruction_set_) {
813 #ifdef ART_ENABLE_CODEGEN_arm64
814     case InstructionSet::kArm64: {
815       arm64::HSchedulerARM64 scheduler(selector);
816       scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
817       scheduler.Schedule(graph_);
818       break;
819     }
820 #endif
821 #if defined(ART_ENABLE_CODEGEN_arm)
822     case InstructionSet::kThumb2:
823     case InstructionSet::kArm: {
824       arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_);
825       arm::HSchedulerARM scheduler(selector, &arm_latency_visitor);
826       scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
827       scheduler.Schedule(graph_);
828       break;
829     }
830 #endif
831     default:
832       break;
833   }
834   return true;
835 }
836 
837 }  // namespace art
838