1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <string>
18 
19 #include "scheduler.h"
20 
21 #include "base/scoped_arena_allocator.h"
22 #include "base/scoped_arena_containers.h"
23 #include "data_type-inl.h"
24 #include "prepare_for_register_allocation.h"
25 
26 #ifdef ART_ENABLE_CODEGEN_arm64
27 #include "scheduler_arm64.h"
28 #endif
29 
30 #ifdef ART_ENABLE_CODEGEN_arm
31 #include "scheduler_arm.h"
32 #endif
33 
34 namespace art {
35 
AddDependency(SchedulingNode * node,SchedulingNode * dependency,bool is_data_dependency)36 void SchedulingGraph::AddDependency(SchedulingNode* node,
37                                     SchedulingNode* dependency,
38                                     bool is_data_dependency) {
39   if (node == nullptr || dependency == nullptr) {
40     // A `nullptr` node indicates an instruction out of scheduling range (eg. in
41     // an other block), so we do not need to add a dependency edge to the graph.
42     return;
43   }
44 
45   if (is_data_dependency) {
46     if (!HasImmediateDataDependency(node, dependency)) {
47       node->AddDataPredecessor(dependency);
48     }
49   } else if (!HasImmediateOtherDependency(node, dependency)) {
50     node->AddOtherPredecessor(dependency);
51   }
52 }
53 
MayHaveReorderingDependency(SideEffects node,SideEffects other)54 static bool MayHaveReorderingDependency(SideEffects node, SideEffects other) {
55   // Read after write.
56   if (node.MayDependOn(other)) {
57     return true;
58   }
59 
60   // Write after read.
61   if (other.MayDependOn(node)) {
62     return true;
63   }
64 
65   // Memory write after write.
66   if (node.DoesAnyWrite() && other.DoesAnyWrite()) {
67     return true;
68   }
69 
70   return false;
71 }
72 
ArrayAccessHeapLocation(HInstruction * instruction) const73 size_t SchedulingGraph::ArrayAccessHeapLocation(HInstruction* instruction) const {
74   DCHECK(heap_location_collector_ != nullptr);
75   size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(instruction);
76   // This array access should be analyzed and added to HeapLocationCollector before.
77   DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);
78   return heap_loc;
79 }
80 
ArrayAccessMayAlias(HInstruction * node,HInstruction * other) const81 bool SchedulingGraph::ArrayAccessMayAlias(HInstruction* node,
82                                           HInstruction* other) const {
83   DCHECK(heap_location_collector_ != nullptr);
84   size_t node_heap_loc = ArrayAccessHeapLocation(node);
85   size_t other_heap_loc = ArrayAccessHeapLocation(other);
86 
87   // For example: arr[0] and arr[0]
88   if (node_heap_loc == other_heap_loc) {
89     return true;
90   }
91 
92   // For example: arr[0] and arr[i]
93   if (heap_location_collector_->MayAlias(node_heap_loc, other_heap_loc)) {
94     return true;
95   }
96 
97   return false;
98 }
99 
IsArrayAccess(const HInstruction * instruction)100 static bool IsArrayAccess(const HInstruction* instruction) {
101   return instruction->IsArrayGet() || instruction->IsArraySet();
102 }
103 
IsInstanceFieldAccess(const HInstruction * instruction)104 static bool IsInstanceFieldAccess(const HInstruction* instruction) {
105   return instruction->IsInstanceFieldGet() ||
106          instruction->IsInstanceFieldSet() ||
107          instruction->IsUnresolvedInstanceFieldGet() ||
108          instruction->IsUnresolvedInstanceFieldSet();
109 }
110 
IsStaticFieldAccess(const HInstruction * instruction)111 static bool IsStaticFieldAccess(const HInstruction* instruction) {
112   return instruction->IsStaticFieldGet() ||
113          instruction->IsStaticFieldSet() ||
114          instruction->IsUnresolvedStaticFieldGet() ||
115          instruction->IsUnresolvedStaticFieldSet();
116 }
117 
IsResolvedFieldAccess(const HInstruction * instruction)118 static bool IsResolvedFieldAccess(const HInstruction* instruction) {
119   return instruction->IsInstanceFieldGet() ||
120          instruction->IsInstanceFieldSet() ||
121          instruction->IsStaticFieldGet() ||
122          instruction->IsStaticFieldSet();
123 }
124 
IsUnresolvedFieldAccess(const HInstruction * instruction)125 static bool IsUnresolvedFieldAccess(const HInstruction* instruction) {
126   return instruction->IsUnresolvedInstanceFieldGet() ||
127          instruction->IsUnresolvedInstanceFieldSet() ||
128          instruction->IsUnresolvedStaticFieldGet() ||
129          instruction->IsUnresolvedStaticFieldSet();
130 }
131 
IsFieldAccess(const HInstruction * instruction)132 static bool IsFieldAccess(const HInstruction* instruction) {
133   return IsResolvedFieldAccess(instruction) || IsUnresolvedFieldAccess(instruction);
134 }
135 
GetFieldInfo(const HInstruction * instruction)136 static const FieldInfo* GetFieldInfo(const HInstruction* instruction) {
137   if (instruction->IsInstanceFieldGet()) {
138     return &instruction->AsInstanceFieldGet()->GetFieldInfo();
139   } else if (instruction->IsInstanceFieldSet()) {
140     return &instruction->AsInstanceFieldSet()->GetFieldInfo();
141   } else if (instruction->IsStaticFieldGet()) {
142     return &instruction->AsStaticFieldGet()->GetFieldInfo();
143   } else if (instruction->IsStaticFieldSet()) {
144     return &instruction->AsStaticFieldSet()->GetFieldInfo();
145   } else {
146     LOG(FATAL) << "Unexpected field access type";
147     UNREACHABLE();
148   }
149 }
150 
FieldAccessHeapLocation(HInstruction * obj,const FieldInfo * field) const151 size_t SchedulingGraph::FieldAccessHeapLocation(HInstruction* obj, const FieldInfo* field) const {
152   DCHECK(obj != nullptr);
153   DCHECK(field != nullptr);
154   DCHECK(heap_location_collector_ != nullptr);
155 
156   size_t heap_loc = heap_location_collector_->GetFieldHeapLocation(obj, field);
157   // This field access should be analyzed and added to HeapLocationCollector before.
158   DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);
159 
160   return heap_loc;
161 }
162 
FieldAccessMayAlias(const HInstruction * node,const HInstruction * other) const163 bool SchedulingGraph::FieldAccessMayAlias(const HInstruction* node,
164                                           const HInstruction* other) const {
165   DCHECK(heap_location_collector_ != nullptr);
166 
167   // Static and instance field accesses should not alias.
168   if ((IsInstanceFieldAccess(node) && IsStaticFieldAccess(other)) ||
169       (IsStaticFieldAccess(node) && IsInstanceFieldAccess(other))) {
170     return false;
171   }
172 
173   // If either of the field accesses is unresolved.
174   if (IsUnresolvedFieldAccess(node) || IsUnresolvedFieldAccess(other)) {
175     // Conservatively treat these two accesses may alias.
176     return true;
177   }
178 
179   // If both fields accesses are resolved.
180   const FieldInfo* node_field = GetFieldInfo(node);
181   const FieldInfo* other_field = GetFieldInfo(other);
182 
183   size_t node_loc = FieldAccessHeapLocation(node->InputAt(0), node_field);
184   size_t other_loc = FieldAccessHeapLocation(other->InputAt(0), other_field);
185 
186   if (node_loc == other_loc) {
187     return true;
188   }
189 
190   if (!heap_location_collector_->MayAlias(node_loc, other_loc)) {
191     return false;
192   }
193 
194   return true;
195 }
196 
HasMemoryDependency(HInstruction * node,HInstruction * other) const197 bool SchedulingGraph::HasMemoryDependency(HInstruction* node,
198                                           HInstruction* other) const {
199   if (!MayHaveReorderingDependency(node->GetSideEffects(), other->GetSideEffects())) {
200     return false;
201   }
202 
203   if (heap_location_collector_ == nullptr ||
204       heap_location_collector_->GetNumberOfHeapLocations() == 0) {
205     // Without HeapLocation information from load store analysis,
206     // we cannot do further disambiguation analysis on these two instructions.
207     // Just simply say that those two instructions have memory dependency.
208     return true;
209   }
210 
211   if (IsArrayAccess(node) && IsArrayAccess(other)) {
212     return ArrayAccessMayAlias(node, other);
213   }
214   if (IsFieldAccess(node) && IsFieldAccess(other)) {
215     return FieldAccessMayAlias(node, other);
216   }
217 
218   // TODO(xueliang): LSA to support alias analysis among HVecLoad, HVecStore and ArrayAccess
219   if (node->IsVecMemoryOperation() && other->IsVecMemoryOperation()) {
220     return true;
221   }
222   if (node->IsVecMemoryOperation() && IsArrayAccess(other)) {
223     return true;
224   }
225   if (IsArrayAccess(node) && other->IsVecMemoryOperation()) {
226     return true;
227   }
228 
229   // Heap accesses of different kinds should not alias.
230   if (IsArrayAccess(node) && IsFieldAccess(other)) {
231     return false;
232   }
233   if (IsFieldAccess(node) && IsArrayAccess(other)) {
234     return false;
235   }
236   if (node->IsVecMemoryOperation() && IsFieldAccess(other)) {
237     return false;
238   }
239   if (IsFieldAccess(node) && other->IsVecMemoryOperation()) {
240     return false;
241   }
242 
243   // We conservatively treat all other cases having dependency,
244   // for example, Invoke and ArrayGet.
245   return true;
246 }
247 
HasExceptionDependency(const HInstruction * node,const HInstruction * other) const248 bool SchedulingGraph::HasExceptionDependency(const HInstruction* node,
249                                              const HInstruction* other) const {
250   if (other->CanThrow() && node->GetSideEffects().DoesAnyWrite()) {
251     return true;
252   }
253   if (other->GetSideEffects().DoesAnyWrite() && node->CanThrow()) {
254     return true;
255   }
256   if (other->CanThrow() && node->CanThrow()) {
257     return true;
258   }
259 
260   // Above checks should cover all cases where we cannot reorder two
261   // instructions which may throw exception.
262   return false;
263 }
264 
265 // Check whether `node` depends on `other`, taking into account `SideEffect`
266 // information and `CanThrow` information.
HasSideEffectDependency(HInstruction * node,HInstruction * other) const267 bool SchedulingGraph::HasSideEffectDependency(HInstruction* node,
268                                               HInstruction* other) const {
269   if (HasMemoryDependency(node, other)) {
270     return true;
271   }
272 
273   // Even if above memory dependency check has passed, it is still necessary to
274   // check dependencies between instructions that can throw and instructions
275   // that write to memory.
276   if (HasExceptionDependency(node, other)) {
277     return true;
278   }
279 
280   return false;
281 }
282 
283 // Check if the specified instruction is a better candidate which more likely will
284 // have other instructions depending on it.
IsBetterCandidateWithMoreLikelyDependencies(HInstruction * new_candidate,HInstruction * old_candidate)285 static bool IsBetterCandidateWithMoreLikelyDependencies(HInstruction* new_candidate,
286                                                         HInstruction* old_candidate) {
287   if (!new_candidate->GetSideEffects().Includes(old_candidate->GetSideEffects())) {
288     // Weaker side effects.
289     return false;
290   }
291   if (old_candidate->GetSideEffects().Includes(new_candidate->GetSideEffects())) {
292     // Same side effects, check if `new_candidate` has stronger `CanThrow()`.
293     return new_candidate->CanThrow() && !old_candidate->CanThrow();
294   } else {
295     // Stronger side effects, check if `new_candidate` has at least as strong `CanThrow()`.
296     return new_candidate->CanThrow() || !old_candidate->CanThrow();
297   }
298 }
299 
AddDependencies(HInstruction * instruction,bool is_scheduling_barrier)300 void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_scheduling_barrier) {
301   SchedulingNode* instruction_node = GetNode(instruction);
302 
303   // Define-use dependencies.
304   for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
305     AddDataDependency(GetNode(use.GetUser()), instruction_node);
306   }
307 
308   // Scheduling barrier dependencies.
309   DCHECK(!is_scheduling_barrier || contains_scheduling_barrier_);
310   if (contains_scheduling_barrier_) {
311     // A barrier depends on instructions after it. And instructions before the
312     // barrier depend on it.
313     for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
314       SchedulingNode* other_node = GetNode(other);
315       CHECK(other_node != nullptr)
316           << other->DebugName()
317           << " is in block " << other->GetBlock()->GetBlockId()
318           << ", and expected in block " << instruction->GetBlock()->GetBlockId();
319       bool other_is_barrier = other_node->IsSchedulingBarrier();
320       if (is_scheduling_barrier || other_is_barrier) {
321         AddOtherDependency(other_node, instruction_node);
322       }
323       if (other_is_barrier) {
324         // This other scheduling barrier guarantees ordering of instructions after
325         // it, so avoid creating additional useless dependencies in the graph.
326         // For example if we have
327         //     instr_1
328         //     barrier_2
329         //     instr_3
330         //     barrier_4
331         //     instr_5
332         // we only create the following non-data dependencies
333         //     1 -> 2
334         //     2 -> 3
335         //     2 -> 4
336         //     3 -> 4
337         //     4 -> 5
338         // and do not create
339         //     1 -> 4
340         //     2 -> 5
341         // Note that in this example we could also avoid creating the dependency
342         // `2 -> 4`.  But if we remove `instr_3` that dependency is required to
343         // order the barriers. So we generate it to avoid a special case.
344         break;
345       }
346     }
347   }
348 
349   // Side effect dependencies.
350   if (!instruction->GetSideEffects().DoesNothing() || instruction->CanThrow()) {
351     HInstruction* dep_chain_candidate = nullptr;
352     for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
353       SchedulingNode* other_node = GetNode(other);
354       if (other_node->IsSchedulingBarrier()) {
355         // We have reached a scheduling barrier so we can stop further
356         // processing.
357         DCHECK(HasImmediateOtherDependency(other_node, instruction_node));
358         break;
359       }
360       if (HasSideEffectDependency(other, instruction)) {
361         if (dep_chain_candidate != nullptr &&
362             HasSideEffectDependency(other, dep_chain_candidate)) {
363           // Skip an explicit dependency to reduce memory usage, rely on the transitive dependency.
364         } else {
365           AddOtherDependency(other_node, instruction_node);
366         }
367         // Check if `other` is a better candidate which more likely will have other instructions
368         // depending on it.
369         if (dep_chain_candidate == nullptr ||
370             IsBetterCandidateWithMoreLikelyDependencies(other, dep_chain_candidate)) {
371           dep_chain_candidate = other;
372         }
373       }
374     }
375   }
376 
377   // Environment dependencies.
378   // We do not need to process those if the instruction is a scheduling barrier,
379   // since the barrier already has non-data dependencies on all following
380   // instructions.
381   if (!is_scheduling_barrier) {
382     for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
383       // Note that here we could stop processing if the environment holder is
384       // across a scheduling barrier. But checking this would likely require
385       // more work than simply iterating through environment uses.
386       AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node);
387     }
388   }
389 }
390 
HasImmediateDataDependency(const SchedulingNode * node,const SchedulingNode * other) const391 bool SchedulingGraph::HasImmediateDataDependency(const SchedulingNode* node,
392                                                  const SchedulingNode* other) const {
393   return ContainsElement(node->GetDataPredecessors(), other);
394 }
395 
HasImmediateDataDependency(const HInstruction * instruction,const HInstruction * other_instruction) const396 bool SchedulingGraph::HasImmediateDataDependency(const HInstruction* instruction,
397                                                  const HInstruction* other_instruction) const {
398   const SchedulingNode* node = GetNode(instruction);
399   const SchedulingNode* other = GetNode(other_instruction);
400   if (node == nullptr || other == nullptr) {
401     // Both instructions must be in current basic block, i.e. the SchedulingGraph can see their
402     // corresponding SchedulingNode in the graph, and tell whether there is a dependency.
403     // Otherwise there is no dependency from SchedulingGraph's perspective, for example,
404     // instruction and other_instruction are in different basic blocks.
405     return false;
406   }
407   return HasImmediateDataDependency(node, other);
408 }
409 
HasImmediateOtherDependency(const SchedulingNode * node,const SchedulingNode * other) const410 bool SchedulingGraph::HasImmediateOtherDependency(const SchedulingNode* node,
411                                                   const SchedulingNode* other) const {
412   return ContainsElement(node->GetOtherPredecessors(), other);
413 }
414 
HasImmediateOtherDependency(const HInstruction * instruction,const HInstruction * other_instruction) const415 bool SchedulingGraph::HasImmediateOtherDependency(const HInstruction* instruction,
416                                                   const HInstruction* other_instruction) const {
417   const SchedulingNode* node = GetNode(instruction);
418   const SchedulingNode* other = GetNode(other_instruction);
419   if (node == nullptr || other == nullptr) {
420     // Both instructions must be in current basic block, i.e. the SchedulingGraph can see their
421     // corresponding SchedulingNode in the graph, and tell whether there is a dependency.
422     // Otherwise there is no dependency from SchedulingGraph's perspective, for example,
423     // instruction and other_instruction are in different basic blocks.
424     return false;
425   }
426   return HasImmediateOtherDependency(node, other);
427 }
428 
InstructionTypeId(const HInstruction * instruction)429 static const std::string InstructionTypeId(const HInstruction* instruction) {
430   return DataType::TypeId(instruction->GetType()) + std::to_string(instruction->GetId());
431 }
432 
433 // Ideally we would reuse the graph visualizer code, but it is not available
434 // from here and it is not worth moving all that code only for our use.
DumpAsDotNode(std::ostream & output,const SchedulingNode * node)435 static void DumpAsDotNode(std::ostream& output, const SchedulingNode* node) {
436   const HInstruction* instruction = node->GetInstruction();
437   // Use the instruction typed id as the node identifier.
438   std::string instruction_id = InstructionTypeId(instruction);
439   output << instruction_id << "[shape=record, label=\""
440       << instruction_id << ' ' << instruction->DebugName() << " [";
441   // List the instruction's inputs in its description. When visualizing the
442   // graph this helps differentiating data inputs from other dependencies.
443   const char* seperator = "";
444   for (const HInstruction* input : instruction->GetInputs()) {
445     output << seperator << InstructionTypeId(input);
446     seperator = ",";
447   }
448   output << "]";
449   // Other properties of the node.
450   output << "\\ninternal_latency: " << node->GetInternalLatency();
451   output << "\\ncritical_path: " << node->GetCriticalPath();
452   if (node->IsSchedulingBarrier()) {
453     output << "\\n(barrier)";
454   }
455   output << "\"];\n";
456   // We want program order to go from top to bottom in the graph output, so we
457   // reverse the edges and specify `dir=back`.
458   for (const SchedulingNode* predecessor : node->GetDataPredecessors()) {
459     const HInstruction* predecessor_instruction = predecessor->GetInstruction();
460     output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
461         << "[label=\"" << predecessor->GetLatency() << "\",dir=back]\n";
462   }
463   for (const SchedulingNode* predecessor : node->GetOtherPredecessors()) {
464     const HInstruction* predecessor_instruction = predecessor->GetInstruction();
465     output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
466         << "[dir=back,color=blue]\n";
467   }
468 }
469 
DumpAsDotGraph(const std::string & description,const ScopedArenaVector<SchedulingNode * > & initial_candidates)470 void SchedulingGraph::DumpAsDotGraph(const std::string& description,
471                                      const ScopedArenaVector<SchedulingNode*>& initial_candidates) {
472   // TODO(xueliang): ideally we should move scheduling information into HInstruction, after that
473   // we should move this dotty graph dump feature to visualizer, and have a compiler option for it.
474   std::ofstream output("scheduling_graphs.dot", std::ofstream::out | std::ofstream::app);
475   // Description of this graph, as a comment.
476   output << "// " << description << "\n";
477   // Start the dot graph. Use an increasing index for easier differentiation.
478   output << "digraph G {\n";
479   for (const auto& entry : nodes_map_) {
480     SchedulingNode* node = entry.second.get();
481     DumpAsDotNode(output, node);
482   }
483   // Create a fake 'end_of_scheduling' node to help visualization of critical_paths.
484   for (SchedulingNode* node : initial_candidates) {
485     const HInstruction* instruction = node->GetInstruction();
486     output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n "
487       << "[label=\"" << node->GetLatency() << "\",dir=back]\n";
488   }
489   // End of the dot graph.
490   output << "}\n";
491   output.close();
492 }
493 
SelectMaterializedCondition(ScopedArenaVector<SchedulingNode * > * nodes,const SchedulingGraph & graph) const494 SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition(
495     ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) const {
496   // Schedule condition inputs that can be materialized immediately before their use.
497   // In following example, after we've scheduled HSelect, we want LessThan to be scheduled
498   // immediately, because it is a materialized condition, and will be emitted right before HSelect
499   // in codegen phase.
500   //
501   // i20 HLessThan [...]                  HLessThan    HAdd      HAdd
502   // i21 HAdd [...]                ===>      |          |         |
503   // i22 HAdd [...]                          +----------+---------+
504   // i23 HSelect [i21, i22, i20]                     HSelect
505 
506   if (prev_select_ == nullptr) {
507     return nullptr;
508   }
509 
510   const HInstruction* instruction = prev_select_->GetInstruction();
511   const HCondition* condition = nullptr;
512   DCHECK(instruction != nullptr);
513 
514   if (instruction->IsIf()) {
515     condition = instruction->AsIf()->InputAt(0)->AsCondition();
516   } else if (instruction->IsSelect()) {
517     condition = instruction->AsSelect()->GetCondition()->AsCondition();
518   }
519 
520   SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr;
521 
522   if ((condition_node != nullptr) &&
523       condition->HasOnlyOneNonEnvironmentUse() &&
524       ContainsElement(*nodes, condition_node)) {
525     DCHECK(!condition_node->HasUnscheduledSuccessors());
526     // Remove the condition from the list of candidates and schedule it.
527     RemoveElement(*nodes, condition_node);
528     return condition_node;
529   }
530 
531   return nullptr;
532 }
533 
PopHighestPriorityNode(ScopedArenaVector<SchedulingNode * > * nodes,const SchedulingGraph & graph)534 SchedulingNode* CriticalPathSchedulingNodeSelector::PopHighestPriorityNode(
535     ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) {
536   DCHECK(!nodes->empty());
537   SchedulingNode* select_node = nullptr;
538 
539   // Optimize for materialized condition and its emit before use scenario.
540   select_node = SelectMaterializedCondition(nodes, graph);
541 
542   if (select_node == nullptr) {
543     // Get highest priority node based on critical path information.
544     select_node = (*nodes)[0];
545     size_t select = 0;
546     for (size_t i = 1, e = nodes->size(); i < e; i++) {
547       SchedulingNode* check = (*nodes)[i];
548       SchedulingNode* candidate = (*nodes)[select];
549       select_node = GetHigherPrioritySchedulingNode(candidate, check);
550       if (select_node == check) {
551         select = i;
552       }
553     }
554     DeleteNodeAtIndex(nodes, select);
555   }
556 
557   prev_select_ = select_node;
558   return select_node;
559 }
560 
GetHigherPrioritySchedulingNode(SchedulingNode * candidate,SchedulingNode * check) const561 SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingNode(
562     SchedulingNode* candidate, SchedulingNode* check) const {
563   uint32_t candidate_path = candidate->GetCriticalPath();
564   uint32_t check_path = check->GetCriticalPath();
565   // First look at the critical_path.
566   if (check_path != candidate_path) {
567     return check_path < candidate_path ? check : candidate;
568   }
569   // If both critical paths are equal, schedule instructions with a higher latency
570   // first in program order.
571   return check->GetLatency() < candidate->GetLatency() ? check : candidate;
572 }
573 
Schedule(HGraph * graph)574 void HScheduler::Schedule(HGraph* graph) {
575   // We run lsa here instead of in a separate pass to better control whether we
576   // should run the analysis or not.
577   const HeapLocationCollector* heap_location_collector = nullptr;
578   LoadStoreAnalysis lsa(graph);
579   if (!only_optimize_loop_blocks_ || graph->HasLoops()) {
580     lsa.Run();
581     heap_location_collector = &lsa.GetHeapLocationCollector();
582   }
583 
584   for (HBasicBlock* block : graph->GetReversePostOrder()) {
585     if (IsSchedulable(block)) {
586       Schedule(block, heap_location_collector);
587     }
588   }
589 }
590 
Schedule(HBasicBlock * block,const HeapLocationCollector * heap_location_collector)591 void HScheduler::Schedule(HBasicBlock* block,
592                           const HeapLocationCollector* heap_location_collector) {
593   ScopedArenaAllocator allocator(block->GetGraph()->GetArenaStack());
594   ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator.Adapter(kArenaAllocScheduler));
595 
596   // Build the scheduling graph.
597   SchedulingGraph scheduling_graph(this, &allocator, heap_location_collector);
598   for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
599     HInstruction* instruction = it.Current();
600     CHECK_EQ(instruction->GetBlock(), block)
601         << instruction->DebugName()
602         << " is in block " << instruction->GetBlock()->GetBlockId()
603         << ", and expected in block " << block->GetBlockId();
604     SchedulingNode* node = scheduling_graph.AddNode(instruction, IsSchedulingBarrier(instruction));
605     CalculateLatency(node);
606     scheduling_nodes.push_back(node);
607   }
608 
609   if (scheduling_graph.Size() <= 1) {
610     return;
611   }
612 
613   cursor_ = block->GetLastInstruction();
614 
615   // The list of candidates for scheduling. A node becomes a candidate when all
616   // its predecessors have been scheduled.
617   ScopedArenaVector<SchedulingNode*> candidates(allocator.Adapter(kArenaAllocScheduler));
618 
619   // Find the initial candidates for scheduling.
620   for (SchedulingNode* node : scheduling_nodes) {
621     if (!node->HasUnscheduledSuccessors()) {
622       node->MaybeUpdateCriticalPath(node->GetLatency());
623       candidates.push_back(node);
624     }
625   }
626 
627   ScopedArenaVector<SchedulingNode*> initial_candidates(allocator.Adapter(kArenaAllocScheduler));
628   if (kDumpDotSchedulingGraphs) {
629     // Remember the list of initial candidates for debug output purposes.
630     initial_candidates.assign(candidates.begin(), candidates.end());
631   }
632 
633   // Schedule all nodes.
634   selector_->Reset();
635   while (!candidates.empty()) {
636     SchedulingNode* node = selector_->PopHighestPriorityNode(&candidates, scheduling_graph);
637     Schedule(node, &candidates);
638   }
639 
640   if (kDumpDotSchedulingGraphs) {
641     // Dump the graph in `dot` format.
642     HGraph* graph = block->GetGraph();
643     std::stringstream description;
644     description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx())
645         << " B" << block->GetBlockId();
646     scheduling_graph.DumpAsDotGraph(description.str(), initial_candidates);
647   }
648 }
649 
Schedule(SchedulingNode * scheduling_node,ScopedArenaVector<SchedulingNode * > * candidates)650 void HScheduler::Schedule(SchedulingNode* scheduling_node,
651                           /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates) {
652   // Check whether any of the node's predecessors will be valid candidates after
653   // this node is scheduled.
654   uint32_t path_to_node = scheduling_node->GetCriticalPath();
655   for (SchedulingNode* predecessor : scheduling_node->GetDataPredecessors()) {
656     predecessor->MaybeUpdateCriticalPath(
657         path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency());
658     predecessor->DecrementNumberOfUnscheduledSuccessors();
659     if (!predecessor->HasUnscheduledSuccessors()) {
660       candidates->push_back(predecessor);
661     }
662   }
663   for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) {
664     // Do not update the critical path.
665     // The 'other' (so 'non-data') dependencies (usually) do not represent a
666     // 'material' dependency of nodes on others. They exist for program
667     // correctness. So we do not use them to compute the critical path.
668     predecessor->DecrementNumberOfUnscheduledSuccessors();
669     if (!predecessor->HasUnscheduledSuccessors()) {
670       candidates->push_back(predecessor);
671     }
672   }
673 
674   Schedule(scheduling_node->GetInstruction());
675 }
676 
677 // Move an instruction after cursor instruction inside one basic block.
MoveAfterInBlock(HInstruction * instruction,HInstruction * cursor)678 static void MoveAfterInBlock(HInstruction* instruction, HInstruction* cursor) {
679   DCHECK_EQ(instruction->GetBlock(), cursor->GetBlock());
680   DCHECK_NE(cursor, cursor->GetBlock()->GetLastInstruction());
681   DCHECK(!instruction->IsControlFlow());
682   DCHECK(!cursor->IsControlFlow());
683   instruction->MoveBefore(cursor->GetNext(), /* do_checks= */ false);
684 }
685 
Schedule(HInstruction * instruction)686 void HScheduler::Schedule(HInstruction* instruction) {
687   if (instruction == cursor_) {
688     cursor_ = cursor_->GetPrevious();
689   } else {
690     MoveAfterInBlock(instruction, cursor_);
691   }
692 }
693 
IsSchedulable(const HInstruction * instruction) const694 bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
695   // We want to avoid exhaustively listing all instructions, so we first check
696   // for instruction categories that we know are safe.
697   if (instruction->IsControlFlow() ||
698       instruction->IsConstant()) {
699     return true;
700   }
701   // Currently all unary and binary operations are safe to schedule, so avoid
702   // checking for each of them individually.
703   // Since nothing prevents a new scheduling-unsafe HInstruction to subclass
704   // HUnaryOperation (or HBinaryOperation), check in debug mode that we have
705   // the exhaustive lists here.
706   if (instruction->IsUnaryOperation()) {
707     DCHECK(instruction->IsAbs() ||
708            instruction->IsBooleanNot() ||
709            instruction->IsNot() ||
710            instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName();
711     return true;
712   }
713   if (instruction->IsBinaryOperation()) {
714     DCHECK(instruction->IsAdd() ||
715            instruction->IsAnd() ||
716            instruction->IsCompare() ||
717            instruction->IsCondition() ||
718            instruction->IsDiv() ||
719            instruction->IsMin() ||
720            instruction->IsMax() ||
721            instruction->IsMul() ||
722            instruction->IsOr() ||
723            instruction->IsRem() ||
724            instruction->IsRor() ||
725            instruction->IsShl() ||
726            instruction->IsShr() ||
727            instruction->IsSub() ||
728            instruction->IsUShr() ||
729            instruction->IsXor()) << "unexpected instruction " << instruction->DebugName();
730     return true;
731   }
732   // The scheduler should not see any of these.
733   DCHECK(!instruction->IsParallelMove()) << "unexpected instruction " << instruction->DebugName();
734   // List of instructions explicitly excluded:
735   //    HClearException
736   //    HClinitCheck
737   //    HDeoptimize
738   //    HLoadClass
739   //    HLoadException
740   //    HMemoryBarrier
741   //    HMonitorOperation
742   //    HNativeDebugInfo
743   //    HThrow
744   //    HTryBoundary
745   // TODO: Some of the instructions above may be safe to schedule (maybe as
746   // scheduling barriers).
747   return instruction->IsArrayGet() ||
748       instruction->IsArraySet() ||
749       instruction->IsArrayLength() ||
750       instruction->IsBoundType() ||
751       instruction->IsBoundsCheck() ||
752       instruction->IsCheckCast() ||
753       instruction->IsClassTableGet() ||
754       instruction->IsCurrentMethod() ||
755       instruction->IsDivZeroCheck() ||
756       (instruction->IsInstanceFieldGet() && !instruction->AsInstanceFieldGet()->IsVolatile()) ||
757       (instruction->IsInstanceFieldSet() && !instruction->AsInstanceFieldSet()->IsVolatile()) ||
758       instruction->IsInstanceOf() ||
759       instruction->IsInvokeInterface() ||
760       instruction->IsInvokeStaticOrDirect() ||
761       instruction->IsInvokeUnresolved() ||
762       instruction->IsInvokeVirtual() ||
763       instruction->IsLoadString() ||
764       instruction->IsNewArray() ||
765       instruction->IsNewInstance() ||
766       instruction->IsNullCheck() ||
767       instruction->IsPackedSwitch() ||
768       instruction->IsParameterValue() ||
769       instruction->IsPhi() ||
770       instruction->IsReturn() ||
771       instruction->IsReturnVoid() ||
772       instruction->IsSelect() ||
773       (instruction->IsStaticFieldGet() && !instruction->AsStaticFieldGet()->IsVolatile()) ||
774       (instruction->IsStaticFieldSet() && !instruction->AsStaticFieldSet()->IsVolatile()) ||
775       instruction->IsSuspendCheck() ||
776       instruction->IsTypeConversion();
777 }
778 
IsSchedulable(const HBasicBlock * block) const779 bool HScheduler::IsSchedulable(const HBasicBlock* block) const {
780   // We may be only interested in loop blocks.
781   if (only_optimize_loop_blocks_ && !block->IsInLoop()) {
782     return false;
783   }
784   if (block->GetTryCatchInformation() != nullptr) {
785     // Do not schedule blocks that are part of try-catch.
786     // Because scheduler cannot see if catch block has assumptions on the instruction order in
787     // the try block. In following example, if we enable scheduler for the try block,
788     // MulitiplyAccumulate may be scheduled before DivZeroCheck,
789     // which can result in an incorrect value in the catch block.
790     //   try {
791     //     a = a/b;    // DivZeroCheck
792     //                 // Div
793     //     c = c*d+e;  // MulitiplyAccumulate
794     //   } catch {System.out.print(c); }
795     return false;
796   }
797   // Check whether all instructions in this block are schedulable.
798   for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
799     if (!IsSchedulable(it.Current())) {
800       return false;
801     }
802   }
803   return true;
804 }
805 
IsSchedulingBarrier(const HInstruction * instr) const806 bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
807   return instr->IsControlFlow() ||
808       // Don't break calling convention.
809       instr->IsParameterValue() ||
810       // Code generation of goto relies on SuspendCheck's position.
811       instr->IsSuspendCheck();
812 }
813 
Run(bool only_optimize_loop_blocks,bool schedule_randomly)814 bool HInstructionScheduling::Run(bool only_optimize_loop_blocks,
815                                  bool schedule_randomly) {
816 #if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm)
817   // Phase-local allocator that allocates scheduler internal data structures like
818   // scheduling nodes, internel nodes map, dependencies, etc.
819   CriticalPathSchedulingNodeSelector critical_path_selector;
820   RandomSchedulingNodeSelector random_selector;
821   SchedulingNodeSelector* selector = schedule_randomly
822       ? static_cast<SchedulingNodeSelector*>(&random_selector)
823       : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
824 #else
825   // Avoid compilation error when compiling for unsupported instruction set.
826   UNUSED(only_optimize_loop_blocks);
827   UNUSED(schedule_randomly);
828   UNUSED(codegen_);
829 #endif
830 
831   switch (instruction_set_) {
832 #ifdef ART_ENABLE_CODEGEN_arm64
833     case InstructionSet::kArm64: {
834       arm64::HSchedulerARM64 scheduler(selector);
835       scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
836       scheduler.Schedule(graph_);
837       break;
838     }
839 #endif
840 #if defined(ART_ENABLE_CODEGEN_arm)
841     case InstructionSet::kThumb2:
842     case InstructionSet::kArm: {
843       arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_);
844       arm::HSchedulerARM scheduler(selector, &arm_latency_visitor);
845       scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
846       scheduler.Schedule(graph_);
847       break;
848     }
849 #endif
850     default:
851       break;
852   }
853   return true;
854 }
855 
856 }  // namespace art
857