/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_H_
#define ART_COMPILER_OPTIMIZING_SCHEDULER_H_

#include <fstream>

#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
#include "base/time_utils.h"
#include "code_generator.h"
#include "load_store_analysis.h"
#include "nodes.h"
#include "optimization.h"

namespace art HIDDEN {

// General description of instruction scheduling.
//
// This pass tries to improve the quality of the generated code by reordering
// instructions in the graph to avoid execution delays caused by execution
// dependencies.
// Currently, scheduling is performed at the block level, so no `HInstruction`
// ever leaves its block in this pass.
//
// The scheduling process iterates through blocks in the graph. For blocks that
// we can and want to schedule:
// 1) Build a dependency graph for instructions.
//    It includes data dependencies (inputs/uses), but also environment
//    dependencies and side-effect dependencies.
// 2) Schedule the dependency graph.
//    This is a topological sort of the dependency graph, using heuristics to
//    decide what node to schedule first when there are multiple candidates.
//
// A few factors impacting the quality of the scheduling are:
// - The heuristics used to decide what node to schedule in the topological sort
//   when there are multiple valid candidates. There is a wide range of
//   complexity possible here, going from a simple model only considering
//   latencies, to a super detailed CPU pipeline model.
// - Fewer dependencies in the dependency graph give more freedom to the
//   scheduling heuristics. For example, de-aliasing can open up opportunities
//   for reordering memory accesses.
// - The level of abstraction of the IR. It is easier to evaluate scheduling for
//   IRs that translate to a single assembly instruction than for IRs
//   that generate multiple assembly instructions or generate different code
//   depending on properties of the IR.
// - Scheduling is performed before register allocation, so it is not aware of
//   the impact of moving instructions on register allocation.
//
//
// The scheduling code uses the terms predecessors, successors, and dependencies.
// This can be confusing at times, so here are clarifications.
// These terms are used from the point of view of the program dependency graph. So
// the inputs of an instruction are part of its dependencies, and hence part of its
// predecessors. Likewise, the uses of an instruction are (part of) its successors.
// (Side-effect dependencies can yield predecessors or successors that are not
// inputs or uses.)
//
// Here is a trivial example. For the Java code:
//
//    int a = 1 + 2;
//
// we would have the instructions
//
//    i1 HIntConstant 1
//    i2 HIntConstant 2
//    i3 HAdd [i1,i2]
//
// `i1` and `i2` are predecessors of `i3`.
// `i3` is a successor of `i1` and a successor of `i2`.
// In a scheduling graph for this code we would have three nodes `n1`, `n2`,
// and `n3` (respectively for instructions `i1`, `i2`, and `i3`).
// Conceptually the program dependency graph for this would contain two edges
//
//    n1 -> n3
//    n2 -> n3
//
// Since we schedule backwards (starting from the last instruction in each basic
// block), the implementation of nodes keeps a list of pointers to their
// predecessors. So `n3` would keep pointers to its predecessors `n1` and `n2`.
//
// Node dependencies are also referred to from the program dependency graph
// point of view: we say that node `B` immediately depends on `A` if there is an
// edge from `A` to `B` in the program dependency graph. `A` is a predecessor of
// `B`, and `B` is a successor of `A`. In the example above `n3` depends on `n1`
// and `n2`.
// Since nodes in the scheduling graph keep a list of their predecessors, node
// `B` will have a pointer to its predecessor `A`.
// As we schedule backwards, `B` will be selected for scheduling before `A` is.
//
// So the scheduling for the example above could happen as follows:
//
//    |---------------------------+------------------------|
//    | candidates for scheduling | instructions scheduled |
//    |---------------------------+------------------------|
//
// The only node without successors is `n3`, so it is the only initial
// candidate.
//
//    | n3                        | (none)                 |
//
// We schedule `n3` as the last (and only) instruction. All its predecessors
// that do not have any unscheduled successors become candidates. That is, `n1`
// and `n2` become candidates.
//
//    | n1, n2                    | n3                     |
//
// One of the candidates is selected. In practice this is where scheduling
// heuristics kick in, to decide which of the candidates should be selected.
// In this example, let it be `n1`. It is scheduled before previously scheduled
// nodes (in program order). There are no other nodes to add to the list of
// candidates.
//
//    | n2                        | n1                     |
//    |                           | n3                     |
//
// The only candidate available for scheduling is `n2`. Schedule it before
// (in program order) the previously scheduled nodes.
//
//    | (none)                    | n2                     |
//    |                           | n1                     |
//    |                           | n3                     |
//    |---------------------------+------------------------|
//
// So finally the instructions will be executed in the order `i2`, `i1`, and `i3`.
// In this trivial example, it does not matter which of `i1` and `i2` is
// scheduled first since they are constants. However, the same process would
// apply if `i1` and `i2` were actual operations (for example `HMul` and `HDiv`).

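// To make the walk-through above concrete, the core of the backward list
// scheduling loop can be sketched as follows. This is a simplified,
// illustrative sketch only (`ScheduleBeforePreviouslyScheduledNodes()` is a
// made-up helper, and "other" predecessors and latency bookkeeping are
// omitted); the actual implementation is in `HScheduler::Schedule()`.
//
//    while (!candidates.empty()) {
//      SchedulingNode* node = selector->PopHighestPriorityNode(&candidates, graph);
//      ScheduleBeforePreviouslyScheduledNodes(node);
//      for (SchedulingNode* predecessor : node->GetDataPredecessors()) {
//        predecessor->DecrementNumberOfUnscheduledSuccessors();
//        if (!predecessor->HasUnscheduledSuccessors()) {
//          candidates.push_back(predecessor);
//        }
//      }
//    }
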
// Set to true to have instruction scheduling dump scheduling graphs to the file
// `scheduling_graphs.dot`. See `SchedulingGraph::DumpAsDotGraph()`.
static constexpr bool kDumpDotSchedulingGraphs = false;

// Typically used as a default instruction latency.
static constexpr uint32_t kGenericInstructionLatency = 1;

class HScheduler;

/**
 * A node representing an `HInstruction` in the `SchedulingGraph`.
 */
class SchedulingNode : public DeletableArenaObject<kArenaAllocScheduler> {
 public:
  SchedulingNode(HInstruction* instr, ScopedArenaAllocator* allocator, bool is_scheduling_barrier)
      : latency_(0),
        internal_latency_(0),
        critical_path_(0),
        instruction_(instr),
        is_scheduling_barrier_(is_scheduling_barrier),
        data_predecessors_(allocator->Adapter(kArenaAllocScheduler)),
        other_predecessors_(allocator->Adapter(kArenaAllocScheduler)),
        num_unscheduled_successors_(0) {
    data_predecessors_.reserve(kPreallocatedPredecessors);
  }

  void AddDataPredecessor(SchedulingNode* predecessor) {
    // Check whether the predecessor has been added earlier.
    if (HasDataDependency(predecessor)) {
      return;
    }
    data_predecessors_.push_back(predecessor);
    predecessor->num_unscheduled_successors_++;
  }

  const ScopedArenaVector<SchedulingNode*>& GetDataPredecessors() const {
    return data_predecessors_;
  }

  void AddOtherPredecessor(SchedulingNode* predecessor) {
    // Check whether the predecessor has been added earlier.
    // As an optimization of the scheduling graph, we don't need to create another dependency if
    // there is a data dependency between scheduling nodes.
    if (HasOtherDependency(predecessor) || HasDataDependency(predecessor)) {
      return;
    }
    other_predecessors_.push_back(predecessor);
    predecessor->num_unscheduled_successors_++;
  }

  const ScopedArenaVector<SchedulingNode*>& GetOtherPredecessors() const {
    return other_predecessors_;
  }

  void DecrementNumberOfUnscheduledSuccessors() {
    num_unscheduled_successors_--;
  }

  void MaybeUpdateCriticalPath(uint32_t other_critical_path) {
    critical_path_ = std::max(critical_path_, other_critical_path);
  }

  bool HasUnscheduledSuccessors() const {
    return num_unscheduled_successors_ != 0;
  }

  HInstruction* GetInstruction() const { return instruction_; }
  uint32_t GetLatency() const { return latency_; }
  void SetLatency(uint32_t latency) { latency_ = latency; }
  uint32_t GetInternalLatency() const { return internal_latency_; }
  void SetInternalLatency(uint32_t internal_latency) { internal_latency_ = internal_latency; }
  uint32_t GetCriticalPath() const { return critical_path_; }
  bool IsSchedulingBarrier() const { return is_scheduling_barrier_; }

  bool HasDataDependency(const SchedulingNode* node) const {
    return ContainsElement(data_predecessors_, node);
  }

  bool HasOtherDependency(const SchedulingNode* node) const {
    return ContainsElement(other_predecessors_, node);
  }

 private:
  // The latency of this node. It represents the latency from the moment the
  // last instruction for this node has executed to the moment the result
  // produced by this node is available to users.
  uint32_t latency_;
  // This represents the time spent *within* the generated code for this node.
  // It should be zero for nodes that only generate a single instruction.
  uint32_t internal_latency_;

  // The critical path from this instruction to the end of scheduling. It is
  // used by the scheduling heuristics to measure the priority of this instruction.
  // It is defined as
  //     critical_path_ = latency_ + max((use.internal_latency_ + use.critical_path_) for all uses)
  // For example, a node with `latency_` 2 whose only use has `internal_latency_` 1
  // and `critical_path_` 4 gets a critical path of 2 + (1 + 4) = 7.
  // (Note that here 'uses' is equivalent to 'data successors'. Also see comments in
  // `HScheduler::Schedule(SchedulingNode* scheduling_node)`).
  uint32_t critical_path_;

  // The instruction that this node represents.
  HInstruction* const instruction_;

  // If a node is a scheduling barrier, other nodes cannot be scheduled before it.
  const bool is_scheduling_barrier_;

  // The lists of predecessors. They cannot be scheduled before this node. Once
  // this node is scheduled, we check whether any of its predecessors has become a
  // valid candidate for scheduling.
  // Predecessors in `data_predecessors_` are data dependencies. Those in
  // `other_predecessors_` contain side-effect dependencies, environment
  // dependencies, and scheduling barrier dependencies.
  ScopedArenaVector<SchedulingNode*> data_predecessors_;
  ScopedArenaVector<SchedulingNode*> other_predecessors_;

  // The number of unscheduled successors for this node. This number is
  // decremented as successors are scheduled. When it reaches zero this node
  // becomes a valid candidate to schedule.
  uint32_t num_unscheduled_successors_;

  static constexpr size_t kPreallocatedPredecessors = 4;
};
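
// To make the bookkeeping above concrete, here is how the `n1`/`n2`/`n3`
// example from the introduction could be wired up. This is an illustrative
// sketch only; in practice the edges are added by `SchedulingGraph::AddNode()`
// below.
//
//    n3->AddDataPredecessor(n1);  // n1->num_unscheduled_successors_ becomes 1.
//    n3->AddDataPredecessor(n2);  // n2->num_unscheduled_successors_ becomes 1.
//
// `n3` has no successors, so it starts out as a valid scheduling candidate.
// Scheduling `n3` decrements both counters back to zero, at which point `n1`
// and `n2` become candidates in turn.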

/*
 * Provide analysis of instruction dependencies (side effects) which are not in the form of
 * explicit def-use data dependencies.
 */
class SideEffectDependencyAnalysis {
 public:
  explicit SideEffectDependencyAnalysis(const HeapLocationCollector* heap_location_collector)
      : memory_dependency_analysis_(heap_location_collector) {}

  bool HasSideEffectDependency(HInstruction* instr1, HInstruction* instr2) const {
    if (memory_dependency_analysis_.HasMemoryDependency(instr1, instr2)) {
      return true;
    }

    // Even if the memory dependency check above found no dependency, it is
    // still necessary to check dependencies between instructions that can
    // throw and instructions that write to memory.
    if (HasExceptionDependency(instr1, instr2)) {
      return true;
    }

    return false;
  }

 private:
  static bool HasExceptionDependency(const HInstruction* instr1, const HInstruction* instr2);
  static bool HasReorderingDependency(const HInstruction* instr1, const HInstruction* instr2);

  /*
   * Memory dependency analysis of instructions based on their memory side effects
   * and heap location information from the LSA (load-store analysis) pass if it is
   * provided.
   */
  class MemoryDependencyAnalysis {
   public:
    explicit MemoryDependencyAnalysis(const HeapLocationCollector* heap_location_collector)
        : heap_location_collector_(heap_location_collector) {}

    bool HasMemoryDependency(HInstruction* instr1, HInstruction* instr2) const;

   private:
    bool ArrayAccessMayAlias(HInstruction* instr1, HInstruction* instr2) const;
    bool FieldAccessMayAlias(const HInstruction* instr1, const HInstruction* instr2) const;
    size_t ArrayAccessHeapLocation(HInstruction* instruction) const;
    size_t FieldAccessHeapLocation(const HInstruction* instruction) const;

    const HeapLocationCollector* const heap_location_collector_;
  };

  MemoryDependencyAnalysis memory_dependency_analysis_;
};
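
// For example, given
//
//    i1 HArraySet [array, index1, value]
//    i2 HArrayGet [array, index2]
//
// `HasSideEffectDependency(i2, i1)` is expected to be true unless the heap
// location information can prove that `index1` and `index2` never alias, in
// which case the two accesses may be reordered. (Illustrative example only;
// the precise aliasing decisions are made in `MemoryDependencyAnalysis`.)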

/*
 * Directed acyclic graph for scheduling.
 */
class SchedulingGraph : public ValueObject {
 public:
  SchedulingGraph(ScopedArenaAllocator* allocator,
                  const HeapLocationCollector* heap_location_collector)
      : allocator_(allocator),
        contains_scheduling_barrier_(false),
        nodes_map_(allocator_->Adapter(kArenaAllocScheduler)),
        side_effect_dependency_analysis_(heap_location_collector) {}

  SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) {
    std::unique_ptr<SchedulingNode> node(
        new (allocator_) SchedulingNode(instr, allocator_, is_scheduling_barrier));
    SchedulingNode* result = node.get();
    nodes_map_.insert(std::make_pair(instr, std::move(node)));
    contains_scheduling_barrier_ |= is_scheduling_barrier;
    AddDependencies(result, is_scheduling_barrier);
    return result;
  }

  SchedulingNode* GetNode(const HInstruction* instr) const {
    auto it = nodes_map_.find(instr);
    if (it == nodes_map_.end()) {
      return nullptr;
    } else {
      return it->second.get();
    }
  }

  size_t Size() const {
    return nodes_map_.size();
  }

  // Dump the scheduling graph, in dot file format, appending it to the file
  // `scheduling_graphs.dot`.
  void DumpAsDotGraph(const std::string& description,
                      const ScopedArenaVector<SchedulingNode*>& initial_candidates);

 protected:
  void AddDependency(SchedulingNode* node, SchedulingNode* dependency, bool is_data_dependency);
  void AddDataDependency(SchedulingNode* node, SchedulingNode* dependency) {
    AddDependency(node, dependency, /*is_data_dependency*/true);
  }
  void AddOtherDependency(SchedulingNode* node, SchedulingNode* dependency) {
    AddDependency(node, dependency, /*is_data_dependency*/false);
  }

  // Analyze whether the scheduling node has cross-iteration dependencies, which
  // means it uses values defined in the previous iteration.
  //
  // Supported cases:
  //
  //   L:
  //     v2 = loop_head_phi(v1)
  //     instr1(v2)
  //     v1 = instr2
  //     goto L
  //
  // In such cases moving instr2 before instr1 creates intersecting live ranges
  // of v1 and v2. As a result a separate register is needed to keep the value
  // defined by instr2, which is only used in the next iteration.
  // If instr2 is not moved, no additional register is needed: the register
  // used by instr1 is reused.
  // To prevent such a situation an "other" dependency between instr1 and instr2 must be set.
  void AddCrossIterationDependencies(SchedulingNode* node);

  // Add dependencies for the given `SchedulingNode`: inputs, environments, and side-effects.
  void AddDependencies(SchedulingNode* node, bool is_scheduling_barrier = false);

  ScopedArenaAllocator* const allocator_;
  bool contains_scheduling_barrier_;
  ScopedArenaHashMap<const HInstruction*, std::unique_ptr<SchedulingNode>> nodes_map_;
  SideEffectDependencyAnalysis side_effect_dependency_analysis_;
};

/*
 * The visitors derived from this base class are used by schedulers to evaluate
 * the latencies of `HInstruction`s.
 */
class SchedulingLatencyVisitor : public HGraphDelegateVisitor {
 public:
  // This class and its sub-classes will never be used to drive a visit of an
  // `HGraph` but only to visit `HInstructions` one at a time, so we do not need
  // to pass a valid graph to `HGraphDelegateVisitor()`.
  SchedulingLatencyVisitor()
      : HGraphDelegateVisitor(nullptr),
        last_visited_latency_(0),
        last_visited_internal_latency_(0) {}

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". "
        "Architecture-specific scheduling latency visitors must handle all instructions"
        " (potentially by overriding the generic `VisitInstruction()`).";
    UNREACHABLE();
  }

  void Visit(HInstruction* instruction) {
    instruction->Accept(this);
  }

  void CalculateLatency(SchedulingNode* node) {
    // By default nodes have no internal latency.
    last_visited_internal_latency_ = 0;
    Visit(node->GetInstruction());
  }

  uint32_t GetLastVisitedLatency() const { return last_visited_latency_; }
  uint32_t GetLastVisitedInternalLatency() const { return last_visited_internal_latency_; }

 protected:
  // The latency of the most recently visited SchedulingNode.
  // This is for reporting the latency value to the user of this visitor.
  uint32_t last_visited_latency_;
  // This represents the time spent *within* the generated code for the most recently
  // visited SchedulingNode. This is for reporting the internal latency value to the
  // user of this visitor.
  uint32_t last_visited_internal_latency_;
};
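
// A concrete visitor overrides the `Visit*()` methods for the instructions it
// supports and fills in the two latency fields. A minimal, hypothetical sketch
// (the real per-architecture visitors live in files such as scheduler_arm64.h;
// `kFooIntegerOpLatency` is an invented constant):
//
//    class SchedulingLatencyVisitorFoo final : public SchedulingLatencyVisitor {
//     public:
//      void VisitAdd(HAdd* /* instr */) override {
//        last_visited_latency_ = kFooIntegerOpLatency;
//      }
//      // Instructions without a dedicated visitor fall through to the fatal
//      // generic `VisitInstruction()` above, making missing latencies easy
//      // to spot during development.
//    };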

class SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> {
 public:
  virtual void Reset() {}
  virtual SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
                                                 const SchedulingGraph& graph) = 0;
  virtual ~SchedulingNodeSelector() {}
 protected:
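  // Remove the node at `index` in constant time by overwriting it with the
  // last element and popping the vector. Note that this does not preserve the
  // relative order of the remaining candidates.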
  static void DeleteNodeAtIndex(ScopedArenaVector<SchedulingNode*>* nodes, size_t index) {
    (*nodes)[index] = nodes->back();
    nodes->pop_back();
  }
};

/*
 * Select a `SchedulingNode` at random within the candidates.
 */
class RandomSchedulingNodeSelector : public SchedulingNodeSelector {
 public:
  RandomSchedulingNodeSelector() : seed_(0) {
    seed_ = static_cast<uint32_t>(NanoTime());
    srand(seed_);
  }

  SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
                                         const SchedulingGraph& graph) override {
    UNUSED(graph);
    DCHECK(!nodes->empty());
    size_t select = rand_r(&seed_) % nodes->size();
    SchedulingNode* select_node = (*nodes)[select];
    DeleteNodeAtIndex(nodes, select);
    return select_node;
  }

  uint32_t seed_;
};

/*
 * Select a `SchedulingNode` according to critical path information,
 * with heuristics to favor certain instruction patterns such as materialized conditions.
 */
class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector {
 public:
  CriticalPathSchedulingNodeSelector() : prev_select_(nullptr) {}

  void Reset() override { prev_select_ = nullptr; }
  SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
                                         const SchedulingGraph& graph) override;

 protected:
  SchedulingNode* GetHigherPrioritySchedulingNode(SchedulingNode* candidate,
                                                  SchedulingNode* check) const;

  SchedulingNode* SelectMaterializedCondition(ScopedArenaVector<SchedulingNode*>* nodes,
                                              const SchedulingGraph& graph) const;

 private:
  const SchedulingNode* prev_select_;
};

class HScheduler {
 public:
  explicit HScheduler(SchedulingNodeSelector* selector)
      : selector_(selector),
        only_optimize_loop_blocks_(true),
        cursor_(nullptr) {}
  virtual ~HScheduler() {}

  void Schedule(HGraph* graph);

  void SetOnlyOptimizeLoopBlocks(bool loop_only) { only_optimize_loop_blocks_ = loop_only; }

  // Instructions cannot be rescheduled across a scheduling barrier.
  virtual bool IsSchedulingBarrier(const HInstruction* instruction) const;

 protected:
  virtual std::pair<SchedulingGraph, ScopedArenaVector<SchedulingNode*>> BuildSchedulingGraph(
      HBasicBlock* block,
      ScopedArenaAllocator* allocator,
      const HeapLocationCollector* heap_location_collector) = 0;

  template <typename LatencyVisitor>
  std::pair<SchedulingGraph, ScopedArenaVector<SchedulingNode*>> BuildSchedulingGraph(
      HBasicBlock* block,
      ScopedArenaAllocator* allocator,
      const HeapLocationCollector* heap_location_collector,
      LatencyVisitor* latency_visitor) ALWAYS_INLINE {
    SchedulingGraph scheduling_graph(allocator, heap_location_collector);
    ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator->Adapter(kArenaAllocScheduler));
    for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
      HInstruction* instruction = it.Current();
      CHECK_EQ(instruction->GetBlock(), block)
          << instruction->DebugName()
          << " is in block " << instruction->GetBlock()->GetBlockId()
          << ", and expected in block " << block->GetBlockId();
      SchedulingNode* node =
          scheduling_graph.AddNode(instruction, IsSchedulingBarrier(instruction));
      latency_visitor->CalculateLatency(node);
      node->SetLatency(latency_visitor->GetLastVisitedLatency());
      node->SetInternalLatency(latency_visitor->GetLastVisitedInternalLatency());
      scheduling_nodes.push_back(node);
    }
    return {std::move(scheduling_graph), std::move(scheduling_nodes)};
  }

  void Schedule(HBasicBlock* block, const HeapLocationCollector* heap_location_collector);
  void Schedule(SchedulingNode* scheduling_node,
                /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates);
  void Schedule(HInstruction* instruction);

  // Any instruction returning `false` via this method will prevent its
  // containing basic block from being scheduled.
  // This method is used to restrict scheduling to instructions that we know are
  // safe to handle.
  //
  // By default, `HScheduler::IsSchedulable()` returns false for newly introduced
  // instructions. `HScheduler${ARCH}::IsSchedulable()` can be overridden to return
  // true for an instruction (see scheduler_arm64.h for example) if it is safe to
  // schedule it; in this case one *must* also look at/update
  // `HScheduler${ARCH}::IsSchedulingBarrier()` for this instruction.
  virtual bool IsSchedulable(const HInstruction* instruction) const;
  bool IsSchedulable(const HBasicBlock* block) const;

  SchedulingNodeSelector* const selector_;
  bool only_optimize_loop_blocks_;

  // A pointer indicating where the next instruction to be scheduled will be inserted.
  HInstruction* cursor_;

 private:
  DISALLOW_COPY_AND_ASSIGN(HScheduler);
};

class HInstructionScheduling : public HOptimization {
 public:
  HInstructionScheduling(HGraph* graph,
                         InstructionSet instruction_set,
                         CodeGenerator* cg = nullptr,
                         const char* name = kInstructionSchedulingPassName)
      : HOptimization(graph, name),
        codegen_(cg),
        instruction_set_(instruction_set) {}

  bool Run() override {
    return Run(/*only_optimize_loop_blocks*/ true, /*schedule_randomly*/ false);
  }

  bool Run(bool only_optimize_loop_blocks, bool schedule_randomly);

  static constexpr const char* kInstructionSchedulingPassName = "scheduler";

 private:
  CodeGenerator* const codegen_;
  const InstructionSet instruction_set_;
  DISALLOW_COPY_AND_ASSIGN(HInstructionScheduling);
};
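
// The pass is constructed and run like any other `HOptimization`. An
// illustrative (hypothetical) invocation, assuming `graph` and `codegen` are
// in scope:
//
//    HInstructionScheduling scheduling(graph, codegen->GetInstructionSet(), codegen);
//    scheduling.Run();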

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_H_