1 // Copyright 2015 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/compiler/instruction-scheduler.h"
6 
7 #include "src/base/adapters.h"
8 #include "src/base/utils/random-number-generator.h"
9 #include "src/isolate.h"
10 
11 namespace v8 {
12 namespace internal {
13 namespace compiler {
14 
AddNode(ScheduleGraphNode * node)15 void InstructionScheduler::SchedulingQueueBase::AddNode(
16     ScheduleGraphNode* node) {
17   // We keep the ready list sorted by total latency so that we can quickly find
18   // the next best candidate to schedule.
19   auto it = nodes_.begin();
20   while ((it != nodes_.end()) &&
21          ((*it)->total_latency() >= node->total_latency())) {
22     ++it;
23   }
24   nodes_.insert(it, node);
25 }
26 
27 
28 InstructionScheduler::ScheduleGraphNode*
PopBestCandidate(int cycle)29 InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
30   DCHECK(!IsEmpty());
31   auto candidate = nodes_.end();
32   for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
33     // We only consider instructions that have all their operands ready.
34     if (cycle >= (*iterator)->start_cycle()) {
35       candidate = iterator;
36       break;
37     }
38   }
39 
40   if (candidate != nodes_.end()) {
41     ScheduleGraphNode *result = *candidate;
42     nodes_.erase(candidate);
43     return result;
44   }
45 
46   return nullptr;
47 }
48 
49 
50 InstructionScheduler::ScheduleGraphNode*
PopBestCandidate(int cycle)51 InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
52   DCHECK(!IsEmpty());
53   // Choose a random element from the ready list.
54   auto candidate = nodes_.begin();
55   std::advance(candidate, isolate()->random_number_generator()->NextInt(
56       static_cast<int>(nodes_.size())));
57   ScheduleGraphNode *result = *candidate;
58   nodes_.erase(candidate);
59   return result;
60 }
61 
62 
ScheduleGraphNode(Zone * zone,Instruction * instr)63 InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(
64     Zone* zone,
65     Instruction* instr)
66     : instr_(instr),
67       successors_(zone),
68       unscheduled_predecessors_count_(0),
69       latency_(GetInstructionLatency(instr)),
70       total_latency_(-1),
71       start_cycle_(-1) {
72 }
73 
74 
AddSuccessor(ScheduleGraphNode * node)75 void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
76     ScheduleGraphNode* node) {
77   successors_.push_back(node);
78   node->unscheduled_predecessors_count_++;
79 }
80 
InstructionScheduler(Zone * zone,InstructionSequence * sequence)81 InstructionScheduler::InstructionScheduler(Zone* zone,
82                                            InstructionSequence* sequence)
83     : zone_(zone),
84       sequence_(sequence),
85       graph_(zone),
86       last_side_effect_instr_(nullptr),
87       pending_loads_(zone),
88       last_live_in_reg_marker_(nullptr),
89       last_deopt_or_trap_(nullptr),
90       operands_map_(zone) {}
91 
StartBlock(RpoNumber rpo)92 void InstructionScheduler::StartBlock(RpoNumber rpo) {
93   DCHECK(graph_.empty());
94   DCHECK_NULL(last_side_effect_instr_);
95   DCHECK(pending_loads_.empty());
96   DCHECK_NULL(last_live_in_reg_marker_);
97   DCHECK_NULL(last_deopt_or_trap_);
98   DCHECK(operands_map_.empty());
99   sequence()->StartBlock(rpo);
100 }
101 
102 
EndBlock(RpoNumber rpo)103 void InstructionScheduler::EndBlock(RpoNumber rpo) {
104   if (FLAG_turbo_stress_instruction_scheduling) {
105     ScheduleBlock<StressSchedulerQueue>();
106   } else {
107     ScheduleBlock<CriticalPathFirstQueue>();
108   }
109   sequence()->EndBlock(rpo);
110   graph_.clear();
111   last_side_effect_instr_ = nullptr;
112   pending_loads_.clear();
113   last_live_in_reg_marker_ = nullptr;
114   last_deopt_or_trap_ = nullptr;
115   operands_map_.clear();
116 }
117 
AddTerminator(Instruction * instr)118 void InstructionScheduler::AddTerminator(Instruction* instr) {
119   ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);
120   // Make sure that basic block terminators are not moved by adding them
121   // as successor of every instruction.
122   for (ScheduleGraphNode* node : graph_) {
123     node->AddSuccessor(new_node);
124   }
125   graph_.push_back(new_node);
126 }
127 
AddInstruction(Instruction * instr)128 void InstructionScheduler::AddInstruction(Instruction* instr) {
129   ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);
130 
131   // We should not have branches in the middle of a block.
132   DCHECK_NE(instr->flags_mode(), kFlags_branch);
133   DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison);
134 
135   if (IsFixedRegisterParameter(instr)) {
136     if (last_live_in_reg_marker_ != nullptr) {
137       last_live_in_reg_marker_->AddSuccessor(new_node);
138     }
139     last_live_in_reg_marker_ = new_node;
140   } else {
141     if (last_live_in_reg_marker_ != nullptr) {
142       last_live_in_reg_marker_->AddSuccessor(new_node);
143     }
144 
145     // Make sure that instructions are not scheduled before the last
146     // deoptimization or trap point when they depend on it.
147     if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
148       last_deopt_or_trap_->AddSuccessor(new_node);
149     }
150 
151     // Instructions with side effects and memory operations can't be
152     // reordered with respect to each other.
153     if (HasSideEffect(instr)) {
154       if (last_side_effect_instr_ != nullptr) {
155         last_side_effect_instr_->AddSuccessor(new_node);
156       }
157       for (ScheduleGraphNode* load : pending_loads_) {
158         load->AddSuccessor(new_node);
159       }
160       pending_loads_.clear();
161       last_side_effect_instr_ = new_node;
162     } else if (IsLoadOperation(instr)) {
163       // Load operations can't be reordered with side effects instructions but
164       // independent loads can be reordered with respect to each other.
165       if (last_side_effect_instr_ != nullptr) {
166         last_side_effect_instr_->AddSuccessor(new_node);
167       }
168       pending_loads_.push_back(new_node);
169     } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) {
170       // Ensure that deopts or traps are not reordered with respect to
171       // side-effect instructions.
172       if (last_side_effect_instr_ != nullptr) {
173         last_side_effect_instr_->AddSuccessor(new_node);
174       }
175       last_deopt_or_trap_ = new_node;
176     }
177 
178     // Look for operand dependencies.
179     for (size_t i = 0; i < instr->InputCount(); ++i) {
180       const InstructionOperand* input = instr->InputAt(i);
181       if (input->IsUnallocated()) {
182         int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
183         auto it = operands_map_.find(vreg);
184         if (it != operands_map_.end()) {
185           it->second->AddSuccessor(new_node);
186         }
187       }
188     }
189 
190     // Record the virtual registers defined by this instruction.
191     for (size_t i = 0; i < instr->OutputCount(); ++i) {
192       const InstructionOperand* output = instr->OutputAt(i);
193       if (output->IsUnallocated()) {
194         operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
195             new_node;
196       } else if (output->IsConstant()) {
197         operands_map_[ConstantOperand::cast(output)->virtual_register()] =
198             new_node;
199       }
200     }
201   }
202 
203   graph_.push_back(new_node);
204 }
205 
206 
207 template <typename QueueType>
ScheduleBlock()208 void InstructionScheduler::ScheduleBlock() {
209   QueueType ready_list(this);
210 
211   // Compute total latencies so that we can schedule the critical path first.
212   ComputeTotalLatencies();
213 
214   // Add nodes which don't have dependencies to the ready list.
215   for (ScheduleGraphNode* node : graph_) {
216     if (!node->HasUnscheduledPredecessor()) {
217       ready_list.AddNode(node);
218     }
219   }
220 
221   // Go through the ready list and schedule the instructions.
222   int cycle = 0;
223   while (!ready_list.IsEmpty()) {
224     ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);
225 
226     if (candidate != nullptr) {
227       sequence()->AddInstruction(candidate->instruction());
228 
229       for (ScheduleGraphNode* successor : candidate->successors()) {
230         successor->DropUnscheduledPredecessor();
231         successor->set_start_cycle(
232             std::max(successor->start_cycle(),
233                      cycle + candidate->latency()));
234 
235         if (!successor->HasUnscheduledPredecessor()) {
236           ready_list.AddNode(successor);
237         }
238       }
239     }
240 
241     cycle++;
242   }
243 }
244 
245 
GetInstructionFlags(const Instruction * instr) const246 int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
247   switch (instr->arch_opcode()) {
248     case kArchNop:
249     case kArchFramePointer:
250     case kArchParentFramePointer:
251     case kArchStackSlot:  // Despite its name this opcode will produce a
252                           // reference to a frame slot, so it is not affected
253                           // by the arm64 dual stack issues mentioned below.
254     case kArchComment:
255     case kArchDeoptimize:
256     case kArchJmp:
257     case kArchBinarySearchSwitch:
258     case kArchLookupSwitch:
259     case kArchRet:
260     case kArchTableSwitch:
261     case kArchThrowTerminator:
262       return kNoOpcodeFlags;
263 
264     case kArchTruncateDoubleToI:
265     case kIeee754Float64Acos:
266     case kIeee754Float64Acosh:
267     case kIeee754Float64Asin:
268     case kIeee754Float64Asinh:
269     case kIeee754Float64Atan:
270     case kIeee754Float64Atanh:
271     case kIeee754Float64Atan2:
272     case kIeee754Float64Cbrt:
273     case kIeee754Float64Cos:
274     case kIeee754Float64Cosh:
275     case kIeee754Float64Exp:
276     case kIeee754Float64Expm1:
277     case kIeee754Float64Log:
278     case kIeee754Float64Log1p:
279     case kIeee754Float64Log10:
280     case kIeee754Float64Log2:
281     case kIeee754Float64Pow:
282     case kIeee754Float64Sin:
283     case kIeee754Float64Sinh:
284     case kIeee754Float64Tan:
285     case kIeee754Float64Tanh:
286       return kNoOpcodeFlags;
287 
288     case kArchStackPointer:
289       // ArchStackPointer instruction loads the current stack pointer value and
290       // must not be reordered with instruction with side effects.
291       return kIsLoadOperation;
292 
293     case kArchWordPoisonOnSpeculation:
294       // While poisoning operations have no side effect, they must not be
295       // reordered relative to branches.
296       return kHasSideEffect;
297 
298     case kArchPrepareCallCFunction:
299     case kArchSaveCallerRegisters:
300     case kArchRestoreCallerRegisters:
301     case kArchPrepareTailCall:
302     case kArchCallCFunction:
303     case kArchCallCodeObject:
304     case kArchCallJSFunction:
305     case kArchCallWasmFunction:
306     case kArchTailCallCodeObjectFromJSFunction:
307     case kArchTailCallCodeObject:
308     case kArchTailCallAddress:
309     case kArchTailCallWasm:
310     case kArchDebugAbort:
311     case kArchDebugBreak:
312       return kHasSideEffect;
313 
314     case kArchStoreWithWriteBarrier:
315       return kHasSideEffect;
316 
317     case kWord32AtomicLoadInt8:
318     case kWord32AtomicLoadUint8:
319     case kWord32AtomicLoadInt16:
320     case kWord32AtomicLoadUint16:
321     case kWord32AtomicLoadWord32:
322       return kIsLoadOperation;
323 
324     case kWord32AtomicStoreWord8:
325     case kWord32AtomicStoreWord16:
326     case kWord32AtomicStoreWord32:
327       return kHasSideEffect;
328 
329     case kWord32AtomicExchangeInt8:
330     case kWord32AtomicExchangeUint8:
331     case kWord32AtomicExchangeInt16:
332     case kWord32AtomicExchangeUint16:
333     case kWord32AtomicExchangeWord32:
334     case kWord32AtomicCompareExchangeInt8:
335     case kWord32AtomicCompareExchangeUint8:
336     case kWord32AtomicCompareExchangeInt16:
337     case kWord32AtomicCompareExchangeUint16:
338     case kWord32AtomicCompareExchangeWord32:
339     case kWord32AtomicAddInt8:
340     case kWord32AtomicAddUint8:
341     case kWord32AtomicAddInt16:
342     case kWord32AtomicAddUint16:
343     case kWord32AtomicAddWord32:
344     case kWord32AtomicSubInt8:
345     case kWord32AtomicSubUint8:
346     case kWord32AtomicSubInt16:
347     case kWord32AtomicSubUint16:
348     case kWord32AtomicSubWord32:
349     case kWord32AtomicAndInt8:
350     case kWord32AtomicAndUint8:
351     case kWord32AtomicAndInt16:
352     case kWord32AtomicAndUint16:
353     case kWord32AtomicAndWord32:
354     case kWord32AtomicOrInt8:
355     case kWord32AtomicOrUint8:
356     case kWord32AtomicOrInt16:
357     case kWord32AtomicOrUint16:
358     case kWord32AtomicOrWord32:
359     case kWord32AtomicXorInt8:
360     case kWord32AtomicXorUint8:
361     case kWord32AtomicXorInt16:
362     case kWord32AtomicXorUint16:
363     case kWord32AtomicXorWord32:
364       return kHasSideEffect;
365 
366 #define CASE(Name) case k##Name:
367     TARGET_ARCH_OPCODE_LIST(CASE)
368 #undef CASE
369       return GetTargetInstructionFlags(instr);
370   }
371 
372   UNREACHABLE();
373 }
374 
ComputeTotalLatencies()375 void InstructionScheduler::ComputeTotalLatencies() {
376   for (ScheduleGraphNode* node : base::Reversed(graph_)) {
377     int max_latency = 0;
378 
379     for (ScheduleGraphNode* successor : node->successors()) {
380       DCHECK_NE(-1, successor->total_latency());
381       if (successor->total_latency() > max_latency) {
382         max_latency = successor->total_latency();
383       }
384     }
385 
386     node->set_total_latency(max_latency + node->latency());
387   }
388 }
389 
390 }  // namespace compiler
391 }  // namespace internal
392 }  // namespace v8
393