1 // Copyright 2015 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/compiler/instruction-scheduler.h"
6 
7 #include "src/base/adapters.h"
8 #include "src/base/utils/random-number-generator.h"
9 
10 namespace v8 {
11 namespace internal {
12 namespace compiler {
13 
AddNode(ScheduleGraphNode * node)14 void InstructionScheduler::SchedulingQueueBase::AddNode(
15     ScheduleGraphNode* node) {
16   // We keep the ready list sorted by total latency so that we can quickly find
17   // the next best candidate to schedule.
18   auto it = nodes_.begin();
19   while ((it != nodes_.end()) &&
20          ((*it)->total_latency() >= node->total_latency())) {
21     ++it;
22   }
23   nodes_.insert(it, node);
24 }
25 
26 
27 InstructionScheduler::ScheduleGraphNode*
PopBestCandidate(int cycle)28 InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
29   DCHECK(!IsEmpty());
30   auto candidate = nodes_.end();
31   for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
32     // We only consider instructions that have all their operands ready.
33     if (cycle >= (*iterator)->start_cycle()) {
34       candidate = iterator;
35       break;
36     }
37   }
38 
39   if (candidate != nodes_.end()) {
40     ScheduleGraphNode *result = *candidate;
41     nodes_.erase(candidate);
42     return result;
43   }
44 
45   return nullptr;
46 }
47 
48 
49 InstructionScheduler::ScheduleGraphNode*
PopBestCandidate(int cycle)50 InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
51   DCHECK(!IsEmpty());
52   // Choose a random element from the ready list.
53   auto candidate = nodes_.begin();
54   std::advance(candidate, isolate()->random_number_generator()->NextInt(
55       static_cast<int>(nodes_.size())));
56   ScheduleGraphNode *result = *candidate;
57   nodes_.erase(candidate);
58   return result;
59 }
60 
61 
ScheduleGraphNode(Zone * zone,Instruction * instr)62 InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(
63     Zone* zone,
64     Instruction* instr)
65     : instr_(instr),
66       successors_(zone),
67       unscheduled_predecessors_count_(0),
68       latency_(GetInstructionLatency(instr)),
69       total_latency_(-1),
70       start_cycle_(-1) {
71 }
72 
73 
AddSuccessor(ScheduleGraphNode * node)74 void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
75     ScheduleGraphNode* node) {
76   successors_.push_back(node);
77   node->unscheduled_predecessors_count_++;
78 }
79 
80 
InstructionScheduler(Zone * zone,InstructionSequence * sequence)81 InstructionScheduler::InstructionScheduler(Zone* zone,
82                                            InstructionSequence* sequence)
83     : zone_(zone),
84       sequence_(sequence),
85       graph_(zone),
86       last_side_effect_instr_(nullptr),
87       pending_loads_(zone),
88       last_live_in_reg_marker_(nullptr),
89       last_deopt_(nullptr),
90       operands_map_(zone) {}
91 
92 
StartBlock(RpoNumber rpo)93 void InstructionScheduler::StartBlock(RpoNumber rpo) {
94   DCHECK(graph_.empty());
95   DCHECK(last_side_effect_instr_ == nullptr);
96   DCHECK(pending_loads_.empty());
97   DCHECK(last_live_in_reg_marker_ == nullptr);
98   DCHECK(last_deopt_ == nullptr);
99   DCHECK(operands_map_.empty());
100   sequence()->StartBlock(rpo);
101 }
102 
103 
EndBlock(RpoNumber rpo)104 void InstructionScheduler::EndBlock(RpoNumber rpo) {
105   if (FLAG_turbo_stress_instruction_scheduling) {
106     ScheduleBlock<StressSchedulerQueue>();
107   } else {
108     ScheduleBlock<CriticalPathFirstQueue>();
109   }
110   sequence()->EndBlock(rpo);
111   graph_.clear();
112   last_side_effect_instr_ = nullptr;
113   pending_loads_.clear();
114   last_live_in_reg_marker_ = nullptr;
115   last_deopt_ = nullptr;
116   operands_map_.clear();
117 }
118 
119 
AddInstruction(Instruction * instr)120 void InstructionScheduler::AddInstruction(Instruction* instr) {
121   ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);
122 
123   if (IsBlockTerminator(instr)) {
124     // Make sure that basic block terminators are not moved by adding them
125     // as successor of every instruction.
126     for (ScheduleGraphNode* node : graph_) {
127       node->AddSuccessor(new_node);
128     }
129   } else if (IsFixedRegisterParameter(instr)) {
130     if (last_live_in_reg_marker_ != nullptr) {
131       last_live_in_reg_marker_->AddSuccessor(new_node);
132     }
133     last_live_in_reg_marker_ = new_node;
134   } else {
135     if (last_live_in_reg_marker_ != nullptr) {
136       last_live_in_reg_marker_->AddSuccessor(new_node);
137     }
138 
139     // Make sure that instructions are not scheduled before the last
140     // deoptimization point when they depend on it.
141     if ((last_deopt_ != nullptr) && DependsOnDeoptimization(instr)) {
142       last_deopt_->AddSuccessor(new_node);
143     }
144 
145     // Instructions with side effects and memory operations can't be
146     // reordered with respect to each other.
147     if (HasSideEffect(instr)) {
148       if (last_side_effect_instr_ != nullptr) {
149         last_side_effect_instr_->AddSuccessor(new_node);
150       }
151       for (ScheduleGraphNode* load : pending_loads_) {
152         load->AddSuccessor(new_node);
153       }
154       pending_loads_.clear();
155       last_side_effect_instr_ = new_node;
156     } else if (IsLoadOperation(instr)) {
157       // Load operations can't be reordered with side effects instructions but
158       // independent loads can be reordered with respect to each other.
159       if (last_side_effect_instr_ != nullptr) {
160         last_side_effect_instr_->AddSuccessor(new_node);
161       }
162       pending_loads_.push_back(new_node);
163     } else if (instr->IsDeoptimizeCall()) {
164       // Ensure that deopts are not reordered with respect to side-effect
165       // instructions.
166       if (last_side_effect_instr_ != nullptr) {
167         last_side_effect_instr_->AddSuccessor(new_node);
168       }
169       last_deopt_ = new_node;
170     }
171 
172     // Look for operand dependencies.
173     for (size_t i = 0; i < instr->InputCount(); ++i) {
174       const InstructionOperand* input = instr->InputAt(i);
175       if (input->IsUnallocated()) {
176         int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
177         auto it = operands_map_.find(vreg);
178         if (it != operands_map_.end()) {
179           it->second->AddSuccessor(new_node);
180         }
181       }
182     }
183 
184     // Record the virtual registers defined by this instruction.
185     for (size_t i = 0; i < instr->OutputCount(); ++i) {
186       const InstructionOperand* output = instr->OutputAt(i);
187       if (output->IsUnallocated()) {
188         operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
189             new_node;
190       } else if (output->IsConstant()) {
191         operands_map_[ConstantOperand::cast(output)->virtual_register()] =
192             new_node;
193       }
194     }
195   }
196 
197   graph_.push_back(new_node);
198 }
199 
200 
201 template <typename QueueType>
ScheduleBlock()202 void InstructionScheduler::ScheduleBlock() {
203   QueueType ready_list(this);
204 
205   // Compute total latencies so that we can schedule the critical path first.
206   ComputeTotalLatencies();
207 
208   // Add nodes which don't have dependencies to the ready list.
209   for (ScheduleGraphNode* node : graph_) {
210     if (!node->HasUnscheduledPredecessor()) {
211       ready_list.AddNode(node);
212     }
213   }
214 
215   // Go through the ready list and schedule the instructions.
216   int cycle = 0;
217   while (!ready_list.IsEmpty()) {
218     ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);
219 
220     if (candidate != nullptr) {
221       sequence()->AddInstruction(candidate->instruction());
222 
223       for (ScheduleGraphNode* successor : candidate->successors()) {
224         successor->DropUnscheduledPredecessor();
225         successor->set_start_cycle(
226             std::max(successor->start_cycle(),
227                      cycle + candidate->latency()));
228 
229         if (!successor->HasUnscheduledPredecessor()) {
230           ready_list.AddNode(successor);
231         }
232       }
233     }
234 
235     cycle++;
236   }
237 }
238 
239 
GetInstructionFlags(const Instruction * instr) const240 int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
241   switch (instr->arch_opcode()) {
242     case kArchNop:
243     case kArchFramePointer:
244     case kArchParentFramePointer:
245     case kArchTruncateDoubleToI:
246     case kArchStackSlot:
247     case kArchDebugBreak:
248     case kArchComment:
249     case kIeee754Float64Acos:
250     case kIeee754Float64Acosh:
251     case kIeee754Float64Asin:
252     case kIeee754Float64Asinh:
253     case kIeee754Float64Atan:
254     case kIeee754Float64Atanh:
255     case kIeee754Float64Atan2:
256     case kIeee754Float64Cbrt:
257     case kIeee754Float64Cos:
258     case kIeee754Float64Cosh:
259     case kIeee754Float64Exp:
260     case kIeee754Float64Expm1:
261     case kIeee754Float64Log:
262     case kIeee754Float64Log1p:
263     case kIeee754Float64Log10:
264     case kIeee754Float64Log2:
265     case kIeee754Float64Pow:
266     case kIeee754Float64Sin:
267     case kIeee754Float64Sinh:
268     case kIeee754Float64Tan:
269     case kIeee754Float64Tanh:
270       return kNoOpcodeFlags;
271 
272     case kArchStackPointer:
273       // ArchStackPointer instruction loads the current stack pointer value and
274       // must not be reordered with instruction with side effects.
275       return kIsLoadOperation;
276 
277     case kArchPrepareCallCFunction:
278     case kArchPrepareTailCall:
279     case kArchCallCFunction:
280     case kArchCallCodeObject:
281     case kArchCallJSFunction:
282       return kHasSideEffect;
283 
284     case kArchTailCallCodeObjectFromJSFunction:
285     case kArchTailCallCodeObject:
286     case kArchTailCallJSFunctionFromJSFunction:
287     case kArchTailCallAddress:
288       return kHasSideEffect | kIsBlockTerminator;
289 
290     case kArchDeoptimize:
291     case kArchJmp:
292     case kArchLookupSwitch:
293     case kArchTableSwitch:
294     case kArchRet:
295     case kArchThrowTerminator:
296       return kIsBlockTerminator;
297 
298     case kCheckedLoadInt8:
299     case kCheckedLoadUint8:
300     case kCheckedLoadInt16:
301     case kCheckedLoadUint16:
302     case kCheckedLoadWord32:
303     case kCheckedLoadWord64:
304     case kCheckedLoadFloat32:
305     case kCheckedLoadFloat64:
306       return kIsLoadOperation;
307 
308     case kCheckedStoreWord8:
309     case kCheckedStoreWord16:
310     case kCheckedStoreWord32:
311     case kCheckedStoreWord64:
312     case kCheckedStoreFloat32:
313     case kCheckedStoreFloat64:
314     case kArchStoreWithWriteBarrier:
315       return kHasSideEffect;
316 
317     case kAtomicLoadInt8:
318     case kAtomicLoadUint8:
319     case kAtomicLoadInt16:
320     case kAtomicLoadUint16:
321     case kAtomicLoadWord32:
322       return kIsLoadOperation;
323 
324     case kAtomicStoreWord8:
325     case kAtomicStoreWord16:
326     case kAtomicStoreWord32:
327       return kHasSideEffect;
328 
329 #define CASE(Name) case k##Name:
330     TARGET_ARCH_OPCODE_LIST(CASE)
331 #undef CASE
332       return GetTargetInstructionFlags(instr);
333   }
334 
335   UNREACHABLE();
336   return kNoOpcodeFlags;
337 }
338 
339 
IsBlockTerminator(const Instruction * instr) const340 bool InstructionScheduler::IsBlockTerminator(const Instruction* instr) const {
341   return ((GetInstructionFlags(instr) & kIsBlockTerminator) ||
342           (instr->flags_mode() == kFlags_branch));
343 }
344 
345 
ComputeTotalLatencies()346 void InstructionScheduler::ComputeTotalLatencies() {
347   for (ScheduleGraphNode* node : base::Reversed(graph_)) {
348     int max_latency = 0;
349 
350     for (ScheduleGraphNode* successor : node->successors()) {
351       DCHECK(successor->total_latency() != -1);
352       if (successor->total_latency() > max_latency) {
353         max_latency = successor->total_latency();
354       }
355     }
356 
357     node->set_total_latency(max_latency + node->latency());
358   }
359 }
360 
361 }  // namespace compiler
362 }  // namespace internal
363 }  // namespace v8
364