1 // Copyright (c) 2018 Google LLC.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <algorithm>
16 #include <functional>
17 #include <memory>
18 #include <unordered_map>
19 #include <unordered_set>
20 #include <vector>
21
22 #include "source/opt/ir_builder.h"
23 #include "source/opt/ir_context.h"
24 #include "source/opt/loop_descriptor.h"
25 #include "source/opt/loop_peeling.h"
26 #include "source/opt/loop_utils.h"
27 #include "source/opt/scalar_analysis.h"
28 #include "source/opt/scalar_analysis_nodes.h"
29
30 namespace spvtools {
31 namespace opt {
// Upper bound used by LoopPeelingPass::ProcessLoop: a loop is not peeled when
// (peel factor * loop_size->roi_size_) is greater than this value.
size_t LoopPeelingPass::code_grow_threshold_ = 1000;
33
DuplicateAndConnectLoop(LoopUtils::LoopCloningResult * clone_results)34 void LoopPeeling::DuplicateAndConnectLoop(
35 LoopUtils::LoopCloningResult* clone_results) {
36 CFG& cfg = *context_->cfg();
37 analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
38
39 assert(CanPeelLoop() && "Cannot peel loop!");
40
41 std::vector<BasicBlock*> ordered_loop_blocks;
42 // TODO(1841): Handle failure to create pre-header.
43 BasicBlock* pre_header = loop_->GetOrCreatePreHeaderBlock();
44
45 loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks);
46
47 cloned_loop_ = loop_utils_.CloneLoop(clone_results, ordered_loop_blocks);
48
49 // Add the basic block to the function.
50 Function::iterator it =
51 loop_utils_.GetFunction()->FindBlock(pre_header->id());
52 assert(it != loop_utils_.GetFunction()->end() &&
53 "Pre-header not found in the function.");
54 loop_utils_.GetFunction()->AddBasicBlocks(
55 clone_results->cloned_bb_.begin(), clone_results->cloned_bb_.end(), ++it);
56
57 // Make the |loop_|'s preheader the |cloned_loop_| one.
58 BasicBlock* cloned_header = cloned_loop_->GetHeaderBlock();
59 pre_header->ForEachSuccessorLabel(
60 [cloned_header](uint32_t* succ) { *succ = cloned_header->id(); });
61
62 // Update cfg.
63 cfg.RemoveEdge(pre_header->id(), loop_->GetHeaderBlock()->id());
64 cloned_loop_->SetPreHeaderBlock(pre_header);
65 loop_->SetPreHeaderBlock(nullptr);
66
67 // When cloning the loop, we didn't cloned the merge block, so currently
68 // |cloned_loop_| shares the same block as |loop_|.
69 // We mutate all branches from |cloned_loop_| block to |loop_|'s merge into a
70 // branch to |loop_|'s header (so header will also be the merge of
71 // |cloned_loop_|).
72 uint32_t cloned_loop_exit = 0;
73 for (uint32_t pred_id : cfg.preds(loop_->GetMergeBlock()->id())) {
74 if (loop_->IsInsideLoop(pred_id)) continue;
75 BasicBlock* bb = cfg.block(pred_id);
76 assert(cloned_loop_exit == 0 && "The loop has multiple exits.");
77 cloned_loop_exit = bb->id();
78 bb->ForEachSuccessorLabel([this](uint32_t* succ) {
79 if (*succ == loop_->GetMergeBlock()->id())
80 *succ = loop_->GetHeaderBlock()->id();
81 });
82 }
83
84 // Update cfg.
85 cfg.RemoveNonExistingEdges(loop_->GetMergeBlock()->id());
86 cfg.AddEdge(cloned_loop_exit, loop_->GetHeaderBlock()->id());
87
88 // Patch the phi of the original loop header:
89 // - Set the loop entry branch to come from the cloned loop exit block;
90 // - Set the initial value of the phi using the corresponding cloned loop
91 // exit values.
92 //
93 // We patch the iterating value initializers of the original loop using the
94 // corresponding cloned loop exit values. Connects the cloned loop iterating
95 // values to the original loop. This make sure that the initial value of the
96 // second loop starts with the last value of the first loop.
97 //
98 // For example, loops like:
99 //
100 // int z = 0;
101 // for (int i = 0; i++ < M; i += cst1) {
102 // if (cond)
103 // z += cst2;
104 // }
105 //
106 // Will become:
107 //
108 // int z = 0;
109 // int i = 0;
110 // for (; i++ < M; i += cst1) {
111 // if (cond)
112 // z += cst2;
113 // }
114 // for (; i++ < M; i += cst1) {
115 // if (cond)
116 // z += cst2;
117 // }
118 loop_->GetHeaderBlock()->ForEachPhiInst([cloned_loop_exit, def_use_mgr,
119 clone_results,
120 this](Instruction* phi) {
121 for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
122 if (!loop_->IsInsideLoop(phi->GetSingleWordInOperand(i + 1))) {
123 phi->SetInOperand(i,
124 {clone_results->value_map_.at(
125 exit_value_.at(phi->result_id())->result_id())});
126 phi->SetInOperand(i + 1, {cloned_loop_exit});
127 def_use_mgr->AnalyzeInstUse(phi);
128 return;
129 }
130 }
131 });
132
133 // Force the creation of a new preheader for the original loop and set it as
134 // the merge block for the cloned loop.
135 // TODO(1841): Handle failure to create pre-header.
136 cloned_loop_->SetMergeBlock(loop_->GetOrCreatePreHeaderBlock());
137 }
138
InsertCanonicalInductionVariable(LoopUtils::LoopCloningResult * clone_results)139 void LoopPeeling::InsertCanonicalInductionVariable(
140 LoopUtils::LoopCloningResult* clone_results) {
141 if (original_loop_canonical_induction_variable_) {
142 canonical_induction_variable_ =
143 context_->get_def_use_mgr()->GetDef(clone_results->value_map_.at(
144 original_loop_canonical_induction_variable_->result_id()));
145 return;
146 }
147
148 BasicBlock::iterator insert_point = GetClonedLoop()->GetLatchBlock()->tail();
149 if (GetClonedLoop()->GetLatchBlock()->GetMergeInst()) {
150 --insert_point;
151 }
152 InstructionBuilder builder(
153 context_, &*insert_point,
154 IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
155 Instruction* uint_1_cst =
156 builder.GetIntConstant<uint32_t>(1, int_type_->IsSigned());
157 // Create the increment.
158 // Note that we do "1 + 1" here, one of the operand should the phi
159 // value but we don't have it yet. The operand will be set latter.
160 Instruction* iv_inc = builder.AddIAdd(
161 uint_1_cst->type_id(), uint_1_cst->result_id(), uint_1_cst->result_id());
162
163 builder.SetInsertPoint(&*GetClonedLoop()->GetHeaderBlock()->begin());
164
165 canonical_induction_variable_ = builder.AddPhi(
166 uint_1_cst->type_id(),
167 {builder.GetIntConstant<uint32_t>(0, int_type_->IsSigned())->result_id(),
168 GetClonedLoop()->GetPreHeaderBlock()->id(), iv_inc->result_id(),
169 GetClonedLoop()->GetLatchBlock()->id()});
170 // Connect everything.
171 iv_inc->SetInOperand(0, {canonical_induction_variable_->result_id()});
172
173 // Update def/use manager.
174 context_->get_def_use_mgr()->AnalyzeInstUse(iv_inc);
175
176 // If do-while form, use the incremented value.
177 if (do_while_form_) {
178 canonical_induction_variable_ = iv_inc;
179 }
180 }
181
GetIteratorUpdateOperations(const Loop * loop,Instruction * iterator,std::unordered_set<Instruction * > * operations)182 void LoopPeeling::GetIteratorUpdateOperations(
183 const Loop* loop, Instruction* iterator,
184 std::unordered_set<Instruction*>* operations) {
185 analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
186 operations->insert(iterator);
187 iterator->ForEachInId([def_use_mgr, loop, operations, this](uint32_t* id) {
188 Instruction* insn = def_use_mgr->GetDef(*id);
189 if (insn->opcode() == SpvOpLabel) {
190 return;
191 }
192 if (operations->count(insn)) {
193 return;
194 }
195 if (!loop->IsInsideLoop(insn)) {
196 return;
197 }
198 GetIteratorUpdateOperations(loop, insn, operations);
199 });
200 }
201
202 // Gather the set of blocks for all the path from |entry| to |root|.
GetBlocksInPath(uint32_t block,uint32_t entry,std::unordered_set<uint32_t> * blocks_in_path,const CFG & cfg)203 static void GetBlocksInPath(uint32_t block, uint32_t entry,
204 std::unordered_set<uint32_t>* blocks_in_path,
205 const CFG& cfg) {
206 for (uint32_t pid : cfg.preds(block)) {
207 if (blocks_in_path->insert(pid).second) {
208 if (pid != entry) {
209 GetBlocksInPath(pid, entry, blocks_in_path, cfg);
210 }
211 }
212 }
213 }
214
IsConditionCheckSideEffectFree() const215 bool LoopPeeling::IsConditionCheckSideEffectFree() const {
216 CFG& cfg = *context_->cfg();
217
218 // The "do-while" form does not cause issues, the algorithm takes into account
219 // the first iteration.
220 if (!do_while_form_) {
221 uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0];
222
223 std::unordered_set<uint32_t> blocks_in_path;
224
225 blocks_in_path.insert(condition_block_id);
226 GetBlocksInPath(condition_block_id, loop_->GetHeaderBlock()->id(),
227 &blocks_in_path, cfg);
228
229 for (uint32_t bb_id : blocks_in_path) {
230 BasicBlock* bb = cfg.block(bb_id);
231 if (!bb->WhileEachInst([this](Instruction* insn) {
232 if (insn->IsBranch()) return true;
233 switch (insn->opcode()) {
234 case SpvOpLabel:
235 case SpvOpSelectionMerge:
236 case SpvOpLoopMerge:
237 return true;
238 default:
239 break;
240 }
241 return context_->IsCombinatorInstruction(insn);
242 })) {
243 return false;
244 }
245 }
246 }
247
248 return true;
249 }
250
GetIteratingExitValues()251 void LoopPeeling::GetIteratingExitValues() {
252 CFG& cfg = *context_->cfg();
253
254 loop_->GetHeaderBlock()->ForEachPhiInst(
255 [this](Instruction* phi) { exit_value_[phi->result_id()] = nullptr; });
256
257 if (!loop_->GetMergeBlock()) {
258 return;
259 }
260 if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
261 return;
262 }
263 analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
264
265 uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0];
266
267 auto& header_pred = cfg.preds(loop_->GetHeaderBlock()->id());
268 do_while_form_ = std::find(header_pred.begin(), header_pred.end(),
269 condition_block_id) != header_pred.end();
270 if (do_while_form_) {
271 loop_->GetHeaderBlock()->ForEachPhiInst(
272 [condition_block_id, def_use_mgr, this](Instruction* phi) {
273 std::unordered_set<Instruction*> operations;
274
275 for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
276 if (condition_block_id == phi->GetSingleWordInOperand(i + 1)) {
277 exit_value_[phi->result_id()] =
278 def_use_mgr->GetDef(phi->GetSingleWordInOperand(i));
279 }
280 }
281 });
282 } else {
283 DominatorTree* dom_tree =
284 &context_->GetDominatorAnalysis(loop_utils_.GetFunction())
285 ->GetDomTree();
286 BasicBlock* condition_block = cfg.block(condition_block_id);
287
288 loop_->GetHeaderBlock()->ForEachPhiInst(
289 [dom_tree, condition_block, this](Instruction* phi) {
290 std::unordered_set<Instruction*> operations;
291
292 // Not the back-edge value, check if the phi instruction is the only
293 // possible candidate.
294 GetIteratorUpdateOperations(loop_, phi, &operations);
295
296 for (Instruction* insn : operations) {
297 if (insn == phi) {
298 continue;
299 }
300 if (dom_tree->Dominates(context_->get_instr_block(insn),
301 condition_block)) {
302 return;
303 }
304 }
305 exit_value_[phi->result_id()] = phi;
306 });
307 }
308 }
309
FixExitCondition(const std::function<uint32_t (Instruction *)> & condition_builder)310 void LoopPeeling::FixExitCondition(
311 const std::function<uint32_t(Instruction*)>& condition_builder) {
312 CFG& cfg = *context_->cfg();
313
314 uint32_t condition_block_id = 0;
315 for (uint32_t id : cfg.preds(GetClonedLoop()->GetMergeBlock()->id())) {
316 if (GetClonedLoop()->IsInsideLoop(id)) {
317 condition_block_id = id;
318 break;
319 }
320 }
321 assert(condition_block_id != 0 && "2nd loop in improperly connected");
322
323 BasicBlock* condition_block = cfg.block(condition_block_id);
324 Instruction* exit_condition = condition_block->terminator();
325 assert(exit_condition->opcode() == SpvOpBranchConditional);
326 BasicBlock::iterator insert_point = condition_block->tail();
327 if (condition_block->GetMergeInst()) {
328 --insert_point;
329 }
330
331 exit_condition->SetInOperand(0, {condition_builder(&*insert_point)});
332
333 uint32_t to_continue_block_idx =
334 GetClonedLoop()->IsInsideLoop(exit_condition->GetSingleWordInOperand(1))
335 ? 1
336 : 2;
337 exit_condition->SetInOperand(
338 1, {exit_condition->GetSingleWordInOperand(to_continue_block_idx)});
339 exit_condition->SetInOperand(2, {GetClonedLoop()->GetMergeBlock()->id()});
340
341 // Update def/use manager.
342 context_->get_def_use_mgr()->AnalyzeInstUse(exit_condition);
343 }
344
CreateBlockBefore(BasicBlock * bb)345 BasicBlock* LoopPeeling::CreateBlockBefore(BasicBlock* bb) {
346 analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
347 CFG& cfg = *context_->cfg();
348 assert(cfg.preds(bb->id()).size() == 1 && "More than one predecessor");
349
350 // TODO(1841): Handle id overflow.
351 std::unique_ptr<BasicBlock> new_bb =
352 MakeUnique<BasicBlock>(std::unique_ptr<Instruction>(new Instruction(
353 context_, SpvOpLabel, 0, context_->TakeNextId(), {})));
354 new_bb->SetParent(loop_utils_.GetFunction());
355 // Update the loop descriptor.
356 Loop* in_loop = (*loop_utils_.GetLoopDescriptor())[bb];
357 if (in_loop) {
358 in_loop->AddBasicBlock(new_bb.get());
359 loop_utils_.GetLoopDescriptor()->SetBasicBlockToLoop(new_bb->id(), in_loop);
360 }
361
362 context_->set_instr_block(new_bb->GetLabelInst(), new_bb.get());
363 def_use_mgr->AnalyzeInstDefUse(new_bb->GetLabelInst());
364
365 BasicBlock* bb_pred = cfg.block(cfg.preds(bb->id())[0]);
366 bb_pred->tail()->ForEachInId([bb, &new_bb](uint32_t* id) {
367 if (*id == bb->id()) {
368 *id = new_bb->id();
369 }
370 });
371 cfg.RemoveEdge(bb_pred->id(), bb->id());
372 cfg.AddEdge(bb_pred->id(), new_bb->id());
373 def_use_mgr->AnalyzeInstUse(&*bb_pred->tail());
374
375 // Update the incoming branch.
376 bb->ForEachPhiInst([&new_bb, def_use_mgr](Instruction* phi) {
377 phi->SetInOperand(1, {new_bb->id()});
378 def_use_mgr->AnalyzeInstUse(phi);
379 });
380 InstructionBuilder(
381 context_, new_bb.get(),
382 IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping)
383 .AddBranch(bb->id());
384 cfg.RegisterBlock(new_bb.get());
385
386 // Add the basic block to the function.
387 Function::iterator it = loop_utils_.GetFunction()->FindBlock(bb->id());
388 assert(it != loop_utils_.GetFunction()->end() &&
389 "Basic block not found in the function.");
390 BasicBlock* ret = new_bb.get();
391 loop_utils_.GetFunction()->AddBasicBlock(std::move(new_bb), it);
392 return ret;
393 }
394
ProtectLoop(Loop * loop,Instruction * condition,BasicBlock * if_merge)395 BasicBlock* LoopPeeling::ProtectLoop(Loop* loop, Instruction* condition,
396 BasicBlock* if_merge) {
397 // TODO(1841): Handle failure to create pre-header.
398 BasicBlock* if_block = loop->GetOrCreatePreHeaderBlock();
399 // Will no longer be a pre-header because of the if.
400 loop->SetPreHeaderBlock(nullptr);
401 // Kill the branch to the header.
402 context_->KillInst(&*if_block->tail());
403
404 InstructionBuilder builder(
405 context_, if_block,
406 IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
407 builder.AddConditionalBranch(condition->result_id(),
408 loop->GetHeaderBlock()->id(), if_merge->id(),
409 if_merge->id());
410
411 return if_block;
412 }
413
PeelBefore(uint32_t peel_factor)414 void LoopPeeling::PeelBefore(uint32_t peel_factor) {
415 assert(CanPeelLoop() && "Cannot peel loop");
416 LoopUtils::LoopCloningResult clone_results;
417
418 // Clone the loop and insert the cloned one before the loop.
419 DuplicateAndConnectLoop(&clone_results);
420
421 // Add a canonical induction variable "canonical_induction_variable_".
422 InsertCanonicalInductionVariable(&clone_results);
423
424 InstructionBuilder builder(
425 context_, &*cloned_loop_->GetPreHeaderBlock()->tail(),
426 IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
427 Instruction* factor =
428 builder.GetIntConstant(peel_factor, int_type_->IsSigned());
429
430 Instruction* has_remaining_iteration = builder.AddLessThan(
431 factor->result_id(), loop_iteration_count_->result_id());
432 Instruction* max_iteration = builder.AddSelect(
433 factor->type_id(), has_remaining_iteration->result_id(),
434 factor->result_id(), loop_iteration_count_->result_id());
435
436 // Change the exit condition of the cloned loop to be (exit when become
437 // false):
438 // "canonical_induction_variable_" < min("factor", "loop_iteration_count_")
439 FixExitCondition([max_iteration, this](Instruction* insert_before_point) {
440 return InstructionBuilder(context_, insert_before_point,
441 IRContext::kAnalysisDefUse |
442 IRContext::kAnalysisInstrToBlockMapping)
443 .AddLessThan(canonical_induction_variable_->result_id(),
444 max_iteration->result_id())
445 ->result_id();
446 });
447
448 // "Protect" the second loop: the second loop can only be executed if
449 // |has_remaining_iteration| is true (i.e. factor < loop_iteration_count_).
450 BasicBlock* if_merge_block = loop_->GetMergeBlock();
451 loop_->SetMergeBlock(CreateBlockBefore(loop_->GetMergeBlock()));
452 // Prevent the second loop from being executed if we already executed all the
453 // required iterations.
454 BasicBlock* if_block =
455 ProtectLoop(loop_, has_remaining_iteration, if_merge_block);
456 // Patch the phi of the merge block.
457 if_merge_block->ForEachPhiInst(
458 [&clone_results, if_block, this](Instruction* phi) {
459 // if_merge_block had previously only 1 predecessor.
460 uint32_t incoming_value = phi->GetSingleWordInOperand(0);
461 auto def_in_loop = clone_results.value_map_.find(incoming_value);
462 if (def_in_loop != clone_results.value_map_.end())
463 incoming_value = def_in_loop->second;
464 phi->AddOperand(
465 {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {incoming_value}});
466 phi->AddOperand(
467 {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {if_block->id()}});
468 context_->get_def_use_mgr()->AnalyzeInstUse(phi);
469 });
470
471 context_->InvalidateAnalysesExceptFor(
472 IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping |
473 IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG);
474 }
475
PeelAfter(uint32_t peel_factor)476 void LoopPeeling::PeelAfter(uint32_t peel_factor) {
477 assert(CanPeelLoop() && "Cannot peel loop");
478 LoopUtils::LoopCloningResult clone_results;
479
480 // Clone the loop and insert the cloned one before the loop.
481 DuplicateAndConnectLoop(&clone_results);
482
483 // Add a canonical induction variable "canonical_induction_variable_".
484 InsertCanonicalInductionVariable(&clone_results);
485
486 InstructionBuilder builder(
487 context_, &*cloned_loop_->GetPreHeaderBlock()->tail(),
488 IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
489 Instruction* factor =
490 builder.GetIntConstant(peel_factor, int_type_->IsSigned());
491
492 Instruction* has_remaining_iteration = builder.AddLessThan(
493 factor->result_id(), loop_iteration_count_->result_id());
494
495 // Change the exit condition of the cloned loop to be (exit when become
496 // false):
497 // "canonical_induction_variable_" + "factor" < "loop_iteration_count_"
498 FixExitCondition([factor, this](Instruction* insert_before_point) {
499 InstructionBuilder cond_builder(
500 context_, insert_before_point,
501 IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
502 // Build the following check: canonical_induction_variable_ + factor <
503 // iteration_count
504 return cond_builder
505 .AddLessThan(cond_builder
506 .AddIAdd(canonical_induction_variable_->type_id(),
507 canonical_induction_variable_->result_id(),
508 factor->result_id())
509 ->result_id(),
510 loop_iteration_count_->result_id())
511 ->result_id();
512 });
513
514 // "Protect" the first loop: the first loop can only be executed if
515 // factor < loop_iteration_count_.
516
517 // The original loop's pre-header was the cloned loop merge block.
518 GetClonedLoop()->SetMergeBlock(
519 CreateBlockBefore(GetOriginalLoop()->GetPreHeaderBlock()));
520 // Use the second loop preheader as if merge block.
521
522 // Prevent the first loop if only the peeled loop needs it.
523 BasicBlock* if_block = ProtectLoop(cloned_loop_, has_remaining_iteration,
524 GetOriginalLoop()->GetPreHeaderBlock());
525
526 // Patch the phi of the header block.
527 // We added an if to enclose the first loop and because the phi node are
528 // connected to the exit value of the first loop, the definition no longer
529 // dominate the preheader.
530 // We had to the preheader (our if merge block) the required phi instruction
531 // and patch the header phi.
532 GetOriginalLoop()->GetHeaderBlock()->ForEachPhiInst(
533 [&clone_results, if_block, this](Instruction* phi) {
534 analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
535
536 auto find_value_idx = [](Instruction* phi_inst, Loop* loop) {
537 uint32_t preheader_value_idx =
538 !loop->IsInsideLoop(phi_inst->GetSingleWordInOperand(1)) ? 0 : 2;
539 return preheader_value_idx;
540 };
541
542 Instruction* cloned_phi =
543 def_use_mgr->GetDef(clone_results.value_map_.at(phi->result_id()));
544 uint32_t cloned_preheader_value = cloned_phi->GetSingleWordInOperand(
545 find_value_idx(cloned_phi, GetClonedLoop()));
546
547 Instruction* new_phi =
548 InstructionBuilder(context_,
549 &*GetOriginalLoop()->GetPreHeaderBlock()->tail(),
550 IRContext::kAnalysisDefUse |
551 IRContext::kAnalysisInstrToBlockMapping)
552 .AddPhi(phi->type_id(),
553 {phi->GetSingleWordInOperand(
554 find_value_idx(phi, GetOriginalLoop())),
555 GetClonedLoop()->GetMergeBlock()->id(),
556 cloned_preheader_value, if_block->id()});
557
558 phi->SetInOperand(find_value_idx(phi, GetOriginalLoop()),
559 {new_phi->result_id()});
560 def_use_mgr->AnalyzeInstUse(phi);
561 });
562
563 context_->InvalidateAnalysesExceptFor(
564 IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping |
565 IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG);
566 }
567
Process()568 Pass::Status LoopPeelingPass::Process() {
569 bool modified = false;
570 Module* module = context()->module();
571
572 // Process each function in the module
573 for (Function& f : *module) {
574 modified |= ProcessFunction(&f);
575 }
576
577 return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
578 }
579
ProcessFunction(Function * f)580 bool LoopPeelingPass::ProcessFunction(Function* f) {
581 bool modified = false;
582 LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(f);
583
584 std::vector<Loop*> to_process_loop;
585 to_process_loop.reserve(loop_descriptor.NumLoops());
586 for (Loop& l : loop_descriptor) {
587 to_process_loop.push_back(&l);
588 }
589
590 ScalarEvolutionAnalysis scev_analysis(context());
591
592 for (Loop* loop : to_process_loop) {
593 CodeMetrics loop_size;
594 loop_size.Analyze(*loop);
595
596 auto try_peel = [&loop_size, &modified, this](Loop* loop_to_peel) -> Loop* {
597 if (!loop_to_peel->IsLCSSA()) {
598 LoopUtils(context(), loop_to_peel).MakeLoopClosedSSA();
599 }
600
601 bool peeled_loop;
602 Loop* still_peelable_loop;
603 std::tie(peeled_loop, still_peelable_loop) =
604 ProcessLoop(loop_to_peel, &loop_size);
605
606 if (peeled_loop) {
607 modified = true;
608 }
609
610 return still_peelable_loop;
611 };
612
613 Loop* still_peelable_loop = try_peel(loop);
614 // The pass is working out the maximum factor by which a loop can be peeled.
615 // If the loop can potentially be peeled again, then there is only one
616 // possible direction, so only one call is still needed.
617 if (still_peelable_loop) {
618 try_peel(loop);
619 }
620 }
621
622 return modified;
623 }
624
ProcessLoop(Loop * loop,CodeMetrics * loop_size)625 std::pair<bool, Loop*> LoopPeelingPass::ProcessLoop(Loop* loop,
626 CodeMetrics* loop_size) {
627 ScalarEvolutionAnalysis* scev_analysis =
628 context()->GetScalarEvolutionAnalysis();
629 // Default values for bailing out.
630 std::pair<bool, Loop*> bail_out{false, nullptr};
631
632 BasicBlock* exit_block = loop->FindConditionBlock();
633 if (!exit_block) {
634 return bail_out;
635 }
636
637 Instruction* exiting_iv = loop->FindConditionVariable(exit_block);
638 if (!exiting_iv) {
639 return bail_out;
640 }
641 size_t iterations = 0;
642 if (!loop->FindNumberOfIterations(exiting_iv, &*exit_block->tail(),
643 &iterations)) {
644 return bail_out;
645 }
646 if (!iterations) {
647 return bail_out;
648 }
649
650 Instruction* canonical_induction_variable = nullptr;
651
652 loop->GetHeaderBlock()->WhileEachPhiInst([&canonical_induction_variable,
653 scev_analysis,
654 this](Instruction* insn) {
655 if (const SERecurrentNode* iv =
656 scev_analysis->AnalyzeInstruction(insn)->AsSERecurrentNode()) {
657 const SEConstantNode* offset = iv->GetOffset()->AsSEConstantNode();
658 const SEConstantNode* coeff = iv->GetCoefficient()->AsSEConstantNode();
659 if (offset && coeff && offset->FoldToSingleValue() == 0 &&
660 coeff->FoldToSingleValue() == 1) {
661 if (context()->get_type_mgr()->GetType(insn->type_id())->AsInteger()) {
662 canonical_induction_variable = insn;
663 return false;
664 }
665 }
666 }
667 return true;
668 });
669
670 bool is_signed = canonical_induction_variable
671 ? context()
672 ->get_type_mgr()
673 ->GetType(canonical_induction_variable->type_id())
674 ->AsInteger()
675 ->IsSigned()
676 : false;
677
678 LoopPeeling peeler(
679 loop,
680 InstructionBuilder(
681 context(), loop->GetHeaderBlock(),
682 IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping)
683 .GetIntConstant<uint32_t>(static_cast<uint32_t>(iterations),
684 is_signed),
685 canonical_induction_variable);
686
687 if (!peeler.CanPeelLoop()) {
688 return bail_out;
689 }
690
691 // For each basic block in the loop, check if it can be peeled. If it
692 // can, get the direction (before/after) and by which factor.
693 LoopPeelingInfo peel_info(loop, iterations, scev_analysis);
694
695 uint32_t peel_before_factor = 0;
696 uint32_t peel_after_factor = 0;
697
698 for (uint32_t block : loop->GetBlocks()) {
699 if (block == exit_block->id()) {
700 continue;
701 }
702 BasicBlock* bb = cfg()->block(block);
703 PeelDirection direction;
704 uint32_t factor;
705 std::tie(direction, factor) = peel_info.GetPeelingInfo(bb);
706
707 if (direction == PeelDirection::kNone) {
708 continue;
709 }
710 if (direction == PeelDirection::kBefore) {
711 peel_before_factor = std::max(peel_before_factor, factor);
712 } else {
713 assert(direction == PeelDirection::kAfter);
714 peel_after_factor = std::max(peel_after_factor, factor);
715 }
716 }
717 PeelDirection direction = PeelDirection::kNone;
718 uint32_t factor = 0;
719
720 // Find which direction we should peel.
721 if (peel_before_factor) {
722 factor = peel_before_factor;
723 direction = PeelDirection::kBefore;
724 }
725 if (peel_after_factor) {
726 if (peel_before_factor < peel_after_factor) {
727 // Favor a peel after here and give the peel before another shot later.
728 factor = peel_after_factor;
729 direction = PeelDirection::kAfter;
730 }
731 }
732
733 // Do the peel if we can.
734 if (direction == PeelDirection::kNone) return bail_out;
735
736 // This does not take into account branch elimination opportunities and
737 // the unrolling. It assumes the peeled loop will be unrolled as well.
738 if (factor * loop_size->roi_size_ > code_grow_threshold_) {
739 return bail_out;
740 }
741 loop_size->roi_size_ *= factor;
742
743 // Find if a loop should be peeled again.
744 Loop* extra_opportunity = nullptr;
745
746 if (direction == PeelDirection::kBefore) {
747 peeler.PeelBefore(factor);
748 if (stats_) {
749 stats_->peeled_loops_.emplace_back(loop, PeelDirection::kBefore, factor);
750 }
751 if (peel_after_factor) {
752 // We could have peeled after, give it another try.
753 extra_opportunity = peeler.GetOriginalLoop();
754 }
755 } else {
756 peeler.PeelAfter(factor);
757 if (stats_) {
758 stats_->peeled_loops_.emplace_back(loop, PeelDirection::kAfter, factor);
759 }
760 if (peel_before_factor) {
761 // We could have peeled before, give it another try.
762 extra_opportunity = peeler.GetClonedLoop();
763 }
764 }
765
766 return {true, extra_opportunity};
767 }
768
GetFirstLoopInvariantOperand(Instruction * condition) const769 uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstLoopInvariantOperand(
770 Instruction* condition) const {
771 for (uint32_t i = 0; i < condition->NumInOperands(); i++) {
772 BasicBlock* bb =
773 context_->get_instr_block(condition->GetSingleWordInOperand(i));
774 if (bb && loop_->IsInsideLoop(bb)) {
775 return condition->GetSingleWordInOperand(i);
776 }
777 }
778
779 return 0;
780 }
781
GetFirstNonLoopInvariantOperand(Instruction * condition) const782 uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstNonLoopInvariantOperand(
783 Instruction* condition) const {
784 for (uint32_t i = 0; i < condition->NumInOperands(); i++) {
785 BasicBlock* bb =
786 context_->get_instr_block(condition->GetSingleWordInOperand(i));
787 if (!bb || !loop_->IsInsideLoop(bb)) {
788 return condition->GetSingleWordInOperand(i);
789 }
790 }
791
792 return 0;
793 }
794
IsHandledCondition(SpvOp opcode)795 static bool IsHandledCondition(SpvOp opcode) {
796 switch (opcode) {
797 case SpvOpIEqual:
798 case SpvOpINotEqual:
799 case SpvOpUGreaterThan:
800 case SpvOpSGreaterThan:
801 case SpvOpUGreaterThanEqual:
802 case SpvOpSGreaterThanEqual:
803 case SpvOpULessThan:
804 case SpvOpSLessThan:
805 case SpvOpULessThanEqual:
806 case SpvOpSLessThanEqual:
807 return true;
808 default:
809 return false;
810 }
811 }
812
813 LoopPeelingPass::LoopPeelingInfo::Direction
GetPeelingInfo(BasicBlock * bb) const814 LoopPeelingPass::LoopPeelingInfo::GetPeelingInfo(BasicBlock* bb) const {
815 if (bb->terminator()->opcode() != SpvOpBranchConditional) {
816 return GetNoneDirection();
817 }
818
819 analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
820
821 Instruction* condition =
822 def_use_mgr->GetDef(bb->terminator()->GetSingleWordInOperand(0));
823
824 if (!IsHandledCondition(condition->opcode())) {
825 return GetNoneDirection();
826 }
827
828 if (!GetFirstLoopInvariantOperand(condition)) {
829 // No loop invariant, it cannot be peeled by this pass.
830 return GetNoneDirection();
831 }
832 if (!GetFirstNonLoopInvariantOperand(condition)) {
833 // Seems to be a job for the unswitch pass.
834 return GetNoneDirection();
835 }
836
837 // Left hand-side.
838 SExpression lhs = scev_analysis_->AnalyzeInstruction(
839 def_use_mgr->GetDef(condition->GetSingleWordInOperand(0)));
840 if (lhs->GetType() == SENode::CanNotCompute) {
841 // Can't make any conclusion.
842 return GetNoneDirection();
843 }
844
845 // Right hand-side.
846 SExpression rhs = scev_analysis_->AnalyzeInstruction(
847 def_use_mgr->GetDef(condition->GetSingleWordInOperand(1)));
848 if (rhs->GetType() == SENode::CanNotCompute) {
849 // Can't make any conclusion.
850 return GetNoneDirection();
851 }
852
853 // Only take into account recurrent expression over the current loop.
854 bool is_lhs_rec = !scev_analysis_->IsLoopInvariant(loop_, lhs);
855 bool is_rhs_rec = !scev_analysis_->IsLoopInvariant(loop_, rhs);
856
857 if ((is_lhs_rec && is_rhs_rec) || (!is_lhs_rec && !is_rhs_rec)) {
858 return GetNoneDirection();
859 }
860
861 if (is_lhs_rec) {
862 if (!lhs->AsSERecurrentNode() ||
863 lhs->AsSERecurrentNode()->GetLoop() != loop_) {
864 return GetNoneDirection();
865 }
866 }
867 if (is_rhs_rec) {
868 if (!rhs->AsSERecurrentNode() ||
869 rhs->AsSERecurrentNode()->GetLoop() != loop_) {
870 return GetNoneDirection();
871 }
872 }
873
874 // If the op code is ==, then we try a peel before or after.
875 // If opcode is not <, >, <= or >=, we bail out.
876 //
877 // For the remaining cases, we canonicalize the expression so that the
878 // constant expression is on the left hand side and the recurring expression
879 // is on the right hand side. If we swap hand side, then < becomes >, <=
880 // becomes >= etc.
881 // If the opcode is <=, then we add 1 to the right hand side and do the peel
882 // check on <.
883 // If the opcode is >=, then we add 1 to the left hand side and do the peel
884 // check on >.
885
886 CmpOperator cmp_operator;
887 switch (condition->opcode()) {
888 default:
889 return GetNoneDirection();
890 case SpvOpIEqual:
891 case SpvOpINotEqual:
892 return HandleEquality(lhs, rhs);
893 case SpvOpUGreaterThan:
894 case SpvOpSGreaterThan: {
895 cmp_operator = CmpOperator::kGT;
896 break;
897 }
898 case SpvOpULessThan:
899 case SpvOpSLessThan: {
900 cmp_operator = CmpOperator::kLT;
901 break;
902 }
903 // We add one to transform >= into > and <= into <.
904 case SpvOpUGreaterThanEqual:
905 case SpvOpSGreaterThanEqual: {
906 cmp_operator = CmpOperator::kGE;
907 break;
908 }
909 case SpvOpULessThanEqual:
910 case SpvOpSLessThanEqual: {
911 cmp_operator = CmpOperator::kLE;
912 break;
913 }
914 }
915
916 // Force the left hand side to be the non recurring expression.
917 if (is_lhs_rec) {
918 std::swap(lhs, rhs);
919 switch (cmp_operator) {
920 case CmpOperator::kLT: {
921 cmp_operator = CmpOperator::kGT;
922 break;
923 }
924 case CmpOperator::kGT: {
925 cmp_operator = CmpOperator::kLT;
926 break;
927 }
928 case CmpOperator::kLE: {
929 cmp_operator = CmpOperator::kGE;
930 break;
931 }
932 case CmpOperator::kGE: {
933 cmp_operator = CmpOperator::kLE;
934 break;
935 }
936 }
937 }
938 return HandleInequality(cmp_operator, lhs, rhs->AsSERecurrentNode());
939 }
940
GetValueAtFirstIteration(SERecurrentNode * rec) const941 SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtFirstIteration(
942 SERecurrentNode* rec) const {
943 return rec->GetOffset();
944 }
945
GetValueAtIteration(SERecurrentNode * rec,int64_t iteration) const946 SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtIteration(
947 SERecurrentNode* rec, int64_t iteration) const {
948 SExpression coeff = rec->GetCoefficient();
949 SExpression offset = rec->GetOffset();
950
951 return (coeff * iteration) + offset;
952 }
953
GetValueAtLastIteration(SERecurrentNode * rec) const954 SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtLastIteration(
955 SERecurrentNode* rec) const {
956 return GetValueAtIteration(rec, loop_max_iterations_ - 1);
957 }
958
EvalOperator(CmpOperator cmp_op,SExpression lhs,SExpression rhs,bool * result) const959 bool LoopPeelingPass::LoopPeelingInfo::EvalOperator(CmpOperator cmp_op,
960 SExpression lhs,
961 SExpression rhs,
962 bool* result) const {
963 assert(scev_analysis_->IsLoopInvariant(loop_, lhs));
964 assert(scev_analysis_->IsLoopInvariant(loop_, rhs));
965 // We perform the test: 0 cmp_op rhs - lhs
966 // What is left is then to determine the sign of the expression.
967 switch (cmp_op) {
968 case CmpOperator::kLT: {
969 return scev_analysis_->IsAlwaysGreaterThanZero(rhs - lhs, result);
970 }
971 case CmpOperator::kGT: {
972 return scev_analysis_->IsAlwaysGreaterThanZero(lhs - rhs, result);
973 }
974 case CmpOperator::kLE: {
975 return scev_analysis_->IsAlwaysGreaterOrEqualToZero(rhs - lhs, result);
976 }
977 case CmpOperator::kGE: {
978 return scev_analysis_->IsAlwaysGreaterOrEqualToZero(lhs - rhs, result);
979 }
980 }
981 return false;
982 }
983
984 LoopPeelingPass::LoopPeelingInfo::Direction
HandleEquality(SExpression lhs,SExpression rhs) const985 LoopPeelingPass::LoopPeelingInfo::HandleEquality(SExpression lhs,
986 SExpression rhs) const {
987 {
988 // Try peel before opportunity.
989 SExpression lhs_cst = lhs;
990 if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) {
991 lhs_cst = rec_node->GetOffset();
992 }
993 SExpression rhs_cst = rhs;
994 if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) {
995 rhs_cst = rec_node->GetOffset();
996 }
997
998 if (lhs_cst == rhs_cst) {
999 return Direction{LoopPeelingPass::PeelDirection::kBefore, 1};
1000 }
1001 }
1002
1003 {
1004 // Try peel after opportunity.
1005 SExpression lhs_cst = lhs;
1006 if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) {
1007 // rec_node(x) = a * x + b
1008 // assign to lhs: a * (loop_max_iterations_ - 1) + b
1009 lhs_cst = GetValueAtLastIteration(rec_node);
1010 }
1011 SExpression rhs_cst = rhs;
1012 if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) {
1013 // rec_node(x) = a * x + b
1014 // assign to lhs: a * (loop_max_iterations_ - 1) + b
1015 rhs_cst = GetValueAtLastIteration(rec_node);
1016 }
1017
1018 if (lhs_cst == rhs_cst) {
1019 return Direction{LoopPeelingPass::PeelDirection::kAfter, 1};
1020 }
1021 }
1022
1023 return GetNoneDirection();
1024 }
1025
1026 LoopPeelingPass::LoopPeelingInfo::Direction
HandleInequality(CmpOperator cmp_op,SExpression lhs,SERecurrentNode * rhs) const1027 LoopPeelingPass::LoopPeelingInfo::HandleInequality(CmpOperator cmp_op,
1028 SExpression lhs,
1029 SERecurrentNode* rhs) const {
1030 SExpression offset = rhs->GetOffset();
1031 SExpression coefficient = rhs->GetCoefficient();
1032 // Compute (cst - B) / A.
1033 std::pair<SExpression, int64_t> flip_iteration = (lhs - offset) / coefficient;
1034 if (!flip_iteration.first->AsSEConstantNode()) {
1035 return GetNoneDirection();
1036 }
1037 // note: !!flip_iteration.second normalize to 0/1 (via bool cast).
1038 int64_t iteration =
1039 flip_iteration.first->AsSEConstantNode()->FoldToSingleValue() +
1040 !!flip_iteration.second;
1041 if (iteration <= 0 ||
1042 loop_max_iterations_ <= static_cast<uint64_t>(iteration)) {
1043 // Always true or false within the loop bounds.
1044 return GetNoneDirection();
1045 }
1046 // If this is a <= or >= operator and the iteration, make sure |iteration| is
1047 // the one flipping the condition.
1048 // If (cst - B) and A are not divisible, this equivalent to a < or > check, so
1049 // we skip this test.
1050 if (!flip_iteration.second &&
1051 (cmp_op == CmpOperator::kLE || cmp_op == CmpOperator::kGE)) {
1052 bool first_iteration;
1053 bool current_iteration;
1054 if (!EvalOperator(cmp_op, lhs, offset, &first_iteration) ||
1055 !EvalOperator(cmp_op, lhs, GetValueAtIteration(rhs, iteration),
1056 ¤t_iteration)) {
1057 return GetNoneDirection();
1058 }
1059 // If the condition did not flip the next will.
1060 if (first_iteration == current_iteration) {
1061 iteration++;
1062 }
1063 }
1064
1065 uint32_t cast_iteration = 0;
1066 // sanity check: can we fit |iteration| in a uint32_t ?
1067 if (static_cast<uint64_t>(iteration) < std::numeric_limits<uint32_t>::max()) {
1068 cast_iteration = static_cast<uint32_t>(iteration);
1069 }
1070
1071 if (cast_iteration) {
1072 // Peel before if we are closer to the start, after if closer to the end.
1073 if (loop_max_iterations_ / 2 > cast_iteration) {
1074 return Direction{LoopPeelingPass::PeelDirection::kBefore, cast_iteration};
1075 } else {
1076 return Direction{
1077 LoopPeelingPass::PeelDirection::kAfter,
1078 static_cast<uint32_t>(loop_max_iterations_ - cast_iteration)};
1079 }
1080 }
1081
1082 return GetNoneDirection();
1083 }
1084
1085 } // namespace opt
1086 } // namespace spvtools
1087