1 //===-- IfConversion.cpp - Machine code if conversion pass. ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the machine instruction level if-conversion pass, which
11 // tries to convert conditional branches into predicated instructions.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/CodeGen/Passes.h"
16 #include "BranchFolding.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/LivePhysRegs.h"
21 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
22 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineModuleInfo.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/TargetSchedule.h"
28 #include "llvm/Support/CommandLine.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include "llvm/Target/TargetInstrInfo.h"
33 #include "llvm/Target/TargetLowering.h"
34 #include "llvm/Target/TargetRegisterInfo.h"
35 #include "llvm/Target/TargetSubtargetInfo.h"
36 #include <algorithm>
37 #include <utility>
38
39 using namespace llvm;
40
41 #define DEBUG_TYPE "ifcvt"
42
43 // Hidden options for help debugging.
44 static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
45 static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
46 static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
47 static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
48 cl::init(false), cl::Hidden);
49 static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
50 cl::init(false), cl::Hidden);
51 static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
52 cl::init(false), cl::Hidden);
53 static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
54 cl::init(false), cl::Hidden);
55 static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
56 cl::init(false), cl::Hidden);
57 static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
58 cl::init(false), cl::Hidden);
59 static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
60 cl::init(false), cl::Hidden);
61 static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
62 cl::init(true), cl::Hidden);
63
64 STATISTIC(NumSimple, "Number of simple if-conversions performed");
65 STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
66 STATISTIC(NumTriangle, "Number of triangle if-conversions performed");
67 STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
68 STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
69 STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
70 STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
71 STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
72 STATISTIC(NumDupBBs, "Number of duplicated blocks");
73 STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");
74
75 namespace {
76 class IfConverter : public MachineFunctionPass {
77 enum IfcvtKind {
78 ICNotClassfied, // BB data valid, but not classified.
79 ICSimpleFalse, // Same as ICSimple, but on the false path.
80 ICSimple, // BB is entry of an one split, no rejoin sub-CFG.
81 ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.
82 ICTriangleRev, // Same as ICTriangle, but true path rev condition.
83 ICTriangleFalse, // Same as ICTriangle, but on the false path.
84 ICTriangle, // BB is entry of a triangle sub-CFG.
85 ICDiamond // BB is entry of a diamond sub-CFG.
86 };
87
88 /// BBInfo - One per MachineBasicBlock, this is used to cache the result
89 /// if-conversion feasibility analysis. This includes results from
90 /// TargetInstrInfo::analyzeBranch() (i.e. TBB, FBB, and Cond), and its
91 /// classification, and common tail block of its successors (if it's a
92 /// diamond shape), its size, whether it's predicable, and whether any
93 /// instruction can clobber the 'would-be' predicate.
94 ///
95 /// IsDone - True if BB is not to be considered for ifcvt.
96 /// IsBeingAnalyzed - True if BB is currently being analyzed.
97 /// IsAnalyzed - True if BB has been analyzed (info is still valid).
98 /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed.
99 /// IsBrAnalyzable - True if analyzeBranch() returns false.
100 /// HasFallThrough - True if BB may fallthrough to the following BB.
101 /// IsUnpredicable - True if BB is known to be unpredicable.
102 /// ClobbersPred - True if BB could modify predicates (e.g. has
103 /// cmp, call, etc.)
104 /// NonPredSize - Number of non-predicated instructions.
105 /// ExtraCost - Extra cost for multi-cycle instructions.
106 /// ExtraCost2 - Some instructions are slower when predicated
107 /// BB - Corresponding MachineBasicBlock.
108 /// TrueBB / FalseBB- See analyzeBranch().
109 /// BrCond - Conditions for end of block conditional branches.
110 /// Predicate - Predicate used in the BB.
111 struct BBInfo {
112 bool IsDone : 1;
113 bool IsBeingAnalyzed : 1;
114 bool IsAnalyzed : 1;
115 bool IsEnqueued : 1;
116 bool IsBrAnalyzable : 1;
117 bool HasFallThrough : 1;
118 bool IsUnpredicable : 1;
119 bool CannotBeCopied : 1;
120 bool ClobbersPred : 1;
121 unsigned NonPredSize;
122 unsigned ExtraCost;
123 unsigned ExtraCost2;
124 MachineBasicBlock *BB;
125 MachineBasicBlock *TrueBB;
126 MachineBasicBlock *FalseBB;
127 SmallVector<MachineOperand, 4> BrCond;
128 SmallVector<MachineOperand, 4> Predicate;
BBInfo__anon01cb6a4b0111::IfConverter::BBInfo129 BBInfo() : IsDone(false), IsBeingAnalyzed(false),
130 IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
131 HasFallThrough(false), IsUnpredicable(false),
132 CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
133 ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
134 FalseBB(nullptr) {}
135 };
136
137 /// IfcvtToken - Record information about pending if-conversions to attempt:
138 /// BBI - Corresponding BBInfo.
139 /// Kind - Type of block. See IfcvtKind.
140 /// NeedSubsumption - True if the to-be-predicated BB has already been
141 /// predicated.
142 /// NumDups - Number of instructions that would be duplicated due
143 /// to this if-conversion. (For diamonds, the number of
144 /// identical instructions at the beginnings of both
145 /// paths).
146 /// NumDups2 - For diamonds, the number of identical instructions
147 /// at the ends of both paths.
148 struct IfcvtToken {
149 BBInfo &BBI;
150 IfcvtKind Kind;
151 bool NeedSubsumption;
152 unsigned NumDups;
153 unsigned NumDups2;
IfcvtToken__anon01cb6a4b0111::IfConverter::IfcvtToken154 IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
155 : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
156 };
157
158 /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
159 /// basic block number.
160 std::vector<BBInfo> BBAnalysis;
161 TargetSchedModel SchedModel;
162
163 const TargetLoweringBase *TLI;
164 const TargetInstrInfo *TII;
165 const TargetRegisterInfo *TRI;
166 const MachineBranchProbabilityInfo *MBPI;
167 MachineRegisterInfo *MRI;
168
169 LivePhysRegs Redefs;
170 LivePhysRegs DontKill;
171
172 bool PreRegAlloc;
173 bool MadeChange;
174 int FnNum;
175 std::function<bool(const Function &)> PredicateFtor;
176
177 public:
178 static char ID;
IfConverter(std::function<bool (const Function &)> Ftor=nullptr)179 IfConverter(std::function<bool(const Function &)> Ftor = nullptr)
180 : MachineFunctionPass(ID), FnNum(-1), PredicateFtor(std::move(Ftor)) {
181 initializeIfConverterPass(*PassRegistry::getPassRegistry());
182 }
183
getAnalysisUsage(AnalysisUsage & AU) const184 void getAnalysisUsage(AnalysisUsage &AU) const override {
185 AU.addRequired<MachineBlockFrequencyInfo>();
186 AU.addRequired<MachineBranchProbabilityInfo>();
187 MachineFunctionPass::getAnalysisUsage(AU);
188 }
189
190 bool runOnMachineFunction(MachineFunction &MF) override;
191
getRequiredProperties() const192 MachineFunctionProperties getRequiredProperties() const override {
193 return MachineFunctionProperties().set(
194 MachineFunctionProperties::Property::AllVRegsAllocated);
195 }
196
197 private:
198 bool ReverseBranchCondition(BBInfo &BBI);
199 bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
200 BranchProbability Prediction) const;
201 bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
202 bool FalseBranch, unsigned &Dups,
203 BranchProbability Prediction) const;
204 bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
205 unsigned &Dups1, unsigned &Dups2) const;
206 void ScanInstructions(BBInfo &BBI);
207 void AnalyzeBlock(MachineBasicBlock *MBB,
208 std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
209 bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
210 bool isTriangle = false, bool RevBranch = false);
211 void AnalyzeBlocks(MachineFunction &MF,
212 std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
213 void InvalidatePreds(MachineBasicBlock *BB);
214 void RemoveExtraEdges(BBInfo &BBI);
215 bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
216 bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
217 bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
218 unsigned NumDups1, unsigned NumDups2);
219 void PredicateBlock(BBInfo &BBI,
220 MachineBasicBlock::iterator E,
221 SmallVectorImpl<MachineOperand> &Cond,
222 SmallSet<unsigned, 4> *LaterRedefs = nullptr);
223 void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
224 SmallVectorImpl<MachineOperand> &Cond,
225 bool IgnoreBr = false);
226 void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
227
MeetIfcvtSizeLimit(MachineBasicBlock & BB,unsigned Cycle,unsigned Extra,BranchProbability Prediction) const228 bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
229 unsigned Cycle, unsigned Extra,
230 BranchProbability Prediction) const {
231 return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
232 Prediction);
233 }
234
MeetIfcvtSizeLimit(MachineBasicBlock & TBB,unsigned TCycle,unsigned TExtra,MachineBasicBlock & FBB,unsigned FCycle,unsigned FExtra,BranchProbability Prediction) const235 bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
236 unsigned TCycle, unsigned TExtra,
237 MachineBasicBlock &FBB,
238 unsigned FCycle, unsigned FExtra,
239 BranchProbability Prediction) const {
240 return TCycle > 0 && FCycle > 0 &&
241 TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
242 Prediction);
243 }
244
245 // blockAlwaysFallThrough - Block ends without a terminator.
blockAlwaysFallThrough(BBInfo & BBI) const246 bool blockAlwaysFallThrough(BBInfo &BBI) const {
247 return BBI.IsBrAnalyzable && BBI.TrueBB == nullptr;
248 }
249
250 // IfcvtTokenCmp - Used to sort if-conversion candidates.
IfcvtTokenCmp(const std::unique_ptr<IfcvtToken> & C1,const std::unique_ptr<IfcvtToken> & C2)251 static bool IfcvtTokenCmp(const std::unique_ptr<IfcvtToken> &C1,
252 const std::unique_ptr<IfcvtToken> &C2) {
253 int Incr1 = (C1->Kind == ICDiamond)
254 ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
255 int Incr2 = (C2->Kind == ICDiamond)
256 ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups;
257 if (Incr1 > Incr2)
258 return true;
259 else if (Incr1 == Incr2) {
260 // Favors subsumption.
261 if (!C1->NeedSubsumption && C2->NeedSubsumption)
262 return true;
263 else if (C1->NeedSubsumption == C2->NeedSubsumption) {
264 // Favors diamond over triangle, etc.
265 if ((unsigned)C1->Kind < (unsigned)C2->Kind)
266 return true;
267 else if (C1->Kind == C2->Kind)
268 return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
269 }
270 }
271 return false;
272 }
273 };
274
275 char IfConverter::ID = 0;
276 }
277
278 char &llvm::IfConverterID = IfConverter::ID;
279
280 INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)281 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
282 INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
283
284 bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
285 if (skipFunction(*MF.getFunction()) ||
286 (PredicateFtor && !PredicateFtor(*MF.getFunction())))
287 return false;
288
289 const TargetSubtargetInfo &ST = MF.getSubtarget();
290 TLI = ST.getTargetLowering();
291 TII = ST.getInstrInfo();
292 TRI = ST.getRegisterInfo();
293 BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
294 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
295 MRI = &MF.getRegInfo();
296 SchedModel.init(ST.getSchedModel(), &ST, TII);
297
298 if (!TII) return false;
299
300 PreRegAlloc = MRI->isSSA();
301
302 bool BFChange = false;
303 if (!PreRegAlloc) {
304 // Tail merge tend to expose more if-conversion opportunities.
305 BranchFolder BF(true, false, MBFI, *MBPI);
306 BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(),
307 getAnalysisIfAvailable<MachineModuleInfo>());
308 }
309
310 DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
311 << MF.getName() << "\'");
312
313 if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
314 DEBUG(dbgs() << " skipped\n");
315 return false;
316 }
317 DEBUG(dbgs() << "\n");
318
319 MF.RenumberBlocks();
320 BBAnalysis.resize(MF.getNumBlockIDs());
321
322 std::vector<std::unique_ptr<IfcvtToken>> Tokens;
323 MadeChange = false;
324 unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
325 NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
326 while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
327 // Do an initial analysis for each basic block and find all the potential
328 // candidates to perform if-conversion.
329 bool Change = false;
330 AnalyzeBlocks(MF, Tokens);
331 while (!Tokens.empty()) {
332 std::unique_ptr<IfcvtToken> Token = std::move(Tokens.back());
333 Tokens.pop_back();
334 BBInfo &BBI = Token->BBI;
335 IfcvtKind Kind = Token->Kind;
336 unsigned NumDups = Token->NumDups;
337 unsigned NumDups2 = Token->NumDups2;
338
339 // If the block has been evicted out of the queue or it has already been
340 // marked dead (due to it being predicated), then skip it.
341 if (BBI.IsDone)
342 BBI.IsEnqueued = false;
343 if (!BBI.IsEnqueued)
344 continue;
345
346 BBI.IsEnqueued = false;
347
348 bool RetVal = false;
349 switch (Kind) {
350 default: llvm_unreachable("Unexpected!");
351 case ICSimple:
352 case ICSimpleFalse: {
353 bool isFalse = Kind == ICSimpleFalse;
354 if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
355 DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ?
356 " false" : "")
357 << "): BB#" << BBI.BB->getNumber() << " ("
358 << ((Kind == ICSimpleFalse)
359 ? BBI.FalseBB->getNumber()
360 : BBI.TrueBB->getNumber()) << ") ");
361 RetVal = IfConvertSimple(BBI, Kind);
362 DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
363 if (RetVal) {
364 if (isFalse) ++NumSimpleFalse;
365 else ++NumSimple;
366 }
367 break;
368 }
369 case ICTriangle:
370 case ICTriangleRev:
371 case ICTriangleFalse:
372 case ICTriangleFRev: {
373 bool isFalse = Kind == ICTriangleFalse;
374 bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev);
375 if (DisableTriangle && !isFalse && !isRev) break;
376 if (DisableTriangleR && !isFalse && isRev) break;
377 if (DisableTriangleF && isFalse && !isRev) break;
378 if (DisableTriangleFR && isFalse && isRev) break;
379 DEBUG(dbgs() << "Ifcvt (Triangle");
380 if (isFalse)
381 DEBUG(dbgs() << " false");
382 if (isRev)
383 DEBUG(dbgs() << " rev");
384 DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:"
385 << BBI.TrueBB->getNumber() << ",F:"
386 << BBI.FalseBB->getNumber() << ") ");
387 RetVal = IfConvertTriangle(BBI, Kind);
388 DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
389 if (RetVal) {
390 if (isFalse) {
391 if (isRev) ++NumTriangleFRev;
392 else ++NumTriangleFalse;
393 } else {
394 if (isRev) ++NumTriangleRev;
395 else ++NumTriangle;
396 }
397 }
398 break;
399 }
400 case ICDiamond: {
401 if (DisableDiamond) break;
402 DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
403 << BBI.TrueBB->getNumber() << ",F:"
404 << BBI.FalseBB->getNumber() << ") ");
405 RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
406 DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
407 if (RetVal) ++NumDiamonds;
408 break;
409 }
410 }
411
412 Change |= RetVal;
413
414 NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
415 NumTriangleFalse + NumTriangleFRev + NumDiamonds;
416 if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
417 break;
418 }
419
420 if (!Change)
421 break;
422 MadeChange |= Change;
423 }
424
425 Tokens.clear();
426 BBAnalysis.clear();
427
428 if (MadeChange && IfCvtBranchFold) {
429 BranchFolder BF(false, false, MBFI, *MBPI);
430 BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
431 getAnalysisIfAvailable<MachineModuleInfo>());
432 }
433
434 MadeChange |= BFChange;
435 return MadeChange;
436 }
437
438 /// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
439 /// its 'true' successor.
findFalseBlock(MachineBasicBlock * BB,MachineBasicBlock * TrueBB)440 static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
441 MachineBasicBlock *TrueBB) {
442 for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
443 E = BB->succ_end(); SI != E; ++SI) {
444 MachineBasicBlock *SuccBB = *SI;
445 if (SuccBB != TrueBB)
446 return SuccBB;
447 }
448 return nullptr;
449 }
450
451 /// ReverseBranchCondition - Reverse the condition of the end of the block
452 /// branch. Swap block's 'true' and 'false' successors.
ReverseBranchCondition(BBInfo & BBI)453 bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
454 DebugLoc dl; // FIXME: this is nowhere
455 if (!TII->ReverseBranchCondition(BBI.BrCond)) {
456 TII->RemoveBranch(*BBI.BB);
457 TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
458 std::swap(BBI.TrueBB, BBI.FalseBB);
459 return true;
460 }
461 return false;
462 }
463
464 /// getNextBlock - Returns the next block in the function blocks ordering. If
465 /// it is the end, returns NULL.
getNextBlock(MachineBasicBlock * BB)466 static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
467 MachineFunction::iterator I = BB->getIterator();
468 MachineFunction::iterator E = BB->getParent()->end();
469 if (++I == E)
470 return nullptr;
471 return &*I;
472 }
473
474 /// ValidSimple - Returns true if the 'true' block (along with its
475 /// predecessor) forms a valid simple shape for ifcvt. It also returns the
476 /// number of instructions that the ifcvt would need to duplicate if performed
477 /// in Dups.
ValidSimple(BBInfo & TrueBBI,unsigned & Dups,BranchProbability Prediction) const478 bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
479 BranchProbability Prediction) const {
480 Dups = 0;
481 if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
482 return false;
483
484 if (TrueBBI.IsBrAnalyzable)
485 return false;
486
487 if (TrueBBI.BB->pred_size() > 1) {
488 if (TrueBBI.CannotBeCopied ||
489 !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
490 Prediction))
491 return false;
492 Dups = TrueBBI.NonPredSize;
493 }
494
495 return true;
496 }
497
498 /// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
499 /// with their common predecessor) forms a valid triangle shape for ifcvt.
500 /// If 'FalseBranch' is true, it checks if 'true' block's false branch
501 /// branches to the 'false' block rather than the other way around. It also
502 /// returns the number of instructions that the ifcvt would need to duplicate
503 /// if performed in 'Dups'.
ValidTriangle(BBInfo & TrueBBI,BBInfo & FalseBBI,bool FalseBranch,unsigned & Dups,BranchProbability Prediction) const504 bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
505 bool FalseBranch, unsigned &Dups,
506 BranchProbability Prediction) const {
507 Dups = 0;
508 if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
509 return false;
510
511 if (TrueBBI.BB->pred_size() > 1) {
512 if (TrueBBI.CannotBeCopied)
513 return false;
514
515 unsigned Size = TrueBBI.NonPredSize;
516 if (TrueBBI.IsBrAnalyzable) {
517 if (TrueBBI.TrueBB && TrueBBI.BrCond.empty())
518 // Ends with an unconditional branch. It will be removed.
519 --Size;
520 else {
521 MachineBasicBlock *FExit = FalseBranch
522 ? TrueBBI.TrueBB : TrueBBI.FalseBB;
523 if (FExit)
524 // Require a conditional branch
525 ++Size;
526 }
527 }
528 if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction))
529 return false;
530 Dups = Size;
531 }
532
533 MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
534 if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
535 MachineFunction::iterator I = TrueBBI.BB->getIterator();
536 if (++I == TrueBBI.BB->getParent()->end())
537 return false;
538 TExit = &*I;
539 }
540 return TExit && TExit == FalseBBI.BB;
541 }
542
543 /// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
544 /// with their common predecessor) forms a valid diamond shape for ifcvt.
ValidDiamond(BBInfo & TrueBBI,BBInfo & FalseBBI,unsigned & Dups1,unsigned & Dups2) const545 bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
546 unsigned &Dups1, unsigned &Dups2) const {
547 Dups1 = Dups2 = 0;
548 if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
549 FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
550 return false;
551
552 MachineBasicBlock *TT = TrueBBI.TrueBB;
553 MachineBasicBlock *FT = FalseBBI.TrueBB;
554
555 if (!TT && blockAlwaysFallThrough(TrueBBI))
556 TT = getNextBlock(TrueBBI.BB);
557 if (!FT && blockAlwaysFallThrough(FalseBBI))
558 FT = getNextBlock(FalseBBI.BB);
559 if (TT != FT)
560 return false;
561 if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
562 return false;
563 if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
564 return false;
565
566 // FIXME: Allow true block to have an early exit?
567 if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
568 (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
569 return false;
570
571 // Count duplicate instructions at the beginning of the true and false blocks.
572 MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
573 MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
574 MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
575 MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
576 while (TIB != TIE && FIB != FIE) {
577 // Skip dbg_value instructions. These do not count.
578 if (TIB->isDebugValue()) {
579 while (TIB != TIE && TIB->isDebugValue())
580 ++TIB;
581 if (TIB == TIE)
582 break;
583 }
584 if (FIB->isDebugValue()) {
585 while (FIB != FIE && FIB->isDebugValue())
586 ++FIB;
587 if (FIB == FIE)
588 break;
589 }
590 if (!TIB->isIdenticalTo(*FIB))
591 break;
592 ++Dups1;
593 ++TIB;
594 ++FIB;
595 }
596
597 // Now, in preparation for counting duplicate instructions at the ends of the
598 // blocks, move the end iterators up past any branch instructions.
599 // If both blocks are returning don't skip the branches, since they will
600 // likely be both identical return instructions. In such cases the return
601 // can be left unpredicated.
602 // Check for already containing all of the block.
603 if (TIB == TIE || FIB == FIE)
604 return true;
605 --TIE;
606 --FIE;
607 if (!TrueBBI.BB->succ_empty() || !FalseBBI.BB->succ_empty()) {
608 while (TIE != TIB && TIE->isBranch())
609 --TIE;
610 while (FIE != FIB && FIE->isBranch())
611 --FIE;
612 }
613
614 // If Dups1 includes all of a block, then don't count duplicate
615 // instructions at the end of the blocks.
616 if (TIB == TIE || FIB == FIE)
617 return true;
618
619 // Count duplicate instructions at the ends of the blocks.
620 while (TIE != TIB && FIE != FIB) {
621 // Skip dbg_value instructions. These do not count.
622 if (TIE->isDebugValue()) {
623 while (TIE != TIB && TIE->isDebugValue())
624 --TIE;
625 if (TIE == TIB)
626 break;
627 }
628 if (FIE->isDebugValue()) {
629 while (FIE != FIB && FIE->isDebugValue())
630 --FIE;
631 if (FIE == FIB)
632 break;
633 }
634 if (!TIE->isIdenticalTo(*FIE))
635 break;
636 ++Dups2;
637 --TIE;
638 --FIE;
639 }
640
641 return true;
642 }
643
644 /// ScanInstructions - Scan all the instructions in the block to determine if
645 /// the block is predicable. In most cases, that means all the instructions
646 /// in the block are isPredicable(). Also checks if the block contains any
647 /// instruction which can clobber a predicate (e.g. condition code register).
648 /// If so, the block is not predicable unless it's the last instruction.
ScanInstructions(BBInfo & BBI)649 void IfConverter::ScanInstructions(BBInfo &BBI) {
650 if (BBI.IsDone)
651 return;
652
653 bool AlreadyPredicated = !BBI.Predicate.empty();
654 // First analyze the end of BB branches.
655 BBI.TrueBB = BBI.FalseBB = nullptr;
656 BBI.BrCond.clear();
657 BBI.IsBrAnalyzable =
658 !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
659 BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr;
660
661 if (BBI.BrCond.size()) {
662 // No false branch. This BB must end with a conditional branch and a
663 // fallthrough.
664 if (!BBI.FalseBB)
665 BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
666 if (!BBI.FalseBB) {
667 // Malformed bcc? True and false blocks are the same?
668 BBI.IsUnpredicable = true;
669 return;
670 }
671 }
672
673 // Then scan all the instructions.
674 BBI.NonPredSize = 0;
675 BBI.ExtraCost = 0;
676 BBI.ExtraCost2 = 0;
677 BBI.ClobbersPred = false;
678 for (auto &MI : *BBI.BB) {
679 if (MI.isDebugValue())
680 continue;
681
682 // It's unsafe to duplicate convergent instructions in this context, so set
683 // BBI.CannotBeCopied to true if MI is convergent. To see why, consider the
684 // following CFG, which is subject to our "simple" transformation.
685 //
686 // BB0 // if (c1) goto BB1; else goto BB2;
687 // / \
688 // BB1 |
689 // | BB2 // if (c2) goto TBB; else goto FBB;
690 // | / |
691 // | / |
692 // TBB |
693 // | |
694 // | FBB
695 // |
696 // exit
697 //
698 // Suppose we want to move TBB's contents up into BB1 and BB2 (in BB1 they'd
699 // be unconditional, and in BB2, they'd be predicated upon c2), and suppose
700 // TBB contains a convergent instruction. This is safe iff doing so does
701 // not add a control-flow dependency to the convergent instruction -- i.e.,
702 // it's safe iff the set of control flows that leads us to the convergent
703 // instruction does not get smaller after the transformation.
704 //
705 // Originally we executed TBB if c1 || c2. After the transformation, there
706 // are two copies of TBB's instructions. We get to the first if c1, and we
707 // get to the second if !c1 && c2.
708 //
709 // There are clearly fewer ways to satisfy the condition "c1" than
710 // "c1 || c2". Since we've shrunk the set of control flows which lead to
711 // our convergent instruction, the transformation is unsafe.
712 if (MI.isNotDuplicable() || MI.isConvergent())
713 BBI.CannotBeCopied = true;
714
715 bool isPredicated = TII->isPredicated(MI);
716 bool isCondBr = BBI.IsBrAnalyzable && MI.isConditionalBranch();
717
718 // A conditional branch is not predicable, but it may be eliminated.
719 if (isCondBr)
720 continue;
721
722 if (!isPredicated) {
723 BBI.NonPredSize++;
724 unsigned ExtraPredCost = TII->getPredicationCost(MI);
725 unsigned NumCycles = SchedModel.computeInstrLatency(&MI, false);
726 if (NumCycles > 1)
727 BBI.ExtraCost += NumCycles-1;
728 BBI.ExtraCost2 += ExtraPredCost;
729 } else if (!AlreadyPredicated) {
730 // FIXME: This instruction is already predicated before the
731 // if-conversion pass. It's probably something like a conditional move.
732 // Mark this block unpredicable for now.
733 BBI.IsUnpredicable = true;
734 return;
735 }
736
737 if (BBI.ClobbersPred && !isPredicated) {
738 // Predicate modification instruction should end the block (except for
739 // already predicated instructions and end of block branches).
740 // Predicate may have been modified, the subsequent (currently)
741 // unpredicated instructions cannot be correctly predicated.
742 BBI.IsUnpredicable = true;
743 return;
744 }
745
746 // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are
747 // still potentially predicable.
748 std::vector<MachineOperand> PredDefs;
749 if (TII->DefinesPredicate(MI, PredDefs))
750 BBI.ClobbersPred = true;
751
752 if (!TII->isPredicable(MI)) {
753 BBI.IsUnpredicable = true;
754 return;
755 }
756 }
757 }
758
759 /// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
760 /// predicated by the specified predicate.
FeasibilityAnalysis(BBInfo & BBI,SmallVectorImpl<MachineOperand> & Pred,bool isTriangle,bool RevBranch)761 bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
762 SmallVectorImpl<MachineOperand> &Pred,
763 bool isTriangle, bool RevBranch) {
764 // If the block is dead or unpredicable, then it cannot be predicated.
765 if (BBI.IsDone || BBI.IsUnpredicable)
766 return false;
767
768 // If it is already predicated but we couldn't analyze its terminator, the
769 // latter might fallthrough, but we can't determine where to.
770 // Conservatively avoid if-converting again.
771 if (BBI.Predicate.size() && !BBI.IsBrAnalyzable)
772 return false;
773
774 // If it is already predicated, check if the new predicate subsumes
775 // its predicate.
776 if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate))
777 return false;
778
779 if (BBI.BrCond.size()) {
780 if (!isTriangle)
781 return false;
782
783 // Test predicate subsumption.
784 SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
785 SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
786 if (RevBranch) {
787 if (TII->ReverseBranchCondition(Cond))
788 return false;
789 }
790 if (TII->ReverseBranchCondition(RevPred) ||
791 !TII->SubsumesPredicate(Cond, RevPred))
792 return false;
793 }
794
795 return true;
796 }
797
798 /// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
799 /// the specified block. Record its successors and whether it looks like an
800 /// if-conversion candidate.
AnalyzeBlock(MachineBasicBlock * MBB,std::vector<std::unique_ptr<IfcvtToken>> & Tokens)801 void IfConverter::AnalyzeBlock(
802 MachineBasicBlock *MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
803 struct BBState {
804 BBState(MachineBasicBlock *BB) : MBB(BB), SuccsAnalyzed(false) {}
805 MachineBasicBlock *MBB;
806
807 /// This flag is true if MBB's successors have been analyzed.
808 bool SuccsAnalyzed;
809 };
810
811 // Push MBB to the stack.
812 SmallVector<BBState, 16> BBStack(1, MBB);
813
814 while (!BBStack.empty()) {
815 BBState &State = BBStack.back();
816 MachineBasicBlock *BB = State.MBB;
817 BBInfo &BBI = BBAnalysis[BB->getNumber()];
818
819 if (!State.SuccsAnalyzed) {
820 if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed) {
821 BBStack.pop_back();
822 continue;
823 }
824
825 BBI.BB = BB;
826 BBI.IsBeingAnalyzed = true;
827
828 ScanInstructions(BBI);
829
830 // Unanalyzable or ends with fallthrough or unconditional branch, or if is
831 // not considered for ifcvt anymore.
832 if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) {
833 BBI.IsBeingAnalyzed = false;
834 BBI.IsAnalyzed = true;
835 BBStack.pop_back();
836 continue;
837 }
838
839 // Do not ifcvt if either path is a back edge to the entry block.
840 if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
841 BBI.IsBeingAnalyzed = false;
842 BBI.IsAnalyzed = true;
843 BBStack.pop_back();
844 continue;
845 }
846
847 // Do not ifcvt if true and false fallthrough blocks are the same.
848 if (!BBI.FalseBB) {
849 BBI.IsBeingAnalyzed = false;
850 BBI.IsAnalyzed = true;
851 BBStack.pop_back();
852 continue;
853 }
854
855 // Push the False and True blocks to the stack.
856 State.SuccsAnalyzed = true;
857 BBStack.push_back(BBI.FalseBB);
858 BBStack.push_back(BBI.TrueBB);
859 continue;
860 }
861
862 BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
863 BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
864
865 if (TrueBBI.IsDone && FalseBBI.IsDone) {
866 BBI.IsBeingAnalyzed = false;
867 BBI.IsAnalyzed = true;
868 BBStack.pop_back();
869 continue;
870 }
871
872 SmallVector<MachineOperand, 4>
873 RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
874 bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
875
876 unsigned Dups = 0;
877 unsigned Dups2 = 0;
878 bool TNeedSub = !TrueBBI.Predicate.empty();
879 bool FNeedSub = !FalseBBI.Predicate.empty();
880 bool Enqueued = false;
881
882 BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
883
884 if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
885 MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
886 TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
887 *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
888 FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
889 Prediction) &&
890 FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
891 FeasibilityAnalysis(FalseBBI, RevCond)) {
892 // Diamond:
893 // EBB
894 // / \_
895 // | |
896 // TBB FBB
897 // \ /
898 // TailBB
899 // Note TailBB can be empty.
900 Tokens.push_back(llvm::make_unique<IfcvtToken>(
901 BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2));
902 Enqueued = true;
903 }
904
905 if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
906 MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
907 TrueBBI.ExtraCost2, Prediction) &&
908 FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
909 // Triangle:
910 // EBB
911 // | \_
912 // | |
913 // | TBB
914 // | /
915 // FBB
916 Tokens.push_back(
917 llvm::make_unique<IfcvtToken>(BBI, ICTriangle, TNeedSub, Dups));
918 Enqueued = true;
919 }
920
921 if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) &&
922 MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
923 TrueBBI.ExtraCost2, Prediction) &&
924 FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
925 Tokens.push_back(
926 llvm::make_unique<IfcvtToken>(BBI, ICTriangleRev, TNeedSub, Dups));
927 Enqueued = true;
928 }
929
930 if (ValidSimple(TrueBBI, Dups, Prediction) &&
931 MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
932 TrueBBI.ExtraCost2, Prediction) &&
933 FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
934 // Simple (split, no rejoin):
935 // EBB
936 // | \_
937 // | |
938 // | TBB---> exit
939 // |
940 // FBB
941 Tokens.push_back(
942 llvm::make_unique<IfcvtToken>(BBI, ICSimple, TNeedSub, Dups));
943 Enqueued = true;
944 }
945
946 if (CanRevCond) {
947 // Try the other path...
948 if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
949 Prediction.getCompl()) &&
950 MeetIfcvtSizeLimit(*FalseBBI.BB,
951 FalseBBI.NonPredSize + FalseBBI.ExtraCost,
952 FalseBBI.ExtraCost2, Prediction.getCompl()) &&
953 FeasibilityAnalysis(FalseBBI, RevCond, true)) {
954 Tokens.push_back(llvm::make_unique<IfcvtToken>(BBI, ICTriangleFalse,
955 FNeedSub, Dups));
956 Enqueued = true;
957 }
958
959 if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
960 Prediction.getCompl()) &&
961 MeetIfcvtSizeLimit(*FalseBBI.BB,
962 FalseBBI.NonPredSize + FalseBBI.ExtraCost,
963 FalseBBI.ExtraCost2, Prediction.getCompl()) &&
964 FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
965 Tokens.push_back(
966 llvm::make_unique<IfcvtToken>(BBI, ICTriangleFRev, FNeedSub, Dups));
967 Enqueued = true;
968 }
969
970 if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) &&
971 MeetIfcvtSizeLimit(*FalseBBI.BB,
972 FalseBBI.NonPredSize + FalseBBI.ExtraCost,
973 FalseBBI.ExtraCost2, Prediction.getCompl()) &&
974 FeasibilityAnalysis(FalseBBI, RevCond)) {
975 Tokens.push_back(
976 llvm::make_unique<IfcvtToken>(BBI, ICSimpleFalse, FNeedSub, Dups));
977 Enqueued = true;
978 }
979 }
980
981 BBI.IsEnqueued = Enqueued;
982 BBI.IsBeingAnalyzed = false;
983 BBI.IsAnalyzed = true;
984 BBStack.pop_back();
985 }
986 }
987
988 /// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
989 /// candidates.
AnalyzeBlocks(MachineFunction & MF,std::vector<std::unique_ptr<IfcvtToken>> & Tokens)990 void IfConverter::AnalyzeBlocks(
991 MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
992 for (auto &BB : MF)
993 AnalyzeBlock(&BB, Tokens);
994
995 // Sort to favor more complex ifcvt scheme.
996 std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
997 }
998
999 /// canFallThroughTo - Returns true either if ToBB is the next block after BB or
1000 /// that all the intervening blocks are empty (given BB can fall through to its
1001 /// next block).
canFallThroughTo(MachineBasicBlock * BB,MachineBasicBlock * ToBB)1002 static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
1003 MachineFunction::iterator PI = BB->getIterator();
1004 MachineFunction::iterator I = std::next(PI);
1005 MachineFunction::iterator TI = ToBB->getIterator();
1006 MachineFunction::iterator E = BB->getParent()->end();
1007 while (I != TI) {
1008 // Check isSuccessor to avoid case where the next block is empty, but
1009 // it's not a successor.
1010 if (I == E || !I->empty() || !PI->isSuccessor(&*I))
1011 return false;
1012 PI = I++;
1013 }
1014 return true;
1015 }
1016
1017 /// InvalidatePreds - Invalidate predecessor BB info so it would be re-analyzed
1018 /// to determine if it can be if-converted. If predecessor is already enqueued,
1019 /// dequeue it!
InvalidatePreds(MachineBasicBlock * BB)1020 void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
1021 for (const auto &Predecessor : BB->predecessors()) {
1022 BBInfo &PBBI = BBAnalysis[Predecessor->getNumber()];
1023 if (PBBI.IsDone || PBBI.BB == BB)
1024 continue;
1025 PBBI.IsAnalyzed = false;
1026 PBBI.IsEnqueued = false;
1027 }
1028 }
1029
1030 /// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
1031 ///
InsertUncondBranch(MachineBasicBlock * BB,MachineBasicBlock * ToBB,const TargetInstrInfo * TII)1032 static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
1033 const TargetInstrInfo *TII) {
1034 DebugLoc dl; // FIXME: this is nowhere
1035 SmallVector<MachineOperand, 0> NoCond;
1036 TII->InsertBranch(*BB, ToBB, nullptr, NoCond, dl);
1037 }
1038
1039 /// RemoveExtraEdges - Remove true / false edges if either / both are no longer
1040 /// successors.
RemoveExtraEdges(BBInfo & BBI)1041 void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
1042 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1043 SmallVector<MachineOperand, 4> Cond;
1044 if (!TII->analyzeBranch(*BBI.BB, TBB, FBB, Cond))
1045 BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
1046 }
1047
1048 /// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all
1049 /// values defined in MI which are not live/used by MI.
UpdatePredRedefs(MachineInstr & MI,LivePhysRegs & Redefs)1050 static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
1051 SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers;
1052 Redefs.stepForward(MI, Clobbers);
1053
1054 // Now add the implicit uses for each of the clobbered values.
1055 for (auto Reg : Clobbers) {
1056 // FIXME: Const cast here is nasty, but better than making StepForward
1057 // take a mutable instruction instead of const.
1058 MachineOperand &Op = const_cast<MachineOperand&>(*Reg.second);
1059 MachineInstr *OpMI = Op.getParent();
1060 MachineInstrBuilder MIB(*OpMI->getParent()->getParent(), OpMI);
1061 if (Op.isRegMask()) {
1062 // First handle regmasks. They clobber any entries in the mask which
1063 // means that we need a def for those registers.
1064 MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef);
1065
1066 // We also need to add an implicit def of this register for the later
1067 // use to read from.
1068 // For the register allocator to have allocated a register clobbered
1069 // by the call which is used later, it must be the case that
1070 // the call doesn't return.
1071 MIB.addReg(Reg.first, RegState::Implicit | RegState::Define);
1072 continue;
1073 }
1074 assert(Op.isReg() && "Register operand required");
1075 if (Op.isDead()) {
1076 // If we found a dead def, but it needs to be live, then remove the dead
1077 // flag.
1078 if (Redefs.contains(Op.getReg()))
1079 Op.setIsDead(false);
1080 }
1081 MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef);
1082 }
1083 }
1084
1085 /**
1086 * Remove kill flags from operands with a registers in the @p DontKill set.
1087 */
RemoveKills(MachineInstr & MI,const LivePhysRegs & DontKill)1088 static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) {
1089 for (MIBundleOperands O(MI); O.isValid(); ++O) {
1090 if (!O->isReg() || !O->isKill())
1091 continue;
1092 if (DontKill.contains(O->getReg()))
1093 O->setIsKill(false);
1094 }
1095 }
1096
1097 /**
1098 * Walks a range of machine instructions and removes kill flags for registers
1099 * in the @p DontKill set.
1100 */
RemoveKills(MachineBasicBlock::iterator I,MachineBasicBlock::iterator E,const LivePhysRegs & DontKill,const MCRegisterInfo & MCRI)1101 static void RemoveKills(MachineBasicBlock::iterator I,
1102 MachineBasicBlock::iterator E,
1103 const LivePhysRegs &DontKill,
1104 const MCRegisterInfo &MCRI) {
1105 for ( ; I != E; ++I)
1106 RemoveKills(*I, DontKill);
1107 }
1108
1109 /// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
1110 ///
IfConvertSimple(BBInfo & BBI,IfcvtKind Kind)1111 bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
1112 BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
1113 BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
1114 BBInfo *CvtBBI = &TrueBBI;
1115 BBInfo *NextBBI = &FalseBBI;
1116
1117 SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
1118 if (Kind == ICSimpleFalse)
1119 std::swap(CvtBBI, NextBBI);
1120
1121 if (CvtBBI->IsDone ||
1122 (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
1123 // Something has changed. It's no longer safe to predicate this block.
1124 BBI.IsAnalyzed = false;
1125 CvtBBI->IsAnalyzed = false;
1126 return false;
1127 }
1128
1129 if (CvtBBI->BB->hasAddressTaken())
1130 // Conservatively abort if-conversion if BB's address is taken.
1131 return false;
1132
1133 if (Kind == ICSimpleFalse)
1134 if (TII->ReverseBranchCondition(Cond))
1135 llvm_unreachable("Unable to reverse branch condition!");
1136
1137 // Initialize liveins to the first BB. These are potentiall redefined by
1138 // predicated instructions.
1139 Redefs.init(TRI);
1140 Redefs.addLiveIns(*CvtBBI->BB);
1141 Redefs.addLiveIns(*NextBBI->BB);
1142
1143 // Compute a set of registers which must not be killed by instructions in
1144 // BB1: This is everything live-in to BB2.
1145 DontKill.init(TRI);
1146 DontKill.addLiveIns(*NextBBI->BB);
1147
1148 if (CvtBBI->BB->pred_size() > 1) {
1149 BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
1150 // Copy instructions in the true block, predicate them, and add them to
1151 // the entry block.
1152 CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
1153
1154 // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
1155 // explicitly remove CvtBBI as a successor.
1156 BBI.BB->removeSuccessor(CvtBBI->BB, true);
1157 } else {
1158 RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI);
1159 PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
1160
1161 // Merge converted block into entry block.
1162 BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
1163 MergeBlocks(BBI, *CvtBBI);
1164 }
1165
1166 bool IterIfcvt = true;
1167 if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
1168 InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
1169 BBI.HasFallThrough = false;
1170 // Now ifcvt'd block will look like this:
1171 // BB:
1172 // ...
1173 // t, f = cmp
1174 // if t op
1175 // b BBf
1176 //
1177 // We cannot further ifcvt this block because the unconditional branch
1178 // will have to be predicated on the new condition, that will not be
1179 // available if cmp executes.
1180 IterIfcvt = false;
1181 }
1182
1183 RemoveExtraEdges(BBI);
1184
1185 // Update block info. BB can be iteratively if-converted.
1186 if (!IterIfcvt)
1187 BBI.IsDone = true;
1188 InvalidatePreds(BBI.BB);
1189 CvtBBI->IsDone = true;
1190
1191 // FIXME: Must maintain LiveIns.
1192 return true;
1193 }
1194
1195 /// IfConvertTriangle - If convert a triangle sub-CFG.
1196 ///
IfConvertTriangle(BBInfo & BBI,IfcvtKind Kind)1197 bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
1198 BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
1199 BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
1200 BBInfo *CvtBBI = &TrueBBI;
1201 BBInfo *NextBBI = &FalseBBI;
1202 DebugLoc dl; // FIXME: this is nowhere
1203
1204 SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
1205 if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
1206 std::swap(CvtBBI, NextBBI);
1207
1208 if (CvtBBI->IsDone ||
1209 (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
1210 // Something has changed. It's no longer safe to predicate this block.
1211 BBI.IsAnalyzed = false;
1212 CvtBBI->IsAnalyzed = false;
1213 return false;
1214 }
1215
1216 if (CvtBBI->BB->hasAddressTaken())
1217 // Conservatively abort if-conversion if BB's address is taken.
1218 return false;
1219
1220 if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
1221 if (TII->ReverseBranchCondition(Cond))
1222 llvm_unreachable("Unable to reverse branch condition!");
1223
1224 if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
1225 if (ReverseBranchCondition(*CvtBBI)) {
1226 // BB has been changed, modify its predecessors (except for this
1227 // one) so they don't get ifcvt'ed based on bad intel.
1228 for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
1229 E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
1230 MachineBasicBlock *PBB = *PI;
1231 if (PBB == BBI.BB)
1232 continue;
1233 BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
1234 if (PBBI.IsEnqueued) {
1235 PBBI.IsAnalyzed = false;
1236 PBBI.IsEnqueued = false;
1237 }
1238 }
1239 }
1240 }
1241
1242 // Initialize liveins to the first BB. These are potentially redefined by
1243 // predicated instructions.
1244 Redefs.init(TRI);
1245 Redefs.addLiveIns(*CvtBBI->BB);
1246 Redefs.addLiveIns(*NextBBI->BB);
1247
1248 DontKill.clear();
1249
1250 bool HasEarlyExit = CvtBBI->FalseBB != nullptr;
1251 BranchProbability CvtNext, CvtFalse, BBNext, BBCvt;
1252
1253 if (HasEarlyExit) {
1254 // Get probabilities before modifying CvtBBI->BB and BBI.BB.
1255 CvtNext = MBPI->getEdgeProbability(CvtBBI->BB, NextBBI->BB);
1256 CvtFalse = MBPI->getEdgeProbability(CvtBBI->BB, CvtBBI->FalseBB);
1257 BBNext = MBPI->getEdgeProbability(BBI.BB, NextBBI->BB);
1258 BBCvt = MBPI->getEdgeProbability(BBI.BB, CvtBBI->BB);
1259 }
1260
1261 if (CvtBBI->BB->pred_size() > 1) {
1262 BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
1263 // Copy instructions in the true block, predicate them, and add them to
1264 // the entry block.
1265 CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
1266
1267 // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
1268 // explicitly remove CvtBBI as a successor.
1269 BBI.BB->removeSuccessor(CvtBBI->BB, true);
1270 } else {
1271 // Predicate the 'true' block after removing its branch.
1272 CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
1273 PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
1274
1275 // Now merge the entry of the triangle with the true block.
1276 BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
1277 MergeBlocks(BBI, *CvtBBI, false);
1278 }
1279
1280 // If 'true' block has a 'false' successor, add an exit branch to it.
1281 if (HasEarlyExit) {
1282 SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
1283 CvtBBI->BrCond.end());
1284 if (TII->ReverseBranchCondition(RevCond))
1285 llvm_unreachable("Unable to reverse branch condition!");
1286
1287 // Update the edge probability for both CvtBBI->FalseBB and NextBBI.
1288 // NewNext = New_Prob(BBI.BB, NextBBI->BB) =
1289 // Prob(BBI.BB, NextBBI->BB) +
1290 // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, NextBBI->BB)
1291 // NewFalse = New_Prob(BBI.BB, CvtBBI->FalseBB) =
1292 // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, CvtBBI->FalseBB)
1293 auto NewTrueBB = getNextBlock(BBI.BB);
1294 auto NewNext = BBNext + BBCvt * CvtNext;
1295 auto NewTrueBBIter =
1296 std::find(BBI.BB->succ_begin(), BBI.BB->succ_end(), NewTrueBB);
1297 if (NewTrueBBIter != BBI.BB->succ_end())
1298 BBI.BB->setSuccProbability(NewTrueBBIter, NewNext);
1299
1300 auto NewFalse = BBCvt * CvtFalse;
1301 TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl);
1302 BBI.BB->addSuccessor(CvtBBI->FalseBB, NewFalse);
1303 }
1304
1305 // Merge in the 'false' block if the 'false' block has no other
1306 // predecessors. Otherwise, add an unconditional branch to 'false'.
1307 bool FalseBBDead = false;
1308 bool IterIfcvt = true;
1309 bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
1310 if (!isFallThrough) {
1311 // Only merge them if the true block does not fallthrough to the false
1312 // block. By not merging them, we make it possible to iteratively
1313 // ifcvt the blocks.
1314 if (!HasEarlyExit &&
1315 NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough &&
1316 !NextBBI->BB->hasAddressTaken()) {
1317 MergeBlocks(BBI, *NextBBI);
1318 FalseBBDead = true;
1319 } else {
1320 InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
1321 BBI.HasFallThrough = false;
1322 }
1323 // Mixed predicated and unpredicated code. This cannot be iteratively
1324 // predicated.
1325 IterIfcvt = false;
1326 }
1327
1328 RemoveExtraEdges(BBI);
1329
1330 // Update block info. BB can be iteratively if-converted.
1331 if (!IterIfcvt)
1332 BBI.IsDone = true;
1333 InvalidatePreds(BBI.BB);
1334 CvtBBI->IsDone = true;
1335 if (FalseBBDead)
1336 NextBBI->IsDone = true;
1337
1338 // FIXME: Must maintain LiveIns.
1339 return true;
1340 }
1341
1342 /// IfConvertDiamond - If convert a diamond sub-CFG.
1343 ///
IfConvertDiamond(BBInfo & BBI,IfcvtKind Kind,unsigned NumDups1,unsigned NumDups2)1344 bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
1345 unsigned NumDups1, unsigned NumDups2) {
1346 BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
1347 BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
1348 MachineBasicBlock *TailBB = TrueBBI.TrueBB;
1349 // True block must fall through or end with an unanalyzable terminator.
1350 if (!TailBB) {
1351 if (blockAlwaysFallThrough(TrueBBI))
1352 TailBB = FalseBBI.TrueBB;
1353 assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
1354 }
1355
1356 if (TrueBBI.IsDone || FalseBBI.IsDone ||
1357 TrueBBI.BB->pred_size() > 1 ||
1358 FalseBBI.BB->pred_size() > 1) {
1359 // Something has changed. It's no longer safe to predicate these blocks.
1360 BBI.IsAnalyzed = false;
1361 TrueBBI.IsAnalyzed = false;
1362 FalseBBI.IsAnalyzed = false;
1363 return false;
1364 }
1365
1366 if (TrueBBI.BB->hasAddressTaken() || FalseBBI.BB->hasAddressTaken())
1367 // Conservatively abort if-conversion if either BB has its address taken.
1368 return false;
1369
1370 // Put the predicated instructions from the 'true' block before the
1371 // instructions from the 'false' block, unless the true block would clobber
1372 // the predicate, in which case, do the opposite.
1373 BBInfo *BBI1 = &TrueBBI;
1374 BBInfo *BBI2 = &FalseBBI;
1375 SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
1376 if (TII->ReverseBranchCondition(RevCond))
1377 llvm_unreachable("Unable to reverse branch condition!");
1378 SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
1379 SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
1380
1381 // Figure out the more profitable ordering.
1382 bool DoSwap = false;
1383 if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
1384 DoSwap = true;
1385 else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
1386 if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
1387 DoSwap = true;
1388 }
1389 if (DoSwap) {
1390 std::swap(BBI1, BBI2);
1391 std::swap(Cond1, Cond2);
1392 }
1393
1394 // Remove the conditional branch from entry to the blocks.
1395 BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
1396
1397 // Initialize liveins to the first BB. These are potentially redefined by
1398 // predicated instructions.
1399 Redefs.init(TRI);
1400 Redefs.addLiveIns(*BBI1->BB);
1401
1402 // Remove the duplicated instructions at the beginnings of both paths.
1403 // Skip dbg_value instructions
1404 MachineBasicBlock::iterator DI1 = BBI1->BB->getFirstNonDebugInstr();
1405 MachineBasicBlock::iterator DI2 = BBI2->BB->getFirstNonDebugInstr();
1406 BBI1->NonPredSize -= NumDups1;
1407 BBI2->NonPredSize -= NumDups1;
1408
1409 // Skip past the dups on each side separately since there may be
1410 // differing dbg_value entries.
1411 for (unsigned i = 0; i < NumDups1; ++DI1) {
1412 if (!DI1->isDebugValue())
1413 ++i;
1414 }
1415 while (NumDups1 != 0) {
1416 ++DI2;
1417 if (!DI2->isDebugValue())
1418 --NumDups1;
1419 }
1420
1421 // Compute a set of registers which must not be killed by instructions in BB1:
1422 // This is everything used+live in BB2 after the duplicated instructions. We
1423 // can compute this set by simulating liveness backwards from the end of BB2.
1424 DontKill.init(TRI);
1425 for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(),
1426 E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) {
1427 DontKill.stepBackward(*I);
1428 }
1429
1430 for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E;
1431 ++I) {
1432 SmallVector<std::pair<unsigned, const MachineOperand*>, 4> IgnoredClobbers;
1433 Redefs.stepForward(*I, IgnoredClobbers);
1434 }
1435 BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
1436 BBI2->BB->erase(BBI2->BB->begin(), DI2);
1437
1438 // Remove branch from the 'true' block, unless it was not analyzable.
1439 // Non-analyzable branches need to be preserved, since in such cases,
1440 // the CFG structure is not an actual diamond (the join block may not
1441 // be present).
1442 if (BBI1->IsBrAnalyzable)
1443 BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
1444 // Remove duplicated instructions.
1445 DI1 = BBI1->BB->end();
1446 for (unsigned i = 0; i != NumDups2; ) {
1447 // NumDups2 only counted non-dbg_value instructions, so this won't
1448 // run off the head of the list.
1449 assert (DI1 != BBI1->BB->begin());
1450 --DI1;
1451 // skip dbg_value instructions
1452 if (!DI1->isDebugValue())
1453 ++i;
1454 }
1455 BBI1->BB->erase(DI1, BBI1->BB->end());
1456
1457 // Kill flags in the true block for registers living into the false block
1458 // must be removed.
1459 RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI);
1460
1461 // Remove 'false' block branch (unless it was not analyzable), and find
1462 // the last instruction to predicate.
1463 if (BBI2->IsBrAnalyzable)
1464 BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
1465 DI2 = BBI2->BB->end();
1466 while (NumDups2 != 0) {
1467 // NumDups2 only counted non-dbg_value instructions, so this won't
1468 // run off the head of the list.
1469 assert (DI2 != BBI2->BB->begin());
1470 --DI2;
1471 // skip dbg_value instructions
1472 if (!DI2->isDebugValue())
1473 --NumDups2;
1474 }
1475
1476 // Remember which registers would later be defined by the false block.
1477 // This allows us not to predicate instructions in the true block that would
1478 // later be re-defined. That is, rather than
1479 // subeq r0, r1, #1
1480 // addne r0, r1, #1
1481 // generate:
1482 // sub r0, r1, #1
1483 // addne r0, r1, #1
1484 SmallSet<unsigned, 4> RedefsByFalse;
1485 SmallSet<unsigned, 4> ExtUses;
1486 if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) {
1487 for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) {
1488 if (FI->isDebugValue())
1489 continue;
1490 SmallVector<unsigned, 4> Defs;
1491 for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
1492 const MachineOperand &MO = FI->getOperand(i);
1493 if (!MO.isReg())
1494 continue;
1495 unsigned Reg = MO.getReg();
1496 if (!Reg)
1497 continue;
1498 if (MO.isDef()) {
1499 Defs.push_back(Reg);
1500 } else if (!RedefsByFalse.count(Reg)) {
1501 // These are defined before ctrl flow reach the 'false' instructions.
1502 // They cannot be modified by the 'true' instructions.
1503 for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
1504 SubRegs.isValid(); ++SubRegs)
1505 ExtUses.insert(*SubRegs);
1506 }
1507 }
1508
1509 for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
1510 unsigned Reg = Defs[i];
1511 if (!ExtUses.count(Reg)) {
1512 for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
1513 SubRegs.isValid(); ++SubRegs)
1514 RedefsByFalse.insert(*SubRegs);
1515 }
1516 }
1517 }
1518 }
1519
1520 // Predicate the 'true' block.
1521 PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse);
1522
1523 // After predicating BBI1, if there is a predicated terminator in BBI1 and
1524 // a non-predicated in BBI2, then we don't want to predicate the one from
1525 // BBI2. The reason is that if we merged these blocks, we would end up with
1526 // two predicated terminators in the same block.
1527 if (!BBI2->BB->empty() && (DI2 == BBI2->BB->end())) {
1528 MachineBasicBlock::iterator BBI1T = BBI1->BB->getFirstTerminator();
1529 MachineBasicBlock::iterator BBI2T = BBI2->BB->getFirstTerminator();
1530 if (BBI1T != BBI1->BB->end() && TII->isPredicated(*BBI1T) &&
1531 BBI2T != BBI2->BB->end() && !TII->isPredicated(*BBI2T))
1532 --DI2;
1533 }
1534
1535 // Predicate the 'false' block.
1536 PredicateBlock(*BBI2, DI2, *Cond2);
1537
1538 // Merge the true block into the entry of the diamond.
1539 MergeBlocks(BBI, *BBI1, TailBB == nullptr);
1540 MergeBlocks(BBI, *BBI2, TailBB == nullptr);
1541
1542 // If the if-converted block falls through or unconditionally branches into
1543 // the tail block, and the tail block does not have other predecessors, then
1544 // fold the tail block in as well. Otherwise, unless it falls through to the
1545 // tail, add a unconditional branch to it.
1546 if (TailBB) {
1547 BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
1548 bool CanMergeTail = !TailBBI.HasFallThrough &&
1549 !TailBBI.BB->hasAddressTaken();
1550 // The if-converted block can still have a predicated terminator
1551 // (e.g. a predicated return). If that is the case, we cannot merge
1552 // it with the tail block.
1553 MachineBasicBlock::const_iterator TI = BBI.BB->getFirstTerminator();
1554 if (TI != BBI.BB->end() && TII->isPredicated(*TI))
1555 CanMergeTail = false;
1556 // There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
1557 // check if there are any other predecessors besides those.
1558 unsigned NumPreds = TailBB->pred_size();
1559 if (NumPreds > 1)
1560 CanMergeTail = false;
1561 else if (NumPreds == 1 && CanMergeTail) {
1562 MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
1563 if (*PI != BBI1->BB && *PI != BBI2->BB)
1564 CanMergeTail = false;
1565 }
1566 if (CanMergeTail) {
1567 MergeBlocks(BBI, TailBBI);
1568 TailBBI.IsDone = true;
1569 } else {
1570 BBI.BB->addSuccessor(TailBB, BranchProbability::getOne());
1571 InsertUncondBranch(BBI.BB, TailBB, TII);
1572 BBI.HasFallThrough = false;
1573 }
1574 }
1575
1576 // RemoveExtraEdges won't work if the block has an unanalyzable branch,
1577 // which can happen here if TailBB is unanalyzable and is merged, so
1578 // explicitly remove BBI1 and BBI2 as successors.
1579 BBI.BB->removeSuccessor(BBI1->BB);
1580 BBI.BB->removeSuccessor(BBI2->BB, true);
1581 RemoveExtraEdges(BBI);
1582
1583 // Update block info.
1584 BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
1585 InvalidatePreds(BBI.BB);
1586
1587 // FIXME: Must maintain LiveIns.
1588 return true;
1589 }
1590
MaySpeculate(const MachineInstr & MI,SmallSet<unsigned,4> & LaterRedefs)1591 static bool MaySpeculate(const MachineInstr &MI,
1592 SmallSet<unsigned, 4> &LaterRedefs) {
1593 bool SawStore = true;
1594 if (!MI.isSafeToMove(nullptr, SawStore))
1595 return false;
1596
1597 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1598 const MachineOperand &MO = MI.getOperand(i);
1599 if (!MO.isReg())
1600 continue;
1601 unsigned Reg = MO.getReg();
1602 if (!Reg)
1603 continue;
1604 if (MO.isDef() && !LaterRedefs.count(Reg))
1605 return false;
1606 }
1607
1608 return true;
1609 }
1610
1611 /// PredicateBlock - Predicate instructions from the start of the block to the
1612 /// specified end with the specified condition.
PredicateBlock(BBInfo & BBI,MachineBasicBlock::iterator E,SmallVectorImpl<MachineOperand> & Cond,SmallSet<unsigned,4> * LaterRedefs)1613 void IfConverter::PredicateBlock(BBInfo &BBI,
1614 MachineBasicBlock::iterator E,
1615 SmallVectorImpl<MachineOperand> &Cond,
1616 SmallSet<unsigned, 4> *LaterRedefs) {
1617 bool AnyUnpred = false;
1618 bool MaySpec = LaterRedefs != nullptr;
1619 for (MachineInstr &I : llvm::make_range(BBI.BB->begin(), E)) {
1620 if (I.isDebugValue() || TII->isPredicated(I))
1621 continue;
1622 // It may be possible not to predicate an instruction if it's the 'true'
1623 // side of a diamond and the 'false' side may re-define the instruction's
1624 // defs.
1625 if (MaySpec && MaySpeculate(I, *LaterRedefs)) {
1626 AnyUnpred = true;
1627 continue;
1628 }
1629 // If any instruction is predicated, then every instruction after it must
1630 // be predicated.
1631 MaySpec = false;
1632 if (!TII->PredicateInstruction(I, Cond)) {
1633 #ifndef NDEBUG
1634 dbgs() << "Unable to predicate " << I << "!\n";
1635 #endif
1636 llvm_unreachable(nullptr);
1637 }
1638
1639 // If the predicated instruction now redefines a register as the result of
1640 // if-conversion, add an implicit kill.
1641 UpdatePredRedefs(I, Redefs);
1642 }
1643
1644 BBI.Predicate.append(Cond.begin(), Cond.end());
1645
1646 BBI.IsAnalyzed = false;
1647 BBI.NonPredSize = 0;
1648
1649 ++NumIfConvBBs;
1650 if (AnyUnpred)
1651 ++NumUnpred;
1652 }
1653
1654 /// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
1655 /// the destination block. Skip end of block branches if IgnoreBr is true.
CopyAndPredicateBlock(BBInfo & ToBBI,BBInfo & FromBBI,SmallVectorImpl<MachineOperand> & Cond,bool IgnoreBr)1656 void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
1657 SmallVectorImpl<MachineOperand> &Cond,
1658 bool IgnoreBr) {
1659 MachineFunction &MF = *ToBBI.BB->getParent();
1660
1661 for (auto &I : *FromBBI.BB) {
1662 // Do not copy the end of the block branches.
1663 if (IgnoreBr && I.isBranch())
1664 break;
1665
1666 MachineInstr *MI = MF.CloneMachineInstr(&I);
1667 ToBBI.BB->insert(ToBBI.BB->end(), MI);
1668 ToBBI.NonPredSize++;
1669 unsigned ExtraPredCost = TII->getPredicationCost(I);
1670 unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
1671 if (NumCycles > 1)
1672 ToBBI.ExtraCost += NumCycles-1;
1673 ToBBI.ExtraCost2 += ExtraPredCost;
1674
1675 if (!TII->isPredicated(I) && !MI->isDebugValue()) {
1676 if (!TII->PredicateInstruction(*MI, Cond)) {
1677 #ifndef NDEBUG
1678 dbgs() << "Unable to predicate " << I << "!\n";
1679 #endif
1680 llvm_unreachable(nullptr);
1681 }
1682 }
1683
1684 // If the predicated instruction now redefines a register as the result of
1685 // if-conversion, add an implicit kill.
1686 UpdatePredRedefs(*MI, Redefs);
1687
1688 // Some kill flags may not be correct anymore.
1689 if (!DontKill.empty())
1690 RemoveKills(*MI, DontKill);
1691 }
1692
1693 if (!IgnoreBr) {
1694 std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
1695 FromBBI.BB->succ_end());
1696 MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
1697 MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
1698
1699 for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
1700 MachineBasicBlock *Succ = Succs[i];
1701 // Fallthrough edge can't be transferred.
1702 if (Succ == FallThrough)
1703 continue;
1704 ToBBI.BB->addSuccessor(Succ);
1705 }
1706 }
1707
1708 ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
1709 ToBBI.Predicate.append(Cond.begin(), Cond.end());
1710
1711 ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
1712 ToBBI.IsAnalyzed = false;
1713
1714 ++NumDupBBs;
1715 }
1716
1717 /// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
1718 /// This will leave FromBB as an empty block, so remove all of its
1719 /// successor edges except for the fall-through edge. If AddEdges is true,
1720 /// i.e., when FromBBI's branch is being moved, add those successor edges to
1721 /// ToBBI.
MergeBlocks(BBInfo & ToBBI,BBInfo & FromBBI,bool AddEdges)1722 void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
1723 assert(!FromBBI.BB->hasAddressTaken() &&
1724 "Removing a BB whose address is taken!");
1725
1726 // In case FromBBI.BB contains terminators (e.g. return instruction),
1727 // first move the non-terminator instructions, then the terminators.
1728 MachineBasicBlock::iterator FromTI = FromBBI.BB->getFirstTerminator();
1729 MachineBasicBlock::iterator ToTI = ToBBI.BB->getFirstTerminator();
1730 ToBBI.BB->splice(ToTI, FromBBI.BB, FromBBI.BB->begin(), FromTI);
1731
1732 // If FromBB has non-predicated terminator we should copy it at the end.
1733 if (FromTI != FromBBI.BB->end() && !TII->isPredicated(*FromTI))
1734 ToTI = ToBBI.BB->end();
1735 ToBBI.BB->splice(ToTI, FromBBI.BB, FromTI, FromBBI.BB->end());
1736
1737 // Force normalizing the successors' probabilities of ToBBI.BB to convert all
1738 // unknown probabilities into known ones.
1739 // FIXME: This usage is too tricky and in the future we would like to
1740 // eliminate all unknown probabilities in MBB.
1741 ToBBI.BB->normalizeSuccProbs();
1742
1743 SmallVector<MachineBasicBlock *, 4> FromSuccs(FromBBI.BB->succ_begin(),
1744 FromBBI.BB->succ_end());
1745 MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
1746 MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
1747 // The edge probability from ToBBI.BB to FromBBI.BB, which is only needed when
1748 // AddEdges is true and FromBBI.BB is a successor of ToBBI.BB.
1749 auto To2FromProb = BranchProbability::getZero();
1750 if (AddEdges && ToBBI.BB->isSuccessor(FromBBI.BB)) {
1751 To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, FromBBI.BB);
1752 // Set the edge probability from ToBBI.BB to FromBBI.BB to zero to avoid the
1753 // edge probability being merged to other edges when this edge is removed
1754 // later.
1755 ToBBI.BB->setSuccProbability(
1756 std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), FromBBI.BB),
1757 BranchProbability::getZero());
1758 }
1759
1760 for (unsigned i = 0, e = FromSuccs.size(); i != e; ++i) {
1761 MachineBasicBlock *Succ = FromSuccs[i];
1762 // Fallthrough edge can't be transferred.
1763 if (Succ == FallThrough)
1764 continue;
1765
1766 auto NewProb = BranchProbability::getZero();
1767 if (AddEdges) {
1768 // Calculate the edge probability for the edge from ToBBI.BB to Succ,
1769 // which is a portion of the edge probability from FromBBI.BB to Succ. The
1770 // portion ratio is the edge probability from ToBBI.BB to FromBBI.BB (if
1771 // FromBBI is a successor of ToBBI.BB. See comment below for excepion).
1772 NewProb = MBPI->getEdgeProbability(FromBBI.BB, Succ);
1773
1774 // To2FromProb is 0 when FromBBI.BB is not a successor of ToBBI.BB. This
1775 // only happens when if-converting a diamond CFG and FromBBI.BB is the
1776 // tail BB. In this case FromBBI.BB post-dominates ToBBI.BB and hence we
1777 // could just use the probabilities on FromBBI.BB's out-edges when adding
1778 // new successors.
1779 if (!To2FromProb.isZero())
1780 NewProb *= To2FromProb;
1781 }
1782
1783 FromBBI.BB->removeSuccessor(Succ);
1784
1785 if (AddEdges) {
1786 // If the edge from ToBBI.BB to Succ already exists, update the
1787 // probability of this edge by adding NewProb to it. An example is shown
1788 // below, in which A is ToBBI.BB and B is FromBBI.BB. In this case we
1789 // don't have to set C as A's successor as it already is. We only need to
1790 // update the edge probability on A->C. Note that B will not be
1791 // immediately removed from A's successors. It is possible that B->D is
1792 // not removed either if D is a fallthrough of B. Later the edge A->D
1793 // (generated here) and B->D will be combined into one edge. To maintain
1794 // correct edge probability of this combined edge, we need to set the edge
1795 // probability of A->B to zero, which is already done above. The edge
1796 // probability on A->D is calculated by scaling the original probability
1797 // on A->B by the probability of B->D.
1798 //
1799 // Before ifcvt: After ifcvt (assume B->D is kept):
1800 //
1801 // A A
1802 // /| /|\
1803 // / B / B|
1804 // | /| | ||
1805 // |/ | | |/
1806 // C D C D
1807 //
1808 if (ToBBI.BB->isSuccessor(Succ))
1809 ToBBI.BB->setSuccProbability(
1810 std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), Succ),
1811 MBPI->getEdgeProbability(ToBBI.BB, Succ) + NewProb);
1812 else
1813 ToBBI.BB->addSuccessor(Succ, NewProb);
1814 }
1815 }
1816
1817 // Now FromBBI always falls through to the next block!
1818 if (NBB && !FromBBI.BB->isSuccessor(NBB))
1819 FromBBI.BB->addSuccessor(NBB);
1820
1821 // Normalize the probabilities of ToBBI.BB's successors with all adjustment
1822 // we've done above.
1823 ToBBI.BB->normalizeSuccProbs();
1824
1825 ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
1826 FromBBI.Predicate.clear();
1827
1828 ToBBI.NonPredSize += FromBBI.NonPredSize;
1829 ToBBI.ExtraCost += FromBBI.ExtraCost;
1830 ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
1831 FromBBI.NonPredSize = 0;
1832 FromBBI.ExtraCost = 0;
1833 FromBBI.ExtraCost2 = 0;
1834
1835 ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
1836 ToBBI.HasFallThrough = FromBBI.HasFallThrough;
1837 ToBBI.IsAnalyzed = false;
1838 FromBBI.IsAnalyzed = false;
1839 }
1840
1841 FunctionPass *
createIfConverter(std::function<bool (const Function &)> Ftor)1842 llvm::createIfConverter(std::function<bool(const Function &)> Ftor) {
1843 return new IfConverter(std::move(Ftor));
1844 }
1845