1 //===- SpeculateAroundPHIs.cpp --------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
11 #include "llvm/ADT/PostOrderIterator.h"
12 #include "llvm/ADT/Sequence.h"
13 #include "llvm/ADT/SetVector.h"
14 #include "llvm/ADT/Statistic.h"
15 #include "llvm/Analysis/TargetTransformInfo.h"
16 #include "llvm/Analysis/ValueTracking.h"
17 #include "llvm/IR/BasicBlock.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/Instructions.h"
20 #include "llvm/IR/IntrinsicInst.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
23
24 using namespace llvm;
25
26 #define DEBUG_TYPE "spec-phis"
27
28 STATISTIC(NumPHIsSpeculated, "Number of PHI nodes we speculated around");
29 STATISTIC(NumEdgesSplit,
30 "Number of critical edges which were split for speculation");
31 STATISTIC(NumSpeculatedInstructions,
32 "Number of instructions we speculated around the PHI nodes");
33 STATISTIC(NumNewRedundantInstructions,
34 "Number of new, redundant instructions inserted");
35
36 /// Check wether speculating the users of a PHI node around the PHI
37 /// will be safe.
38 ///
39 /// This checks both that all of the users are safe and also that all of their
40 /// operands are either recursively safe or already available along an incoming
41 /// edge to the PHI.
42 ///
43 /// This routine caches both all the safe nodes explored in `PotentialSpecSet`
44 /// and the chain of nodes that definitively reach any unsafe node in
45 /// `UnsafeSet`. By preserving these between repeated calls to this routine for
46 /// PHIs in the same basic block, the exploration here can be reused. However,
47 /// these caches must no be reused for PHIs in a different basic block as they
48 /// reflect what is available along incoming edges.
49 static bool
isSafeToSpeculatePHIUsers(PHINode & PN,DominatorTree & DT,SmallPtrSetImpl<Instruction * > & PotentialSpecSet,SmallPtrSetImpl<Instruction * > & UnsafeSet)50 isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT,
51 SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
52 SmallPtrSetImpl<Instruction *> &UnsafeSet) {
53 auto *PhiBB = PN.getParent();
54 SmallPtrSet<Instruction *, 4> Visited;
55 SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
56
57 // Walk each user of the PHI node.
58 for (Use &U : PN.uses()) {
59 auto *UI = cast<Instruction>(U.getUser());
60
61 // Ensure the use post-dominates the PHI node. This ensures that, in the
62 // absence of unwinding, the use will actually be reached.
63 // FIXME: We use a blunt hammer of requiring them to be in the same basic
64 // block. We should consider using actual post-dominance here in the
65 // future.
66 if (UI->getParent() != PhiBB) {
67 LLVM_DEBUG(dbgs() << " Unsafe: use in a different BB: " << *UI << "\n");
68 return false;
69 }
70
71 // FIXME: This check is much too conservative. We're not going to move these
72 // instructions onto new dynamic paths through the program unless there is
73 // a call instruction between the use and the PHI node. And memory isn't
74 // changing unless there is a store in that same sequence. We should
75 // probably change this to do at least a limited scan of the intervening
76 // instructions and allow handling stores in easily proven safe cases.
77 if (mayBeMemoryDependent(*UI)) {
78 LLVM_DEBUG(dbgs() << " Unsafe: can't speculate use: " << *UI << "\n");
79 return false;
80 }
81
82 // Now do a depth-first search of everything these users depend on to make
83 // sure they are transitively safe. This is a depth-first search, but we
84 // check nodes in preorder to minimize the amount of checking.
85 Visited.insert(UI);
86 DFSStack.push_back({UI, UI->value_op_begin()});
87 do {
88 User::value_op_iterator OpIt;
89 std::tie(UI, OpIt) = DFSStack.pop_back_val();
90
91 while (OpIt != UI->value_op_end()) {
92 auto *OpI = dyn_cast<Instruction>(*OpIt);
93 // Increment to the next operand for whenever we continue.
94 ++OpIt;
95 // No need to visit non-instructions, which can't form dependencies.
96 if (!OpI)
97 continue;
98
99 // Now do the main pre-order checks that this operand is a viable
100 // dependency of something we want to speculate.
101
102 // First do a few checks for instructions that won't require
103 // speculation at all because they are trivially available on the
104 // incoming edge (either through dominance or through an incoming value
105 // to a PHI).
106 //
107 // The cases in the current block will be trivially dominated by the
108 // edge.
109 auto *ParentBB = OpI->getParent();
110 if (ParentBB == PhiBB) {
111 if (isa<PHINode>(OpI)) {
112 // We can trivially map through phi nodes in the same block.
113 continue;
114 }
115 } else if (DT.dominates(ParentBB, PhiBB)) {
116 // Instructions from dominating blocks are already available.
117 continue;
118 }
119
120 // Once we know that we're considering speculating the operand, check
121 // if we've already explored this subgraph and found it to be safe.
122 if (PotentialSpecSet.count(OpI))
123 continue;
124
125 // If we've already explored this subgraph and found it unsafe, bail.
126 // If when we directly test whether this is safe it fails, bail.
127 if (UnsafeSet.count(OpI) || ParentBB != PhiBB ||
128 mayBeMemoryDependent(*OpI)) {
129 LLVM_DEBUG(dbgs() << " Unsafe: can't speculate transitive use: "
130 << *OpI << "\n");
131 // Record the stack of instructions which reach this node as unsafe
132 // so we prune subsequent searches.
133 UnsafeSet.insert(OpI);
134 for (auto &StackPair : DFSStack) {
135 Instruction *I = StackPair.first;
136 UnsafeSet.insert(I);
137 }
138 return false;
139 }
140
141 // Skip any operands we're already recursively checking.
142 if (!Visited.insert(OpI).second)
143 continue;
144
145 // Push onto the stack and descend. We can directly continue this
146 // loop when ascending.
147 DFSStack.push_back({UI, OpIt});
148 UI = OpI;
149 OpIt = OpI->value_op_begin();
150 }
151
152 // This node and all its operands are safe. Go ahead and cache that for
153 // reuse later.
154 PotentialSpecSet.insert(UI);
155
156 // Continue with the next node on the stack.
157 } while (!DFSStack.empty());
158 }
159
160 #ifndef NDEBUG
161 // Every visited operand should have been marked as safe for speculation at
162 // this point. Verify this and return success.
163 for (auto *I : Visited)
164 assert(PotentialSpecSet.count(I) &&
165 "Failed to mark a visited instruction as safe!");
166 #endif
167 return true;
168 }
169
170 /// Check whether, in isolation, a given PHI node is both safe and profitable
171 /// to speculate users around.
172 ///
173 /// This handles checking whether there are any constant operands to a PHI
174 /// which could represent a useful speculation candidate, whether the users of
175 /// the PHI are safe to speculate including all their transitive dependencies,
176 /// and whether after speculation there will be some cost savings (profit) to
177 /// folding the operands into the users of the PHI node. Returns true if both
178 /// safe and profitable with relevant cost savings updated in the map and with
179 /// an update to the `PotentialSpecSet`. Returns false if either safety or
180 /// profitability are absent. Some new entries may be made to the
181 /// `PotentialSpecSet` even when this routine returns false, but they remain
182 /// conservatively correct.
183 ///
184 /// The profitability check here is a local one, but it checks this in an
185 /// interesting way. Beyond checking that the total cost of materializing the
186 /// constants will be less than the cost of folding them into their users, it
187 /// also checks that no one incoming constant will have a higher cost when
188 /// folded into its users rather than materialized. This higher cost could
189 /// result in a dynamic *path* that is more expensive even when the total cost
190 /// is lower. Currently, all of the interesting cases where this optimization
191 /// should fire are ones where it is a no-loss operation in this sense. If we
192 /// ever want to be more aggressive here, we would need to balance the
193 /// different incoming edges' cost by looking at their respective
194 /// probabilities.
isSafeAndProfitableToSpeculateAroundPHI(PHINode & PN,SmallDenseMap<PHINode *,int,16> & CostSavingsMap,SmallPtrSetImpl<Instruction * > & PotentialSpecSet,SmallPtrSetImpl<Instruction * > & UnsafeSet,DominatorTree & DT,TargetTransformInfo & TTI)195 static bool isSafeAndProfitableToSpeculateAroundPHI(
196 PHINode &PN, SmallDenseMap<PHINode *, int, 16> &CostSavingsMap,
197 SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
198 SmallPtrSetImpl<Instruction *> &UnsafeSet, DominatorTree &DT,
199 TargetTransformInfo &TTI) {
200 // First see whether there is any cost savings to speculating around this
201 // PHI, and build up a map of the constant inputs to how many times they
202 // occur.
203 bool NonFreeMat = false;
204 struct CostsAndCount {
205 int MatCost = TargetTransformInfo::TCC_Free;
206 int FoldedCost = TargetTransformInfo::TCC_Free;
207 int Count = 0;
208 };
209 SmallDenseMap<ConstantInt *, CostsAndCount, 16> CostsAndCounts;
210 SmallPtrSet<BasicBlock *, 16> IncomingConstantBlocks;
211 for (int i : llvm::seq<int>(0, PN.getNumIncomingValues())) {
212 auto *IncomingC = dyn_cast<ConstantInt>(PN.getIncomingValue(i));
213 if (!IncomingC)
214 continue;
215
216 // Only visit each incoming edge with a constant input once.
217 if (!IncomingConstantBlocks.insert(PN.getIncomingBlock(i)).second)
218 continue;
219
220 auto InsertResult = CostsAndCounts.insert({IncomingC, {}});
221 // Count how many edges share a given incoming costant.
222 ++InsertResult.first->second.Count;
223 // Only compute the cost the first time we see a particular constant.
224 if (!InsertResult.second)
225 continue;
226
227 int &MatCost = InsertResult.first->second.MatCost;
228 MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType());
229 NonFreeMat |= MatCost != TTI.TCC_Free;
230 }
231 if (!NonFreeMat) {
232 LLVM_DEBUG(dbgs() << " Free: " << PN << "\n");
233 // No profit in free materialization.
234 return false;
235 }
236
237 // Now check that the uses of this PHI can actually be speculated,
238 // otherwise we'll still have to materialize the PHI value.
239 if (!isSafeToSpeculatePHIUsers(PN, DT, PotentialSpecSet, UnsafeSet)) {
240 LLVM_DEBUG(dbgs() << " Unsafe PHI: " << PN << "\n");
241 return false;
242 }
243
244 // Compute how much (if any) savings are available by speculating around this
245 // PHI.
246 for (Use &U : PN.uses()) {
247 auto *UserI = cast<Instruction>(U.getUser());
248 // Now check whether there is any savings to folding the incoming constants
249 // into this use.
250 unsigned Idx = U.getOperandNo();
251
252 // If we have a binary operator that is commutative, an actual constant
253 // operand would end up on the RHS, so pretend the use of the PHI is on the
254 // RHS.
255 //
256 // Technically, this is a bit weird if *both* operands are PHIs we're
257 // speculating. But if that is the case, giving an "optimistic" cost isn't
258 // a bad thing because after speculation it will constant fold. And
259 // moreover, such cases should likely have been constant folded already by
260 // some other pass, so we shouldn't worry about "modeling" them terribly
261 // accurately here. Similarly, if the other operand is a constant, it still
262 // seems fine to be "optimistic" in our cost modeling, because when the
263 // incoming operand from the PHI node is also a constant, we will end up
264 // constant folding.
265 if (UserI->isBinaryOp() && UserI->isCommutative() && Idx != 1)
266 // Assume we will commute the constant to the RHS to be canonical.
267 Idx = 1;
268
269 // Get the intrinsic ID if this user is an intrinsic.
270 Intrinsic::ID IID = Intrinsic::not_intrinsic;
271 if (auto *UserII = dyn_cast<IntrinsicInst>(UserI))
272 IID = UserII->getIntrinsicID();
273
274 for (auto &IncomingConstantAndCostsAndCount : CostsAndCounts) {
275 ConstantInt *IncomingC = IncomingConstantAndCostsAndCount.first;
276 int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
277 int &FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
278 if (IID)
279 FoldedCost += TTI.getIntImmCost(IID, Idx, IncomingC->getValue(),
280 IncomingC->getType());
281 else
282 FoldedCost +=
283 TTI.getIntImmCost(UserI->getOpcode(), Idx, IncomingC->getValue(),
284 IncomingC->getType());
285
286 // If we accumulate more folded cost for this incoming constant than
287 // materialized cost, then we'll regress any edge with this constant so
288 // just bail. We're only interested in cases where folding the incoming
289 // constants is at least break-even on all paths.
290 if (FoldedCost > MatCost) {
291 LLVM_DEBUG(dbgs() << " Not profitable to fold imm: " << *IncomingC
292 << "\n"
293 " Materializing cost: "
294 << MatCost
295 << "\n"
296 " Accumulated folded cost: "
297 << FoldedCost << "\n");
298 return false;
299 }
300 }
301 }
302
303 // Compute the total cost savings afforded by this PHI node.
304 int TotalMatCost = TTI.TCC_Free, TotalFoldedCost = TTI.TCC_Free;
305 for (auto IncomingConstantAndCostsAndCount : CostsAndCounts) {
306 int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
307 int FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
308 int Count = IncomingConstantAndCostsAndCount.second.Count;
309
310 TotalMatCost += MatCost * Count;
311 TotalFoldedCost += FoldedCost * Count;
312 }
313 assert(TotalFoldedCost <= TotalMatCost && "If each constant's folded cost is "
314 "less that its materialized cost, "
315 "the sum must be as well.");
316
317 LLVM_DEBUG(dbgs() << " Cost savings " << (TotalMatCost - TotalFoldedCost)
318 << ": " << PN << "\n");
319 CostSavingsMap[&PN] = TotalMatCost - TotalFoldedCost;
320 return true;
321 }
322
323 /// Simple helper to walk all the users of a list of phis depth first, and call
324 /// a visit function on each one in post-order.
325 ///
326 /// All of the PHIs should be in the same basic block, and this is primarily
327 /// used to make a single depth-first walk across their collective users
328 /// without revisiting any subgraphs. Callers should provide a fast, idempotent
329 /// callable to test whether a node has been visited and the more important
330 /// callable to actually visit a particular node.
331 ///
332 /// Depth-first and postorder here refer to the *operand* graph -- we start
333 /// from a collection of users of PHI nodes and walk "up" the operands
334 /// depth-first.
335 template <typename IsVisitedT, typename VisitT>
visitPHIUsersAndDepsInPostOrder(ArrayRef<PHINode * > PNs,IsVisitedT IsVisited,VisitT Visit)336 static void visitPHIUsersAndDepsInPostOrder(ArrayRef<PHINode *> PNs,
337 IsVisitedT IsVisited,
338 VisitT Visit) {
339 SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
340 for (auto *PN : PNs)
341 for (Use &U : PN->uses()) {
342 auto *UI = cast<Instruction>(U.getUser());
343 if (IsVisited(UI))
344 // Already visited this user, continue across the roots.
345 continue;
346
347 // Otherwise, walk the operand graph depth-first and visit each
348 // dependency in postorder.
349 DFSStack.push_back({UI, UI->value_op_begin()});
350 do {
351 User::value_op_iterator OpIt;
352 std::tie(UI, OpIt) = DFSStack.pop_back_val();
353 while (OpIt != UI->value_op_end()) {
354 auto *OpI = dyn_cast<Instruction>(*OpIt);
355 // Increment to the next operand for whenever we continue.
356 ++OpIt;
357 // No need to visit non-instructions, which can't form dependencies,
358 // or instructions outside of our potential dependency set that we
359 // were given. Finally, if we've already visited the node, continue
360 // to the next.
361 if (!OpI || IsVisited(OpI))
362 continue;
363
364 // Push onto the stack and descend. We can directly continue this
365 // loop when ascending.
366 DFSStack.push_back({UI, OpIt});
367 UI = OpI;
368 OpIt = OpI->value_op_begin();
369 }
370
371 // Finished visiting children, visit this node.
372 assert(!IsVisited(UI) && "Should not have already visited a node!");
373 Visit(UI);
374 } while (!DFSStack.empty());
375 }
376 }
377
378 /// Find profitable PHIs to speculate.
379 ///
380 /// For a PHI node to be profitable, we need the cost of speculating its users
381 /// (and their dependencies) to not exceed the savings of folding the PHI's
382 /// constant operands into the speculated users.
383 ///
384 /// Computing this is surprisingly challenging. Because users of two different
385 /// PHI nodes can depend on each other or on common other instructions, it may
386 /// be profitable to speculate two PHI nodes together even though neither one
387 /// in isolation is profitable. The straightforward way to find all the
388 /// profitable PHIs would be to check each combination of PHIs' cost, but this
389 /// is exponential in complexity.
390 ///
391 /// Even if we assume that we only care about cases where we can consider each
392 /// PHI node in isolation (rather than considering cases where none are
393 /// profitable in isolation but some subset are profitable as a set), we still
394 /// have a challenge. The obvious way to find all individually profitable PHIs
395 /// is to iterate until reaching a fixed point, but this will be quadratic in
396 /// complexity. =/
397 ///
398 /// This code currently uses a linear-to-compute order for a greedy approach.
399 /// It won't find cases where a set of PHIs must be considered together, but it
400 /// handles most cases of order dependence without quadratic iteration. The
401 /// specific order used is the post-order across the operand DAG. When the last
402 /// user of a PHI is visited in this postorder walk, we check it for
403 /// profitability.
404 ///
405 /// There is an orthogonal extra complexity to all of this: computing the cost
406 /// itself can easily become a linear computation making everything again (at
407 /// best) quadratic. Using a postorder over the operand graph makes it
408 /// particularly easy to avoid this through dynamic programming. As we do the
409 /// postorder walk, we build the transitive cost of that subgraph. It is also
410 /// straightforward to then update these costs when we mark a PHI for
411 /// speculation so that subsequent PHIs don't re-pay the cost of already
412 /// speculated instructions.
413 static SmallVector<PHINode *, 16>
findProfitablePHIs(ArrayRef<PHINode * > PNs,const SmallDenseMap<PHINode *,int,16> & CostSavingsMap,const SmallPtrSetImpl<Instruction * > & PotentialSpecSet,int NumPreds,DominatorTree & DT,TargetTransformInfo & TTI)414 findProfitablePHIs(ArrayRef<PHINode *> PNs,
415 const SmallDenseMap<PHINode *, int, 16> &CostSavingsMap,
416 const SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
417 int NumPreds, DominatorTree &DT, TargetTransformInfo &TTI) {
418 SmallVector<PHINode *, 16> SpecPNs;
419
420 // First, establish a reverse mapping from immediate users of the PHI nodes
421 // to the nodes themselves, and count how many users each PHI node has in
422 // a way we can update while processing them.
423 SmallDenseMap<Instruction *, TinyPtrVector<PHINode *>, 16> UserToPNMap;
424 SmallDenseMap<PHINode *, int, 16> PNUserCountMap;
425 SmallPtrSet<Instruction *, 16> UserSet;
426 for (auto *PN : PNs) {
427 assert(UserSet.empty() && "Must start with an empty user set!");
428 for (Use &U : PN->uses())
429 UserSet.insert(cast<Instruction>(U.getUser()));
430 PNUserCountMap[PN] = UserSet.size();
431 for (auto *UI : UserSet)
432 UserToPNMap.insert({UI, {}}).first->second.push_back(PN);
433 UserSet.clear();
434 }
435
436 // Now do a DFS across the operand graph of the users, computing cost as we
437 // go and when all costs for a given PHI are known, checking that PHI for
438 // profitability.
439 SmallDenseMap<Instruction *, int, 16> SpecCostMap;
440 visitPHIUsersAndDepsInPostOrder(
441 PNs,
442 /*IsVisited*/
443 [&](Instruction *I) {
444 // We consider anything that isn't potentially speculated to be
445 // "visited" as it is already handled. Similarly, anything that *is*
446 // potentially speculated but for which we have an entry in our cost
447 // map, we're done.
448 return !PotentialSpecSet.count(I) || SpecCostMap.count(I);
449 },
450 /*Visit*/
451 [&](Instruction *I) {
452 // We've fully visited the operands, so sum their cost with this node
453 // and update the cost map.
454 int Cost = TTI.TCC_Free;
455 for (Value *OpV : I->operand_values())
456 if (auto *OpI = dyn_cast<Instruction>(OpV)) {
457 auto CostMapIt = SpecCostMap.find(OpI);
458 if (CostMapIt != SpecCostMap.end())
459 Cost += CostMapIt->second;
460 }
461 Cost += TTI.getUserCost(I);
462 bool Inserted = SpecCostMap.insert({I, Cost}).second;
463 (void)Inserted;
464 assert(Inserted && "Must not re-insert a cost during the DFS!");
465
466 // Now check if this node had a corresponding PHI node using it. If so,
467 // we need to decrement the outstanding user count for it.
468 auto UserPNsIt = UserToPNMap.find(I);
469 if (UserPNsIt == UserToPNMap.end())
470 return;
471 auto &UserPNs = UserPNsIt->second;
472 auto UserPNsSplitIt = std::stable_partition(
473 UserPNs.begin(), UserPNs.end(), [&](PHINode *UserPN) {
474 int &PNUserCount = PNUserCountMap.find(UserPN)->second;
475 assert(
476 PNUserCount > 0 &&
477 "Should never re-visit a PN after its user count hits zero!");
478 --PNUserCount;
479 return PNUserCount != 0;
480 });
481
482 // FIXME: Rather than one at a time, we should sum the savings as the
483 // cost will be completely shared.
484 SmallVector<Instruction *, 16> SpecWorklist;
485 for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) {
486 int SpecCost = TTI.TCC_Free;
487 for (Use &U : PN->uses())
488 SpecCost +=
489 SpecCostMap.find(cast<Instruction>(U.getUser()))->second;
490 SpecCost *= (NumPreds - 1);
491 // When the user count of a PHI node hits zero, we should check its
492 // profitability. If profitable, we should mark it for speculation
493 // and zero out the cost of everything it depends on.
494 int CostSavings = CostSavingsMap.find(PN)->second;
495 if (SpecCost > CostSavings) {
496 LLVM_DEBUG(dbgs() << " Not profitable, speculation cost: " << *PN
497 << "\n"
498 " Cost savings: "
499 << CostSavings
500 << "\n"
501 " Speculation cost: "
502 << SpecCost << "\n");
503 continue;
504 }
505
506 // We're going to speculate this user-associated PHI. Copy it out and
507 // add its users to the worklist to update their cost.
508 SpecPNs.push_back(PN);
509 for (Use &U : PN->uses()) {
510 auto *UI = cast<Instruction>(U.getUser());
511 auto CostMapIt = SpecCostMap.find(UI);
512 if (CostMapIt->second == 0)
513 continue;
514 // Zero out this cost entry to avoid duplicates.
515 CostMapIt->second = 0;
516 SpecWorklist.push_back(UI);
517 }
518 }
519
520 // Now walk all the operands of the users in the worklist transitively
521 // to zero out all the memoized costs.
522 while (!SpecWorklist.empty()) {
523 Instruction *SpecI = SpecWorklist.pop_back_val();
524 assert(SpecCostMap.find(SpecI)->second == 0 &&
525 "Didn't zero out a cost!");
526
527 // Walk the operands recursively to zero out their cost as well.
528 for (auto *OpV : SpecI->operand_values()) {
529 auto *OpI = dyn_cast<Instruction>(OpV);
530 if (!OpI)
531 continue;
532 auto CostMapIt = SpecCostMap.find(OpI);
533 if (CostMapIt == SpecCostMap.end() || CostMapIt->second == 0)
534 continue;
535 CostMapIt->second = 0;
536 SpecWorklist.push_back(OpI);
537 }
538 }
539 });
540
541 return SpecPNs;
542 }
543
544 /// Speculate users around a set of PHI nodes.
545 ///
546 /// This routine does the actual speculation around a set of PHI nodes where we
547 /// have determined this to be both safe and profitable.
548 ///
549 /// This routine handles any spliting of critical edges necessary to create
550 /// a safe block to speculate into as well as cloning the instructions and
551 /// rewriting all uses.
speculatePHIs(ArrayRef<PHINode * > SpecPNs,SmallPtrSetImpl<Instruction * > & PotentialSpecSet,SmallSetVector<BasicBlock *,16> & PredSet,DominatorTree & DT)552 static void speculatePHIs(ArrayRef<PHINode *> SpecPNs,
553 SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
554 SmallSetVector<BasicBlock *, 16> &PredSet,
555 DominatorTree &DT) {
556 LLVM_DEBUG(dbgs() << " Speculating around " << SpecPNs.size() << " PHIs!\n");
557 NumPHIsSpeculated += SpecPNs.size();
558
559 // Split any critical edges so that we have a block to hoist into.
560 auto *ParentBB = SpecPNs[0]->getParent();
561 SmallVector<BasicBlock *, 16> SpecPreds;
562 SpecPreds.reserve(PredSet.size());
563 for (auto *PredBB : PredSet) {
564 auto *NewPredBB = SplitCriticalEdge(
565 PredBB, ParentBB,
566 CriticalEdgeSplittingOptions(&DT).setMergeIdenticalEdges());
567 if (NewPredBB) {
568 ++NumEdgesSplit;
569 LLVM_DEBUG(dbgs() << " Split critical edge from: " << PredBB->getName()
570 << "\n");
571 SpecPreds.push_back(NewPredBB);
572 } else {
573 assert(PredBB->getSingleSuccessor() == ParentBB &&
574 "We need a non-critical predecessor to speculate into.");
575 assert(!isa<InvokeInst>(PredBB->getTerminator()) &&
576 "Cannot have a non-critical invoke!");
577
578 // Already non-critical, use existing pred.
579 SpecPreds.push_back(PredBB);
580 }
581 }
582
583 SmallPtrSet<Instruction *, 16> SpecSet;
584 SmallVector<Instruction *, 16> SpecList;
585 visitPHIUsersAndDepsInPostOrder(SpecPNs,
586 /*IsVisited*/
587 [&](Instruction *I) {
588 // This is visited if we don't need to
589 // speculate it or we already have
590 // speculated it.
591 return !PotentialSpecSet.count(I) ||
592 SpecSet.count(I);
593 },
594 /*Visit*/
595 [&](Instruction *I) {
596 // All operands scheduled, schedule this
597 // node.
598 SpecSet.insert(I);
599 SpecList.push_back(I);
600 });
601
602 int NumSpecInsts = SpecList.size() * SpecPreds.size();
603 int NumRedundantInsts = NumSpecInsts - SpecList.size();
604 LLVM_DEBUG(dbgs() << " Inserting " << NumSpecInsts
605 << " speculated instructions, " << NumRedundantInsts
606 << " redundancies\n");
607 NumSpeculatedInstructions += NumSpecInsts;
608 NumNewRedundantInstructions += NumRedundantInsts;
609
610 // Each predecessor is numbered by its index in `SpecPreds`, so for each
611 // instruction we speculate, the speculated instruction is stored in that
612 // index of the vector associated with the original instruction. We also
613 // store the incoming values for each predecessor from any PHIs used.
614 SmallDenseMap<Instruction *, SmallVector<Value *, 2>, 16> SpeculatedValueMap;
615
616 // Inject the synthetic mappings to rewrite PHIs to the appropriate incoming
617 // value. This handles both the PHIs we are speculating around and any other
618 // PHIs that happen to be used.
619 for (auto *OrigI : SpecList)
620 for (auto *OpV : OrigI->operand_values()) {
621 auto *OpPN = dyn_cast<PHINode>(OpV);
622 if (!OpPN || OpPN->getParent() != ParentBB)
623 continue;
624
625 auto InsertResult = SpeculatedValueMap.insert({OpPN, {}});
626 if (!InsertResult.second)
627 continue;
628
629 auto &SpeculatedVals = InsertResult.first->second;
630
631 // Populating our structure for mapping is particularly annoying because
632 // finding an incoming value for a particular predecessor block in a PHI
633 // node is a linear time operation! To avoid quadratic behavior, we build
634 // a map for this PHI node's incoming values and then translate it into
635 // the more compact representation used below.
636 SmallDenseMap<BasicBlock *, Value *, 16> IncomingValueMap;
637 for (int i : llvm::seq<int>(0, OpPN->getNumIncomingValues()))
638 IncomingValueMap[OpPN->getIncomingBlock(i)] = OpPN->getIncomingValue(i);
639
640 for (auto *PredBB : SpecPreds)
641 SpeculatedVals.push_back(IncomingValueMap.find(PredBB)->second);
642 }
643
644 // Speculate into each predecessor.
645 for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) {
646 auto *PredBB = SpecPreds[PredIdx];
647 assert(PredBB->getSingleSuccessor() == ParentBB &&
648 "We need a non-critical predecessor to speculate into.");
649
650 for (auto *OrigI : SpecList) {
651 auto *NewI = OrigI->clone();
652 NewI->setName(Twine(OrigI->getName()) + "." + Twine(PredIdx));
653 NewI->insertBefore(PredBB->getTerminator());
654
655 // Rewrite all the operands to the previously speculated instructions.
656 // Because we're walking in-order, the defs must precede the uses and we
657 // should already have these mappings.
658 for (Use &U : NewI->operands()) {
659 auto *OpI = dyn_cast<Instruction>(U.get());
660 if (!OpI)
661 continue;
662 auto MapIt = SpeculatedValueMap.find(OpI);
663 if (MapIt == SpeculatedValueMap.end())
664 continue;
665 const auto &SpeculatedVals = MapIt->second;
666 assert(SpeculatedVals[PredIdx] &&
667 "Must have a speculated value for this predecessor!");
668 assert(SpeculatedVals[PredIdx]->getType() == OpI->getType() &&
669 "Speculated value has the wrong type!");
670
671 // Rewrite the use to this predecessor's speculated instruction.
672 U.set(SpeculatedVals[PredIdx]);
673 }
674
675 // Commute instructions which now have a constant in the LHS but not the
676 // RHS.
677 if (NewI->isBinaryOp() && NewI->isCommutative() &&
678 isa<Constant>(NewI->getOperand(0)) &&
679 !isa<Constant>(NewI->getOperand(1)))
680 NewI->getOperandUse(0).swap(NewI->getOperandUse(1));
681
682 SpeculatedValueMap[OrigI].push_back(NewI);
683 assert(SpeculatedValueMap[OrigI][PredIdx] == NewI &&
684 "Mismatched speculated instruction index!");
685 }
686 }
687
688 // Walk the speculated instruction list and if they have uses, insert a PHI
689 // for them from the speculated versions, and replace the uses with the PHI.
690 // Then erase the instructions as they have been fully speculated. The walk
691 // needs to be in reverse so that we don't think there are users when we'll
692 // actually eventually remove them later.
693 IRBuilder<> IRB(SpecPNs[0]);
694 for (auto *OrigI : llvm::reverse(SpecList)) {
695 // Check if we need a PHI for any remaining users and if so, insert it.
696 if (!OrigI->use_empty()) {
697 auto *SpecIPN = IRB.CreatePHI(OrigI->getType(), SpecPreds.size(),
698 Twine(OrigI->getName()) + ".phi");
699 // Add the incoming values we speculated.
700 auto &SpeculatedVals = SpeculatedValueMap.find(OrigI)->second;
701 for (int PredIdx : llvm::seq<int>(0, SpecPreds.size()))
702 SpecIPN->addIncoming(SpeculatedVals[PredIdx], SpecPreds[PredIdx]);
703
704 // And replace the uses with the PHI node.
705 OrigI->replaceAllUsesWith(SpecIPN);
706 }
707
708 // It is important to immediately erase this so that it stops using other
709 // instructions. This avoids inserting needless PHIs of them.
710 OrigI->eraseFromParent();
711 }
712
713 // All of the uses of the speculated phi nodes should be removed at this
714 // point, so erase them.
715 for (auto *SpecPN : SpecPNs) {
716 assert(SpecPN->use_empty() && "All users should have been speculated!");
717 SpecPN->eraseFromParent();
718 }
719 }
720
721 /// Try to speculate around a series of PHIs from a single basic block.
722 ///
723 /// This routine checks whether any of these PHIs are profitable to speculate
724 /// users around. If safe and profitable, it does the speculation. It returns
725 /// true when at least some speculation occurs.
tryToSpeculatePHIs(SmallVectorImpl<PHINode * > & PNs,DominatorTree & DT,TargetTransformInfo & TTI)726 static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs,
727 DominatorTree &DT, TargetTransformInfo &TTI) {
728 LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n");
729
730 // Savings in cost from speculating around a PHI node.
731 SmallDenseMap<PHINode *, int, 16> CostSavingsMap;
732
733 // Remember the set of instructions that are candidates for speculation so
734 // that we can quickly walk things within that space. This prunes out
735 // instructions already available along edges, etc.
736 SmallPtrSet<Instruction *, 16> PotentialSpecSet;
737
738 // Remember the set of instructions that are (transitively) unsafe to
739 // speculate into the incoming edges of this basic block. This avoids
740 // recomputing them for each PHI node we check. This set is specific to this
741 // block though as things are pruned out of it based on what is available
742 // along incoming edges.
743 SmallPtrSet<Instruction *, 16> UnsafeSet;
744
745 // For each PHI node in this block, check whether there are immediate folding
746 // opportunities from speculation, and whether that speculation will be
747 // valid. This determise the set of safe PHIs to speculate.
748 PNs.erase(llvm::remove_if(PNs,
749 [&](PHINode *PN) {
750 return !isSafeAndProfitableToSpeculateAroundPHI(
751 *PN, CostSavingsMap, PotentialSpecSet,
752 UnsafeSet, DT, TTI);
753 }),
754 PNs.end());
755 // If no PHIs were profitable, skip.
756 if (PNs.empty()) {
757 LLVM_DEBUG(dbgs() << " No safe and profitable PHIs found!\n");
758 return false;
759 }
760
761 // We need to know how much speculation will cost which is determined by how
762 // many incoming edges will need a copy of each speculated instruction.
763 SmallSetVector<BasicBlock *, 16> PredSet;
764 for (auto *PredBB : PNs[0]->blocks()) {
765 if (!PredSet.insert(PredBB))
766 continue;
767
768 // We cannot speculate when a predecessor is an indirect branch.
769 // FIXME: We also can't reliably create a non-critical edge block for
770 // speculation if the predecessor is an invoke. This doesn't seem
771 // fundamental and we should probably be splitting critical edges
772 // differently.
773 if (isa<IndirectBrInst>(PredBB->getTerminator()) ||
774 isa<InvokeInst>(PredBB->getTerminator())) {
775 LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: "
776 << PredBB->getName() << "\n");
777 return false;
778 }
779 }
780 if (PredSet.size() < 2) {
781 LLVM_DEBUG(dbgs() << " Unimportant: phi with only one predecessor\n");
782 return false;
783 }
784
785 SmallVector<PHINode *, 16> SpecPNs = findProfitablePHIs(
786 PNs, CostSavingsMap, PotentialSpecSet, PredSet.size(), DT, TTI);
787 if (SpecPNs.empty())
788 // Nothing to do.
789 return false;
790
791 speculatePHIs(SpecPNs, PotentialSpecSet, PredSet, DT);
792 return true;
793 }
794
run(Function & F,FunctionAnalysisManager & AM)795 PreservedAnalyses SpeculateAroundPHIsPass::run(Function &F,
796 FunctionAnalysisManager &AM) {
797 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
798 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
799
800 bool Changed = false;
801 for (auto *BB : ReversePostOrderTraversal<Function *>(&F)) {
802 SmallVector<PHINode *, 16> PNs;
803 auto BBI = BB->begin();
804 while (auto *PN = dyn_cast<PHINode>(&*BBI)) {
805 PNs.push_back(PN);
806 ++BBI;
807 }
808
809 if (PNs.empty())
810 continue;
811
812 Changed |= tryToSpeculatePHIs(PNs, DT, TTI);
813 }
814
815 if (!Changed)
816 return PreservedAnalyses::all();
817
818 PreservedAnalyses PA;
819 return PA;
820 }
821