1 //===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass reassociates n-ary add expressions and eliminates the redundancy
11 // exposed by the reassociation.
12 //
13 // A motivating example:
14 //
15 //   void foo(int a, int b) {
16 //     bar(a + b);
17 //     bar((a + 2) + b);
18 //   }
19 //
20 // An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify
21 // the above code to
22 //
23 //   int t = a + b;
24 //   bar(t);
25 //   bar(t + 2);
26 //
27 // However, the Reassociate pass is unable to do that because it processes each
28 // instruction individually and believes (a + 2) + b is the best form according
29 // to its rank system.
30 //
31 // To address this limitation, NaryReassociate reassociates an expression in a
32 // form that reuses existing instructions. As a result, NaryReassociate can
33 // reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that
34 // (a + b) is computed before.
35 //
36 // NaryReassociate works as follows. For every instruction in the form of (a +
37 // b) + c, it checks whether a + c or b + c is already computed by a dominating
38 // instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b +
39 // c) + a and removes the redundancy accordingly. To efficiently look up whether
40 // an expression is computed before, we store each instruction seen and its SCEV
41 // into an SCEV-to-instruction map.
42 //
43 // Although the algorithm pattern-matches only ternary additions, it
44 // automatically handles many >3-ary expressions by walking through the function
45 // in the depth-first order. For example, given
46 //
47 //   (a + c) + d
48 //   ((a + b) + c) + d
49 //
50 // NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites
51 // ((a + c) + b) + d into ((a + c) + d) + b.
52 //
53 // Finally, the above dominator-based algorithm may need to be run multiple
54 // iterations before emitting optimal code. One source of this need is that we
55 // only split an operand when it is used only once. The above algorithm can
56 // eliminate an instruction and decrease the usage count of its operands. As a
57 // result, an instruction that previously had multiple uses may become a
58 // single-use instruction and thus eligible for split consideration. For
59 // example,
60 //
61 //   ac = a + c
62 //   ab = a + b
63 //   abc = ab + c
64 //   ab2 = ab + b
65 //   ab2c = ab2 + c
66 //
67 // In the first iteration, we cannot reassociate abc to ac+b because ab is used
68 // twice. However, we can reassociate ab2c to abc+b in the first iteration. As a
69 // result, ab2 becomes dead and ab will be used only once in the second
70 // iteration.
71 //
72 // Limitations and TODO items:
73 //
// 1) We only consider n-ary adds and muls for now. This should be extended
// and generalized.
76 //
77 //===----------------------------------------------------------------------===//
78 
79 #include "llvm/Analysis/AssumptionCache.h"
80 #include "llvm/Analysis/ScalarEvolution.h"
81 #include "llvm/Analysis/TargetLibraryInfo.h"
82 #include "llvm/Analysis/TargetTransformInfo.h"
83 #include "llvm/Analysis/ValueTracking.h"
84 #include "llvm/IR/Dominators.h"
85 #include "llvm/IR/Module.h"
86 #include "llvm/IR/PatternMatch.h"
87 #include "llvm/Support/Debug.h"
88 #include "llvm/Support/raw_ostream.h"
89 #include "llvm/Transforms/Scalar.h"
90 #include "llvm/Transforms/Utils/Local.h"
91 using namespace llvm;
92 using namespace PatternMatch;
93 
94 #define DEBUG_TYPE "nary-reassociate"
95 
96 namespace {
97 class NaryReassociate : public FunctionPass {
98 public:
99   static char ID;
100 
NaryReassociate()101   NaryReassociate(): FunctionPass(ID) {
102     initializeNaryReassociatePass(*PassRegistry::getPassRegistry());
103   }
104 
doInitialization(Module & M)105   bool doInitialization(Module &M) override {
106     DL = &M.getDataLayout();
107     return false;
108   }
109   bool runOnFunction(Function &F) override;
110 
getAnalysisUsage(AnalysisUsage & AU) const111   void getAnalysisUsage(AnalysisUsage &AU) const override {
112     AU.addPreserved<DominatorTreeWrapperPass>();
113     AU.addPreserved<ScalarEvolutionWrapperPass>();
114     AU.addPreserved<TargetLibraryInfoWrapperPass>();
115     AU.addRequired<AssumptionCacheTracker>();
116     AU.addRequired<DominatorTreeWrapperPass>();
117     AU.addRequired<ScalarEvolutionWrapperPass>();
118     AU.addRequired<TargetLibraryInfoWrapperPass>();
119     AU.addRequired<TargetTransformInfoWrapperPass>();
120     AU.setPreservesCFG();
121   }
122 
123 private:
124   // Runs only one iteration of the dominator-based algorithm. See the header
125   // comments for why we need multiple iterations.
126   bool doOneIteration(Function &F);
127 
128   // Reassociates I for better CSE.
129   Instruction *tryReassociate(Instruction *I);
130 
131   // Reassociate GEP for better CSE.
132   Instruction *tryReassociateGEP(GetElementPtrInst *GEP);
133   // Try splitting GEP at the I-th index and see whether either part can be
134   // CSE'ed. This is a helper function for tryReassociateGEP.
135   //
136   // \p IndexedType The element type indexed by GEP's I-th index. This is
137   //                equivalent to
138   //                  GEP->getIndexedType(GEP->getPointerOperand(), 0-th index,
139   //                                      ..., i-th index).
140   GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
141                                               unsigned I, Type *IndexedType);
142   // Given GEP's I-th index = LHS + RHS, see whether &Base[..][LHS][..] or
143   // &Base[..][RHS][..] can be CSE'ed and rewrite GEP accordingly.
144   GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
145                                               unsigned I, Value *LHS,
146                                               Value *RHS, Type *IndexedType);
147 
148   // Reassociate binary operators for better CSE.
149   Instruction *tryReassociateBinaryOp(BinaryOperator *I);
150 
151   // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly
152   // passed.
153   Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS,
154                                       BinaryOperator *I);
155   // Rewrites I to (LHS op RHS) if LHS is computed already.
156   Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS,
157                                        BinaryOperator *I);
158 
159   // Tries to match Op1 and Op2 by using V.
160   bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2);
161 
162   // Gets SCEV for (LHS op RHS).
163   const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
164                             const SCEV *RHS);
165 
166   // Returns the closest dominator of \c Dominatee that computes
167   // \c CandidateExpr. Returns null if not found.
168   Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr,
169                                             Instruction *Dominatee);
170   // GetElementPtrInst implicitly sign-extends an index if the index is shorter
171   // than the pointer size. This function returns whether Index is shorter than
172   // GEP's pointer size, i.e., whether Index needs to be sign-extended in order
173   // to be an index of GEP.
174   bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);
175 
176   AssumptionCache *AC;
177   const DataLayout *DL;
178   DominatorTree *DT;
179   ScalarEvolution *SE;
180   TargetLibraryInfo *TLI;
181   TargetTransformInfo *TTI;
182   // A lookup table quickly telling which instructions compute the given SCEV.
183   // Note that there can be multiple instructions at different locations
184   // computing to the same SCEV, so we map a SCEV to an instruction list.  For
185   // example,
186   //
187   //   if (p1)
188   //     foo(a + b);
189   //   if (p2)
190   //     bar(a + b);
191   DenseMap<const SCEV *, SmallVector<WeakVH, 2>> SeenExprs;
192 };
193 } // anonymous namespace
194 
// Definition of the static pass identifier that the constructor hands to
// FunctionPass.
char NaryReassociate::ID = 0;
// Pass registration under the name "nary-reassociate". The dependency list
// names the same five analyses that getAnalysisUsage() marks as required.
INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
                    false, false)
205 
206 FunctionPass *llvm::createNaryReassociatePass() {
207   return new NaryReassociate();
208 }
209 
runOnFunction(Function & F)210 bool NaryReassociate::runOnFunction(Function &F) {
211   if (skipOptnoneFunction(F))
212     return false;
213 
214   AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
215   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
216   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
217   TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
218   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
219 
220   bool Changed = false, ChangedInThisIteration;
221   do {
222     ChangedInThisIteration = doOneIteration(F);
223     Changed |= ChangedInThisIteration;
224   } while (ChangedInThisIteration);
225   return Changed;
226 }
227 
228 // Whitelist the instruction types NaryReassociate handles for now.
isPotentiallyNaryReassociable(Instruction * I)229 static bool isPotentiallyNaryReassociable(Instruction *I) {
230   switch (I->getOpcode()) {
231   case Instruction::Add:
232   case Instruction::GetElementPtr:
233   case Instruction::Mul:
234     return true;
235   default:
236     return false;
237   }
238 }
239 
doOneIteration(Function & F)240 bool NaryReassociate::doOneIteration(Function &F) {
241   bool Changed = false;
242   SeenExprs.clear();
243   // Process the basic blocks in pre-order of the dominator tree. This order
244   // ensures that all bases of a candidate are in Candidates when we process it.
245   for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
246        Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
247     BasicBlock *BB = Node->getBlock();
248     for (auto I = BB->begin(); I != BB->end(); ++I) {
249       if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(&*I)) {
250         const SCEV *OldSCEV = SE->getSCEV(&*I);
251         if (Instruction *NewI = tryReassociate(&*I)) {
252           Changed = true;
253           SE->forgetValue(&*I);
254           I->replaceAllUsesWith(NewI);
255           // If SeenExprs constains I's WeakVH, that entry will be replaced with
256           // nullptr.
257           RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI);
258           I = NewI->getIterator();
259         }
260         // Add the rewritten instruction to SeenExprs; the original instruction
261         // is deleted.
262         const SCEV *NewSCEV = SE->getSCEV(&*I);
263         SeenExprs[NewSCEV].push_back(WeakVH(&*I));
264         // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)
265         // is equivalent to I. However, ScalarEvolution::getSCEV may
266         // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose
267         // we reassociate
268         //   I = &a[sext(i +nsw j)] // assuming sizeof(a[0]) = 4
269         // to
270         //   NewI = &a[sext(i)] + sext(j).
271         //
272         // ScalarEvolution computes
273         //   getSCEV(I)    = a + 4 * sext(i + j)
274         //   getSCEV(newI) = a + 4 * sext(i) + 4 * sext(j)
275         // which are different SCEVs.
276         //
277         // To alleviate this issue of ScalarEvolution not always capturing
278         // equivalence, we add I to SeenExprs[OldSCEV] as well so that we can
279         // map both SCEV before and after tryReassociate(I) to I.
280         //
281         // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll.
282         if (NewSCEV != OldSCEV)
283           SeenExprs[OldSCEV].push_back(WeakVH(&*I));
284       }
285     }
286   }
287   return Changed;
288 }
289 
tryReassociate(Instruction * I)290 Instruction *NaryReassociate::tryReassociate(Instruction *I) {
291   switch (I->getOpcode()) {
292   case Instruction::Add:
293   case Instruction::Mul:
294     return tryReassociateBinaryOp(cast<BinaryOperator>(I));
295   case Instruction::GetElementPtr:
296     return tryReassociateGEP(cast<GetElementPtrInst>(I));
297   default:
298     llvm_unreachable("should be filtered out by isPotentiallyNaryReassociable");
299   }
300 }
301 
302 // FIXME: extract this method into TTI->getGEPCost.
isGEPFoldable(GetElementPtrInst * GEP,const TargetTransformInfo * TTI,const DataLayout * DL)303 static bool isGEPFoldable(GetElementPtrInst *GEP,
304                           const TargetTransformInfo *TTI,
305                           const DataLayout *DL) {
306   GlobalVariable *BaseGV = nullptr;
307   int64_t BaseOffset = 0;
308   bool HasBaseReg = false;
309   int64_t Scale = 0;
310 
311   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand()))
312     BaseGV = GV;
313   else
314     HasBaseReg = true;
315 
316   gep_type_iterator GTI = gep_type_begin(GEP);
317   for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I, ++GTI) {
318     if (isa<SequentialType>(*GTI)) {
319       int64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
320       if (ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) {
321         BaseOffset += ConstIdx->getSExtValue() * ElementSize;
322       } else {
323         // Needs scale register.
324         if (Scale != 0) {
325           // No addressing mode takes two scale registers.
326           return false;
327         }
328         Scale = ElementSize;
329       }
330     } else {
331       StructType *STy = cast<StructType>(*GTI);
332       uint64_t Field = cast<ConstantInt>(*I)->getZExtValue();
333       BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field);
334     }
335   }
336 
337   unsigned AddrSpace = GEP->getPointerAddressSpace();
338   return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV,
339                                     BaseOffset, HasBaseReg, Scale, AddrSpace);
340 }
341 
tryReassociateGEP(GetElementPtrInst * GEP)342 Instruction *NaryReassociate::tryReassociateGEP(GetElementPtrInst *GEP) {
343   // Not worth reassociating GEP if it is foldable.
344   if (isGEPFoldable(GEP, TTI, DL))
345     return nullptr;
346 
347   gep_type_iterator GTI = gep_type_begin(*GEP);
348   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I) {
349     if (isa<SequentialType>(*GTI++)) {
350       if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I - 1, *GTI)) {
351         return NewGEP;
352       }
353     }
354   }
355   return nullptr;
356 }
357 
requiresSignExtension(Value * Index,GetElementPtrInst * GEP)358 bool NaryReassociate::requiresSignExtension(Value *Index,
359                                             GetElementPtrInst *GEP) {
360   unsigned PointerSizeInBits =
361       DL->getPointerSizeInBits(GEP->getType()->getPointerAddressSpace());
362   return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits;
363 }
364 
365 GetElementPtrInst *
tryReassociateGEPAtIndex(GetElementPtrInst * GEP,unsigned I,Type * IndexedType)366 NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
367                                           Type *IndexedType) {
368   Value *IndexToSplit = GEP->getOperand(I + 1);
369   if (SExtInst *SExt = dyn_cast<SExtInst>(IndexToSplit)) {
370     IndexToSplit = SExt->getOperand(0);
371   } else if (ZExtInst *ZExt = dyn_cast<ZExtInst>(IndexToSplit)) {
372     // zext can be treated as sext if the source is non-negative.
373     if (isKnownNonNegative(ZExt->getOperand(0), *DL, 0, AC, GEP, DT))
374       IndexToSplit = ZExt->getOperand(0);
375   }
376 
377   if (AddOperator *AO = dyn_cast<AddOperator>(IndexToSplit)) {
378     // If the I-th index needs sext and the underlying add is not equipped with
379     // nsw, we cannot split the add because
380     //   sext(LHS + RHS) != sext(LHS) + sext(RHS).
381     if (requiresSignExtension(IndexToSplit, GEP) &&
382         computeOverflowForSignedAdd(AO, *DL, AC, GEP, DT) !=
383             OverflowResult::NeverOverflows)
384       return nullptr;
385 
386     Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
387     // IndexToSplit = LHS + RHS.
388     if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I, LHS, RHS, IndexedType))
389       return NewGEP;
390     // Symmetrically, try IndexToSplit = RHS + LHS.
391     if (LHS != RHS) {
392       if (auto *NewGEP =
393               tryReassociateGEPAtIndex(GEP, I, RHS, LHS, IndexedType))
394         return NewGEP;
395     }
396   }
397   return nullptr;
398 }
399 
tryReassociateGEPAtIndex(GetElementPtrInst * GEP,unsigned I,Value * LHS,Value * RHS,Type * IndexedType)400 GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
401     GetElementPtrInst *GEP, unsigned I, Value *LHS, Value *RHS,
402     Type *IndexedType) {
403   // Look for GEP's closest dominator that has the same SCEV as GEP except that
404   // the I-th index is replaced with LHS.
405   SmallVector<const SCEV *, 4> IndexExprs;
406   for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
407     IndexExprs.push_back(SE->getSCEV(*Index));
408   // Replace the I-th index with LHS.
409   IndexExprs[I] = SE->getSCEV(LHS);
410   if (isKnownNonNegative(LHS, *DL, 0, AC, GEP, DT) &&
411       DL->getTypeSizeInBits(LHS->getType()) <
412           DL->getTypeSizeInBits(GEP->getOperand(I)->getType())) {
413     // Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to
414     // zext if the source operand is proved non-negative. We should do that
415     // consistently so that CandidateExpr more likely appears before. See
416     // @reassociate_gep_assume for an example of this canonicalization.
417     IndexExprs[I] =
418         SE->getZeroExtendExpr(IndexExprs[I], GEP->getOperand(I)->getType());
419   }
420   const SCEV *CandidateExpr = SE->getGEPExpr(
421       GEP->getSourceElementType(), SE->getSCEV(GEP->getPointerOperand()),
422       IndexExprs, GEP->isInBounds());
423 
424   Value *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
425   if (Candidate == nullptr)
426     return nullptr;
427 
428   IRBuilder<> Builder(GEP);
429   // Candidate does not necessarily have the same pointer type as GEP. Use
430   // bitcast or pointer cast to make sure they have the same type, so that the
431   // later RAUW doesn't complain.
432   Candidate = Builder.CreateBitOrPointerCast(Candidate, GEP->getType());
433   assert(Candidate->getType() == GEP->getType());
434 
435   // NewGEP = (char *)Candidate + RHS * sizeof(IndexedType)
436   uint64_t IndexedSize = DL->getTypeAllocSize(IndexedType);
437   Type *ElementType = GEP->getType()->getElementType();
438   uint64_t ElementSize = DL->getTypeAllocSize(ElementType);
439   // Another less rare case: because I is not necessarily the last index of the
440   // GEP, the size of the type at the I-th index (IndexedSize) is not
441   // necessarily divisible by ElementSize. For example,
442   //
443   // #pragma pack(1)
444   // struct S {
445   //   int a[3];
446   //   int64 b[8];
447   // };
448   // #pragma pack()
449   //
450   // sizeof(S) = 100 is indivisible by sizeof(int64) = 8.
451   //
452   // TODO: bail out on this case for now. We could emit uglygep.
453   if (IndexedSize % ElementSize != 0)
454     return nullptr;
455 
456   // NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0])));
457   Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
458   if (RHS->getType() != IntPtrTy)
459     RHS = Builder.CreateSExtOrTrunc(RHS, IntPtrTy);
460   if (IndexedSize != ElementSize) {
461     RHS = Builder.CreateMul(
462         RHS, ConstantInt::get(IntPtrTy, IndexedSize / ElementSize));
463   }
464   GetElementPtrInst *NewGEP =
465       cast<GetElementPtrInst>(Builder.CreateGEP(Candidate, RHS));
466   NewGEP->setIsInBounds(GEP->isInBounds());
467   NewGEP->takeName(GEP);
468   return NewGEP;
469 }
470 
tryReassociateBinaryOp(BinaryOperator * I)471 Instruction *NaryReassociate::tryReassociateBinaryOp(BinaryOperator *I) {
472   Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
473   if (auto *NewI = tryReassociateBinaryOp(LHS, RHS, I))
474     return NewI;
475   if (auto *NewI = tryReassociateBinaryOp(RHS, LHS, I))
476     return NewI;
477   return nullptr;
478 }
479 
tryReassociateBinaryOp(Value * LHS,Value * RHS,BinaryOperator * I)480 Instruction *NaryReassociate::tryReassociateBinaryOp(Value *LHS, Value *RHS,
481                                                      BinaryOperator *I) {
482   Value *A = nullptr, *B = nullptr;
483   // To be conservative, we reassociate I only when it is the only user of (A op
484   // B).
485   if (LHS->hasOneUse() && matchTernaryOp(I, LHS, A, B)) {
486     // I = (A op B) op RHS
487     //   = (A op RHS) op B or (B op RHS) op A
488     const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
489     const SCEV *RHSExpr = SE->getSCEV(RHS);
490     if (BExpr != RHSExpr) {
491       if (auto *NewI =
492               tryReassociatedBinaryOp(getBinarySCEV(I, AExpr, RHSExpr), B, I))
493         return NewI;
494     }
495     if (AExpr != RHSExpr) {
496       if (auto *NewI =
497               tryReassociatedBinaryOp(getBinarySCEV(I, BExpr, RHSExpr), A, I))
498         return NewI;
499     }
500   }
501   return nullptr;
502 }
503 
tryReassociatedBinaryOp(const SCEV * LHSExpr,Value * RHS,BinaryOperator * I)504 Instruction *NaryReassociate::tryReassociatedBinaryOp(const SCEV *LHSExpr,
505                                                       Value *RHS,
506                                                       BinaryOperator *I) {
507   // Look for the closest dominator LHS of I that computes LHSExpr, and replace
508   // I with LHS op RHS.
509   auto *LHS = findClosestMatchingDominator(LHSExpr, I);
510   if (LHS == nullptr)
511     return nullptr;
512 
513   Instruction *NewI = nullptr;
514   switch (I->getOpcode()) {
515   case Instruction::Add:
516     NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
517     break;
518   case Instruction::Mul:
519     NewI = BinaryOperator::CreateMul(LHS, RHS, "", I);
520     break;
521   default:
522     llvm_unreachable("Unexpected instruction.");
523   }
524   NewI->takeName(I);
525   return NewI;
526 }
527 
matchTernaryOp(BinaryOperator * I,Value * V,Value * & Op1,Value * & Op2)528 bool NaryReassociate::matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1,
529                                      Value *&Op2) {
530   switch (I->getOpcode()) {
531   case Instruction::Add:
532     return match(V, m_Add(m_Value(Op1), m_Value(Op2)));
533   case Instruction::Mul:
534     return match(V, m_Mul(m_Value(Op1), m_Value(Op2)));
535   default:
536     llvm_unreachable("Unexpected instruction.");
537   }
538   return false;
539 }
540 
getBinarySCEV(BinaryOperator * I,const SCEV * LHS,const SCEV * RHS)541 const SCEV *NaryReassociate::getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
542                                            const SCEV *RHS) {
543   switch (I->getOpcode()) {
544   case Instruction::Add:
545     return SE->getAddExpr(LHS, RHS);
546   case Instruction::Mul:
547     return SE->getMulExpr(LHS, RHS);
548   default:
549     llvm_unreachable("Unexpected instruction.");
550   }
551   return nullptr;
552 }
553 
554 Instruction *
findClosestMatchingDominator(const SCEV * CandidateExpr,Instruction * Dominatee)555 NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
556                                               Instruction *Dominatee) {
557   auto Pos = SeenExprs.find(CandidateExpr);
558   if (Pos == SeenExprs.end())
559     return nullptr;
560 
561   auto &Candidates = Pos->second;
562   // Because we process the basic blocks in pre-order of the dominator tree, a
563   // candidate that doesn't dominate the current instruction won't dominate any
564   // future instruction either. Therefore, we pop it out of the stack. This
565   // optimization makes the algorithm O(n).
566   while (!Candidates.empty()) {
567     // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed
568     // during rewriting.
569     if (Value *Candidate = Candidates.back()) {
570       Instruction *CandidateInstruction = cast<Instruction>(Candidate);
571       if (DT->dominates(CandidateInstruction, Dominatee))
572         return CandidateInstruction;
573     }
574     Candidates.pop_back();
575   }
576   return nullptr;
577 }
578