1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains an optimization for div and rem on architectures that
11 // execute short instructions significantly faster than longer instructions.
12 // For example, on Intel Atom 32-bit divides are slow enough that during
13 // runtime it is profitable to check the value of the operands, and if they are
14 // positive and less than 256 use an unsigned 8-bit divide.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/Instructions.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "bypass-slow-division"
27 
28 namespace {
29   struct DivOpInfo {
30     bool SignedOp;
31     Value *Dividend;
32     Value *Divisor;
33 
DivOpInfo__anon34d4fe8c0111::DivOpInfo34     DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
35       : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
36   };
37 
38   struct DivPhiNodes {
39     PHINode *Quotient;
40     PHINode *Remainder;
41 
DivPhiNodes__anon34d4fe8c0111::DivPhiNodes42     DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
43       : Quotient(InQuotient), Remainder(InRemainder) {}
44   };
45 }
46 
47 namespace llvm {
48   template<>
49   struct DenseMapInfo<DivOpInfo> {
isEqualllvm::DenseMapInfo50     static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
51       return Val1.SignedOp == Val2.SignedOp &&
52              Val1.Dividend == Val2.Dividend &&
53              Val1.Divisor == Val2.Divisor;
54     }
55 
getEmptyKeyllvm::DenseMapInfo56     static DivOpInfo getEmptyKey() {
57       return DivOpInfo(false, nullptr, nullptr);
58     }
59 
getTombstoneKeyllvm::DenseMapInfo60     static DivOpInfo getTombstoneKey() {
61       return DivOpInfo(true, nullptr, nullptr);
62     }
63 
getHashValuellvm::DenseMapInfo64     static unsigned getHashValue(const DivOpInfo &Val) {
65       return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
66                         reinterpret_cast<uintptr_t>(Val.Divisor)) ^
67                         (unsigned)Val.SignedOp;
68     }
69   };
70 
71   typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
72 }
73 
74 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
75 // value of the operands and uses a shorter-faster div/rem instruction when
76 // possible and the longer-slower div/rem instruction otherwise.
insertFastDiv(Instruction * I,IntegerType * BypassType,bool UseDivOp,bool UseSignedOp,DivCacheTy & PerBBDivCache)77 static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
78                           bool UseDivOp, bool UseSignedOp,
79                           DivCacheTy &PerBBDivCache) {
80   Function *F = I->getParent()->getParent();
81   // Get instruction operands
82   Value *Dividend = I->getOperand(0);
83   Value *Divisor = I->getOperand(1);
84 
85   if (isa<ConstantInt>(Divisor) ||
86       (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
87     // Operations with immediate values should have
88     // been solved and replaced during compile time.
89     return false;
90   }
91 
92   // Basic Block is split before divide
93   BasicBlock *MainBB = &*I->getParent();
94   BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
95 
96   // Add new basic block for slow divide operation
97   BasicBlock *SlowBB =
98       BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
99   SlowBB->moveBefore(SuccessorBB);
100   IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
101   Value *SlowQuotientV;
102   Value *SlowRemainderV;
103   if (UseSignedOp) {
104     SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
105     SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
106   } else {
107     SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
108     SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
109   }
110   SlowBuilder.CreateBr(SuccessorBB);
111 
112   // Add new basic block for fast divide operation
113   BasicBlock *FastBB =
114       BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
115   FastBB->moveBefore(SlowBB);
116   IRBuilder<> FastBuilder(FastBB, FastBB->begin());
117   Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
118                                                 BypassType);
119   Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
120                                                  BypassType);
121 
122   // udiv/urem because optimization only handles positive numbers
123   Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
124                                                       ShortDivisorV);
125   Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
126                                                   ShortDivisorV);
127   Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
128                                                 ShortQuotientV,
129                                                 Dividend->getType());
130   Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
131                                                  ShortRemainderV,
132                                                  Dividend->getType());
133   FastBuilder.CreateBr(SuccessorBB);
134 
135   // Phi nodes for result of div and rem
136   IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
137   PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
138   QuoPhi->addIncoming(SlowQuotientV, SlowBB);
139   QuoPhi->addIncoming(FastQuotientV, FastBB);
140   PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
141   RemPhi->addIncoming(SlowRemainderV, SlowBB);
142   RemPhi->addIncoming(FastRemainderV, FastBB);
143 
144   // Replace I with appropriate phi node
145   if (UseDivOp)
146     I->replaceAllUsesWith(QuoPhi);
147   else
148     I->replaceAllUsesWith(RemPhi);
149   I->eraseFromParent();
150 
151   // Combine operands into a single value with OR for value testing below
152   MainBB->getInstList().back().eraseFromParent();
153   IRBuilder<> MainBuilder(MainBB, MainBB->end());
154   Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
155 
156   // BitMask is inverted to check if the operands are
157   // larger than the bypass type
158   uint64_t BitMask = ~BypassType->getBitMask();
159   Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
160 
161   // Compare operand values and branch
162   Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
163   Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
164   MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
165 
166   // Cache phi nodes to be used later in place of other instances
167   // of div or rem with the same sign, dividend, and divisor
168   DivOpInfo Key(UseSignedOp, Dividend, Divisor);
169   DivPhiNodes Value(QuoPhi, RemPhi);
170   PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
171   return true;
172 }
173 
174 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
175 // the current BB if operands and operation are identical. Otherwise calls
176 // insertFastDiv to perform the optimization and caches the resulting dividend
177 // and remainder.
reuseOrInsertFastDiv(Instruction * I,IntegerType * BypassType,bool UseDivOp,bool UseSignedOp,DivCacheTy & PerBBDivCache)178 static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
179                                  bool UseDivOp, bool UseSignedOp,
180                                  DivCacheTy &PerBBDivCache) {
181   // Get instruction operands
182   DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
183   DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
184 
185   if (CacheI == PerBBDivCache.end()) {
186     // If previous instance does not exist, insert fast div
187     return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
188   }
189 
190   // Replace operation value with previously generated phi node
191   DivPhiNodes &Value = CacheI->second;
192   if (UseDivOp) {
193     // Replace all uses of div instruction with quotient phi node
194     I->replaceAllUsesWith(Value.Quotient);
195   } else {
196     // Replace all uses of rem instruction with remainder phi node
197     I->replaceAllUsesWith(Value.Remainder);
198   }
199 
200   // Remove redundant operation
201   I->eraseFromParent();
202   return true;
203 }
204 
205 // bypassSlowDivision - This optimization identifies DIV instructions in a BB
206 // that can be profitably bypassed and carried out with a shorter, faster
207 // divide.
bypassSlowDivision(BasicBlock * BB,const DenseMap<unsigned int,unsigned int> & BypassWidths)208 bool llvm::bypassSlowDivision(
209     BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
210   DivCacheTy DivCache;
211 
212   bool MadeChange = false;
213   Instruction* Next = &*BB->begin();
214   while (Next != nullptr) {
215     // We may add instructions immediately after I, but we want to skip over
216     // them.
217     Instruction* I = Next;
218     Next = Next->getNextNode();
219 
220     // Get instruction details
221     unsigned Opcode = I->getOpcode();
222     bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
223     bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
224     bool UseSignedOp = Opcode == Instruction::SDiv ||
225                        Opcode == Instruction::SRem;
226 
227     // Only optimize div or rem ops
228     if (!UseDivOp && !UseRemOp)
229       continue;
230 
231     // Skip division on vector types, only optimize integer instructions
232     if (!I->getType()->isIntegerTy())
233       continue;
234 
235     // Get bitwidth of div/rem instruction
236     IntegerType *T = cast<IntegerType>(I->getType());
237     unsigned int bitwidth = T->getBitWidth();
238 
239     // Continue if bitwidth is not bypassed
240     DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
241     if (BI == BypassWidths.end())
242       continue;
243 
244     // Get type for div/rem instruction with bypass bitwidth
245     IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
246 
247     MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
248   }
249 
250   return MadeChange;
251 }
252