1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains an optimization for div and rem on architectures that
11 // execute short instructions significantly faster than longer instructions.
12 // For example, on Intel Atom 32-bit divides are slow enough that during
13 // runtime it is profitable to check the value of the operands, and if they are
14 // positive and less than 256 use an unsigned 8-bit divide.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/Instructions.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "bypass-slow-division"
27 
28 namespace {
29   struct DivOpInfo {
30     bool SignedOp;
31     Value *Dividend;
32     Value *Divisor;
33 
DivOpInfo__anon34d4fe8c0111::DivOpInfo34     DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
35       : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
36   };
37 
38   struct DivPhiNodes {
39     PHINode *Quotient;
40     PHINode *Remainder;
41 
DivPhiNodes__anon34d4fe8c0111::DivPhiNodes42     DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
43       : Quotient(InQuotient), Remainder(InRemainder) {}
44   };
45 }
46 
47 namespace llvm {
48   template<>
49   struct DenseMapInfo<DivOpInfo> {
isEqualllvm::DenseMapInfo50     static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
51       return Val1.SignedOp == Val2.SignedOp &&
52              Val1.Dividend == Val2.Dividend &&
53              Val1.Divisor == Val2.Divisor;
54     }
55 
getEmptyKeyllvm::DenseMapInfo56     static DivOpInfo getEmptyKey() {
57       return DivOpInfo(false, nullptr, nullptr);
58     }
59 
getTombstoneKeyllvm::DenseMapInfo60     static DivOpInfo getTombstoneKey() {
61       return DivOpInfo(true, nullptr, nullptr);
62     }
63 
getHashValuellvm::DenseMapInfo64     static unsigned getHashValue(const DivOpInfo &Val) {
65       return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
66                         reinterpret_cast<uintptr_t>(Val.Divisor)) ^
67                         (unsigned)Val.SignedOp;
68     }
69   };
70 
71   typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
72 }
73 
74 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
75 // value of the operands and uses a shorter-faster div/rem instruction when
76 // possible and the longer-slower div/rem instruction otherwise.
insertFastDiv(Function & F,Function::iterator & I,BasicBlock::iterator & J,IntegerType * BypassType,bool UseDivOp,bool UseSignedOp,DivCacheTy & PerBBDivCache)77 static bool insertFastDiv(Function &F,
78                           Function::iterator &I,
79                           BasicBlock::iterator &J,
80                           IntegerType *BypassType,
81                           bool UseDivOp,
82                           bool UseSignedOp,
83                           DivCacheTy &PerBBDivCache) {
84   // Get instruction operands
85   Instruction *Instr = &*J;
86   Value *Dividend = Instr->getOperand(0);
87   Value *Divisor = Instr->getOperand(1);
88 
89   if (isa<ConstantInt>(Divisor) ||
90       (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
91     // Operations with immediate values should have
92     // been solved and replaced during compile time.
93     return false;
94   }
95 
96   // Basic Block is split before divide
97   BasicBlock *MainBB = &*I;
98   BasicBlock *SuccessorBB = I->splitBasicBlock(J);
99   ++I; //advance iterator I to successorBB
100 
101   // Add new basic block for slow divide operation
102   BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
103                                           MainBB->getParent(), SuccessorBB);
104   SlowBB->moveBefore(SuccessorBB);
105   IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
106   Value *SlowQuotientV;
107   Value *SlowRemainderV;
108   if (UseSignedOp) {
109     SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
110     SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
111   } else {
112     SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
113     SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
114   }
115   SlowBuilder.CreateBr(SuccessorBB);
116 
117   // Add new basic block for fast divide operation
118   BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
119                                           MainBB->getParent(), SuccessorBB);
120   FastBB->moveBefore(SlowBB);
121   IRBuilder<> FastBuilder(FastBB, FastBB->begin());
122   Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
123                                                 BypassType);
124   Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
125                                                  BypassType);
126 
127   // udiv/urem because optimization only handles positive numbers
128   Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
129                                                       ShortDivisorV);
130   Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
131                                                   ShortDivisorV);
132   Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
133                                                 ShortQuotientV,
134                                                 Dividend->getType());
135   Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
136                                                  ShortRemainderV,
137                                                  Dividend->getType());
138   FastBuilder.CreateBr(SuccessorBB);
139 
140   // Phi nodes for result of div and rem
141   IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
142   PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
143   QuoPhi->addIncoming(SlowQuotientV, SlowBB);
144   QuoPhi->addIncoming(FastQuotientV, FastBB);
145   PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
146   RemPhi->addIncoming(SlowRemainderV, SlowBB);
147   RemPhi->addIncoming(FastRemainderV, FastBB);
148 
149   // Replace Instr with appropriate phi node
150   if (UseDivOp)
151     Instr->replaceAllUsesWith(QuoPhi);
152   else
153     Instr->replaceAllUsesWith(RemPhi);
154   Instr->eraseFromParent();
155 
156   // Combine operands into a single value with OR for value testing below
157   MainBB->getInstList().back().eraseFromParent();
158   IRBuilder<> MainBuilder(MainBB, MainBB->end());
159   Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
160 
161   // BitMask is inverted to check if the operands are
162   // larger than the bypass type
163   uint64_t BitMask = ~BypassType->getBitMask();
164   Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
165 
166   // Compare operand values and branch
167   Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
168   Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
169   MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
170 
171   // point iterator J at first instruction of successorBB
172   J = I->begin();
173 
174   // Cache phi nodes to be used later in place of other instances
175   // of div or rem with the same sign, dividend, and divisor
176   DivOpInfo Key(UseSignedOp, Dividend, Divisor);
177   DivPhiNodes Value(QuoPhi, RemPhi);
178   PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
179   return true;
180 }
181 
182 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
183 // operands and operation are identical. Otherwise call insertFastDiv to perform
184 // the optimization and cache the resulting dividend and remainder.
reuseOrInsertFastDiv(Function & F,Function::iterator & I,BasicBlock::iterator & J,IntegerType * BypassType,bool UseDivOp,bool UseSignedOp,DivCacheTy & PerBBDivCache)185 static bool reuseOrInsertFastDiv(Function &F,
186                                  Function::iterator &I,
187                                  BasicBlock::iterator &J,
188                                  IntegerType *BypassType,
189                                  bool UseDivOp,
190                                  bool UseSignedOp,
191                                  DivCacheTy &PerBBDivCache) {
192   // Get instruction operands
193   Instruction *Instr = &*J;
194   DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
195   DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
196 
197   if (CacheI == PerBBDivCache.end()) {
198     // If previous instance does not exist, insert fast div
199     return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
200                          PerBBDivCache);
201   }
202 
203   // Replace operation value with previously generated phi node
204   DivPhiNodes &Value = CacheI->second;
205   if (UseDivOp) {
206     // Replace all uses of div instruction with quotient phi node
207     J->replaceAllUsesWith(Value.Quotient);
208   } else {
209     // Replace all uses of rem instruction with remainder phi node
210     J->replaceAllUsesWith(Value.Remainder);
211   }
212 
213   // Advance to next operation
214   ++J;
215 
216   // Remove redundant operation
217   Instr->eraseFromParent();
218   return true;
219 }
220 
221 // bypassSlowDivision - This optimization identifies DIV instructions that can
222 // be profitably bypassed and carried out with a shorter, faster divide.
bypassSlowDivision(Function & F,Function::iterator & I,const DenseMap<unsigned int,unsigned int> & BypassWidths)223 bool llvm::bypassSlowDivision(Function &F,
224                               Function::iterator &I,
225                               const DenseMap<unsigned int, unsigned int> &BypassWidths) {
226   DivCacheTy DivCache;
227 
228   bool MadeChange = false;
229   for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
230 
231     // Get instruction details
232     unsigned Opcode = J->getOpcode();
233     bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
234     bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
235     bool UseSignedOp = Opcode == Instruction::SDiv ||
236                        Opcode == Instruction::SRem;
237 
238     // Only optimize div or rem ops
239     if (!UseDivOp && !UseRemOp)
240       continue;
241 
242     // Skip division on vector types, only optimize integer instructions
243     if (!J->getType()->isIntegerTy())
244       continue;
245 
246     // Get bitwidth of div/rem instruction
247     IntegerType *T = cast<IntegerType>(J->getType());
248     unsigned int bitwidth = T->getBitWidth();
249 
250     // Continue if bitwidth is not bypassed
251     DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
252     if (BI == BypassWidths.end())
253       continue;
254 
255     // Get type for div/rem instruction with bypass bitwidth
256     IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
257 
258     MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
259                                        UseSignedOp, DivCache);
260   }
261 
262   return MadeChange;
263 }
264