1 //===--- HexagonGenExtract.cpp --------------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
12 #include "llvm/IR/Constants.h"
13 #include "llvm/IR/Dominators.h"
14 #include "llvm/IR/Function.h"
15 #include "llvm/IR/Instructions.h"
16 #include "llvm/IR/IntrinsicInst.h"
17 #include "llvm/IR/IRBuilder.h"
18 #include "llvm/IR/PatternMatch.h"
19 #include "llvm/Pass.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/MathExtras.h"
23 #include "llvm/Support/raw_ostream.h"
24 
25 using namespace llvm;
26 
27 static cl::opt<unsigned> ExtractCutoff("extract-cutoff", cl::init(~0U),
28   cl::Hidden, cl::desc("Cutoff for generating \"extract\""
29   " instructions"));
30 
31 // This prevents generating extract instructions that have the offset of 0.
32 // One of the reasons for "extract" is to put a sequence of bits in a regis-
33 // ter, starting at offset 0 (so that these bits can then be used by an
34 // "insert"). If the bits are already at offset 0, it is better not to gene-
35 // rate "extract", since logical bit operations can be merged into compound
36 // instructions (as opposed to "extract").
37 static cl::opt<bool> NoSR0("extract-nosr0", cl::init(true), cl::Hidden,
38   cl::desc("No extract instruction with offset 0"));
39 
40 static cl::opt<bool> NeedAnd("extract-needand", cl::init(true), cl::Hidden,
41   cl::desc("Require & in extract patterns"));
42 
43 namespace llvm {
44   void initializeHexagonGenExtractPass(PassRegistry&);
45   FunctionPass *createHexagonGenExtract();
46 }
47 
48 
49 namespace {
50   class HexagonGenExtract : public FunctionPass {
51   public:
52     static char ID;
HexagonGenExtract()53     HexagonGenExtract() : FunctionPass(ID), ExtractCount(0) {
54       initializeHexagonGenExtractPass(*PassRegistry::getPassRegistry());
55     }
getPassName() const56     virtual const char *getPassName() const override {
57       return "Hexagon generate \"extract\" instructions";
58     }
59     virtual bool runOnFunction(Function &F) override;
getAnalysisUsage(AnalysisUsage & AU) const60     virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
61       AU.addRequired<DominatorTreeWrapperPass>();
62       AU.addPreserved<DominatorTreeWrapperPass>();
63       AU.addPreserved<MachineFunctionAnalysis>();
64       FunctionPass::getAnalysisUsage(AU);
65     }
66   private:
67     bool visitBlock(BasicBlock *B);
68     bool convert(Instruction *In);
69 
70     unsigned ExtractCount;
71     DominatorTree *DT;
72   };
73 
74   char HexagonGenExtract::ID = 0;
75 }
76 
77 INITIALIZE_PASS_BEGIN(HexagonGenExtract, "hextract", "Hexagon generate "
78   "\"extract\" instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)79 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
80 INITIALIZE_PASS_END(HexagonGenExtract, "hextract", "Hexagon generate "
81   "\"extract\" instructions", false, false)
82 
83 
84 bool HexagonGenExtract::convert(Instruction *In) {
85   using namespace PatternMatch;
86   Value *BF = 0;
87   ConstantInt *CSL = 0, *CSR = 0, *CM = 0;
88   BasicBlock *BB = In->getParent();
89   LLVMContext &Ctx = BB->getContext();
90   bool LogicalSR;
91 
92   // (and (shl (lshr x, #sr), #sl), #m)
93   LogicalSR = true;
94   bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
95                                m_ConstantInt(CSL)),
96                          m_ConstantInt(CM)));
97 
98   if (!Match) {
99     // (and (shl (ashr x, #sr), #sl), #m)
100     LogicalSR = false;
101     Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
102                             m_ConstantInt(CSL)),
103                       m_ConstantInt(CM)));
104   }
105   if (!Match) {
106     // (and (shl x, #sl), #m)
107     LogicalSR = true;
108     CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
109     Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)),
110                       m_ConstantInt(CM)));
111     if (Match && NoSR0)
112       return false;
113   }
114   if (!Match) {
115     // (and (lshr x, #sr), #m)
116     LogicalSR = true;
117     CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
118     Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
119                             m_ConstantInt(CM)));
120   }
121   if (!Match) {
122     // (and (ashr x, #sr), #m)
123     LogicalSR = false;
124     CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
125     Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
126                             m_ConstantInt(CM)));
127   }
128   if (!Match) {
129     CM = 0;
130     // (shl (lshr x, #sr), #sl)
131     LogicalSR = true;
132     Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
133                             m_ConstantInt(CSL)));
134   }
135   if (!Match) {
136     CM = 0;
137     // (shl (ashr x, #sr), #sl)
138     LogicalSR = false;
139     Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
140                             m_ConstantInt(CSL)));
141   }
142   if (!Match)
143     return false;
144 
145   Type *Ty = BF->getType();
146   if (!Ty->isIntegerTy())
147     return false;
148   unsigned BW = Ty->getPrimitiveSizeInBits();
149   if (BW != 32 && BW != 64)
150     return false;
151 
152   uint32_t SR = CSR->getZExtValue();
153   uint32_t SL = CSL->getZExtValue();
154 
155   if (!CM) {
156     // If there was no and, and the shift left did not remove all potential
157     // sign bits created by the shift right, then extractu cannot reproduce
158     // this value.
159     if (!LogicalSR && (SR > SL))
160       return false;
161     APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL);
162     CM = ConstantInt::get(Ctx, A);
163   }
164 
165   // CM is the shifted-left mask. Shift it back right to remove the zero
166   // bits on least-significant positions.
167   APInt M = CM->getValue().lshr(SL);
168   uint32_t T = M.countTrailingOnes();
169 
170   // During the shifts some of the bits will be lost. Calculate how many
171   // of the original value will remain after shift right and then left.
172   uint32_t U = BW - std::max(SL, SR);
173   // The width of the extracted field is the minimum of the original bits
174   // that remain after the shifts and the number of contiguous 1s in the mask.
175   uint32_t W = std::min(U, T);
176   if (W == 0)
177     return false;
178 
179   // Check if the extracted bits are contained within the mask that it is
180   // and-ed with. The extract operation will copy these bits, and so the
181   // mask cannot any holes in it that would clear any of the bits of the
182   // extracted field.
183   if (!LogicalSR) {
184     // If the shift right was arithmetic, it could have included some 1 bits.
185     // It is still ok to generate extract, but only if the mask eliminates
186     // those bits (i.e. M does not have any bits set beyond U).
187     APInt C = APInt::getHighBitsSet(BW, BW-U);
188     if (M.intersects(C) || !APIntOps::isMask(W, M))
189       return false;
190   } else {
191     // Check if M starts with a contiguous sequence of W times 1 bits. Get
192     // the low U bits of M (which eliminates the 0 bits shifted in on the
193     // left), and check if the result is APInt's "mask":
194     if (!APIntOps::isMask(W, M.getLoBits(U)))
195       return false;
196   }
197 
198   IRBuilder<> IRB(In);
199   Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu
200                                    : Intrinsic::hexagon_S2_extractup;
201   Module *Mod = BB->getParent()->getParent();
202   Value *ExtF = Intrinsic::getDeclaration(Mod, IntId);
203   Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)});
204   if (SL != 0)
205     NewIn = IRB.CreateShl(NewIn, SL, CSL->getName());
206   In->replaceAllUsesWith(NewIn);
207   return true;
208 }
209 
210 
visitBlock(BasicBlock * B)211 bool HexagonGenExtract::visitBlock(BasicBlock *B) {
212   // Depth-first, bottom-up traversal.
213   DomTreeNode *DTN = DT->getNode(B);
214   typedef GraphTraits<DomTreeNode*> GTN;
215   typedef GTN::ChildIteratorType Iter;
216   for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I)
217     visitBlock((*I)->getBlock());
218 
219   // Allow limiting the number of generated extracts for debugging purposes.
220   bool HasCutoff = ExtractCutoff.getPosition();
221   unsigned Cutoff = ExtractCutoff;
222 
223   bool Changed = false;
224   BasicBlock::iterator I = std::prev(B->end()), NextI, Begin = B->begin();
225   while (true) {
226     if (HasCutoff && (ExtractCount >= Cutoff))
227       return Changed;
228     bool Last = (I == Begin);
229     if (!Last)
230       NextI = std::prev(I);
231     Instruction *In = &*I;
232     bool Done = convert(In);
233     if (HasCutoff && Done)
234       ExtractCount++;
235     Changed |= Done;
236     if (Last)
237       break;
238     I = NextI;
239   }
240   return Changed;
241 }
242 
243 
runOnFunction(Function & F)244 bool HexagonGenExtract::runOnFunction(Function &F) {
245   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
246   bool Changed;
247 
248   // Traverse the function bottom-up, to see super-expressions before their
249   // sub-expressions.
250   BasicBlock *Entry = GraphTraits<Function*>::getEntryNode(&F);
251   Changed = visitBlock(Entry);
252 
253   return Changed;
254 }
255 
256 
createHexagonGenExtract()257 FunctionPass *llvm::createHexagonGenExtract() {
258   return new HexagonGenExtract();
259 }
260