1 //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass looks for instructions that can be replaced by a Test Data Class
11 // instruction, and replaces them when profitable.
12 //
13 // Roughly, the following rules are recognized:
14 //
15 // 1: fcmp pred X, 0 -> tdc X, mask
16 // 2: fcmp pred X, +-inf -> tdc X, mask
17 // 3: fcmp pred X, +-minnorm -> tdc X, mask
18 // 4: tdc (fabs X), mask -> tdc X, newmask
19 // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
20 // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
21 // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
22 // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
23 // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
24 // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
25 //
26 // The pass works in 4 steps:
27 //
28 // 1. All fcmp and icmp instructions in a function are checked for a match
29 //    with rules 1-3 and 5-7.  Their TDC equivalents are stored in
30 //    the ConvertedInsts mapping.  If the operand of a fcmp instruction is
31 //    a fabs, it's also folded according to rule 4.
32 // 2. All and/or/xor i1 instructions whose both operands have been already
33 //    mapped are mapped according to rules 8-10.  LogicOpsWorklist is used
34 //    as a queue of instructions to check.
35 // 3. All mapped instructions that are considered worthy of conversion (ie.
36 //    replacing them will actually simplify the final code) are replaced
37 //    with a call to the s390.tdc intrinsic.
38 // 4. All intermediate results of replaced instructions are removed if unused.
39 //
40 // Instructions that match rules 1-3 are considered unworthy of conversion
41 // on their own (since a comparison instruction is superior), but are mapped
42 // in the hopes of folding the result using rules 4 and 8-10 (likely removing
43 // the original comparison in the process).
44 //
45 //===----------------------------------------------------------------------===//
46 
47 #include "SystemZ.h"
48 #include "llvm/ADT/MapVector.h"
49 #include "llvm/IR/Constants.h"
50 #include "llvm/IR/Instructions.h"
51 #include "llvm/IR/InstIterator.h"
52 #include "llvm/IR/IntrinsicInst.h"
53 #include "llvm/IR/IRBuilder.h"
54 #include "llvm/IR/LegacyPassManager.h"
55 #include "llvm/IR/Module.h"
56 #include <deque>
57 #include <set>
58 
59 using namespace llvm;
60 
61 namespace llvm {
62   void initializeSystemZTDCPassPass(PassRegistry&);
63 }
64 
65 namespace {
66 
67 class SystemZTDCPass : public FunctionPass {
68 public:
69   static char ID;
SystemZTDCPass()70   SystemZTDCPass() : FunctionPass(ID) {
71     initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
72   }
73 
74   bool runOnFunction(Function &F) override;
75 private:
76   // Maps seen instructions that can be mapped to a TDC, values are
77   // (TDC operand, TDC mask, worthy flag) triples.
78   MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts;
79   // The queue of and/or/xor i1 instructions to be potentially folded.
80   std::vector<BinaryOperator *> LogicOpsWorklist;
81   // Instructions matched while folding, to be removed at the end if unused.
82   std::set<Instruction *> PossibleJunk;
83 
84   // Tries to convert a fcmp instruction.
85   void convertFCmp(CmpInst &I);
86 
87   // Tries to convert an icmp instruction.
88   void convertICmp(CmpInst &I);
89 
90   // Tries to convert an i1 and/or/xor instruction, whose both operands
91   // have been already converted.
92   void convertLogicOp(BinaryOperator &I);
93 
94   // Marks an instruction as converted - adds it to ConvertedInsts and adds
95   // any and/or/xor i1 users to the queue.
converted(Instruction * I,Value * V,int Mask,bool Worthy)96   void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
97     ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
98     auto &M = *I->getFunction()->getParent();
99     auto &Ctx = M.getContext();
100     for (auto *U : I->users()) {
101       auto *LI = dyn_cast<BinaryOperator>(U);
102       if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
103           (LI->getOpcode() == Instruction::And ||
104            LI->getOpcode() == Instruction::Or ||
105            LI->getOpcode() == Instruction::Xor)) {
106         LogicOpsWorklist.push_back(LI);
107       }
108     }
109   }
110 };
111 
112 } // end anonymous namespace
113 
114 char SystemZTDCPass::ID = 0;
115 INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
116                 "SystemZ Test Data Class optimization", false, false)
117 
createSystemZTDCPass()118 FunctionPass *llvm::createSystemZTDCPass() {
119   return new SystemZTDCPass();
120 }
121 
convertFCmp(CmpInst & I)122 void SystemZTDCPass::convertFCmp(CmpInst &I) {
123   Value *Op0 = I.getOperand(0);
124   auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
125   auto Pred = I.getPredicate();
126   // Only comparisons with consts are interesting.
127   if (!Const)
128     return;
129   // Compute the smallest normal number (and its negation).
130   auto &Sem = Op0->getType()->getFltSemantics();
131   APFloat Smallest = APFloat::getSmallestNormalized(Sem);
132   APFloat NegSmallest = Smallest;
133   NegSmallest.changeSign();
134   // Check if Const is one of our recognized consts.
135   int WhichConst;
136   if (Const->isZero()) {
137     // All comparisons with 0 can be converted.
138     WhichConst = 0;
139   } else if (Const->isInfinity()) {
140     // Likewise for infinities.
141     WhichConst = Const->isNegative() ? 2 : 1;
142   } else if (Const->isExactlyValue(Smallest)) {
143     // For Smallest, we cannot do EQ separately from GT.
144     if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
145         (Pred & CmpInst::FCMP_OGE) != 0)
146       return;
147     WhichConst = 3;
148   } else if (Const->isExactlyValue(NegSmallest)) {
149     // Likewise for NegSmallest, we cannot do EQ separately from LT.
150     if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
151         (Pred & CmpInst::FCMP_OLE) != 0)
152       return;
153     WhichConst = 4;
154   } else {
155     // Not one of our special constants.
156     return;
157   }
158   // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
159   static const int Masks[][4] = {
160     { // 0
161       SystemZ::TDCMASK_ZERO,              // eq
162       SystemZ::TDCMASK_POSITIVE,          // gt
163       SystemZ::TDCMASK_NEGATIVE,          // lt
164       SystemZ::TDCMASK_NAN,               // un
165     },
166     { // inf
167       SystemZ::TDCMASK_INFINITY_PLUS,     // eq
168       0,                                  // gt
169       (SystemZ::TDCMASK_ZERO |
170        SystemZ::TDCMASK_NEGATIVE |
171        SystemZ::TDCMASK_NORMAL_PLUS |
172        SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
173       SystemZ::TDCMASK_NAN,               // un
174     },
175     { // -inf
176       SystemZ::TDCMASK_INFINITY_MINUS,    // eq
177       (SystemZ::TDCMASK_ZERO |
178        SystemZ::TDCMASK_POSITIVE |
179        SystemZ::TDCMASK_NORMAL_MINUS |
180        SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
181       0,                                  // lt
182       SystemZ::TDCMASK_NAN,               // un
183     },
184     { // minnorm
185       0,                                  // eq (unsupported)
186       (SystemZ::TDCMASK_NORMAL_PLUS |
187        SystemZ::TDCMASK_INFINITY_PLUS),   // gt (actually ge)
188       (SystemZ::TDCMASK_ZERO |
189        SystemZ::TDCMASK_NEGATIVE |
190        SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
191       SystemZ::TDCMASK_NAN,               // un
192     },
193     { // -minnorm
194       0,                                  // eq (unsupported)
195       (SystemZ::TDCMASK_ZERO |
196        SystemZ::TDCMASK_POSITIVE |
197        SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
198       (SystemZ::TDCMASK_NORMAL_MINUS |
199        SystemZ::TDCMASK_INFINITY_MINUS),  // lt (actually le)
200       SystemZ::TDCMASK_NAN,               // un
201     }
202   };
203   // Construct the mask as a combination of the partial masks.
204   int Mask = 0;
205   if (Pred & CmpInst::FCMP_OEQ)
206     Mask |= Masks[WhichConst][0];
207   if (Pred & CmpInst::FCMP_OGT)
208     Mask |= Masks[WhichConst][1];
209   if (Pred & CmpInst::FCMP_OLT)
210     Mask |= Masks[WhichConst][2];
211   if (Pred & CmpInst::FCMP_UNO)
212     Mask |= Masks[WhichConst][3];
213   // A lone fcmp is unworthy of tdc conversion on its own, but may become
214   // worthy if combined with fabs.
215   bool Worthy = false;
216   if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
217     Function *F = CI->getCalledFunction();
218     if (F && F->getIntrinsicID() == Intrinsic::fabs) {
219       // Fold with fabs - adjust the mask appropriately.
220       Mask &= SystemZ::TDCMASK_PLUS;
221       Mask |= Mask >> 1;
222       Op0 = CI->getArgOperand(0);
223       // A combination of fcmp with fabs is a win, unless the constant
224       // involved is 0 (which is handled by later passes).
225       Worthy = WhichConst != 0;
226       PossibleJunk.insert(CI);
227     }
228   }
229   converted(&I, Op0, Mask, Worthy);
230 }
231 
convertICmp(CmpInst & I)232 void SystemZTDCPass::convertICmp(CmpInst &I) {
233   Value *Op0 = I.getOperand(0);
234   auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
235   auto Pred = I.getPredicate();
236   // All our icmp rules involve comparisons with consts.
237   if (!Const)
238     return;
239   if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
240     // Check for icmp+bitcast used for signbit.
241     if (!Cast->getSrcTy()->isFloatTy() &&
242         !Cast->getSrcTy()->isDoubleTy() &&
243         !Cast->getSrcTy()->isFP128Ty())
244       return;
245     Value *V = Cast->getOperand(0);
246     int Mask;
247     if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
248       // icmp slt (bitcast X), 0 - set if sign bit true
249       Mask = SystemZ::TDCMASK_MINUS;
250     } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
251       // icmp sgt (bitcast X), -1 - set if sign bit false
252       Mask = SystemZ::TDCMASK_PLUS;
253     } else {
254       // Not a sign bit check.
255       return;
256     }
257     PossibleJunk.insert(Cast);
258     converted(&I, V, Mask, true);
259   } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
260     // Check if this is a pre-existing call of our tdc intrinsic.
261     Function *F = CI->getCalledFunction();
262     if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
263       return;
264     if (!Const->isZero())
265       return;
266     Value *V = CI->getArgOperand(0);
267     auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
268     // Bail if the mask is not a constant.
269     if (!MaskC)
270       return;
271     int Mask = MaskC->getZExtValue();
272     Mask &= SystemZ::TDCMASK_ALL;
273     if (Pred == CmpInst::ICMP_NE) {
274       // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
275     } else if (Pred == CmpInst::ICMP_EQ) {
276       // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
277       Mask ^= SystemZ::TDCMASK_ALL;
278     } else {
279       // An unknown comparison - ignore.
280       return;
281     }
282     PossibleJunk.insert(CI);
283     converted(&I, V, Mask, false);
284   }
285 }
286 
convertLogicOp(BinaryOperator & I)287 void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
288   Value *Op0, *Op1;
289   int Mask0, Mask1;
290   bool Worthy0, Worthy1;
291   std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
292   std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
293   if (Op0 != Op1)
294     return;
295   int Mask;
296   switch (I.getOpcode()) {
297     case Instruction::And:
298       Mask = Mask0 & Mask1;
299       break;
300     case Instruction::Or:
301       Mask = Mask0 | Mask1;
302       break;
303     case Instruction::Xor:
304       Mask = Mask0 ^ Mask1;
305       break;
306     default:
307       llvm_unreachable("Unknown op in convertLogicOp");
308   }
309   converted(&I, Op0, Mask, true);
310 }
311 
runOnFunction(Function & F)312 bool SystemZTDCPass::runOnFunction(Function &F) {
313   ConvertedInsts.clear();
314   LogicOpsWorklist.clear();
315   PossibleJunk.clear();
316 
317   // Look for icmp+fcmp instructions.
318   for (auto &I : instructions(F)) {
319     if (I.getOpcode() == Instruction::FCmp)
320       convertFCmp(cast<CmpInst>(I));
321     else if (I.getOpcode() == Instruction::ICmp)
322       convertICmp(cast<CmpInst>(I));
323   }
324 
325   // If none found, bail already.
326   if (ConvertedInsts.empty())
327     return false;
328 
329   // Process the queue of logic instructions.
330   while (!LogicOpsWorklist.empty()) {
331     BinaryOperator *Op = LogicOpsWorklist.back();
332     LogicOpsWorklist.pop_back();
333     // If both operands mapped, and the instruction itself not yet mapped,
334     // convert it.
335     if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
336         ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
337         !ConvertedInsts.count(Op))
338       convertLogicOp(*Op);
339   }
340 
341   // Time to actually replace the instructions.  Do it in the reverse order
342   // of finding them, since there's a good chance the earlier ones will be
343   // unused (due to being folded into later ones).
344   Module &M = *F.getParent();
345   auto &Ctx = M.getContext();
346   Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
347   bool MadeChange = false;
348   for (auto &It : reverse(ConvertedInsts)) {
349     Instruction *I = It.first;
350     Value *V;
351     int Mask;
352     bool Worthy;
353     std::tie(V, Mask, Worthy) = It.second;
354     if (!I->user_empty()) {
355       // If used and unworthy of conversion, skip it.
356       if (!Worthy)
357         continue;
358       // Call the intrinsic, compare result with 0.
359       Value *TDCFunc = Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc,
360                                                  V->getType());
361       IRBuilder<> IRB(I);
362       Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
363       Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
364       Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
365       I->replaceAllUsesWith(ICmp);
366     }
367     // If unused, or used and converted, remove it.
368     I->eraseFromParent();
369     MadeChange = true;
370   }
371 
372   if (!MadeChange)
373     return false;
374 
375   // We've actually done something - now clear misc accumulated junk (fabs,
376   // bitcast).
377   for (auto *I : PossibleJunk)
378     if (I->user_empty())
379       I->eraseFromParent();
380 
381   return true;
382 }
383