//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file contains TargetLowering functions borrowed from AMDIL.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Implementation Helper Functions End
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
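// InitAMDILLowering - Register the AMDIL-inherited operation actions: for
// each value type the backend supports, mark the ISD nodes below as Legal,
// Custom lowered, or Expanded by the legalizer.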
void AMDGPUTargetLowering::InitAMDILLowering()
{
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  // These are the currently supported register classes.

  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types.
    // We cannot sextinreg; expand to shifts.
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines.
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types.
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // The GPU also does not have a divrem function for signed or unsigned.
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // The GPU does not have [S|U]MUL_LOHI as a single instruction.
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // The GPU doesn't have a rotl, rotr, or byteswap instruction.
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // The GPU doesn't have any counting operators.
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant, MVT::i64, Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
    // We support loading/storing v2f64 but not operations on the type.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::Constant, MVT::i32, Legal);

  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  maxStoresPerMemcpy  = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset  = 4096;
}

bool
AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  return false;
}

// The backend supports 32 and 64 bit floating point immediates.
bool
AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarVT == MVT::f32 || ScalarVT == MVT::f64;
}

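// Since both 32 and 64 bit floating point immediates are legal (see
// isFPImmLegal above), there is no benefit to shrinking an f64 constant
// down to f32.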
bool
AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarVT != MVT::f32 && ScalarVT != MVT::f64;
}


// computeMaskedBitsForTargetNode - Determine which bits of Op are known to
// be zero or one and return them in KnownZero and KnownOne. Op is expected
// to be a target-specific node. Used by the DAG combiner.

void
AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const
{
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
  switch (Op.getOpcode()) {
    default: break;
    case ISD::SELECT_CC:
             // The result of a SELECT_CC is one of its two value operands
             // (operands 2 and 3), so a bit is known only if it is known in
             // both of them.
             DAG.ComputeMaskedBits(
                 Op.getOperand(2),
                 KnownZero,
                 KnownOne,
                 Depth + 1
                 );
             DAG.ComputeMaskedBits(
                 Op.getOperand(3),
                 KnownZero2,
                 KnownOne2,
                 Depth + 1
                 );
             assert((KnownZero & KnownOne) == 0
                 && "Bits known to be one AND zero?");
             assert((KnownZero2 & KnownOne2) == 0
                 && "Bits known to be one AND zero?");
             // Only known if known in both the LHS and RHS
             KnownOne &= KnownOne2;
             KnownZero &= KnownZero2;
             break;
  }
}

//===----------------------------------------------------------------------===//
//                           Other Lowering Hooks
//===----------------------------------------------------------------------===//

SDValue
AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  Nodes1 = DAG.getNode(AMDGPUISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  switch (Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
      // fall through
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
      // fall through
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  }
  return Nodes1;
}

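// Lower SIGN_EXTEND_INREG with a shift pair: shifting left so that the sign
// bit of the narrow type lands in the MSB, then shifting arithmetically back
// right, replicates that sign bit. For example, sign-extending an i8 held in
// an i32 becomes (sra (shl x, 24), 24). Sub-32-bit sources are first
// zero-extended to 32 bits so the shifts operate on a full register.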
SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32 bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Shift right arithmetically by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
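// genIntType - Return the 32- or 64-bit integer EVT whose total width matches
// size * numEle bits. For example, (32, 4) yields v4i32 and (64, 2) yields
// v2i64; anything narrower than one element yields the scalar type, e.g.
// (8, 1) yields i32.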
EVT
AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}

SDValue
AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond  = Op.getOperand(1);
  SDValue Jump  = Op.getOperand(2);
  SDValue Result;
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}

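// LowerSDIV24 - Lower signed division for 8- and 16-bit types through f32:
// the operands are converted to float and divided, the quotient q is
// truncated toward zero, and a correction term jq (+/-1 with the sign of
// LHS ^ RHS) is added only when the residual |LHS - q*RHS| has reached
// |RHS|, i.e. when the truncated float quotient came out one step short.
// Only scalar, 2- and 4-element types reach this function (see the SDIV
// actions in InitAMDILLowering), so INTTY and FLTTY below are always
// initialized.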
SDValue
AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY;
  MVT FLTTY;
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);

  // jq = (cv ? jq : 0);
  jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
      DAG.getConstant(0, OVT));

  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}

SDValue
AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSDIV32 generates IL equivalent to the following:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10

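  // r10 and r11 are all-ones masks when LHS and RHS are negative and zero
  // otherwise. With such a mask m, (x + m) ^ m computes |x|: when m is -1 it
  // is ~(x - 1) == -x, and when m is 0 it is the identity. The quotient of
  // the absolute values is computed with an unsigned divide, and the same
  // trick with the mask r10 ^ r11 restores the sign, since the sign of a
  // quotient is the XOR of the signs of its operands.
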
  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, OVT),
      DAG.getConstant(0, OVT),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, OVT),
      DAG.getConstant(0, OVT),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  // 64-bit signed division is not lowered specially; the node is returned
  // unchanged.
  return SDValue(Op.getNode(), 0);
}

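// LowerSREM8/LowerSREM16 - Lower small-integer remainders by widening: the
// operands are sign-extended to the matching 32-bit type, the remainder is
// taken there, and the result is truncated back to the original type.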
SDValue
AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSREM32 generates IL equivalent to the following:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // sub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10

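  // As in LowerSDIV32, r10 and r11 are all-ones masks for negative operands,
  // and (x + m) ^ m computes |x|. The remainder of the absolute values is
  // formed as r0 - (r0 / r1) * r1, and only r10 is used for the final sign
  // fixup, because the sign of a remainder follows the dividend.
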
  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  // 64-bit signed remainder is not lowered specially; the node is returned
  // unchanged.
  return SDValue(Op.getNode(), 0);
}