//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file contains TargetLowering functions borrowed from AMDIL.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Implementation Helper Functions End
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
void AMDGPUTargetLowering::InitAMDILLowering()
{
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  // These are the current register classes that are supported.

  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types.
    // We cannot sextinreg; custom lower it to a pair of shifts instead.
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines.
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types.
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // The GPU does not have a DIVREM instruction for signed or unsigned
    // division.
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // The GPU does not have [S|U]MUL_LOHI as a single instruction.
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // The GPU does not have rotate-right or byte-swap instructions.
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // The GPU does not have any bit-counting operators.
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant, MVT::i64, Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
    // We support loading/storing v2f64 but not operations on the type.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works correctly for these types;
  // it requires vector comparisons.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::Constant, MVT::i32, Legal);

  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;
}

bool
AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  return false;
}

// The backend supports 32- and 64-bit floating point immediates.
bool
AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarVT == MVT::f32 || ScalarVT == MVT::f64;
}

bool
AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarVT != MVT::f32 && ScalarVT != MVT::f64;
}

// computeMaskedBitsForTargetNode - Determine which bits of Op are known to
// be zero or one and return them in KnownZero/KnownOne. Op is expected to
// be a target-specific node. Used by the DAG combiner.
void
AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const
{
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
  switch (Op.getOpcode()) {
    default: break;
    case ISD::SELECT_CC:
      // The values being selected are operands 2 and 3 of SELECT_CC;
      // propagate the depth limit on both recursive queries.
      DAG.ComputeMaskedBits(
          Op.getOperand(2),
          KnownZero,
          KnownOne,
          Depth + 1
          );
      DAG.ComputeMaskedBits(
          Op.getOperand(3),
          KnownZero2,
          KnownOne2,
          Depth + 1
          );
      assert((KnownZero & KnownOne) == 0
          && "Bits known to be one AND zero?");
      assert((KnownZero2 & KnownOne2) == 0
          && "Bits known to be one AND zero?");
      // A bit is only known if it is known in both arms of the select.
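      // For example, if the two arms are the constants 0x0F and 0x07,
      // bits 0-2 are known one, bit 3 is unknown, and all higher bits are
      // known zero.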
      KnownOne &= KnownOne2;
      KnownZero &= KnownZero2;
      break;
  }
}

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

SDValue
AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  Nodes1 = DAG.getNode(AMDGPUISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
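  // VBUILD splats operand 0 across every lane; any remaining defined
  // operands are then written in with INSERT_VECTOR_ELT. Each case in the
  // switch below intentionally falls through so the lower elements are
  // inserted as well.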
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  switch (Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  }
  return Nodes1;
}

SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
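  // The extension itself is a left shift followed by an arithmetic right
  // shift. For example, sign-extending the low 8 bits of an i32 holding
  // 0x000000AB gives (0xAB << 24) >> 24 = 0xFFFFFFAB.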
  if (srcBits < 32) {
    // If the operand is narrower than 32 bits, extend it to 32 bits so the
    // shifts below operate on a type that preserves the upper bits.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Arithmetic shift right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the operand needs to be converted
    // back to its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}

EVT
AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
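  // Produce the i32- or i64-based integer type whose total width matches
  // size * numEle, e.g. genIntType(8, 4) yields i32 and genIntType(32, 2)
  // yields v2i32.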
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}

SDValue
AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  SDValue Result;
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}

SDValue
AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY;
  MVT FLTTY;
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
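  // The quotient is computed in floating point and truncated toward zero.
  // jq evaluates to +1 when the operands have the same sign and -1
  // otherwise; it is added back in only when the remainder's magnitude
  // reaches the divisor's, i.e. when truncation left the quotient one
  // short of the exact result.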
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);

  // jq = (cv ? jq : 0);
  jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
      DAG.getConstant(0, OVT));

  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}

SDValue
AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSDIV32 generates the equivalent of the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10
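
  // The iadd/ixor pairs implement two's complement absolute value: with
  // m = (x < 0) ? -1 : 0, (x + m) ^ m == |x|. The xor of the two sign
  // masks then restores the proper sign on the unsigned quotient, since
  // (q + s) ^ s == -q when s == -1.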

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  // The select results must have type OVT so the vector forms work too.
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, OVT),
      DAG.getConstant(0, OVT),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, OVT),
      DAG.getConstant(0, OVT),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
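  // 64-bit signed division is not lowered here; the node is returned
  // unchanged.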
  return SDValue(Op.getNode(), 0);
}

SDValue
AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
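  // There is no native 8-bit remainder operation; promote to the matching
  // 32-bit type, take the remainder there, and truncate back.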
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
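  // Same promotion strategy as LowerSREM8, applied to the 16-bit types.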
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSREM32 generates the equivalent of the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // sub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10
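
  // As in LowerSDIV32, the iadd/ixor pairs take absolute values up front.
  // The remainder takes the sign of the dividend, so only r10 (the sign
  // mask of LHS) is applied to the result at the end.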

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
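  // 64-bit signed remainder is not lowered here; the node is returned
  // unchanged.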
  return SDValue(Op.getNode(), 0);
}
