1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This is the parent TargetLowering class for hardware code gen targets.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPUISelLowering.h"
15 #include "AMDILIntrinsicInfo.h"
16 #include "llvm/CodeGen/MachineFunction.h"
17 #include "llvm/CodeGen/MachineRegisterInfo.h"
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
20
21 using namespace llvm;
22
AMDGPUTargetLowering(TargetMachine & TM)23 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
24 TargetLowering(TM, new TargetLoweringObjectFileELF())
25 {
26
27 // Initialize target lowering borrowed from AMDIL
28 InitAMDILLowering();
29
30 // We need to custom lower some of the intrinsics
31 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
32
33 // Library functions. These default to Expand, but we have instructions
34 // for them.
35 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
36 setOperationAction(ISD::FEXP2, MVT::f32, Legal);
37 setOperationAction(ISD::FRINT, MVT::f32, Legal);
38
39 setOperationAction(ISD::UDIV, MVT::i32, Expand);
40 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
41 setOperationAction(ISD::UREM, MVT::i32, Expand);
42 }
43
44 //===---------------------------------------------------------------------===//
45 // TargetLowering Callbacks
46 //===---------------------------------------------------------------------===//
47
LowerFormalArguments(SDValue Chain,CallingConv::ID CallConv,bool isVarArg,const SmallVectorImpl<ISD::InputArg> & Ins,DebugLoc DL,SelectionDAG & DAG,SmallVectorImpl<SDValue> & InVals) const48 SDValue AMDGPUTargetLowering::LowerFormalArguments(
49 SDValue Chain,
50 CallingConv::ID CallConv,
51 bool isVarArg,
52 const SmallVectorImpl<ISD::InputArg> &Ins,
53 DebugLoc DL, SelectionDAG &DAG,
54 SmallVectorImpl<SDValue> &InVals) const
55 {
56 // Lowering of arguments happens in R600LowerKernelParameters, so we can
57 // ignore the arguments here.
58 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
59 InVals.push_back(SDValue());
60 }
61 return Chain;
62 }
63
// Lower an outgoing return by gluing a target RET_FLAG node onto the
// chain.  NOTE(review): Outs/OutVals are ignored entirely — presumably
// kernel entry points never return values through this path; confirm
// against the calling-convention handling.
SDValue AMDGPUTargetLowering::LowerReturn(
                                     SDValue Chain,
                                     CallingConv::ID CallConv,
                                     bool isVarArg,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                                     const SmallVectorImpl<SDValue> &OutVals,
                                     DebugLoc DL, SelectionDAG &DAG) const
{
  return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
}
74
75 //===---------------------------------------------------------------------===//
76 // Target specific lowering
77 //===---------------------------------------------------------------------===//
78
LowerOperation(SDValue Op,SelectionDAG & DAG) const79 SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
80 const
81 {
82 switch (Op.getOpcode()) {
83 default:
84 Op.getNode()->dump();
85 assert(0 && "Custom lowering code for this"
86 "instruction is not implemented yet!");
87 break;
88 // AMDIL DAG lowering
89 case ISD::SDIV: return LowerSDIV(Op, DAG);
90 case ISD::SREM: return LowerSREM(Op, DAG);
91 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
92 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
93 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
94 // AMDGPU DAG lowering
95 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
96 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
97 }
98 return Op;
99 }
100
LowerINTRINSIC_WO_CHAIN(SDValue Op,SelectionDAG & DAG) const101 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
102 SelectionDAG &DAG) const
103 {
104 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
105 DebugLoc DL = Op.getDebugLoc();
106 EVT VT = Op.getValueType();
107
108 switch (IntrinsicID) {
109 default: return Op;
110 case AMDGPUIntrinsic::AMDIL_abs:
111 return LowerIntrinsicIABS(Op, DAG);
112 case AMDGPUIntrinsic::AMDIL_exp:
113 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
114 case AMDGPUIntrinsic::AMDIL_fabs:
115 return DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1));
116 case AMDGPUIntrinsic::AMDGPU_lrp:
117 return LowerIntrinsicLRP(Op, DAG);
118 case AMDGPUIntrinsic::AMDIL_fraction:
119 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
120 case AMDGPUIntrinsic::AMDIL_mad:
121 return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
122 Op.getOperand(2), Op.getOperand(3));
123 case AMDGPUIntrinsic::AMDIL_max:
124 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
125 Op.getOperand(2));
126 case AMDGPUIntrinsic::AMDGPU_imax:
127 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
128 Op.getOperand(2));
129 case AMDGPUIntrinsic::AMDGPU_umax:
130 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
131 Op.getOperand(2));
132 case AMDGPUIntrinsic::AMDIL_min:
133 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
134 Op.getOperand(2));
135 case AMDGPUIntrinsic::AMDGPU_imin:
136 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
137 Op.getOperand(2));
138 case AMDGPUIntrinsic::AMDGPU_umin:
139 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
140 Op.getOperand(2));
141 case AMDGPUIntrinsic::AMDIL_round_nearest:
142 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
143 case AMDGPUIntrinsic::AMDIL_round_posinf:
144 return DAG.getNode(ISD::FCEIL, DL, VT, Op.getOperand(1));
145 }
146 }
147
148 ///IABS(a) = SMAX(sub(0, a), a)
LowerIntrinsicIABS(SDValue Op,SelectionDAG & DAG) const149 SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
150 SelectionDAG &DAG) const
151 {
152
153 DebugLoc DL = Op.getDebugLoc();
154 EVT VT = Op.getValueType();
155 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
156 Op.getOperand(1));
157
158 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
159 }
160
161 /// Linear Interpolation
162 /// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
LowerIntrinsicLRP(SDValue Op,SelectionDAG & DAG) const163 SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
164 SelectionDAG &DAG) const
165 {
166 DebugLoc DL = Op.getDebugLoc();
167 EVT VT = Op.getValueType();
168 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
169 DAG.getConstantFP(1.0f, MVT::f32),
170 Op.getOperand(1));
171 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
172 Op.getOperand(3));
173 return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
174 Op.getOperand(2),
175 OneSubAC);
176 }
177
178
179
// Lower i32 UDIVREM using the hardware URECIP approximation of 2^32/Den,
// followed by error-correction steps, producing both the quotient and the
// remainder.  The two results are wired into the DAG directly with
// ReplaceAllUsesWith and the original node is returned.
// NOTE(review): returning Op after RAUW (instead of returning a merged
// values node) is an unusual pattern — confirm the legalizer handles it.
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
    SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue Num = Op.getOperand(0);
  SDValue Den = Op.getOperand(1);

  SmallVector<SDValue, 8> Results;

  // RCP = URECIP(Den) = 2^32 / Den + e
  // e is rounding error.
  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);

  // RCP_LO = umulo(RCP, Den)
  // NOTE(review): ISD::UMULO appears to be used here for the low 32 bits
  // of the product — plain ISD::MUL would express that more directly;
  // confirm against the target's legalization of UMULO.
  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);

  // RCP_HI = mulhu(RCP, Den) — high 32 bits of the product.
  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);

  // NEG_RCP_LO = -RCP_LO
  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
                                                     RCP_LO);

  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                           NEG_RCP_LO, RCP_LO,
                                           ISD::SETEQ);
  // Calculate the rounding error from the URECIP instruction
  // E = mulhu(ABS_RCP_LO, RCP)
  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);

  // RCP_A_E = RCP + E
  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);

  // RCP_S_E = RCP - E
  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);

  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E) — corrected reciprocal.
  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                     RCP_A_E, RCP_S_E,
                                     ISD::SETEQ);
  // Quotient = mulhu(Tmp0, Num) — first quotient estimate.
  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);

  // Num_S_Remainder = Quotient * Den
  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);

  // Remainder = Num - Num_S_Remainder
  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);

  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
                                                 DAG.getConstant(-1, VT),
                                                 DAG.getConstant(0, VT),
                                                 ISD::SETGE);
  // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
                                                  DAG.getConstant(0, VT),
                                                  DAG.getConstant(-1, VT),
                                                  DAG.getConstant(0, VT),
                                                  ISD::SETGE);
  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
  // -1 only when the estimate undershot by at least one Den.
  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
                                               Remainder_GE_Zero);

  // Calculate Division result:

  // Quotient_A_One = Quotient + 1
  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
                                                         DAG.getConstant(1, VT));

  // Quotient_S_One = Quotient - 1
  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
                                                         DAG.getConstant(1, VT));

  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                    Quotient, Quotient_A_One, ISD::SETEQ);

  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                            Quotient_S_One, Div, ISD::SETEQ);

  // Calculate Rem result:

  // Remainder_S_Den = Remainder - Den
  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);

  // Remainder_A_Den = Remainder + Den
  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);

  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                    Remainder, Remainder_S_Den, ISD::SETEQ);

  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                            Remainder_A_Den, Rem, ISD::SETEQ);

  // Wire both results into the DAG in place of the UDIVREM node's values.
  DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div);
  DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem);

  return Op;
}
286
287 //===----------------------------------------------------------------------===//
288 // Helper functions
289 //===----------------------------------------------------------------------===//
290
isHWTrueValue(SDValue Op) const291 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
292 {
293 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
294 return CFP->isExactlyValue(1.0);
295 }
296 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
297 return C->isAllOnesValue();
298 }
299 return false;
300 }
301
isHWFalseValue(SDValue Op) const302 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
303 {
304 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
305 return CFP->getValueAPF().isZero();
306 }
307 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
308 return C->isNullValue();
309 }
310 return false;
311 }
312
CreateLiveInRegister(SelectionDAG & DAG,const TargetRegisterClass * RC,unsigned Reg,EVT VT) const313 SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
314 const TargetRegisterClass *RC,
315 unsigned Reg, EVT VT) const {
316 MachineFunction &MF = DAG.getMachineFunction();
317 MachineRegisterInfo &MRI = MF.getRegInfo();
318 unsigned VirtualRegister;
319 if (!MRI.isLiveIn(Reg)) {
320 VirtualRegister = MRI.createVirtualRegister(RC);
321 MRI.addLiveIn(Reg, VirtualRegister);
322 } else {
323 VirtualRegister = MRI.getLiveInVirtReg(Reg);
324 }
325 return DAG.getRegister(VirtualRegister, VT);
326 }
327
328 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
329
getTargetNodeName(unsigned Opcode) const330 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
331 {
332 switch (Opcode) {
333 default: return 0;
334 // AMDIL DAG nodes
335 NODE_NAME_CASE(MAD);
336 NODE_NAME_CASE(CALL);
337 NODE_NAME_CASE(UMUL);
338 NODE_NAME_CASE(DIV_INF);
339 NODE_NAME_CASE(VBUILD);
340 NODE_NAME_CASE(RET_FLAG);
341 NODE_NAME_CASE(BRANCH_COND);
342
343 // AMDGPU DAG nodes
344 NODE_NAME_CASE(FRACT)
345 NODE_NAME_CASE(FMAX)
346 NODE_NAME_CASE(SMAX)
347 NODE_NAME_CASE(UMAX)
348 NODE_NAME_CASE(FMIN)
349 NODE_NAME_CASE(SMIN)
350 NODE_NAME_CASE(UMIN)
351 NODE_NAME_CASE(URECIP)
352 }
353 }
354