1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This is the parent TargetLowering class for hardware code gen targets.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPUISelLowering.h"
15 #include "AMDILIntrinsicInfo.h"
16 #include "llvm/CodeGen/MachineFunction.h"
17 #include "llvm/CodeGen/MachineRegisterInfo.h"
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
20 
21 using namespace llvm;
22 
AMDGPUTargetLowering(TargetMachine & TM)23 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
24   TargetLowering(TM, new TargetLoweringObjectFileELF())
25 {
26 
27   // Initialize target lowering borrowed from AMDIL
28   InitAMDILLowering();
29 
30   // We need to custom lower some of the intrinsics
31   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
32 
33   // Library functions.  These default to Expand, but we have instructions
34   // for them.
35   setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
36   setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
37   setOperationAction(ISD::FRINT,  MVT::f32, Legal);
38 
39   setOperationAction(ISD::UDIV, MVT::i32, Expand);
40   setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
41   setOperationAction(ISD::UREM, MVT::i32, Expand);
42 }
43 
44 //===---------------------------------------------------------------------===//
45 // TargetLowering Callbacks
46 //===---------------------------------------------------------------------===//
47 
LowerFormalArguments(SDValue Chain,CallingConv::ID CallConv,bool isVarArg,const SmallVectorImpl<ISD::InputArg> & Ins,DebugLoc DL,SelectionDAG & DAG,SmallVectorImpl<SDValue> & InVals) const48 SDValue AMDGPUTargetLowering::LowerFormalArguments(
49                                       SDValue Chain,
50                                       CallingConv::ID CallConv,
51                                       bool isVarArg,
52                                       const SmallVectorImpl<ISD::InputArg> &Ins,
53                                       DebugLoc DL, SelectionDAG &DAG,
54                                       SmallVectorImpl<SDValue> &InVals) const
55 {
56   // Lowering of arguments happens in R600LowerKernelParameters, so we can
57   // ignore the arguments here.
58   for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
59     InVals.push_back(SDValue());
60   }
61   return Chain;
62 }
63 
LowerReturn(SDValue Chain,CallingConv::ID CallConv,bool isVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,DebugLoc DL,SelectionDAG & DAG) const64 SDValue AMDGPUTargetLowering::LowerReturn(
65                                      SDValue Chain,
66                                      CallingConv::ID CallConv,
67                                      bool isVarArg,
68                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
69                                      const SmallVectorImpl<SDValue> &OutVals,
70                                      DebugLoc DL, SelectionDAG &DAG) const
71 {
72   return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
73 }
74 
75 //===---------------------------------------------------------------------===//
76 // Target specific lowering
77 //===---------------------------------------------------------------------===//
78 
LowerOperation(SDValue Op,SelectionDAG & DAG) const79 SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
80     const
81 {
82   switch (Op.getOpcode()) {
83   default:
84     Op.getNode()->dump();
85     assert(0 && "Custom lowering code for this"
86         "instruction is not implemented yet!");
87     break;
88   // AMDIL DAG lowering
89   case ISD::SDIV: return LowerSDIV(Op, DAG);
90   case ISD::SREM: return LowerSREM(Op, DAG);
91   case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
92   case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
93   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
94   // AMDGPU DAG lowering
95   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
96   case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
97   }
98   return Op;
99 }
100 
LowerINTRINSIC_WO_CHAIN(SDValue Op,SelectionDAG & DAG) const101 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
102     SelectionDAG &DAG) const
103 {
104   unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
105   DebugLoc DL = Op.getDebugLoc();
106   EVT VT = Op.getValueType();
107 
108   switch (IntrinsicID) {
109     default: return Op;
110     case AMDGPUIntrinsic::AMDIL_abs:
111       return LowerIntrinsicIABS(Op, DAG);
112     case AMDGPUIntrinsic::AMDIL_exp:
113       return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
114     case AMDGPUIntrinsic::AMDIL_fabs:
115       return DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1));
116     case AMDGPUIntrinsic::AMDGPU_lrp:
117       return LowerIntrinsicLRP(Op, DAG);
118     case AMDGPUIntrinsic::AMDIL_fraction:
119       return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
120     case AMDGPUIntrinsic::AMDIL_mad:
121       return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
122                               Op.getOperand(2), Op.getOperand(3));
123     case AMDGPUIntrinsic::AMDIL_max:
124       return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
125                                                   Op.getOperand(2));
126     case AMDGPUIntrinsic::AMDGPU_imax:
127       return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
128                                                   Op.getOperand(2));
129     case AMDGPUIntrinsic::AMDGPU_umax:
130       return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
131                                                   Op.getOperand(2));
132     case AMDGPUIntrinsic::AMDIL_min:
133       return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
134                                                   Op.getOperand(2));
135     case AMDGPUIntrinsic::AMDGPU_imin:
136       return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
137                                                   Op.getOperand(2));
138     case AMDGPUIntrinsic::AMDGPU_umin:
139       return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
140                                                   Op.getOperand(2));
141     case AMDGPUIntrinsic::AMDIL_round_nearest:
142       return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
143     case AMDGPUIntrinsic::AMDIL_round_posinf:
144       return DAG.getNode(ISD::FCEIL, DL, VT, Op.getOperand(1));
145   }
146 }
147 
148 ///IABS(a) = SMAX(sub(0, a), a)
LowerIntrinsicIABS(SDValue Op,SelectionDAG & DAG) const149 SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
150     SelectionDAG &DAG) const
151 {
152 
153   DebugLoc DL = Op.getDebugLoc();
154   EVT VT = Op.getValueType();
155   SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
156                                               Op.getOperand(1));
157 
158   return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
159 }
160 
161 /// Linear Interpolation
162 /// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
LowerIntrinsicLRP(SDValue Op,SelectionDAG & DAG) const163 SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
164     SelectionDAG &DAG) const
165 {
166   DebugLoc DL = Op.getDebugLoc();
167   EVT VT = Op.getValueType();
168   SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
169                                 DAG.getConstantFP(1.0f, MVT::f32),
170                                 Op.getOperand(1));
171   SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
172                                                     Op.getOperand(3));
173   return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
174                                                Op.getOperand(2),
175                                                OneSubAC);
176 }
177 
178 
179 
LowerUDIVREM(SDValue Op,SelectionDAG & DAG) const180 SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
181     SelectionDAG &DAG) const
182 {
183   DebugLoc DL = Op.getDebugLoc();
184   EVT VT = Op.getValueType();
185 
186   SDValue Num = Op.getOperand(0);
187   SDValue Den = Op.getOperand(1);
188 
189   SmallVector<SDValue, 8> Results;
190 
191   // RCP =  URECIP(Den) = 2^32 / Den + e
192   // e is rounding error.
193   SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
194 
195   // RCP_LO = umulo(RCP, Den) */
196   SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
197 
198   // RCP_HI = mulhu (RCP, Den) */
199   SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
200 
201   // NEG_RCP_LO = -RCP_LO
202   SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
203                                                      RCP_LO);
204 
205   // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
206   SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
207                                            NEG_RCP_LO, RCP_LO,
208                                            ISD::SETEQ);
209   // Calculate the rounding error from the URECIP instruction
210   // E = mulhu(ABS_RCP_LO, RCP)
211   SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
212 
213   // RCP_A_E = RCP + E
214   SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
215 
216   // RCP_S_E = RCP - E
217   SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
218 
219   // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
220   SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
221                                      RCP_A_E, RCP_S_E,
222                                      ISD::SETEQ);
223   // Quotient = mulhu(Tmp0, Num)
224   SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
225 
226   // Num_S_Remainder = Quotient * Den
227   SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
228 
229   // Remainder = Num - Num_S_Remainder
230   SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
231 
232   // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
233   SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
234                                                  DAG.getConstant(-1, VT),
235                                                  DAG.getConstant(0, VT),
236                                                  ISD::SETGE);
237   // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
238   SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
239                                                   DAG.getConstant(0, VT),
240                                                   DAG.getConstant(-1, VT),
241                                                   DAG.getConstant(0, VT),
242                                                   ISD::SETGE);
243   // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
244   SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
245                                                Remainder_GE_Zero);
246 
247   // Calculate Division result:
248 
249   // Quotient_A_One = Quotient + 1
250   SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
251                                                          DAG.getConstant(1, VT));
252 
253   // Quotient_S_One = Quotient - 1
254   SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
255                                                          DAG.getConstant(1, VT));
256 
257   // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
258   SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
259                                      Quotient, Quotient_A_One, ISD::SETEQ);
260 
261   // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
262   Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
263                             Quotient_S_One, Div, ISD::SETEQ);
264 
265   // Calculate Rem result:
266 
267   // Remainder_S_Den = Remainder - Den
268   SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
269 
270   // Remainder_A_Den = Remainder + Den
271   SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
272 
273   // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
274   SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
275                                     Remainder, Remainder_S_Den, ISD::SETEQ);
276 
277   // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
278   Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
279                             Remainder_A_Den, Rem, ISD::SETEQ);
280 
281   DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div);
282   DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem);
283 
284   return Op;
285 }
286 
287 //===----------------------------------------------------------------------===//
288 // Helper functions
289 //===----------------------------------------------------------------------===//
290 
isHWTrueValue(SDValue Op) const291 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
292 {
293   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
294     return CFP->isExactlyValue(1.0);
295   }
296   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
297     return C->isAllOnesValue();
298   }
299   return false;
300 }
301 
isHWFalseValue(SDValue Op) const302 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
303 {
304   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
305     return CFP->getValueAPF().isZero();
306   }
307   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
308     return C->isNullValue();
309   }
310   return false;
311 }
312 
CreateLiveInRegister(SelectionDAG & DAG,const TargetRegisterClass * RC,unsigned Reg,EVT VT) const313 SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
314                                                   const TargetRegisterClass *RC,
315                                                    unsigned Reg, EVT VT) const {
316   MachineFunction &MF = DAG.getMachineFunction();
317   MachineRegisterInfo &MRI = MF.getRegInfo();
318   unsigned VirtualRegister;
319   if (!MRI.isLiveIn(Reg)) {
320     VirtualRegister = MRI.createVirtualRegister(RC);
321     MRI.addLiveIn(Reg, VirtualRegister);
322   } else {
323     VirtualRegister = MRI.getLiveInVirtReg(Reg);
324   }
325   return DAG.getRegister(VirtualRegister, VT);
326 }
327 
328 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
329 
getTargetNodeName(unsigned Opcode) const330 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
331 {
332   switch (Opcode) {
333   default: return 0;
334   // AMDIL DAG nodes
335   NODE_NAME_CASE(MAD);
336   NODE_NAME_CASE(CALL);
337   NODE_NAME_CASE(UMUL);
338   NODE_NAME_CASE(DIV_INF);
339   NODE_NAME_CASE(VBUILD);
340   NODE_NAME_CASE(RET_FLAG);
341   NODE_NAME_CASE(BRANCH_COND);
342 
343   // AMDGPU DAG nodes
344   NODE_NAME_CASE(FRACT)
345   NODE_NAME_CASE(FMAX)
346   NODE_NAME_CASE(SMAX)
347   NODE_NAME_CASE(UMAX)
348   NODE_NAME_CASE(FMIN)
349   NODE_NAME_CASE(SMIN)
350   NODE_NAME_CASE(UMIN)
351   NODE_NAME_CASE(URECIP)
352   }
353 }
354