1 //===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file implements the lowering of LLVM calls to machine code calls for
12 /// GlobalISel.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #include "AMDGPUCallLowering.h"
17 #include "AMDGPU.h"
18 #include "AMDGPUISelLowering.h"
19 #include "AMDGPUSubtarget.h"
20 #include "SIISelLowering.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "SIRegisterInfo.h"
23 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
24 #include "llvm/CodeGen/CallingConvLower.h"
25 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27
28 using namespace llvm;
29
AMDGPUCallLowering(const AMDGPUTargetLowering & TLI)30 AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
31 : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
32 }
33
lowerReturn(MachineIRBuilder & MIRBuilder,const Value * Val,unsigned VReg) const34 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
35 const Value *Val, unsigned VReg) const {
36 // FIXME: Add support for non-void returns.
37 if (Val)
38 return false;
39
40 MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
41 return true;
42 }
43
lowerParameterPtr(MachineIRBuilder & MIRBuilder,Type * ParamTy,uint64_t Offset) const44 unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
45 Type *ParamTy,
46 uint64_t Offset) const {
47
48 MachineFunction &MF = MIRBuilder.getMF();
49 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
50 MachineRegisterInfo &MRI = MF.getRegInfo();
51 const Function &F = MF.getFunction();
52 const DataLayout &DL = F.getParent()->getDataLayout();
53 PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
54 LLT PtrType = getLLTForType(*PtrTy, DL);
55 unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
56 unsigned KernArgSegmentPtr =
57 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
58 unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
59
60 unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
61 MIRBuilder.buildConstant(OffsetReg, Offset);
62
63 MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
64
65 return DstReg;
66 }
67
lowerParameter(MachineIRBuilder & MIRBuilder,Type * ParamTy,uint64_t Offset,unsigned Align,unsigned DstReg) const68 void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
69 Type *ParamTy, uint64_t Offset,
70 unsigned Align,
71 unsigned DstReg) const {
72 MachineFunction &MF = MIRBuilder.getMF();
73 const Function &F = MF.getFunction();
74 const DataLayout &DL = F.getParent()->getDataLayout();
75 PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
76 MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
77 unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
78 unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
79
80 MachineMemOperand *MMO =
81 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
82 MachineMemOperand::MONonTemporal |
83 MachineMemOperand::MOInvariant,
84 TypeSize, Align);
85
86 MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
87 }
88
lowerFormalArguments(MachineIRBuilder & MIRBuilder,const Function & F,ArrayRef<unsigned> VRegs) const89 bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
90 const Function &F,
91 ArrayRef<unsigned> VRegs) const {
92 // AMDGPU_GS and AMDGP_HS are not supported yet.
93 if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
94 F.getCallingConv() == CallingConv::AMDGPU_HS)
95 return false;
96
97 MachineFunction &MF = MIRBuilder.getMF();
98 const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
99 MachineRegisterInfo &MRI = MF.getRegInfo();
100 SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
101 const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
102 const DataLayout &DL = F.getParent()->getDataLayout();
103
104 SmallVector<CCValAssign, 16> ArgLocs;
105 CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
106
107 // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
108 if (Info->hasPrivateSegmentBuffer()) {
109 unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
110 MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
111 CCInfo.AllocateReg(PrivateSegmentBufferReg);
112 }
113
114 if (Info->hasDispatchPtr()) {
115 unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
116 // FIXME: Need to add reg as live-in
117 CCInfo.AllocateReg(DispatchPtrReg);
118 }
119
120 if (Info->hasQueuePtr()) {
121 unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
122 // FIXME: Need to add reg as live-in
123 CCInfo.AllocateReg(QueuePtrReg);
124 }
125
126 if (Info->hasKernargSegmentPtr()) {
127 unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
128 const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
129 unsigned VReg = MRI.createGenericVirtualRegister(P2);
130 MRI.addLiveIn(InputPtrReg, VReg);
131 MIRBuilder.getMBB().addLiveIn(InputPtrReg);
132 MIRBuilder.buildCopy(VReg, InputPtrReg);
133 CCInfo.AllocateReg(InputPtrReg);
134 }
135
136 if (Info->hasDispatchID()) {
137 unsigned DispatchIDReg = Info->addDispatchID(*TRI);
138 // FIXME: Need to add reg as live-in
139 CCInfo.AllocateReg(DispatchIDReg);
140 }
141
142 if (Info->hasFlatScratchInit()) {
143 unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
144 // FIXME: Need to add reg as live-in
145 CCInfo.AllocateReg(FlatScratchInitReg);
146 }
147
148 // The infrastructure for normal calling convention lowering is essentially
149 // useless for kernels. We want to avoid any kind of legalization or argument
150 // splitting.
151 if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
152 unsigned i = 0;
153 const unsigned KernArgBaseAlign = 16;
154 const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
155 uint64_t ExplicitArgOffset = 0;
156
157 // TODO: Align down to dword alignment and extract bits for extending loads.
158 for (auto &Arg : F.args()) {
159 Type *ArgTy = Arg.getType();
160 unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
161 if (AllocSize == 0)
162 continue;
163
164 unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
165
166 uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
167 ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
168
169 unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
170 ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
171 lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
172 ++i;
173 }
174
175 return true;
176 }
177
178 unsigned NumArgs = F.arg_size();
179 Function::const_arg_iterator CurOrigArg = F.arg_begin();
180 const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
181 unsigned PSInputNum = 0;
182 BitVector Skipped(NumArgs);
183 for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
184 EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
185
186 // We can only hanlde simple value types at the moment.
187 ISD::ArgFlagsTy Flags;
188 ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
189 setArgFlags(OrigArg, i + 1, DL, F);
190 Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
191
192 if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
193 !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
194 PSInputNum <= 15) {
195 if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
196 Skipped.set(i);
197 ++PSInputNum;
198 continue;
199 }
200
201 Info->markPSInputAllocated(PSInputNum);
202 if (!CurOrigArg->use_empty())
203 Info->markPSInputEnabled(PSInputNum);
204
205 ++PSInputNum;
206 }
207
208 CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
209 /*IsVarArg=*/false);
210
211 if (ValEVT.isVector()) {
212 EVT ElemVT = ValEVT.getVectorElementType();
213 if (!ValEVT.isSimple())
214 return false;
215 MVT ValVT = ElemVT.getSimpleVT();
216 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
217 OrigArg.Flags, CCInfo);
218 if (!Res)
219 return false;
220 } else {
221 MVT ValVT = ValEVT.getSimpleVT();
222 if (!ValEVT.isSimple())
223 return false;
224 bool Res =
225 AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
226
227 // Fail if we don't know how to handle this type.
228 if (Res)
229 return false;
230 }
231 }
232
233 Function::const_arg_iterator Arg = F.arg_begin();
234
235 if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
236 F.getCallingConv() == CallingConv::AMDGPU_PS) {
237 for (unsigned i = 0, OrigArgIdx = 0;
238 OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
239 if (Skipped.test(OrigArgIdx))
240 continue;
241 CCValAssign &VA = ArgLocs[i++];
242 MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
243 MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
244 MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
245 }
246 return true;
247 }
248
249 return false;
250 }
251