1 //===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file implements the lowering of LLVM calls to machine code calls for
12 /// GlobalISel.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPUCallLowering.h"
17 #include "AMDGPU.h"
18 #include "AMDGPUISelLowering.h"
19 #include "AMDGPUSubtarget.h"
20 #include "SIISelLowering.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "SIRegisterInfo.h"
23 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
24 #include "llvm/CodeGen/CallingConvLower.h"
25 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 
28 using namespace llvm;
29 
AMDGPUCallLowering(const AMDGPUTargetLowering & TLI)30 AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
31   : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
32 }
33 
lowerReturn(MachineIRBuilder & MIRBuilder,const Value * Val,unsigned VReg) const34 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
35                                      const Value *Val, unsigned VReg) const {
36   // FIXME: Add support for non-void returns.
37   if (Val)
38     return false;
39 
40   MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
41   return true;
42 }
43 
lowerParameterPtr(MachineIRBuilder & MIRBuilder,Type * ParamTy,uint64_t Offset) const44 unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
45                                                Type *ParamTy,
46                                                uint64_t Offset) const {
47 
48   MachineFunction &MF = MIRBuilder.getMF();
49   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
50   MachineRegisterInfo &MRI = MF.getRegInfo();
51   const Function &F = MF.getFunction();
52   const DataLayout &DL = F.getParent()->getDataLayout();
53   PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
54   LLT PtrType = getLLTForType(*PtrTy, DL);
55   unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
56   unsigned KernArgSegmentPtr =
57     MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
58   unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
59 
60   unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
61   MIRBuilder.buildConstant(OffsetReg, Offset);
62 
63   MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
64 
65   return DstReg;
66 }
67 
lowerParameter(MachineIRBuilder & MIRBuilder,Type * ParamTy,uint64_t Offset,unsigned Align,unsigned DstReg) const68 void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
69                                         Type *ParamTy, uint64_t Offset,
70                                         unsigned Align,
71                                         unsigned DstReg) const {
72   MachineFunction &MF = MIRBuilder.getMF();
73   const Function &F = MF.getFunction();
74   const DataLayout &DL = F.getParent()->getDataLayout();
75   PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
76   MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
77   unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
78   unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
79 
80   MachineMemOperand *MMO =
81       MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
82                                        MachineMemOperand::MONonTemporal |
83                                        MachineMemOperand::MOInvariant,
84                                        TypeSize, Align);
85 
86   MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
87 }
88 
lowerFormalArguments(MachineIRBuilder & MIRBuilder,const Function & F,ArrayRef<unsigned> VRegs) const89 bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
90                                               const Function &F,
91                                               ArrayRef<unsigned> VRegs) const {
92   // AMDGPU_GS and AMDGP_HS are not supported yet.
93   if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
94       F.getCallingConv() == CallingConv::AMDGPU_HS)
95     return false;
96 
97   MachineFunction &MF = MIRBuilder.getMF();
98   const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
99   MachineRegisterInfo &MRI = MF.getRegInfo();
100   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
101   const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
102   const DataLayout &DL = F.getParent()->getDataLayout();
103 
104   SmallVector<CCValAssign, 16> ArgLocs;
105   CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
106 
107   // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
108   if (Info->hasPrivateSegmentBuffer()) {
109     unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
110     MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
111     CCInfo.AllocateReg(PrivateSegmentBufferReg);
112   }
113 
114   if (Info->hasDispatchPtr()) {
115     unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
116     // FIXME: Need to add reg as live-in
117     CCInfo.AllocateReg(DispatchPtrReg);
118   }
119 
120   if (Info->hasQueuePtr()) {
121     unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
122     // FIXME: Need to add reg as live-in
123     CCInfo.AllocateReg(QueuePtrReg);
124   }
125 
126   if (Info->hasKernargSegmentPtr()) {
127     unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
128     const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
129     unsigned VReg = MRI.createGenericVirtualRegister(P2);
130     MRI.addLiveIn(InputPtrReg, VReg);
131     MIRBuilder.getMBB().addLiveIn(InputPtrReg);
132     MIRBuilder.buildCopy(VReg, InputPtrReg);
133     CCInfo.AllocateReg(InputPtrReg);
134   }
135 
136   if (Info->hasDispatchID()) {
137     unsigned DispatchIDReg = Info->addDispatchID(*TRI);
138     // FIXME: Need to add reg as live-in
139     CCInfo.AllocateReg(DispatchIDReg);
140   }
141 
142   if (Info->hasFlatScratchInit()) {
143     unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
144     // FIXME: Need to add reg as live-in
145     CCInfo.AllocateReg(FlatScratchInitReg);
146   }
147 
148   // The infrastructure for normal calling convention lowering is essentially
149   // useless for kernels. We want to avoid any kind of legalization or argument
150   // splitting.
151   if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
152     unsigned i = 0;
153     const unsigned KernArgBaseAlign = 16;
154     const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
155     uint64_t ExplicitArgOffset = 0;
156 
157     // TODO: Align down to dword alignment and extract bits for extending loads.
158     for (auto &Arg : F.args()) {
159       Type *ArgTy = Arg.getType();
160       unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
161       if (AllocSize == 0)
162         continue;
163 
164       unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
165 
166       uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
167       ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
168 
169       unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
170       ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
171       lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
172       ++i;
173     }
174 
175     return true;
176   }
177 
178   unsigned NumArgs = F.arg_size();
179   Function::const_arg_iterator CurOrigArg = F.arg_begin();
180   const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
181   unsigned PSInputNum = 0;
182   BitVector Skipped(NumArgs);
183   for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
184     EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
185 
186     // We can only hanlde simple value types at the moment.
187     ISD::ArgFlagsTy Flags;
188     ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
189     setArgFlags(OrigArg, i + 1, DL, F);
190     Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
191 
192     if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
193         !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
194         PSInputNum <= 15) {
195       if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
196         Skipped.set(i);
197         ++PSInputNum;
198         continue;
199       }
200 
201       Info->markPSInputAllocated(PSInputNum);
202       if (!CurOrigArg->use_empty())
203         Info->markPSInputEnabled(PSInputNum);
204 
205       ++PSInputNum;
206     }
207 
208     CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
209                                              /*IsVarArg=*/false);
210 
211     if (ValEVT.isVector()) {
212       EVT ElemVT = ValEVT.getVectorElementType();
213       if (!ValEVT.isSimple())
214         return false;
215       MVT ValVT = ElemVT.getSimpleVT();
216       bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
217                           OrigArg.Flags, CCInfo);
218       if (!Res)
219         return false;
220     } else {
221       MVT ValVT = ValEVT.getSimpleVT();
222       if (!ValEVT.isSimple())
223         return false;
224       bool Res =
225           AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
226 
227       // Fail if we don't know how to handle this type.
228       if (Res)
229         return false;
230     }
231   }
232 
233   Function::const_arg_iterator Arg = F.arg_begin();
234 
235   if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
236       F.getCallingConv() == CallingConv::AMDGPU_PS) {
237     for (unsigned i = 0, OrigArgIdx = 0;
238          OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
239        if (Skipped.test(OrigArgIdx))
240           continue;
241       CCValAssign &VA = ArgLocs[i++];
242       MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
243       MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
244       MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
245     }
246     return true;
247   }
248 
249   return false;
250 }
251