1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// SI implementation of the TargetRegisterInfo class.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "SIRegisterInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "AMDGPUSubtarget.h"
18 #include "SIInstrInfo.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/RegisterScavenging.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/LLVMContext.h"
26 
27 using namespace llvm;
28 
/// Return true if \p PSetID appears in \p PSets.
///
/// \p PSets is scanned from its first element up to (not including) the first
/// -1 entry, which acts as the list terminator.
static bool hasPressureSet(const int *PSets, unsigned PSetID) {
  const int Wanted = static_cast<int>(PSetID);
  for (const int *Cur = PSets; *Cur != -1; ++Cur)
    if (*Cur == Wanted)
      return true;
  return false;
}
36 
classifyPressureSet(unsigned PSetID,unsigned Reg,BitVector & PressureSets) const37 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
38                                          BitVector &PressureSets) const {
39   for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
40     const int *PSets = getRegUnitPressureSets(*U);
41     if (hasPressureSet(PSets, PSetID)) {
42       PressureSets.set(PSetID);
43       break;
44     }
45   }
46 }
47 
48 static cl::opt<bool> EnableSpillSGPRToSMEM(
49   "amdgpu-spill-sgpr-to-smem",
50   cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
51   cl::init(false));
52 
53 static cl::opt<bool> EnableSpillSGPRToVGPR(
54   "amdgpu-spill-sgpr-to-vgpr",
55   cl::desc("Enable spilling VGPRs to SGPRs"),
56   cl::ReallyHidden,
57   cl::init(true));
58 
SIRegisterInfo(const GCNSubtarget & ST)59 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
60   AMDGPURegisterInfo(),
61   SGPRPressureSets(getNumRegPressureSets()),
62   VGPRPressureSets(getNumRegPressureSets()),
63   SpillSGPRToVGPR(false),
64   SpillSGPRToSMEM(false) {
65   if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
66     SpillSGPRToSMEM = true;
67   else if (EnableSpillSGPRToVGPR)
68     SpillSGPRToVGPR = true;
69 
70   unsigned NumRegPressureSets = getNumRegPressureSets();
71 
72   SGPRSetID = NumRegPressureSets;
73   VGPRSetID = NumRegPressureSets;
74 
75   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
76     classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
77     classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
78   }
79 
80   // Determine the number of reg units for each pressure set.
81   std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
82   for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
83     const int *PSets = getRegUnitPressureSets(i);
84     for (unsigned j = 0; PSets[j] != -1; ++j) {
85       ++PressureSetRegUnits[PSets[j]];
86     }
87   }
88 
89   unsigned VGPRMax = 0, SGPRMax = 0;
90   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
91     if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
92       VGPRSetID = i;
93       VGPRMax = PressureSetRegUnits[i];
94       continue;
95     }
96     if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
97       SGPRSetID = i;
98       SGPRMax = PressureSetRegUnits[i];
99     }
100   }
101 
102   assert(SGPRSetID < NumRegPressureSets &&
103          VGPRSetID < NumRegPressureSets);
104 }
105 
reservedPrivateSegmentBufferReg(const MachineFunction & MF) const106 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
107   const MachineFunction &MF) const {
108 
109   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
110   unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
111   unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
112   return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
113 }
114 
/// Choose the SGPR index for the wave byte offset given \p RegCount available
/// SGPRs.
///
/// If \p RegCount is not a multiple of four, the segment buffer cannot occupy
/// the last four indices because of its alignment, which leaves a hole at the
/// top: the result is RegCount - 1. Otherwise the segment buffer can take the
/// last four indices and the wave offset goes right before it: the result is
/// RegCount - 5.
static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
  const bool HasHoleAtTop = (RegCount & 3) != 0;
  return HasHoleAtTop ? RegCount - 1 : RegCount - 5;
}
131 
reservedPrivateSegmentWaveByteOffsetReg(const MachineFunction & MF) const132 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
133   const MachineFunction &MF) const {
134   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
135   unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
136   return AMDGPU::SGPR_32RegClass.getRegister(Reg);
137 }
138 
reservedStackPtrOffsetReg(const MachineFunction & MF) const139 unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
140   const MachineFunction &MF) const {
141   return AMDGPU::SGPR32;
142 }
143 
getReservedRegs(const MachineFunction & MF) const144 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
145   BitVector Reserved(getNumRegs());
146 
147   // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
148   // this seems likely to result in bugs, so I'm marking them as reserved.
149   reserveRegisterTuples(Reserved, AMDGPU::EXEC);
150   reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
151 
152   // M0 has to be reserved so that llvm accepts it as a live-in into a block.
153   reserveRegisterTuples(Reserved, AMDGPU::M0);
154 
155   // Reserve the memory aperture registers.
156   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
157   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
158   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
159   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
160 
161   // Reserve xnack_mask registers - support is not implemented in Codegen.
162   reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
163 
164   // Reserve Trap Handler registers - support is not implemented in Codegen.
165   reserveRegisterTuples(Reserved, AMDGPU::TBA);
166   reserveRegisterTuples(Reserved, AMDGPU::TMA);
167   reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
168   reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
169   reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
170   reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
171   reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
172   reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
173   reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
174   reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
175 
176   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
177 
178   unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
179   unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
180   for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
181     unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
182     reserveRegisterTuples(Reserved, Reg);
183   }
184 
185   unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
186   unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
187   for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
188     unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
189     reserveRegisterTuples(Reserved, Reg);
190   }
191 
192   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
193 
194   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
195   if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
196     // Reserve 1 SGPR for scratch wave offset in case we need to spill.
197     reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
198   }
199 
200   unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
201   if (ScratchRSrcReg != AMDGPU::NoRegister) {
202     // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
203     // to spill.
204     // TODO: May need to reserve a VGPR if doing LDS spilling.
205     reserveRegisterTuples(Reserved, ScratchRSrcReg);
206     assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
207   }
208 
209   // We have to assume the SP is needed in case there are calls in the function,
210   // which is detected after the function is lowered. If we aren't really going
211   // to need SP, don't bother reserving it.
212   unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
213 
214   if (StackPtrReg != AMDGPU::NoRegister) {
215     reserveRegisterTuples(Reserved, StackPtrReg);
216     assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
217   }
218 
219   unsigned FrameReg = MFI->getFrameOffsetReg();
220   if (FrameReg != AMDGPU::NoRegister) {
221     reserveRegisterTuples(Reserved, FrameReg);
222     assert(!isSubRegister(ScratchRSrcReg, FrameReg));
223   }
224 
225   return Reserved;
226 }
227 
requiresRegisterScavenging(const MachineFunction & Fn) const228 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
229   const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
230   if (Info->isEntryFunction()) {
231     const MachineFrameInfo &MFI = Fn.getFrameInfo();
232     return MFI.hasStackObjects() || MFI.hasCalls();
233   }
234 
235   // May need scavenger for dealing with callee saved registers.
236   return true;
237 }
238 
requiresFrameIndexScavenging(const MachineFunction & MF) const239 bool SIRegisterInfo::requiresFrameIndexScavenging(
240   const MachineFunction &MF) const {
241   const MachineFrameInfo &MFI = MF.getFrameInfo();
242   if (MFI.hasStackObjects())
243     return true;
244 
245   // May need to deal with callee saved registers.
246   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
247   return !Info->isEntryFunction();
248 }
249 
requiresFrameIndexReplacementScavenging(const MachineFunction & MF) const250 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
251   const MachineFunction &MF) const {
252   // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
253   // create a virtual register for it during frame index elimination, so the
254   // scavenger is directly needed.
255   return MF.getFrameInfo().hasStackObjects() &&
256          MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
257          MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
258 }
259 
requiresVirtualBaseRegisters(const MachineFunction &) const260 bool SIRegisterInfo::requiresVirtualBaseRegisters(
261   const MachineFunction &) const {
262   // There are no special dedicated stack or frame pointers.
263   return true;
264 }
265 
trackLivenessAfterRegAlloc(const MachineFunction & MF) const266 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
267   // This helps catch bugs as verifier errors.
268   return true;
269 }
270 
getMUBUFInstrOffset(const MachineInstr * MI) const271 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
272   assert(SIInstrInfo::isMUBUF(*MI));
273 
274   int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
275                                           AMDGPU::OpName::offset);
276   return MI->getOperand(OffIdx).getImm();
277 }
278 
getFrameIndexInstrOffset(const MachineInstr * MI,int Idx) const279 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
280                                                  int Idx) const {
281   if (!SIInstrInfo::isMUBUF(*MI))
282     return 0;
283 
284   assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
285                                            AMDGPU::OpName::vaddr) &&
286          "Should never see frame index on non-address operand");
287 
288   return getMUBUFInstrOffset(MI);
289 }
290 
needsFrameBaseReg(MachineInstr * MI,int64_t Offset) const291 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
292   if (!MI->mayLoadOrStore())
293     return false;
294 
295   int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
296 
297   return !isUInt<12>(FullOffset);
298 }
299 
materializeFrameBaseRegister(MachineBasicBlock * MBB,unsigned BaseReg,int FrameIdx,int64_t Offset) const300 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
301                                                   unsigned BaseReg,
302                                                   int FrameIdx,
303                                                   int64_t Offset) const {
304   MachineBasicBlock::iterator Ins = MBB->begin();
305   DebugLoc DL; // Defaults to "unknown"
306 
307   if (Ins != MBB->end())
308     DL = Ins->getDebugLoc();
309 
310   MachineFunction *MF = MBB->getParent();
311   const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
312   const SIInstrInfo *TII = Subtarget.getInstrInfo();
313 
314   if (Offset == 0) {
315     BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
316       .addFrameIndex(FrameIdx);
317     return;
318   }
319 
320   MachineRegisterInfo &MRI = MF->getRegInfo();
321   unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
322 
323   unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
324 
325   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
326     .addImm(Offset);
327   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
328     .addFrameIndex(FrameIdx);
329 
330   TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
331     .addReg(OffsetReg, RegState::Kill)
332     .addReg(FIReg);
333 }
334 
resolveFrameIndex(MachineInstr & MI,unsigned BaseReg,int64_t Offset) const335 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
336                                        int64_t Offset) const {
337 
338   MachineBasicBlock *MBB = MI.getParent();
339   MachineFunction *MF = MBB->getParent();
340   const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
341   const SIInstrInfo *TII = Subtarget.getInstrInfo();
342 
343 #ifndef NDEBUG
344   // FIXME: Is it possible to be storing a frame index to itself?
345   bool SeenFI = false;
346   for (const MachineOperand &MO: MI.operands()) {
347     if (MO.isFI()) {
348       if (SeenFI)
349         llvm_unreachable("should not see multiple frame indices");
350 
351       SeenFI = true;
352     }
353   }
354 #endif
355 
356   MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
357   assert(FIOp && FIOp->isFI() && "frame index must be address operand");
358   assert(TII->isMUBUF(MI));
359   assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
360          MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
361          "should only be seeing frame offset relative FrameIndex");
362 
363 
364   MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
365   int64_t NewOffset = OffsetOp->getImm() + Offset;
366   assert(isUInt<12>(NewOffset) && "offset should be legal");
367 
368   FIOp->ChangeToRegister(BaseReg, false);
369   OffsetOp->setImm(NewOffset);
370 }
371 
isFrameOffsetLegal(const MachineInstr * MI,unsigned BaseReg,int64_t Offset) const372 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
373                                         unsigned BaseReg,
374                                         int64_t Offset) const {
375   if (!SIInstrInfo::isMUBUF(*MI))
376     return false;
377 
378   int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
379 
380   return isUInt<12>(NewOffset);
381 }
382 
getPointerRegClass(const MachineFunction & MF,unsigned Kind) const383 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
384   const MachineFunction &MF, unsigned Kind) const {
385   // This is inaccurate. It depends on the instruction and address space. The
386   // only place where we should hit this is for dealing with frame indexes /
387   // private accesses, so this is correct in that case.
388   return &AMDGPU::VGPR_32RegClass;
389 }
390 
getNumSubRegsForSpillOp(unsigned Op)391 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
392 
393   switch (Op) {
394   case AMDGPU::SI_SPILL_S512_SAVE:
395   case AMDGPU::SI_SPILL_S512_RESTORE:
396   case AMDGPU::SI_SPILL_V512_SAVE:
397   case AMDGPU::SI_SPILL_V512_RESTORE:
398     return 16;
399   case AMDGPU::SI_SPILL_S256_SAVE:
400   case AMDGPU::SI_SPILL_S256_RESTORE:
401   case AMDGPU::SI_SPILL_V256_SAVE:
402   case AMDGPU::SI_SPILL_V256_RESTORE:
403     return 8;
404   case AMDGPU::SI_SPILL_S128_SAVE:
405   case AMDGPU::SI_SPILL_S128_RESTORE:
406   case AMDGPU::SI_SPILL_V128_SAVE:
407   case AMDGPU::SI_SPILL_V128_RESTORE:
408     return 4;
409   case AMDGPU::SI_SPILL_V96_SAVE:
410   case AMDGPU::SI_SPILL_V96_RESTORE:
411     return 3;
412   case AMDGPU::SI_SPILL_S64_SAVE:
413   case AMDGPU::SI_SPILL_S64_RESTORE:
414   case AMDGPU::SI_SPILL_V64_SAVE:
415   case AMDGPU::SI_SPILL_V64_RESTORE:
416     return 2;
417   case AMDGPU::SI_SPILL_S32_SAVE:
418   case AMDGPU::SI_SPILL_S32_RESTORE:
419   case AMDGPU::SI_SPILL_V32_SAVE:
420   case AMDGPU::SI_SPILL_V32_RESTORE:
421     return 1;
422   default: llvm_unreachable("Invalid spill opcode");
423   }
424 }
425 
getOffsetMUBUFStore(unsigned Opc)426 static int getOffsetMUBUFStore(unsigned Opc) {
427   switch (Opc) {
428   case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
429     return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
430   case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
431     return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
432   case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
433     return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
434   case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
435     return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
436   case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
437     return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
438   case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
439     return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
440   case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
441     return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
442   default:
443     return -1;
444   }
445 }
446 
getOffsetMUBUFLoad(unsigned Opc)447 static int getOffsetMUBUFLoad(unsigned Opc) {
448   switch (Opc) {
449   case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
450     return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
451   case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
452     return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
453   case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
454     return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
455   case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
456     return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
457   case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
458     return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
459   case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
460     return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
461   case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
462     return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
463   case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
464     return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
465   case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
466     return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
467   case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
468     return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
469   case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
470     return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
471   case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
472     return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
473   case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
474     return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
475   default:
476     return -1;
477   }
478 }
479 
480 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
481 // need to handle the case where an SGPR may need to be spilled while spilling.
buildMUBUFOffsetLoadStore(const SIInstrInfo * TII,MachineFrameInfo & MFI,MachineBasicBlock::iterator MI,int Index,int64_t Offset)482 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
483                                       MachineFrameInfo &MFI,
484                                       MachineBasicBlock::iterator MI,
485                                       int Index,
486                                       int64_t Offset) {
487   MachineBasicBlock *MBB = MI->getParent();
488   const DebugLoc &DL = MI->getDebugLoc();
489   bool IsStore = MI->mayStore();
490 
491   unsigned Opc = MI->getOpcode();
492   int LoadStoreOp = IsStore ?
493     getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
494   if (LoadStoreOp == -1)
495     return false;
496 
497   const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
498   MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
499     .add(*Reg)
500     .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
501     .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
502     .addImm(Offset)
503     .addImm(0) // glc
504     .addImm(0) // slc
505     .addImm(0) // tfe
506     .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
507 
508   const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
509                                                        AMDGPU::OpName::vdata_in);
510   if (VDataIn)
511     NewMI.add(*VDataIn);
512   return true;
513 }
514 
buildSpillLoadStore(MachineBasicBlock::iterator MI,unsigned LoadStoreOp,int Index,unsigned ValueReg,bool IsKill,unsigned ScratchRsrcReg,unsigned ScratchOffsetReg,int64_t InstOffset,MachineMemOperand * MMO,RegScavenger * RS) const515 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
516                                          unsigned LoadStoreOp,
517                                          int Index,
518                                          unsigned ValueReg,
519                                          bool IsKill,
520                                          unsigned ScratchRsrcReg,
521                                          unsigned ScratchOffsetReg,
522                                          int64_t InstOffset,
523                                          MachineMemOperand *MMO,
524                                          RegScavenger *RS) const {
525   MachineBasicBlock *MBB = MI->getParent();
526   MachineFunction *MF = MI->getParent()->getParent();
527   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
528   const SIInstrInfo *TII = ST.getInstrInfo();
529   const MachineFrameInfo &MFI = MF->getFrameInfo();
530 
531   const MCInstrDesc &Desc = TII->get(LoadStoreOp);
532   const DebugLoc &DL = MI->getDebugLoc();
533   bool IsStore = Desc.mayStore();
534 
535   bool Scavenged = false;
536   unsigned SOffset = ScratchOffsetReg;
537 
538   const unsigned EltSize = 4;
539   const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
540   unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
541   unsigned Size = NumSubRegs * EltSize;
542   int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
543   int64_t ScratchOffsetRegDelta = 0;
544 
545   unsigned Align = MFI.getObjectAlignment(Index);
546   const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
547 
548   assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
549 
550   if (!isUInt<12>(Offset + Size - EltSize)) {
551     SOffset = AMDGPU::NoRegister;
552 
553     // We currently only support spilling VGPRs to EltSize boundaries, meaning
554     // we can simplify the adjustment of Offset here to just scale with
555     // WavefrontSize.
556     Offset *= ST.getWavefrontSize();
557 
558     // We don't have access to the register scavenger if this function is called
559     // during  PEI::scavengeFrameVirtualRegs().
560     if (RS)
561       SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
562 
563     if (SOffset == AMDGPU::NoRegister) {
564       // There are no free SGPRs, and since we are in the process of spilling
565       // VGPRs too.  Since we need a VGPR in order to spill SGPRs (this is true
566       // on SI/CI and on VI it is true until we implement spilling using scalar
567       // stores), we have no way to free up an SGPR.  Our solution here is to
568       // add the offset directly to the ScratchOffset register, and then
569       // subtract the offset after the spill to return ScratchOffset to it's
570       // original value.
571       SOffset = ScratchOffsetReg;
572       ScratchOffsetRegDelta = Offset;
573     } else {
574       Scavenged = true;
575     }
576 
577     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
578       .addReg(ScratchOffsetReg)
579       .addImm(Offset);
580 
581     Offset = 0;
582   }
583 
584   for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
585     unsigned SubReg = NumSubRegs == 1 ?
586       ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
587 
588     unsigned SOffsetRegState = 0;
589     unsigned SrcDstRegState = getDefRegState(!IsStore);
590     if (i + 1 == e) {
591       SOffsetRegState |= getKillRegState(Scavenged);
592       // The last implicit use carries the "Kill" flag.
593       SrcDstRegState |= getKillRegState(IsKill);
594     }
595 
596     MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
597     MachineMemOperand *NewMMO
598       = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
599                                  EltSize, MinAlign(Align, EltSize * i));
600 
601     auto MIB = BuildMI(*MBB, MI, DL, Desc)
602       .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
603       .addReg(ScratchRsrcReg)
604       .addReg(SOffset, SOffsetRegState)
605       .addImm(Offset)
606       .addImm(0) // glc
607       .addImm(0) // slc
608       .addImm(0) // tfe
609       .addMemOperand(NewMMO);
610 
611     if (NumSubRegs > 1)
612       MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
613   }
614 
615   if (ScratchOffsetRegDelta != 0) {
616     // Subtract the offset we added to the ScratchOffset register.
617     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
618         .addReg(ScratchOffsetReg)
619         .addImm(ScratchOffsetRegDelta);
620   }
621 }
622 
getSpillEltSize(unsigned SuperRegSize,bool Store)623 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
624                                                      bool Store) {
625   if (SuperRegSize % 16 == 0) {
626     return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
627                          AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
628   }
629 
630   if (SuperRegSize % 8 == 0) {
631     return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
632                         AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
633   }
634 
635   return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
636                       AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
637 }
638 
spillSGPR(MachineBasicBlock::iterator MI,int Index,RegScavenger * RS,bool OnlyToVGPR) const639 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
640                                int Index,
641                                RegScavenger *RS,
642                                bool OnlyToVGPR) const {
643   MachineBasicBlock *MBB = MI->getParent();
644   MachineFunction *MF = MBB->getParent();
645   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
646   DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
647 
648   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
649     = MFI->getSGPRToVGPRSpills(Index);
650   bool SpillToVGPR = !VGPRSpills.empty();
651   if (OnlyToVGPR && !SpillToVGPR)
652     return false;
653 
654   MachineRegisterInfo &MRI = MF->getRegInfo();
655   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
656   const SIInstrInfo *TII = ST.getInstrInfo();
657 
658   unsigned SuperReg = MI->getOperand(0).getReg();
659   bool IsKill = MI->getOperand(0).isKill();
660   const DebugLoc &DL = MI->getDebugLoc();
661 
662   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
663 
664   bool SpillToSMEM = spillSGPRToSMEM();
665   if (SpillToSMEM && OnlyToVGPR)
666     return false;
667 
668   assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
669                          SuperReg != MFI->getFrameOffsetReg() &&
670                          SuperReg != MFI->getScratchWaveOffsetReg()));
671 
672   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
673 
674   unsigned OffsetReg = AMDGPU::M0;
675   unsigned M0CopyReg = AMDGPU::NoRegister;
676 
677   if (SpillToSMEM) {
678     if (RS->isRegUsed(AMDGPU::M0)) {
679       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
680       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
681         .addReg(AMDGPU::M0);
682     }
683   }
684 
685   unsigned ScalarStoreOp;
686   unsigned EltSize = 4;
687   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
688   if (SpillToSMEM && isSGPRClass(RC)) {
689     // XXX - if private_element_size is larger than 4 it might be useful to be
690     // able to spill wider vmem spills.
691     std::tie(EltSize, ScalarStoreOp) =
692           getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
693   }
694 
695   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
696   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
697 
698   // SubReg carries the "Kill" flag when SubReg == SuperReg.
699   unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
700   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
701     unsigned SubReg = NumSubRegs == 1 ?
702       SuperReg : getSubReg(SuperReg, SplitParts[i]);
703 
704     if (SpillToSMEM) {
705       int64_t FrOffset = FrameInfo.getObjectOffset(Index);
706 
707       // The allocated memory size is really the wavefront size * the frame
708       // index size. The widest register class is 64 bytes, so a 4-byte scratch
709       // allocation is enough to spill this in a single stack object.
710       //
711       // FIXME: Frame size/offsets are computed earlier than this, so the extra
712       // space is still unnecessarily allocated.
713 
714       unsigned Align = FrameInfo.getObjectAlignment(Index);
715       MachinePointerInfo PtrInfo
716         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
717       MachineMemOperand *MMO
718         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
719                                    EltSize, MinAlign(Align, EltSize * i));
720 
721       // SMEM instructions only support a single offset, so increment the wave
722       // offset.
723 
724       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
725       if (Offset != 0) {
726         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
727           .addReg(MFI->getFrameOffsetReg())
728           .addImm(Offset);
729       } else {
730         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
731           .addReg(MFI->getFrameOffsetReg());
732       }
733 
734       BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
735         .addReg(SubReg, getKillRegState(IsKill)) // sdata
736         .addReg(MFI->getScratchRSrcReg())        // sbase
737         .addReg(OffsetReg, RegState::Kill)       // soff
738         .addImm(0)                               // glc
739         .addMemOperand(MMO);
740 
741       continue;
742     }
743 
744     if (SpillToVGPR) {
745       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
746 
747       // During SGPR spilling to VGPR, determine if the VGPR is defined. The
748       // only circumstance in which we say it is undefined is when it is the
749       // first spill to this VGPR in the first basic block.
750       bool VGPRDefined = true;
751       if (MBB == &MF->front())
752         VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
753 
754       // Mark the "old value of vgpr" input undef only if this is the first sgpr
755       // spill to this specific vgpr in the first basic block.
756       BuildMI(*MBB, MI, DL,
757               TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
758               Spill.VGPR)
759         .addReg(SubReg, getKillRegState(IsKill))
760         .addImm(Spill.Lane)
761         .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
762 
763       // FIXME: Since this spills to another register instead of an actual
764       // frame index, we should delete the frame index when all references to
765       // it are fixed.
766     } else {
767       // XXX - Can to VGPR spill fail for some subregisters but not others?
768       if (OnlyToVGPR)
769         return false;
770 
771       // Spill SGPR to a frame index.
772       // TODO: Should VI try to spill to VGPR and then spill to SMEM?
773       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
774       // TODO: Should VI try to spill to VGPR and then spill to SMEM?
775 
776       MachineInstrBuilder Mov
777         = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
778         .addReg(SubReg, SubKillState);
779 
780 
781       // There could be undef components of a spilled super register.
782       // TODO: Can we detect this and skip the spill?
783       if (NumSubRegs > 1) {
784         // The last implicit use of the SuperReg carries the "Kill" flag.
785         unsigned SuperKillState = 0;
786         if (i + 1 == e)
787           SuperKillState |= getKillRegState(IsKill);
788         Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
789       }
790 
791       unsigned Align = FrameInfo.getObjectAlignment(Index);
792       MachinePointerInfo PtrInfo
793         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
794       MachineMemOperand *MMO
795         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
796                                    EltSize, MinAlign(Align, EltSize * i));
797       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
798         .addReg(TmpReg, RegState::Kill)    // src
799         .addFrameIndex(Index)              // vaddr
800         .addReg(MFI->getScratchRSrcReg())  // srrsrc
801         .addReg(MFI->getFrameOffsetReg())  // soffset
802         .addImm(i * 4)                     // offset
803         .addMemOperand(MMO);
804     }
805   }
806 
807   if (M0CopyReg != AMDGPU::NoRegister) {
808     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
809       .addReg(M0CopyReg, RegState::Kill);
810   }
811 
812   MI->eraseFromParent();
813   MFI->addToSpilledSGPRs(NumSubRegs);
814   return true;
815 }
816 
restoreSGPR(MachineBasicBlock::iterator MI,int Index,RegScavenger * RS,bool OnlyToVGPR) const817 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
818                                  int Index,
819                                  RegScavenger *RS,
820                                  bool OnlyToVGPR) const {
821   MachineFunction *MF = MI->getParent()->getParent();
822   MachineRegisterInfo &MRI = MF->getRegInfo();
823   MachineBasicBlock *MBB = MI->getParent();
824   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
825 
826   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
827     = MFI->getSGPRToVGPRSpills(Index);
828   bool SpillToVGPR = !VGPRSpills.empty();
829   if (OnlyToVGPR && !SpillToVGPR)
830     return false;
831 
832   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
833   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
834   const SIInstrInfo *TII = ST.getInstrInfo();
835   const DebugLoc &DL = MI->getDebugLoc();
836 
837   unsigned SuperReg = MI->getOperand(0).getReg();
838   bool SpillToSMEM = spillSGPRToSMEM();
839   if (SpillToSMEM && OnlyToVGPR)
840     return false;
841 
842   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
843 
844   unsigned OffsetReg = AMDGPU::M0;
845   unsigned M0CopyReg = AMDGPU::NoRegister;
846 
847   if (SpillToSMEM) {
848     if (RS->isRegUsed(AMDGPU::M0)) {
849       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
850       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
851         .addReg(AMDGPU::M0);
852     }
853   }
854 
855   unsigned EltSize = 4;
856   unsigned ScalarLoadOp;
857 
858   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
859   if (SpillToSMEM && isSGPRClass(RC)) {
860     // XXX - if private_element_size is larger than 4 it might be useful to be
861     // able to spill wider vmem spills.
862     std::tie(EltSize, ScalarLoadOp) =
863           getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
864   }
865 
866   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
867   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
868 
869   // SubReg carries the "Kill" flag when SubReg == SuperReg.
870   int64_t FrOffset = FrameInfo.getObjectOffset(Index);
871 
872   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
873     unsigned SubReg = NumSubRegs == 1 ?
874       SuperReg : getSubReg(SuperReg, SplitParts[i]);
875 
876     if (SpillToSMEM) {
877       // FIXME: Size may be > 4 but extra bytes wasted.
878       unsigned Align = FrameInfo.getObjectAlignment(Index);
879       MachinePointerInfo PtrInfo
880         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
881       MachineMemOperand *MMO
882         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
883                                    EltSize, MinAlign(Align, EltSize * i));
884 
885       // Add i * 4 offset
886       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
887       if (Offset != 0) {
888         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
889           .addReg(MFI->getFrameOffsetReg())
890           .addImm(Offset);
891       } else {
892         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
893           .addReg(MFI->getFrameOffsetReg());
894       }
895 
896       auto MIB =
897         BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
898         .addReg(MFI->getScratchRSrcReg()) // sbase
899         .addReg(OffsetReg, RegState::Kill)                // soff
900         .addImm(0)                        // glc
901         .addMemOperand(MMO);
902 
903       if (NumSubRegs > 1)
904         MIB.addReg(SuperReg, RegState::ImplicitDefine);
905 
906       continue;
907     }
908 
909     if (SpillToVGPR) {
910       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
911       auto MIB =
912         BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
913                 SubReg)
914         .addReg(Spill.VGPR)
915         .addImm(Spill.Lane);
916 
917       if (NumSubRegs > 1)
918         MIB.addReg(SuperReg, RegState::ImplicitDefine);
919     } else {
920       if (OnlyToVGPR)
921         return false;
922 
923       // Restore SGPR from a stack slot.
924       // FIXME: We should use S_LOAD_DWORD here for VI.
925       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
926       unsigned Align = FrameInfo.getObjectAlignment(Index);
927 
928       MachinePointerInfo PtrInfo
929         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
930 
931       MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
932         MachineMemOperand::MOLoad, EltSize,
933         MinAlign(Align, EltSize * i));
934 
935       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
936         .addFrameIndex(Index)              // vaddr
937         .addReg(MFI->getScratchRSrcReg())  // srsrc
938         .addReg(MFI->getFrameOffsetReg())  // soffset
939         .addImm(i * 4)                     // offset
940         .addMemOperand(MMO);
941 
942       auto MIB =
943         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
944         .addReg(TmpReg, RegState::Kill);
945 
946       if (NumSubRegs > 1)
947         MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
948     }
949   }
950 
951   if (M0CopyReg != AMDGPU::NoRegister) {
952     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
953       .addReg(M0CopyReg, RegState::Kill);
954   }
955 
956   MI->eraseFromParent();
957   return true;
958 }
959 
960 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
961 /// a VGPR and the stack slot can be safely eliminated when all other users are
962 /// handled.
eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI,int FI,RegScavenger * RS) const963 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
964   MachineBasicBlock::iterator MI,
965   int FI,
966   RegScavenger *RS) const {
967   switch (MI->getOpcode()) {
968   case AMDGPU::SI_SPILL_S512_SAVE:
969   case AMDGPU::SI_SPILL_S256_SAVE:
970   case AMDGPU::SI_SPILL_S128_SAVE:
971   case AMDGPU::SI_SPILL_S64_SAVE:
972   case AMDGPU::SI_SPILL_S32_SAVE:
973     return spillSGPR(MI, FI, RS, true);
974   case AMDGPU::SI_SPILL_S512_RESTORE:
975   case AMDGPU::SI_SPILL_S256_RESTORE:
976   case AMDGPU::SI_SPILL_S128_RESTORE:
977   case AMDGPU::SI_SPILL_S64_RESTORE:
978   case AMDGPU::SI_SPILL_S32_RESTORE:
979     return restoreSGPR(MI, FI, RS, true);
980   default:
981     llvm_unreachable("not an SGPR spill instruction");
982   }
983 }
984 
eliminateFrameIndex(MachineBasicBlock::iterator MI,int SPAdj,unsigned FIOperandNum,RegScavenger * RS) const985 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
986                                         int SPAdj, unsigned FIOperandNum,
987                                         RegScavenger *RS) const {
988   MachineFunction *MF = MI->getParent()->getParent();
989   MachineRegisterInfo &MRI = MF->getRegInfo();
990   MachineBasicBlock *MBB = MI->getParent();
991   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
992   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
993   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
994   const SIInstrInfo *TII = ST.getInstrInfo();
995   DebugLoc DL = MI->getDebugLoc();
996 
997   MachineOperand &FIOp = MI->getOperand(FIOperandNum);
998   int Index = MI->getOperand(FIOperandNum).getIndex();
999 
1000   switch (MI->getOpcode()) {
1001     // SGPR register spill
1002     case AMDGPU::SI_SPILL_S512_SAVE:
1003     case AMDGPU::SI_SPILL_S256_SAVE:
1004     case AMDGPU::SI_SPILL_S128_SAVE:
1005     case AMDGPU::SI_SPILL_S64_SAVE:
1006     case AMDGPU::SI_SPILL_S32_SAVE: {
1007       spillSGPR(MI, Index, RS);
1008       break;
1009     }
1010 
1011     // SGPR register restore
1012     case AMDGPU::SI_SPILL_S512_RESTORE:
1013     case AMDGPU::SI_SPILL_S256_RESTORE:
1014     case AMDGPU::SI_SPILL_S128_RESTORE:
1015     case AMDGPU::SI_SPILL_S64_RESTORE:
1016     case AMDGPU::SI_SPILL_S32_RESTORE: {
1017       restoreSGPR(MI, Index, RS);
1018       break;
1019     }
1020 
1021     // VGPR register spill
1022     case AMDGPU::SI_SPILL_V512_SAVE:
1023     case AMDGPU::SI_SPILL_V256_SAVE:
1024     case AMDGPU::SI_SPILL_V128_SAVE:
1025     case AMDGPU::SI_SPILL_V96_SAVE:
1026     case AMDGPU::SI_SPILL_V64_SAVE:
1027     case AMDGPU::SI_SPILL_V32_SAVE: {
1028       const MachineOperand *VData = TII->getNamedOperand(*MI,
1029                                                          AMDGPU::OpName::vdata);
1030       buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1031             Index,
1032             VData->getReg(), VData->isKill(),
1033             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1034             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1035             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1036             *MI->memoperands_begin(),
1037             RS);
1038       MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1039       MI->eraseFromParent();
1040       break;
1041     }
1042     case AMDGPU::SI_SPILL_V32_RESTORE:
1043     case AMDGPU::SI_SPILL_V64_RESTORE:
1044     case AMDGPU::SI_SPILL_V96_RESTORE:
1045     case AMDGPU::SI_SPILL_V128_RESTORE:
1046     case AMDGPU::SI_SPILL_V256_RESTORE:
1047     case AMDGPU::SI_SPILL_V512_RESTORE: {
1048       const MachineOperand *VData = TII->getNamedOperand(*MI,
1049                                                          AMDGPU::OpName::vdata);
1050 
1051       buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1052             Index,
1053             VData->getReg(), VData->isKill(),
1054             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1055             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1056             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1057             *MI->memoperands_begin(),
1058             RS);
1059       MI->eraseFromParent();
1060       break;
1061     }
1062 
1063     default: {
1064       const DebugLoc &DL = MI->getDebugLoc();
1065       bool IsMUBUF = TII->isMUBUF(*MI);
1066 
1067       if (!IsMUBUF &&
1068           MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
1069         // Convert to an absolute stack address by finding the offset from the
1070         // scratch wave base and scaling by the wave size.
1071         //
1072         // In an entry function/kernel the stack address is already the
1073         // absolute address relative to the scratch wave offset.
1074 
1075         unsigned DiffReg
1076           = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1077 
1078         bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1079         unsigned ResultReg = IsCopy ?
1080           MI->getOperand(0).getReg() :
1081           MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1082 
1083         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1084           .addReg(MFI->getFrameOffsetReg())
1085           .addReg(MFI->getScratchWaveOffsetReg());
1086 
1087         int64_t Offset = FrameInfo.getObjectOffset(Index);
1088         if (Offset == 0) {
1089           // XXX - This never happens because of emergency scavenging slot at 0?
1090           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1091             .addImm(Log2_32(ST.getWavefrontSize()))
1092             .addReg(DiffReg);
1093         } else {
1094           unsigned ScaledReg
1095             = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1096 
1097           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1098             .addImm(Log2_32(ST.getWavefrontSize()))
1099             .addReg(DiffReg, RegState::Kill);
1100 
1101           // TODO: Fold if use instruction is another add of a constant.
1102           if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1103             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1104               .addImm(Offset)
1105               .addReg(ScaledReg, RegState::Kill);
1106           } else {
1107             unsigned ConstOffsetReg
1108               = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1109 
1110             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1111               .addImm(Offset);
1112             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1113               .addReg(ConstOffsetReg, RegState::Kill)
1114               .addReg(ScaledReg, RegState::Kill);
1115           }
1116         }
1117 
1118         // Don't introduce an extra copy if we're just materializing in a mov.
1119         if (IsCopy)
1120           MI->eraseFromParent();
1121         else
1122           FIOp.ChangeToRegister(ResultReg, false, false, true);
1123         return;
1124       }
1125 
1126       if (IsMUBUF) {
1127         // Disable offen so we don't need a 0 vgpr base.
1128         assert(static_cast<int>(FIOperandNum) ==
1129                AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1130                                           AMDGPU::OpName::vaddr));
1131 
1132         assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
1133                == MFI->getFrameOffsetReg());
1134 
1135         int64_t Offset = FrameInfo.getObjectOffset(Index);
1136         int64_t OldImm
1137           = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1138         int64_t NewOffset = OldImm + Offset;
1139 
1140         if (isUInt<12>(NewOffset) &&
1141             buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1142           MI->eraseFromParent();
1143           return;
1144         }
1145       }
1146 
1147       // If the offset is simply too big, don't convert to a scratch wave offset
1148       // relative index.
1149 
1150       int64_t Offset = FrameInfo.getObjectOffset(Index);
1151       FIOp.ChangeToImmediate(Offset);
1152       if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1153         unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1154         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1155           .addImm(Offset);
1156         FIOp.ChangeToRegister(TmpReg, false, false, true);
1157       }
1158     }
1159   }
1160 }
1161 
getRegAsmName(unsigned Reg) const1162 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1163   #define AMDGPU_REG_ASM_NAMES
1164   #include "AMDGPURegAsmNames.inc.cpp"
1165 
1166   #define REG_RANGE(BeginReg, EndReg, RegTable)            \
1167     if (Reg >= BeginReg && Reg <= EndReg) {                \
1168       unsigned Index = Reg - BeginReg;                     \
1169       assert(Index < array_lengthof(RegTable));            \
1170       return RegTable[Index];                              \
1171     }
1172 
1173   REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
1174   REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
1175   REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
1176   REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
1177   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
1178             VGPR96RegNames);
1179 
1180   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
1181             AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
1182             VGPR128RegNames);
1183   REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
1184             AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
1185             SGPR128RegNames);
1186 
1187   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
1188             AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1189             VGPR256RegNames);
1190 
1191   REG_RANGE(
1192     AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
1193     AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1194     VGPR512RegNames);
1195 
1196   REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
1197             AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1198             SGPR256RegNames);
1199 
1200   REG_RANGE(
1201     AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
1202     AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1203     SGPR512RegNames
1204   );
1205 
1206 #undef REG_RANGE
1207 
1208   // FIXME: Rename flat_scr so we don't need to special case this.
1209   switch (Reg) {
1210   case AMDGPU::FLAT_SCR:
1211     return "flat_scratch";
1212   case AMDGPU::FLAT_SCR_LO:
1213     return "flat_scratch_lo";
1214   case AMDGPU::FLAT_SCR_HI:
1215     return "flat_scratch_hi";
1216   default:
1217     // For the special named registers the default is fine.
1218     return TargetRegisterInfo::getRegAsmName(Reg);
1219   }
1220 }
1221 
1222 // FIXME: This is very slow. It might be worth creating a map from physreg to
1223 // register class.
getPhysRegClass(unsigned Reg) const1224 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1225   assert(!TargetRegisterInfo::isVirtualRegister(Reg));
1226 
1227   static const TargetRegisterClass *const BaseClasses[] = {
1228     &AMDGPU::VGPR_32RegClass,
1229     &AMDGPU::SReg_32RegClass,
1230     &AMDGPU::VReg_64RegClass,
1231     &AMDGPU::SReg_64RegClass,
1232     &AMDGPU::VReg_96RegClass,
1233     &AMDGPU::VReg_128RegClass,
1234     &AMDGPU::SReg_128RegClass,
1235     &AMDGPU::VReg_256RegClass,
1236     &AMDGPU::SReg_256RegClass,
1237     &AMDGPU::VReg_512RegClass,
1238     &AMDGPU::SReg_512RegClass,
1239     &AMDGPU::SCC_CLASSRegClass,
1240     &AMDGPU::Pseudo_SReg_32RegClass,
1241     &AMDGPU::Pseudo_SReg_128RegClass,
1242   };
1243 
1244   for (const TargetRegisterClass *BaseClass : BaseClasses) {
1245     if (BaseClass->contains(Reg)) {
1246       return BaseClass;
1247     }
1248   }
1249   return nullptr;
1250 }
1251 
1252 // TODO: It might be helpful to have some target specific flags in
1253 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
hasVGPRs(const TargetRegisterClass * RC) const1254 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1255   unsigned Size = getRegSizeInBits(*RC);
1256   if (Size < 32)
1257     return false;
1258   switch (Size) {
1259   case 32:
1260     return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1261   case 64:
1262     return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1263   case 96:
1264     return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1265   case 128:
1266     return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1267   case 256:
1268     return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1269   case 512:
1270     return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1271   default:
1272     llvm_unreachable("Invalid register class size");
1273   }
1274 }
1275 
/// Return the VGPR register class that has the same bit width as \p SRC.
/// Supported widths are 32, 64, 96, 128, 256 and 512 bits; any other width is
/// treated as unreachable.
const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
                                         const TargetRegisterClass *SRC) const {
  const unsigned Bits = getRegSizeInBits(*SRC);

  if (Bits == 32)
    return &AMDGPU::VGPR_32RegClass;
  if (Bits == 64)
    return &AMDGPU::VReg_64RegClass;
  if (Bits == 96)
    return &AMDGPU::VReg_96RegClass;
  if (Bits == 128)
    return &AMDGPU::VReg_128RegClass;
  if (Bits == 256)
    return &AMDGPU::VReg_256RegClass;
  if (Bits == 512)
    return &AMDGPU::VReg_512RegClass;

  llvm_unreachable("Invalid register class size");
}
1295 
/// Return the SGPR register class that has the same bit width as \p VRC.
/// Supported widths are 32, 64, 128, 256 and 512 bits (there is no 96-bit
/// entry); any other width is treated as unreachable.
const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
                                         const TargetRegisterClass *VRC) const {
  const unsigned Bits = getRegSizeInBits(*VRC);

  if (Bits == 32)
    return &AMDGPU::SGPR_32RegClass;
  if (Bits == 64)
    return &AMDGPU::SReg_64RegClass;
  if (Bits == 128)
    return &AMDGPU::SReg_128RegClass;
  if (Bits == 256)
    return &AMDGPU::SReg_256RegClass;
  if (Bits == 512)
    return &AMDGPU::SReg_512RegClass;

  llvm_unreachable("Invalid register class size");
}
1313 
/// Return the register class of the sub-register selected by \p SubIdx in a
/// register of class \p RC.
///
/// With no sub-register index, \p RC itself is returned. Otherwise the result
/// is the SGPR or VGPR class (matching the kind of \p RC) whose width equals
/// the number of lanes covered by \p SubIdx. Lane counts without a matching
/// class are treated as unreachable.
const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
                         const TargetRegisterClass *RC, unsigned SubIdx) const {
  if (SubIdx == AMDGPU::NoSubRegister)
    return RC;

  // We can assume that each lane corresponds to one 32-bit register.
  const unsigned NumLanes = getSubRegIndexLaneMask(SubIdx).getNumLanes();
  const bool IsScalar = isSGPRClass(RC);

  switch (NumLanes) {
  case 1:
    return IsScalar ? &AMDGPU::SGPR_32RegClass : &AMDGPU::VGPR_32RegClass;
  case 2:
    return IsScalar ? &AMDGPU::SReg_64RegClass : &AMDGPU::VReg_64RegClass;
  case 3:
    // Only the vector side has a three-register class.
    if (!IsScalar)
      return &AMDGPU::VReg_96RegClass;
    break;
  case 4:
    return IsScalar ? &AMDGPU::SReg_128RegClass : &AMDGPU::VReg_128RegClass;
  case 8:
    return IsScalar ? &AMDGPU::SReg_256RegClass : &AMDGPU::VReg_256RegClass;
  default:
    // Includes 16 lanes, which is not handled for either kind.
    break;
  }

  llvm_unreachable("Invalid sub-register class size");
}
1353 
shouldRewriteCopySrc(const TargetRegisterClass * DefRC,unsigned DefSubReg,const TargetRegisterClass * SrcRC,unsigned SrcSubReg) const1354 bool SIRegisterInfo::shouldRewriteCopySrc(
1355   const TargetRegisterClass *DefRC,
1356   unsigned DefSubReg,
1357   const TargetRegisterClass *SrcRC,
1358   unsigned SrcSubReg) const {
1359   // We want to prefer the smallest register class possible, so we don't want to
1360   // stop and rewrite on anything that looks like a subregister
1361   // extract. Operations mostly don't care about the super register class, so we
1362   // only want to stop on the most basic of copies between the same register
1363   // class.
1364   //
1365   // e.g. if we have something like
1366   // %0 = ...
1367   // %1 = ...
1368   // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1369   // %3 = COPY %2, sub0
1370   //
1371   // We want to look through the COPY to find:
1372   //  => %3 = COPY %0
1373 
1374   // Plain copy.
1375   return getCommonSubClass(DefRC, SrcRC) != nullptr;
1376 }
1377 
1378 /// Returns a register that is not used at any point in the function.
1379 ///        If all registers are used, then this function will return
1380 //         AMDGPU::NoRegister.
1381 unsigned
findUnusedRegister(const MachineRegisterInfo & MRI,const TargetRegisterClass * RC,const MachineFunction & MF) const1382 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1383                                    const TargetRegisterClass *RC,
1384                                    const MachineFunction &MF) const {
1385 
1386   for (unsigned Reg : *RC)
1387     if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1388       return Reg;
1389   return AMDGPU::NoRegister;
1390 }
1391 
getRegSplitParts(const TargetRegisterClass * RC,unsigned EltSize) const1392 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1393                                                    unsigned EltSize) const {
1394   if (EltSize == 4) {
1395     static const int16_t Sub0_15[] = {
1396       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1397       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1398       AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1399       AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1400     };
1401 
1402     static const int16_t Sub0_7[] = {
1403       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1404       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1405     };
1406 
1407     static const int16_t Sub0_3[] = {
1408       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1409     };
1410 
1411     static const int16_t Sub0_2[] = {
1412       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1413     };
1414 
1415     static const int16_t Sub0_1[] = {
1416       AMDGPU::sub0, AMDGPU::sub1,
1417     };
1418 
1419     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1420     case 32:
1421       return {};
1422     case 64:
1423       return makeArrayRef(Sub0_1);
1424     case 96:
1425       return makeArrayRef(Sub0_2);
1426     case 128:
1427       return makeArrayRef(Sub0_3);
1428     case 256:
1429       return makeArrayRef(Sub0_7);
1430     case 512:
1431       return makeArrayRef(Sub0_15);
1432     default:
1433       llvm_unreachable("unhandled register size");
1434     }
1435   }
1436 
1437   if (EltSize == 8) {
1438     static const int16_t Sub0_15_64[] = {
1439       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1440       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1441       AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1442       AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1443     };
1444 
1445     static const int16_t Sub0_7_64[] = {
1446       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1447       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1448     };
1449 
1450 
1451     static const int16_t Sub0_3_64[] = {
1452       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1453     };
1454 
1455     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1456     case 64:
1457       return {};
1458     case 128:
1459       return makeArrayRef(Sub0_3_64);
1460     case 256:
1461       return makeArrayRef(Sub0_7_64);
1462     case 512:
1463       return makeArrayRef(Sub0_15_64);
1464     default:
1465       llvm_unreachable("unhandled register size");
1466     }
1467   }
1468 
1469   assert(EltSize == 16 && "unhandled register spill split size");
1470 
1471   static const int16_t Sub0_15_128[] = {
1472     AMDGPU::sub0_sub1_sub2_sub3,
1473     AMDGPU::sub4_sub5_sub6_sub7,
1474     AMDGPU::sub8_sub9_sub10_sub11,
1475     AMDGPU::sub12_sub13_sub14_sub15
1476   };
1477 
1478   static const int16_t Sub0_7_128[] = {
1479     AMDGPU::sub0_sub1_sub2_sub3,
1480     AMDGPU::sub4_sub5_sub6_sub7
1481   };
1482 
1483   switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1484   case 128:
1485     return {};
1486   case 256:
1487     return makeArrayRef(Sub0_7_128);
1488   case 512:
1489     return makeArrayRef(Sub0_15_128);
1490   default:
1491     llvm_unreachable("unhandled register size");
1492   }
1493 }
1494 
1495 const TargetRegisterClass*
getRegClassForReg(const MachineRegisterInfo & MRI,unsigned Reg) const1496 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1497                                   unsigned Reg) const {
1498   if (TargetRegisterInfo::isVirtualRegister(Reg))
1499     return  MRI.getRegClass(Reg);
1500 
1501   return getPhysRegClass(Reg);
1502 }
1503 
isVGPR(const MachineRegisterInfo & MRI,unsigned Reg) const1504 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1505                             unsigned Reg) const {
1506   const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1507   assert(RC && "Register class for the reg not found");
1508   return hasVGPRs(RC);
1509 }
1510 
shouldCoalesce(MachineInstr * MI,const TargetRegisterClass * SrcRC,unsigned SubReg,const TargetRegisterClass * DstRC,unsigned DstSubReg,const TargetRegisterClass * NewRC,LiveIntervals & LIS) const1511 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1512                                     const TargetRegisterClass *SrcRC,
1513                                     unsigned SubReg,
1514                                     const TargetRegisterClass *DstRC,
1515                                     unsigned DstSubReg,
1516                                     const TargetRegisterClass *NewRC,
1517                                     LiveIntervals &LIS) const {
1518   unsigned SrcSize = getRegSizeInBits(*SrcRC);
1519   unsigned DstSize = getRegSizeInBits(*DstRC);
1520   unsigned NewSize = getRegSizeInBits(*NewRC);
1521 
1522   // Do not increase size of registers beyond dword, we would need to allocate
1523   // adjacent registers and constraint regalloc more than needed.
1524 
1525   // Always allow dword coalescing.
1526   if (SrcSize <= 32 || DstSize <= 32)
1527     return true;
1528 
1529   return NewSize <= DstSize || NewSize <= SrcSize;
1530 }
1531 
getRegPressureLimit(const TargetRegisterClass * RC,MachineFunction & MF) const1532 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1533                                              MachineFunction &MF) const {
1534 
1535   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1536   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1537 
1538   unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1539                                                        MF.getFunction());
1540   switch (RC->getID()) {
1541   default:
1542     return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1543   case AMDGPU::VGPR_32RegClassID:
1544     return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1545   case AMDGPU::SGPR_32RegClassID:
1546     return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1547   }
1548 }
1549 
getRegPressureSetLimit(const MachineFunction & MF,unsigned Idx) const1550 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1551                                                 unsigned Idx) const {
1552   if (Idx == getVGPRPressureSet())
1553     return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1554                                const_cast<MachineFunction &>(MF));
1555 
1556   if (Idx == getSGPRPressureSet())
1557     return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1558                                const_cast<MachineFunction &>(MF));
1559 
1560   return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1561 }
1562 
getRegUnitPressureSets(unsigned RegUnit) const1563 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1564   static const int Empty[] = { -1 };
1565 
1566   if (hasRegUnit(AMDGPU::M0, RegUnit))
1567     return Empty;
1568   return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1569 }
1570 
getReturnAddressReg(const MachineFunction & MF) const1571 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1572   // Not a callee saved register.
1573   return AMDGPU::SGPR30_SGPR31;
1574 }
1575 
1576 const TargetRegisterClass *
getConstrainedRegClassForOperand(const MachineOperand & MO,const MachineRegisterInfo & MRI) const1577 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1578                                          const MachineRegisterInfo &MRI) const {
1579   unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
1580   const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
1581   if (!RB)
1582     return nullptr;
1583 
1584   switch (Size) {
1585   case 32:
1586     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1587                                                   &AMDGPU::SReg_32_XM0RegClass;
1588   case 64:
1589     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1590                                                    &AMDGPU::SReg_64_XEXECRegClass;
1591   case 96:
1592     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1593                                                   nullptr;
1594   case 128:
1595     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1596                                                   &AMDGPU::SReg_128RegClass;
1597   default:
1598     llvm_unreachable("not implemented");
1599   }
1600 }
1601