1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "AMDGPUSubtarget.h"
16 #include "R600ISelLowering.h"
17 #include "R600InstrInfo.h"
18 #include "SIFrameLowering.h"
19 #include "SIISelLowering.h"
20 #include "SIInstrInfo.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/CodeGen/MachineScheduler.h"
24
25 using namespace llvm;
26
27 #define DEBUG_TYPE "amdgpu-subtarget"
28
29 #define GET_SUBTARGETINFO_ENUM
30 #define GET_SUBTARGETINFO_TARGET_DESC
31 #define GET_SUBTARGETINFO_CTOR
32 #include "AMDGPUGenSubtargetInfo.inc"
33
~AMDGPUSubtarget()34 AMDGPUSubtarget::~AMDGPUSubtarget() {}
35
36 AMDGPUSubtarget &
initializeSubtargetDependencies(const Triple & TT,StringRef GPU,StringRef FS)37 AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
38 StringRef GPU, StringRef FS) {
39 // Determine default and user-specified characteristics
40 // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
41 // enabled, but some instructions do not respect them and they run at the
42 // double precision rate, so don't enable by default.
43 //
44 // We want to be able to turn these off, but making this a subtarget feature
45 // for SI has the unhelpful behavior that it unsets everything else if you
46 // disable it.
47
48 SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
49 if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
50 FullFS += "+flat-for-global,+unaligned-buffer-access,";
51 FullFS += FS;
52
53 ParseSubtargetFeatures(GPU, FullFS);
54
55 // FIXME: I don't think think Evergreen has any useful support for
56 // denormals, but should be checked. Should we issue a warning somewhere
57 // if someone tries to enable these?
58 if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
59 FP32Denormals = false;
60 FP64Denormals = false;
61 }
62
63 // Set defaults if needed.
64 if (MaxPrivateElementSize == 0)
65 MaxPrivateElementSize = 4;
66
67 return *this;
68 }
69
/// Construct the common AMDGPU subtarget state.
///
/// Every feature flag starts out disabled (and every size at zero); the real
/// values are filled in by the call to initializeSubtargetDependencies() in
/// the constructor body, which parses \p GPU and \p FS.
///
/// \param TT  Target triple; stored, and used to pick the initial generation.
/// \param GPU Processor name, forwarded to the generated base class, the
///            itinerary lookup and the feature parser.
/// \param FS  User feature string, forwarded to the generated base class and
///            the feature parser.
/// \param TM  Not used by this constructor.
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                                 const TargetMachine &TM)
  : AMDGPUGenSubtargetInfo(TT, GPU, FS),
    TargetTriple(TT),
    // Initial generation guess from the architecture alone: amdgcn triples
    // start at SOUTHERN_ISLANDS, everything else at R600.
    Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
    IsaVersion(ISAVersion0_0_0),
    WavefrontSize(64),
    LocalMemorySize(0),
    LDSBankCount(0),
    // 0 means "unset"; initializeSubtargetDependencies() replaces it with 4.
    MaxPrivateElementSize(0),

    FastFMAF32(false),
    HalfRate64Ops(false),

    FP32Denormals(false),
    FP64Denormals(false),
    FPExceptions(false),
    FlatForGlobal(false),
    UnalignedBufferAccess(false),

    EnableXNACK(false),
    DebuggerInsertNops(false),
    DebuggerReserveRegs(false),
    DebuggerEmitPrologue(false),

    EnableVGPRSpilling(false),
    EnablePromoteAlloca(false),
    EnableLoadStoreOpt(false),
    EnableUnsafeDSOffsetFolding(false),
    EnableSIScheduler(false),
    DumpCode(false),

    FP64(false),
    IsGCN(false),
    GCN1Encoding(false),
    GCN3Encoding(false),
    CIInsts(false),
    SGPRInitBug(false),
    HasSMemRealTime(false),
    Has16BitInsts(false),
    FlatAddressSpace(false),

    R600ALUInst(false),
    CaymanISA(false),
    CFALUBug(false),
    HasVertexCache(false),
    TexVTXClauseSize(0),

    FeatureDisable(false),
    InstrItins(getInstrItineraryForCPU(GPU)) {
  // Parse the feature string now that every member has a defined value.
  initializeSubtargetDependencies(TT, GPU, FS);
}
122
123 // FIXME: These limits are for SI. Did they change with the larger maximum LDS
124 // size?
getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const125 unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const {
126 switch (NWaves) {
127 case 10:
128 return 1638;
129 case 9:
130 return 1820;
131 case 8:
132 return 2048;
133 case 7:
134 return 2340;
135 case 6:
136 return 2730;
137 case 5:
138 return 3276;
139 case 4:
140 return 4096;
141 case 3:
142 return 5461;
143 case 2:
144 return 8192;
145 default:
146 return getLocalMemorySize();
147 }
148 }
149
getOccupancyWithLocalMemSize(uint32_t Bytes) const150 unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const {
151 if (Bytes <= 1638)
152 return 10;
153
154 if (Bytes <= 1820)
155 return 9;
156
157 if (Bytes <= 2048)
158 return 8;
159
160 if (Bytes <= 2340)
161 return 7;
162
163 if (Bytes <= 2730)
164 return 6;
165
166 if (Bytes <= 3276)
167 return 5;
168
169 if (Bytes <= 4096)
170 return 4;
171
172 if (Bytes <= 5461)
173 return 3;
174
175 if (Bytes <= 8192)
176 return 2;
177
178 return 1;
179 }
180
/// Construct an R600 subtarget: initialize the common AMDGPU state, then the
/// R600 instruction info, frame lowering and target lowering members.
///
/// Note that members are initialized in their declaration order, and both
/// InstrInfo and TLInfo are handed a reference to this object while it is
/// still being constructed.
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
                             const TargetMachine &TM) :
  AMDGPUSubtarget(TT, GPU, FS, TM),
  InstrInfo(*this),
  // Upward-growing stack with the subtarget's stack alignment; the trailing 0
  // is presumably the local-area offset -- confirm against the frame lowering
  // class.
  FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
  TLInfo(TM, *this) {}
187
/// Construct an SI subtarget: initialize the common AMDGPU state, then the SI
/// instruction info, frame lowering and target lowering members.
///
/// Note that members are initialized in their declaration order, and both
/// InstrInfo and TLInfo are handed a reference to this object while it is
/// still being constructed.
SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                         const TargetMachine &TM) :
  AMDGPUSubtarget(TT, GPU, FS, TM),
  InstrInfo(*this),
  // Upward-growing stack with the subtarget's stack alignment; the trailing 0
  // is presumably the local-area offset -- confirm against the frame lowering
  // class.
  FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
  TLInfo(TM, *this),
  // Value-initialized here; nothing in this constructor populates it.
  GISel() {}
195
getStackEntrySize() const196 unsigned R600Subtarget::getStackEntrySize() const {
197 switch (getWavefrontSize()) {
198 case 16:
199 return 8;
200 case 32:
201 return hasCaymanISA() ? 4 : 8;
202 case 64:
203 return 4;
204 default:
205 llvm_unreachable("Illegal wavefront size.");
206 }
207 }
208
overrideSchedPolicy(MachineSchedPolicy & Policy,unsigned NumRegionInstrs) const209 void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
210 unsigned NumRegionInstrs) const {
211 // Track register pressure so the scheduler can try to decrease
212 // pressure once register usage is above the threshold defined by
213 // SIRegisterInfo::getRegPressureSetLimit()
214 Policy.ShouldTrackPressure = true;
215
216 // Enabling both top down and bottom up scheduling seems to give us less
217 // register spills than just using one of these approaches on its own.
218 Policy.OnlyTopDown = false;
219 Policy.OnlyBottomUp = false;
220
221 // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
222 if (!enableSIScheduler())
223 Policy.ShouldTrackLaneMasks = true;
224 }
225
isVGPRSpillingEnabled(const Function & F) const226 bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
227 return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
228 }
229
getAmdKernelCodeChipID() const230 unsigned SISubtarget::getAmdKernelCodeChipID() const {
231 switch (getGeneration()) {
232 case SEA_ISLANDS:
233 return 12;
234 default:
235 llvm_unreachable("ChipID unknown");
236 }
237 }
238
getIsaVersion() const239 AMDGPU::IsaVersion SISubtarget::getIsaVersion() const {
240 return AMDGPU::getIsaVersion(getFeatureBits());
241 }
242