1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUSubtarget.h"
16 #include "R600ISelLowering.h"
17 #include "R600InstrInfo.h"
18 #include "SIFrameLowering.h"
19 #include "SIISelLowering.h"
20 #include "SIInstrInfo.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/CodeGen/MachineScheduler.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "amdgpu-subtarget"
28 
29 #define GET_SUBTARGETINFO_ENUM
30 #define GET_SUBTARGETINFO_TARGET_DESC
31 #define GET_SUBTARGETINFO_CTOR
32 #include "AMDGPUGenSubtargetInfo.inc"
33 
~AMDGPUSubtarget()34 AMDGPUSubtarget::~AMDGPUSubtarget() {}
35 
36 AMDGPUSubtarget &
initializeSubtargetDependencies(const Triple & TT,StringRef GPU,StringRef FS)37 AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
38                                                  StringRef GPU, StringRef FS) {
39   // Determine default and user-specified characteristics
40   // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
41   // enabled, but some instructions do not respect them and they run at the
42   // double precision rate, so don't enable by default.
43   //
44   // We want to be able to turn these off, but making this a subtarget feature
45   // for SI has the unhelpful behavior that it unsets everything else if you
46   // disable it.
47 
48   SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
49   if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
50     FullFS += "+flat-for-global,+unaligned-buffer-access,";
51   FullFS += FS;
52 
53   ParseSubtargetFeatures(GPU, FullFS);
54 
55   // FIXME: I don't think think Evergreen has any useful support for
56   // denormals, but should be checked. Should we issue a warning somewhere
57   // if someone tries to enable these?
58   if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
59     FP32Denormals = false;
60     FP64Denormals = false;
61   }
62 
63   // Set defaults if needed.
64   if (MaxPrivateElementSize == 0)
65     MaxPrivateElementSize = 4;
66 
67   return *this;
68 }
69 
AMDGPUSubtarget(const Triple & TT,StringRef GPU,StringRef FS,const TargetMachine & TM)70 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
71                                  const TargetMachine &TM)
72   : AMDGPUGenSubtargetInfo(TT, GPU, FS),
73     TargetTriple(TT),
74     Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
75     IsaVersion(ISAVersion0_0_0),
76     WavefrontSize(64),
77     LocalMemorySize(0),
78     LDSBankCount(0),
79     MaxPrivateElementSize(0),
80 
81     FastFMAF32(false),
82     HalfRate64Ops(false),
83 
84     FP32Denormals(false),
85     FP64Denormals(false),
86     FPExceptions(false),
87     FlatForGlobal(false),
88     UnalignedBufferAccess(false),
89 
90     EnableXNACK(false),
91     DebuggerInsertNops(false),
92     DebuggerReserveRegs(false),
93     DebuggerEmitPrologue(false),
94 
95     EnableVGPRSpilling(false),
96     EnablePromoteAlloca(false),
97     EnableLoadStoreOpt(false),
98     EnableUnsafeDSOffsetFolding(false),
99     EnableSIScheduler(false),
100     DumpCode(false),
101 
102     FP64(false),
103     IsGCN(false),
104     GCN1Encoding(false),
105     GCN3Encoding(false),
106     CIInsts(false),
107     SGPRInitBug(false),
108     HasSMemRealTime(false),
109     Has16BitInsts(false),
110     FlatAddressSpace(false),
111 
112     R600ALUInst(false),
113     CaymanISA(false),
114     CFALUBug(false),
115     HasVertexCache(false),
116     TexVTXClauseSize(0),
117 
118     FeatureDisable(false),
119     InstrItins(getInstrItineraryForCPU(GPU)) {
120   initializeSubtargetDependencies(TT, GPU, FS);
121 }
122 
123 // FIXME: These limits are for SI. Did they change with the larger maximum LDS
124 // size?
getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const125 unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const {
126   switch (NWaves) {
127   case 10:
128     return 1638;
129   case 9:
130     return 1820;
131   case 8:
132     return 2048;
133   case 7:
134     return 2340;
135   case 6:
136     return 2730;
137   case 5:
138     return 3276;
139   case 4:
140     return 4096;
141   case 3:
142     return 5461;
143   case 2:
144     return 8192;
145   default:
146     return getLocalMemorySize();
147   }
148 }
149 
getOccupancyWithLocalMemSize(uint32_t Bytes) const150 unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const {
151   if (Bytes <= 1638)
152     return 10;
153 
154   if (Bytes <= 1820)
155     return 9;
156 
157   if (Bytes <= 2048)
158     return 8;
159 
160   if (Bytes <= 2340)
161     return 7;
162 
163   if (Bytes <= 2730)
164     return 6;
165 
166   if (Bytes <= 3276)
167     return 5;
168 
169   if (Bytes <= 4096)
170     return 4;
171 
172   if (Bytes <= 5461)
173     return 3;
174 
175   if (Bytes <= 8192)
176     return 2;
177 
178   return 1;
179 }
180 
R600Subtarget(const Triple & TT,StringRef GPU,StringRef FS,const TargetMachine & TM)181 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
182                              const TargetMachine &TM) :
183   AMDGPUSubtarget(TT, GPU, FS, TM),
184   InstrInfo(*this),
185   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
186   TLInfo(TM, *this) {}
187 
SISubtarget(const Triple & TT,StringRef GPU,StringRef FS,const TargetMachine & TM)188 SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
189                          const TargetMachine &TM) :
190   AMDGPUSubtarget(TT, GPU, FS, TM),
191   InstrInfo(*this),
192   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
193   TLInfo(TM, *this),
194   GISel() {}
195 
getStackEntrySize() const196 unsigned R600Subtarget::getStackEntrySize() const {
197   switch (getWavefrontSize()) {
198   case 16:
199     return 8;
200   case 32:
201     return hasCaymanISA() ? 4 : 8;
202   case 64:
203     return 4;
204   default:
205     llvm_unreachable("Illegal wavefront size.");
206   }
207 }
208 
overrideSchedPolicy(MachineSchedPolicy & Policy,unsigned NumRegionInstrs) const209 void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
210                                       unsigned NumRegionInstrs) const {
211   // Track register pressure so the scheduler can try to decrease
212   // pressure once register usage is above the threshold defined by
213   // SIRegisterInfo::getRegPressureSetLimit()
214   Policy.ShouldTrackPressure = true;
215 
216   // Enabling both top down and bottom up scheduling seems to give us less
217   // register spills than just using one of these approaches on its own.
218   Policy.OnlyTopDown = false;
219   Policy.OnlyBottomUp = false;
220 
221   // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
222   if (!enableSIScheduler())
223     Policy.ShouldTrackLaneMasks = true;
224 }
225 
isVGPRSpillingEnabled(const Function & F) const226 bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
227   return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
228 }
229 
getAmdKernelCodeChipID() const230 unsigned SISubtarget::getAmdKernelCodeChipID() const {
231   switch (getGeneration()) {
232   case SEA_ISLANDS:
233     return 12;
234   default:
235     llvm_unreachable("ChipID unknown");
236   }
237 }
238 
getIsaVersion() const239 AMDGPU::IsaVersion SISubtarget::getIsaVersion() const {
240   return AMDGPU::getIsaVersion(getFeatureBits());
241 }
242