1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
/// \brief AMDGPU specific subclass of TargetSubtargetInfo.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
17 
18 #include "AMDGPU.h"
19 #include "R600InstrInfo.h"
20 #include "R600ISelLowering.h"
21 #include "R600FrameLowering.h"
22 #include "SIInstrInfo.h"
23 #include "SIISelLowering.h"
24 #include "SIFrameLowering.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
27 #include "llvm/Target/TargetSubtargetInfo.h"
28 
29 #define GET_SUBTARGETINFO_HEADER
30 #include "AMDGPUGenSubtargetInfo.inc"
31 
32 namespace llvm {
33 
34 class SIMachineFunctionInfo;
35 class StringRef;
36 
37 class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
38 public:
39   enum Generation {
40     R600 = 0,
41     R700,
42     EVERGREEN,
43     NORTHERN_ISLANDS,
44     SOUTHERN_ISLANDS,
45     SEA_ISLANDS,
46     VOLCANIC_ISLANDS,
47   };
48 
49   enum {
50     ISAVersion0_0_0,
51     ISAVersion7_0_0,
52     ISAVersion7_0_1,
53     ISAVersion8_0_0,
54     ISAVersion8_0_1,
55     ISAVersion8_0_3
56   };
57 
58 protected:
59   // Basic subtarget description.
60   Triple TargetTriple;
61   Generation Gen;
62   unsigned IsaVersion;
63   unsigned WavefrontSize;
64   int LocalMemorySize;
65   int LDSBankCount;
66   unsigned MaxPrivateElementSize;
67 
68   // Possibly statically set by tablegen, but may want to be overridden.
69   bool FastFMAF32;
70   bool HalfRate64Ops;
71 
72   // Dynamially set bits that enable features.
73   bool FP32Denormals;
74   bool FP64Denormals;
75   bool FPExceptions;
76   bool FlatForGlobal;
77   bool UnalignedBufferAccess;
78   bool EnableXNACK;
79   bool DebuggerInsertNops;
80   bool DebuggerReserveRegs;
81   bool DebuggerEmitPrologue;
82 
83   // Used as options.
84   bool EnableVGPRSpilling;
85   bool EnablePromoteAlloca;
86   bool EnableLoadStoreOpt;
87   bool EnableUnsafeDSOffsetFolding;
88   bool EnableSIScheduler;
89   bool DumpCode;
90 
91   // Subtarget statically properties set by tablegen
92   bool FP64;
93   bool IsGCN;
94   bool GCN1Encoding;
95   bool GCN3Encoding;
96   bool CIInsts;
97   bool SGPRInitBug;
98   bool HasSMemRealTime;
99   bool Has16BitInsts;
100   bool FlatAddressSpace;
101   bool R600ALUInst;
102   bool CaymanISA;
103   bool CFALUBug;
104   bool HasVertexCache;
105   short TexVTXClauseSize;
106 
107   // Dummy feature to use for assembler in tablegen.
108   bool FeatureDisable;
109 
110   InstrItineraryData InstrItins;
111 
112 public:
113   AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
114                   const TargetMachine &TM);
115   virtual ~AMDGPUSubtarget();
116   AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
117                                                    StringRef GPU, StringRef FS);
118 
119   const AMDGPUInstrInfo *getInstrInfo() const override;
120   const AMDGPUFrameLowering *getFrameLowering() const override;
121   const AMDGPUTargetLowering *getTargetLowering() const override;
122   const AMDGPURegisterInfo *getRegisterInfo() const override;
123 
getInstrItineraryData()124   const InstrItineraryData *getInstrItineraryData() const override {
125     return &InstrItins;
126   }
127 
128   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
129 
isAmdHsaOS()130   bool isAmdHsaOS() const {
131     return TargetTriple.getOS() == Triple::AMDHSA;
132   }
133 
getGeneration()134   Generation getGeneration() const {
135     return Gen;
136   }
137 
getWavefrontSize()138   unsigned getWavefrontSize() const {
139     return WavefrontSize;
140   }
141 
getLocalMemorySize()142   int getLocalMemorySize() const {
143     return LocalMemorySize;
144   }
145 
getLDSBankCount()146   int getLDSBankCount() const {
147     return LDSBankCount;
148   }
149 
getMaxPrivateElementSize()150   unsigned getMaxPrivateElementSize() const {
151     return MaxPrivateElementSize;
152   }
153 
hasHWFP64()154   bool hasHWFP64() const {
155     return FP64;
156   }
157 
hasFastFMAF32()158   bool hasFastFMAF32() const {
159     return FastFMAF32;
160   }
161 
hasHalfRate64Ops()162   bool hasHalfRate64Ops() const {
163     return HalfRate64Ops;
164   }
165 
hasAddr64()166   bool hasAddr64() const {
167     return (getGeneration() < VOLCANIC_ISLANDS);
168   }
169 
hasBFE()170   bool hasBFE() const {
171     return (getGeneration() >= EVERGREEN);
172   }
173 
hasBFI()174   bool hasBFI() const {
175     return (getGeneration() >= EVERGREEN);
176   }
177 
hasBFM()178   bool hasBFM() const {
179     return hasBFE();
180   }
181 
hasBCNT(unsigned Size)182   bool hasBCNT(unsigned Size) const {
183     if (Size == 32)
184       return (getGeneration() >= EVERGREEN);
185 
186     if (Size == 64)
187       return (getGeneration() >= SOUTHERN_ISLANDS);
188 
189     return false;
190   }
191 
hasMulU24()192   bool hasMulU24() const {
193     return (getGeneration() >= EVERGREEN);
194   }
195 
hasMulI24()196   bool hasMulI24() const {
197     return (getGeneration() >= SOUTHERN_ISLANDS ||
198             hasCaymanISA());
199   }
200 
hasFFBL()201   bool hasFFBL() const {
202     return (getGeneration() >= EVERGREEN);
203   }
204 
hasFFBH()205   bool hasFFBH() const {
206     return (getGeneration() >= EVERGREEN);
207   }
208 
hasCARRY()209   bool hasCARRY() const {
210     return (getGeneration() >= EVERGREEN);
211   }
212 
hasBORROW()213   bool hasBORROW() const {
214     return (getGeneration() >= EVERGREEN);
215   }
216 
hasCaymanISA()217   bool hasCaymanISA() const {
218     return CaymanISA;
219   }
220 
isPromoteAllocaEnabled()221   bool isPromoteAllocaEnabled() const {
222     return EnablePromoteAlloca;
223   }
224 
unsafeDSOffsetFoldingEnabled()225   bool unsafeDSOffsetFoldingEnabled() const {
226     return EnableUnsafeDSOffsetFolding;
227   }
228 
dumpCode()229   bool dumpCode() const {
230     return DumpCode;
231   }
232 
233   /// Return the amount of LDS that can be used that will not restrict the
234   /// occupancy lower than WaveCount.
235   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const;
236 
237   /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
238   /// the given LDS memory size is the only constraint.
239   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
240 
241 
hasFP32Denormals()242   bool hasFP32Denormals() const {
243     return FP32Denormals;
244   }
245 
hasFP64Denormals()246   bool hasFP64Denormals() const {
247     return FP64Denormals;
248   }
249 
hasFPExceptions()250   bool hasFPExceptions() const {
251     return FPExceptions;
252   }
253 
useFlatForGlobal()254   bool useFlatForGlobal() const {
255     return FlatForGlobal;
256   }
257 
hasUnalignedBufferAccess()258   bool hasUnalignedBufferAccess() const {
259     return UnalignedBufferAccess;
260   }
261 
isXNACKEnabled()262   bool isXNACKEnabled() const {
263     return EnableXNACK;
264   }
265 
getMaxWavesPerCU()266   unsigned getMaxWavesPerCU() const {
267     if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
268       return 10;
269 
270     // FIXME: Not sure what this is for other subtagets.
271     return 8;
272   }
273 
274   /// \brief Returns the offset in bytes from the start of the input buffer
275   ///        of the first explicit kernel argument.
getExplicitKernelArgOffset()276   unsigned getExplicitKernelArgOffset() const {
277     return isAmdHsaOS() ? 0 : 36;
278   }
279 
getStackAlignment()280   unsigned getStackAlignment() const {
281     // Scratch is allocated in 256 dword per wave blocks.
282     return 4 * 256 / getWavefrontSize();
283   }
284 
enableMachineScheduler()285   bool enableMachineScheduler() const override {
286     return true;
287   }
288 
enableSubRegLiveness()289   bool enableSubRegLiveness() const override {
290     return true;
291   }
292 };
293 
294 class R600Subtarget final : public AMDGPUSubtarget {
295 private:
296   R600InstrInfo InstrInfo;
297   R600FrameLowering FrameLowering;
298   R600TargetLowering TLInfo;
299 
300 public:
301   R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
302                 const TargetMachine &TM);
303 
getInstrInfo()304   const R600InstrInfo *getInstrInfo() const override {
305     return &InstrInfo;
306   }
307 
getFrameLowering()308   const R600FrameLowering *getFrameLowering() const override {
309     return &FrameLowering;
310   }
311 
getTargetLowering()312   const R600TargetLowering *getTargetLowering() const override {
313     return &TLInfo;
314   }
315 
getRegisterInfo()316   const R600RegisterInfo *getRegisterInfo() const override {
317     return &InstrInfo.getRegisterInfo();
318   }
319 
hasCFAluBug()320   bool hasCFAluBug() const {
321     return CFALUBug;
322   }
323 
hasVertexCache()324   bool hasVertexCache() const {
325     return HasVertexCache;
326   }
327 
getTexVTXClauseSize()328   short getTexVTXClauseSize() const {
329     return TexVTXClauseSize;
330   }
331 
332   unsigned getStackEntrySize() const;
333 };
334 
335 class SISubtarget final : public AMDGPUSubtarget {
336 public:
337   enum {
338     FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
339   };
340 
341 private:
342   SIInstrInfo InstrInfo;
343   SIFrameLowering FrameLowering;
344   SITargetLowering TLInfo;
345   std::unique_ptr<GISelAccessor> GISel;
346 
347 public:
348   SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
349               const TargetMachine &TM);
350 
getInstrInfo()351   const SIInstrInfo *getInstrInfo() const override {
352     return &InstrInfo;
353   }
354 
getFrameLowering()355   const SIFrameLowering *getFrameLowering() const override {
356     return &FrameLowering;
357   }
358 
getTargetLowering()359   const SITargetLowering *getTargetLowering() const override {
360     return &TLInfo;
361   }
362 
getCallLowering()363   const CallLowering *getCallLowering() const override {
364     assert(GISel && "Access to GlobalISel APIs not set");
365     return GISel->getCallLowering();
366   }
367 
getRegisterInfo()368   const SIRegisterInfo *getRegisterInfo() const override {
369     return &InstrInfo.getRegisterInfo();
370   }
371 
setGISelAccessor(GISelAccessor & GISel)372   void setGISelAccessor(GISelAccessor &GISel) {
373     this->GISel.reset(&GISel);
374   }
375 
376   void overrideSchedPolicy(MachineSchedPolicy &Policy,
377                            unsigned NumRegionInstrs) const override;
378 
379   bool isVGPRSpillingEnabled(const Function& F) const;
380 
381   unsigned getAmdKernelCodeChipID() const;
382 
383   AMDGPU::IsaVersion getIsaVersion() const;
384 
getMaxNumUserSGPRs()385   unsigned getMaxNumUserSGPRs() const {
386     return 16;
387   }
388 
hasFlatAddressSpace()389   bool hasFlatAddressSpace() const {
390     return FlatAddressSpace;
391   }
392 
hasSMemRealTime()393   bool hasSMemRealTime() const {
394     return HasSMemRealTime;
395   }
396 
has16BitInsts()397   bool has16BitInsts() const {
398     return Has16BitInsts;
399   }
400 
enableSIScheduler()401   bool enableSIScheduler() const {
402     return EnableSIScheduler;
403   }
404 
debuggerSupported()405   bool debuggerSupported() const {
406     return debuggerInsertNops() && debuggerReserveRegs() &&
407       debuggerEmitPrologue();
408   }
409 
debuggerInsertNops()410   bool debuggerInsertNops() const {
411     return DebuggerInsertNops;
412   }
413 
debuggerReserveRegs()414   bool debuggerReserveRegs() const {
415     return DebuggerReserveRegs;
416   }
417 
debuggerEmitPrologue()418   bool debuggerEmitPrologue() const {
419     return DebuggerEmitPrologue;
420   }
421 
loadStoreOptEnabled()422   bool loadStoreOptEnabled() const {
423     return EnableLoadStoreOpt;
424   }
425 
hasSGPRInitBug()426   bool hasSGPRInitBug() const {
427     return SGPRInitBug;
428   }
429 };
430 
431 
getInstrInfo()432 inline const AMDGPUInstrInfo *AMDGPUSubtarget::getInstrInfo() const {
433   if (getGeneration() >= SOUTHERN_ISLANDS)
434     return static_cast<const SISubtarget *>(this)->getInstrInfo();
435 
436   return static_cast<const R600Subtarget *>(this)->getInstrInfo();
437 }
438 
getFrameLowering()439 inline const AMDGPUFrameLowering *AMDGPUSubtarget::getFrameLowering() const  {
440   if (getGeneration() >= SOUTHERN_ISLANDS)
441     return static_cast<const SISubtarget *>(this)->getFrameLowering();
442 
443   return static_cast<const R600Subtarget *>(this)->getFrameLowering();
444 }
445 
getTargetLowering()446 inline const AMDGPUTargetLowering *AMDGPUSubtarget::getTargetLowering() const  {
447   if (getGeneration() >= SOUTHERN_ISLANDS)
448     return static_cast<const SISubtarget *>(this)->getTargetLowering();
449 
450   return static_cast<const R600Subtarget *>(this)->getTargetLowering();
451 }
452 
getRegisterInfo()453 inline const AMDGPURegisterInfo *AMDGPUSubtarget::getRegisterInfo() const  {
454   if (getGeneration() >= SOUTHERN_ISLANDS)
455     return static_cast<const SISubtarget *>(this)->getRegisterInfo();
456 
457   return static_cast<const R600Subtarget *>(this)->getRegisterInfo();
458 }
459 
460 } // End namespace llvm
461 
462 #endif
463