1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// AMDGPU specific subclass of TargetSubtarget.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
17 
18 #include "AMDGPU.h"
19 #include "AMDGPUCallLowering.h"
20 #include "R600FrameLowering.h"
21 #include "R600ISelLowering.h"
22 #include "R600InstrInfo.h"
23 #include "SIFrameLowering.h"
24 #include "SIISelLowering.h"
25 #include "SIInstrInfo.h"
26 #include "Utils/AMDGPUBaseInfo.h"
27 #include "llvm/ADT/Triple.h"
28 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
29 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
30 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
31 #include "llvm/CodeGen/MachineFunction.h"
32 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
33 #include "llvm/MC/MCInstrItineraries.h"
34 #include "llvm/Support/MathExtras.h"
35 #include <cassert>
36 #include <cstdint>
37 #include <memory>
38 #include <utility>
39 
40 #define GET_SUBTARGETINFO_HEADER
41 #include "AMDGPUGenSubtargetInfo.inc"
42 #define GET_SUBTARGETINFO_HEADER
43 #include "R600GenSubtargetInfo.inc"
44 
45 namespace llvm {
46 
47 class StringRef;
48 
49 class AMDGPUSubtarget {
50 public:
51   enum Generation {
52     R600 = 0,
53     R700 = 1,
54     EVERGREEN = 2,
55     NORTHERN_ISLANDS = 3,
56     SOUTHERN_ISLANDS = 4,
57     SEA_ISLANDS = 5,
58     VOLCANIC_ISLANDS = 6,
59     GFX9 = 7
60   };
61 
62 private:
63   Triple TargetTriple;
64 
65 protected:
66   const FeatureBitset &SubtargetFeatureBits;
67   bool Has16BitInsts;
68   bool HasMadMixInsts;
69   bool FP32Denormals;
70   bool FPExceptions;
71   bool HasSDWA;
72   bool HasVOP3PInsts;
73   bool HasMulI24;
74   bool HasMulU24;
75   bool HasFminFmaxLegacy;
76   bool EnablePromoteAlloca;
77   int LocalMemorySize;
78   unsigned WavefrontSize;
79 
80 public:
81   AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
82 
83   static const AMDGPUSubtarget &get(const MachineFunction &MF);
84   static const AMDGPUSubtarget &get(const TargetMachine &TM,
85                                     const Function &F);
86 
87   /// \returns Default range flat work group size for a calling convention.
88   std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
89 
90   /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
91   /// for function \p F, or minimum/maximum flat work group sizes explicitly
92   /// requested using "amdgpu-flat-work-group-size" attribute attached to
93   /// function \p F.
94   ///
95   /// \returns Subtarget's default values if explicitly requested values cannot
96   /// be converted to integer, or violate subtarget's specifications.
97   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
98 
99   /// \returns Subtarget's default pair of minimum/maximum number of waves per
100   /// execution unit for function \p F, or minimum/maximum number of waves per
101   /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
102   /// attached to function \p F.
103   ///
104   /// \returns Subtarget's default values if explicitly requested values cannot
105   /// be converted to integer, violate subtarget's specifications, or are not
106   /// compatible with minimum/maximum number of waves limited by flat work group
107   /// size, register usage, and/or lds usage.
108   std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
109 
110   /// Return the amount of LDS that can be used that will not restrict the
111   /// occupancy lower than WaveCount.
112   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
113                                            const Function &) const;
114 
115   /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
116   /// the given LDS memory size is the only constraint.
117   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
118 
119   unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
120 
isAmdHsaOS()121   bool isAmdHsaOS() const {
122     return TargetTriple.getOS() == Triple::AMDHSA;
123   }
124 
isAmdPalOS()125   bool isAmdPalOS() const {
126     return TargetTriple.getOS() == Triple::AMDPAL;
127   }
128 
isMesa3DOS()129   bool isMesa3DOS() const {
130     return TargetTriple.getOS() == Triple::Mesa3D;
131   }
132 
isMesaKernel(const Function & F)133   bool isMesaKernel(const Function &F) const {
134     return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
135   }
136 
isAmdCodeObjectV2(const Function & F)137   bool isAmdCodeObjectV2(const Function &F) const {
138     return isAmdHsaOS() || isMesaKernel(F);
139   }
140 
has16BitInsts()141   bool has16BitInsts() const {
142     return Has16BitInsts;
143   }
144 
hasMadMixInsts()145   bool hasMadMixInsts() const {
146     return HasMadMixInsts;
147   }
148 
hasFP32Denormals()149   bool hasFP32Denormals() const {
150     return FP32Denormals;
151   }
152 
hasFPExceptions()153   bool hasFPExceptions() const {
154     return FPExceptions;
155   }
156 
hasSDWA()157   bool hasSDWA() const {
158     return HasSDWA;
159   }
160 
hasVOP3PInsts()161   bool hasVOP3PInsts() const {
162     return HasVOP3PInsts;
163   }
164 
hasMulI24()165   bool hasMulI24() const {
166     return HasMulI24;
167   }
168 
hasMulU24()169   bool hasMulU24() const {
170     return HasMulU24;
171   }
172 
hasFminFmaxLegacy()173   bool hasFminFmaxLegacy() const {
174     return HasFminFmaxLegacy;
175   }
176 
isPromoteAllocaEnabled()177   bool isPromoteAllocaEnabled() const {
178     return EnablePromoteAlloca;
179   }
180 
getWavefrontSize()181   unsigned getWavefrontSize() const {
182     return WavefrontSize;
183   }
184 
getLocalMemorySize()185   int getLocalMemorySize() const {
186     return LocalMemorySize;
187   }
188 
getAlignmentForImplicitArgPtr()189   unsigned getAlignmentForImplicitArgPtr() const {
190     return isAmdHsaOS() ? 8 : 4;
191   }
192 
193   /// Returns the offset in bytes from the start of the input buffer
194   ///        of the first explicit kernel argument.
getExplicitKernelArgOffset(const Function & F)195   unsigned getExplicitKernelArgOffset(const Function &F) const {
196     return isAmdCodeObjectV2(F) ? 0 : 36;
197   }
198 
199   /// \returns Maximum number of work groups per compute unit supported by the
200   /// subtarget and limited by given \p FlatWorkGroupSize.
getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize)201   unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
202     return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
203                                                   FlatWorkGroupSize);
204   }
205 
206   /// \returns Minimum flat work group size supported by the subtarget.
getMinFlatWorkGroupSize()207   unsigned getMinFlatWorkGroupSize() const {
208     return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
209   }
210 
211   /// \returns Maximum flat work group size supported by the subtarget.
getMaxFlatWorkGroupSize()212   unsigned getMaxFlatWorkGroupSize() const {
213     return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
214   }
215 
216   /// \returns Maximum number of waves per execution unit supported by the
217   /// subtarget and limited by given \p FlatWorkGroupSize.
getMaxWavesPerEU(unsigned FlatWorkGroupSize)218   unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
219     return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
220                                              FlatWorkGroupSize);
221   }
222 
223   /// \returns Minimum number of waves per execution unit supported by the
224   /// subtarget.
getMinWavesPerEU()225   unsigned getMinWavesPerEU() const {
226     return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
227   }
228 
getMaxWavesPerEU()229   unsigned getMaxWavesPerEU() const { return 10; }
230 
231   /// Creates value range metadata on an workitemid.* inrinsic call or load.
232   bool makeLIDRangeMetadata(Instruction *I) const;
233 
234   /// \returns Number of bytes of arguments that are passed to a shader or
235   /// kernel in addition to the explicit ones declared for the function.
getImplicitArgNumBytes(const Function & F)236   unsigned getImplicitArgNumBytes(const Function &F) const {
237     if (isMesaKernel(F))
238       return 16;
239     return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
240   }
241   uint64_t getExplicitKernArgSize(const Function &F,
242                                   unsigned &MaxAlign) const;
243   unsigned getKernArgSegmentSize(const Function &F,
244                                  unsigned &MaxAlign) const;
245 
~AMDGPUSubtarget()246   virtual ~AMDGPUSubtarget() {}
247 };
248 
249 class GCNSubtarget : public AMDGPUGenSubtargetInfo,
250                      public AMDGPUSubtarget {
251 public:
252   enum {
253     ISAVersion0_0_0,
254     ISAVersion6_0_0,
255     ISAVersion6_0_1,
256     ISAVersion7_0_0,
257     ISAVersion7_0_1,
258     ISAVersion7_0_2,
259     ISAVersion7_0_3,
260     ISAVersion7_0_4,
261     ISAVersion8_0_1,
262     ISAVersion8_0_2,
263     ISAVersion8_0_3,
264     ISAVersion8_1_0,
265     ISAVersion9_0_0,
266     ISAVersion9_0_2,
267     ISAVersion9_0_4,
268     ISAVersion9_0_6,
269   };
270 
271   enum TrapHandlerAbi {
272     TrapHandlerAbiNone = 0,
273     TrapHandlerAbiHsa = 1
274   };
275 
276   enum TrapID {
277     TrapIDHardwareReserved = 0,
278     TrapIDHSADebugTrap = 1,
279     TrapIDLLVMTrap = 2,
280     TrapIDLLVMDebugTrap = 3,
281     TrapIDDebugBreakpoint = 7,
282     TrapIDDebugReserved8 = 8,
283     TrapIDDebugReservedFE = 0xfe,
284     TrapIDDebugReservedFF = 0xff
285   };
286 
287   enum TrapRegValues {
288     LLVMTrapHandlerRegValue = 1
289   };
290 
291 private:
292   /// GlobalISel related APIs.
293   std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
294   std::unique_ptr<InstructionSelector> InstSelector;
295   std::unique_ptr<LegalizerInfo> Legalizer;
296   std::unique_ptr<RegisterBankInfo> RegBankInfo;
297 
298 protected:
299   // Basic subtarget description.
300   Triple TargetTriple;
301   unsigned Gen;
302   unsigned IsaVersion;
303   int LDSBankCount;
304   unsigned MaxPrivateElementSize;
305 
306   // Possibly statically set by tablegen, but may want to be overridden.
307   bool FastFMAF32;
308   bool HalfRate64Ops;
309 
310   // Dynamially set bits that enable features.
311   bool FP64FP16Denormals;
312   bool DX10Clamp;
313   bool FlatForGlobal;
314   bool AutoWaitcntBeforeBarrier;
315   bool CodeObjectV3;
316   bool UnalignedScratchAccess;
317   bool UnalignedBufferAccess;
318   bool HasApertureRegs;
319   bool EnableXNACK;
320   bool TrapHandler;
321   bool DebuggerInsertNops;
322   bool DebuggerEmitPrologue;
323 
324   // Used as options.
325   bool EnableHugePrivateBuffer;
326   bool EnableVGPRSpilling;
327   bool EnableLoadStoreOpt;
328   bool EnableUnsafeDSOffsetFolding;
329   bool EnableSIScheduler;
330   bool EnableDS128;
331   bool DumpCode;
332 
333   // Subtarget statically properties set by tablegen
334   bool FP64;
335   bool FMA;
336   bool MIMG_R128;
337   bool IsGCN;
338   bool GCN3Encoding;
339   bool CIInsts;
340   bool GFX9Insts;
341   bool SGPRInitBug;
342   bool HasSMemRealTime;
343   bool HasIntClamp;
344   bool HasFmaMixInsts;
345   bool HasMovrel;
346   bool HasVGPRIndexMode;
347   bool HasScalarStores;
348   bool HasScalarAtomics;
349   bool HasInv2PiInlineImm;
350   bool HasSDWAOmod;
351   bool HasSDWAScalar;
352   bool HasSDWASdst;
353   bool HasSDWAMac;
354   bool HasSDWAOutModsVOPC;
355   bool HasDPP;
356   bool HasDLInsts;
357   bool D16PreservesUnusedBits;
358   bool FlatAddressSpace;
359   bool FlatInstOffsets;
360   bool FlatGlobalInsts;
361   bool FlatScratchInsts;
362   bool AddNoCarryInsts;
363   bool HasUnpackedD16VMem;
364   bool R600ALUInst;
365   bool CaymanISA;
366   bool CFALUBug;
367   bool HasVertexCache;
368   short TexVTXClauseSize;
369   bool ScalarizeGlobal;
370 
371   // Dummy feature to use for assembler in tablegen.
372   bool FeatureDisable;
373 
374   SelectionDAGTargetInfo TSInfo;
375   AMDGPUAS AS;
376 private:
377   SIInstrInfo InstrInfo;
378   SITargetLowering TLInfo;
379   SIFrameLowering FrameLowering;
380 
381 public:
382   GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
383                const GCNTargetMachine &TM);
384   ~GCNSubtarget() override;
385 
386   GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
387                                                    StringRef GPU, StringRef FS);
388 
getInstrInfo()389   const SIInstrInfo *getInstrInfo() const override {
390     return &InstrInfo;
391   }
392 
getFrameLowering()393   const SIFrameLowering *getFrameLowering() const override {
394     return &FrameLowering;
395   }
396 
getTargetLowering()397   const SITargetLowering *getTargetLowering() const override {
398     return &TLInfo;
399   }
400 
getRegisterInfo()401   const SIRegisterInfo *getRegisterInfo() const override {
402     return &InstrInfo.getRegisterInfo();
403   }
404 
getCallLowering()405   const CallLowering *getCallLowering() const override {
406     return CallLoweringInfo.get();
407   }
408 
getInstructionSelector()409   const InstructionSelector *getInstructionSelector() const override {
410     return InstSelector.get();
411   }
412 
getLegalizerInfo()413   const LegalizerInfo *getLegalizerInfo() const override {
414     return Legalizer.get();
415   }
416 
getRegBankInfo()417   const RegisterBankInfo *getRegBankInfo() const override {
418     return RegBankInfo.get();
419   }
420 
421   // Nothing implemented, just prevent crashes on use.
getSelectionDAGInfo()422   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
423     return &TSInfo;
424   }
425 
426   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
427 
getGeneration()428   Generation getGeneration() const {
429     return (Generation)Gen;
430   }
431 
getWavefrontSizeLog2()432   unsigned getWavefrontSizeLog2() const {
433     return Log2_32(WavefrontSize);
434   }
435 
getLDSBankCount()436   int getLDSBankCount() const {
437     return LDSBankCount;
438   }
439 
getMaxPrivateElementSize()440   unsigned getMaxPrivateElementSize() const {
441     return MaxPrivateElementSize;
442   }
443 
getAMDGPUAS()444   AMDGPUAS getAMDGPUAS() const {
445     return AS;
446   }
447 
hasIntClamp()448   bool hasIntClamp() const {
449     return HasIntClamp;
450   }
451 
hasFP64()452   bool hasFP64() const {
453     return FP64;
454   }
455 
hasMIMG_R128()456   bool hasMIMG_R128() const {
457     return MIMG_R128;
458   }
459 
hasHWFP64()460   bool hasHWFP64() const {
461     return FP64;
462   }
463 
hasFastFMAF32()464   bool hasFastFMAF32() const {
465     return FastFMAF32;
466   }
467 
hasHalfRate64Ops()468   bool hasHalfRate64Ops() const {
469     return HalfRate64Ops;
470   }
471 
hasAddr64()472   bool hasAddr64() const {
473     return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
474   }
475 
hasBFE()476   bool hasBFE() const {
477     return true;
478   }
479 
hasBFI()480   bool hasBFI() const {
481     return true;
482   }
483 
hasBFM()484   bool hasBFM() const {
485     return hasBFE();
486   }
487 
hasBCNT(unsigned Size)488   bool hasBCNT(unsigned Size) const {
489     return true;
490   }
491 
hasFFBL()492   bool hasFFBL() const {
493     return true;
494   }
495 
hasFFBH()496   bool hasFFBH() const {
497     return true;
498   }
499 
hasMed3_16()500   bool hasMed3_16() const {
501     return getGeneration() >= AMDGPUSubtarget::GFX9;
502   }
503 
hasMin3Max3_16()504   bool hasMin3Max3_16() const {
505     return getGeneration() >= AMDGPUSubtarget::GFX9;
506   }
507 
hasFmaMixInsts()508   bool hasFmaMixInsts() const {
509     return HasFmaMixInsts;
510   }
511 
hasCARRY()512   bool hasCARRY() const {
513     return true;
514   }
515 
hasFMA()516   bool hasFMA() const {
517     return FMA;
518   }
519 
getTrapHandlerAbi()520   TrapHandlerAbi getTrapHandlerAbi() const {
521     return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
522   }
523 
enableHugePrivateBuffer()524   bool enableHugePrivateBuffer() const {
525     return EnableHugePrivateBuffer;
526   }
527 
unsafeDSOffsetFoldingEnabled()528   bool unsafeDSOffsetFoldingEnabled() const {
529     return EnableUnsafeDSOffsetFolding;
530   }
531 
dumpCode()532   bool dumpCode() const {
533     return DumpCode;
534   }
535 
536   /// Return the amount of LDS that can be used that will not restrict the
537   /// occupancy lower than WaveCount.
538   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
539                                            const Function &) const;
540 
hasFP16Denormals()541   bool hasFP16Denormals() const {
542     return FP64FP16Denormals;
543   }
544 
hasFP64Denormals()545   bool hasFP64Denormals() const {
546     return FP64FP16Denormals;
547   }
548 
supportsMinMaxDenormModes()549   bool supportsMinMaxDenormModes() const {
550     return getGeneration() >= AMDGPUSubtarget::GFX9;
551   }
552 
enableDX10Clamp()553   bool enableDX10Clamp() const {
554     return DX10Clamp;
555   }
556 
enableIEEEBit(const MachineFunction & MF)557   bool enableIEEEBit(const MachineFunction &MF) const {
558     return AMDGPU::isCompute(MF.getFunction().getCallingConv());
559   }
560 
useFlatForGlobal()561   bool useFlatForGlobal() const {
562     return FlatForGlobal;
563   }
564 
565   /// \returns If target supports ds_read/write_b128 and user enables generation
566   /// of ds_read/write_b128.
useDS128()567   bool useDS128() const {
568     return CIInsts && EnableDS128;
569   }
570 
571   /// \returns If MUBUF instructions always perform range checking, even for
572   /// buffer resources used for private memory access.
privateMemoryResourceIsRangeChecked()573   bool privateMemoryResourceIsRangeChecked() const {
574     return getGeneration() < AMDGPUSubtarget::GFX9;
575   }
576 
hasAutoWaitcntBeforeBarrier()577   bool hasAutoWaitcntBeforeBarrier() const {
578     return AutoWaitcntBeforeBarrier;
579   }
580 
hasCodeObjectV3()581   bool hasCodeObjectV3() const {
582     return CodeObjectV3;
583   }
584 
hasUnalignedBufferAccess()585   bool hasUnalignedBufferAccess() const {
586     return UnalignedBufferAccess;
587   }
588 
hasUnalignedScratchAccess()589   bool hasUnalignedScratchAccess() const {
590     return UnalignedScratchAccess;
591   }
592 
hasApertureRegs()593   bool hasApertureRegs() const {
594     return HasApertureRegs;
595   }
596 
isTrapHandlerEnabled()597   bool isTrapHandlerEnabled() const {
598     return TrapHandler;
599   }
600 
isXNACKEnabled()601   bool isXNACKEnabled() const {
602     return EnableXNACK;
603   }
604 
hasFlatAddressSpace()605   bool hasFlatAddressSpace() const {
606     return FlatAddressSpace;
607   }
608 
hasFlatInstOffsets()609   bool hasFlatInstOffsets() const {
610     return FlatInstOffsets;
611   }
612 
hasFlatGlobalInsts()613   bool hasFlatGlobalInsts() const {
614     return FlatGlobalInsts;
615   }
616 
hasFlatScratchInsts()617   bool hasFlatScratchInsts() const {
618     return FlatScratchInsts;
619   }
620 
hasFlatLgkmVMemCountInOrder()621   bool hasFlatLgkmVMemCountInOrder() const {
622     return getGeneration() > GFX9;
623   }
624 
hasD16LoadStore()625   bool hasD16LoadStore() const {
626     return getGeneration() >= GFX9;
627   }
628 
629   /// Return if most LDS instructions have an m0 use that require m0 to be
630   /// iniitalized.
ldsRequiresM0Init()631   bool ldsRequiresM0Init() const {
632     return getGeneration() < GFX9;
633   }
634 
hasAddNoCarry()635   bool hasAddNoCarry() const {
636     return AddNoCarryInsts;
637   }
638 
hasUnpackedD16VMem()639   bool hasUnpackedD16VMem() const {
640     return HasUnpackedD16VMem;
641   }
642 
643   // Covers VS/PS/CS graphics shaders
isMesaGfxShader(const Function & F)644   bool isMesaGfxShader(const Function &F) const {
645     return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
646   }
647 
hasMad64_32()648   bool hasMad64_32() const {
649     return getGeneration() >= SEA_ISLANDS;
650   }
651 
hasSDWAOmod()652   bool hasSDWAOmod() const {
653     return HasSDWAOmod;
654   }
655 
hasSDWAScalar()656   bool hasSDWAScalar() const {
657     return HasSDWAScalar;
658   }
659 
hasSDWASdst()660   bool hasSDWASdst() const {
661     return HasSDWASdst;
662   }
663 
hasSDWAMac()664   bool hasSDWAMac() const {
665     return HasSDWAMac;
666   }
667 
hasSDWAOutModsVOPC()668   bool hasSDWAOutModsVOPC() const {
669     return HasSDWAOutModsVOPC;
670   }
671 
vmemWriteNeedsExpWaitcnt()672   bool vmemWriteNeedsExpWaitcnt() const {
673     return getGeneration() < SEA_ISLANDS;
674   }
675 
hasDLInsts()676   bool hasDLInsts() const {
677     return HasDLInsts;
678   }
679 
d16PreservesUnusedBits()680   bool d16PreservesUnusedBits() const {
681     return D16PreservesUnusedBits;
682   }
683 
684   // Scratch is allocated in 256 dword per wave blocks for the entire
685   // wavefront. When viewed from the perspecive of an arbitrary workitem, this
686   // is 4-byte aligned.
687   //
688   // Only 4-byte alignment is really needed to access anything. Transformations
689   // on the pointer value itself may rely on the alignment / known low bits of
690   // the pointer. Set this to something above the minimum to avoid needing
691   // dynamic realignment in common cases.
getStackAlignment()692   unsigned getStackAlignment() const {
693     return 16;
694   }
695 
enableMachineScheduler()696   bool enableMachineScheduler() const override {
697     return true;
698   }
699 
enableSubRegLiveness()700   bool enableSubRegLiveness() const override {
701     return true;
702   }
703 
setScalarizeGlobalBehavior(bool b)704   void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
getScalarizeGlobalBehavior()705   bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
706 
707   /// \returns Number of execution units per compute unit supported by the
708   /// subtarget.
getEUsPerCU()709   unsigned getEUsPerCU() const {
710     return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
711   }
712 
713   /// \returns Maximum number of waves per compute unit supported by the
714   /// subtarget without any kind of limitation.
getMaxWavesPerCU()715   unsigned getMaxWavesPerCU() const {
716     return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
717   }
718 
719   /// \returns Maximum number of waves per compute unit supported by the
720   /// subtarget and limited by given \p FlatWorkGroupSize.
getMaxWavesPerCU(unsigned FlatWorkGroupSize)721   unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
722     return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
723                                              FlatWorkGroupSize);
724   }
725 
726   /// \returns Maximum number of waves per execution unit supported by the
727   /// subtarget without any kind of limitation.
getMaxWavesPerEU()728   unsigned getMaxWavesPerEU() const {
729     return AMDGPU::IsaInfo::getMaxWavesPerEU();
730   }
731 
732   /// \returns Number of waves per work group supported by the subtarget and
733   /// limited by given \p FlatWorkGroupSize.
getWavesPerWorkGroup(unsigned FlatWorkGroupSize)734   unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
735     return AMDGPU::IsaInfo::getWavesPerWorkGroup(
736         MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
737   }
738 
739   // static wrappers
740   static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
741 
742   // XXX - Why is this here if it isn't in the default pass set?
enableEarlyIfConversion()743   bool enableEarlyIfConversion() const override {
744     return true;
745   }
746 
747   void overrideSchedPolicy(MachineSchedPolicy &Policy,
748                            unsigned NumRegionInstrs) const override;
749 
750   bool isVGPRSpillingEnabled(const Function &F) const;
751 
getMaxNumUserSGPRs()752   unsigned getMaxNumUserSGPRs() const {
753     return 16;
754   }
755 
hasSMemRealTime()756   bool hasSMemRealTime() const {
757     return HasSMemRealTime;
758   }
759 
hasMovrel()760   bool hasMovrel() const {
761     return HasMovrel;
762   }
763 
hasVGPRIndexMode()764   bool hasVGPRIndexMode() const {
765     return HasVGPRIndexMode;
766   }
767 
useVGPRIndexMode(bool UserEnable)768   bool useVGPRIndexMode(bool UserEnable) const {
769     return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
770   }
771 
hasScalarCompareEq64()772   bool hasScalarCompareEq64() const {
773     return getGeneration() >= VOLCANIC_ISLANDS;
774   }
775 
hasScalarStores()776   bool hasScalarStores() const {
777     return HasScalarStores;
778   }
779 
hasScalarAtomics()780   bool hasScalarAtomics() const {
781     return HasScalarAtomics;
782   }
783 
hasInv2PiInlineImm()784   bool hasInv2PiInlineImm() const {
785     return HasInv2PiInlineImm;
786   }
787 
hasDPP()788   bool hasDPP() const {
789     return HasDPP;
790   }
791 
enableSIScheduler()792   bool enableSIScheduler() const {
793     return EnableSIScheduler;
794   }
795 
debuggerSupported()796   bool debuggerSupported() const {
797     return debuggerInsertNops() && debuggerEmitPrologue();
798   }
799 
debuggerInsertNops()800   bool debuggerInsertNops() const {
801     return DebuggerInsertNops;
802   }
803 
debuggerEmitPrologue()804   bool debuggerEmitPrologue() const {
805     return DebuggerEmitPrologue;
806   }
807 
loadStoreOptEnabled()808   bool loadStoreOptEnabled() const {
809     return EnableLoadStoreOpt;
810   }
811 
hasSGPRInitBug()812   bool hasSGPRInitBug() const {
813     return SGPRInitBug;
814   }
815 
has12DWordStoreHazard()816   bool has12DWordStoreHazard() const {
817     return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
818   }
819 
hasSMovFedHazard()820   bool hasSMovFedHazard() const {
821     return getGeneration() >= AMDGPUSubtarget::GFX9;
822   }
823 
hasReadM0MovRelInterpHazard()824   bool hasReadM0MovRelInterpHazard() const {
825     return getGeneration() >= AMDGPUSubtarget::GFX9;
826   }
827 
hasReadM0SendMsgHazard()828   bool hasReadM0SendMsgHazard() const {
829     return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
830   }
831 
832   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
833   /// SGPRs
834   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
835 
836   /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
837   /// VGPRs
838   unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
839 
840   /// \returns true if the flat_scratch register should be initialized with the
841   /// pointer to the wave's scratch memory rather than a size and offset.
flatScratchIsPointer()842   bool flatScratchIsPointer() const {
843     return getGeneration() >= AMDGPUSubtarget::GFX9;
844   }
845 
846   /// \returns true if the machine has merged shaders in which s0-s7 are
847   /// reserved by the hardware and user SGPRs start at s8
hasMergedShaders()848   bool hasMergedShaders() const {
849     return getGeneration() >= GFX9;
850   }
851 
852   /// \returns SGPR allocation granularity supported by the subtarget.
getSGPRAllocGranule()853   unsigned getSGPRAllocGranule() const {
854     return AMDGPU::IsaInfo::getSGPRAllocGranule(
855         MCSubtargetInfo::getFeatureBits());
856   }
857 
858   /// \returns SGPR encoding granularity supported by the subtarget.
getSGPREncodingGranule()859   unsigned getSGPREncodingGranule() const {
860     return AMDGPU::IsaInfo::getSGPREncodingGranule(
861         MCSubtargetInfo::getFeatureBits());
862   }
863 
864   /// \returns Total number of SGPRs supported by the subtarget.
getTotalNumSGPRs()865   unsigned getTotalNumSGPRs() const {
866     return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
867   }
868 
869   /// \returns Addressable number of SGPRs supported by the subtarget.
getAddressableNumSGPRs()870   unsigned getAddressableNumSGPRs() const {
871     return AMDGPU::IsaInfo::getAddressableNumSGPRs(
872         MCSubtargetInfo::getFeatureBits());
873   }
874 
875   /// \returns Minimum number of SGPRs that meets the given number of waves per
876   /// execution unit requirement supported by the subtarget.
getMinNumSGPRs(unsigned WavesPerEU)877   unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
878     return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
879                                            WavesPerEU);
880   }
881 
882   /// \returns Maximum number of SGPRs that meets the given number of waves per
883   /// execution unit requirement supported by the subtarget.
getMaxNumSGPRs(unsigned WavesPerEU,bool Addressable)884   unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
885     return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
886                                            WavesPerEU, Addressable);
887   }
888 
889   /// \returns Reserved number of SGPRs for given function \p MF.
890   unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
891 
892   /// \returns Maximum number of SGPRs that meets number of waves per execution
893   /// unit requirement for function \p MF, or number of SGPRs explicitly
894   /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
895   ///
896   /// \returns Value that meets number of waves per execution unit requirement
897   /// if explicitly requested value cannot be converted to integer, violates
898   /// subtarget's specifications, or does not meet number of waves per execution
899   /// unit requirement.
900   unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
901 
902   /// \returns VGPR allocation granularity supported by the subtarget.
getVGPRAllocGranule()903   unsigned getVGPRAllocGranule() const {
904     return AMDGPU::IsaInfo::getVGPRAllocGranule(
905         MCSubtargetInfo::getFeatureBits());
906   }
907 
908   /// \returns VGPR encoding granularity supported by the subtarget.
getVGPREncodingGranule()909   unsigned getVGPREncodingGranule() const {
910     return AMDGPU::IsaInfo::getVGPREncodingGranule(
911         MCSubtargetInfo::getFeatureBits());
912   }
913 
914   /// \returns Total number of VGPRs supported by the subtarget.
getTotalNumVGPRs()915   unsigned getTotalNumVGPRs() const {
916     return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
917   }
918 
919   /// \returns Addressable number of VGPRs supported by the subtarget.
getAddressableNumVGPRs()920   unsigned getAddressableNumVGPRs() const {
921     return AMDGPU::IsaInfo::getAddressableNumVGPRs(
922         MCSubtargetInfo::getFeatureBits());
923   }
924 
925   /// \returns Minimum number of VGPRs that meets given number of waves per
926   /// execution unit requirement supported by the subtarget.
getMinNumVGPRs(unsigned WavesPerEU)927   unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
928     return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
929                                            WavesPerEU);
930   }
931 
932   /// \returns Maximum number of VGPRs that meets given number of waves per
933   /// execution unit requirement supported by the subtarget.
getMaxNumVGPRs(unsigned WavesPerEU)934   unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
935     return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
936                                            WavesPerEU);
937   }
938 
/// Per-function overload of getMaxNumVGPRs; declared here and defined out of
/// line.
///
/// \param MF Function whose VGPR limit is being queried.
///
/// \returns Maximum number of VGPRs that meets number of waves per execution
/// unit requirement for function \p MF, or number of VGPRs explicitly
/// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
///
/// Fallback: the value that meets the number of waves per execution unit
/// requirement is returned instead of the explicitly requested one when the
/// requested value cannot be converted to integer, violates subtarget's
/// specifications, or does not meet number of waves per execution unit
/// requirement.
unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
948 
/// Override of the base-class getPostRAMutations hook; declared here and
/// defined out of line.
///
/// \param Mutations List of owned ScheduleDAGMutation objects, passed by
/// non-const reference so the callee can modify it.
///
/// NOTE(review): presumably appends this subtarget's post-RA scheduling
/// mutations to \p Mutations -- confirm against the definition.
void getPostRAMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
    const override;
952 };
953 
954 class R600Subtarget final : public R600GenSubtargetInfo,
955                             public AMDGPUSubtarget {
956 private:
957   R600InstrInfo InstrInfo;
958   R600FrameLowering FrameLowering;
959   bool FMA;
960   bool CaymanISA;
961   bool CFALUBug;
962   bool DX10Clamp;
963   bool HasVertexCache;
964   bool R600ALUInst;
965   bool FP64;
966   short TexVTXClauseSize;
967   Generation Gen;
968   R600TargetLowering TLInfo;
969   InstrItineraryData InstrItins;
970   SelectionDAGTargetInfo TSInfo;
971   AMDGPUAS AS;
972 
973 public:
974   R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
975                 const TargetMachine &TM);
976 
getInstrInfo()977   const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
978 
getFrameLowering()979   const R600FrameLowering *getFrameLowering() const override {
980     return &FrameLowering;
981   }
982 
getTargetLowering()983   const R600TargetLowering *getTargetLowering() const override {
984     return &TLInfo;
985   }
986 
getRegisterInfo()987   const R600RegisterInfo *getRegisterInfo() const override {
988     return &InstrInfo.getRegisterInfo();
989   }
990 
getInstrItineraryData()991   const InstrItineraryData *getInstrItineraryData() const override {
992     return &InstrItins;
993   }
994 
995   // Nothing implemented, just prevent crashes on use.
getSelectionDAGInfo()996   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
997     return &TSInfo;
998   }
999 
1000   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1001 
getGeneration()1002   Generation getGeneration() const {
1003     return Gen;
1004   }
1005 
getStackAlignment()1006   unsigned getStackAlignment() const {
1007     return 4;
1008   }
1009 
1010   R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1011                                                  StringRef GPU, StringRef FS);
1012 
hasBFE()1013   bool hasBFE() const {
1014     return (getGeneration() >= EVERGREEN);
1015   }
1016 
hasBFI()1017   bool hasBFI() const {
1018     return (getGeneration() >= EVERGREEN);
1019   }
1020 
hasBCNT(unsigned Size)1021   bool hasBCNT(unsigned Size) const {
1022     if (Size == 32)
1023       return (getGeneration() >= EVERGREEN);
1024 
1025     return false;
1026   }
1027 
hasBORROW()1028   bool hasBORROW() const {
1029     return (getGeneration() >= EVERGREEN);
1030   }
1031 
hasCARRY()1032   bool hasCARRY() const {
1033     return (getGeneration() >= EVERGREEN);
1034   }
1035 
hasCaymanISA()1036   bool hasCaymanISA() const {
1037     return CaymanISA;
1038   }
1039 
hasFFBL()1040   bool hasFFBL() const {
1041     return (getGeneration() >= EVERGREEN);
1042   }
1043 
hasFFBH()1044   bool hasFFBH() const {
1045     return (getGeneration() >= EVERGREEN);
1046   }
1047 
hasFMA()1048   bool hasFMA() const { return FMA; }
1049 
hasCFAluBug()1050   bool hasCFAluBug() const { return CFALUBug; }
1051 
hasVertexCache()1052   bool hasVertexCache() const { return HasVertexCache; }
1053 
getTexVTXClauseSize()1054   short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1055 
getAMDGPUAS()1056   AMDGPUAS getAMDGPUAS() const { return AS; }
1057 
enableMachineScheduler()1058   bool enableMachineScheduler() const override {
1059     return true;
1060   }
1061 
enableSubRegLiveness()1062   bool enableSubRegLiveness() const override {
1063     return true;
1064   }
1065 };
1066 
1067 } // end namespace llvm
1068 
1069 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
1070