1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
12 
13 #include "AMDGPU.h"
14 #include "AMDKernelCodeT.h"
15 #include "SIDefines.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/IR/CallingConv.h"
18 #include "llvm/MC/MCInstrDesc.h"
19 #include "llvm/Support/AMDHSAKernelDescriptor.h"
20 #include "llvm/Support/Compiler.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include <cstdint>
23 #include <string>
24 #include <utility>
25 
26 namespace llvm {
27 
// Forward declarations of types that this header only uses by pointer or
// reference, so their full definitions are not needed here.
class Argument;
class FeatureBitset;
class Function;
class GlobalValue;
class MCContext;
class MCRegisterClass;
class MCRegisterInfo;
class MCSection;
class MCSubtargetInfo;
class MachineMemOperand;
class Triple;
// Needed by streamIsaVersion(); declared here so the header does not depend on
// a transitive include to make the name visible.
class raw_ostream;
39 
40 namespace AMDGPU {
41 
42 #define GET_MIMGBaseOpcode_DECL
43 #define GET_MIMGDim_DECL
44 #define GET_MIMGEncoding_DECL
45 #define GET_MIMGLZMapping_DECL
46 #include "AMDGPUGenSearchableTables.inc"
47 
48 namespace IsaInfo {
49 
/// Fixed SGPR counts shared by the IsaInfo helpers.
enum {
  // NOTE(review): presumably the number of SGPRs set aside for the trap
  // handler -- confirm against the users of this constant.
  TRAP_NUM_SGPRS = 16,

  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
};
56 
/// Version triple (major, minor, stepping) of an instruction set
/// architecture.
///
/// This is a plain aggregate: it has no constructors, so it can be
/// brace-initialized in member order.
struct IsaVersion {
  /// Major version component.
  unsigned Major;
  /// Minor version component.
  unsigned Minor;
  /// Stepping component.
  unsigned Stepping;
};
63 
64 /// \returns Isa version for given subtarget \p Features.
65 IsaVersion getIsaVersion(const FeatureBitset &Features);
66 
67 /// Streams isa version string for given subtarget \p STI into \p Stream.
68 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
69 
70 /// \returns True if given subtarget \p STI supports code object version 3,
71 /// false otherwise.
72 bool hasCodeObjectV3(const MCSubtargetInfo *STI);
73 
74 /// \returns Wavefront size for given subtarget \p Features.
75 unsigned getWavefrontSize(const FeatureBitset &Features);
76 
77 /// \returns Local memory size in bytes for given subtarget \p Features.
78 unsigned getLocalMemorySize(const FeatureBitset &Features);
79 
80 /// \returns Number of execution units per compute unit for given subtarget \p
81 /// Features.
82 unsigned getEUsPerCU(const FeatureBitset &Features);
83 
84 /// \returns Maximum number of work groups per compute unit for given subtarget
85 /// \p Features and limited by given \p FlatWorkGroupSize.
86 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
87                                unsigned FlatWorkGroupSize);
88 
89 /// \returns Maximum number of waves per compute unit for given subtarget \p
90 /// Features without any kind of limitation.
91 unsigned getMaxWavesPerCU(const FeatureBitset &Features);
92 
93 /// \returns Maximum number of waves per compute unit for given subtarget \p
94 /// Features and limited by given \p FlatWorkGroupSize.
95 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
96                           unsigned FlatWorkGroupSize);
97 
98 /// \returns Minimum number of waves per execution unit for given subtarget \p
99 /// Features.
100 unsigned getMinWavesPerEU(const FeatureBitset &Features);
101 
/// \returns Maximum number of waves per execution unit without any kind of
/// limitation.
104 unsigned getMaxWavesPerEU();
105 
106 /// \returns Maximum number of waves per execution unit for given subtarget \p
107 /// Features and limited by given \p FlatWorkGroupSize.
108 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
109                           unsigned FlatWorkGroupSize);
110 
111 /// \returns Minimum flat work group size for given subtarget \p Features.
112 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
113 
114 /// \returns Maximum flat work group size for given subtarget \p Features.
115 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
116 
117 /// \returns Number of waves per work group for given subtarget \p Features and
118 /// limited by given \p FlatWorkGroupSize.
119 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
120                               unsigned FlatWorkGroupSize);
121 
122 /// \returns SGPR allocation granularity for given subtarget \p Features.
123 unsigned getSGPRAllocGranule(const FeatureBitset &Features);
124 
125 /// \returns SGPR encoding granularity for given subtarget \p Features.
126 unsigned getSGPREncodingGranule(const FeatureBitset &Features);
127 
128 /// \returns Total number of SGPRs for given subtarget \p Features.
129 unsigned getTotalNumSGPRs(const FeatureBitset &Features);
130 
131 /// \returns Addressable number of SGPRs for given subtarget \p Features.
132 unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
133 
134 /// \returns Minimum number of SGPRs that meets the given number of waves per
135 /// execution unit requirement for given subtarget \p Features.
136 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
137 
138 /// \returns Maximum number of SGPRs that meets the given number of waves per
139 /// execution unit requirement for given subtarget \p Features.
140 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
141                         bool Addressable);
142 
143 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
144 /// Features when the given special registers are used.
145 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
146                           bool FlatScrUsed, bool XNACKUsed);
147 
148 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
149 /// Features when the given special registers are used. XNACK is inferred from
150 /// \p Features.
151 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
152                           bool FlatScrUsed);
153 
154 /// \returns Number of SGPR blocks needed for given subtarget \p Features when
155 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
156 /// register counts.
157 unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
158 
159 /// \returns VGPR allocation granularity for given subtarget \p Features.
160 unsigned getVGPRAllocGranule(const FeatureBitset &Features);
161 
162 /// \returns VGPR encoding granularity for given subtarget \p Features.
163 unsigned getVGPREncodingGranule(const FeatureBitset &Features);
164 
165 /// \returns Total number of VGPRs for given subtarget \p Features.
166 unsigned getTotalNumVGPRs(const FeatureBitset &Features);
167 
168 /// \returns Addressable number of VGPRs for given subtarget \p Features.
169 unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
170 
171 /// \returns Minimum number of VGPRs that meets given number of waves per
172 /// execution unit requirement for given subtarget \p Features.
173 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
174 
175 /// \returns Maximum number of VGPRs that meets given number of waves per
176 /// execution unit requirement for given subtarget \p Features.
177 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
178 
179 /// \returns Number of VGPR blocks needed for given subtarget \p Features when
180 /// \p NumVGPRs are used.
181 unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
182 
183 } // end namespace IsaInfo
184 
185 LLVM_READONLY
186 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
187 
188 struct MIMGBaseOpcodeInfo {
189   MIMGBaseOpcode BaseOpcode;
190   bool Store;
191   bool Atomic;
192   bool AtomicX2;
193   bool Sampler;
194 
195   uint8_t NumExtraArgs;
196   bool Gradients;
197   bool Coordinates;
198   bool LodOrClampOrMip;
199   bool HasD16;
200 };
201 
202 LLVM_READONLY
203 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
204 
205 struct MIMGDimInfo {
206   MIMGDim Dim;
207   uint8_t NumCoords;
208   uint8_t NumGradients;
209   bool DA;
210 };
211 
212 LLVM_READONLY
213 const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
214 
215 struct MIMGLZMappingInfo {
216   MIMGBaseOpcode L;
217   MIMGBaseOpcode LZ;
218 };
219 
220 LLVM_READONLY
221 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
222 
223 LLVM_READONLY
224 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
225                   unsigned VDataDwords, unsigned VAddrDwords);
226 
227 LLVM_READONLY
228 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
229 
230 LLVM_READONLY
231 int getMCOpcode(uint16_t Opcode, unsigned Gen);
232 
233 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
234                                const FeatureBitset &Features);
235 
236 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
237 
238 bool isGroupSegment(const GlobalValue *GV);
239 bool isGlobalSegment(const GlobalValue *GV);
240 bool isReadOnlySegment(const GlobalValue *GV);
241 
242 /// \returns True if constants should be emitted to .text section for given
243 /// target triple \p TT, false otherwise.
244 bool shouldEmitConstantsToTextSection(const Triple &TT);
245 
246 /// \returns Integer value requested using \p F's \p Name attribute.
247 ///
248 /// \returns \p Default if attribute is not present.
249 ///
250 /// \returns \p Default and emits error if requested value cannot be converted
251 /// to integer.
252 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
253 
/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" may be omitted only when
/// \p OnlyFirstRequired is true).
257 ///
258 /// \returns \p Default if attribute is not present.
259 ///
260 /// \returns \p Default and emits error if one of the requested values cannot be
261 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
262 /// not present.
263 std::pair<int, int> getIntegerPairAttribute(const Function &F,
264                                             StringRef Name,
265                                             std::pair<int, int> Default,
266                                             bool OnlyFirstRequired = false);
267 
268 /// \returns Vmcnt bit mask for given isa \p Version.
269 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
270 
271 /// \returns Expcnt bit mask for given isa \p Version.
272 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
273 
274 /// \returns Lgkmcnt bit mask for given isa \p Version.
275 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
276 
277 /// \returns Waitcnt bit mask for given isa \p Version.
278 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
279 
280 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
281 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
282 
283 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
284 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
285 
286 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
287 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
288 
289 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
290 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
291 /// \p Lgkmcnt respectively.
292 ///
293 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
294 ///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
295 ///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
296 ///     \p Expcnt = \p Waitcnt[6:4]
297 ///     \p Lgkmcnt = \p Waitcnt[11:8]
298 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
299                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
300 
301 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
302 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
303                      unsigned Vmcnt);
304 
305 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
306 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
307                       unsigned Expcnt);
308 
309 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
310 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
311                        unsigned Lgkmcnt);
312 
313 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
314 /// \p Version.
315 ///
316 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
317 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
318 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
319 ///     Waitcnt[6:4]   = \p Expcnt
320 ///     Waitcnt[11:8]  = \p Lgkmcnt
321 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
322 ///
323 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
324 /// isa \p Version.
325 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
326                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
327 
328 unsigned getInitialPSInputAddr(const Function &F);
329 
330 LLVM_READNONE
331 bool isShader(CallingConv::ID CC);
332 
333 LLVM_READNONE
334 bool isCompute(CallingConv::ID CC);
335 
336 LLVM_READNONE
337 bool isEntryFunctionCC(CallingConv::ID CC);
338 
339 // FIXME: Remove this when calling conventions cleaned up
340 LLVM_READNONE
isKernel(CallingConv::ID CC)341 inline bool isKernel(CallingConv::ID CC) {
342   switch (CC) {
343   case CallingConv::AMDGPU_KERNEL:
344   case CallingConv::SPIR_KERNEL:
345     return true;
346   default:
347     return false;
348   }
349 }
350 
351 bool hasXNACK(const MCSubtargetInfo &STI);
352 bool hasMIMG_R128(const MCSubtargetInfo &STI);
353 bool hasPackedD16(const MCSubtargetInfo &STI);
354 
355 bool isSI(const MCSubtargetInfo &STI);
356 bool isCI(const MCSubtargetInfo &STI);
357 bool isVI(const MCSubtargetInfo &STI);
358 bool isGFX9(const MCSubtargetInfo &STI);
359 
/// Is \p Reg a scalar register (SGPR)?
361 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
362 
363 /// Is there any intersection between registers
364 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
365 
366 /// If \p Reg is a pseudo reg, return the correct hardware register given
367 /// \p STI otherwise return \p Reg.
368 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
369 
370 /// Convert hardware register \p Reg to a pseudo register
371 LLVM_READNONE
372 unsigned mc2PseudoReg(unsigned Reg);
373 
374 /// Can this operand also contain immediate values?
375 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
376 
/// Is this a floating-point operand?
378 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
379 
/// Does this operand support only inlinable literals?
381 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
382 
/// Get the size in bits of a register from the register class identified by
/// \p RCID.
384 unsigned getRegBitWidth(unsigned RCID);
385 
386 /// Get the size in bits of a register from the register class \p RC.
387 unsigned getRegBitWidth(const MCRegisterClass &RC);
388 
389 /// Get size of register operand
390 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
391                            unsigned OpNo);
392 
393 LLVM_READNONE
getOperandSize(const MCOperandInfo & OpInfo)394 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
395   switch (OpInfo.OperandType) {
396   case AMDGPU::OPERAND_REG_IMM_INT32:
397   case AMDGPU::OPERAND_REG_IMM_FP32:
398   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
399   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
400     return 4;
401 
402   case AMDGPU::OPERAND_REG_IMM_INT64:
403   case AMDGPU::OPERAND_REG_IMM_FP64:
404   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
405   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
406     return 8;
407 
408   case AMDGPU::OPERAND_REG_IMM_INT16:
409   case AMDGPU::OPERAND_REG_IMM_FP16:
410   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
411   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
412   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
413   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
414     return 2;
415 
416   default:
417     llvm_unreachable("unhandled operand type");
418   }
419 }
420 
421 LLVM_READNONE
getOperandSize(const MCInstrDesc & Desc,unsigned OpNo)422 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
423   return getOperandSize(Desc.OpInfo[OpNo]);
424 }
425 
426 /// Is this literal inlinable
427 LLVM_READNONE
428 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
429 
430 LLVM_READNONE
431 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
432 
433 LLVM_READNONE
434 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
435 
436 LLVM_READNONE
437 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
438 
439 bool isArgPassedInSGPR(const Argument *Arg);
440 
441 /// \returns The encoding that will be used for \p ByteOffset in the SMRD
442 /// offset field.
443 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
444 
445 /// \returns true if this offset is small enough to fit in the SMRD
446 /// offset field.  \p ByteOffset should be the offset in bytes and
447 /// not the encoded offset.
448 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
449 
450 /// \returns true if the intrinsic is divergent
451 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
452 
453 } // end namespace AMDGPU
454 } // end namespace llvm
455 
456 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
457