1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// Interface definition for SIInstrInfo.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
16 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
17 
18 #include "AMDGPUInstrInfo.h"
19 #include "SIDefines.h"
20 #include "SIRegisterInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/SetVector.h"
24 #include "llvm/CodeGen/MachineBasicBlock.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstr.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/MC/MCInstrDesc.h"
30 #include "llvm/Support/Compiler.h"
31 #include <cassert>
32 #include <cstdint>
33 
34 #define GET_INSTRINFO_HEADER
35 #include "AMDGPUGenInstrInfo.inc"
36 
37 namespace llvm {
38 
39 class APInt;
40 class MachineRegisterInfo;
41 class RegScavenger;
42 class GCNSubtarget;
43 class TargetRegisterClass;
44 
45 class SIInstrInfo final : public AMDGPUGenInstrInfo {
46 private:
47   const SIRegisterInfo RI;
48   const GCNSubtarget &ST;
49 
50   // The inverse predicate should have the negative value.
51   enum BranchPredicate {
52     INVALID_BR = 0,
53     SCC_TRUE = 1,
54     SCC_FALSE = -1,
55     VCCNZ = 2,
56     VCCZ = -2,
57     EXECNZ = -3,
58     EXECZ = 3
59   };
60 
61   using SetVectorType = SmallSetVector<MachineInstr *, 32>;
62 
63   static unsigned getBranchOpcode(BranchPredicate Cond);
64   static BranchPredicate getBranchPredicate(unsigned Opcode);
65 
66 public:
67   unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
68                               MachineRegisterInfo &MRI,
69                               MachineOperand &SuperReg,
70                               const TargetRegisterClass *SuperRC,
71                               unsigned SubIdx,
72                               const TargetRegisterClass *SubRC) const;
73   MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
74                                          MachineRegisterInfo &MRI,
75                                          MachineOperand &SuperReg,
76                                          const TargetRegisterClass *SuperRC,
77                                          unsigned SubIdx,
78                                          const TargetRegisterClass *SubRC) const;
79 private:
80   void swapOperands(MachineInstr &Inst) const;
81 
82   bool moveScalarAddSub(SetVectorType &Worklist,
83                         MachineInstr &Inst) const;
84 
85   void lowerScalarAbs(SetVectorType &Worklist,
86                       MachineInstr &Inst) const;
87 
88   void lowerScalarXnor(SetVectorType &Worklist,
89                        MachineInstr &Inst) const;
90 
91   void splitScalar64BitUnaryOp(SetVectorType &Worklist,
92                                MachineInstr &Inst, unsigned Opcode) const;
93 
94   void splitScalar64BitAddSub(SetVectorType &Worklist,
95                               MachineInstr &Inst) const;
96 
97   void splitScalar64BitBinaryOp(SetVectorType &Worklist,
98                                 MachineInstr &Inst, unsigned Opcode) const;
99 
100   void splitScalar64BitBCNT(SetVectorType &Worklist,
101                             MachineInstr &Inst) const;
102   void splitScalar64BitBFE(SetVectorType &Worklist,
103                            MachineInstr &Inst) const;
104   void movePackToVALU(SetVectorType &Worklist,
105                       MachineRegisterInfo &MRI,
106                       MachineInstr &Inst) const;
107 
108   void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI,
109                                     SetVectorType &Worklist) const;
110 
111   void
112   addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst,
113                                SetVectorType &Worklist) const;
114 
115   const TargetRegisterClass *
116   getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
117 
118   bool checkInstOffsetsDoNotOverlap(MachineInstr &MIa, MachineInstr &MIb) const;
119 
120   unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
121 
122 protected:
123   bool swapSourceModifiers(MachineInstr &MI,
124                            MachineOperand &Src0, unsigned Src0OpName,
125                            MachineOperand &Src1, unsigned Src1OpName) const;
126 
127   MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
128                                        unsigned OpIdx0,
129                                        unsigned OpIdx1) const override;
130 
131 public:
132   enum TargetOperandFlags {
133     MO_MASK = 0x7,
134 
135     MO_NONE = 0,
136     // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
137     MO_GOTPCREL = 1,
138     // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
139     MO_GOTPCREL32 = 2,
140     MO_GOTPCREL32_LO = 2,
141     // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
142     MO_GOTPCREL32_HI = 3,
143     // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
144     MO_REL32 = 4,
145     MO_REL32_LO = 4,
146     // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
147     MO_REL32_HI = 5
148   };
149 
150   explicit SIInstrInfo(const GCNSubtarget &ST);
151 
getRegisterInfo()152   const SIRegisterInfo &getRegisterInfo() const {
153     return RI;
154   }
155 
156   bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
157                                          AliasAnalysis *AA) const override;
158 
159   bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
160                                int64_t &Offset1,
161                                int64_t &Offset2) const override;
162 
163   bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
164                              int64_t &Offset,
165                              const TargetRegisterInfo *TRI) const final;
166 
167   bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1,
168                            MachineInstr &SecondLdSt, unsigned BaseReg2,
169                            unsigned NumLoads) const override;
170 
171   bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
172                                int64_t Offset1, unsigned NumLoads) const override;
173 
174   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
175                    const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
176                    bool KillSrc) const override;
177 
178   unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, MachineInstr &MI,
179                                     RegScavenger *RS, unsigned TmpReg,
180                                     unsigned Offset, unsigned Size) const;
181 
182   void materializeImmediate(MachineBasicBlock &MBB,
183                             MachineBasicBlock::iterator MI,
184                             const DebugLoc &DL,
185                             unsigned DestReg,
186                             int64_t Value) const;
187 
188   const TargetRegisterClass *getPreferredSelectRegClass(
189                                unsigned Size) const;
190 
191   unsigned insertNE(MachineBasicBlock *MBB,
192                     MachineBasicBlock::iterator I, const DebugLoc &DL,
193                     unsigned SrcReg, int Value) const;
194 
195   unsigned insertEQ(MachineBasicBlock *MBB,
196                     MachineBasicBlock::iterator I, const DebugLoc &DL,
197                     unsigned SrcReg, int Value)  const;
198 
199   void storeRegToStackSlot(MachineBasicBlock &MBB,
200                            MachineBasicBlock::iterator MI, unsigned SrcReg,
201                            bool isKill, int FrameIndex,
202                            const TargetRegisterClass *RC,
203                            const TargetRegisterInfo *TRI) const override;
204 
205   void loadRegFromStackSlot(MachineBasicBlock &MBB,
206                             MachineBasicBlock::iterator MI, unsigned DestReg,
207                             int FrameIndex, const TargetRegisterClass *RC,
208                             const TargetRegisterInfo *TRI) const override;
209 
210   bool expandPostRAPseudo(MachineInstr &MI) const override;
211 
212   // Returns an opcode that can be used to move a value to a \p DstRC
213   // register.  If there is no hardware instruction that can store to \p
214   // DstRC, then AMDGPU::COPY is returned.
215   unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
216 
217   LLVM_READONLY
218   int commuteOpcode(unsigned Opc) const;
219 
220   LLVM_READONLY
commuteOpcode(const MachineInstr & MI)221   inline int commuteOpcode(const MachineInstr &MI) const {
222     return commuteOpcode(MI.getOpcode());
223   }
224 
225   bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
226                              unsigned &SrcOpIdx2) const override;
227 
228   bool isBranchOffsetInRange(unsigned BranchOpc,
229                              int64_t BrOffset) const override;
230 
231   MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
232 
233   unsigned insertIndirectBranch(MachineBasicBlock &MBB,
234                                 MachineBasicBlock &NewDestBB,
235                                 const DebugLoc &DL,
236                                 int64_t BrOffset,
237                                 RegScavenger *RS = nullptr) const override;
238 
239   bool analyzeBranchImpl(MachineBasicBlock &MBB,
240                          MachineBasicBlock::iterator I,
241                          MachineBasicBlock *&TBB,
242                          MachineBasicBlock *&FBB,
243                          SmallVectorImpl<MachineOperand> &Cond,
244                          bool AllowModify) const;
245 
246   bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
247                      MachineBasicBlock *&FBB,
248                      SmallVectorImpl<MachineOperand> &Cond,
249                      bool AllowModify = false) const override;
250 
251   unsigned removeBranch(MachineBasicBlock &MBB,
252                         int *BytesRemoved = nullptr) const override;
253 
254   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
255                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
256                         const DebugLoc &DL,
257                         int *BytesAdded = nullptr) const override;
258 
259   bool reverseBranchCondition(
260     SmallVectorImpl<MachineOperand> &Cond) const override;
261 
262   bool canInsertSelect(const MachineBasicBlock &MBB,
263                        ArrayRef<MachineOperand> Cond,
264                        unsigned TrueReg, unsigned FalseReg,
265                        int &CondCycles,
266                        int &TrueCycles, int &FalseCycles) const override;
267 
268   void insertSelect(MachineBasicBlock &MBB,
269                     MachineBasicBlock::iterator I, const DebugLoc &DL,
270                     unsigned DstReg, ArrayRef<MachineOperand> Cond,
271                     unsigned TrueReg, unsigned FalseReg) const override;
272 
273   void insertVectorSelect(MachineBasicBlock &MBB,
274                           MachineBasicBlock::iterator I, const DebugLoc &DL,
275                           unsigned DstReg, ArrayRef<MachineOperand> Cond,
276                           unsigned TrueReg, unsigned FalseReg) const;
277 
278   unsigned getAddressSpaceForPseudoSourceKind(
279              PseudoSourceValue::PSVKind Kind) const override;
280 
281   bool
282   areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
283                                   AliasAnalysis *AA = nullptr) const override;
284 
285   bool isFoldableCopy(const MachineInstr &MI) const;
286 
287   bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
288                      MachineRegisterInfo *MRI) const final;
289 
getMachineCSELookAheadLimit()290   unsigned getMachineCSELookAheadLimit() const override { return 500; }
291 
292   MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB,
293                                       MachineInstr &MI,
294                                       LiveVariables *LV) const override;
295 
296   bool isSchedulingBoundary(const MachineInstr &MI,
297                             const MachineBasicBlock *MBB,
298                             const MachineFunction &MF) const override;
299 
isSALU(const MachineInstr & MI)300   static bool isSALU(const MachineInstr &MI) {
301     return MI.getDesc().TSFlags & SIInstrFlags::SALU;
302   }
303 
isSALU(uint16_t Opcode)304   bool isSALU(uint16_t Opcode) const {
305     return get(Opcode).TSFlags & SIInstrFlags::SALU;
306   }
307 
isVALU(const MachineInstr & MI)308   static bool isVALU(const MachineInstr &MI) {
309     return MI.getDesc().TSFlags & SIInstrFlags::VALU;
310   }
311 
isVALU(uint16_t Opcode)312   bool isVALU(uint16_t Opcode) const {
313     return get(Opcode).TSFlags & SIInstrFlags::VALU;
314   }
315 
isVMEM(const MachineInstr & MI)316   static bool isVMEM(const MachineInstr &MI) {
317     return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
318   }
319 
isVMEM(uint16_t Opcode)320   bool isVMEM(uint16_t Opcode) const {
321     return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
322   }
323 
isSOP1(const MachineInstr & MI)324   static bool isSOP1(const MachineInstr &MI) {
325     return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
326   }
327 
isSOP1(uint16_t Opcode)328   bool isSOP1(uint16_t Opcode) const {
329     return get(Opcode).TSFlags & SIInstrFlags::SOP1;
330   }
331 
isSOP2(const MachineInstr & MI)332   static bool isSOP2(const MachineInstr &MI) {
333     return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
334   }
335 
isSOP2(uint16_t Opcode)336   bool isSOP2(uint16_t Opcode) const {
337     return get(Opcode).TSFlags & SIInstrFlags::SOP2;
338   }
339 
isSOPC(const MachineInstr & MI)340   static bool isSOPC(const MachineInstr &MI) {
341     return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
342   }
343 
isSOPC(uint16_t Opcode)344   bool isSOPC(uint16_t Opcode) const {
345     return get(Opcode).TSFlags & SIInstrFlags::SOPC;
346   }
347 
isSOPK(const MachineInstr & MI)348   static bool isSOPK(const MachineInstr &MI) {
349     return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
350   }
351 
isSOPK(uint16_t Opcode)352   bool isSOPK(uint16_t Opcode) const {
353     return get(Opcode).TSFlags & SIInstrFlags::SOPK;
354   }
355 
isSOPP(const MachineInstr & MI)356   static bool isSOPP(const MachineInstr &MI) {
357     return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
358   }
359 
isSOPP(uint16_t Opcode)360   bool isSOPP(uint16_t Opcode) const {
361     return get(Opcode).TSFlags & SIInstrFlags::SOPP;
362   }
363 
isVOP1(const MachineInstr & MI)364   static bool isVOP1(const MachineInstr &MI) {
365     return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
366   }
367 
isVOP1(uint16_t Opcode)368   bool isVOP1(uint16_t Opcode) const {
369     return get(Opcode).TSFlags & SIInstrFlags::VOP1;
370   }
371 
isVOP2(const MachineInstr & MI)372   static bool isVOP2(const MachineInstr &MI) {
373     return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
374   }
375 
isVOP2(uint16_t Opcode)376   bool isVOP2(uint16_t Opcode) const {
377     return get(Opcode).TSFlags & SIInstrFlags::VOP2;
378   }
379 
isVOP3(const MachineInstr & MI)380   static bool isVOP3(const MachineInstr &MI) {
381     return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
382   }
383 
isVOP3(uint16_t Opcode)384   bool isVOP3(uint16_t Opcode) const {
385     return get(Opcode).TSFlags & SIInstrFlags::VOP3;
386   }
387 
isSDWA(const MachineInstr & MI)388   static bool isSDWA(const MachineInstr &MI) {
389     return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
390   }
391 
isSDWA(uint16_t Opcode)392   bool isSDWA(uint16_t Opcode) const {
393     return get(Opcode).TSFlags & SIInstrFlags::SDWA;
394   }
395 
isVOPC(const MachineInstr & MI)396   static bool isVOPC(const MachineInstr &MI) {
397     return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
398   }
399 
isVOPC(uint16_t Opcode)400   bool isVOPC(uint16_t Opcode) const {
401     return get(Opcode).TSFlags & SIInstrFlags::VOPC;
402   }
403 
isMUBUF(const MachineInstr & MI)404   static bool isMUBUF(const MachineInstr &MI) {
405     return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
406   }
407 
isMUBUF(uint16_t Opcode)408   bool isMUBUF(uint16_t Opcode) const {
409     return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
410   }
411 
isMTBUF(const MachineInstr & MI)412   static bool isMTBUF(const MachineInstr &MI) {
413     return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
414   }
415 
isMTBUF(uint16_t Opcode)416   bool isMTBUF(uint16_t Opcode) const {
417     return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
418   }
419 
isSMRD(const MachineInstr & MI)420   static bool isSMRD(const MachineInstr &MI) {
421     return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
422   }
423 
isSMRD(uint16_t Opcode)424   bool isSMRD(uint16_t Opcode) const {
425     return get(Opcode).TSFlags & SIInstrFlags::SMRD;
426   }
427 
428   bool isBufferSMRD(const MachineInstr &MI) const;
429 
isDS(const MachineInstr & MI)430   static bool isDS(const MachineInstr &MI) {
431     return MI.getDesc().TSFlags & SIInstrFlags::DS;
432   }
433 
isDS(uint16_t Opcode)434   bool isDS(uint16_t Opcode) const {
435     return get(Opcode).TSFlags & SIInstrFlags::DS;
436   }
437 
isMIMG(const MachineInstr & MI)438   static bool isMIMG(const MachineInstr &MI) {
439     return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
440   }
441 
isMIMG(uint16_t Opcode)442   bool isMIMG(uint16_t Opcode) const {
443     return get(Opcode).TSFlags & SIInstrFlags::MIMG;
444   }
445 
isGather4(const MachineInstr & MI)446   static bool isGather4(const MachineInstr &MI) {
447     return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
448   }
449 
isGather4(uint16_t Opcode)450   bool isGather4(uint16_t Opcode) const {
451     return get(Opcode).TSFlags & SIInstrFlags::Gather4;
452   }
453 
isFLAT(const MachineInstr & MI)454   static bool isFLAT(const MachineInstr &MI) {
455     return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
456   }
457 
458   // Is a FLAT encoded instruction which accesses a specific segment,
459   // i.e. global_* or scratch_*.
isSegmentSpecificFLAT(const MachineInstr & MI)460   static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
461     auto Flags = MI.getDesc().TSFlags;
462     return (Flags & SIInstrFlags::FLAT) && !(Flags & SIInstrFlags::LGKM_CNT);
463   }
464 
465   // Any FLAT encoded instruction, including global_* and scratch_*.
isFLAT(uint16_t Opcode)466   bool isFLAT(uint16_t Opcode) const {
467     return get(Opcode).TSFlags & SIInstrFlags::FLAT;
468   }
469 
isEXP(const MachineInstr & MI)470   static bool isEXP(const MachineInstr &MI) {
471     return MI.getDesc().TSFlags & SIInstrFlags::EXP;
472   }
473 
isEXP(uint16_t Opcode)474   bool isEXP(uint16_t Opcode) const {
475     return get(Opcode).TSFlags & SIInstrFlags::EXP;
476   }
477 
isWQM(const MachineInstr & MI)478   static bool isWQM(const MachineInstr &MI) {
479     return MI.getDesc().TSFlags & SIInstrFlags::WQM;
480   }
481 
isWQM(uint16_t Opcode)482   bool isWQM(uint16_t Opcode) const {
483     return get(Opcode).TSFlags & SIInstrFlags::WQM;
484   }
485 
isDisableWQM(const MachineInstr & MI)486   static bool isDisableWQM(const MachineInstr &MI) {
487     return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
488   }
489 
isDisableWQM(uint16_t Opcode)490   bool isDisableWQM(uint16_t Opcode) const {
491     return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
492   }
493 
isVGPRSpill(const MachineInstr & MI)494   static bool isVGPRSpill(const MachineInstr &MI) {
495     return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
496   }
497 
isVGPRSpill(uint16_t Opcode)498   bool isVGPRSpill(uint16_t Opcode) const {
499     return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
500   }
501 
isSGPRSpill(const MachineInstr & MI)502   static bool isSGPRSpill(const MachineInstr &MI) {
503     return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
504   }
505 
isSGPRSpill(uint16_t Opcode)506   bool isSGPRSpill(uint16_t Opcode) const {
507     return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
508   }
509 
isDPP(const MachineInstr & MI)510   static bool isDPP(const MachineInstr &MI) {
511     return MI.getDesc().TSFlags & SIInstrFlags::DPP;
512   }
513 
isDPP(uint16_t Opcode)514   bool isDPP(uint16_t Opcode) const {
515     return get(Opcode).TSFlags & SIInstrFlags::DPP;
516   }
517 
isVOP3P(const MachineInstr & MI)518   static bool isVOP3P(const MachineInstr &MI) {
519     return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
520   }
521 
isVOP3P(uint16_t Opcode)522   bool isVOP3P(uint16_t Opcode) const {
523     return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
524   }
525 
isVINTRP(const MachineInstr & MI)526   static bool isVINTRP(const MachineInstr &MI) {
527     return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
528   }
529 
isVINTRP(uint16_t Opcode)530   bool isVINTRP(uint16_t Opcode) const {
531     return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
532   }
533 
isScalarUnit(const MachineInstr & MI)534   static bool isScalarUnit(const MachineInstr &MI) {
535     return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
536   }
537 
usesVM_CNT(const MachineInstr & MI)538   static bool usesVM_CNT(const MachineInstr &MI) {
539     return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
540   }
541 
usesLGKM_CNT(const MachineInstr & MI)542   static bool usesLGKM_CNT(const MachineInstr &MI) {
543     return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
544   }
545 
sopkIsZext(const MachineInstr & MI)546   static bool sopkIsZext(const MachineInstr &MI) {
547     return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
548   }
549 
sopkIsZext(uint16_t Opcode)550   bool sopkIsZext(uint16_t Opcode) const {
551     return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
552   }
553 
  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
isScalarStore(const MachineInstr & MI)556   static bool isScalarStore(const MachineInstr &MI) {
557     return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
558   }
559 
isScalarStore(uint16_t Opcode)560   bool isScalarStore(uint16_t Opcode) const {
561     return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
562   }
563 
isFixedSize(const MachineInstr & MI)564   static bool isFixedSize(const MachineInstr &MI) {
565     return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
566   }
567 
isFixedSize(uint16_t Opcode)568   bool isFixedSize(uint16_t Opcode) const {
569     return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
570   }
571 
hasFPClamp(const MachineInstr & MI)572   static bool hasFPClamp(const MachineInstr &MI) {
573     return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
574   }
575 
hasFPClamp(uint16_t Opcode)576   bool hasFPClamp(uint16_t Opcode) const {
577     return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
578   }
579 
hasIntClamp(const MachineInstr & MI)580   static bool hasIntClamp(const MachineInstr &MI) {
581     return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
582   }
583 
getClampMask(const MachineInstr & MI)584   uint64_t getClampMask(const MachineInstr &MI) const {
585     const uint64_t ClampFlags = SIInstrFlags::FPClamp |
586                                 SIInstrFlags::IntClamp |
587                                 SIInstrFlags::ClampLo |
588                                 SIInstrFlags::ClampHi;
589       return MI.getDesc().TSFlags & ClampFlags;
590   }
591 
isVGPRCopy(const MachineInstr & MI)592   bool isVGPRCopy(const MachineInstr &MI) const {
593     assert(MI.isCopy());
594     unsigned Dest = MI.getOperand(0).getReg();
595     const MachineFunction &MF = *MI.getParent()->getParent();
596     const MachineRegisterInfo &MRI = MF.getRegInfo();
597     return !RI.isSGPRReg(MRI, Dest);
598   }
599 
600   /// Whether we must prevent this instruction from executing with EXEC = 0.
601   bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
602 
603   bool isInlineConstant(const APInt &Imm) const;
604 
605   bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
606 
isInlineConstant(const MachineOperand & MO,const MCOperandInfo & OpInfo)607   bool isInlineConstant(const MachineOperand &MO,
608                         const MCOperandInfo &OpInfo) const {
609     return isInlineConstant(MO, OpInfo.OperandType);
610   }
611 
  /// \returns true if substituting \p DefMO for \p UseMO in \p MI would make
  /// it an inline immediate.
isInlineConstant(const MachineInstr & MI,const MachineOperand & UseMO,const MachineOperand & DefMO)614   bool isInlineConstant(const MachineInstr &MI,
615                         const MachineOperand &UseMO,
616                         const MachineOperand &DefMO) const {
617     assert(UseMO.getParent() == &MI);
618     int OpIdx = MI.getOperandNo(&UseMO);
619     if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands) {
620       return false;
621     }
622 
623     return isInlineConstant(DefMO, MI.getDesc().OpInfo[OpIdx]);
624   }
625 
  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
  /// immediate.
isInlineConstant(const MachineInstr & MI,unsigned OpIdx)628   bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
629     const MachineOperand &MO = MI.getOperand(OpIdx);
630     return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
631   }
632 
isInlineConstant(const MachineInstr & MI,unsigned OpIdx,const MachineOperand & MO)633   bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
634                         const MachineOperand &MO) const {
635     if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands)
636       return false;
637 
638     if (MI.isCopy()) {
639       unsigned Size = getOpSize(MI, OpIdx);
640       assert(Size == 8 || Size == 4);
641 
642       uint8_t OpType = (Size == 8) ?
643         AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
644       return isInlineConstant(MO, OpType);
645     }
646 
647     return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
648   }
649 
isInlineConstant(const MachineOperand & MO)650   bool isInlineConstant(const MachineOperand &MO) const {
651     const MachineInstr *Parent = MO.getParent();
652     return isInlineConstant(*Parent, Parent->getOperandNo(&MO));
653   }
654 
isLiteralConstant(const MachineOperand & MO,const MCOperandInfo & OpInfo)655   bool isLiteralConstant(const MachineOperand &MO,
656                          const MCOperandInfo &OpInfo) const {
657     return MO.isImm() && !isInlineConstant(MO, OpInfo.OperandType);
658   }
659 
isLiteralConstant(const MachineInstr & MI,int OpIdx)660   bool isLiteralConstant(const MachineInstr &MI, int OpIdx) const {
661     const MachineOperand &MO = MI.getOperand(OpIdx);
662     return MO.isImm() && !isInlineConstant(MI, OpIdx);
663   }
664 
665   // Returns true if this operand could potentially require a 32-bit literal
666   // operand, but not necessarily. A FrameIndex for example could resolve to an
667   // inline immediate value that will not require an additional 4-bytes; this
668   // assumes that it will.
669   bool isLiteralConstantLike(const MachineOperand &MO,
670                              const MCOperandInfo &OpInfo) const;
671 
672   bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
673                          const MachineOperand &MO) const;
674 
675   /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
676   /// This function will return false if you pass it a 32-bit instruction.
677   bool hasVALU32BitEncoding(unsigned Opcode) const;
678 
679   /// Returns true if this operand uses the constant bus.
680   bool usesConstantBus(const MachineRegisterInfo &MRI,
681                        const MachineOperand &MO,
682                        const MCOperandInfo &OpInfo) const;
683 
684   /// Return true if this instruction has any modifiers.
685   ///  e.g. src[012]_mod, omod, clamp.
686   bool hasModifiers(unsigned Opcode) const;
687 
688   bool hasModifiersSet(const MachineInstr &MI,
689                        unsigned OpName) const;
690   bool hasAnyModifiersSet(const MachineInstr &MI) const;
691 
692   bool verifyInstruction(const MachineInstr &MI,
693                          StringRef &ErrInfo) const override;
694 
695   unsigned getVALUOp(const MachineInstr &MI) const;
696 
697   /// Return the correct register class for \p OpNo.  For target-specific
698   /// instructions, this will return the register class that has been defined
699   /// in tablegen.  For generic instructions, like REG_SEQUENCE it will return
700   /// the register class of its machine operand.
701   /// to infer the correct register class base on the other operands.
702   const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
703                                            unsigned OpNo) const;
704 
  /// Return the size in bytes of the operand OpNo on the given
  /// instruction opcode.
getOpSize(uint16_t Opcode,unsigned OpNo)707   unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
708     const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo];
709 
710     if (OpInfo.RegClass == -1) {
711       // If this is an immediate operand, this must be a 32-bit literal.
712       assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
713       return 4;
714     }
715 
716     return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
717   }
718 
719   /// This form should usually be preferred since it handles operands
720   /// with unknown register classes.
getOpSize(const MachineInstr & MI,unsigned OpNo)721   unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
722     return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
723   }
724 
725   /// \returns true if it is legal for the operand at index \p OpNo
726   /// to read a VGPR.
727   bool canReadVGPR(const MachineInstr &MI, unsigned OpNo) const;
728 
729   /// Legalize the \p OpIndex operand of this instruction by inserting
730   /// a MOV.  For example:
731   /// ADD_I32_e32 VGPR0, 15
732   /// to
733   /// MOV VGPR1, 15
734   /// ADD_I32_e32 VGPR0, VGPR1
735   ///
736   /// If the operand being legalized is a register, then a COPY will be used
737   /// instead of MOV.
738   void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;
739 
740   /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
741   /// for \p MI.
742   bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
743                       const MachineOperand *MO = nullptr) const;
744 
745   /// Check if \p MO would be a valid operand for the given operand
746   /// definition \p OpInfo. Note this does not attempt to validate constant bus
747   /// restrictions (e.g. literal constant usage).
748   bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
749                           const MCOperandInfo &OpInfo,
750                           const MachineOperand &MO) const;
751 
752   /// Check if \p MO (a register operand) is a legal register for the
753   /// given operand description.
754   bool isLegalRegOperand(const MachineRegisterInfo &MRI,
755                          const MCOperandInfo &OpInfo,
756                          const MachineOperand &MO) const;
757 
758   /// Legalize operands in \p MI by either commuting it or inserting a
759   /// copy of src1.
760   void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
761 
762   /// Fix operands in \p MI to satisfy constant bus requirements.
763   void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
764 
765   /// Copy a value from a VGPR (\p SrcReg) to SGPR.  This function can only
766   /// be used when it is know that the value in SrcReg is same across all
767   /// threads in the wave.
768   /// \returns The SGPR register that \p SrcReg was copied to.
769   unsigned readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
770                               MachineRegisterInfo &MRI) const;
771 
772   void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
773 
774   void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
775                               MachineBasicBlock::iterator I,
776                               const TargetRegisterClass *DstRC,
777                               MachineOperand &Op, MachineRegisterInfo &MRI,
778                               const DebugLoc &DL) const;
779 
780   /// Legalize all operands in this instruction.  This function may
781   /// create new instruction and insert them before \p MI.
782   void legalizeOperands(MachineInstr &MI) const;
783 
784   /// Replace this instruction's opcode with the equivalent VALU
785   /// opcode.  This function will also move the users of \p MI to the
786   /// VALU if necessary.
787   void moveToVALU(MachineInstr &MI) const;
788 
789   void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,
790                         int Count) const;
791 
792   void insertNoop(MachineBasicBlock &MBB,
793                   MachineBasicBlock::iterator MI) const override;
794 
795   void insertReturn(MachineBasicBlock &MBB) const;
796   /// Return the number of wait states that result from executing this
797   /// instruction.
798   unsigned getNumWaitStates(const MachineInstr &MI) const;
799 
800   /// Returns the operand named \p Op.  If \p MI does not have an
801   /// operand named \c Op, this function returns nullptr.
802   LLVM_READONLY
803   MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
804 
805   LLVM_READONLY
getNamedOperand(const MachineInstr & MI,unsigned OpName)806   const MachineOperand *getNamedOperand(const MachineInstr &MI,
807                                         unsigned OpName) const {
808     return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
809   }
810 
811   /// Get required immediate operand
getNamedImmOperand(const MachineInstr & MI,unsigned OpName)812   int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
813     int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
814     return MI.getOperand(Idx).getImm();
815   }
816 
817   uint64_t getDefaultRsrcDataFormat() const;
818   uint64_t getScratchRsrcWords23() const;
819 
820   bool isLowLatencyInstruction(const MachineInstr &MI) const;
821   bool isHighLatencyInstruction(const MachineInstr &MI) const;
822 
823   /// Return the descriptor of the target-specific machine instruction
824   /// that corresponds to the specified pseudo or native opcode.
getMCOpcodeFromPseudo(unsigned Opcode)825   const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
826     return get(pseudoToMCOpcode(Opcode));
827   }
828 
829   unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
830   unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;
831 
832   unsigned isLoadFromStackSlot(const MachineInstr &MI,
833                                int &FrameIndex) const override;
834   unsigned isStoreToStackSlot(const MachineInstr &MI,
835                               int &FrameIndex) const override;
836 
837   unsigned getInstBundleSize(const MachineInstr &MI) const;
838   unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
839 
840   bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
841 
842   bool isNonUniformBranchInstr(MachineInstr &Instr) const;
843 
844   void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
845                                  MachineBasicBlock *IfEnd) const;
846 
847   void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
848                                    MachineBasicBlock *LoopEnd) const;
849 
850   std::pair<unsigned, unsigned>
851   decomposeMachineOperandsTargetFlags(unsigned TF) const override;
852 
853   ArrayRef<std::pair<int, const char *>>
854   getSerializableTargetIndices() const override;
855 
856   ArrayRef<std::pair<unsigned, const char *>>
857   getSerializableDirectMachineOperandTargetFlags() const override;
858 
859   ScheduleHazardRecognizer *
860   CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
861                                  const ScheduleDAG *DAG) const override;
862 
863   ScheduleHazardRecognizer *
864   CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
865 
866   bool isBasicBlockPrologue(const MachineInstr &MI) const override;
867 
868   /// Return a partially built integer add instruction without carry.
869   /// Caller must add source operands.
870   /// For pre-GFX9 it will generate unused carry destination operand.
871   /// TODO: After GFX9 it should return a no-carry operation.
872   MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
873                                     MachineBasicBlock::iterator I,
874                                     const DebugLoc &DL,
875                                     unsigned DestReg) const;
876 
877   static bool isKillTerminator(unsigned Opcode);
878   const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
879 
isLegalMUBUFImmOffset(unsigned Imm)880   static bool isLegalMUBUFImmOffset(unsigned Imm) {
881     return isUInt<12>(Imm);
882   }
883 
884   /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
885   /// Return -1 if the target-specific opcode for the pseudo instruction does
886   /// not exist. If Opcode is not a pseudo instruction, this is identity.
887   int pseudoToMCOpcode(int Opcode) const;
888 
889 };
890 
891 namespace AMDGPU {
892 
893   LLVM_READONLY
894   int getVOPe64(uint16_t Opcode);
895 
896   LLVM_READONLY
897   int getVOPe32(uint16_t Opcode);
898 
899   LLVM_READONLY
900   int getSDWAOp(uint16_t Opcode);
901 
902   LLVM_READONLY
903   int getBasicFromSDWAOp(uint16_t Opcode);
904 
905   LLVM_READONLY
906   int getCommuteRev(uint16_t Opcode);
907 
908   LLVM_READONLY
909   int getCommuteOrig(uint16_t Opcode);
910 
911   LLVM_READONLY
912   int getAddr64Inst(uint16_t Opcode);
913 
914   LLVM_READONLY
915   int getMUBUFNoLdsInst(uint16_t Opcode);
916 
917   LLVM_READONLY
918   int getAtomicRetOp(uint16_t Opcode);
919 
920   LLVM_READONLY
921   int getAtomicNoRetOp(uint16_t Opcode);
922 
923   LLVM_READONLY
924   int getSOPKOp(uint16_t Opcode);
925 
926   const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
927   const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
928   const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
929   const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
930 
931   // For MachineOperands.
932   enum TargetFlags {
933     TF_LONG_BRANCH_FORWARD = 1 << 0,
934     TF_LONG_BRANCH_BACKWARD = 1 << 1
935   };
936 
937 } // end namespace AMDGPU
938 
namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer.  The entries are
/// consecutive 4-byte slots: the X, Y and Z components of NGROUPS, then of
/// GLOBAL_SIZE, then of LOCAL_SIZE.
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI
957 
958 } // end namespace llvm
959 
960 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
961