1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
/// The R600EmitClauseMarker pass emits CFAlu instructions in a conservative
/// manner. This pass merges consecutive CFAlus where applicable.
13 /// It needs to be called after IfCvt for best results.
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "R600Defines.h"
19 #include "R600InstrInfo.h"
20 #include "R600MachineFunctionInfo.h"
21 #include "R600RegisterInfo.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "r600mergeclause"
31 
32 namespace {
33 
isCFAlu(const MachineInstr * MI)34 static bool isCFAlu(const MachineInstr *MI) {
35   switch (MI->getOpcode()) {
36   case AMDGPU::CF_ALU:
37   case AMDGPU::CF_ALU_PUSH_BEFORE:
38     return true;
39   default:
40     return false;
41   }
42 }
43 
44 class R600ClauseMergePass : public MachineFunctionPass {
45 
46 private:
47   static char ID;
48   const R600InstrInfo *TII;
49 
50   unsigned getCFAluSize(const MachineInstr *MI) const;
51   bool isCFAluEnabled(const MachineInstr *MI) const;
52 
53   /// IfCvt pass can generate "disabled" ALU clause marker that need to be
54   /// removed and their content affected to the previous alu clause.
55   /// This function parse instructions after CFAlu until it find a disabled
56   /// CFAlu and merge the content, or an enabled CFAlu.
57   void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
58 
59   /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
60   /// it is the case.
61   bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
62       const;
63 
64 public:
R600ClauseMergePass(TargetMachine & tm)65   R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
66 
67   bool runOnMachineFunction(MachineFunction &MF) override;
68 
69   const char *getPassName() const override;
70 };
71 
72 char R600ClauseMergePass::ID = 0;
73 
getCFAluSize(const MachineInstr * MI) const74 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
75   assert(isCFAlu(MI));
76   return MI->getOperand(
77       TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
78 }
79 
isCFAluEnabled(const MachineInstr * MI) const80 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
81   assert(isCFAlu(MI));
82   return MI->getOperand(
83       TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
84 }
85 
cleanPotentialDisabledCFAlu(MachineInstr * CFAlu) const86 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
87     const {
88   int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
89   MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
90   I++;
91   do {
92     while (I!= E && !isCFAlu(I))
93       I++;
94     if (I == E)
95       return;
96     MachineInstr *MI = I++;
97     if (isCFAluEnabled(MI))
98       break;
99     CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
100     MI->eraseFromParent();
101   } while (I != E);
102 }
103 
mergeIfPossible(MachineInstr * RootCFAlu,const MachineInstr * LatrCFAlu) const104 bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
105                                           const MachineInstr *LatrCFAlu) const {
106   assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
107   int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
108   unsigned RootInstCount = getCFAluSize(RootCFAlu),
109       LaterInstCount = getCFAluSize(LatrCFAlu);
110   unsigned CumuledInsts = RootInstCount + LaterInstCount;
111   if (CumuledInsts >= TII->getMaxAlusPerClause()) {
112     DEBUG(dbgs() << "Excess inst counts\n");
113     return false;
114   }
115   if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
116     return false;
117   // Is KCache Bank 0 compatible ?
118   int Mode0Idx =
119       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
120   int KBank0Idx =
121       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
122   int KBank0LineIdx =
123       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
124   if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
125       RootCFAlu->getOperand(Mode0Idx).getImm() &&
126       (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
127        RootCFAlu->getOperand(KBank0Idx).getImm() ||
128       LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
129       RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
130     DEBUG(dbgs() << "Wrong KC0\n");
131     return false;
132   }
133   // Is KCache Bank 1 compatible ?
134   int Mode1Idx =
135       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
136   int KBank1Idx =
137       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
138   int KBank1LineIdx =
139       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
140   if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
141       RootCFAlu->getOperand(Mode1Idx).getImm() &&
142       (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
143       RootCFAlu->getOperand(KBank1Idx).getImm() ||
144       LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
145       RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
146     DEBUG(dbgs() << "Wrong KC0\n");
147     return false;
148   }
149   if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
150     RootCFAlu->getOperand(Mode0Idx).setImm(
151         LatrCFAlu->getOperand(Mode0Idx).getImm());
152     RootCFAlu->getOperand(KBank0Idx).setImm(
153         LatrCFAlu->getOperand(KBank0Idx).getImm());
154     RootCFAlu->getOperand(KBank0LineIdx).setImm(
155         LatrCFAlu->getOperand(KBank0LineIdx).getImm());
156   }
157   if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
158     RootCFAlu->getOperand(Mode1Idx).setImm(
159         LatrCFAlu->getOperand(Mode1Idx).getImm());
160     RootCFAlu->getOperand(KBank1Idx).setImm(
161         LatrCFAlu->getOperand(KBank1Idx).getImm());
162     RootCFAlu->getOperand(KBank1LineIdx).setImm(
163         LatrCFAlu->getOperand(KBank1LineIdx).getImm());
164   }
165   RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
166   RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
167   return true;
168 }
169 
runOnMachineFunction(MachineFunction & MF)170 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
171   TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
172   for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
173                                                   BB != BB_E; ++BB) {
174     MachineBasicBlock &MBB = *BB;
175     MachineBasicBlock::iterator I = MBB.begin(),  E = MBB.end();
176     MachineBasicBlock::iterator LatestCFAlu = E;
177     while (I != E) {
178       MachineInstr *MI = I++;
179       if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
180           TII->mustBeLastInClause(MI->getOpcode()))
181         LatestCFAlu = E;
182       if (!isCFAlu(MI))
183         continue;
184       cleanPotentialDisabledCFAlu(MI);
185 
186       if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
187         MI->eraseFromParent();
188       } else {
189         assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
190         LatestCFAlu = MI;
191       }
192     }
193   }
194   return false;
195 }
196 
getPassName() const197 const char *R600ClauseMergePass::getPassName() const {
198   return "R600 Merge Clause Markers Pass";
199 }
200 
201 } // end anonymous namespace
202 
203 
createR600ClauseMergePass(TargetMachine & TM)204 llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
205   return new R600ClauseMergePass(TM);
206 }
207