1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// R600EmitClauseMarker pass emits CFAlu instructions in a conservative manner.
12 /// This pass merges consecutive CFAlus where applicable.
13 /// It needs to be called after IfCvt for best results.
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "R600Defines.h"
19 #include "R600InstrInfo.h"
20 #include "R600MachineFunctionInfo.h"
21 #include "R600RegisterInfo.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "r600mergeclause"
31 
32 namespace {
33 
isCFAlu(const MachineInstr & MI)34 static bool isCFAlu(const MachineInstr &MI) {
35   switch (MI.getOpcode()) {
36   case AMDGPU::CF_ALU:
37   case AMDGPU::CF_ALU_PUSH_BEFORE:
38     return true;
39   default:
40     return false;
41   }
42 }
43 
44 class R600ClauseMergePass : public MachineFunctionPass {
45 
46 private:
47   static char ID;
48   const R600InstrInfo *TII;
49 
50   unsigned getCFAluSize(const MachineInstr &MI) const;
51   bool isCFAluEnabled(const MachineInstr &MI) const;
52 
53   /// IfCvt pass can generate "disabled" ALU clause marker that need to be
54   /// removed and their content affected to the previous alu clause.
55   /// This function parse instructions after CFAlu until it find a disabled
56   /// CFAlu and merge the content, or an enabled CFAlu.
57   void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const;
58 
59   /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
60   /// it is the case.
61   bool mergeIfPossible(MachineInstr &RootCFAlu,
62                        const MachineInstr &LatrCFAlu) const;
63 
64 public:
R600ClauseMergePass(TargetMachine & tm)65   R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
66 
67   bool runOnMachineFunction(MachineFunction &MF) override;
68 
69   const char *getPassName() const override;
70 };
71 
// Definition of the static member whose reference is passed to the
// MachineFunctionPass constructor by R600ClauseMergePass's constructor.
72 char R600ClauseMergePass::ID = 0;
73 
getCFAluSize(const MachineInstr & MI) const74 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
75   assert(isCFAlu(MI));
76   return MI
77       .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT))
78       .getImm();
79 }
80 
isCFAluEnabled(const MachineInstr & MI) const81 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
82   assert(isCFAlu(MI));
83   return MI
84       .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled))
85       .getImm();
86 }
87 
cleanPotentialDisabledCFAlu(MachineInstr & CFAlu) const88 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
89     MachineInstr &CFAlu) const {
90   int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
91   MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
92   I++;
93   do {
94     while (I != E && !isCFAlu(*I))
95       I++;
96     if (I == E)
97       return;
98     MachineInstr &MI = *I++;
99     if (isCFAluEnabled(MI))
100       break;
101     CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
102     MI.eraseFromParent();
103   } while (I != E);
104 }
105 
mergeIfPossible(MachineInstr & RootCFAlu,const MachineInstr & LatrCFAlu) const106 bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
107                                           const MachineInstr &LatrCFAlu) const {
108   assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
109   int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
110   unsigned RootInstCount = getCFAluSize(RootCFAlu),
111       LaterInstCount = getCFAluSize(LatrCFAlu);
112   unsigned CumuledInsts = RootInstCount + LaterInstCount;
113   if (CumuledInsts >= TII->getMaxAlusPerClause()) {
114     DEBUG(dbgs() << "Excess inst counts\n");
115     return false;
116   }
117   if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
118     return false;
119   // Is KCache Bank 0 compatible ?
120   int Mode0Idx =
121       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
122   int KBank0Idx =
123       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
124   int KBank0LineIdx =
125       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
126   if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
127       RootCFAlu.getOperand(Mode0Idx).getImm() &&
128       (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
129            RootCFAlu.getOperand(KBank0Idx).getImm() ||
130        LatrCFAlu.getOperand(KBank0LineIdx).getImm() !=
131            RootCFAlu.getOperand(KBank0LineIdx).getImm())) {
132     DEBUG(dbgs() << "Wrong KC0\n");
133     return false;
134   }
135   // Is KCache Bank 1 compatible ?
136   int Mode1Idx =
137       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
138   int KBank1Idx =
139       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
140   int KBank1LineIdx =
141       TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
142   if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
143       RootCFAlu.getOperand(Mode1Idx).getImm() &&
144       (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
145            RootCFAlu.getOperand(KBank1Idx).getImm() ||
146        LatrCFAlu.getOperand(KBank1LineIdx).getImm() !=
147            RootCFAlu.getOperand(KBank1LineIdx).getImm())) {
148     DEBUG(dbgs() << "Wrong KC0\n");
149     return false;
150   }
151   if (LatrCFAlu.getOperand(Mode0Idx).getImm()) {
152     RootCFAlu.getOperand(Mode0Idx).setImm(
153         LatrCFAlu.getOperand(Mode0Idx).getImm());
154     RootCFAlu.getOperand(KBank0Idx).setImm(
155         LatrCFAlu.getOperand(KBank0Idx).getImm());
156     RootCFAlu.getOperand(KBank0LineIdx)
157         .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm());
158   }
159   if (LatrCFAlu.getOperand(Mode1Idx).getImm()) {
160     RootCFAlu.getOperand(Mode1Idx).setImm(
161         LatrCFAlu.getOperand(Mode1Idx).getImm());
162     RootCFAlu.getOperand(KBank1Idx).setImm(
163         LatrCFAlu.getOperand(KBank1Idx).getImm());
164     RootCFAlu.getOperand(KBank1LineIdx)
165         .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm());
166   }
167   RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts);
168   RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode()));
169   return true;
170 }
171 
runOnMachineFunction(MachineFunction & MF)172 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
173   if (skipFunction(*MF.getFunction()))
174     return false;
175 
176   const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
177   TII = ST.getInstrInfo();
178 
179   for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
180                                                   BB != BB_E; ++BB) {
181     MachineBasicBlock &MBB = *BB;
182     MachineBasicBlock::iterator I = MBB.begin(),  E = MBB.end();
183     MachineBasicBlock::iterator LatestCFAlu = E;
184     while (I != E) {
185       MachineInstr &MI = *I++;
186       if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
187           TII->mustBeLastInClause(MI.getOpcode()))
188         LatestCFAlu = E;
189       if (!isCFAlu(MI))
190         continue;
191       cleanPotentialDisabledCFAlu(MI);
192 
193       if (LatestCFAlu != E && mergeIfPossible(*LatestCFAlu, MI)) {
194         MI.eraseFromParent();
195       } else {
196         assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled");
197         LatestCFAlu = MI;
198       }
199     }
200   }
201   return false;
202 }
203 
getPassName() const204 const char *R600ClauseMergePass::getPassName() const {
205   return "R600 Merge Clause Markers Pass";
206 }
207 
208 } // end anonymous namespace
209 
210 
createR600ClauseMergePass(TargetMachine & TM)211 llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
212   return new R600ClauseMergePass(TM);
213 }
214