1 //===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// Computations in WWM can overwrite values in inactive channels for
12 /// variables that the register allocator thinks are dead. This pass adds fake
13 /// uses of those variables to WWM instructions to make sure that they aren't
14 /// overwritten.
15 ///
16 /// As an example, consider this snippet:
17 /// %vgpr0 = V_MOV_B32_e32 0.0
18 /// if (...) {
19 ///   %vgpr1 = ...
20 ///   %vgpr2 = WWM killed %vgpr1
21 ///   ... = killed %vgpr2
22 ///   %vgpr0 = V_MOV_B32_e32 1.0
23 /// }
24 /// ... = %vgpr0
25 ///
26 /// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally,
27 /// we can safely allocate %vgpr0 and %vgpr1 in the same register, since
28 /// writing %vgpr1 would only write to channels that would be clobbered by the
29 /// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
30 /// it would clobber even the inactive channels for which the if-condition is
31 /// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
32 /// of %vgpr0 to the WWM instruction to make sure they aren't allocated to the
33 /// same register.
34 ///
35 /// In general, we need to figure out what registers might have their inactive
36 /// channels which are eventually used accidentally clobbered by a WWM
37 /// instruction. We approximate this using two conditions:
38 ///
39 /// 1. A definition of the variable reaches the WWM instruction.
40 /// 2. The variable would be live at the WWM instruction if all its defs were
41 /// partial defs (i.e. considered as a use), ignoring normal uses.
42 ///
43 /// If a register matches both conditions, then we add an implicit use of it to
44 /// the WWM instruction. Condition #2 is the heart of the matter: every
45 /// definition is really a partial definition, since every VALU instruction is
46 /// implicitly predicated.  We can usually ignore this, but WWM forces us not
47 /// to. Condition #1 prevents false positives if the variable is undefined at
48 /// the WWM instruction anyways. This is overly conservative in certain cases,
49 /// especially in uniform control flow, but this is a workaround anyways until
50 /// LLVM gains the notion of predicated uses and definitions of variables.
51 ///
52 //===----------------------------------------------------------------------===//
53 
54 #include "AMDGPU.h"
55 #include "AMDGPUSubtarget.h"
56 #include "SIInstrInfo.h"
57 #include "SIRegisterInfo.h"
58 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
59 #include "llvm/ADT/DepthFirstIterator.h"
60 #include "llvm/ADT/SparseBitVector.h"
61 #include "llvm/CodeGen/LiveIntervals.h"
62 #include "llvm/CodeGen/MachineFunctionPass.h"
63 #include "llvm/CodeGen/Passes.h"
64 #include "llvm/CodeGen/TargetRegisterInfo.h"
65 
66 using namespace llvm;
67 
68 #define DEBUG_TYPE "si-fix-wwm-liveness"
69 
70 namespace {
71 
72 class SIFixWWMLiveness : public MachineFunctionPass {
73 private:
74   LiveIntervals *LIS = nullptr;
75   const SIRegisterInfo *TRI;
76   MachineRegisterInfo *MRI;
77 
78 public:
79   static char ID;
80 
SIFixWWMLiveness()81   SIFixWWMLiveness() : MachineFunctionPass(ID) {
82     initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry());
83   }
84 
85   bool runOnMachineFunction(MachineFunction &MF) override;
86 
87   bool runOnWWMInstruction(MachineInstr &MI);
88 
89   void addDefs(const MachineInstr &MI, SparseBitVector<> &set);
90 
getPassName() const91   StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
92 
getAnalysisUsage(AnalysisUsage & AU) const93   void getAnalysisUsage(AnalysisUsage &AU) const override {
94     // Should preserve the same set that TwoAddressInstructions does.
95     AU.addPreserved<SlotIndexes>();
96     AU.addPreserved<LiveIntervals>();
97     AU.addPreservedID(LiveVariablesID);
98     AU.addPreservedID(MachineLoopInfoID);
99     AU.addPreservedID(MachineDominatorsID);
100     AU.setPreservesCFG();
101     MachineFunctionPass::getAnalysisUsage(AU);
102   }
103 };
104 
105 } // End anonymous namespace.
106 
107 INITIALIZE_PASS(SIFixWWMLiveness, DEBUG_TYPE,
108                 "SI fix WWM liveness", false, false)
109 
110 char SIFixWWMLiveness::ID = 0;
111 
112 char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID;
113 
createSIFixWWMLivenessPass()114 FunctionPass *llvm::createSIFixWWMLivenessPass() {
115   return new SIFixWWMLiveness();
116 }
117 
addDefs(const MachineInstr & MI,SparseBitVector<> & Regs)118 void SIFixWWMLiveness::addDefs(const MachineInstr &MI, SparseBitVector<> &Regs)
119 {
120   for (const MachineOperand &Op : MI.defs()) {
121     if (Op.isReg()) {
122       unsigned Reg = Op.getReg();
123       if (TRI->isVGPR(*MRI, Reg))
124         Regs.set(Reg);
125     }
126   }
127 }
128 
runOnWWMInstruction(MachineInstr & WWM)129 bool SIFixWWMLiveness::runOnWWMInstruction(MachineInstr &WWM) {
130   MachineBasicBlock *MBB = WWM.getParent();
131 
132   // Compute the registers that are live out of MI by figuring out which defs
133   // are reachable from MI.
134   SparseBitVector<> LiveOut;
135 
136   for (auto II = MachineBasicBlock::iterator(WWM), IE =
137        MBB->end(); II != IE; ++II) {
138     addDefs(*II, LiveOut);
139   }
140 
141   for (df_iterator<MachineBasicBlock *> I = ++df_begin(MBB),
142                                         E = df_end(MBB);
143        I != E; ++I) {
144     for (const MachineInstr &MI : **I) {
145       addDefs(MI, LiveOut);
146     }
147   }
148 
149   // Compute the registers that reach MI.
150   SparseBitVector<> Reachable;
151 
152   for (auto II = ++MachineBasicBlock::reverse_iterator(WWM), IE =
153        MBB->rend(); II != IE; ++II) {
154     addDefs(*II, Reachable);
155   }
156 
157   for (idf_iterator<MachineBasicBlock *> I = ++idf_begin(MBB),
158                                          E = idf_end(MBB);
159        I != E; ++I) {
160     for (const MachineInstr &MI : **I) {
161       addDefs(MI, Reachable);
162     }
163   }
164 
165   // find the intersection, and add implicit uses.
166   LiveOut &= Reachable;
167 
168   bool Modified = false;
169   for (unsigned Reg : LiveOut) {
170     WWM.addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
171     if (LIS) {
172       // FIXME: is there a better way to update the live interval?
173       LIS->removeInterval(Reg);
174       LIS->createAndComputeVirtRegInterval(Reg);
175     }
176     Modified = true;
177   }
178 
179   return Modified;
180 }
181 
runOnMachineFunction(MachineFunction & MF)182 bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
183   bool Modified = false;
184 
185   // This doesn't actually need LiveIntervals, but we can preserve them.
186   LIS = getAnalysisIfAvailable<LiveIntervals>();
187 
188   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
189   const SIInstrInfo *TII = ST.getInstrInfo();
190 
191   TRI = &TII->getRegisterInfo();
192   MRI = &MF.getRegInfo();
193 
194   for (MachineBasicBlock &MBB : MF) {
195     for (MachineInstr &MI : MBB) {
196       if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
197         Modified |= runOnWWMInstruction(MI);
198       }
199     }
200   }
201 
202   return Modified;
203 }
204