1 //==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file Break False Dependency pass.
11 ///
12 /// Some instructions have false dependencies which cause unnecessary stalls.
13 /// For exmaple, instructions that only write part of a register, and implicitly
14 /// need to read the other parts of the register.  This may cause unwanted
15 /// stalls preventing otherwise unrelated instructions from executing in
16 /// parallel in an out-of-order CPU.
17 /// This pass is aimed at identifying and avoiding these depepndencies when
18 /// possible.
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #include "llvm/CodeGen/LivePhysRegs.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/ReachingDefAnalysis.h"
25 #include "llvm/CodeGen/RegisterClassInfo.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/TargetInstrInfo.h"
28 
29 
30 using namespace llvm;
31 
32 namespace llvm {
33 
34 class BreakFalseDeps : public MachineFunctionPass {
35 private:
36   MachineFunction *MF;
37   const TargetInstrInfo *TII;
38   const TargetRegisterInfo *TRI;
39   RegisterClassInfo RegClassInfo;
40 
41   /// List of undefined register reads in this block in forward order.
42   std::vector<std::pair<MachineInstr *, unsigned>> UndefReads;
43 
44   /// Storage for register unit liveness.
45   LivePhysRegs LiveRegSet;
46 
47   ReachingDefAnalysis *RDA;
48 
49 public:
50   static char ID; // Pass identification, replacement for typeid
51 
BreakFalseDeps()52   BreakFalseDeps() : MachineFunctionPass(ID) {
53     initializeBreakFalseDepsPass(*PassRegistry::getPassRegistry());
54   }
55 
getAnalysisUsage(AnalysisUsage & AU) const56   void getAnalysisUsage(AnalysisUsage &AU) const override {
57     AU.setPreservesAll();
58     AU.addRequired<ReachingDefAnalysis>();
59     MachineFunctionPass::getAnalysisUsage(AU);
60   }
61 
62   bool runOnMachineFunction(MachineFunction &MF) override;
63 
getRequiredProperties() const64   MachineFunctionProperties getRequiredProperties() const override {
65     return MachineFunctionProperties().set(
66       MachineFunctionProperties::Property::NoVRegs);
67   }
68 
69 private:
70   /// Process he given basic block.
71   void processBasicBlock(MachineBasicBlock *MBB);
72 
73   /// Update def-ages for registers defined by MI.
74   /// Also break dependencies on partial defs and undef uses.
75   void processDefs(MachineInstr *MI);
76 
77   /// Helps avoid false dependencies on undef registers by updating the
78   /// machine instructions' undef operand to use a register that the instruction
79   /// is truly dependent on, or use a register with clearance higher than Pref.
80   /// Returns true if it was able to find a true dependency, thus not requiring
81   /// a dependency breaking instruction regardless of clearance.
82   bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
83     unsigned Pref);
84 
85   /// Return true to if it makes sense to break dependence on a partial
86   /// def or undef use.
87   bool shouldBreakDependence(MachineInstr *, unsigned OpIdx, unsigned Pref);
88 
89   /// Break false dependencies on undefined register reads.
90   /// Walk the block backward computing precise liveness. This is expensive, so
91   /// we only do it on demand. Note that the occurrence of undefined register
92   /// reads that should be broken is very rare, but when they occur we may have
93   /// many in a single block.
94   void processUndefReads(MachineBasicBlock *);
95 };
96 
97 } // namespace llvm
98 
99 #define DEBUG_TYPE "break-false-deps"
100 
101 char BreakFalseDeps::ID = 0;
102 INITIALIZE_PASS_BEGIN(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false)
INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)103 INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)
104 INITIALIZE_PASS_END(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false)
105 
106 FunctionPass *llvm::createBreakFalseDeps() { return new BreakFalseDeps(); }
107 
pickBestRegisterForUndef(MachineInstr * MI,unsigned OpIdx,unsigned Pref)108 bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
109   unsigned Pref) {
110   MachineOperand &MO = MI->getOperand(OpIdx);
111   assert(MO.isUndef() && "Expected undef machine operand");
112 
113   unsigned OriginalReg = MO.getReg();
114 
115   // Update only undef operands that have reg units that are mapped to one root.
116   for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) {
117     unsigned NumRoots = 0;
118     for (MCRegUnitRootIterator Root(*Unit, TRI); Root.isValid(); ++Root) {
119       NumRoots++;
120       if (NumRoots > 1)
121         return false;
122     }
123   }
124 
125   // Get the undef operand's register class
126   const TargetRegisterClass *OpRC =
127     TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF);
128 
129   // If the instruction has a true dependency, we can hide the false depdency
130   // behind it.
131   for (MachineOperand &CurrMO : MI->operands()) {
132     if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() ||
133       !OpRC->contains(CurrMO.getReg()))
134       continue;
135     // We found a true dependency - replace the undef register with the true
136     // dependency.
137     MO.setReg(CurrMO.getReg());
138     return true;
139   }
140 
141   // Go over all registers in the register class and find the register with
142   // max clearance or clearance higher than Pref.
143   unsigned MaxClearance = 0;
144   unsigned MaxClearanceReg = OriginalReg;
145   ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC);
146   for (MCPhysReg Reg : Order) {
147     unsigned Clearance = RDA->getClearance(MI, Reg);
148     if (Clearance <= MaxClearance)
149       continue;
150     MaxClearance = Clearance;
151     MaxClearanceReg = Reg;
152 
153     if (MaxClearance > Pref)
154       break;
155   }
156 
157   // Update the operand if we found a register with better clearance.
158   if (MaxClearanceReg != OriginalReg)
159     MO.setReg(MaxClearanceReg);
160 
161   return false;
162 }
163 
shouldBreakDependence(MachineInstr * MI,unsigned OpIdx,unsigned Pref)164 bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
165   unsigned Pref) {
166   unsigned reg = MI->getOperand(OpIdx).getReg();
167   unsigned Clearance = RDA->getClearance(MI, reg);
168   LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
169 
170   if (Pref > Clearance) {
171     LLVM_DEBUG(dbgs() << ": Break dependency.\n");
172     return true;
173   }
174   LLVM_DEBUG(dbgs() << ": OK .\n");
175   return false;
176 }
177 
processDefs(MachineInstr * MI)178 void BreakFalseDeps::processDefs(MachineInstr *MI) {
179   assert(!MI->isDebugInstr() && "Won't process debug values");
180 
181   // Break dependence on undef uses. Do this before updating LiveRegs below.
182   unsigned OpNum;
183   unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
184   if (Pref) {
185     bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref);
186     // We don't need to bother trying to break a dependency if this
187     // instruction has a true dependency on that register through another
188     // operand - we'll have to wait for it to be available regardless.
189     if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref))
190       UndefReads.push_back(std::make_pair(MI, OpNum));
191   }
192 
193   const MCInstrDesc &MCID = MI->getDesc();
194   for (unsigned i = 0,
195     e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
196     i != e; ++i) {
197     MachineOperand &MO = MI->getOperand(i);
198     if (!MO.isReg() || !MO.getReg())
199       continue;
200     if (MO.isUse())
201       continue;
202     // Check clearance before partial register updates.
203     unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
204     if (Pref && shouldBreakDependence(MI, i, Pref))
205       TII->breakPartialRegDependency(*MI, i, TRI);
206   }
207 }
208 
processUndefReads(MachineBasicBlock * MBB)209 void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) {
210   if (UndefReads.empty())
211     return;
212 
213   // Collect this block's live out register units.
214   LiveRegSet.init(*TRI);
215   // We do not need to care about pristine registers as they are just preserved
216   // but not actually used in the function.
217   LiveRegSet.addLiveOutsNoPristines(*MBB);
218 
219   MachineInstr *UndefMI = UndefReads.back().first;
220   unsigned OpIdx = UndefReads.back().second;
221 
222   for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
223     // Update liveness, including the current instruction's defs.
224     LiveRegSet.stepBackward(I);
225 
226     if (UndefMI == &I) {
227       if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))
228         TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI);
229 
230       UndefReads.pop_back();
231       if (UndefReads.empty())
232         return;
233 
234       UndefMI = UndefReads.back().first;
235       OpIdx = UndefReads.back().second;
236     }
237   }
238 }
239 
processBasicBlock(MachineBasicBlock * MBB)240 void BreakFalseDeps::processBasicBlock(MachineBasicBlock *MBB) {
241   UndefReads.clear();
242   // If this block is not done, it makes little sense to make any decisions
243   // based on clearance information. We need to make a second pass anyway,
244   // and by then we'll have better information, so we can avoid doing the work
245   // to try and break dependencies now.
246   for (MachineInstr &MI : *MBB) {
247     if (!MI.isDebugInstr())
248       processDefs(&MI);
249   }
250   processUndefReads(MBB);
251 }
252 
runOnMachineFunction(MachineFunction & mf)253 bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) {
254   if (skipFunction(mf.getFunction()))
255     return false;
256   MF = &mf;
257   TII = MF->getSubtarget().getInstrInfo();
258   TRI = MF->getSubtarget().getRegisterInfo();
259   RDA = &getAnalysis<ReachingDefAnalysis>();
260 
261   RegClassInfo.runOnMachineFunction(mf);
262 
263   LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n");
264 
265   // Traverse the basic blocks.
266   for (MachineBasicBlock &MBB : mf) {
267     processBasicBlock(&MBB);
268   }
269 
270   return false;
271 }
272