//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// In NVPTX, NVPTXFrameLowering emits the following instructions at the
// beginning of a MachineFunction.
12 //
13 //   mov %SPL, %depot
14 //   cvta.local %SP, %SPL
15 //
// Because a frame index is a generic address and alloca can only return a
// generic pointer, without this pass the instructions producing an alloca'ed
// address will be based on %SP. NVPTXLowerAlloca helps replace stores and
// loads on this address with their .local versions, but this may introduce a
// lot of cvta.to.local instructions. Performance can be improved if we avoid
// casting the address back and forth and directly calculate the local address
// based on %SPL. This peephole pass optimizes these cases.
//
// For example, it will transform the following pattern
25 //    %vreg0<def> = LEA_ADDRi64 %VRFrame, 4
26 //    %vreg1<def> = cvta_to_local_yes_64 %vreg0
27 //
28 // into
29 //    %vreg1<def> = LEA_ADDRi64 %VRFrameLocal, 4
30 //
31 // %VRFrameLocal is the virtual register name of %SPL
32 //
33 //===----------------------------------------------------------------------===//
34 
35 #include "NVPTX.h"
36 #include "llvm/CodeGen/MachineFunctionPass.h"
37 #include "llvm/CodeGen/MachineInstrBuilder.h"
38 #include "llvm/CodeGen/MachineRegisterInfo.h"
39 #include "llvm/Target/TargetRegisterInfo.h"
40 #include "llvm/Target/TargetInstrInfo.h"
41 
42 using namespace llvm;
43 
44 #define DEBUG_TYPE "nvptx-peephole"
45 
namespace llvm {
// Forward declaration of the pass-initialization routine. It is invoked from
// the NVPTXPeephole constructor with the global PassRegistry.
void initializeNVPTXPeepholePass(PassRegistry &);
}
49 
50 namespace {
51 struct NVPTXPeephole : public MachineFunctionPass {
52  public:
53   static char ID;
NVPTXPeephole__anon739b03090111::NVPTXPeephole54   NVPTXPeephole() : MachineFunctionPass(ID) {
55     initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
56   }
57 
58   bool runOnMachineFunction(MachineFunction &MF) override;
59 
getPassName__anon739b03090111::NVPTXPeephole60   const char *getPassName() const override {
61     return "NVPTX optimize redundant cvta.to.local instruction";
62   }
63 
getAnalysisUsage__anon739b03090111::NVPTXPeephole64   void getAnalysisUsage(AnalysisUsage &AU) const override {
65     MachineFunctionPass::getAnalysisUsage(AU);
66   }
67 };
68 }
69 
// Storage for the static pass identifier that the NVPTXPeephole constructor
// passes to MachineFunctionPass.
char NVPTXPeephole::ID = 0;

// Pass registration with the argument string "nvptx-peephole" and the
// description "NVPTX Peephole".
// NOTE(review): the two trailing `false` arguments are presumably the
// macro's "CFG-only" and "is-analysis" flags -- confirm against the
// INITIALIZE_PASS definition.
INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
73 
isCVTAToLocalCombinationCandidate(MachineInstr & Root)74 static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
75   auto &MBB = *Root.getParent();
76   auto &MF = *MBB.getParent();
77   // Check current instruction is cvta.to.local
78   if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
79       Root.getOpcode() != NVPTX::cvta_to_local_yes)
80     return false;
81 
82   auto &Op = Root.getOperand(1);
83   const auto &MRI = MF.getRegInfo();
84   MachineInstr *GenericAddrDef = nullptr;
85   if (Op.isReg() && TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
86     GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
87   }
88 
89   // Check the register operand is uniquely defined by LEA_ADDRi instruction
90   if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
91       (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
92        GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
93     return false;
94   }
95 
96   // Check the LEA_ADDRi operand is Frame index
97   auto &BaseAddrOp = GenericAddrDef->getOperand(1);
98   if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) {
99     return true;
100   }
101 
102   return false;
103 }
104 
CombineCVTAToLocal(MachineInstr & Root)105 static void CombineCVTAToLocal(MachineInstr &Root) {
106   auto &MBB = *Root.getParent();
107   auto &MF = *MBB.getParent();
108   const auto &MRI = MF.getRegInfo();
109   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
110   auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
111 
112   MachineInstrBuilder MIB =
113       BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
114               Root.getOperand(0).getReg())
115           .addReg(NVPTX::VRFrameLocal)
116           .addOperand(Prev.getOperand(2));
117 
118   MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
119 
120   // Check if MRI has only one non dbg use, which is Root
121   if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
122     Prev.eraseFromParentAndMarkDBGValuesForRemoval();
123   }
124   Root.eraseFromParentAndMarkDBGValuesForRemoval();
125 }
126 
runOnMachineFunction(MachineFunction & MF)127 bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
128   bool Changed = false;
129   // Loop over all of the basic blocks.
130   for (auto &MBB : MF) {
131     // Traverse the basic block.
132     auto BlockIter = MBB.begin();
133 
134     while (BlockIter != MBB.end()) {
135       auto &MI = *BlockIter++;
136       if (isCVTAToLocalCombinationCandidate(MI)) {
137         CombineCVTAToLocal(MI);
138         Changed = true;
139       }
140     }  // Instruction
141   }    // Basic Block
142 
143   // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
144   const auto &MRI = MF.getRegInfo();
145   if (MRI.use_empty(NVPTX::VRFrame)) {
146     if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) {
147       MI->eraseFromParentAndMarkDBGValuesForRemoval();
148     }
149   }
150 
151   return Changed;
152 }
153 
createNVPTXPeephole()154 MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }
155