1 //===----- RISCVMergeBaseOffset.cpp - Optimise address calculations  ------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// Merge the offset of an address calculation into the offset field
// of the instructions in a global address lowering sequence. This pass
// transforms:
//   lui  vreg1, %hi(s)
//   addi vreg2, vreg1, %lo(s)
//   addi vreg3, vreg2, Offset
//
//   Into:
//   lui  vreg1, %hi(s+Offset)
//   addi vreg2, vreg1, %lo(s+Offset)
//
// The transformation is carried out under certain conditions:
// 1) The offset field in the base of global address lowering sequence is zero.
// 2) The lowered global address has only one use.
//
// The offset can appear in several forms: as the immediate of an ADDI, as a
// constant built by LUI (optionally followed by ADDI) and added with ADD, or
// as the immediate of a load/store. This pass handles all of them.
25 //===----------------------------------------------------------------------===//
26 
27 #include "RISCV.h"
28 #include "RISCVTargetMachine.h"
29 #include "llvm/CodeGen/Passes.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/TargetRegistry.h"
32 #include "llvm/Target/TargetOptions.h"
33 #include <set>
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "riscv-merge-base-offset"
37 #define RISCV_MERGE_BASE_OFFSET_NAME "RISCV Merge Base Offset"
38 namespace {
39 
40 struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
41   static char ID;
42   const MachineFunction *MF;
43   bool runOnMachineFunction(MachineFunction &Fn) override;
44   bool detectLuiAddiGlobal(MachineInstr &LUI, MachineInstr *&ADDI);
45 
46   bool detectAndFoldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI);
47   void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail,
48                   int64_t Offset);
49   bool matchLargeOffset(MachineInstr &TailAdd, unsigned GSReg, int64_t &Offset);
RISCVMergeBaseOffsetOpt__anon641e422f0111::RISCVMergeBaseOffsetOpt50   RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
51 
getRequiredProperties__anon641e422f0111::RISCVMergeBaseOffsetOpt52   MachineFunctionProperties getRequiredProperties() const override {
53     return MachineFunctionProperties().set(
54         MachineFunctionProperties::Property::IsSSA);
55   }
56 
getPassName__anon641e422f0111::RISCVMergeBaseOffsetOpt57   StringRef getPassName() const override {
58     return RISCV_MERGE_BASE_OFFSET_NAME;
59   }
60 
61 private:
62   MachineRegisterInfo *MRI;
63   std::set<MachineInstr *> DeadInstrs;
64 };
65 }; // end anonymous namespace
66 
67 char RISCVMergeBaseOffsetOpt::ID = 0;
68 INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, "riscv-merge-base-offset",
69                 RISCV_MERGE_BASE_OFFSET_NAME, false, false)
70 
71 // Detect the pattern:
72 //   lui   vreg1, %hi(s)
73 //   addi  vreg2, vreg1, %lo(s)
74 //
75 //   Pattern only accepted if:
76 //     1) ADDI has only one use.
77 //     2) LUI has only one use; which is the ADDI.
78 //     3) Both ADDI and LUI have GlobalAddress type which indicates that these
79 //        are generated from global address lowering.
80 //     4) Offset value in the Global Address is 0.
detectLuiAddiGlobal(MachineInstr & HiLUI,MachineInstr * & LoADDI)81 bool RISCVMergeBaseOffsetOpt::detectLuiAddiGlobal(MachineInstr &HiLUI,
82                                                   MachineInstr *&LoADDI) {
83   if (HiLUI.getOpcode() != RISCV::LUI ||
84       HiLUI.getOperand(1).getTargetFlags() != RISCVII::MO_HI ||
85       HiLUI.getOperand(1).getType() != MachineOperand::MO_GlobalAddress ||
86       HiLUI.getOperand(1).getOffset() != 0 ||
87       !MRI->hasOneUse(HiLUI.getOperand(0).getReg()))
88     return false;
89   unsigned HiLuiDestReg = HiLUI.getOperand(0).getReg();
90   LoADDI = MRI->use_begin(HiLuiDestReg)->getParent();
91   if (LoADDI->getOpcode() != RISCV::ADDI ||
92       LoADDI->getOperand(2).getTargetFlags() != RISCVII::MO_LO ||
93       LoADDI->getOperand(2).getType() != MachineOperand::MO_GlobalAddress ||
94       LoADDI->getOperand(2).getOffset() != 0 ||
95       !MRI->hasOneUse(LoADDI->getOperand(0).getReg()))
96     return false;
97   return true;
98 }
99 
100 // Update the offset in HiLUI and LoADDI instructions.
101 // Delete the tail instruction and update all the uses to use the
102 // output from LoADDI.
foldOffset(MachineInstr & HiLUI,MachineInstr & LoADDI,MachineInstr & Tail,int64_t Offset)103 void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &HiLUI,
104                                          MachineInstr &LoADDI,
105                                          MachineInstr &Tail, int64_t Offset) {
106   // Put the offset back in HiLUI and the LoADDI
107   HiLUI.getOperand(1).setOffset(Offset);
108   LoADDI.getOperand(2).setOffset(Offset);
109   // Delete the tail instruction.
110   DeadInstrs.insert(&Tail);
111   MRI->replaceRegWith(Tail.getOperand(0).getReg(),
112                       LoADDI.getOperand(0).getReg());
113   LLVM_DEBUG(dbgs() << "  Merged offset " << Offset << " into base.\n"
114                     << "     " << HiLUI << "     " << LoADDI;);
115 }
116 
117 // Detect patterns for large offsets that are passed into an ADD instruction.
118 //
119 //                     Base address lowering is of the form:
120 //                        HiLUI:  lui   vreg1, %hi(s)
121 //                       LoADDI:  addi  vreg2, vreg1, %lo(s)
122 //                       /                                  \
123 //                      /                                    \
124 //                     /                                      \
125 //                    /  The large offset can be of two forms: \
126 //  1) Offset that has non zero bits in lower      2) Offset that has non zero
127 //     12 bits and upper 20 bits                      bits in upper 20 bits only
128 //   OffseLUI: lui   vreg3, 4
129 // OffsetTail: addi  voff, vreg3, 188                OffsetTail: lui  voff, 128
130 //                    \                                        /
131 //                     \                                      /
132 //                      \                                    /
133 //                       \                                  /
134 //                         TailAdd: add  vreg4, vreg2, voff
matchLargeOffset(MachineInstr & TailAdd,unsigned GAReg,int64_t & Offset)135 bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd,
136                                                unsigned GAReg,
137                                                int64_t &Offset) {
138   assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!");
139   unsigned Rs = TailAdd.getOperand(1).getReg();
140   unsigned Rt = TailAdd.getOperand(2).getReg();
141   unsigned Reg = Rs == GAReg ? Rt : Rs;
142 
143   // Can't fold if the register has more than one use.
144   if (!MRI->hasOneUse(Reg))
145     return false;
146   // This can point to an ADDI or a LUI:
147   MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
148   if (OffsetTail.getOpcode() == RISCV::ADDI) {
149     // The offset value has non zero bits in both %hi and %lo parts.
150     // Detect an ADDI that feeds from a LUI instruction.
151     MachineOperand &AddiImmOp = OffsetTail.getOperand(2);
152     if (AddiImmOp.getTargetFlags() != RISCVII::MO_None)
153       return false;
154     int64_t OffLo = AddiImmOp.getImm();
155     MachineInstr &OffsetLui =
156         *MRI->getVRegDef(OffsetTail.getOperand(1).getReg());
157     MachineOperand &LuiImmOp = OffsetLui.getOperand(1);
158     if (OffsetLui.getOpcode() != RISCV::LUI ||
159         LuiImmOp.getTargetFlags() != RISCVII::MO_None ||
160         !MRI->hasOneUse(OffsetLui.getOperand(0).getReg()))
161       return false;
162     int64_t OffHi = OffsetLui.getOperand(1).getImm();
163     Offset = (OffHi << 12) + OffLo;
164     LLVM_DEBUG(dbgs() << "  Offset Instrs: " << OffsetTail
165                       << "                 " << OffsetLui);
166     DeadInstrs.insert(&OffsetTail);
167     DeadInstrs.insert(&OffsetLui);
168     return true;
169   } else if (OffsetTail.getOpcode() == RISCV::LUI) {
170     // The offset value has all zero bits in the lower 12 bits. Only LUI
171     // exists.
172     LLVM_DEBUG(dbgs() << "  Offset Instr: " << OffsetTail);
173     Offset = OffsetTail.getOperand(1).getImm() << 12;
174     DeadInstrs.insert(&OffsetTail);
175     return true;
176   }
177   return false;
178 }
179 
detectAndFoldOffset(MachineInstr & HiLUI,MachineInstr & LoADDI)180 bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
181                                                   MachineInstr &LoADDI) {
182   unsigned DestReg = LoADDI.getOperand(0).getReg();
183   assert(MRI->hasOneUse(DestReg) && "expected one use for LoADDI");
184   // LoADDI has only one use.
185   MachineInstr &Tail = *MRI->use_begin(DestReg)->getParent();
186   switch (Tail.getOpcode()) {
187   default:
188     LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
189                       << Tail);
190     return false;
191   case RISCV::ADDI: {
192     // Offset is simply an immediate operand.
193     int64_t Offset = Tail.getOperand(2).getImm();
194     LLVM_DEBUG(dbgs() << "  Offset Instr: " << Tail);
195     foldOffset(HiLUI, LoADDI, Tail, Offset);
196     return true;
197   } break;
198   case RISCV::ADD: {
199     // The offset is too large to fit in the immediate field of ADDI.
200     // This can be in two forms:
201     // 1) LUI hi_Offset followed by:
202     //    ADDI lo_offset
203     //    This happens in case the offset has non zero bits in
204     //    both hi 20 and lo 12 bits.
205     // 2) LUI (offset20)
206     //    This happens in case the lower 12 bits of the offset are zeros.
207     int64_t Offset;
208     if (!matchLargeOffset(Tail, DestReg, Offset))
209       return false;
210     foldOffset(HiLUI, LoADDI, Tail, Offset);
211     return true;
212   } break;
213   case RISCV::LB:
214   case RISCV::LH:
215   case RISCV::LW:
216   case RISCV::LBU:
217   case RISCV::LHU:
218   case RISCV::LWU:
219   case RISCV::LD:
220   case RISCV::FLW:
221   case RISCV::FLD:
222   case RISCV::SB:
223   case RISCV::SH:
224   case RISCV::SW:
225   case RISCV::SD:
226   case RISCV::FSW:
227   case RISCV::FSD: {
228     // Transforms the sequence:            Into:
229     // HiLUI:  lui vreg1, %hi(foo)          --->  lui vreg1, %hi(foo+8)
230     // LoADDI: addi vreg2, vreg1, %lo(foo)  --->  lw vreg3, lo(foo+8)(vreg1)
231     // Tail:   lw vreg3, 8(vreg2)
232     if (Tail.getOperand(1).isFI())
233       return false;
234     // Register defined by LoADDI should be used in the base part of the
235     // load\store instruction. Otherwise, no folding possible.
236     unsigned BaseAddrReg = Tail.getOperand(1).getReg();
237     if (DestReg != BaseAddrReg)
238       return false;
239     MachineOperand &TailImmOp = Tail.getOperand(2);
240     int64_t Offset = TailImmOp.getImm();
241     // Update the offsets in global address lowering.
242     HiLUI.getOperand(1).setOffset(Offset);
243     // Update the immediate in the Tail instruction to add the offset.
244     Tail.RemoveOperand(2);
245     MachineOperand &ImmOp = LoADDI.getOperand(2);
246     ImmOp.setOffset(Offset);
247     Tail.addOperand(ImmOp);
248     // Update the base reg in the Tail instruction to feed from LUI.
249     // Output of HiLUI is only used in LoADDI, no need to use
250     // MRI->replaceRegWith().
251     Tail.getOperand(1).setReg(HiLUI.getOperand(0).getReg());
252     DeadInstrs.insert(&LoADDI);
253     return true;
254   } break;
255   }
256   return false;
257 }
258 
runOnMachineFunction(MachineFunction & Fn)259 bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
260   if (skipFunction(Fn.getFunction()))
261     return false;
262 
263   DeadInstrs.clear();
264   MRI = &Fn.getRegInfo();
265   for (MachineBasicBlock &MBB : Fn) {
266     LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
267     for (MachineInstr &HiLUI : MBB) {
268       MachineInstr *LoADDI = nullptr;
269       if (!detectLuiAddiGlobal(HiLUI, LoADDI))
270         continue;
271       LLVM_DEBUG(dbgs() << "  Found lowered global address with one use: "
272                         << *LoADDI->getOperand(2).getGlobal() << "\n");
273       // If the use count is only one, merge the offset
274       detectAndFoldOffset(HiLUI, *LoADDI);
275     }
276   }
277   // Delete dead instructions.
278   for (auto *MI : DeadInstrs)
279     MI->eraseFromParent();
280   return true;
281 }
282 
283 /// Returns an instance of the Merge Base Offset Optimization pass.
createRISCVMergeBaseOffsetOptPass()284 FunctionPass *llvm::createRISCVMergeBaseOffsetOptPass() {
285   return new RISCVMergeBaseOffsetOpt();
286 }
287