1 //===-- LanaiMemAluCombiner.cpp - Pass to combine memory & ALU operations -===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 // Simple pass to combine memory and ALU operations
10 //
11 // The Lanai ISA supports instructions where a load/store modifies the base
12 // register used in the load/store operation. This pass finds suitable
13 // load/store and ALU instructions and combines them into one instruction.
14 //
15 // For example,
16 //   ld [ %r6 -- ], %r12
17 // is a supported instruction that is not currently generated by the instruction
18 // selection pass of this backend. This pass generates these instructions by
19 // merging
20 //   add %r6, -4, %r6
21 // followed by
22 //   ld [ %r6 ], %r12
23 // in the same machine basic block into one machine instruction.
24 //===----------------------------------------------------------------------===//
25 
26 #include "Lanai.h"
27 #include "LanaiTargetMachine.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/RegisterScavenging.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Target/TargetInstrInfo.h"
35 using namespace llvm;
36 
37 #define GET_INSTRMAP_INFO
38 #include "LanaiGenInstrInfo.inc"
39 
40 #define DEBUG_TYPE "lanai-mem-alu-combiner"
41 
42 STATISTIC(NumLdStAluCombined, "Number of memory and ALU instructions combined");
43 
44 static llvm::cl::opt<bool> DisableMemAluCombiner(
45     "disable-lanai-mem-alu-combiner", llvm::cl::init(false),
46     llvm::cl::desc("Do not combine ALU and memory operators"),
47     llvm::cl::Hidden);
48 
49 namespace llvm {
50 void initializeLanaiMemAluCombinerPass(PassRegistry &);
51 } // namespace llvm
52 
53 namespace {
54 typedef MachineBasicBlock::iterator MbbIterator;
55 typedef MachineFunction::iterator MfIterator;
56 
57 class LanaiMemAluCombiner : public MachineFunctionPass {
58 public:
59   static char ID;
LanaiMemAluCombiner()60   explicit LanaiMemAluCombiner() : MachineFunctionPass(ID) {
61     initializeLanaiMemAluCombinerPass(*PassRegistry::getPassRegistry());
62   }
63 
getPassName() const64   const char *getPassName() const override {
65     return "Lanai load / store optimization pass";
66   }
67 
68   bool runOnMachineFunction(MachineFunction &F) override;
69 
getRequiredProperties() const70   MachineFunctionProperties getRequiredProperties() const override {
71     return MachineFunctionProperties().set(
72         MachineFunctionProperties::Property::AllVRegsAllocated);
73   }
74 
75 private:
76   MbbIterator findClosestSuitableAluInstr(MachineBasicBlock *BB,
77                                           const MbbIterator &MemInstr,
78                                           bool Decrement);
79   void insertMergedInstruction(MachineBasicBlock *BB,
80                                const MbbIterator &MemInstr,
81                                const MbbIterator &AluInstr, bool Before);
82   bool combineMemAluInBasicBlock(MachineBasicBlock *BB);
83 
84   // Target machine description which we query for register names, data
85   // layout, etc.
86   const TargetInstrInfo *TII;
87 };
88 } // namespace
89 
90 char LanaiMemAluCombiner::ID = 0;
91 
92 INITIALIZE_PASS(LanaiMemAluCombiner, DEBUG_TYPE,
93                 "Lanai memory ALU combiner pass", false, false)
94 
95 namespace {
isSpls(uint16_t Opcode)96 bool isSpls(uint16_t Opcode) { return Lanai::splsIdempotent(Opcode) == Opcode; }
97 
98 // Determine the opcode for the merged instruction created by considering the
99 // old memory operation's opcode and whether the merged opcode will have an
100 // immediate offset.
mergedOpcode(unsigned OldOpcode,bool ImmediateOffset)101 unsigned mergedOpcode(unsigned OldOpcode, bool ImmediateOffset) {
102   switch (OldOpcode) {
103   case Lanai::LDW_RI:
104   case Lanai::LDW_RR:
105     if (ImmediateOffset)
106       return Lanai::LDW_RI;
107     return Lanai::LDW_RR;
108   case Lanai::LDHs_RI:
109   case Lanai::LDHs_RR:
110     if (ImmediateOffset)
111       return Lanai::LDHs_RI;
112     return Lanai::LDHs_RR;
113   case Lanai::LDHz_RI:
114   case Lanai::LDHz_RR:
115     if (ImmediateOffset)
116       return Lanai::LDHz_RI;
117     return Lanai::LDHz_RR;
118   case Lanai::LDBs_RI:
119   case Lanai::LDBs_RR:
120     if (ImmediateOffset)
121       return Lanai::LDBs_RI;
122     return Lanai::LDBs_RR;
123   case Lanai::LDBz_RI:
124   case Lanai::LDBz_RR:
125     if (ImmediateOffset)
126       return Lanai::LDBz_RI;
127     return Lanai::LDBz_RR;
128   case Lanai::SW_RI:
129   case Lanai::SW_RR:
130     if (ImmediateOffset)
131       return Lanai::SW_RI;
132     return Lanai::SW_RR;
133   case Lanai::STB_RI:
134   case Lanai::STB_RR:
135     if (ImmediateOffset)
136       return Lanai::STB_RI;
137     return Lanai::STB_RR;
138   case Lanai::STH_RI:
139   case Lanai::STH_RR:
140     if (ImmediateOffset)
141       return Lanai::STH_RI;
142     return Lanai::STH_RR;
143   default:
144     return 0;
145   }
146 }
147 
148 // Check if the machine instruction has non-volatile memory operands of the type
149 // supported for combining with ALU instructions.
isNonVolatileMemoryOp(const MachineInstr & MI)150 bool isNonVolatileMemoryOp(const MachineInstr &MI) {
151   if (!MI.hasOneMemOperand())
152     return false;
153 
154   // Determine if the machine instruction is a supported memory operation by
155   // testing if the computed merge opcode is a valid memory operation opcode.
156   if (mergedOpcode(MI.getOpcode(), false) == 0)
157     return false;
158 
159   const MachineMemOperand *MemOperand = *MI.memoperands_begin();
160 
161   // Don't move volatile memory accesses
162   if (MemOperand->isVolatile())
163     return false;
164 
165   return true;
166 }
167 
168 // Test to see if two machine operands are of the same type. This test is less
169 // strict than the MachineOperand::isIdenticalTo function.
isSameOperand(const MachineOperand & Op1,const MachineOperand & Op2)170 bool isSameOperand(const MachineOperand &Op1, const MachineOperand &Op2) {
171   if (Op1.getType() != Op2.getType())
172     return false;
173 
174   switch (Op1.getType()) {
175   case MachineOperand::MO_Register:
176     return Op1.getReg() == Op2.getReg();
177   case MachineOperand::MO_Immediate:
178     return Op1.getImm() == Op2.getImm();
179   default:
180     return false;
181   }
182 }
183 
isZeroOperand(const MachineOperand & Op)184 bool isZeroOperand(const MachineOperand &Op) {
185   return ((Op.isReg() && Op.getReg() == Lanai::R0) ||
186           (Op.isImm() && Op.getImm() == 0));
187 }
188 
189 // Determines whether a register is used by an instruction.
InstrUsesReg(const MbbIterator & Instr,const MachineOperand * Reg)190 bool InstrUsesReg(const MbbIterator &Instr, const MachineOperand *Reg) {
191   for (MachineInstr::const_mop_iterator Mop = Instr->operands_begin();
192        Mop != Instr->operands_end(); ++Mop) {
193     if (isSameOperand(*Mop, *Reg))
194       return true;
195   }
196   return false;
197 }
198 
199 // Converts between machine opcode and AluCode.
200 // Flag using/modifying ALU operations should not be considered for merging and
201 // are omitted from this list.
mergedAluCode(unsigned AluOpcode)202 LPAC::AluCode mergedAluCode(unsigned AluOpcode) {
203   switch (AluOpcode) {
204   case Lanai::ADD_I_LO:
205   case Lanai::ADD_R:
206     return LPAC::ADD;
207   case Lanai::SUB_I_LO:
208   case Lanai::SUB_R:
209     return LPAC::SUB;
210   case Lanai::AND_I_LO:
211   case Lanai::AND_R:
212     return LPAC::AND;
213   case Lanai::OR_I_LO:
214   case Lanai::OR_R:
215     return LPAC::OR;
216   case Lanai::XOR_I_LO:
217   case Lanai::XOR_R:
218     return LPAC::XOR;
219   case Lanai::SHL_R:
220     return LPAC::SHL;
221   case Lanai::SRL_R:
222     return LPAC::SRL;
223   case Lanai::SRA_R:
224     return LPAC::SRA;
225   case Lanai::SA_I:
226   case Lanai::SL_I:
227   default:
228     return LPAC::UNKNOWN;
229   }
230 }
231 
232 // Insert a new combined memory and ALU operation instruction.
233 //
234 // This function builds a new machine instruction using the MachineInstrBuilder
235 // class and inserts it before the memory instruction.
insertMergedInstruction(MachineBasicBlock * BB,const MbbIterator & MemInstr,const MbbIterator & AluInstr,bool Before)236 void LanaiMemAluCombiner::insertMergedInstruction(MachineBasicBlock *BB,
237                                                   const MbbIterator &MemInstr,
238                                                   const MbbIterator &AluInstr,
239                                                   bool Before) {
240   // Insert new combined load/store + alu operation
241   MachineOperand Dest = MemInstr->getOperand(0);
242   MachineOperand Base = MemInstr->getOperand(1);
243   MachineOperand MemOffset = MemInstr->getOperand(2);
244   MachineOperand AluOffset = AluInstr->getOperand(2);
245 
246   // Abort if ALU offset is not a register or immediate
247   assert((AluOffset.isReg() || AluOffset.isImm()) &&
248          "Unsupported operand type in merge");
249 
250   // Determined merged instructions opcode and ALU code
251   LPAC::AluCode AluOpcode = mergedAluCode(AluInstr->getOpcode());
252   unsigned NewOpc = mergedOpcode(MemInstr->getOpcode(), AluOffset.isImm());
253 
254   assert(AluOpcode != LPAC::UNKNOWN && "Unknown ALU code in merging");
255   assert(NewOpc != 0 && "Unknown merged node opcode");
256 
257   // Build and insert new machine instruction
258   MachineInstrBuilder InstrBuilder =
259       BuildMI(*BB, MemInstr, MemInstr->getDebugLoc(), TII->get(NewOpc));
260   InstrBuilder.addReg(Dest.getReg(), getDefRegState(true));
261   InstrBuilder.addReg(Base.getReg(), getKillRegState(true));
262 
263   // Add offset to machine instruction
264   if (AluOffset.isReg())
265     InstrBuilder.addReg(AluOffset.getReg());
266   else if (AluOffset.isImm())
267     InstrBuilder.addImm(AluOffset.getImm());
268   else
269     llvm_unreachable("Unsupported ld/st ALU merge.");
270 
271   // Create a pre-op if the ALU operation preceded the memory operation or the
272   // MemOffset is non-zero (i.e. the memory value should be adjusted before
273   // accessing it), else create a post-op.
274   if (Before || !isZeroOperand(MemOffset))
275     InstrBuilder.addImm(LPAC::makePreOp(AluOpcode));
276   else
277     InstrBuilder.addImm(LPAC::makePostOp(AluOpcode));
278 
279   // Transfer memory operands.
280   InstrBuilder->setMemRefs(MemInstr->memoperands_begin(),
281                            MemInstr->memoperands_end());
282 }
283 
284 // Function determines if ALU operation (in alu_iter) can be combined with
285 // a load/store with base and offset.
isSuitableAluInstr(bool IsSpls,const MbbIterator & AluIter,const MachineOperand & Base,const MachineOperand & Offset)286 bool isSuitableAluInstr(bool IsSpls, const MbbIterator &AluIter,
287                         const MachineOperand &Base,
288                         const MachineOperand &Offset) {
289   // ALU operations have 3 operands
290   if (AluIter->getNumOperands() != 3)
291     return false;
292 
293   MachineOperand &Dest = AluIter->getOperand(0);
294   MachineOperand &Op1 = AluIter->getOperand(1);
295   MachineOperand &Op2 = AluIter->getOperand(2);
296 
297   // Only match instructions using the base register as destination and with the
298   // base and first operand equal
299   if (!isSameOperand(Dest, Base) || !isSameOperand(Dest, Op1))
300     return false;
301 
302   if (Op2.isImm()) {
303     // It is not a match if the 2nd operand in the ALU operation is an
304     // immediate but the ALU operation is not an addition.
305     if (AluIter->getOpcode() != Lanai::ADD_I_LO)
306       return false;
307 
308     if (Offset.isReg() && Offset.getReg() == Lanai::R0)
309       return true;
310 
311     if (Offset.isImm() &&
312         ((Offset.getImm() == 0 &&
313           // Check that the Op2 would fit in the immediate field of the
314           // memory operation.
315           ((IsSpls && isInt<10>(Op2.getImm())) ||
316            (!IsSpls && isInt<16>(Op2.getImm())))) ||
317          Offset.getImm() == Op2.getImm()))
318       return true;
319   } else if (Op2.isReg()) {
320     // The Offset and 2nd operand are both registers and equal
321     if (Offset.isReg() && Op2.getReg() == Offset.getReg())
322       return true;
323   } else
324     // Only consider operations with register or immediate values
325     return false;
326 
327   return false;
328 }
329 
findClosestSuitableAluInstr(MachineBasicBlock * BB,const MbbIterator & MemInstr,const bool Decrement)330 MbbIterator LanaiMemAluCombiner::findClosestSuitableAluInstr(
331     MachineBasicBlock *BB, const MbbIterator &MemInstr, const bool Decrement) {
332   MachineOperand *Base = &MemInstr->getOperand(1);
333   MachineOperand *Offset = &MemInstr->getOperand(2);
334   bool IsSpls = isSpls(MemInstr->getOpcode());
335 
336   MbbIterator First = MemInstr;
337   MbbIterator Last = Decrement ? BB->begin() : BB->end();
338 
339   while (First != Last) {
340     Decrement ? --First : ++First;
341 
342     // Skip over debug instructions
343     if (First->isDebugValue())
344       continue;
345 
346     if (isSuitableAluInstr(IsSpls, First, *Base, *Offset)) {
347       return First;
348     }
349 
350     // Usage of the base or offset register is not a form suitable for merging.
351     if (First != Last) {
352       if (InstrUsesReg(First, Base))
353         break;
354       if (Offset->isReg() && InstrUsesReg(First, Offset))
355         break;
356     }
357   }
358 
359   return MemInstr;
360 }
361 
combineMemAluInBasicBlock(MachineBasicBlock * BB)362 bool LanaiMemAluCombiner::combineMemAluInBasicBlock(MachineBasicBlock *BB) {
363   bool Modified = false;
364 
365   MbbIterator MBBIter = BB->begin(), End = BB->end();
366   while (MBBIter != End) {
367     bool IsMemOp = isNonVolatileMemoryOp(*MBBIter);
368 
369     if (IsMemOp) {
370       MachineOperand AluOperand = MBBIter->getOperand(3);
371       unsigned int DestReg = MBBIter->getOperand(0).getReg(),
372                    BaseReg = MBBIter->getOperand(1).getReg();
373       assert(AluOperand.isImm() && "Unexpected memory operator type");
374       LPAC::AluCode AluOpcode = static_cast<LPAC::AluCode>(AluOperand.getImm());
375 
376       // Skip memory operations that already modify the base register or if
377       // the destination and base register are the same
378       if (!LPAC::modifiesOp(AluOpcode) && DestReg != BaseReg) {
379         for (int Inc = 0; Inc <= 1; ++Inc) {
380           MbbIterator AluIter =
381               findClosestSuitableAluInstr(BB, MBBIter, Inc == 0);
382           if (AluIter != MBBIter) {
383             insertMergedInstruction(BB, MBBIter, AluIter, Inc == 0);
384 
385             ++NumLdStAluCombined;
386             Modified = true;
387 
388             // Erase the matching ALU instruction
389             BB->erase(AluIter);
390             // Erase old load/store instruction
391             BB->erase(MBBIter++);
392             break;
393           }
394         }
395       }
396     }
397     if (MBBIter == End)
398       break;
399     ++MBBIter;
400   }
401 
402   return Modified;
403 }
404 
405 // Driver function that iterates over the machine basic building blocks of a
406 // machine function
runOnMachineFunction(MachineFunction & MF)407 bool LanaiMemAluCombiner::runOnMachineFunction(MachineFunction &MF) {
408   if (DisableMemAluCombiner)
409     return false;
410 
411   TII = MF.getSubtarget<LanaiSubtarget>().getInstrInfo();
412   bool Modified = false;
413   for (MfIterator MFI = MF.begin(); MFI != MF.end(); ++MFI) {
414     Modified |= combineMemAluInBasicBlock(&*MFI);
415   }
416   return Modified;
417 }
418 } // namespace
419 
createLanaiMemAluCombinerPass()420 FunctionPass *llvm::createLanaiMemAluCombinerPass() {
421   return new LanaiMemAluCombiner();
422 }
423