1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "ARM.h"
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterScavenging.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Target/TargetInstrInfo.h"
43 #include "llvm/Target/TargetMachine.h"
44 #include "llvm/Target/TargetRegisterInfo.h"
45 using namespace llvm;
46
47 #define DEBUG_TYPE "arm-ldst-opt"
48
49 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
50 STATISTIC(NumSTMGened , "Number of stm instructions generated");
51 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
52 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
53 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
54 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
55 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
56 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
57 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
58 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
59 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
60
61 /// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
62 /// load / store instructions to form ldm / stm instructions.
63
64 namespace {
65 struct ARMLoadStoreOpt : public MachineFunctionPass {
66 static char ID;
ARMLoadStoreOpt__anon4bf681e70111::ARMLoadStoreOpt67 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
68
69 const TargetInstrInfo *TII;
70 const TargetRegisterInfo *TRI;
71 const ARMSubtarget *STI;
72 const TargetLowering *TL;
73 ARMFunctionInfo *AFI;
74 RegScavenger *RS;
75 bool isThumb1, isThumb2;
76
77 bool runOnMachineFunction(MachineFunction &Fn) override;
78
getPassName__anon4bf681e70111::ARMLoadStoreOpt79 const char *getPassName() const override {
80 return "ARM load / store optimization pass";
81 }
82
83 private:
84 struct MemOpQueueEntry {
85 int Offset;
86 unsigned Reg;
87 bool isKill;
88 unsigned Position;
89 MachineBasicBlock::iterator MBBI;
90 bool Merged;
MemOpQueueEntry__anon4bf681e70111::ARMLoadStoreOpt::MemOpQueueEntry91 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
92 MachineBasicBlock::iterator i)
93 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
94 };
95 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
96 typedef MemOpQueue::iterator MemOpQueueIter;
97
98 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
99 const MemOpQueue &MemOps, unsigned DefReg,
100 unsigned RangeBegin, unsigned RangeEnd);
101 void UpdateBaseRegUses(MachineBasicBlock &MBB,
102 MachineBasicBlock::iterator MBBI,
103 DebugLoc dl, unsigned Base, unsigned WordOffset,
104 ARMCC::CondCodes Pred, unsigned PredReg);
105 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
106 int Offset, unsigned Base, bool BaseKill, int Opcode,
107 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
108 DebugLoc dl,
109 ArrayRef<std::pair<unsigned, bool> > Regs,
110 ArrayRef<unsigned> ImpDefs);
111 void MergeOpsUpdate(MachineBasicBlock &MBB,
112 MemOpQueue &MemOps,
113 unsigned memOpsBegin,
114 unsigned memOpsEnd,
115 unsigned insertAfter,
116 int Offset,
117 unsigned Base,
118 bool BaseKill,
119 int Opcode,
120 ARMCC::CondCodes Pred,
121 unsigned PredReg,
122 unsigned Scratch,
123 DebugLoc dl,
124 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
125 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
126 int Opcode, unsigned Size,
127 ARMCC::CondCodes Pred, unsigned PredReg,
128 unsigned Scratch, MemOpQueue &MemOps,
129 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
130 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
131 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
132 MachineBasicBlock::iterator &MBBI);
133 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
134 MachineBasicBlock::iterator MBBI,
135 const TargetInstrInfo *TII,
136 bool &Advance,
137 MachineBasicBlock::iterator &I);
138 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
139 MachineBasicBlock::iterator MBBI,
140 bool &Advance,
141 MachineBasicBlock::iterator &I);
142 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
143 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
144 };
145 char ARMLoadStoreOpt::ID = 0;
146 }
147
definesCPSR(const MachineInstr * MI)148 static bool definesCPSR(const MachineInstr *MI) {
149 for (const auto &MO : MI->operands()) {
150 if (!MO.isReg())
151 continue;
152 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
153 // If the instruction has live CPSR def, then it's not safe to fold it
154 // into load / store.
155 return true;
156 }
157
158 return false;
159 }
160
getMemoryOpOffset(const MachineInstr * MI)161 static int getMemoryOpOffset(const MachineInstr *MI) {
162 int Opcode = MI->getOpcode();
163 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
164 unsigned NumOperands = MI->getDesc().getNumOperands();
165 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
166
167 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
168 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
169 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
170 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
171 return OffField;
172
173 // Thumb1 immediate offsets are scaled by 4
174 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
175 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
176 return OffField * 4;
177
178 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
179 : ARM_AM::getAM5Offset(OffField) * 4;
180 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
181 : ARM_AM::getAM5Op(OffField);
182
183 if (Op == ARM_AM::sub)
184 return -Offset;
185
186 return Offset;
187 }
188
getLoadStoreMultipleOpcode(int Opcode,ARM_AM::AMSubMode Mode)189 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
190 switch (Opcode) {
191 default: llvm_unreachable("Unhandled opcode!");
192 case ARM::LDRi12:
193 ++NumLDMGened;
194 switch (Mode) {
195 default: llvm_unreachable("Unhandled submode!");
196 case ARM_AM::ia: return ARM::LDMIA;
197 case ARM_AM::da: return ARM::LDMDA;
198 case ARM_AM::db: return ARM::LDMDB;
199 case ARM_AM::ib: return ARM::LDMIB;
200 }
201 case ARM::STRi12:
202 ++NumSTMGened;
203 switch (Mode) {
204 default: llvm_unreachable("Unhandled submode!");
205 case ARM_AM::ia: return ARM::STMIA;
206 case ARM_AM::da: return ARM::STMDA;
207 case ARM_AM::db: return ARM::STMDB;
208 case ARM_AM::ib: return ARM::STMIB;
209 }
210 case ARM::tLDRi:
211 case ARM::tLDRspi:
212 // tLDMIA is writeback-only - unless the base register is in the input
213 // reglist.
214 ++NumLDMGened;
215 switch (Mode) {
216 default: llvm_unreachable("Unhandled submode!");
217 case ARM_AM::ia: return ARM::tLDMIA;
218 }
219 case ARM::tSTRi:
220 case ARM::tSTRspi:
221 // There is no non-writeback tSTMIA either.
222 ++NumSTMGened;
223 switch (Mode) {
224 default: llvm_unreachable("Unhandled submode!");
225 case ARM_AM::ia: return ARM::tSTMIA_UPD;
226 }
227 case ARM::t2LDRi8:
228 case ARM::t2LDRi12:
229 ++NumLDMGened;
230 switch (Mode) {
231 default: llvm_unreachable("Unhandled submode!");
232 case ARM_AM::ia: return ARM::t2LDMIA;
233 case ARM_AM::db: return ARM::t2LDMDB;
234 }
235 case ARM::t2STRi8:
236 case ARM::t2STRi12:
237 ++NumSTMGened;
238 switch (Mode) {
239 default: llvm_unreachable("Unhandled submode!");
240 case ARM_AM::ia: return ARM::t2STMIA;
241 case ARM_AM::db: return ARM::t2STMDB;
242 }
243 case ARM::VLDRS:
244 ++NumVLDMGened;
245 switch (Mode) {
246 default: llvm_unreachable("Unhandled submode!");
247 case ARM_AM::ia: return ARM::VLDMSIA;
248 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
249 }
250 case ARM::VSTRS:
251 ++NumVSTMGened;
252 switch (Mode) {
253 default: llvm_unreachable("Unhandled submode!");
254 case ARM_AM::ia: return ARM::VSTMSIA;
255 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
256 }
257 case ARM::VLDRD:
258 ++NumVLDMGened;
259 switch (Mode) {
260 default: llvm_unreachable("Unhandled submode!");
261 case ARM_AM::ia: return ARM::VLDMDIA;
262 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
263 }
264 case ARM::VSTRD:
265 ++NumVSTMGened;
266 switch (Mode) {
267 default: llvm_unreachable("Unhandled submode!");
268 case ARM_AM::ia: return ARM::VSTMDIA;
269 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
270 }
271 }
272 }
273
274 namespace llvm {
275 namespace ARM_AM {
276
getLoadStoreMultipleSubMode(int Opcode)277 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
278 switch (Opcode) {
279 default: llvm_unreachable("Unhandled opcode!");
280 case ARM::LDMIA_RET:
281 case ARM::LDMIA:
282 case ARM::LDMIA_UPD:
283 case ARM::STMIA:
284 case ARM::STMIA_UPD:
285 case ARM::tLDMIA:
286 case ARM::tLDMIA_UPD:
287 case ARM::tSTMIA_UPD:
288 case ARM::t2LDMIA_RET:
289 case ARM::t2LDMIA:
290 case ARM::t2LDMIA_UPD:
291 case ARM::t2STMIA:
292 case ARM::t2STMIA_UPD:
293 case ARM::VLDMSIA:
294 case ARM::VLDMSIA_UPD:
295 case ARM::VSTMSIA:
296 case ARM::VSTMSIA_UPD:
297 case ARM::VLDMDIA:
298 case ARM::VLDMDIA_UPD:
299 case ARM::VSTMDIA:
300 case ARM::VSTMDIA_UPD:
301 return ARM_AM::ia;
302
303 case ARM::LDMDA:
304 case ARM::LDMDA_UPD:
305 case ARM::STMDA:
306 case ARM::STMDA_UPD:
307 return ARM_AM::da;
308
309 case ARM::LDMDB:
310 case ARM::LDMDB_UPD:
311 case ARM::STMDB:
312 case ARM::STMDB_UPD:
313 case ARM::t2LDMDB:
314 case ARM::t2LDMDB_UPD:
315 case ARM::t2STMDB:
316 case ARM::t2STMDB_UPD:
317 case ARM::VLDMSDB_UPD:
318 case ARM::VSTMSDB_UPD:
319 case ARM::VLDMDDB_UPD:
320 case ARM::VSTMDDB_UPD:
321 return ARM_AM::db;
322
323 case ARM::LDMIB:
324 case ARM::LDMIB_UPD:
325 case ARM::STMIB:
326 case ARM::STMIB_UPD:
327 return ARM_AM::ib;
328 }
329 }
330
331 } // end namespace ARM_AM
332 } // end namespace llvm
333
isT1i32Load(unsigned Opc)334 static bool isT1i32Load(unsigned Opc) {
335 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
336 }
337
isT2i32Load(unsigned Opc)338 static bool isT2i32Load(unsigned Opc) {
339 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
340 }
341
isi32Load(unsigned Opc)342 static bool isi32Load(unsigned Opc) {
343 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
344 }
345
isT1i32Store(unsigned Opc)346 static bool isT1i32Store(unsigned Opc) {
347 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
348 }
349
isT2i32Store(unsigned Opc)350 static bool isT2i32Store(unsigned Opc) {
351 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
352 }
353
isi32Store(unsigned Opc)354 static bool isi32Store(unsigned Opc) {
355 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
356 }
357
getImmScale(unsigned Opc)358 static unsigned getImmScale(unsigned Opc) {
359 switch (Opc) {
360 default: llvm_unreachable("Unhandled opcode!");
361 case ARM::tLDRi:
362 case ARM::tSTRi:
363 case ARM::tLDRspi:
364 case ARM::tSTRspi:
365 return 1;
366 case ARM::tLDRHi:
367 case ARM::tSTRHi:
368 return 2;
369 case ARM::tLDRBi:
370 case ARM::tSTRBi:
371 return 4;
372 }
373 }
374
375 /// Update future uses of the base register with the offset introduced
376 /// due to writeback. This function only works on Thumb1.
377 void
UpdateBaseRegUses(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc dl,unsigned Base,unsigned WordOffset,ARMCC::CondCodes Pred,unsigned PredReg)378 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
379 MachineBasicBlock::iterator MBBI,
380 DebugLoc dl, unsigned Base,
381 unsigned WordOffset,
382 ARMCC::CondCodes Pred, unsigned PredReg) {
383 assert(isThumb1 && "Can only update base register uses for Thumb1!");
384 // Start updating any instructions with immediate offsets. Insert a SUB before
385 // the first non-updateable instruction (if any).
386 for (; MBBI != MBB.end(); ++MBBI) {
387 bool InsertSub = false;
388 unsigned Opc = MBBI->getOpcode();
389
390 if (MBBI->readsRegister(Base)) {
391 int Offset;
392 bool IsLoad =
393 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
394 bool IsStore =
395 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
396
397 if (IsLoad || IsStore) {
398 // Loads and stores with immediate offsets can be updated, but only if
399 // the new offset isn't negative.
400 // The MachineOperand containing the offset immediate is the last one
401 // before predicates.
402 MachineOperand &MO =
403 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
404 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
405 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
406
407 // If storing the base register, it needs to be reset first.
408 unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
409
410 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
411 MO.setImm(Offset);
412 else
413 InsertSub = true;
414
415 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
416 !definesCPSR(MBBI)) {
417 // SUBS/ADDS using this register, with a dead def of the CPSR.
418 // Merge it with the update; if the merged offset is too large,
419 // insert a new sub instead.
420 MachineOperand &MO =
421 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
422 Offset = (Opc == ARM::tSUBi8) ?
423 MO.getImm() + WordOffset * 4 :
424 MO.getImm() - WordOffset * 4 ;
425 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
426 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
427 // Offset == 0.
428 MO.setImm(Offset);
429 // The base register has now been reset, so exit early.
430 return;
431 } else {
432 InsertSub = true;
433 }
434
435 } else {
436 // Can't update the instruction.
437 InsertSub = true;
438 }
439
440 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
441 // Since SUBS sets the condition flags, we can't place the base reset
442 // after an instruction that has a live CPSR def.
443 // The base register might also contain an argument for a function call.
444 InsertSub = true;
445 }
446
447 if (InsertSub) {
448 // An instruction above couldn't be updated, so insert a sub.
449 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
450 .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
451 .addImm(Pred).addReg(PredReg);
452 return;
453 }
454
455 if (MBBI->killsRegister(Base))
456 // Register got killed. Stop updating.
457 return;
458 }
459
460 // End of block was reached.
461 if (MBB.succ_size() > 0) {
462 // FIXME: Because of a bug, live registers are sometimes missing from
463 // the successor blocks' live-in sets. This means we can't trust that
464 // information and *always* have to reset at the end of a block.
465 // See PR21029.
466 if (MBBI != MBB.end()) --MBBI;
467 AddDefaultT1CC(
468 BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
469 .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
470 .addImm(Pred).addReg(PredReg);
471 }
472 }
473
474 /// MergeOps - Create and insert a LDM or STM with Base as base register and
475 /// registers in Regs as the register operands that would be loaded / stored.
476 /// It returns true if the transformation is done.
477 bool
MergeOps(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,int Offset,unsigned Base,bool BaseKill,int Opcode,ARMCC::CondCodes Pred,unsigned PredReg,unsigned Scratch,DebugLoc dl,ArrayRef<std::pair<unsigned,bool>> Regs,ArrayRef<unsigned> ImpDefs)478 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
479 MachineBasicBlock::iterator MBBI,
480 int Offset, unsigned Base, bool BaseKill,
481 int Opcode, ARMCC::CondCodes Pred,
482 unsigned PredReg, unsigned Scratch, DebugLoc dl,
483 ArrayRef<std::pair<unsigned, bool> > Regs,
484 ArrayRef<unsigned> ImpDefs) {
485 // Only a single register to load / store. Don't bother.
486 unsigned NumRegs = Regs.size();
487 if (NumRegs <= 1)
488 return false;
489
490 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
491 // Compute liveness information for that register to make the decision.
492 bool SafeToClobberCPSR = !isThumb1 ||
493 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
494 MachineBasicBlock::LQR_Dead);
495
496 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
497
498 // Exception: If the base register is in the input reglist, Thumb1 LDM is
499 // non-writeback.
500 // It's also not possible to merge an STR of the base register in Thumb1.
501 if (isThumb1)
502 for (unsigned I = 0; I < NumRegs; ++I)
503 if (Base == Regs[I].first) {
504 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
505 if (Opcode == ARM::tLDRi) {
506 Writeback = false;
507 break;
508 } else if (Opcode == ARM::tSTRi) {
509 return false;
510 }
511 }
512
513 ARM_AM::AMSubMode Mode = ARM_AM::ia;
514 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
515 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
516 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
517
518 if (Offset == 4 && haveIBAndDA) {
519 Mode = ARM_AM::ib;
520 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
521 Mode = ARM_AM::da;
522 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
523 // VLDM/VSTM do not support DB mode without also updating the base reg.
524 Mode = ARM_AM::db;
525 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
526 // Check if this is a supported opcode before inserting instructions to
527 // calculate a new base register.
528 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
529
530 // If starting offset isn't zero, insert a MI to materialize a new base.
531 // But only do so if it is cost effective, i.e. merging more than two
532 // loads / stores.
533 if (NumRegs <= 2)
534 return false;
535
536 // On Thumb1, it's not worth materializing a new base register without
537 // clobbering the CPSR (i.e. not using ADDS/SUBS).
538 if (!SafeToClobberCPSR)
539 return false;
540
541 unsigned NewBase;
542 if (isi32Load(Opcode)) {
543 // If it is a load, then just use one of the destination register to
544 // use as the new base.
545 NewBase = Regs[NumRegs-1].first;
546 } else {
547 // Use the scratch register to use as a new base.
548 NewBase = Scratch;
549 if (NewBase == 0)
550 return false;
551 }
552
553 int BaseOpc =
554 isThumb2 ? ARM::t2ADDri :
555 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
556 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
557 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
558
559 if (Offset < 0) {
560 Offset = - Offset;
561 BaseOpc =
562 isThumb2 ? ARM::t2SUBri :
563 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
564 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
565 }
566
567 if (!TL->isLegalAddImmediate(Offset))
568 // FIXME: Try add with register operand?
569 return false; // Probably not worth it then.
570
571 if (isThumb1) {
572 // Thumb1: depending on immediate size, use either
573 // ADDS NewBase, Base, #imm3
574 // or
575 // MOV NewBase, Base
576 // ADDS NewBase, #imm8.
577 if (Base != NewBase &&
578 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
579 // Need to insert a MOV to the new base first.
580 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
581 !STI->hasV6Ops()) {
582 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
583 if (Pred != ARMCC::AL)
584 return false;
585 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
586 .addReg(Base, getKillRegState(BaseKill));
587 } else
588 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
589 .addReg(Base, getKillRegState(BaseKill))
590 .addImm(Pred).addReg(PredReg);
591
592 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
593 Base = NewBase;
594 BaseKill = false;
595 }
596 if (BaseOpc == ARM::tADDrSPi) {
597 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
598 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
599 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
600 .addImm(Pred).addReg(PredReg);
601 } else
602 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
603 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
604 .addImm(Pred).addReg(PredReg);
605 } else {
606 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
607 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
608 .addImm(Pred).addReg(PredReg).addReg(0);
609 }
610 Base = NewBase;
611 BaseKill = true; // New base is always killed straight away.
612 }
613
614 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
615 Opcode == ARM::VLDRD);
616
617 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
618 // base register writeback.
619 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
620 if (!Opcode) return false;
621
622 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
623 // - There is no writeback (LDM of base register),
624 // - the base register is killed by the merged instruction,
625 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
626 // to reset the base register.
627 // Otherwise, don't merge.
628 // It's safe to return here since the code to materialize a new base register
629 // above is also conditional on SafeToClobberCPSR.
630 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
631 return false;
632
633 MachineInstrBuilder MIB;
634
635 if (Writeback) {
636 if (Opcode == ARM::tLDMIA)
637 // Update tLDMIA with writeback if necessary.
638 Opcode = ARM::tLDMIA_UPD;
639
640 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
641
642 // Thumb1: we might need to set base writeback when building the MI.
643 MIB.addReg(Base, getDefRegState(true))
644 .addReg(Base, getKillRegState(BaseKill));
645
646 // The base isn't dead after a merged instruction with writeback.
647 // Insert a sub instruction after the newly formed instruction to reset.
648 if (!BaseKill)
649 UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
650
651 } else {
652 // No writeback, simply build the MachineInstr.
653 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
654 MIB.addReg(Base, getKillRegState(BaseKill));
655 }
656
657 MIB.addImm(Pred).addReg(PredReg);
658
659 for (unsigned i = 0; i != NumRegs; ++i)
660 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
661 | getKillRegState(Regs[i].second));
662
663 // Add implicit defs for super-registers.
664 for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
665 MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
666
667 return true;
668 }
669
670 /// \brief Find all instructions using a given imp-def within a range.
671 ///
672 /// We are trying to combine a range of instructions, one of which (located at
673 /// position RangeBegin) implicitly defines a register. The final LDM/STM will
674 /// be placed at RangeEnd, and so any uses of this definition between RangeStart
675 /// and RangeEnd must be modified to use an undefined value.
676 ///
677 /// The live range continues until we find a second definition or one of the
678 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
679 /// we must consider all uses and decide which are relevant in a second pass.
findUsesOfImpDef(SmallVectorImpl<MachineOperand * > & UsesOfImpDefs,const MemOpQueue & MemOps,unsigned DefReg,unsigned RangeBegin,unsigned RangeEnd)680 void ARMLoadStoreOpt::findUsesOfImpDef(
681 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
682 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
683 std::map<unsigned, MachineOperand *> Uses;
684 unsigned LastLivePos = RangeEnd;
685
686 // First we find all uses of this register with Position between RangeBegin
687 // and RangeEnd, any or all of these could be uses of a definition at
688 // RangeBegin. We also record the latest position a definition at RangeBegin
689 // would be considered live.
690 for (unsigned i = 0; i < MemOps.size(); ++i) {
691 MachineInstr &MI = *MemOps[i].MBBI;
692 unsigned MIPosition = MemOps[i].Position;
693 if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
694 continue;
695
696 // If this instruction defines the register, then any later use will be of
697 // that definition rather than ours.
698 if (MI.definesRegister(DefReg))
699 LastLivePos = std::min(LastLivePos, MIPosition);
700
701 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
702 if (!UseOp)
703 continue;
704
705 // If this instruction kills the register then (assuming liveness is
706 // correct when we start) we don't need to think about anything after here.
707 if (UseOp->isKill())
708 LastLivePos = std::min(LastLivePos, MIPosition);
709
710 Uses[MIPosition] = UseOp;
711 }
712
713 // Now we traverse the list of all uses, and append the ones that actually use
714 // our definition to the requested list.
715 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
716 E = Uses.end();
717 I != E; ++I) {
718 // List is sorted by position so once we've found one out of range there
719 // will be no more to consider.
720 if (I->first > LastLivePos)
721 break;
722 UsesOfImpDefs.push_back(I->second);
723 }
724 }
725
726 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
727 // success.
MergeOpsUpdate(MachineBasicBlock & MBB,MemOpQueue & memOps,unsigned memOpsBegin,unsigned memOpsEnd,unsigned insertAfter,int Offset,unsigned Base,bool BaseKill,int Opcode,ARMCC::CondCodes Pred,unsigned PredReg,unsigned Scratch,DebugLoc dl,SmallVectorImpl<MachineBasicBlock::iterator> & Merges)728 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
729 MemOpQueue &memOps,
730 unsigned memOpsBegin, unsigned memOpsEnd,
731 unsigned insertAfter, int Offset,
732 unsigned Base, bool BaseKill,
733 int Opcode,
734 ARMCC::CondCodes Pred, unsigned PredReg,
735 unsigned Scratch,
736 DebugLoc dl,
737 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
738 // First calculate which of the registers should be killed by the merged
739 // instruction.
740 const unsigned insertPos = memOps[insertAfter].Position;
741 SmallSet<unsigned, 4> KilledRegs;
742 DenseMap<unsigned, unsigned> Killer;
743 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
744 if (i == memOpsBegin) {
745 i = memOpsEnd;
746 if (i == e)
747 break;
748 }
749 if (memOps[i].Position < insertPos && memOps[i].isKill) {
750 unsigned Reg = memOps[i].Reg;
751 KilledRegs.insert(Reg);
752 Killer[Reg] = i;
753 }
754 }
755
756 SmallVector<std::pair<unsigned, bool>, 8> Regs;
757 SmallVector<unsigned, 8> ImpDefs;
758 SmallVector<MachineOperand *, 8> UsesOfImpDefs;
759 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
760 unsigned Reg = memOps[i].Reg;
761 // If we are inserting the merged operation after an operation that
762 // uses the same register, make sure to transfer any kill flag.
763 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
764 Regs.push_back(std::make_pair(Reg, isKill));
765
766 // Collect any implicit defs of super-registers. They must be preserved.
767 for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
768 if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
769 continue;
770 unsigned DefReg = MO->getReg();
771 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
772 ImpDefs.push_back(DefReg);
773
774 // There may be other uses of the definition between this instruction and
775 // the eventual LDM/STM position. These should be marked undef if the
776 // merge takes place.
777 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
778 insertPos);
779 }
780 }
781
782 // Try to do the merge.
783 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
784 ++Loc;
785 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
786 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
787 return;
788
789 // Merge succeeded, update records.
790 Merges.push_back(std::prev(Loc));
791
792 // In gathering loads together, we may have moved the imp-def of a register
793 // past one of its uses. This is OK, since we know better than the rest of
794 // LLVM what's OK with ARM loads and stores; but we still have to adjust the
795 // affected uses.
796 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
797 E = UsesOfImpDefs.end();
798 I != E; ++I)
799 (*I)->setIsUndef();
800
801 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
802 // Remove kill flags from any memops that come before insertPos.
803 if (Regs[i-memOpsBegin].second) {
804 unsigned Reg = Regs[i-memOpsBegin].first;
805 if (KilledRegs.count(Reg)) {
806 unsigned j = Killer[Reg];
807 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
808 assert(Idx >= 0 && "Cannot find killing operand");
809 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
810 memOps[j].isKill = false;
811 }
812 memOps[i].isKill = true;
813 }
814 MBB.erase(memOps[i].MBBI);
815 // Update this memop to refer to the merged instruction.
816 // We may need to move kill flags again.
817 memOps[i].Merged = true;
818 memOps[i].MBBI = Merges.back();
819 memOps[i].Position = insertPos;
820 }
821
822 // Update memOps offsets, since they may have been modified by MergeOps.
823 for (auto &MemOp : memOps) {
824 MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
825 }
826 }
827
828 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
829 /// load / store multiple instructions.
830 void
MergeLDR_STR(MachineBasicBlock & MBB,unsigned SIndex,unsigned Base,int Opcode,unsigned Size,ARMCC::CondCodes Pred,unsigned PredReg,unsigned Scratch,MemOpQueue & MemOps,SmallVectorImpl<MachineBasicBlock::iterator> & Merges)831 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
832 unsigned Base, int Opcode, unsigned Size,
833 ARMCC::CondCodes Pred, unsigned PredReg,
834 unsigned Scratch, MemOpQueue &MemOps,
835 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
836 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
837 int Offset = MemOps[SIndex].Offset;
838 int SOffset = Offset;
839 unsigned insertAfter = SIndex;
840 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
841 DebugLoc dl = Loc->getDebugLoc();
842 const MachineOperand &PMO = Loc->getOperand(0);
843 unsigned PReg = PMO.getReg();
844 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
845 unsigned Count = 1;
846 unsigned Limit = ~0U;
847 bool BaseKill = false;
848 // vldm / vstm limit are 32 for S variants, 16 for D variants.
849
850 switch (Opcode) {
851 default: break;
852 case ARM::VSTRS:
853 Limit = 32;
854 break;
855 case ARM::VSTRD:
856 Limit = 16;
857 break;
858 case ARM::VLDRD:
859 Limit = 16;
860 break;
861 case ARM::VLDRS:
862 Limit = 32;
863 break;
864 }
865
866 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
867 int NewOffset = MemOps[i].Offset;
868 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
869 unsigned Reg = MO.getReg();
870 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
871 // Register numbers must be in ascending order. For VFP / NEON load and
872 // store multiples, the registers must also be consecutive and within the
873 // limit on the number of registers per instruction.
874 if (Reg != ARM::SP &&
875 NewOffset == Offset + (int)Size &&
876 ((isNotVFP && RegNum > PRegNum) ||
877 ((Count < Limit) && RegNum == PRegNum+1)) &&
878 // On Swift we don't want vldm/vstm to start with a odd register num
879 // because Q register unaligned vldm/vstm need more uops.
880 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
881 Offset += Size;
882 PRegNum = RegNum;
883 ++Count;
884 } else {
885 // Can't merge this in. Try merge the earlier ones first.
886 // We need to compute BaseKill here because the MemOps may have been
887 // reordered.
888 BaseKill = Loc->killsRegister(Base);
889
890 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
891 BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
892 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
893 MemOps, Merges);
894 return;
895 }
896
897 if (MemOps[i].Position > MemOps[insertAfter].Position) {
898 insertAfter = i;
899 Loc = MemOps[i].MBBI;
900 }
901 }
902
903 BaseKill = Loc->killsRegister(Base);
904 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
905 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
906 }
907
isMatchingDecrement(MachineInstr * MI,unsigned Base,unsigned Bytes,unsigned Limit,ARMCC::CondCodes Pred,unsigned PredReg)908 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
909 unsigned Bytes, unsigned Limit,
910 ARMCC::CondCodes Pred, unsigned PredReg) {
911 unsigned MyPredReg = 0;
912 if (!MI)
913 return false;
914
915 bool CheckCPSRDef = false;
916 switch (MI->getOpcode()) {
917 default: return false;
918 case ARM::tSUBi8:
919 case ARM::t2SUBri:
920 case ARM::SUBri:
921 CheckCPSRDef = true;
922 // fallthrough
923 case ARM::tSUBspi:
924 break;
925 }
926
927 // Make sure the offset fits in 8 bits.
928 if (Bytes == 0 || (Limit && Bytes >= Limit))
929 return false;
930
931 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
932 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
933 if (!(MI->getOperand(0).getReg() == Base &&
934 MI->getOperand(1).getReg() == Base &&
935 (MI->getOperand(2).getImm() * Scale) == Bytes &&
936 getInstrPredicate(MI, MyPredReg) == Pred &&
937 MyPredReg == PredReg))
938 return false;
939
940 return CheckCPSRDef ? !definesCPSR(MI) : true;
941 }
942
isMatchingIncrement(MachineInstr * MI,unsigned Base,unsigned Bytes,unsigned Limit,ARMCC::CondCodes Pred,unsigned PredReg)943 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
944 unsigned Bytes, unsigned Limit,
945 ARMCC::CondCodes Pred, unsigned PredReg) {
946 unsigned MyPredReg = 0;
947 if (!MI)
948 return false;
949
950 bool CheckCPSRDef = false;
951 switch (MI->getOpcode()) {
952 default: return false;
953 case ARM::tADDi8:
954 case ARM::t2ADDri:
955 case ARM::ADDri:
956 CheckCPSRDef = true;
957 // fallthrough
958 case ARM::tADDspi:
959 break;
960 }
961
962 if (Bytes == 0 || (Limit && Bytes >= Limit))
963 // Make sure the offset fits in 8 bits.
964 return false;
965
966 unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
967 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
968 if (!(MI->getOperand(0).getReg() == Base &&
969 MI->getOperand(1).getReg() == Base &&
970 (MI->getOperand(2).getImm() * Scale) == Bytes &&
971 getInstrPredicate(MI, MyPredReg) == Pred &&
972 MyPredReg == PredReg))
973 return false;
974
975 return CheckCPSRDef ? !definesCPSR(MI) : true;
976 }
977
getLSMultipleTransferSize(MachineInstr * MI)978 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
979 switch (MI->getOpcode()) {
980 default: return 0;
981 case ARM::LDRi12:
982 case ARM::STRi12:
983 case ARM::tLDRi:
984 case ARM::tSTRi:
985 case ARM::tLDRspi:
986 case ARM::tSTRspi:
987 case ARM::t2LDRi8:
988 case ARM::t2LDRi12:
989 case ARM::t2STRi8:
990 case ARM::t2STRi12:
991 case ARM::VLDRS:
992 case ARM::VSTRS:
993 return 4;
994 case ARM::VLDRD:
995 case ARM::VSTRD:
996 return 8;
997 case ARM::LDMIA:
998 case ARM::LDMDA:
999 case ARM::LDMDB:
1000 case ARM::LDMIB:
1001 case ARM::STMIA:
1002 case ARM::STMDA:
1003 case ARM::STMDB:
1004 case ARM::STMIB:
1005 case ARM::tLDMIA:
1006 case ARM::tLDMIA_UPD:
1007 case ARM::tSTMIA_UPD:
1008 case ARM::t2LDMIA:
1009 case ARM::t2LDMDB:
1010 case ARM::t2STMIA:
1011 case ARM::t2STMDB:
1012 case ARM::VLDMSIA:
1013 case ARM::VSTMSIA:
1014 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
1015 case ARM::VLDMDIA:
1016 case ARM::VSTMDIA:
1017 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
1018 }
1019 }
1020
getUpdatingLSMultipleOpcode(unsigned Opc,ARM_AM::AMSubMode Mode)1021 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1022 ARM_AM::AMSubMode Mode) {
1023 switch (Opc) {
1024 default: llvm_unreachable("Unhandled opcode!");
1025 case ARM::LDMIA:
1026 case ARM::LDMDA:
1027 case ARM::LDMDB:
1028 case ARM::LDMIB:
1029 switch (Mode) {
1030 default: llvm_unreachable("Unhandled submode!");
1031 case ARM_AM::ia: return ARM::LDMIA_UPD;
1032 case ARM_AM::ib: return ARM::LDMIB_UPD;
1033 case ARM_AM::da: return ARM::LDMDA_UPD;
1034 case ARM_AM::db: return ARM::LDMDB_UPD;
1035 }
1036 case ARM::STMIA:
1037 case ARM::STMDA:
1038 case ARM::STMDB:
1039 case ARM::STMIB:
1040 switch (Mode) {
1041 default: llvm_unreachable("Unhandled submode!");
1042 case ARM_AM::ia: return ARM::STMIA_UPD;
1043 case ARM_AM::ib: return ARM::STMIB_UPD;
1044 case ARM_AM::da: return ARM::STMDA_UPD;
1045 case ARM_AM::db: return ARM::STMDB_UPD;
1046 }
1047 case ARM::t2LDMIA:
1048 case ARM::t2LDMDB:
1049 switch (Mode) {
1050 default: llvm_unreachable("Unhandled submode!");
1051 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1052 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1053 }
1054 case ARM::t2STMIA:
1055 case ARM::t2STMDB:
1056 switch (Mode) {
1057 default: llvm_unreachable("Unhandled submode!");
1058 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1059 case ARM_AM::db: return ARM::t2STMDB_UPD;
1060 }
1061 case ARM::VLDMSIA:
1062 switch (Mode) {
1063 default: llvm_unreachable("Unhandled submode!");
1064 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1065 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1066 }
1067 case ARM::VLDMDIA:
1068 switch (Mode) {
1069 default: llvm_unreachable("Unhandled submode!");
1070 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1071 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1072 }
1073 case ARM::VSTMSIA:
1074 switch (Mode) {
1075 default: llvm_unreachable("Unhandled submode!");
1076 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1077 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1078 }
1079 case ARM::VSTMDIA:
1080 switch (Mode) {
1081 default: llvm_unreachable("Unhandled submode!");
1082 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1083 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1084 }
1085 }
1086 }
1087
1088 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
1089 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1090 ///
1091 /// stmia rn, <ra, rb, rc>
1092 /// rn := rn + 4 * 3;
1093 /// =>
1094 /// stmia rn!, <ra, rb, rc>
1095 ///
1096 /// rn := rn - 4 * 3;
1097 /// ldmia rn, <ra, rb, rc>
1098 /// =>
1099 /// ldmdb rn!, <ra, rb, rc>
MergeBaseUpdateLSMultiple(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,bool & Advance,MachineBasicBlock::iterator & I)1100 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
1101 MachineBasicBlock::iterator MBBI,
1102 bool &Advance,
1103 MachineBasicBlock::iterator &I) {
1104 // Thumb1 is already using updating loads/stores.
1105 if (isThumb1) return false;
1106
1107 MachineInstr *MI = MBBI;
1108 unsigned Base = MI->getOperand(0).getReg();
1109 bool BaseKill = MI->getOperand(0).isKill();
1110 unsigned Bytes = getLSMultipleTransferSize(MI);
1111 unsigned PredReg = 0;
1112 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1113 int Opcode = MI->getOpcode();
1114 DebugLoc dl = MI->getDebugLoc();
1115
1116 // Can't use an updating ld/st if the base register is also a dest
1117 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1118 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1119 if (MI->getOperand(i).getReg() == Base)
1120 return false;
1121
1122 bool DoMerge = false;
1123 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
1124
1125 // Try merging with the previous instruction.
1126 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1127 if (MBBI != BeginMBBI) {
1128 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1129 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1130 --PrevMBBI;
1131 if (Mode == ARM_AM::ia &&
1132 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1133 Mode = ARM_AM::db;
1134 DoMerge = true;
1135 } else if (Mode == ARM_AM::ib &&
1136 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1137 Mode = ARM_AM::da;
1138 DoMerge = true;
1139 }
1140 if (DoMerge)
1141 MBB.erase(PrevMBBI);
1142 }
1143
1144 // Try merging with the next instruction.
1145 MachineBasicBlock::iterator EndMBBI = MBB.end();
1146 if (!DoMerge && MBBI != EndMBBI) {
1147 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1148 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1149 ++NextMBBI;
1150 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
1151 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
1152 DoMerge = true;
1153 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
1154 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
1155 DoMerge = true;
1156 }
1157 if (DoMerge) {
1158 if (NextMBBI == I) {
1159 Advance = true;
1160 ++I;
1161 }
1162 MBB.erase(NextMBBI);
1163 }
1164 }
1165
1166 if (!DoMerge)
1167 return false;
1168
1169 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1170 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1171 .addReg(Base, getDefRegState(true)) // WB base register
1172 .addReg(Base, getKillRegState(BaseKill))
1173 .addImm(Pred).addReg(PredReg);
1174
1175 // Transfer the rest of operands.
1176 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1177 MIB.addOperand(MI->getOperand(OpNum));
1178
1179 // Transfer memoperands.
1180 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1181
1182 MBB.erase(MBBI);
1183 return true;
1184 }
1185
getPreIndexedLoadStoreOpcode(unsigned Opc,ARM_AM::AddrOpc Mode)1186 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1187 ARM_AM::AddrOpc Mode) {
1188 switch (Opc) {
1189 case ARM::LDRi12:
1190 return ARM::LDR_PRE_IMM;
1191 case ARM::STRi12:
1192 return ARM::STR_PRE_IMM;
1193 case ARM::VLDRS:
1194 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1195 case ARM::VLDRD:
1196 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1197 case ARM::VSTRS:
1198 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1199 case ARM::VSTRD:
1200 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1201 case ARM::t2LDRi8:
1202 case ARM::t2LDRi12:
1203 return ARM::t2LDR_PRE;
1204 case ARM::t2STRi8:
1205 case ARM::t2STRi12:
1206 return ARM::t2STR_PRE;
1207 default: llvm_unreachable("Unhandled opcode!");
1208 }
1209 }
1210
getPostIndexedLoadStoreOpcode(unsigned Opc,ARM_AM::AddrOpc Mode)1211 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1212 ARM_AM::AddrOpc Mode) {
1213 switch (Opc) {
1214 case ARM::LDRi12:
1215 return ARM::LDR_POST_IMM;
1216 case ARM::STRi12:
1217 return ARM::STR_POST_IMM;
1218 case ARM::VLDRS:
1219 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1220 case ARM::VLDRD:
1221 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1222 case ARM::VSTRS:
1223 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1224 case ARM::VSTRD:
1225 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1226 case ARM::t2LDRi8:
1227 case ARM::t2LDRi12:
1228 return ARM::t2LDR_POST;
1229 case ARM::t2STRi8:
1230 case ARM::t2STRi12:
1231 return ARM::t2STR_POST;
1232 default: llvm_unreachable("Unhandled opcode!");
1233 }
1234 }
1235
1236 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
1237 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
MergeBaseUpdateLoadStore(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const TargetInstrInfo * TII,bool & Advance,MachineBasicBlock::iterator & I)1238 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
1239 MachineBasicBlock::iterator MBBI,
1240 const TargetInstrInfo *TII,
1241 bool &Advance,
1242 MachineBasicBlock::iterator &I) {
1243 // Thumb1 doesn't have updating LDR/STR.
1244 // FIXME: Use LDM/STM with single register instead.
1245 if (isThumb1) return false;
1246
1247 MachineInstr *MI = MBBI;
1248 unsigned Base = MI->getOperand(1).getReg();
1249 bool BaseKill = MI->getOperand(1).isKill();
1250 unsigned Bytes = getLSMultipleTransferSize(MI);
1251 int Opcode = MI->getOpcode();
1252 DebugLoc dl = MI->getDebugLoc();
1253 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1254 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1255 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1256 if (isi32Load(Opcode) || isi32Store(Opcode))
1257 if (MI->getOperand(2).getImm() != 0)
1258 return false;
1259 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1260 return false;
1261
1262 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
1263 // Can't do the merge if the destination register is the same as the would-be
1264 // writeback register.
1265 if (MI->getOperand(0).getReg() == Base)
1266 return false;
1267
1268 unsigned PredReg = 0;
1269 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1270 bool DoMerge = false;
1271 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1272 unsigned NewOpc = 0;
1273 // AM2 - 12 bits, thumb2 - 8 bits.
1274 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
1275
1276 // Try merging with the previous instruction.
1277 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1278 if (MBBI != BeginMBBI) {
1279 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1280 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1281 --PrevMBBI;
1282 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1283 DoMerge = true;
1284 AddSub = ARM_AM::sub;
1285 } else if (!isAM5 &&
1286 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1287 DoMerge = true;
1288 }
1289 if (DoMerge) {
1290 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
1291 MBB.erase(PrevMBBI);
1292 }
1293 }
1294
1295 // Try merging with the next instruction.
1296 MachineBasicBlock::iterator EndMBBI = MBB.end();
1297 if (!DoMerge && MBBI != EndMBBI) {
1298 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1299 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1300 ++NextMBBI;
1301 if (!isAM5 &&
1302 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1303 DoMerge = true;
1304 AddSub = ARM_AM::sub;
1305 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1306 DoMerge = true;
1307 }
1308 if (DoMerge) {
1309 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
1310 if (NextMBBI == I) {
1311 Advance = true;
1312 ++I;
1313 }
1314 MBB.erase(NextMBBI);
1315 }
1316 }
1317
1318 if (!DoMerge)
1319 return false;
1320
1321 if (isAM5) {
1322 // VLDM[SD]_UPD, VSTM[SD]_UPD
1323 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1324 // updating load/store-multiple instructions can be used with only one
1325 // register.)
1326 MachineOperand &MO = MI->getOperand(0);
1327 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1328 .addReg(Base, getDefRegState(true)) // WB base register
1329 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1330 .addImm(Pred).addReg(PredReg)
1331 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1332 getKillRegState(MO.isKill())));
1333 } else if (isLd) {
1334 if (isAM2) {
1335 // LDR_PRE, LDR_POST
1336 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1337 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1338 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1339 .addReg(Base, RegState::Define)
1340 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1341 } else {
1342 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1343 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1344 .addReg(Base, RegState::Define)
1345 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1346 }
1347 } else {
1348 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1349 // t2LDR_PRE, t2LDR_POST
1350 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1351 .addReg(Base, RegState::Define)
1352 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1353 }
1354 } else {
1355 MachineOperand &MO = MI->getOperand(0);
1356 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1357 // the vestigal zero-reg offset register. When that's fixed, this clause
1358 // can be removed entirely.
1359 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1360 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1361 // STR_PRE, STR_POST
1362 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1363 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1364 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1365 } else {
1366 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1367 // t2STR_PRE, t2STR_POST
1368 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1369 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1370 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1371 }
1372 }
1373 MBB.erase(MBBI);
1374
1375 return true;
1376 }
1377
1378 /// isMemoryOp - Returns true if instruction is a memory operation that this
1379 /// pass is capable of operating on.
isMemoryOp(const MachineInstr * MI)1380 static bool isMemoryOp(const MachineInstr *MI) {
1381 // When no memory operands are present, conservatively assume unaligned,
1382 // volatile, unfoldable.
1383 if (!MI->hasOneMemOperand())
1384 return false;
1385
1386 const MachineMemOperand *MMO = *MI->memoperands_begin();
1387
1388 // Don't touch volatile memory accesses - we may be changing their order.
1389 if (MMO->isVolatile())
1390 return false;
1391
1392 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1393 // not.
1394 if (MMO->getAlignment() < 4)
1395 return false;
1396
1397 // str <undef> could probably be eliminated entirely, but for now we just want
1398 // to avoid making a mess of it.
1399 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1400 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1401 MI->getOperand(0).isUndef())
1402 return false;
1403
1404 // Likewise don't mess with references to undefined addresses.
1405 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1406 MI->getOperand(1).isUndef())
1407 return false;
1408
1409 int Opcode = MI->getOpcode();
1410 switch (Opcode) {
1411 default: break;
1412 case ARM::VLDRS:
1413 case ARM::VSTRS:
1414 return MI->getOperand(1).isReg();
1415 case ARM::VLDRD:
1416 case ARM::VSTRD:
1417 return MI->getOperand(1).isReg();
1418 case ARM::LDRi12:
1419 case ARM::STRi12:
1420 case ARM::tLDRi:
1421 case ARM::tSTRi:
1422 case ARM::tLDRspi:
1423 case ARM::tSTRspi:
1424 case ARM::t2LDRi8:
1425 case ARM::t2LDRi12:
1426 case ARM::t2STRi8:
1427 case ARM::t2STRi12:
1428 return MI->getOperand(1).isReg();
1429 }
1430 return false;
1431 }
1432
1433 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1434 /// op that is being merged.
AdvanceRS(MachineBasicBlock & MBB,MemOpQueue & MemOps)1435 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1436 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1437 unsigned Position = MemOps[0].Position;
1438 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1439 if (MemOps[i].Position < Position) {
1440 Position = MemOps[i].Position;
1441 Loc = MemOps[i].MBBI;
1442 }
1443 }
1444
1445 if (Loc != MBB.begin())
1446 RS->forward(std::prev(Loc));
1447 }
1448
InsertLDR_STR(MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI,int Offset,bool isDef,DebugLoc dl,unsigned NewOpc,unsigned Reg,bool RegDeadKill,bool RegUndef,unsigned BaseReg,bool BaseKill,bool BaseUndef,bool OffKill,bool OffUndef,ARMCC::CondCodes Pred,unsigned PredReg,const TargetInstrInfo * TII,bool isT2)1449 static void InsertLDR_STR(MachineBasicBlock &MBB,
1450 MachineBasicBlock::iterator &MBBI,
1451 int Offset, bool isDef,
1452 DebugLoc dl, unsigned NewOpc,
1453 unsigned Reg, bool RegDeadKill, bool RegUndef,
1454 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1455 bool OffKill, bool OffUndef,
1456 ARMCC::CondCodes Pred, unsigned PredReg,
1457 const TargetInstrInfo *TII, bool isT2) {
1458 if (isDef) {
1459 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1460 TII->get(NewOpc))
1461 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1462 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1463 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1464 } else {
1465 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1466 TII->get(NewOpc))
1467 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1468 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1469 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1470 }
1471 }
1472
FixInvalidRegPairOp(MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI)1473 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1474 MachineBasicBlock::iterator &MBBI) {
1475 MachineInstr *MI = &*MBBI;
1476 unsigned Opcode = MI->getOpcode();
1477 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1478 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1479 const MachineOperand &BaseOp = MI->getOperand(2);
1480 unsigned BaseReg = BaseOp.getReg();
1481 unsigned EvenReg = MI->getOperand(0).getReg();
1482 unsigned OddReg = MI->getOperand(1).getReg();
1483 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1484 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1485 // ARM errata 602117: LDRD with base in list may result in incorrect base
1486 // register when interrupted or faulted.
1487 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
1488 if (!Errata602117 &&
1489 ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1490 return false;
1491
1492 MachineBasicBlock::iterator NewBBI = MBBI;
1493 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1494 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1495 bool EvenDeadKill = isLd ?
1496 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1497 bool EvenUndef = MI->getOperand(0).isUndef();
1498 bool OddDeadKill = isLd ?
1499 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1500 bool OddUndef = MI->getOperand(1).isUndef();
1501 bool BaseKill = BaseOp.isKill();
1502 bool BaseUndef = BaseOp.isUndef();
1503 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1504 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1505 int OffImm = getMemoryOpOffset(MI);
1506 unsigned PredReg = 0;
1507 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1508
1509 if (OddRegNum > EvenRegNum && OffImm == 0) {
1510 // Ascending register numbers and no offset. It's safe to change it to a
1511 // ldm or stm.
1512 unsigned NewOpc = (isLd)
1513 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1514 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1515 if (isLd) {
1516 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1517 .addReg(BaseReg, getKillRegState(BaseKill))
1518 .addImm(Pred).addReg(PredReg)
1519 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1520 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1521 ++NumLDRD2LDM;
1522 } else {
1523 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1524 .addReg(BaseReg, getKillRegState(BaseKill))
1525 .addImm(Pred).addReg(PredReg)
1526 .addReg(EvenReg,
1527 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1528 .addReg(OddReg,
1529 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1530 ++NumSTRD2STM;
1531 }
1532 NewBBI = std::prev(MBBI);
1533 } else {
1534 // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defined by the load; make sure the first load does not clobber it.
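      // e.g. ldrd r1, r3, [r1, #4]  =>  ldr r3, [r1, #8]
      //                                 ldr r1, [r1, #4]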
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops with the same base register and incrementing offsets into LDM / STM
/// ops.
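/// e.g. ldr r0, [r4]
///      ldr r1, [r4, #4]   =>   ldm r4, {r0, r1, r2}
///      ldr r2, [r4, #8]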
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates the write-after-write (WAW) dependence. The same
      // applies to str. Try to merge the instruction(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those loads /
        // stores that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance,
                                         MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that has been deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

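/// runOnMachineFunction - Initialize the target info and a register
/// scavenger, then run the LDM / STM formation over every basic block and,
/// on V5T and later, try folding the return into a trailing LDM.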
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves loads /
/// stores from consecutive locations closer together to make it more likely
/// they will be combined later.
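/// e.g. str r0, [r4]             str r0, [r4]
///      add r3, r3, #4      =>   str r1, [r4, #4]
///      str r1, [r4, #4]         add r3, r3, #4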

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

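/// IsSafeAndProfitableToMove - Return true if the memory ops in MemOps can be
/// moved next to each other: no instruction between I and E may be a barrier,
/// redefine the base register, or alias the moved loads / stores, and the
/// estimated increase in register pressure must stay small.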
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy the memory operands of Op0 and Op1 into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

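/// CanFormLdStDWord - Return true if Op0 and Op1 can be combined into a single
/// LDRD / STRD, computing the new opcode, register operands, and encoded
/// offset. Requires V5TE, a sufficiently aligned base, and an in-range offset.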
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

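/// RescheduleOps - Sort the loads / stores in Ops by offset and try to move
/// runs with consecutive offsets next to each other. A run of exactly two ops
/// may be combined into an LDRD / STRD.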
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVectorImpl<MachineInstr *> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

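/// RescheduleLoadStoreInstrs - Scan a region of the basic block (up to the
/// next call or terminator), bucket the loads and stores by base register,
/// and call RescheduleOps on each bucket that has more than one memory op.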
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that was seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}