1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
15 #include "AArch64Subtarget.h"
16 #include "MCTargetDesc/AArch64AddressingModes.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineInstrBuilder.h"
19 #include "llvm/CodeGen/MachineMemOperand.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/PseudoSourceValue.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/TargetRegistry.h"
25 #include <algorithm>
26 
27 using namespace llvm;
28 
29 #define GET_INSTRINFO_CTOR_DTOR
30 #include "AArch64GenInstrInfo.inc"
31 
/// Target-specific MachineMemOperand flag, defined as MOTargetFlag1.
/// NOTE(review): judging by the name, it tags memory operands that should not
/// be merged into load/store pairs; the code that sets and reads it is outside
/// this part of the file -- confirm there.
static LLVM_CONSTEXPR MachineMemOperand::Flags MOSuppressPair =
    MachineMemOperand::MOTargetFlag1;
34 
/// Build the AArch64 instruction info for subtarget \p STI.  The
/// ADJCALLSTACKDOWN / ADJCALLSTACKUP opcodes are forwarded to the generated
/// base class, the register info member is constructed from the subtarget's
/// target triple, and the Subtarget member is initialized from \p STI.
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}
38 
39 /// GetInstSize - Return the number of bytes of code the specified
40 /// instruction may be.  This returns the maximum number of bytes.
GetInstSizeInBytes(const MachineInstr & MI) const41 unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
42   const MachineBasicBlock &MBB = *MI.getParent();
43   const MachineFunction *MF = MBB.getParent();
44   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
45 
46   if (MI.getOpcode() == AArch64::INLINEASM)
47     return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
48 
49   const MCInstrDesc &Desc = MI.getDesc();
50   switch (Desc.getOpcode()) {
51   default:
52     // Anything not explicitly designated otherwise is a nomal 4-byte insn.
53     return 4;
54   case TargetOpcode::DBG_VALUE:
55   case TargetOpcode::EH_LABEL:
56   case TargetOpcode::IMPLICIT_DEF:
57   case TargetOpcode::KILL:
58     return 0;
59   }
60 
61   llvm_unreachable("GetInstSizeInBytes()- Unable to determin insn size");
62 }
63 
parseCondBranch(MachineInstr * LastInst,MachineBasicBlock * & Target,SmallVectorImpl<MachineOperand> & Cond)64 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
65                             SmallVectorImpl<MachineOperand> &Cond) {
66   // Block ends with fall-through condbranch.
67   switch (LastInst->getOpcode()) {
68   default:
69     llvm_unreachable("Unknown branch instruction?");
70   case AArch64::Bcc:
71     Target = LastInst->getOperand(1).getMBB();
72     Cond.push_back(LastInst->getOperand(0));
73     break;
74   case AArch64::CBZW:
75   case AArch64::CBZX:
76   case AArch64::CBNZW:
77   case AArch64::CBNZX:
78     Target = LastInst->getOperand(1).getMBB();
79     Cond.push_back(MachineOperand::CreateImm(-1));
80     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
81     Cond.push_back(LastInst->getOperand(0));
82     break;
83   case AArch64::TBZW:
84   case AArch64::TBZX:
85   case AArch64::TBNZW:
86   case AArch64::TBNZX:
87     Target = LastInst->getOperand(2).getMBB();
88     Cond.push_back(MachineOperand::CreateImm(-1));
89     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
90     Cond.push_back(LastInst->getOperand(0));
91     Cond.push_back(LastInst->getOperand(1));
92   }
93 }
94 
// Branch analysis.
//
// Inspect the terminators at the end of MBB and describe them through TBB,
// FBB and Cond.  Returns false when the block ending was understood and true
// when it cannot be analyzed.
//
// Outcomes that return false:
//   - no non-debug instruction, or the last one is not an unpredicated
//     terminator: nothing is written (the block falls through);
//   - a single unconditional branch: TBB is its target;
//   - a single conditional branch: TBB and Cond come from parseCondBranch();
//   - conditional branch followed by unconditional branch: TBB and Cond come
//     from the conditional branch, FBB is the unconditional branch's target;
//   - two unconditional branches: TBB is the first one's target.
//
// When AllowModify is true, unconditional branches that follow another
// unconditional or indirect branch are erased from the block.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  // Note: the condition below steps I back by one instruction when the block
  // has more than one instruction.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      // The trailing branch is never reached; drop it and slide the
      // (LastInst, SecondLastInst) window one instruction up.
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it.  The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    // The indirect branch itself still cannot be analyzed.
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}
182 
ReverseBranchCondition(SmallVectorImpl<MachineOperand> & Cond) const183 bool AArch64InstrInfo::ReverseBranchCondition(
184     SmallVectorImpl<MachineOperand> &Cond) const {
185   if (Cond[0].getImm() != -1) {
186     // Regular Bcc
187     AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
188     Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
189   } else {
190     // Folded compare-and-branch
191     switch (Cond[1].getImm()) {
192     default:
193       llvm_unreachable("Unknown conditional branch!");
194     case AArch64::CBZW:
195       Cond[1].setImm(AArch64::CBNZW);
196       break;
197     case AArch64::CBNZW:
198       Cond[1].setImm(AArch64::CBZW);
199       break;
200     case AArch64::CBZX:
201       Cond[1].setImm(AArch64::CBNZX);
202       break;
203     case AArch64::CBNZX:
204       Cond[1].setImm(AArch64::CBZX);
205       break;
206     case AArch64::TBZW:
207       Cond[1].setImm(AArch64::TBNZW);
208       break;
209     case AArch64::TBNZW:
210       Cond[1].setImm(AArch64::TBZW);
211       break;
212     case AArch64::TBZX:
213       Cond[1].setImm(AArch64::TBNZX);
214       break;
215     case AArch64::TBNZX:
216       Cond[1].setImm(AArch64::TBZX);
217       break;
218     }
219   }
220 
221   return false;
222 }
223 
RemoveBranch(MachineBasicBlock & MBB) const224 unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
225   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
226   if (I == MBB.end())
227     return 0;
228 
229   if (!isUncondBranchOpcode(I->getOpcode()) &&
230       !isCondBranchOpcode(I->getOpcode()))
231     return 0;
232 
233   // Remove the branch.
234   I->eraseFromParent();
235 
236   I = MBB.end();
237 
238   if (I == MBB.begin())
239     return 1;
240   --I;
241   if (!isCondBranchOpcode(I->getOpcode()))
242     return 1;
243 
244   // Remove the branch.
245   I->eraseFromParent();
246   return 2;
247 }
248 
instantiateCondBranch(MachineBasicBlock & MBB,const DebugLoc & DL,MachineBasicBlock * TBB,ArrayRef<MachineOperand> Cond) const249 void AArch64InstrInfo::instantiateCondBranch(
250     MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
251     ArrayRef<MachineOperand> Cond) const {
252   if (Cond[0].getImm() != -1) {
253     // Regular Bcc
254     BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
255   } else {
256     // Folded compare-and-branch
257     // Note that we use addOperand instead of addReg to keep the flags.
258     const MachineInstrBuilder MIB =
259         BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
260     if (Cond.size() > 3)
261       MIB.addImm(Cond[3].getImm());
262     MIB.addMBB(TBB);
263   }
264 }
265 
InsertBranch(MachineBasicBlock & MBB,MachineBasicBlock * TBB,MachineBasicBlock * FBB,ArrayRef<MachineOperand> Cond,const DebugLoc & DL) const266 unsigned AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB,
267                                         MachineBasicBlock *TBB,
268                                         MachineBasicBlock *FBB,
269                                         ArrayRef<MachineOperand> Cond,
270                                         const DebugLoc &DL) const {
271   // Shouldn't be a fall through.
272   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
273 
274   if (!FBB) {
275     if (Cond.empty()) // Unconditional branch?
276       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
277     else
278       instantiateCondBranch(MBB, DL, TBB, Cond);
279     return 1;
280   }
281 
282   // Two-way conditional branch.
283   instantiateCondBranch(MBB, DL, TBB, Cond);
284   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
285   return 2;
286 }
287 
288 // Find the original register that VReg is copied from.
removeCopies(const MachineRegisterInfo & MRI,unsigned VReg)289 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
290   while (TargetRegisterInfo::isVirtualRegister(VReg)) {
291     const MachineInstr *DefMI = MRI.getVRegDef(VReg);
292     if (!DefMI->isFullCopy())
293       return VReg;
294     VReg = DefMI->getOperand(1).getReg();
295   }
296   return VReg;
297 }
298 
299 // Determine if VReg is defined by an instruction that can be folded into a
300 // csel instruction. If so, return the folded opcode, and the replacement
301 // register.
canFoldIntoCSel(const MachineRegisterInfo & MRI,unsigned VReg,unsigned * NewVReg=nullptr)302 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
303                                 unsigned *NewVReg = nullptr) {
304   VReg = removeCopies(MRI, VReg);
305   if (!TargetRegisterInfo::isVirtualRegister(VReg))
306     return 0;
307 
308   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
309   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
310   unsigned Opc = 0;
311   unsigned SrcOpNum = 0;
312   switch (DefMI->getOpcode()) {
313   case AArch64::ADDSXri:
314   case AArch64::ADDSWri:
315     // if NZCV is used, do not fold.
316     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
317       return 0;
318   // fall-through to ADDXri and ADDWri.
319   case AArch64::ADDXri:
320   case AArch64::ADDWri:
321     // add x, 1 -> csinc.
322     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
323         DefMI->getOperand(3).getImm() != 0)
324       return 0;
325     SrcOpNum = 1;
326     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
327     break;
328 
329   case AArch64::ORNXrr:
330   case AArch64::ORNWrr: {
331     // not x -> csinv, represented as orn dst, xzr, src.
332     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
333     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
334       return 0;
335     SrcOpNum = 2;
336     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
337     break;
338   }
339 
340   case AArch64::SUBSXrr:
341   case AArch64::SUBSWrr:
342     // if NZCV is used, do not fold.
343     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
344       return 0;
345   // fall-through to SUBXrr and SUBWrr.
346   case AArch64::SUBXrr:
347   case AArch64::SUBWrr: {
348     // neg x -> csneg, represented as sub dst, xzr, src.
349     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
350     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
351       return 0;
352     SrcOpNum = 2;
353     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
354     break;
355   }
356   default:
357     return 0;
358   }
359   assert(Opc && SrcOpNum && "Missing parameters");
360 
361   if (NewVReg)
362     *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
363   return Opc;
364 }
365 
canInsertSelect(const MachineBasicBlock & MBB,ArrayRef<MachineOperand> Cond,unsigned TrueReg,unsigned FalseReg,int & CondCycles,int & TrueCycles,int & FalseCycles) const366 bool AArch64InstrInfo::canInsertSelect(
367     const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
368     unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
369     int &FalseCycles) const {
370   // Check register classes.
371   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
372   const TargetRegisterClass *RC =
373       RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
374   if (!RC)
375     return false;
376 
377   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
378   unsigned ExtraCondLat = Cond.size() != 1;
379 
380   // GPRs are handled by csel.
381   // FIXME: Fold in x+1, -x, and ~x when applicable.
382   if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
383       AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
384     // Single-cycle csel, csinc, csinv, and csneg.
385     CondCycles = 1 + ExtraCondLat;
386     TrueCycles = FalseCycles = 1;
387     if (canFoldIntoCSel(MRI, TrueReg))
388       TrueCycles = 0;
389     else if (canFoldIntoCSel(MRI, FalseReg))
390       FalseCycles = 0;
391     return true;
392   }
393 
394   // Scalar floating point is handled by fcsel.
395   // FIXME: Form fabs, fmin, and fmax when applicable.
396   if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
397       AArch64::FPR32RegClass.hasSubClassEq(RC)) {
398     CondCycles = 5 + ExtraCondLat;
399     TrueCycles = FalseCycles = 2;
400     return true;
401   }
402 
403   // Can't do vectors.
404   return false;
405 }
406 
/// Insert, before \p I in \p MBB, a conditional select that writes \p DstReg
/// from \p TrueReg / \p FalseReg according to \p Cond (layout as produced by
/// parseCondBranch()).
///
/// Steps:
///  1. Derive a condition code from \p Cond.  For cbz/cbnz a flag-setting
///     "subs zr, reg, #0" is emitted first; for tbz/tbnz an "ands zr, reg,
///     #(1 << bit)" is emitted first.
///  2. Pick csel (GPR) or fcsel (FPR) from the class \p DstReg can be
///     constrained to.
///  3. For GPR selects, try to fold the definition of one input into
///     csinc/csinv/csneg via canFoldIntoCSel().
///  4. Constrain the input registers and emit the select.
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = 0;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = 1;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = 0;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = 1;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      // 32-bit variant: subs wzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    // Cond[3] is the bit number; the mask is encoded as a logical immediate.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  // Choose the select opcode from the first register class that DstReg can be
  // constrained to.  Folding is only attempted for the GPR forms.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
      CC);
}
540 
541 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an  ORRxx.
canBeExpandedToORR(const MachineInstr & MI,unsigned BitSize)542 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
543   uint64_t Imm = MI.getOperand(1).getImm();
544   uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
545   uint64_t Encoding;
546   return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
547 }
548 
549 // FIXME: this implementation should be micro-architecture dependent, so a
550 // micro-architecture target hook should be introduced here in future.
isAsCheapAsAMove(const MachineInstr & MI) const551 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
552   if (!Subtarget.hasCustomCheapAsMoveHandling())
553     return MI.isAsCheapAsAMove();
554 
555   unsigned Imm;
556 
557   switch (MI.getOpcode()) {
558   default:
559     return false;
560 
561   // add/sub on register without shift
562   case AArch64::ADDWri:
563   case AArch64::ADDXri:
564   case AArch64::SUBWri:
565   case AArch64::SUBXri:
566     return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
567             MI.getOperand(3).getImm() == 0);
568 
569   // add/sub on register with shift
570   case AArch64::ADDWrs:
571   case AArch64::ADDXrs:
572   case AArch64::SUBWrs:
573   case AArch64::SUBXrs:
574     Imm = MI.getOperand(3).getImm();
575     return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
576             AArch64_AM::getArithShiftValue(Imm) < 4);
577 
578   // logical ops on immediate
579   case AArch64::ANDWri:
580   case AArch64::ANDXri:
581   case AArch64::EORWri:
582   case AArch64::EORXri:
583   case AArch64::ORRWri:
584   case AArch64::ORRXri:
585     return true;
586 
587   // logical ops on register without shift
588   case AArch64::ANDWrr:
589   case AArch64::ANDXrr:
590   case AArch64::BICWrr:
591   case AArch64::BICXrr:
592   case AArch64::EONWrr:
593   case AArch64::EONXrr:
594   case AArch64::EORWrr:
595   case AArch64::EORXrr:
596   case AArch64::ORNWrr:
597   case AArch64::ORNXrr:
598   case AArch64::ORRWrr:
599   case AArch64::ORRXrr:
600     return true;
601 
602   // logical ops on register with shift
603   case AArch64::ANDWrs:
604   case AArch64::ANDXrs:
605   case AArch64::BICWrs:
606   case AArch64::BICXrs:
607   case AArch64::EONWrs:
608   case AArch64::EONXrs:
609   case AArch64::EORWrs:
610   case AArch64::EORXrs:
611   case AArch64::ORNWrs:
612   case AArch64::ORNXrs:
613   case AArch64::ORRWrs:
614   case AArch64::ORRXrs:
615     Imm = MI.getOperand(3).getImm();
616     return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
617             AArch64_AM::getShiftValue(Imm) < 4 &&
618             AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);
619 
620   // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
621   // ORRXri, it is as cheap as MOV
622   case AArch64::MOVi32imm:
623     return canBeExpandedToORR(MI, 32);
624   case AArch64::MOVi64imm:
625     return canBeExpandedToORR(MI, 64);
626 
627   // It is cheap to move #0 to float registers if the subtarget has
628   // ZeroCycleZeroing feature.
629   case AArch64::FMOVS0:
630   case AArch64::FMOVD0:
631     return Subtarget.hasZeroCycleZeroing();
632   }
633 
634   llvm_unreachable("Unknown opcode to check as cheap as a move!");
635 }
636 
isCoalescableExtInstr(const MachineInstr & MI,unsigned & SrcReg,unsigned & DstReg,unsigned & SubIdx) const637 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
638                                              unsigned &SrcReg, unsigned &DstReg,
639                                              unsigned &SubIdx) const {
640   switch (MI.getOpcode()) {
641   default:
642     return false;
643   case AArch64::SBFMXri: // aka sxtw
644   case AArch64::UBFMXri: // aka uxtw
645     // Check for the 32 -> 64 bit extension case, these instructions can do
646     // much more.
647     if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
648       return false;
649     // This is a signed or unsigned 32 -> 64 bit extension.
650     SrcReg = MI.getOperand(1).getReg();
651     DstReg = MI.getOperand(0).getReg();
652     SubIdx = AArch64::sub_32;
653     return true;
654   }
655 }
656 
areMemAccessesTriviallyDisjoint(MachineInstr & MIa,MachineInstr & MIb,AliasAnalysis * AA) const657 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
658     MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
659   const TargetRegisterInfo *TRI = &getRegisterInfo();
660   unsigned BaseRegA = 0, BaseRegB = 0;
661   int64_t OffsetA = 0, OffsetB = 0;
662   unsigned WidthA = 0, WidthB = 0;
663 
664   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
665   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
666 
667   if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
668       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
669     return false;
670 
671   // Retrieve the base register, offset from the base register and width. Width
672   // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).  If
673   // base registers are identical, and the offset of a lower memory access +
674   // the width doesn't overlap the offset of a higher memory access,
675   // then the memory accesses are different.
676   if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
677       getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
678     if (BaseRegA == BaseRegB) {
679       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
680       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
681       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
682       if (LowOffset + LowWidth <= HighOffset)
683         return true;
684     }
685   }
686   return false;
687 }
688 
689 /// analyzeCompare - For a comparison instruction, return the source registers
690 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
691 /// Return true if the comparison instruction can be analyzed.
analyzeCompare(const MachineInstr & MI,unsigned & SrcReg,unsigned & SrcReg2,int & CmpMask,int & CmpValue) const692 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
693                                       unsigned &SrcReg2, int &CmpMask,
694                                       int &CmpValue) const {
695   switch (MI.getOpcode()) {
696   default:
697     break;
698   case AArch64::SUBSWrr:
699   case AArch64::SUBSWrs:
700   case AArch64::SUBSWrx:
701   case AArch64::SUBSXrr:
702   case AArch64::SUBSXrs:
703   case AArch64::SUBSXrx:
704   case AArch64::ADDSWrr:
705   case AArch64::ADDSWrs:
706   case AArch64::ADDSWrx:
707   case AArch64::ADDSXrr:
708   case AArch64::ADDSXrs:
709   case AArch64::ADDSXrx:
710     // Replace SUBSWrr with SUBWrr if NZCV is not used.
711     SrcReg = MI.getOperand(1).getReg();
712     SrcReg2 = MI.getOperand(2).getReg();
713     CmpMask = ~0;
714     CmpValue = 0;
715     return true;
716   case AArch64::SUBSWri:
717   case AArch64::ADDSWri:
718   case AArch64::SUBSXri:
719   case AArch64::ADDSXri:
720     SrcReg = MI.getOperand(1).getReg();
721     SrcReg2 = 0;
722     CmpMask = ~0;
723     // FIXME: In order to convert CmpValue to 0 or 1
724     CmpValue = MI.getOperand(2).getImm() != 0;
725     return true;
726   case AArch64::ANDSWri:
727   case AArch64::ANDSXri:
728     // ANDS does not use the same encoding scheme as the others xxxS
729     // instructions.
730     SrcReg = MI.getOperand(1).getReg();
731     SrcReg2 = 0;
732     CmpMask = ~0;
733     // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
734     // while the type of CmpValue is int. When converting uint64_t to int,
735     // the high 32 bits of uint64_t will be lost.
736     // In fact it causes a bug in spec2006-483.xalancbmk
737     // CmpValue is only used to compare with zero in OptimizeCompareInstr
738     CmpValue = AArch64_AM::decodeLogicalImmediate(
739                    MI.getOperand(2).getImm(),
740                    MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
741     return true;
742   }
743 
744   return false;
745 }
746 
UpdateOperandRegClass(MachineInstr & Instr)747 static bool UpdateOperandRegClass(MachineInstr &Instr) {
748   MachineBasicBlock *MBB = Instr.getParent();
749   assert(MBB && "Can't get MachineBasicBlock here");
750   MachineFunction *MF = MBB->getParent();
751   assert(MF && "Can't get MachineFunction here");
752   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
753   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
754   MachineRegisterInfo *MRI = &MF->getRegInfo();
755 
756   for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
757        ++OpIdx) {
758     MachineOperand &MO = Instr.getOperand(OpIdx);
759     const TargetRegisterClass *OpRegCstraints =
760         Instr.getRegClassConstraint(OpIdx, TII, TRI);
761 
762     // If there's no constraint, there's nothing to do.
763     if (!OpRegCstraints)
764       continue;
765     // If the operand is a frame index, there's nothing to do here.
766     // A frame index operand will resolve correctly during PEI.
767     if (MO.isFI())
768       continue;
769 
770     assert(MO.isReg() &&
771            "Operand has register constraints without being a register!");
772 
773     unsigned Reg = MO.getReg();
774     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
775       if (!OpRegCstraints->contains(Reg))
776         return false;
777     } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
778                !MRI->constrainRegClass(Reg, OpRegCstraints))
779       return false;
780   }
781 
782   return true;
783 }
784 
785 /// \brief Return the opcode that does not set flags when possible - otherwise
786 /// return the original opcode. The caller is responsible to do the actual
787 /// substitution and legality checking.
convertFlagSettingOpcode(const MachineInstr & MI)788 static unsigned convertFlagSettingOpcode(const MachineInstr &MI) {
789   // Don't convert all compare instructions, because for some the zero register
790   // encoding becomes the sp register.
791   bool MIDefinesZeroReg = false;
792   if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
793     MIDefinesZeroReg = true;
794 
795   switch (MI.getOpcode()) {
796   default:
797     return MI.getOpcode();
798   case AArch64::ADDSWrr:
799     return AArch64::ADDWrr;
800   case AArch64::ADDSWri:
801     return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
802   case AArch64::ADDSWrs:
803     return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
804   case AArch64::ADDSWrx:
805     return AArch64::ADDWrx;
806   case AArch64::ADDSXrr:
807     return AArch64::ADDXrr;
808   case AArch64::ADDSXri:
809     return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
810   case AArch64::ADDSXrs:
811     return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
812   case AArch64::ADDSXrx:
813     return AArch64::ADDXrx;
814   case AArch64::SUBSWrr:
815     return AArch64::SUBWrr;
816   case AArch64::SUBSWri:
817     return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
818   case AArch64::SUBSWrs:
819     return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
820   case AArch64::SUBSWrx:
821     return AArch64::SUBWrx;
822   case AArch64::SUBSXrr:
823     return AArch64::SUBXrr;
824   case AArch64::SUBSXri:
825     return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
826   case AArch64::SUBSXrs:
827     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
828   case AArch64::SUBSXrx:
829     return AArch64::SUBXrx;
830   }
831 }
832 
/// Bit mask telling areCFlagsAccessedBetweenInstrs which kinds of access to
/// look for: writes, reads, or both.
enum AccessKind {
  AK_Write = 0x01,
  AK_Read = 0x10,
  AK_All = AK_Write | AK_Read
};
838 
839 /// True when condition flags are accessed (either by writing or reading)
840 /// on the instruction trace starting at From and ending at To.
841 ///
842 /// Note: If From and To are from different blocks it's assumed CC are accessed
843 ///       on the path.
areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From,MachineBasicBlock::iterator To,const TargetRegisterInfo * TRI,const AccessKind AccessToCheck=AK_All)844 static bool areCFlagsAccessedBetweenInstrs(
845     MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
846     const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
847   // Early exit if To is at the beginning of the BB.
848   if (To == To->getParent()->begin())
849     return true;
850 
851   // Check whether the instructions are in the same basic block
852   // If not, assume the condition flags might get modified somewhere.
853   if (To->getParent() != From->getParent())
854     return true;
855 
856   // From must be above To.
857   assert(std::find_if(MachineBasicBlock::reverse_iterator(To),
858                       To->getParent()->rend(), [From](MachineInstr &MI) {
859                         return MachineBasicBlock::iterator(MI) == From;
860                       }) != To->getParent()->rend());
861 
862   // We iterate backward starting \p To until we hit \p From.
863   for (--To; To != From; --To) {
864     const MachineInstr &Instr = *To;
865 
866     if ( ((AccessToCheck & AK_Write) && Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
867          ((AccessToCheck & AK_Read)  && Instr.readsRegister(AArch64::NZCV, TRI)))
868       return true;
869   }
870   return false;
871 }
872 
873 /// Try to optimize a compare instruction. A compare instruction is an
874 /// instruction which produces AArch64::NZCV. It can be truly compare instruction
875 /// when there are no uses of its destination register.
876 ///
877 /// The following steps are tried in order:
878 /// 1. Convert CmpInstr into an unconditional version.
879 /// 2. Remove CmpInstr if above there is an instruction producing a needed
880 ///    condition code or an instruction which can be converted into such an instruction.
881 ///    Only comparison with zero is supported.
optimizeCompareInstr(MachineInstr & CmpInstr,unsigned SrcReg,unsigned SrcReg2,int CmpMask,int CmpValue,const MachineRegisterInfo * MRI) const882 bool AArch64InstrInfo::optimizeCompareInstr(
883     MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
884     int CmpValue, const MachineRegisterInfo *MRI) const {
885   assert(CmpInstr.getParent());
886   assert(MRI);
887 
888   // Replace SUBSWrr with SUBWrr if NZCV is not used.
889   int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
890   if (DeadNZCVIdx != -1) {
891     if (CmpInstr.definesRegister(AArch64::WZR) ||
892         CmpInstr.definesRegister(AArch64::XZR)) {
893       CmpInstr.eraseFromParent();
894       return true;
895     }
896     unsigned Opc = CmpInstr.getOpcode();
897     unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
898     if (NewOpc == Opc)
899       return false;
900     const MCInstrDesc &MCID = get(NewOpc);
901     CmpInstr.setDesc(MCID);
902     CmpInstr.RemoveOperand(DeadNZCVIdx);
903     bool succeeded = UpdateOperandRegClass(CmpInstr);
904     (void)succeeded;
905     assert(succeeded && "Some operands reg class are incompatible!");
906     return true;
907   }
908 
909   // Continue only if we have a "ri" where immediate is zero.
910   // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
911   // function.
912   assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
913   if (CmpValue != 0 || SrcReg2 != 0)
914     return false;
915 
916   // CmpInstr is a Compare instruction if destination register is not used.
917   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
918     return false;
919 
920   return substituteCmpToZero(CmpInstr, SrcReg, MRI);
921 }
922 
923 /// Get opcode of S version of Instr.
924 /// If Instr is S version its opcode is returned.
925 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
926 /// or we are not interested in it.
sForm(MachineInstr & Instr)927 static unsigned sForm(MachineInstr &Instr) {
928   switch (Instr.getOpcode()) {
929   default:
930     return AArch64::INSTRUCTION_LIST_END;
931 
932   case AArch64::ADDSWrr:
933   case AArch64::ADDSWri:
934   case AArch64::ADDSXrr:
935   case AArch64::ADDSXri:
936   case AArch64::SUBSWrr:
937   case AArch64::SUBSWri:
938   case AArch64::SUBSXrr:
939   case AArch64::SUBSXri:
940     return Instr.getOpcode();;
941 
942   case AArch64::ADDWrr:    return AArch64::ADDSWrr;
943   case AArch64::ADDWri:    return AArch64::ADDSWri;
944   case AArch64::ADDXrr:    return AArch64::ADDSXrr;
945   case AArch64::ADDXri:    return AArch64::ADDSXri;
946   case AArch64::ADCWr:     return AArch64::ADCSWr;
947   case AArch64::ADCXr:     return AArch64::ADCSXr;
948   case AArch64::SUBWrr:    return AArch64::SUBSWrr;
949   case AArch64::SUBWri:    return AArch64::SUBSWri;
950   case AArch64::SUBXrr:    return AArch64::SUBSXrr;
951   case AArch64::SUBXri:    return AArch64::SUBSXri;
952   case AArch64::SBCWr:     return AArch64::SBCSWr;
953   case AArch64::SBCXr:     return AArch64::SBCSXr;
954   case AArch64::ANDWri:    return AArch64::ANDSWri;
955   case AArch64::ANDXri:    return AArch64::ANDSXri;
956   }
957 }
958 
959 /// Check if AArch64::NZCV should be alive in successors of MBB.
areCFlagsAliveInSuccessors(MachineBasicBlock * MBB)960 static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
961   for (auto *BB : MBB->successors())
962     if (BB->isLiveIn(AArch64::NZCV))
963       return true;
964   return false;
965 }
966 
/// One boolean per condition flag (N, Z, C, V); true means the flag is used.
/// A default-constructed value has every flag cleared.
struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() {}

  /// Merge \p RHS into this set: a flag ends up set if it is set in either.
  UsedNZCV &operator|=(const UsedNZCV &RHS) {
    N = N || RHS.N;
    Z = Z || RHS.Z;
    C = C || RHS.C;
    V = V || RHS.V;
    return *this;
  }
};
981 
982 /// Find a condition code used by the instruction.
983 /// Returns AArch64CC::Invalid if either the instruction does not use condition
984 /// codes or we don't optimize CmpInstr in the presence of such instructions.
findCondCodeUsedByInstr(const MachineInstr & Instr)985 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
986   switch (Instr.getOpcode()) {
987     default:
988       return AArch64CC::Invalid;
989 
990     case AArch64::Bcc: {
991       int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
992       assert(Idx >= 2);
993       return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
994     }
995 
996     case AArch64::CSINVWr:
997     case AArch64::CSINVXr:
998     case AArch64::CSINCWr:
999     case AArch64::CSINCXr:
1000     case AArch64::CSELWr:
1001     case AArch64::CSELXr:
1002     case AArch64::CSNEGWr:
1003     case AArch64::CSNEGXr:
1004     case AArch64::FCSELSrrr:
1005     case AArch64::FCSELDrrr: {
1006       int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1007       assert(Idx >= 1);
1008       return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1009     }
1010   }
1011 }
1012 
getUsedNZCV(AArch64CC::CondCode CC)1013 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1014   assert(CC != AArch64CC::Invalid);
1015   UsedNZCV UsedFlags;
1016   switch (CC) {
1017     default:
1018       break;
1019 
1020     case AArch64CC::EQ: // Z set
1021     case AArch64CC::NE: // Z clear
1022       UsedFlags.Z = true;
1023       break;
1024 
1025     case AArch64CC::HI: // Z clear and C set
1026     case AArch64CC::LS: // Z set   or  C clear
1027       UsedFlags.Z = true;
1028     case AArch64CC::HS: // C set
1029     case AArch64CC::LO: // C clear
1030       UsedFlags.C = true;
1031       break;
1032 
1033     case AArch64CC::MI: // N set
1034     case AArch64CC::PL: // N clear
1035       UsedFlags.N = true;
1036       break;
1037 
1038     case AArch64CC::VS: // V set
1039     case AArch64CC::VC: // V clear
1040       UsedFlags.V = true;
1041       break;
1042 
1043     case AArch64CC::GT: // Z clear, N and V the same
1044     case AArch64CC::LE: // Z set,   N and V differ
1045       UsedFlags.Z = true;
1046     case AArch64CC::GE: // N and V the same
1047     case AArch64CC::LT: // N and V differ
1048       UsedFlags.N = true;
1049       UsedFlags.V = true;
1050       break;
1051   }
1052   return UsedFlags;
1053 }
1054 
isADDSRegImm(unsigned Opcode)1055 static bool isADDSRegImm(unsigned Opcode) {
1056   return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1057 }
1058 
isSUBSRegImm(unsigned Opcode)1059 static bool isSUBSRegImm(unsigned Opcode) {
1060   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1061 }
1062 
1063 /// Check if CmpInstr can be substituted by MI.
1064 ///
1065 /// CmpInstr can be substituted:
1066 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1067 /// - and, MI and CmpInstr are from the same MachineBB
1068 /// - and, condition flags are not alive in successors of the CmpInstr parent
1069 /// - and, if MI opcode is the S form there must be no defs of flags between
1070 ///        MI and CmpInstr
1071 ///        or if MI opcode is not the S form there must be neither defs of flags
1072 ///        nor uses of flags between MI and CmpInstr.
1073 /// - and  C/V flags are not used after CmpInstr
canInstrSubstituteCmpInstr(MachineInstr * MI,MachineInstr * CmpInstr,const TargetRegisterInfo * TRI)1074 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1075     const TargetRegisterInfo *TRI) {
1076   assert(MI);
1077   assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1078   assert(CmpInstr);
1079 
1080   const unsigned CmpOpcode = CmpInstr->getOpcode();
1081   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1082     return false;
1083 
1084   if (MI->getParent() != CmpInstr->getParent())
1085     return false;
1086 
1087   if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1088     return false;
1089 
1090   AccessKind AccessToCheck = AK_Write;
1091   if (sForm(*MI) != MI->getOpcode())
1092     AccessToCheck = AK_All;
1093   if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1094     return false;
1095 
1096   UsedNZCV NZCVUsedAfterCmp;
1097   for (auto I = std::next(CmpInstr->getIterator()), E = CmpInstr->getParent()->instr_end();
1098        I != E; ++I) {
1099     const MachineInstr &Instr = *I;
1100     if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1101       AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1102       if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1103         return false;
1104       NZCVUsedAfterCmp |= getUsedNZCV(CC);
1105     }
1106 
1107     if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1108       break;
1109   }
1110 
1111   return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1112 }
1113 
1114 /// Substitute an instruction comparing to zero with another instruction
1115 /// which produces needed condition flags.
1116 ///
1117 /// Return true on success.
substituteCmpToZero(MachineInstr & CmpInstr,unsigned SrcReg,const MachineRegisterInfo * MRI) const1118 bool AArch64InstrInfo::substituteCmpToZero(
1119     MachineInstr &CmpInstr, unsigned SrcReg,
1120     const MachineRegisterInfo *MRI) const {
1121   assert(MRI);
1122   // Get the unique definition of SrcReg.
1123   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1124   if (!MI)
1125     return false;
1126 
1127   const TargetRegisterInfo *TRI = &getRegisterInfo();
1128 
1129   unsigned NewOpc = sForm(*MI);
1130   if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1131     return false;
1132 
1133   if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1134     return false;
1135 
1136   // Update the instruction to set NZCV.
1137   MI->setDesc(get(NewOpc));
1138   CmpInstr.eraseFromParent();
1139   bool succeeded = UpdateOperandRegClass(*MI);
1140   (void)succeeded;
1141   assert(succeeded && "Some operands reg class are incompatible!");
1142   MI->addRegisterDefined(AArch64::NZCV, TRI);
1143   return true;
1144 }
1145 
expandPostRAPseudo(MachineInstr & MI) const1146 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1147   if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
1148     return false;
1149 
1150   MachineBasicBlock &MBB = *MI.getParent();
1151   DebugLoc DL = MI.getDebugLoc();
1152   unsigned Reg = MI.getOperand(0).getReg();
1153   const GlobalValue *GV =
1154       cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1155   const TargetMachine &TM = MBB.getParent()->getTarget();
1156   unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1157   const unsigned char MO_NC = AArch64II::MO_NC;
1158 
1159   if ((OpFlags & AArch64II::MO_GOT) != 0) {
1160     BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1161         .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
1162     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1163         .addReg(Reg, RegState::Kill)
1164         .addImm(0)
1165         .addMemOperand(*MI.memoperands_begin());
1166   } else if (TM.getCodeModel() == CodeModel::Large) {
1167     BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1168         .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
1169     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1170         .addReg(Reg, RegState::Kill)
1171         .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
1172     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1173         .addReg(Reg, RegState::Kill)
1174         .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
1175     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1176         .addReg(Reg, RegState::Kill)
1177         .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
1178     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1179         .addReg(Reg, RegState::Kill)
1180         .addImm(0)
1181         .addMemOperand(*MI.memoperands_begin());
1182   } else {
1183     BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1184         .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1185     unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1186     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1187         .addReg(Reg, RegState::Kill)
1188         .addGlobalAddress(GV, 0, LoFlags)
1189         .addMemOperand(*MI.memoperands_begin());
1190   }
1191 
1192   MBB.erase(MI);
1193 
1194   return true;
1195 }
1196 
1197 /// Return true if this is this instruction has a non-zero immediate
hasShiftedReg(const MachineInstr & MI) const1198 bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
1199   switch (MI.getOpcode()) {
1200   default:
1201     break;
1202   case AArch64::ADDSWrs:
1203   case AArch64::ADDSXrs:
1204   case AArch64::ADDWrs:
1205   case AArch64::ADDXrs:
1206   case AArch64::ANDSWrs:
1207   case AArch64::ANDSXrs:
1208   case AArch64::ANDWrs:
1209   case AArch64::ANDXrs:
1210   case AArch64::BICSWrs:
1211   case AArch64::BICSXrs:
1212   case AArch64::BICWrs:
1213   case AArch64::BICXrs:
1214   case AArch64::CRC32Brr:
1215   case AArch64::CRC32CBrr:
1216   case AArch64::CRC32CHrr:
1217   case AArch64::CRC32CWrr:
1218   case AArch64::CRC32CXrr:
1219   case AArch64::CRC32Hrr:
1220   case AArch64::CRC32Wrr:
1221   case AArch64::CRC32Xrr:
1222   case AArch64::EONWrs:
1223   case AArch64::EONXrs:
1224   case AArch64::EORWrs:
1225   case AArch64::EORXrs:
1226   case AArch64::ORNWrs:
1227   case AArch64::ORNXrs:
1228   case AArch64::ORRWrs:
1229   case AArch64::ORRXrs:
1230   case AArch64::SUBSWrs:
1231   case AArch64::SUBSXrs:
1232   case AArch64::SUBWrs:
1233   case AArch64::SUBXrs:
1234     if (MI.getOperand(3).isImm()) {
1235       unsigned val = MI.getOperand(3).getImm();
1236       return (val != 0);
1237     }
1238     break;
1239   }
1240   return false;
1241 }
1242 
1243 /// Return true if this is this instruction has a non-zero immediate
hasExtendedReg(const MachineInstr & MI) const1244 bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
1245   switch (MI.getOpcode()) {
1246   default:
1247     break;
1248   case AArch64::ADDSWrx:
1249   case AArch64::ADDSXrx:
1250   case AArch64::ADDSXrx64:
1251   case AArch64::ADDWrx:
1252   case AArch64::ADDXrx:
1253   case AArch64::ADDXrx64:
1254   case AArch64::SUBSWrx:
1255   case AArch64::SUBSXrx:
1256   case AArch64::SUBSXrx64:
1257   case AArch64::SUBWrx:
1258   case AArch64::SUBXrx:
1259   case AArch64::SUBXrx64:
1260     if (MI.getOperand(3).isImm()) {
1261       unsigned val = MI.getOperand(3).getImm();
1262       return (val != 0);
1263     }
1264     break;
1265   }
1266 
1267   return false;
1268 }
1269 
1270 // Return true if this instruction simply sets its single destination register
1271 // to zero. This is equivalent to a register rename of the zero-register.
isGPRZero(const MachineInstr & MI) const1272 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
1273   switch (MI.getOpcode()) {
1274   default:
1275     break;
1276   case AArch64::MOVZWi:
1277   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1278     if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1279       assert(MI.getDesc().getNumOperands() == 3 &&
1280              MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1281       return true;
1282     }
1283     break;
1284   case AArch64::ANDWri: // and Rd, Rzr, #imm
1285     return MI.getOperand(1).getReg() == AArch64::WZR;
1286   case AArch64::ANDXri:
1287     return MI.getOperand(1).getReg() == AArch64::XZR;
1288   case TargetOpcode::COPY:
1289     return MI.getOperand(1).getReg() == AArch64::WZR;
1290   }
1291   return false;
1292 }
1293 
1294 // Return true if this instruction simply renames a general register without
1295 // modifying bits.
isGPRCopy(const MachineInstr & MI) const1296 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
1297   switch (MI.getOpcode()) {
1298   default:
1299     break;
1300   case TargetOpcode::COPY: {
1301     // GPR32 copies will by lowered to ORRXrs
1302     unsigned DstReg = MI.getOperand(0).getReg();
1303     return (AArch64::GPR32RegClass.contains(DstReg) ||
1304             AArch64::GPR64RegClass.contains(DstReg));
1305   }
1306   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1307     if (MI.getOperand(1).getReg() == AArch64::XZR) {
1308       assert(MI.getDesc().getNumOperands() == 4 &&
1309              MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1310       return true;
1311     }
1312     break;
1313   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1314     if (MI.getOperand(2).getImm() == 0) {
1315       assert(MI.getDesc().getNumOperands() == 4 &&
1316              MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1317       return true;
1318     }
1319     break;
1320   }
1321   return false;
1322 }
1323 
1324 // Return true if this instruction simply renames a general register without
1325 // modifying bits.
isFPRCopy(const MachineInstr & MI) const1326 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
1327   switch (MI.getOpcode()) {
1328   default:
1329     break;
1330   case TargetOpcode::COPY: {
1331     // FPR64 copies will by lowered to ORR.16b
1332     unsigned DstReg = MI.getOperand(0).getReg();
1333     return (AArch64::FPR64RegClass.contains(DstReg) ||
1334             AArch64::FPR128RegClass.contains(DstReg));
1335   }
1336   case AArch64::ORRv16i8:
1337     if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1338       assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1339              "invalid ORRv16i8 operands");
1340       return true;
1341     }
1342     break;
1343   }
1344   return false;
1345 }
1346 
isLoadFromStackSlot(const MachineInstr & MI,int & FrameIndex) const1347 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1348                                                int &FrameIndex) const {
1349   switch (MI.getOpcode()) {
1350   default:
1351     break;
1352   case AArch64::LDRWui:
1353   case AArch64::LDRXui:
1354   case AArch64::LDRBui:
1355   case AArch64::LDRHui:
1356   case AArch64::LDRSui:
1357   case AArch64::LDRDui:
1358   case AArch64::LDRQui:
1359     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1360         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1361       FrameIndex = MI.getOperand(1).getIndex();
1362       return MI.getOperand(0).getReg();
1363     }
1364     break;
1365   }
1366 
1367   return 0;
1368 }
1369 
isStoreToStackSlot(const MachineInstr & MI,int & FrameIndex) const1370 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1371                                               int &FrameIndex) const {
1372   switch (MI.getOpcode()) {
1373   default:
1374     break;
1375   case AArch64::STRWui:
1376   case AArch64::STRXui:
1377   case AArch64::STRBui:
1378   case AArch64::STRHui:
1379   case AArch64::STRSui:
1380   case AArch64::STRDui:
1381   case AArch64::STRQui:
1382     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1383         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1384       FrameIndex = MI.getOperand(1).getIndex();
1385       return MI.getOperand(0).getReg();
1386     }
1387     break;
1388   }
1389   return 0;
1390 }
1391 
1392 /// Return true if this is load/store scales or extends its register offset.
1393 /// This refers to scaling a dynamic index as opposed to scaled immediates.
1394 /// MI should be a memory op that allows scaled addressing.
isScaledAddr(const MachineInstr & MI) const1395 bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
1396   switch (MI.getOpcode()) {
1397   default:
1398     break;
1399   case AArch64::LDRBBroW:
1400   case AArch64::LDRBroW:
1401   case AArch64::LDRDroW:
1402   case AArch64::LDRHHroW:
1403   case AArch64::LDRHroW:
1404   case AArch64::LDRQroW:
1405   case AArch64::LDRSBWroW:
1406   case AArch64::LDRSBXroW:
1407   case AArch64::LDRSHWroW:
1408   case AArch64::LDRSHXroW:
1409   case AArch64::LDRSWroW:
1410   case AArch64::LDRSroW:
1411   case AArch64::LDRWroW:
1412   case AArch64::LDRXroW:
1413   case AArch64::STRBBroW:
1414   case AArch64::STRBroW:
1415   case AArch64::STRDroW:
1416   case AArch64::STRHHroW:
1417   case AArch64::STRHroW:
1418   case AArch64::STRQroW:
1419   case AArch64::STRSroW:
1420   case AArch64::STRWroW:
1421   case AArch64::STRXroW:
1422   case AArch64::LDRBBroX:
1423   case AArch64::LDRBroX:
1424   case AArch64::LDRDroX:
1425   case AArch64::LDRHHroX:
1426   case AArch64::LDRHroX:
1427   case AArch64::LDRQroX:
1428   case AArch64::LDRSBWroX:
1429   case AArch64::LDRSBXroX:
1430   case AArch64::LDRSHWroX:
1431   case AArch64::LDRSHXroX:
1432   case AArch64::LDRSWroX:
1433   case AArch64::LDRSroX:
1434   case AArch64::LDRWroX:
1435   case AArch64::LDRXroX:
1436   case AArch64::STRBBroX:
1437   case AArch64::STRBroX:
1438   case AArch64::STRDroX:
1439   case AArch64::STRHHroX:
1440   case AArch64::STRHroX:
1441   case AArch64::STRQroX:
1442   case AArch64::STRSroX:
1443   case AArch64::STRWroX:
1444   case AArch64::STRXroX:
1445 
1446     unsigned Val = MI.getOperand(3).getImm();
1447     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
1448     return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
1449   }
1450   return false;
1451 }
1452 
1453 /// Check all MachineMemOperands for a hint to suppress pairing.
isLdStPairSuppressed(const MachineInstr & MI) const1454 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
1455   return any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1456     return MMO->getFlags() & MOSuppressPair;
1457   });
1458 }
1459 
1460 /// Set a flag on the first MachineMemOperand to suppress pairing.
suppressLdStPair(MachineInstr & MI) const1461 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
1462   if (MI.memoperands_empty())
1463     return;
1464   (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1465 }
1466 
isUnscaledLdSt(unsigned Opc) const1467 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
1468   switch (Opc) {
1469   default:
1470     return false;
1471   case AArch64::STURSi:
1472   case AArch64::STURDi:
1473   case AArch64::STURQi:
1474   case AArch64::STURBBi:
1475   case AArch64::STURHHi:
1476   case AArch64::STURWi:
1477   case AArch64::STURXi:
1478   case AArch64::LDURSi:
1479   case AArch64::LDURDi:
1480   case AArch64::LDURQi:
1481   case AArch64::LDURWi:
1482   case AArch64::LDURXi:
1483   case AArch64::LDURSWi:
1484   case AArch64::LDURHHi:
1485   case AArch64::LDURBBi:
1486   case AArch64::LDURSBWi:
1487   case AArch64::LDURSHWi:
1488     return true;
1489   }
1490 }
1491 
isUnscaledLdSt(MachineInstr & MI) const1492 bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
1493   return isUnscaledLdSt(MI.getOpcode());
1494 }
1495 
1496 // Is this a candidate for ld/st merging or pairing?  For example, we don't
1497 // touch volatiles or load/stores that have a hint to avoid pair formation.
isCandidateToMergeOrPair(MachineInstr & MI) const1498 bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
1499   // If this is a volatile load/store, don't mess with it.
1500   if (MI.hasOrderedMemoryRef())
1501     return false;
1502 
1503   // Make sure this is a reg+imm (as opposed to an address reloc).
1504   assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
1505   if (!MI.getOperand(2).isImm())
1506     return false;
1507 
1508   // Can't merge/pair if the instruction modifies the base register.
1509   // e.g., ldr x0, [x0]
1510   unsigned BaseReg = MI.getOperand(1).getReg();
1511   const TargetRegisterInfo *TRI = &getRegisterInfo();
1512   if (MI.modifiesRegister(BaseReg, TRI))
1513     return false;
1514 
1515   // Check if this load/store has a hint to avoid pair formation.
1516   // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1517   if (isLdStPairSuppressed(MI))
1518     return false;
1519 
1520   // On some CPUs quad load/store pairs are slower than two single load/stores.
1521   if (Subtarget.avoidQuadLdStPairs()) {
1522     switch (MI.getOpcode()) {
1523     default:
1524       break;
1525 
1526     case AArch64::LDURQi:
1527     case AArch64::STURQi:
1528     case AArch64::LDRQui:
1529     case AArch64::STRQui:
1530       return false;
1531     }
1532   }
1533 
1534   return true;
1535 }
1536 
getMemOpBaseRegImmOfs(MachineInstr & LdSt,unsigned & BaseReg,int64_t & Offset,const TargetRegisterInfo * TRI) const1537 bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
1538     MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
1539     const TargetRegisterInfo *TRI) const {
1540   switch (LdSt.getOpcode()) {
1541   default:
1542     return false;
1543   // Scaled instructions.
1544   case AArch64::STRSui:
1545   case AArch64::STRDui:
1546   case AArch64::STRQui:
1547   case AArch64::STRXui:
1548   case AArch64::STRWui:
1549   case AArch64::LDRSui:
1550   case AArch64::LDRDui:
1551   case AArch64::LDRQui:
1552   case AArch64::LDRXui:
1553   case AArch64::LDRWui:
1554   case AArch64::LDRSWui:
1555   // Unscaled instructions.
1556   case AArch64::STURSi:
1557   case AArch64::STURDi:
1558   case AArch64::STURQi:
1559   case AArch64::STURXi:
1560   case AArch64::STURWi:
1561   case AArch64::LDURSi:
1562   case AArch64::LDURDi:
1563   case AArch64::LDURQi:
1564   case AArch64::LDURWi:
1565   case AArch64::LDURXi:
1566   case AArch64::LDURSWi:
1567     unsigned Width;
1568     return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
1569   };
1570 }
1571 
getMemOpBaseRegImmOfsWidth(MachineInstr & LdSt,unsigned & BaseReg,int64_t & Offset,unsigned & Width,const TargetRegisterInfo * TRI) const1572 bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
1573     MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
1574     const TargetRegisterInfo *TRI) const {
1575   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1576   // Handle only loads/stores with base register followed by immediate offset.
1577   if (LdSt.getNumExplicitOperands() == 3) {
1578     // Non-paired instruction (e.g., ldr x1, [x0, #8]).
1579     if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
1580       return false;
1581   } else if (LdSt.getNumExplicitOperands() == 4) {
1582     // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
1583     if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
1584         !LdSt.getOperand(3).isImm())
1585       return false;
1586   } else
1587     return false;
1588 
1589   // Offset is calculated as the immediate operand multiplied by the scaling factor.
1590   // Unscaled instructions have scaling factor set to 1.
1591   unsigned Scale = 0;
1592   switch (LdSt.getOpcode()) {
1593   default:
1594     return false;
1595   case AArch64::LDURQi:
1596   case AArch64::STURQi:
1597     Width = 16;
1598     Scale = 1;
1599     break;
1600   case AArch64::LDURXi:
1601   case AArch64::LDURDi:
1602   case AArch64::STURXi:
1603   case AArch64::STURDi:
1604     Width = 8;
1605     Scale = 1;
1606     break;
1607   case AArch64::LDURWi:
1608   case AArch64::LDURSi:
1609   case AArch64::LDURSWi:
1610   case AArch64::STURWi:
1611   case AArch64::STURSi:
1612     Width = 4;
1613     Scale = 1;
1614     break;
1615   case AArch64::LDURHi:
1616   case AArch64::LDURHHi:
1617   case AArch64::LDURSHXi:
1618   case AArch64::LDURSHWi:
1619   case AArch64::STURHi:
1620   case AArch64::STURHHi:
1621     Width = 2;
1622     Scale = 1;
1623     break;
1624   case AArch64::LDURBi:
1625   case AArch64::LDURBBi:
1626   case AArch64::LDURSBXi:
1627   case AArch64::LDURSBWi:
1628   case AArch64::STURBi:
1629   case AArch64::STURBBi:
1630     Width = 1;
1631     Scale = 1;
1632     break;
1633   case AArch64::LDPQi:
1634   case AArch64::LDNPQi:
1635   case AArch64::STPQi:
1636   case AArch64::STNPQi:
1637     Scale = 16;
1638     Width = 32;
1639     break;
1640   case AArch64::LDRQui:
1641   case AArch64::STRQui:
1642     Scale = Width = 16;
1643     break;
1644   case AArch64::LDPXi:
1645   case AArch64::LDPDi:
1646   case AArch64::LDNPXi:
1647   case AArch64::LDNPDi:
1648   case AArch64::STPXi:
1649   case AArch64::STPDi:
1650   case AArch64::STNPXi:
1651   case AArch64::STNPDi:
1652     Scale = 8;
1653     Width = 16;
1654     break;
1655   case AArch64::LDRXui:
1656   case AArch64::LDRDui:
1657   case AArch64::STRXui:
1658   case AArch64::STRDui:
1659     Scale = Width = 8;
1660     break;
1661   case AArch64::LDPWi:
1662   case AArch64::LDPSi:
1663   case AArch64::LDNPWi:
1664   case AArch64::LDNPSi:
1665   case AArch64::STPWi:
1666   case AArch64::STPSi:
1667   case AArch64::STNPWi:
1668   case AArch64::STNPSi:
1669     Scale = 4;
1670     Width = 8;
1671     break;
1672   case AArch64::LDRWui:
1673   case AArch64::LDRSui:
1674   case AArch64::LDRSWui:
1675   case AArch64::STRWui:
1676   case AArch64::STRSui:
1677     Scale = Width = 4;
1678     break;
1679   case AArch64::LDRHui:
1680   case AArch64::LDRHHui:
1681   case AArch64::STRHui:
1682   case AArch64::STRHHui:
1683     Scale = Width = 2;
1684     break;
1685   case AArch64::LDRBui:
1686   case AArch64::LDRBBui:
1687   case AArch64::STRBui:
1688   case AArch64::STRBBui:
1689     Scale = Width = 1;
1690     break;
1691   }
1692 
1693   if (LdSt.getNumExplicitOperands() == 3) {
1694     BaseReg = LdSt.getOperand(1).getReg();
1695     Offset = LdSt.getOperand(2).getImm() * Scale;
1696   } else {
1697     assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
1698     BaseReg = LdSt.getOperand(2).getReg();
1699     Offset = LdSt.getOperand(3).getImm() * Scale;
1700   }
1701   return true;
1702 }
1703 
1704 // Scale the unscaled offsets.  Returns false if the unscaled offset can't be
1705 // scaled.
scaleOffset(unsigned Opc,int64_t & Offset)1706 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
1707   unsigned OffsetStride = 1;
1708   switch (Opc) {
1709   default:
1710     return false;
1711   case AArch64::LDURQi:
1712   case AArch64::STURQi:
1713     OffsetStride = 16;
1714     break;
1715   case AArch64::LDURXi:
1716   case AArch64::LDURDi:
1717   case AArch64::STURXi:
1718   case AArch64::STURDi:
1719     OffsetStride = 8;
1720     break;
1721   case AArch64::LDURWi:
1722   case AArch64::LDURSi:
1723   case AArch64::LDURSWi:
1724   case AArch64::STURWi:
1725   case AArch64::STURSi:
1726     OffsetStride = 4;
1727     break;
1728   }
1729   // If the byte-offset isn't a multiple of the stride, we can't scale this
1730   // offset.
1731   if (Offset % OffsetStride != 0)
1732     return false;
1733 
1734   // Convert the byte-offset used by unscaled into an "element" offset used
1735   // by the scaled pair load/store instructions.
1736   Offset /= OffsetStride;
1737   return true;
1738 }
1739 
canPairLdStOpc(unsigned FirstOpc,unsigned SecondOpc)1740 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
1741   if (FirstOpc == SecondOpc)
1742     return true;
1743   // We can also pair sign-ext and zero-ext instructions.
1744   switch (FirstOpc) {
1745   default:
1746     return false;
1747   case AArch64::LDRWui:
1748   case AArch64::LDURWi:
1749     return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
1750   case AArch64::LDRSWui:
1751   case AArch64::LDURSWi:
1752     return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
1753   }
1754   // These instructions can't be paired based on their opcodes.
1755   return false;
1756 }
1757 
1758 /// Detect opportunities for ldp/stp formation.
1759 ///
1760 /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
shouldClusterMemOps(MachineInstr & FirstLdSt,MachineInstr & SecondLdSt,unsigned NumLoads) const1761 bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
1762                                            MachineInstr &SecondLdSt,
1763                                            unsigned NumLoads) const {
1764   // Only cluster up to a single pair.
1765   if (NumLoads > 1)
1766     return false;
1767 
1768   // Can we pair these instructions based on their opcodes?
1769   unsigned FirstOpc = FirstLdSt.getOpcode();
1770   unsigned SecondOpc = SecondLdSt.getOpcode();
1771   if (!canPairLdStOpc(FirstOpc, SecondOpc))
1772     return false;
1773 
1774   // Can't merge volatiles or load/stores that have a hint to avoid pair
1775   // formation, for example.
1776   if (!isCandidateToMergeOrPair(FirstLdSt) ||
1777       !isCandidateToMergeOrPair(SecondLdSt))
1778     return false;
1779 
1780   // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
1781   int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
1782   if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
1783     return false;
1784 
1785   int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
1786   if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
1787     return false;
1788 
1789   // Pairwise instructions have a 7-bit signed offset field.
1790   if (Offset1 > 63 || Offset1 < -64)
1791     return false;
1792 
1793   // The caller should already have ordered First/SecondLdSt by offset.
1794   assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
1795   return Offset1 + 1 == Offset2;
1796 }
1797 
shouldScheduleAdjacent(MachineInstr & First,MachineInstr & Second) const1798 bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
1799                                               MachineInstr &Second) const {
1800   if (Subtarget.hasMacroOpFusion()) {
1801     // Fuse CMN, CMP, TST followed by Bcc.
1802     unsigned SecondOpcode = Second.getOpcode();
1803     if (SecondOpcode == AArch64::Bcc) {
1804       switch (First.getOpcode()) {
1805       default:
1806         return false;
1807       case AArch64::SUBSWri:
1808       case AArch64::ADDSWri:
1809       case AArch64::ANDSWri:
1810       case AArch64::SUBSXri:
1811       case AArch64::ADDSXri:
1812       case AArch64::ANDSXri:
1813         return true;
1814       }
1815     }
1816     // Fuse ALU operations followed by CBZ/CBNZ.
1817     if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
1818         SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
1819       switch (First.getOpcode()) {
1820       default:
1821         return false;
1822       case AArch64::ADDWri:
1823       case AArch64::ADDXri:
1824       case AArch64::ANDWri:
1825       case AArch64::ANDXri:
1826       case AArch64::EORWri:
1827       case AArch64::EORXri:
1828       case AArch64::ORRWri:
1829       case AArch64::ORRXri:
1830       case AArch64::SUBWri:
1831       case AArch64::SUBXri:
1832         return true;
1833       }
1834     }
1835   }
1836   return false;
1837 }
1838 
emitFrameIndexDebugValue(MachineFunction & MF,int FrameIx,uint64_t Offset,const MDNode * Var,const MDNode * Expr,const DebugLoc & DL) const1839 MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
1840     MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
1841     const MDNode *Expr, const DebugLoc &DL) const {
1842   MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
1843                                 .addFrameIndex(FrameIx)
1844                                 .addImm(0)
1845                                 .addImm(Offset)
1846                                 .addMetadata(Var)
1847                                 .addMetadata(Expr);
1848   return &*MIB;
1849 }
1850 
/// Append register operand \p Reg, restricted to sub-register \p SubIdx, to
/// \p MIB with flags \p State, and return the builder.  A zero \p SubIdx adds
/// \p Reg unchanged; a physical register is replaced by the sub-register
/// obtained from \p TRI; any other register keeps \p SubIdx on the operand.
static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
                                            unsigned Reg, unsigned SubIdx,
                                            unsigned State,
                                            const TargetRegisterInfo *TRI) {
  // No sub-register requested.
  if (SubIdx == 0)
    return MIB.addReg(Reg, State);

  // Non-physical register: attach the sub-register index to the operand.
  if (!TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(Reg, State, SubIdx);

  // Physical register: resolve to the concrete sub-register.
  const unsigned PhysSubReg = TRI->getSubReg(Reg, SubIdx);
  return MIB.addReg(PhysSubReg, State);
}
1862 
/// Return true if the distance from \p SrcReg up to \p DestReg, taken modulo
/// 32, is smaller than \p NumRegs.
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // We want the positive remainder mod 32; with unsigned arithmetic a 5-bit
  // mask yields exactly that, including when DestReg < SrcReg.
  const unsigned Mod32Mask = 0x1f;
  const unsigned WrappedDistance = (DestReg - SrcReg) & Mod32Mask;
  return WrappedDistance < NumRegs;
}
1869 
copyPhysRegTuple(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const DebugLoc & DL,unsigned DestReg,unsigned SrcReg,bool KillSrc,unsigned Opcode,llvm::ArrayRef<unsigned> Indices) const1870 void AArch64InstrInfo::copyPhysRegTuple(
1871     MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
1872     unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
1873     llvm::ArrayRef<unsigned> Indices) const {
1874   assert(Subtarget.hasNEON() &&
1875          "Unexpected register copy without NEON");
1876   const TargetRegisterInfo *TRI = &getRegisterInfo();
1877   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
1878   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
1879   unsigned NumRegs = Indices.size();
1880 
1881   int SubReg = 0, End = NumRegs, Incr = 1;
1882   if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
1883     SubReg = NumRegs - 1;
1884     End = -1;
1885     Incr = -1;
1886   }
1887 
1888   for (; SubReg != End; SubReg += Incr) {
1889     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
1890     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
1891     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
1892     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
1893   }
1894 }
1895 
copyPhysReg(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const DebugLoc & DL,unsigned DestReg,unsigned SrcReg,bool KillSrc) const1896 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
1897                                    MachineBasicBlock::iterator I,
1898                                    const DebugLoc &DL, unsigned DestReg,
1899                                    unsigned SrcReg, bool KillSrc) const {
1900   if (AArch64::GPR32spRegClass.contains(DestReg) &&
1901       (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
1902     const TargetRegisterInfo *TRI = &getRegisterInfo();
1903 
1904     if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
1905       // If either operand is WSP, expand to ADD #0.
1906       if (Subtarget.hasZeroCycleRegMove()) {
1907         // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
1908         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
1909                                                      &AArch64::GPR64spRegClass);
1910         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
1911                                                     &AArch64::GPR64spRegClass);
1912         // This instruction is reading and writing X registers.  This may upset
1913         // the register scavenger and machine verifier, so we need to indicate
1914         // that we are reading an undefined value from SrcRegX, but a proper
1915         // value from SrcReg.
1916         BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
1917             .addReg(SrcRegX, RegState::Undef)
1918             .addImm(0)
1919             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
1920             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
1921       } else {
1922         BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
1923             .addReg(SrcReg, getKillRegState(KillSrc))
1924             .addImm(0)
1925             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1926       }
1927     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
1928       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
1929           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1930     } else {
1931       if (Subtarget.hasZeroCycleRegMove()) {
1932         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
1933         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
1934                                                      &AArch64::GPR64spRegClass);
1935         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
1936                                                     &AArch64::GPR64spRegClass);
1937         // This instruction is reading and writing X registers.  This may upset
1938         // the register scavenger and machine verifier, so we need to indicate
1939         // that we are reading an undefined value from SrcRegX, but a proper
1940         // value from SrcReg.
1941         BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
1942             .addReg(AArch64::XZR)
1943             .addReg(SrcRegX, RegState::Undef)
1944             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
1945       } else {
1946         // Otherwise, expand to ORR WZR.
1947         BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
1948             .addReg(AArch64::WZR)
1949             .addReg(SrcReg, getKillRegState(KillSrc));
1950       }
1951     }
1952     return;
1953   }
1954 
1955   if (AArch64::GPR64spRegClass.contains(DestReg) &&
1956       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
1957     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
1958       // If either operand is SP, expand to ADD #0.
1959       BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
1960           .addReg(SrcReg, getKillRegState(KillSrc))
1961           .addImm(0)
1962           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1963     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
1964       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
1965           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1966     } else {
1967       // Otherwise, expand to ORR XZR.
1968       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
1969           .addReg(AArch64::XZR)
1970           .addReg(SrcReg, getKillRegState(KillSrc));
1971     }
1972     return;
1973   }
1974 
1975   // Copy a DDDD register quad by copying the individual sub-registers.
1976   if (AArch64::DDDDRegClass.contains(DestReg) &&
1977       AArch64::DDDDRegClass.contains(SrcReg)) {
1978     static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
1979                                         AArch64::dsub2, AArch64::dsub3 };
1980     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
1981                      Indices);
1982     return;
1983   }
1984 
1985   // Copy a DDD register triple by copying the individual sub-registers.
1986   if (AArch64::DDDRegClass.contains(DestReg) &&
1987       AArch64::DDDRegClass.contains(SrcReg)) {
1988     static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
1989                                         AArch64::dsub2 };
1990     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
1991                      Indices);
1992     return;
1993   }
1994 
1995   // Copy a DD register pair by copying the individual sub-registers.
1996   if (AArch64::DDRegClass.contains(DestReg) &&
1997       AArch64::DDRegClass.contains(SrcReg)) {
1998     static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
1999     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2000                      Indices);
2001     return;
2002   }
2003 
2004   // Copy a QQQQ register quad by copying the individual sub-registers.
2005   if (AArch64::QQQQRegClass.contains(DestReg) &&
2006       AArch64::QQQQRegClass.contains(SrcReg)) {
2007     static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
2008                                         AArch64::qsub2, AArch64::qsub3 };
2009     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2010                      Indices);
2011     return;
2012   }
2013 
2014   // Copy a QQQ register triple by copying the individual sub-registers.
2015   if (AArch64::QQQRegClass.contains(DestReg) &&
2016       AArch64::QQQRegClass.contains(SrcReg)) {
2017     static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
2018                                         AArch64::qsub2 };
2019     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2020                      Indices);
2021     return;
2022   }
2023 
2024   // Copy a QQ register pair by copying the individual sub-registers.
2025   if (AArch64::QQRegClass.contains(DestReg) &&
2026       AArch64::QQRegClass.contains(SrcReg)) {
2027     static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
2028     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2029                      Indices);
2030     return;
2031   }
2032 
2033   if (AArch64::FPR128RegClass.contains(DestReg) &&
2034       AArch64::FPR128RegClass.contains(SrcReg)) {
2035     if(Subtarget.hasNEON()) {
2036       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2037           .addReg(SrcReg)
2038           .addReg(SrcReg, getKillRegState(KillSrc));
2039     } else {
2040       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2041         .addReg(AArch64::SP, RegState::Define)
2042         .addReg(SrcReg, getKillRegState(KillSrc))
2043         .addReg(AArch64::SP)
2044         .addImm(-16);
2045       BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2046         .addReg(AArch64::SP, RegState::Define)
2047         .addReg(DestReg, RegState::Define)
2048         .addReg(AArch64::SP)
2049         .addImm(16);
2050     }
2051     return;
2052   }
2053 
2054   if (AArch64::FPR64RegClass.contains(DestReg) &&
2055       AArch64::FPR64RegClass.contains(SrcReg)) {
2056     if(Subtarget.hasNEON()) {
2057       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2058                                        &AArch64::FPR128RegClass);
2059       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2060                                       &AArch64::FPR128RegClass);
2061       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2062           .addReg(SrcReg)
2063           .addReg(SrcReg, getKillRegState(KillSrc));
2064     } else {
2065       BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2066           .addReg(SrcReg, getKillRegState(KillSrc));
2067     }
2068     return;
2069   }
2070 
2071   if (AArch64::FPR32RegClass.contains(DestReg) &&
2072       AArch64::FPR32RegClass.contains(SrcReg)) {
2073     if(Subtarget.hasNEON()) {
2074       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2075                                        &AArch64::FPR128RegClass);
2076       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2077                                       &AArch64::FPR128RegClass);
2078       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2079           .addReg(SrcReg)
2080           .addReg(SrcReg, getKillRegState(KillSrc));
2081     } else {
2082       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2083           .addReg(SrcReg, getKillRegState(KillSrc));
2084     }
2085     return;
2086   }
2087 
2088   if (AArch64::FPR16RegClass.contains(DestReg) &&
2089       AArch64::FPR16RegClass.contains(SrcReg)) {
2090     if(Subtarget.hasNEON()) {
2091       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2092                                        &AArch64::FPR128RegClass);
2093       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2094                                       &AArch64::FPR128RegClass);
2095       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2096           .addReg(SrcReg)
2097           .addReg(SrcReg, getKillRegState(KillSrc));
2098     } else {
2099       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2100                                        &AArch64::FPR32RegClass);
2101       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2102                                       &AArch64::FPR32RegClass);
2103       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2104           .addReg(SrcReg, getKillRegState(KillSrc));
2105     }
2106     return;
2107   }
2108 
2109   if (AArch64::FPR8RegClass.contains(DestReg) &&
2110       AArch64::FPR8RegClass.contains(SrcReg)) {
2111     if(Subtarget.hasNEON()) {
2112       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2113                                        &AArch64::FPR128RegClass);
2114       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2115                                       &AArch64::FPR128RegClass);
2116       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2117           .addReg(SrcReg)
2118           .addReg(SrcReg, getKillRegState(KillSrc));
2119     } else {
2120       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2121                                        &AArch64::FPR32RegClass);
2122       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2123                                       &AArch64::FPR32RegClass);
2124       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2125           .addReg(SrcReg, getKillRegState(KillSrc));
2126     }
2127     return;
2128   }
2129 
2130   // Copies between GPR64 and FPR64.
2131   if (AArch64::FPR64RegClass.contains(DestReg) &&
2132       AArch64::GPR64RegClass.contains(SrcReg)) {
2133     BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2134         .addReg(SrcReg, getKillRegState(KillSrc));
2135     return;
2136   }
2137   if (AArch64::GPR64RegClass.contains(DestReg) &&
2138       AArch64::FPR64RegClass.contains(SrcReg)) {
2139     BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2140         .addReg(SrcReg, getKillRegState(KillSrc));
2141     return;
2142   }
2143   // Copies between GPR32 and FPR32.
2144   if (AArch64::FPR32RegClass.contains(DestReg) &&
2145       AArch64::GPR32RegClass.contains(SrcReg)) {
2146     BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2147         .addReg(SrcReg, getKillRegState(KillSrc));
2148     return;
2149   }
2150   if (AArch64::GPR32RegClass.contains(DestReg) &&
2151       AArch64::FPR32RegClass.contains(SrcReg)) {
2152     BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2153         .addReg(SrcReg, getKillRegState(KillSrc));
2154     return;
2155   }
2156 
2157   if (DestReg == AArch64::NZCV) {
2158     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2159     BuildMI(MBB, I, DL, get(AArch64::MSR))
2160       .addImm(AArch64SysReg::NZCV)
2161       .addReg(SrcReg, getKillRegState(KillSrc))
2162       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2163     return;
2164   }
2165 
2166   if (SrcReg == AArch64::NZCV) {
2167     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2168     BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2169       .addImm(AArch64SysReg::NZCV)
2170       .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2171     return;
2172   }
2173 
2174   llvm_unreachable("unimplemented reg-to-reg copy");
2175 }
2176 
storeRegToStackSlot(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,unsigned SrcReg,bool isKill,int FI,const TargetRegisterClass * RC,const TargetRegisterInfo * TRI) const2177 void AArch64InstrInfo::storeRegToStackSlot(
2178     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2179     bool isKill, int FI, const TargetRegisterClass *RC,
2180     const TargetRegisterInfo *TRI) const {
2181   DebugLoc DL;
2182   if (MBBI != MBB.end())
2183     DL = MBBI->getDebugLoc();
2184   MachineFunction &MF = *MBB.getParent();
2185   MachineFrameInfo &MFI = *MF.getFrameInfo();
2186   unsigned Align = MFI.getObjectAlignment(FI);
2187 
2188   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2189   MachineMemOperand *MMO = MF.getMachineMemOperand(
2190       PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2191   unsigned Opc = 0;
2192   bool Offset = true;
2193   switch (RC->getSize()) {
2194   case 1:
2195     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2196       Opc = AArch64::STRBui;
2197     break;
2198   case 2:
2199     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2200       Opc = AArch64::STRHui;
2201     break;
2202   case 4:
2203     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2204       Opc = AArch64::STRWui;
2205       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2206         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2207       else
2208         assert(SrcReg != AArch64::WSP);
2209     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2210       Opc = AArch64::STRSui;
2211     break;
2212   case 8:
2213     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2214       Opc = AArch64::STRXui;
2215       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2216         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2217       else
2218         assert(SrcReg != AArch64::SP);
2219     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2220       Opc = AArch64::STRDui;
2221     break;
2222   case 16:
2223     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2224       Opc = AArch64::STRQui;
2225     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2226       assert(Subtarget.hasNEON() &&
2227              "Unexpected register store without NEON");
2228       Opc = AArch64::ST1Twov1d;
2229       Offset = false;
2230     }
2231     break;
2232   case 24:
2233     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2234       assert(Subtarget.hasNEON() &&
2235              "Unexpected register store without NEON");
2236       Opc = AArch64::ST1Threev1d;
2237       Offset = false;
2238     }
2239     break;
2240   case 32:
2241     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2242       assert(Subtarget.hasNEON() &&
2243              "Unexpected register store without NEON");
2244       Opc = AArch64::ST1Fourv1d;
2245       Offset = false;
2246     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2247       assert(Subtarget.hasNEON() &&
2248              "Unexpected register store without NEON");
2249       Opc = AArch64::ST1Twov2d;
2250       Offset = false;
2251     }
2252     break;
2253   case 48:
2254     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2255       assert(Subtarget.hasNEON() &&
2256              "Unexpected register store without NEON");
2257       Opc = AArch64::ST1Threev2d;
2258       Offset = false;
2259     }
2260     break;
2261   case 64:
2262     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2263       assert(Subtarget.hasNEON() &&
2264              "Unexpected register store without NEON");
2265       Opc = AArch64::ST1Fourv2d;
2266       Offset = false;
2267     }
2268     break;
2269   }
2270   assert(Opc && "Unknown register class");
2271 
2272   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2273                                       .addReg(SrcReg, getKillRegState(isKill))
2274                                       .addFrameIndex(FI);
2275 
2276   if (Offset)
2277     MI.addImm(0);
2278   MI.addMemOperand(MMO);
2279 }
2280 
loadRegFromStackSlot(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,unsigned DestReg,int FI,const TargetRegisterClass * RC,const TargetRegisterInfo * TRI) const2281 void AArch64InstrInfo::loadRegFromStackSlot(
2282     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2283     int FI, const TargetRegisterClass *RC,
2284     const TargetRegisterInfo *TRI) const {
2285   DebugLoc DL;
2286   if (MBBI != MBB.end())
2287     DL = MBBI->getDebugLoc();
2288   MachineFunction &MF = *MBB.getParent();
2289   MachineFrameInfo &MFI = *MF.getFrameInfo();
2290   unsigned Align = MFI.getObjectAlignment(FI);
2291   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2292   MachineMemOperand *MMO = MF.getMachineMemOperand(
2293       PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2294 
2295   unsigned Opc = 0;
2296   bool Offset = true;
2297   switch (RC->getSize()) {
2298   case 1:
2299     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2300       Opc = AArch64::LDRBui;
2301     break;
2302   case 2:
2303     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2304       Opc = AArch64::LDRHui;
2305     break;
2306   case 4:
2307     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2308       Opc = AArch64::LDRWui;
2309       if (TargetRegisterInfo::isVirtualRegister(DestReg))
2310         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2311       else
2312         assert(DestReg != AArch64::WSP);
2313     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2314       Opc = AArch64::LDRSui;
2315     break;
2316   case 8:
2317     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2318       Opc = AArch64::LDRXui;
2319       if (TargetRegisterInfo::isVirtualRegister(DestReg))
2320         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2321       else
2322         assert(DestReg != AArch64::SP);
2323     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2324       Opc = AArch64::LDRDui;
2325     break;
2326   case 16:
2327     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2328       Opc = AArch64::LDRQui;
2329     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2330       assert(Subtarget.hasNEON() &&
2331              "Unexpected register load without NEON");
2332       Opc = AArch64::LD1Twov1d;
2333       Offset = false;
2334     }
2335     break;
2336   case 24:
2337     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2338       assert(Subtarget.hasNEON() &&
2339              "Unexpected register load without NEON");
2340       Opc = AArch64::LD1Threev1d;
2341       Offset = false;
2342     }
2343     break;
2344   case 32:
2345     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2346       assert(Subtarget.hasNEON() &&
2347              "Unexpected register load without NEON");
2348       Opc = AArch64::LD1Fourv1d;
2349       Offset = false;
2350     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2351       assert(Subtarget.hasNEON() &&
2352              "Unexpected register load without NEON");
2353       Opc = AArch64::LD1Twov2d;
2354       Offset = false;
2355     }
2356     break;
2357   case 48:
2358     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2359       assert(Subtarget.hasNEON() &&
2360              "Unexpected register load without NEON");
2361       Opc = AArch64::LD1Threev2d;
2362       Offset = false;
2363     }
2364     break;
2365   case 64:
2366     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2367       assert(Subtarget.hasNEON() &&
2368              "Unexpected register load without NEON");
2369       Opc = AArch64::LD1Fourv2d;
2370       Offset = false;
2371     }
2372     break;
2373   }
2374   assert(Opc && "Unknown register class");
2375 
2376   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2377                                       .addReg(DestReg, getDefRegState(true))
2378                                       .addFrameIndex(FI);
2379   if (Offset)
2380     MI.addImm(0);
2381   MI.addMemOperand(MMO);
2382 }
2383 
emitFrameOffset(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const DebugLoc & DL,unsigned DestReg,unsigned SrcReg,int Offset,const TargetInstrInfo * TII,MachineInstr::MIFlag Flag,bool SetNZCV)2384 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2385                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
2386                            unsigned DestReg, unsigned SrcReg, int Offset,
2387                            const TargetInstrInfo *TII,
2388                            MachineInstr::MIFlag Flag, bool SetNZCV) {
2389   if (DestReg == SrcReg && Offset == 0)
2390     return;
2391 
2392   assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2393          "SP increment/decrement not 16-byte aligned");
2394 
2395   bool isSub = Offset < 0;
2396   if (isSub)
2397     Offset = -Offset;
2398 
2399   // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2400   // scratch register.  If DestReg is a virtual register, use it as the
2401   // scratch register; otherwise, create a new virtual register (to be
2402   // replaced by the scavenger at the end of PEI).  That case can be optimized
2403   // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2404   // register can be loaded with offset%8 and the add/sub can use an extending
2405   // instruction with LSL#3.
2406   // Currently the function handles any offsets but generates a poor sequence
2407   // of code.
2408   //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2409 
2410   unsigned Opc;
2411   if (SetNZCV)
2412     Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2413   else
2414     Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2415   const unsigned MaxEncoding = 0xfff;
2416   const unsigned ShiftSize = 12;
2417   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
2418   while (((unsigned)Offset) >= (1 << ShiftSize)) {
2419     unsigned ThisVal;
2420     if (((unsigned)Offset) > MaxEncodableValue) {
2421       ThisVal = MaxEncodableValue;
2422     } else {
2423       ThisVal = Offset & MaxEncodableValue;
2424     }
2425     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2426            "Encoding cannot handle value that big");
2427     BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2428         .addReg(SrcReg)
2429         .addImm(ThisVal >> ShiftSize)
2430         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2431         .setMIFlag(Flag);
2432 
2433     SrcReg = DestReg;
2434     Offset -= ThisVal;
2435     if (Offset == 0)
2436       return;
2437   }
2438   BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2439       .addReg(SrcReg)
2440       .addImm(Offset)
2441       .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2442       .setMIFlag(Flag);
2443 }
2444 
foldMemoryOperandImpl(MachineFunction & MF,MachineInstr & MI,ArrayRef<unsigned> Ops,MachineBasicBlock::iterator InsertPt,int FrameIndex,LiveIntervals * LIS) const2445 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
2446     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
2447     MachineBasicBlock::iterator InsertPt, int FrameIndex,
2448     LiveIntervals *LIS) const {
2449   // This is a bit of a hack. Consider this instruction:
2450   //
2451   //   %vreg0<def> = COPY %SP; GPR64all:%vreg0
2452   //
2453   // We explicitly chose GPR64all for the virtual register so such a copy might
2454   // be eliminated by RegisterCoalescer. However, that may not be possible, and
2455   // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
2456   // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
2457   //
2458   // To prevent that, we are going to constrain the %vreg0 register class here.
2459   //
2460   // <rdar://problem/11522048>
2461   //
2462   if (MI.isCopy()) {
2463     unsigned DstReg = MI.getOperand(0).getReg();
2464     unsigned SrcReg = MI.getOperand(1).getReg();
2465     if (SrcReg == AArch64::SP &&
2466         TargetRegisterInfo::isVirtualRegister(DstReg)) {
2467       MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
2468       return nullptr;
2469     }
2470     if (DstReg == AArch64::SP &&
2471         TargetRegisterInfo::isVirtualRegister(SrcReg)) {
2472       MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2473       return nullptr;
2474     }
2475   }
2476 
2477   // Cannot fold.
2478   return nullptr;
2479 }
2480 
isAArch64FrameOffsetLegal(const MachineInstr & MI,int & Offset,bool * OutUseUnscaledOp,unsigned * OutUnscaledOp,int * EmittableOffset)2481 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
2482                                     bool *OutUseUnscaledOp,
2483                                     unsigned *OutUnscaledOp,
2484                                     int *EmittableOffset) {
2485   int Scale = 1;
2486   bool IsSigned = false;
2487   // The ImmIdx should be changed case by case if it is not 2.
2488   unsigned ImmIdx = 2;
2489   unsigned UnscaledOp = 0;
2490   // Set output values in case of early exit.
2491   if (EmittableOffset)
2492     *EmittableOffset = 0;
2493   if (OutUseUnscaledOp)
2494     *OutUseUnscaledOp = false;
2495   if (OutUnscaledOp)
2496     *OutUnscaledOp = 0;
2497   switch (MI.getOpcode()) {
2498   default:
2499     llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
2500   // Vector spills/fills can't take an immediate offset.
2501   case AArch64::LD1Twov2d:
2502   case AArch64::LD1Threev2d:
2503   case AArch64::LD1Fourv2d:
2504   case AArch64::LD1Twov1d:
2505   case AArch64::LD1Threev1d:
2506   case AArch64::LD1Fourv1d:
2507   case AArch64::ST1Twov2d:
2508   case AArch64::ST1Threev2d:
2509   case AArch64::ST1Fourv2d:
2510   case AArch64::ST1Twov1d:
2511   case AArch64::ST1Threev1d:
2512   case AArch64::ST1Fourv1d:
2513     return AArch64FrameOffsetCannotUpdate;
2514   case AArch64::PRFMui:
2515     Scale = 8;
2516     UnscaledOp = AArch64::PRFUMi;
2517     break;
2518   case AArch64::LDRXui:
2519     Scale = 8;
2520     UnscaledOp = AArch64::LDURXi;
2521     break;
2522   case AArch64::LDRWui:
2523     Scale = 4;
2524     UnscaledOp = AArch64::LDURWi;
2525     break;
2526   case AArch64::LDRBui:
2527     Scale = 1;
2528     UnscaledOp = AArch64::LDURBi;
2529     break;
2530   case AArch64::LDRHui:
2531     Scale = 2;
2532     UnscaledOp = AArch64::LDURHi;
2533     break;
2534   case AArch64::LDRSui:
2535     Scale = 4;
2536     UnscaledOp = AArch64::LDURSi;
2537     break;
2538   case AArch64::LDRDui:
2539     Scale = 8;
2540     UnscaledOp = AArch64::LDURDi;
2541     break;
2542   case AArch64::LDRQui:
2543     Scale = 16;
2544     UnscaledOp = AArch64::LDURQi;
2545     break;
2546   case AArch64::LDRBBui:
2547     Scale = 1;
2548     UnscaledOp = AArch64::LDURBBi;
2549     break;
2550   case AArch64::LDRHHui:
2551     Scale = 2;
2552     UnscaledOp = AArch64::LDURHHi;
2553     break;
2554   case AArch64::LDRSBXui:
2555     Scale = 1;
2556     UnscaledOp = AArch64::LDURSBXi;
2557     break;
2558   case AArch64::LDRSBWui:
2559     Scale = 1;
2560     UnscaledOp = AArch64::LDURSBWi;
2561     break;
2562   case AArch64::LDRSHXui:
2563     Scale = 2;
2564     UnscaledOp = AArch64::LDURSHXi;
2565     break;
2566   case AArch64::LDRSHWui:
2567     Scale = 2;
2568     UnscaledOp = AArch64::LDURSHWi;
2569     break;
2570   case AArch64::LDRSWui:
2571     Scale = 4;
2572     UnscaledOp = AArch64::LDURSWi;
2573     break;
2574 
2575   case AArch64::STRXui:
2576     Scale = 8;
2577     UnscaledOp = AArch64::STURXi;
2578     break;
2579   case AArch64::STRWui:
2580     Scale = 4;
2581     UnscaledOp = AArch64::STURWi;
2582     break;
2583   case AArch64::STRBui:
2584     Scale = 1;
2585     UnscaledOp = AArch64::STURBi;
2586     break;
2587   case AArch64::STRHui:
2588     Scale = 2;
2589     UnscaledOp = AArch64::STURHi;
2590     break;
2591   case AArch64::STRSui:
2592     Scale = 4;
2593     UnscaledOp = AArch64::STURSi;
2594     break;
2595   case AArch64::STRDui:
2596     Scale = 8;
2597     UnscaledOp = AArch64::STURDi;
2598     break;
2599   case AArch64::STRQui:
2600     Scale = 16;
2601     UnscaledOp = AArch64::STURQi;
2602     break;
2603   case AArch64::STRBBui:
2604     Scale = 1;
2605     UnscaledOp = AArch64::STURBBi;
2606     break;
2607   case AArch64::STRHHui:
2608     Scale = 2;
2609     UnscaledOp = AArch64::STURHHi;
2610     break;
2611 
2612   case AArch64::LDPXi:
2613   case AArch64::LDPDi:
2614   case AArch64::STPXi:
2615   case AArch64::STPDi:
2616   case AArch64::LDNPXi:
2617   case AArch64::LDNPDi:
2618   case AArch64::STNPXi:
2619   case AArch64::STNPDi:
2620     ImmIdx = 3;
2621     IsSigned = true;
2622     Scale = 8;
2623     break;
2624   case AArch64::LDPQi:
2625   case AArch64::STPQi:
2626   case AArch64::LDNPQi:
2627   case AArch64::STNPQi:
2628     ImmIdx = 3;
2629     IsSigned = true;
2630     Scale = 16;
2631     break;
2632   case AArch64::LDPWi:
2633   case AArch64::LDPSi:
2634   case AArch64::STPWi:
2635   case AArch64::STPSi:
2636   case AArch64::LDNPWi:
2637   case AArch64::LDNPSi:
2638   case AArch64::STNPWi:
2639   case AArch64::STNPSi:
2640     ImmIdx = 3;
2641     IsSigned = true;
2642     Scale = 4;
2643     break;
2644 
2645   case AArch64::LDURXi:
2646   case AArch64::LDURWi:
2647   case AArch64::LDURBi:
2648   case AArch64::LDURHi:
2649   case AArch64::LDURSi:
2650   case AArch64::LDURDi:
2651   case AArch64::LDURQi:
2652   case AArch64::LDURHHi:
2653   case AArch64::LDURBBi:
2654   case AArch64::LDURSBXi:
2655   case AArch64::LDURSBWi:
2656   case AArch64::LDURSHXi:
2657   case AArch64::LDURSHWi:
2658   case AArch64::LDURSWi:
2659   case AArch64::STURXi:
2660   case AArch64::STURWi:
2661   case AArch64::STURBi:
2662   case AArch64::STURHi:
2663   case AArch64::STURSi:
2664   case AArch64::STURDi:
2665   case AArch64::STURQi:
2666   case AArch64::STURBBi:
2667   case AArch64::STURHHi:
2668     Scale = 1;
2669     break;
2670   }
2671 
2672   Offset += MI.getOperand(ImmIdx).getImm() * Scale;
2673 
2674   bool useUnscaledOp = false;
2675   // If the offset doesn't match the scale, we rewrite the instruction to
2676   // use the unscaled instruction instead. Likewise, if we have a negative
2677   // offset (and have an unscaled op to use).
2678   if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
2679     useUnscaledOp = true;
2680 
2681   // Use an unscaled addressing mode if the instruction has a negative offset
2682   // (or if the instruction is already using an unscaled addressing mode).
2683   unsigned MaskBits;
2684   if (IsSigned) {
2685     // ldp/stp instructions.
2686     MaskBits = 7;
2687     Offset /= Scale;
2688   } else if (UnscaledOp == 0 || useUnscaledOp) {
2689     MaskBits = 9;
2690     IsSigned = true;
2691     Scale = 1;
2692   } else {
2693     MaskBits = 12;
2694     IsSigned = false;
2695     Offset /= Scale;
2696   }
2697 
2698   // Attempt to fold address computation.
2699   int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
2700   int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
2701   if (Offset >= MinOff && Offset <= MaxOff) {
2702     if (EmittableOffset)
2703       *EmittableOffset = Offset;
2704     Offset = 0;
2705   } else {
2706     int NewOff = Offset < 0 ? MinOff : MaxOff;
2707     if (EmittableOffset)
2708       *EmittableOffset = NewOff;
2709     Offset = (Offset - NewOff) * Scale;
2710   }
2711   if (OutUseUnscaledOp)
2712     *OutUseUnscaledOp = useUnscaledOp;
2713   if (OutUnscaledOp)
2714     *OutUnscaledOp = UnscaledOp;
2715   return AArch64FrameOffsetCanUpdate |
2716          (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
2717 }
2718 
rewriteAArch64FrameIndex(MachineInstr & MI,unsigned FrameRegIdx,unsigned FrameReg,int & Offset,const AArch64InstrInfo * TII)2719 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2720                                     unsigned FrameReg, int &Offset,
2721                                     const AArch64InstrInfo *TII) {
2722   unsigned Opcode = MI.getOpcode();
2723   unsigned ImmIdx = FrameRegIdx + 1;
2724 
2725   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
2726     Offset += MI.getOperand(ImmIdx).getImm();
2727     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
2728                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
2729                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
2730     MI.eraseFromParent();
2731     Offset = 0;
2732     return true;
2733   }
2734 
2735   int NewOffset;
2736   unsigned UnscaledOp;
2737   bool UseUnscaledOp;
2738   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
2739                                          &UnscaledOp, &NewOffset);
2740   if (Status & AArch64FrameOffsetCanUpdate) {
2741     if (Status & AArch64FrameOffsetIsLegal)
2742       // Replace the FrameIndex with FrameReg.
2743       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2744     if (UseUnscaledOp)
2745       MI.setDesc(TII->get(UnscaledOp));
2746 
2747     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
2748     return Offset == 0;
2749   }
2750 
2751   return false;
2752 }
2753 
getNoopForMachoTarget(MCInst & NopInst) const2754 void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
2755   NopInst.setOpcode(AArch64::HINT);
2756   NopInst.addOperand(MCOperand::createImm(0));
2757 }
2758 
2759 // AArch64 supports MachineCombiner.
useMachineCombiner() const2760 bool AArch64InstrInfo::useMachineCombiner() const {
2761 
2762   return true;
2763 }
2764 //
2765 // True when Opc sets flag
isCombineInstrSettingFlag(unsigned Opc)2766 static bool isCombineInstrSettingFlag(unsigned Opc) {
2767   switch (Opc) {
2768   case AArch64::ADDSWrr:
2769   case AArch64::ADDSWri:
2770   case AArch64::ADDSXrr:
2771   case AArch64::ADDSXri:
2772   case AArch64::SUBSWrr:
2773   case AArch64::SUBSXrr:
2774   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
2775   case AArch64::SUBSWri:
2776   case AArch64::SUBSXri:
2777     return true;
2778   default:
2779     break;
2780   }
2781   return false;
2782 }
2783 //
2784 // 32b Opcodes that can be combined with a MUL
isCombineInstrCandidate32(unsigned Opc)2785 static bool isCombineInstrCandidate32(unsigned Opc) {
2786   switch (Opc) {
2787   case AArch64::ADDWrr:
2788   case AArch64::ADDWri:
2789   case AArch64::SUBWrr:
2790   case AArch64::ADDSWrr:
2791   case AArch64::ADDSWri:
2792   case AArch64::SUBSWrr:
2793   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
2794   case AArch64::SUBWri:
2795   case AArch64::SUBSWri:
2796     return true;
2797   default:
2798     break;
2799   }
2800   return false;
2801 }
2802 //
2803 // 64b Opcodes that can be combined with a MUL
isCombineInstrCandidate64(unsigned Opc)2804 static bool isCombineInstrCandidate64(unsigned Opc) {
2805   switch (Opc) {
2806   case AArch64::ADDXrr:
2807   case AArch64::ADDXri:
2808   case AArch64::SUBXrr:
2809   case AArch64::ADDSXrr:
2810   case AArch64::ADDSXri:
2811   case AArch64::SUBSXrr:
2812   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
2813   case AArch64::SUBXri:
2814   case AArch64::SUBSXri:
2815     return true;
2816   default:
2817     break;
2818   }
2819   return false;
2820 }
2821 //
2822 // FP Opcodes that can be combined with a FMUL
isCombineInstrCandidateFP(const MachineInstr & Inst)2823 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
2824   switch (Inst.getOpcode()) {
2825   case AArch64::FADDSrr:
2826   case AArch64::FADDDrr:
2827   case AArch64::FADDv2f32:
2828   case AArch64::FADDv2f64:
2829   case AArch64::FADDv4f32:
2830   case AArch64::FSUBSrr:
2831   case AArch64::FSUBDrr:
2832   case AArch64::FSUBv2f32:
2833   case AArch64::FSUBv2f64:
2834   case AArch64::FSUBv4f32:
2835     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
2836   default:
2837     break;
2838   }
2839   return false;
2840 }
2841 //
2842 // Opcodes that can be combined with a MUL
isCombineInstrCandidate(unsigned Opc)2843 static bool isCombineInstrCandidate(unsigned Opc) {
2844   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
2845 }
2846 
2847 //
2848 // Utility routine that checks if \param MO is defined by an
2849 // \param CombineOpc instruction in the basic block \param MBB
canCombine(MachineBasicBlock & MBB,MachineOperand & MO,unsigned CombineOpc,unsigned ZeroReg=0,bool CheckZeroReg=false)2850 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
2851                        unsigned CombineOpc, unsigned ZeroReg = 0,
2852                        bool CheckZeroReg = false) {
2853   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2854   MachineInstr *MI = nullptr;
2855 
2856   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
2857     MI = MRI.getUniqueVRegDef(MO.getReg());
2858   // And it needs to be in the trace (otherwise, it won't have a depth).
2859   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
2860     return false;
2861   // Must only used by the user we combine with.
2862   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2863     return false;
2864 
2865   if (CheckZeroReg) {
2866     assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
2867            MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
2868            MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
2869     // The third input reg must be zero.
2870     if (MI->getOperand(3).getReg() != ZeroReg)
2871       return false;
2872   }
2873 
2874   return true;
2875 }
2876 
2877 //
2878 // Is \param MO defined by an integer multiply and can be combined?
canCombineWithMUL(MachineBasicBlock & MBB,MachineOperand & MO,unsigned MulOpc,unsigned ZeroReg)2879 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
2880                               unsigned MulOpc, unsigned ZeroReg) {
2881   return canCombine(MBB, MO, MulOpc, ZeroReg, true);
2882 }
2883 
2884 //
2885 // Is \param MO defined by a floating-point multiply and can be combined?
canCombineWithFMUL(MachineBasicBlock & MBB,MachineOperand & MO,unsigned MulOpc)2886 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
2887                                unsigned MulOpc) {
2888   return canCombine(MBB, MO, MulOpc);
2889 }
2890 
2891 // TODO: There are many more machine instruction opcodes to match:
2892 //       1. Other data types (integer, vectors)
2893 //       2. Other math / logic operations (xor, or)
2894 //       3. Other forms of the same operation (intrinsics and other variants)
isAssociativeAndCommutative(const MachineInstr & Inst) const2895 bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
2896   switch (Inst.getOpcode()) {
2897   case AArch64::FADDDrr:
2898   case AArch64::FADDSrr:
2899   case AArch64::FADDv2f32:
2900   case AArch64::FADDv2f64:
2901   case AArch64::FADDv4f32:
2902   case AArch64::FMULDrr:
2903   case AArch64::FMULSrr:
2904   case AArch64::FMULX32:
2905   case AArch64::FMULX64:
2906   case AArch64::FMULXv2f32:
2907   case AArch64::FMULXv2f64:
2908   case AArch64::FMULXv4f32:
2909   case AArch64::FMULv2f32:
2910   case AArch64::FMULv2f64:
2911   case AArch64::FMULv4f32:
2912     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
2913   default:
2914     return false;
2915   }
2916 }
2917 
2918 /// Find instructions that can be turned into madd.
getMaddPatterns(MachineInstr & Root,SmallVectorImpl<MachineCombinerPattern> & Patterns)2919 static bool getMaddPatterns(MachineInstr &Root,
2920                             SmallVectorImpl<MachineCombinerPattern> &Patterns) {
2921   unsigned Opc = Root.getOpcode();
2922   MachineBasicBlock &MBB = *Root.getParent();
2923   bool Found = false;
2924 
2925   if (!isCombineInstrCandidate(Opc))
2926     return false;
2927   if (isCombineInstrSettingFlag(Opc)) {
2928     int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
2929     // When NZCV is live bail out.
2930     if (Cmp_NZCV == -1)
2931       return false;
2932     unsigned NewOpc = convertFlagSettingOpcode(Root);
2933     // When opcode can't change bail out.
2934     // CHECKME: do we miss any cases for opcode conversion?
2935     if (NewOpc == Opc)
2936       return false;
2937     Opc = NewOpc;
2938   }
2939 
2940   switch (Opc) {
2941   default:
2942     break;
2943   case AArch64::ADDWrr:
2944     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
2945            "ADDWrr does not have register operands");
2946     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
2947                           AArch64::WZR)) {
2948       Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
2949       Found = true;
2950     }
2951     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
2952                           AArch64::WZR)) {
2953       Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
2954       Found = true;
2955     }
2956     break;
2957   case AArch64::ADDXrr:
2958     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
2959                           AArch64::XZR)) {
2960       Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
2961       Found = true;
2962     }
2963     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
2964                           AArch64::XZR)) {
2965       Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
2966       Found = true;
2967     }
2968     break;
2969   case AArch64::SUBWrr:
2970     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
2971                           AArch64::WZR)) {
2972       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
2973       Found = true;
2974     }
2975     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
2976                           AArch64::WZR)) {
2977       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
2978       Found = true;
2979     }
2980     break;
2981   case AArch64::SUBXrr:
2982     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
2983                           AArch64::XZR)) {
2984       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
2985       Found = true;
2986     }
2987     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
2988                           AArch64::XZR)) {
2989       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
2990       Found = true;
2991     }
2992     break;
2993   case AArch64::ADDWri:
2994     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
2995                           AArch64::WZR)) {
2996       Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
2997       Found = true;
2998     }
2999     break;
3000   case AArch64::ADDXri:
3001     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3002                           AArch64::XZR)) {
3003       Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
3004       Found = true;
3005     }
3006     break;
3007   case AArch64::SUBWri:
3008     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3009                           AArch64::WZR)) {
3010       Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
3011       Found = true;
3012     }
3013     break;
3014   case AArch64::SUBXri:
3015     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3016                           AArch64::XZR)) {
3017       Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
3018       Found = true;
3019     }
3020     break;
3021   }
3022   return Found;
3023 }
3024 /// Floating-Point Support
3025 
3026 /// Find instructions that can be turned into madd.
getFMAPatterns(MachineInstr & Root,SmallVectorImpl<MachineCombinerPattern> & Patterns)3027 static bool getFMAPatterns(MachineInstr &Root,
3028                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3029 
3030   if (!isCombineInstrCandidateFP(Root))
3031     return 0;
3032 
3033   MachineBasicBlock &MBB = *Root.getParent();
3034   bool Found = false;
3035 
3036   switch (Root.getOpcode()) {
3037   default:
3038     assert(false && "Unsupported FP instruction in combiner\n");
3039     break;
3040   case AArch64::FADDSrr:
3041     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3042            "FADDWrr does not have register operands");
3043     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3044       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
3045       Found = true;
3046     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3047                                   AArch64::FMULv1i32_indexed)) {
3048       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
3049       Found = true;
3050     }
3051     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3052       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
3053       Found = true;
3054     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3055                                   AArch64::FMULv1i32_indexed)) {
3056       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
3057       Found = true;
3058     }
3059     break;
3060   case AArch64::FADDDrr:
3061     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3062       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
3063       Found = true;
3064     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3065                                   AArch64::FMULv1i64_indexed)) {
3066       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
3067       Found = true;
3068     }
3069     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3070       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
3071       Found = true;
3072     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3073                                   AArch64::FMULv1i64_indexed)) {
3074       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
3075       Found = true;
3076     }
3077     break;
3078   case AArch64::FADDv2f32:
3079     if (canCombineWithFMUL(MBB, Root.getOperand(1),
3080                            AArch64::FMULv2i32_indexed)) {
3081       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
3082       Found = true;
3083     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3084                                   AArch64::FMULv2f32)) {
3085       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
3086       Found = true;
3087     }
3088     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3089                            AArch64::FMULv2i32_indexed)) {
3090       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
3091       Found = true;
3092     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3093                                   AArch64::FMULv2f32)) {
3094       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
3095       Found = true;
3096     }
3097     break;
3098   case AArch64::FADDv2f64:
3099     if (canCombineWithFMUL(MBB, Root.getOperand(1),
3100                            AArch64::FMULv2i64_indexed)) {
3101       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
3102       Found = true;
3103     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3104                                   AArch64::FMULv2f64)) {
3105       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
3106       Found = true;
3107     }
3108     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3109                            AArch64::FMULv2i64_indexed)) {
3110       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
3111       Found = true;
3112     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3113                                   AArch64::FMULv2f64)) {
3114       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
3115       Found = true;
3116     }
3117     break;
3118   case AArch64::FADDv4f32:
3119     if (canCombineWithFMUL(MBB, Root.getOperand(1),
3120                            AArch64::FMULv4i32_indexed)) {
3121       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
3122       Found = true;
3123     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3124                                   AArch64::FMULv4f32)) {
3125       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
3126       Found = true;
3127     }
3128     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3129                            AArch64::FMULv4i32_indexed)) {
3130       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
3131       Found = true;
3132     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3133                                   AArch64::FMULv4f32)) {
3134       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
3135       Found = true;
3136     }
3137     break;
3138 
3139   case AArch64::FSUBSrr:
3140     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3141       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
3142       Found = true;
3143     }
3144     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3145       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
3146       Found = true;
3147     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3148                                   AArch64::FMULv1i32_indexed)) {
3149       Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
3150       Found = true;
3151     }
3152     break;
3153   case AArch64::FSUBDrr:
3154     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3155       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
3156       Found = true;
3157     }
3158     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3159       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
3160       Found = true;
3161     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3162                                   AArch64::FMULv1i64_indexed)) {
3163       Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
3164       Found = true;
3165     }
3166     break;
3167   case AArch64::FSUBv2f32:
3168     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3169                            AArch64::FMULv2i32_indexed)) {
3170       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
3171       Found = true;
3172     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3173                                   AArch64::FMULv2f32)) {
3174       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
3175       Found = true;
3176     }
3177     break;
3178   case AArch64::FSUBv2f64:
3179     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3180                            AArch64::FMULv2i64_indexed)) {
3181       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
3182       Found = true;
3183     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3184                                   AArch64::FMULv2f64)) {
3185       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
3186       Found = true;
3187     }
3188     break;
3189   case AArch64::FSUBv4f32:
3190     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3191                            AArch64::FMULv4i32_indexed)) {
3192       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
3193       Found = true;
3194     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3195                                   AArch64::FMULv4f32)) {
3196       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
3197       Found = true;
3198     }
3199     break;
3200   }
3201   return Found;
3202 }
3203 
3204 /// Return true when a code sequence can improve throughput. It
3205 /// should be called only for instructions in loops.
3206 /// \param Pattern - combiner pattern
3207 bool
isThroughputPattern(MachineCombinerPattern Pattern) const3208 AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
3209   switch (Pattern) {
3210   default:
3211     break;
3212   case MachineCombinerPattern::FMULADDS_OP1:
3213   case MachineCombinerPattern::FMULADDS_OP2:
3214   case MachineCombinerPattern::FMULSUBS_OP1:
3215   case MachineCombinerPattern::FMULSUBS_OP2:
3216   case MachineCombinerPattern::FMULADDD_OP1:
3217   case MachineCombinerPattern::FMULADDD_OP2:
3218   case MachineCombinerPattern::FMULSUBD_OP1:
3219   case MachineCombinerPattern::FMULSUBD_OP2:
3220   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
3221   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
3222   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
3223   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
3224   case MachineCombinerPattern::FMLAv2f32_OP2:
3225   case MachineCombinerPattern::FMLAv2f32_OP1:
3226   case MachineCombinerPattern::FMLAv2f64_OP1:
3227   case MachineCombinerPattern::FMLAv2f64_OP2:
3228   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
3229   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
3230   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
3231   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
3232   case MachineCombinerPattern::FMLAv4f32_OP1:
3233   case MachineCombinerPattern::FMLAv4f32_OP2:
3234   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
3235   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
3236   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
3237   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
3238   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
3239   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
3240   case MachineCombinerPattern::FMLSv2f32_OP2:
3241   case MachineCombinerPattern::FMLSv2f64_OP2:
3242   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
3243   case MachineCombinerPattern::FMLSv4f32_OP2:
3244     return true;
3245   } // end switch (Pattern)
3246   return false;
3247 }
3248 /// Return true when there is potentially a faster code sequence for an
3249 /// instruction chain ending in \p Root. All potential patterns are listed in
3250 /// the \p Pattern vector. Pattern should be sorted in priority order since the
3251 /// pattern evaluator stops checking as soon as it finds a faster sequence.
3252 
getMachineCombinerPatterns(MachineInstr & Root,SmallVectorImpl<MachineCombinerPattern> & Patterns) const3253 bool AArch64InstrInfo::getMachineCombinerPatterns(
3254     MachineInstr &Root,
3255     SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
3256   // Integer patterns
3257   if (getMaddPatterns(Root, Patterns))
3258     return true;
3259   // Floating point patterns
3260   if (getFMAPatterns(Root, Patterns))
3261     return true;
3262 
3263   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
3264 }
3265 
/// Operand layout of the fused instruction built by genFusedMultiply.
enum class FMAInstKind {
  /// Multiply operands first, then the addend.
  Default,
  /// Addend first, then the multiply operands, then the immediate taken from
  /// operand 3 of the multiply.
  Indexed,
  /// Addend first, then the multiply operands.
  Accumulator
};
3267 /// genFusedMultiply - Generate fused multiply instructions.
3268 /// This function supports both integer and floating point instructions.
3269 /// A typical example:
3270 ///  F|MUL I=A,B,0
3271 ///  F|ADD R,I,C
3272 ///  ==> F|MADD R,A,B,C
3273 /// \param Root is the F|ADD instruction
3274 /// \param [out] InsInstrs is a vector of machine instructions and will
3275 /// contain the generated madd instruction
3276 /// \param IdxMulOpd is index of operand in Root that is the result of
3277 /// the F|MUL. In the example above IdxMulOpd is 1.
3278 /// \param MaddOpc the opcode fo the f|madd instruction
3279 static MachineInstr *
genFusedMultiply(MachineFunction & MF,MachineRegisterInfo & MRI,const TargetInstrInfo * TII,MachineInstr & Root,SmallVectorImpl<MachineInstr * > & InsInstrs,unsigned IdxMulOpd,unsigned MaddOpc,const TargetRegisterClass * RC,FMAInstKind kind=FMAInstKind::Default)3280 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
3281                  const TargetInstrInfo *TII, MachineInstr &Root,
3282                  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
3283                  unsigned MaddOpc, const TargetRegisterClass *RC,
3284                  FMAInstKind kind = FMAInstKind::Default) {
3285   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3286 
3287   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
3288   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3289   unsigned ResultReg = Root.getOperand(0).getReg();
3290   unsigned SrcReg0 = MUL->getOperand(1).getReg();
3291   bool Src0IsKill = MUL->getOperand(1).isKill();
3292   unsigned SrcReg1 = MUL->getOperand(2).getReg();
3293   bool Src1IsKill = MUL->getOperand(2).isKill();
3294   unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
3295   bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
3296 
3297   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3298     MRI.constrainRegClass(ResultReg, RC);
3299   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3300     MRI.constrainRegClass(SrcReg0, RC);
3301   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3302     MRI.constrainRegClass(SrcReg1, RC);
3303   if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
3304     MRI.constrainRegClass(SrcReg2, RC);
3305 
3306   MachineInstrBuilder MIB;
3307   if (kind == FMAInstKind::Default)
3308     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3309               .addReg(SrcReg0, getKillRegState(Src0IsKill))
3310               .addReg(SrcReg1, getKillRegState(Src1IsKill))
3311               .addReg(SrcReg2, getKillRegState(Src2IsKill));
3312   else if (kind == FMAInstKind::Indexed)
3313     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3314               .addReg(SrcReg2, getKillRegState(Src2IsKill))
3315               .addReg(SrcReg0, getKillRegState(Src0IsKill))
3316               .addReg(SrcReg1, getKillRegState(Src1IsKill))
3317               .addImm(MUL->getOperand(3).getImm());
3318   else if (kind == FMAInstKind::Accumulator)
3319     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3320               .addReg(SrcReg2, getKillRegState(Src2IsKill))
3321               .addReg(SrcReg0, getKillRegState(Src0IsKill))
3322               .addReg(SrcReg1, getKillRegState(Src1IsKill));
3323   else
3324     assert(false && "Invalid FMA instruction kind \n");
3325   // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
3326   InsInstrs.push_back(MIB);
3327   return MUL;
3328 }
3329 
3330 /// genMaddR - Generate madd instruction and combine mul and add using
3331 /// an extra virtual register
3332 /// Example - an ADD intermediate needs to be stored in a register:
3333 ///   MUL I=A,B,0
3334 ///   ADD R,I,Imm
3335 ///   ==> ORR  V, ZR, Imm
3336 ///   ==> MADD R,A,B,V
3337 /// \param Root is the ADD instruction
3338 /// \param [out] InsInstrs is a vector of machine instructions and will
3339 /// contain the generated madd instruction
3340 /// \param IdxMulOpd is index of operand in Root that is the result of
3341 /// the MUL. In the example above IdxMulOpd is 1.
3342 /// \param MaddOpc the opcode fo the madd instruction
3343 /// \param VR is a virtual register that holds the value of an ADD operand
3344 /// (V in the example above).
genMaddR(MachineFunction & MF,MachineRegisterInfo & MRI,const TargetInstrInfo * TII,MachineInstr & Root,SmallVectorImpl<MachineInstr * > & InsInstrs,unsigned IdxMulOpd,unsigned MaddOpc,unsigned VR,const TargetRegisterClass * RC)3345 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
3346                               const TargetInstrInfo *TII, MachineInstr &Root,
3347                               SmallVectorImpl<MachineInstr *> &InsInstrs,
3348                               unsigned IdxMulOpd, unsigned MaddOpc,
3349                               unsigned VR, const TargetRegisterClass *RC) {
3350   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3351 
3352   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3353   unsigned ResultReg = Root.getOperand(0).getReg();
3354   unsigned SrcReg0 = MUL->getOperand(1).getReg();
3355   bool Src0IsKill = MUL->getOperand(1).isKill();
3356   unsigned SrcReg1 = MUL->getOperand(2).getReg();
3357   bool Src1IsKill = MUL->getOperand(2).isKill();
3358 
3359   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3360     MRI.constrainRegClass(ResultReg, RC);
3361   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3362     MRI.constrainRegClass(SrcReg0, RC);
3363   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3364     MRI.constrainRegClass(SrcReg1, RC);
3365   if (TargetRegisterInfo::isVirtualRegister(VR))
3366     MRI.constrainRegClass(VR, RC);
3367 
3368   MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
3369                                     ResultReg)
3370                                 .addReg(SrcReg0, getKillRegState(Src0IsKill))
3371                                 .addReg(SrcReg1, getKillRegState(Src1IsKill))
3372                                 .addReg(VR);
3373   // Insert the MADD
3374   InsInstrs.push_back(MIB);
3375   return MUL;
3376 }
3377 
3378 /// When getMachineCombinerPatterns() finds potential patterns,
3379 /// this function generates the instructions that could replace the
3380 /// original code sequence
genAlternativeCodeSequence(MachineInstr & Root,MachineCombinerPattern Pattern,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs,DenseMap<unsigned,unsigned> & InstrIdxForVirtReg) const3381 void AArch64InstrInfo::genAlternativeCodeSequence(
3382     MachineInstr &Root, MachineCombinerPattern Pattern,
3383     SmallVectorImpl<MachineInstr *> &InsInstrs,
3384     SmallVectorImpl<MachineInstr *> &DelInstrs,
3385     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
3386   MachineBasicBlock &MBB = *Root.getParent();
3387   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3388   MachineFunction &MF = *MBB.getParent();
3389   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
3390 
3391   MachineInstr *MUL;
3392   const TargetRegisterClass *RC;
3393   unsigned Opc;
3394   switch (Pattern) {
3395   default:
3396     // Reassociate instructions.
3397     TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
3398                                                 DelInstrs, InstrIdxForVirtReg);
3399     return;
3400   case MachineCombinerPattern::MULADDW_OP1:
3401   case MachineCombinerPattern::MULADDX_OP1:
3402     // MUL I=A,B,0
3403     // ADD R,I,C
3404     // ==> MADD R,A,B,C
3405     // --- Create(MADD);
3406     if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
3407       Opc = AArch64::MADDWrrr;
3408       RC = &AArch64::GPR32RegClass;
3409     } else {
3410       Opc = AArch64::MADDXrrr;
3411       RC = &AArch64::GPR64RegClass;
3412     }
3413     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
3414     break;
3415   case MachineCombinerPattern::MULADDW_OP2:
3416   case MachineCombinerPattern::MULADDX_OP2:
3417     // MUL I=A,B,0
3418     // ADD R,C,I
3419     // ==> MADD R,A,B,C
3420     // --- Create(MADD);
3421     if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
3422       Opc = AArch64::MADDWrrr;
3423       RC = &AArch64::GPR32RegClass;
3424     } else {
3425       Opc = AArch64::MADDXrrr;
3426       RC = &AArch64::GPR64RegClass;
3427     }
3428     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
3429     break;
3430   case MachineCombinerPattern::MULADDWI_OP1:
3431   case MachineCombinerPattern::MULADDXI_OP1: {
3432     // MUL I=A,B,0
3433     // ADD R,I,Imm
3434     // ==> ORR  V, ZR, Imm
3435     // ==> MADD R,A,B,V
3436     // --- Create(MADD);
3437     const TargetRegisterClass *OrrRC;
3438     unsigned BitSize, OrrOpc, ZeroReg;
3439     if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
3440       OrrOpc = AArch64::ORRWri;
3441       OrrRC = &AArch64::GPR32spRegClass;
3442       BitSize = 32;
3443       ZeroReg = AArch64::WZR;
3444       Opc = AArch64::MADDWrrr;
3445       RC = &AArch64::GPR32RegClass;
3446     } else {
3447       OrrOpc = AArch64::ORRXri;
3448       OrrRC = &AArch64::GPR64spRegClass;
3449       BitSize = 64;
3450       ZeroReg = AArch64::XZR;
3451       Opc = AArch64::MADDXrrr;
3452       RC = &AArch64::GPR64RegClass;
3453     }
3454     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
3455     uint64_t Imm = Root.getOperand(2).getImm();
3456 
3457     if (Root.getOperand(3).isImm()) {
3458       unsigned Val = Root.getOperand(3).getImm();
3459       Imm = Imm << Val;
3460     }
3461     uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
3462     uint64_t Encoding;
3463     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
3464       MachineInstrBuilder MIB1 =
3465           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
3466               .addReg(ZeroReg)
3467               .addImm(Encoding);
3468       InsInstrs.push_back(MIB1);
3469       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
3470       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
3471     }
3472     break;
3473   }
3474   case MachineCombinerPattern::MULSUBW_OP1:
3475   case MachineCombinerPattern::MULSUBX_OP1: {
3476     // MUL I=A,B,0
3477     // SUB R,I, C
3478     // ==> SUB  V, 0, C
3479     // ==> MADD R,A,B,V // = -C + A*B
3480     // --- Create(MADD);
3481     const TargetRegisterClass *SubRC;
3482     unsigned SubOpc, ZeroReg;
3483     if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
3484       SubOpc = AArch64::SUBWrr;
3485       SubRC = &AArch64::GPR32spRegClass;
3486       ZeroReg = AArch64::WZR;
3487       Opc = AArch64::MADDWrrr;
3488       RC = &AArch64::GPR32RegClass;
3489     } else {
3490       SubOpc = AArch64::SUBXrr;
3491       SubRC = &AArch64::GPR64spRegClass;
3492       ZeroReg = AArch64::XZR;
3493       Opc = AArch64::MADDXrrr;
3494       RC = &AArch64::GPR64RegClass;
3495     }
3496     unsigned NewVR = MRI.createVirtualRegister(SubRC);
3497     // SUB NewVR, 0, C
3498     MachineInstrBuilder MIB1 =
3499         BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
3500             .addReg(ZeroReg)
3501             .addOperand(Root.getOperand(2));
3502     InsInstrs.push_back(MIB1);
3503     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
3504     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
3505     break;
3506   }
3507   case MachineCombinerPattern::MULSUBW_OP2:
3508   case MachineCombinerPattern::MULSUBX_OP2:
3509     // MUL I=A,B,0
3510     // SUB R,C,I
3511     // ==> MSUB R,A,B,C (computes C - A*B)
3512     // --- Create(MSUB);
3513     if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
3514       Opc = AArch64::MSUBWrrr;
3515       RC = &AArch64::GPR32RegClass;
3516     } else {
3517       Opc = AArch64::MSUBXrrr;
3518       RC = &AArch64::GPR64RegClass;
3519     }
3520     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
3521     break;
3522   case MachineCombinerPattern::MULSUBWI_OP1:
3523   case MachineCombinerPattern::MULSUBXI_OP1: {
3524     // MUL I=A,B,0
3525     // SUB R,I, Imm
3526     // ==> ORR  V, ZR, -Imm
3527     // ==> MADD R,A,B,V // = -Imm + A*B
3528     // --- Create(MADD);
3529     const TargetRegisterClass *OrrRC;
3530     unsigned BitSize, OrrOpc, ZeroReg;
3531     if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
3532       OrrOpc = AArch64::ORRWri;
3533       OrrRC = &AArch64::GPR32spRegClass;
3534       BitSize = 32;
3535       ZeroReg = AArch64::WZR;
3536       Opc = AArch64::MADDWrrr;
3537       RC = &AArch64::GPR32RegClass;
3538     } else {
3539       OrrOpc = AArch64::ORRXri;
3540       OrrRC = &AArch64::GPR64spRegClass;
3541       BitSize = 64;
3542       ZeroReg = AArch64::XZR;
3543       Opc = AArch64::MADDXrrr;
3544       RC = &AArch64::GPR64RegClass;
3545     }
3546     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
3547     int Imm = Root.getOperand(2).getImm();
3548     if (Root.getOperand(3).isImm()) {
3549       unsigned Val = Root.getOperand(3).getImm();
3550       Imm = Imm << Val;
3551     }
3552     uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize);
3553     uint64_t Encoding;
3554     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
3555       MachineInstrBuilder MIB1 =
3556           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
3557               .addReg(ZeroReg)
3558               .addImm(Encoding);
3559       InsInstrs.push_back(MIB1);
3560       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
3561       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
3562     }
3563     break;
3564   }
3565   // Floating Point Support
3566   case MachineCombinerPattern::FMULADDS_OP1:
3567   case MachineCombinerPattern::FMULADDD_OP1:
3568     // MUL I=A,B,0
3569     // ADD R,I,C
3570     // ==> MADD R,A,B,C
3571     // --- Create(MADD);
3572     if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
3573       Opc = AArch64::FMADDSrrr;
3574       RC = &AArch64::FPR32RegClass;
3575     } else {
3576       Opc = AArch64::FMADDDrrr;
3577       RC = &AArch64::FPR64RegClass;
3578     }
3579     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
3580     break;
3581   case MachineCombinerPattern::FMULADDS_OP2:
3582   case MachineCombinerPattern::FMULADDD_OP2:
3583     // FMUL I=A,B,0
3584     // FADD R,C,I
3585     // ==> FMADD R,A,B,C
3586     // --- Create(FMADD);
3587     if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
3588       Opc = AArch64::FMADDSrrr;
3589       RC = &AArch64::FPR32RegClass;
3590     } else {
3591       Opc = AArch64::FMADDDrrr;
3592       RC = &AArch64::FPR64RegClass;
3593     }
3594     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
3595     break;
3596 
3597   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
3598     Opc = AArch64::FMLAv1i32_indexed;
3599     RC = &AArch64::FPR32RegClass;
3600     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3601                            FMAInstKind::Indexed);
3602     break;
3603   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
3604     Opc = AArch64::FMLAv1i32_indexed;
3605     RC = &AArch64::FPR32RegClass;
3606     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3607                            FMAInstKind::Indexed);
3608     break;
3609 
3610   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
3611     Opc = AArch64::FMLAv1i64_indexed;
3612     RC = &AArch64::FPR64RegClass;
3613     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3614                            FMAInstKind::Indexed);
3615     break;
3616   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
3617     Opc = AArch64::FMLAv1i64_indexed;
3618     RC = &AArch64::FPR64RegClass;
3619     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3620                            FMAInstKind::Indexed);
3621     break;
3622 
3623   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
3624   case MachineCombinerPattern::FMLAv2f32_OP1:
3625     RC = &AArch64::FPR64RegClass;
3626     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
3627       Opc = AArch64::FMLAv2i32_indexed;
3628       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3629                              FMAInstKind::Indexed);
3630     } else {
3631       Opc = AArch64::FMLAv2f32;
3632       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3633                              FMAInstKind::Accumulator);
3634     }
3635     break;
3636   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
3637   case MachineCombinerPattern::FMLAv2f32_OP2:
3638     RC = &AArch64::FPR64RegClass;
3639     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
3640       Opc = AArch64::FMLAv2i32_indexed;
3641       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3642                              FMAInstKind::Indexed);
3643     } else {
3644       Opc = AArch64::FMLAv2f32;
3645       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3646                              FMAInstKind::Accumulator);
3647     }
3648     break;
3649 
3650   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
3651   case MachineCombinerPattern::FMLAv2f64_OP1:
3652     RC = &AArch64::FPR128RegClass;
3653     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
3654       Opc = AArch64::FMLAv2i64_indexed;
3655       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3656                              FMAInstKind::Indexed);
3657     } else {
3658       Opc = AArch64::FMLAv2f64;
3659       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3660                              FMAInstKind::Accumulator);
3661     }
3662     break;
3663   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
3664   case MachineCombinerPattern::FMLAv2f64_OP2:
3665     RC = &AArch64::FPR128RegClass;
3666     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
3667       Opc = AArch64::FMLAv2i64_indexed;
3668       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3669                              FMAInstKind::Indexed);
3670     } else {
3671       Opc = AArch64::FMLAv2f64;
3672       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3673                              FMAInstKind::Accumulator);
3674     }
3675     break;
3676 
3677   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
3678   case MachineCombinerPattern::FMLAv4f32_OP1:
3679     RC = &AArch64::FPR128RegClass;
3680     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
3681       Opc = AArch64::FMLAv4i32_indexed;
3682       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3683                              FMAInstKind::Indexed);
3684     } else {
3685       Opc = AArch64::FMLAv4f32;
3686       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
3687                              FMAInstKind::Accumulator);
3688     }
3689     break;
3690 
3691   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
3692   case MachineCombinerPattern::FMLAv4f32_OP2:
3693     RC = &AArch64::FPR128RegClass;
3694     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
3695       Opc = AArch64::FMLAv4i32_indexed;
3696       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3697                              FMAInstKind::Indexed);
3698     } else {
3699       Opc = AArch64::FMLAv4f32;
3700       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3701                              FMAInstKind::Accumulator);
3702     }
3703     break;
3704 
3705   case MachineCombinerPattern::FMULSUBS_OP1:
3706   case MachineCombinerPattern::FMULSUBD_OP1: {
3707     // FMUL I=A,B,0
3708     // FSUB R,I,C
3709     // ==> FNMSUB R,A,B,C // = -C + A*B
3710     // --- Create(FNMSUB);
3711     if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
3712       Opc = AArch64::FNMSUBSrrr;
3713       RC = &AArch64::FPR32RegClass;
3714     } else {
3715       Opc = AArch64::FNMSUBDrrr;
3716       RC = &AArch64::FPR64RegClass;
3717     }
3718     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
3719     break;
3720   }
3721   case MachineCombinerPattern::FMULSUBS_OP2:
3722   case MachineCombinerPattern::FMULSUBD_OP2: {
3723     // FMUL I=A,B,0
3724     // FSUB R,C,I
3725     // ==> FMSUB R,A,B,C (computes C - A*B)
3726     // --- Create(FMSUB);
3727     if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
3728       Opc = AArch64::FMSUBSrrr;
3729       RC = &AArch64::FPR32RegClass;
3730     } else {
3731       Opc = AArch64::FMSUBDrrr;
3732       RC = &AArch64::FPR64RegClass;
3733     }
3734     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
3735     break;
3736 
3737   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
3738     Opc = AArch64::FMLSv1i32_indexed;
3739     RC = &AArch64::FPR32RegClass;
3740     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3741                            FMAInstKind::Indexed);
3742     break;
3743 
3744   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
3745     Opc = AArch64::FMLSv1i64_indexed;
3746     RC = &AArch64::FPR64RegClass;
3747     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3748                            FMAInstKind::Indexed);
3749     break;
3750 
3751   case MachineCombinerPattern::FMLSv2f32_OP2:
3752   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
3753     RC = &AArch64::FPR64RegClass;
3754     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
3755       Opc = AArch64::FMLSv2i32_indexed;
3756       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3757                              FMAInstKind::Indexed);
3758     } else {
3759       Opc = AArch64::FMLSv2f32;
3760       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3761                              FMAInstKind::Accumulator);
3762     }
3763     break;
3764 
3765   case MachineCombinerPattern::FMLSv2f64_OP2:
3766   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
3767     RC = &AArch64::FPR128RegClass;
3768     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
3769       Opc = AArch64::FMLSv2i64_indexed;
3770       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3771                              FMAInstKind::Indexed);
3772     } else {
3773       Opc = AArch64::FMLSv2f64;
3774       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3775                              FMAInstKind::Accumulator);
3776     }
3777     break;
3778 
3779   case MachineCombinerPattern::FMLSv4f32_OP2:
3780   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
3781     RC = &AArch64::FPR128RegClass;
3782     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
3783       Opc = AArch64::FMLSv4i32_indexed;
3784       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3785                              FMAInstKind::Indexed);
3786     } else {
3787       Opc = AArch64::FMLSv4f32;
3788       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
3789                              FMAInstKind::Accumulator);
3790     }
3791     break;
3792   }
3793   } // end switch (Pattern)
3794   // Record MUL and ADD/SUB for deletion
3795   DelInstrs.push_back(MUL);
3796   DelInstrs.push_back(&Root);
3797 
3798   return;
3799 }
3800 
3801 /// \brief Replace csincr-branch sequence by simple conditional branch
3802 ///
3803 /// Examples:
3804 /// 1.
3805 ///   csinc  w9, wzr, wzr, <condition code>
3806 ///   tbnz   w9, #0, 0x44
3807 /// to
3808 ///   b.<inverted condition code>
3809 ///
3810 /// 2.
3811 ///   csinc w9, wzr, wzr, <condition code>
3812 ///   tbz   w9, #0, 0x44
3813 /// to
3814 ///   b.<condition code>
3815 ///
3816 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
3817 /// compare's constant operand is power of 2.
3818 ///
3819 /// Examples:
3820 ///   and  w8, w8, #0x400
3821 ///   cbnz w8, L1
3822 /// to
3823 ///   tbnz w8, #10, L1
3824 ///
3825 /// \param  MI Conditional Branch
3826 /// \return True when the simple conditional branch is generated
3827 ///
optimizeCondBranch(MachineInstr & MI) const3828 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
3829   bool IsNegativeBranch = false;
3830   bool IsTestAndBranch = false;
3831   unsigned TargetBBInMI = 0;
3832   switch (MI.getOpcode()) {
3833   default:
3834     llvm_unreachable("Unknown branch instruction?");
3835   case AArch64::Bcc:
3836     return false;
3837   case AArch64::CBZW:
3838   case AArch64::CBZX:
3839     TargetBBInMI = 1;
3840     break;
3841   case AArch64::CBNZW:
3842   case AArch64::CBNZX:
3843     TargetBBInMI = 1;
3844     IsNegativeBranch = true;
3845     break;
3846   case AArch64::TBZW:
3847   case AArch64::TBZX:
3848     TargetBBInMI = 2;
3849     IsTestAndBranch = true;
3850     break;
3851   case AArch64::TBNZW:
3852   case AArch64::TBNZX:
3853     TargetBBInMI = 2;
3854     IsNegativeBranch = true;
3855     IsTestAndBranch = true;
3856     break;
3857   }
3858   // So we increment a zero register and test for bits other
3859   // than bit 0? Conservatively bail out in case the verifier
3860   // missed this case.
3861   if (IsTestAndBranch && MI.getOperand(1).getImm())
3862     return false;
3863 
3864   // Find Definition.
3865   assert(MI.getParent() && "Incomplete machine instruciton\n");
3866   MachineBasicBlock *MBB = MI.getParent();
3867   MachineFunction *MF = MBB->getParent();
3868   MachineRegisterInfo *MRI = &MF->getRegInfo();
3869   unsigned VReg = MI.getOperand(0).getReg();
3870   if (!TargetRegisterInfo::isVirtualRegister(VReg))
3871     return false;
3872 
3873   MachineInstr *DefMI = MRI->getVRegDef(VReg);
3874 
3875   // Look through COPY instructions to find definition.
3876   while (DefMI->isCopy()) {
3877     unsigned CopyVReg = DefMI->getOperand(1).getReg();
3878     if (!MRI->hasOneNonDBGUse(CopyVReg))
3879       return false;
3880     if (!MRI->hasOneDef(CopyVReg))
3881       return false;
3882     DefMI = MRI->getVRegDef(CopyVReg);
3883   }
3884 
3885   switch (DefMI->getOpcode()) {
3886   default:
3887     return false;
3888   // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
3889   case AArch64::ANDWri:
3890   case AArch64::ANDXri: {
3891     if (IsTestAndBranch)
3892       return false;
3893     if (DefMI->getParent() != MBB)
3894       return false;
3895     if (!MRI->hasOneNonDBGUse(VReg))
3896       return false;
3897 
3898     bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
3899     uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
3900         DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
3901     if (!isPowerOf2_64(Mask))
3902       return false;
3903 
3904     MachineOperand &MO = DefMI->getOperand(1);
3905     unsigned NewReg = MO.getReg();
3906     if (!TargetRegisterInfo::isVirtualRegister(NewReg))
3907       return false;
3908 
3909     assert(!MRI->def_empty(NewReg) && "Register must be defined.");
3910 
3911     MachineBasicBlock &RefToMBB = *MBB;
3912     MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
3913     DebugLoc DL = MI.getDebugLoc();
3914     unsigned Imm = Log2_64(Mask);
3915     unsigned Opc = (Imm < 32)
3916                        ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
3917                        : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
3918     MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
3919                               .addReg(NewReg)
3920                               .addImm(Imm)
3921                               .addMBB(TBB);
3922     // Register lives on to the CBZ now.
3923     MO.setIsKill(false);
3924 
3925     // For immediate smaller than 32, we need to use the 32-bit
3926     // variant (W) in all cases. Indeed the 64-bit variant does not
3927     // allow to encode them.
3928     // Therefore, if the input register is 64-bit, we need to take the
3929     // 32-bit sub-part.
3930     if (!Is32Bit && Imm < 32)
3931       NewMI->getOperand(0).setSubReg(AArch64::sub_32);
3932     MI.eraseFromParent();
3933     return true;
3934   }
3935   // Look for CSINC
3936   case AArch64::CSINCWr:
3937   case AArch64::CSINCXr: {
3938     if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
3939           DefMI->getOperand(2).getReg() == AArch64::WZR) &&
3940         !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
3941           DefMI->getOperand(2).getReg() == AArch64::XZR))
3942       return false;
3943 
3944     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
3945       return false;
3946 
3947     AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
3948     // Convert only when the condition code is not modified between
3949     // the CSINC and the branch. The CC may be used by other
3950     // instructions in between.
3951     if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
3952       return false;
3953     MachineBasicBlock &RefToMBB = *MBB;
3954     MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
3955     DebugLoc DL = MI.getDebugLoc();
3956     if (IsNegativeBranch)
3957       CC = AArch64CC::getInvertedCondCode(CC);
3958     BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
3959     MI.eraseFromParent();
3960     return true;
3961   }
3962   }
3963 }
3964 
3965 std::pair<unsigned, unsigned>
decomposeMachineOperandsTargetFlags(unsigned TF) const3966 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
3967   const unsigned Mask = AArch64II::MO_FRAGMENT;
3968   return std::make_pair(TF & Mask, TF & ~Mask);
3969 }
3970 
3971 ArrayRef<std::pair<unsigned, const char *>>
getSerializableDirectMachineOperandTargetFlags() const3972 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
3973   using namespace AArch64II;
3974   static const std::pair<unsigned, const char *> TargetFlags[] = {
3975       {MO_PAGE, "aarch64-page"},
3976       {MO_PAGEOFF, "aarch64-pageoff"},
3977       {MO_G3, "aarch64-g3"},
3978       {MO_G2, "aarch64-g2"},
3979       {MO_G1, "aarch64-g1"},
3980       {MO_G0, "aarch64-g0"},
3981       {MO_HI12, "aarch64-hi12"}};
3982   return makeArrayRef(TargetFlags);
3983 }
3984 
3985 ArrayRef<std::pair<unsigned, const char *>>
getSerializableBitmaskMachineOperandTargetFlags() const3986 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
3987   using namespace AArch64II;
3988   static const std::pair<unsigned, const char *> TargetFlags[] = {
3989       {MO_GOT, "aarch64-got"},
3990       {MO_NC, "aarch64-nc"},
3991       {MO_TLS, "aarch64-tls"}};
3992   return makeArrayRef(TargetFlags);
3993 }
3994