1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
15 #include "AArch64MachineCombinerPattern.h"
16 #include "AArch64Subtarget.h"
17 #include "MCTargetDesc/AArch64AddressingModes.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineMemOperand.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/CodeGen/PseudoSourceValue.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/TargetRegistry.h"
26 
27 using namespace llvm;
28 
29 #define GET_INSTRINFO_CTOR_DTOR
30 #include "AArch64GenInstrInfo.inc"
31 
AArch64InstrInfo(const AArch64Subtarget & STI)32 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
33     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
34       RI(STI.getTargetTriple()), Subtarget(STI) {}
35 
36 /// GetInstSize - Return the number of bytes of code the specified
37 /// instruction may be.  This returns the maximum number of bytes.
GetInstSizeInBytes(const MachineInstr * MI) const38 unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
39   const MachineBasicBlock &MBB = *MI->getParent();
40   const MachineFunction *MF = MBB.getParent();
41   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
42 
43   if (MI->getOpcode() == AArch64::INLINEASM)
44     return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
45 
46   const MCInstrDesc &Desc = MI->getDesc();
47   switch (Desc.getOpcode()) {
48   default:
49     // Anything not explicitly designated otherwise is a nomal 4-byte insn.
50     return 4;
51   case TargetOpcode::DBG_VALUE:
52   case TargetOpcode::EH_LABEL:
53   case TargetOpcode::IMPLICIT_DEF:
54   case TargetOpcode::KILL:
55     return 0;
56   }
57 
58   llvm_unreachable("GetInstSizeInBytes()- Unable to determin insn size");
59 }
60 
parseCondBranch(MachineInstr * LastInst,MachineBasicBlock * & Target,SmallVectorImpl<MachineOperand> & Cond)61 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
62                             SmallVectorImpl<MachineOperand> &Cond) {
63   // Block ends with fall-through condbranch.
64   switch (LastInst->getOpcode()) {
65   default:
66     llvm_unreachable("Unknown branch instruction?");
67   case AArch64::Bcc:
68     Target = LastInst->getOperand(1).getMBB();
69     Cond.push_back(LastInst->getOperand(0));
70     break;
71   case AArch64::CBZW:
72   case AArch64::CBZX:
73   case AArch64::CBNZW:
74   case AArch64::CBNZX:
75     Target = LastInst->getOperand(1).getMBB();
76     Cond.push_back(MachineOperand::CreateImm(-1));
77     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
78     Cond.push_back(LastInst->getOperand(0));
79     break;
80   case AArch64::TBZW:
81   case AArch64::TBZX:
82   case AArch64::TBNZW:
83   case AArch64::TBNZX:
84     Target = LastInst->getOperand(2).getMBB();
85     Cond.push_back(MachineOperand::CreateImm(-1));
86     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
87     Cond.push_back(LastInst->getOperand(0));
88     Cond.push_back(LastInst->getOperand(1));
89   }
90 }
91 
92 // Branch analysis.
AnalyzeBranch(MachineBasicBlock & MBB,MachineBasicBlock * & TBB,MachineBasicBlock * & FBB,SmallVectorImpl<MachineOperand> & Cond,bool AllowModify) const93 bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
94                                    MachineBasicBlock *&TBB,
95                                    MachineBasicBlock *&FBB,
96                                    SmallVectorImpl<MachineOperand> &Cond,
97                                    bool AllowModify) const {
98   // If the block has no terminators, it just falls into the block after it.
99   MachineBasicBlock::iterator I = MBB.end();
100   if (I == MBB.begin())
101     return false;
102   --I;
103   while (I->isDebugValue()) {
104     if (I == MBB.begin())
105       return false;
106     --I;
107   }
108   if (!isUnpredicatedTerminator(I))
109     return false;
110 
111   // Get the last instruction in the block.
112   MachineInstr *LastInst = I;
113 
114   // If there is only one terminator instruction, process it.
115   unsigned LastOpc = LastInst->getOpcode();
116   if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
117     if (isUncondBranchOpcode(LastOpc)) {
118       TBB = LastInst->getOperand(0).getMBB();
119       return false;
120     }
121     if (isCondBranchOpcode(LastOpc)) {
122       // Block ends with fall-through condbranch.
123       parseCondBranch(LastInst, TBB, Cond);
124       return false;
125     }
126     return true; // Can't handle indirect branch.
127   }
128 
129   // Get the instruction before it if it is a terminator.
130   MachineInstr *SecondLastInst = I;
131   unsigned SecondLastOpc = SecondLastInst->getOpcode();
132 
133   // If AllowModify is true and the block ends with two or more unconditional
134   // branches, delete all but the first unconditional branch.
135   if (AllowModify && isUncondBranchOpcode(LastOpc)) {
136     while (isUncondBranchOpcode(SecondLastOpc)) {
137       LastInst->eraseFromParent();
138       LastInst = SecondLastInst;
139       LastOpc = LastInst->getOpcode();
140       if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
141         // Return now the only terminator is an unconditional branch.
142         TBB = LastInst->getOperand(0).getMBB();
143         return false;
144       } else {
145         SecondLastInst = I;
146         SecondLastOpc = SecondLastInst->getOpcode();
147       }
148     }
149   }
150 
151   // If there are three terminators, we don't know what sort of block this is.
152   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
153     return true;
154 
155   // If the block ends with a B and a Bcc, handle it.
156   if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
157     parseCondBranch(SecondLastInst, TBB, Cond);
158     FBB = LastInst->getOperand(0).getMBB();
159     return false;
160   }
161 
162   // If the block ends with two unconditional branches, handle it.  The second
163   // one is not executed, so remove it.
164   if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
165     TBB = SecondLastInst->getOperand(0).getMBB();
166     I = LastInst;
167     if (AllowModify)
168       I->eraseFromParent();
169     return false;
170   }
171 
172   // ...likewise if it ends with an indirect branch followed by an unconditional
173   // branch.
174   if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
175     I = LastInst;
176     if (AllowModify)
177       I->eraseFromParent();
178     return true;
179   }
180 
181   // Otherwise, can't handle this.
182   return true;
183 }
184 
ReverseBranchCondition(SmallVectorImpl<MachineOperand> & Cond) const185 bool AArch64InstrInfo::ReverseBranchCondition(
186     SmallVectorImpl<MachineOperand> &Cond) const {
187   if (Cond[0].getImm() != -1) {
188     // Regular Bcc
189     AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
190     Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
191   } else {
192     // Folded compare-and-branch
193     switch (Cond[1].getImm()) {
194     default:
195       llvm_unreachable("Unknown conditional branch!");
196     case AArch64::CBZW:
197       Cond[1].setImm(AArch64::CBNZW);
198       break;
199     case AArch64::CBNZW:
200       Cond[1].setImm(AArch64::CBZW);
201       break;
202     case AArch64::CBZX:
203       Cond[1].setImm(AArch64::CBNZX);
204       break;
205     case AArch64::CBNZX:
206       Cond[1].setImm(AArch64::CBZX);
207       break;
208     case AArch64::TBZW:
209       Cond[1].setImm(AArch64::TBNZW);
210       break;
211     case AArch64::TBNZW:
212       Cond[1].setImm(AArch64::TBZW);
213       break;
214     case AArch64::TBZX:
215       Cond[1].setImm(AArch64::TBNZX);
216       break;
217     case AArch64::TBNZX:
218       Cond[1].setImm(AArch64::TBZX);
219       break;
220     }
221   }
222 
223   return false;
224 }
225 
RemoveBranch(MachineBasicBlock & MBB) const226 unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
227   MachineBasicBlock::iterator I = MBB.end();
228   if (I == MBB.begin())
229     return 0;
230   --I;
231   while (I->isDebugValue()) {
232     if (I == MBB.begin())
233       return 0;
234     --I;
235   }
236   if (!isUncondBranchOpcode(I->getOpcode()) &&
237       !isCondBranchOpcode(I->getOpcode()))
238     return 0;
239 
240   // Remove the branch.
241   I->eraseFromParent();
242 
243   I = MBB.end();
244 
245   if (I == MBB.begin())
246     return 1;
247   --I;
248   if (!isCondBranchOpcode(I->getOpcode()))
249     return 1;
250 
251   // Remove the branch.
252   I->eraseFromParent();
253   return 2;
254 }
255 
instantiateCondBranch(MachineBasicBlock & MBB,DebugLoc DL,MachineBasicBlock * TBB,const SmallVectorImpl<MachineOperand> & Cond) const256 void AArch64InstrInfo::instantiateCondBranch(
257     MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB,
258     const SmallVectorImpl<MachineOperand> &Cond) const {
259   if (Cond[0].getImm() != -1) {
260     // Regular Bcc
261     BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
262   } else {
263     // Folded compare-and-branch
264     // Note that we use addOperand instead of addReg to keep the flags.
265     const MachineInstrBuilder MIB =
266         BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
267     if (Cond.size() > 3)
268       MIB.addImm(Cond[3].getImm());
269     MIB.addMBB(TBB);
270   }
271 }
272 
InsertBranch(MachineBasicBlock & MBB,MachineBasicBlock * TBB,MachineBasicBlock * FBB,const SmallVectorImpl<MachineOperand> & Cond,DebugLoc DL) const273 unsigned AArch64InstrInfo::InsertBranch(
274     MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
275     const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
276   // Shouldn't be a fall through.
277   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
278 
279   if (!FBB) {
280     if (Cond.empty()) // Unconditional branch?
281       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
282     else
283       instantiateCondBranch(MBB, DL, TBB, Cond);
284     return 1;
285   }
286 
287   // Two-way conditional branch.
288   instantiateCondBranch(MBB, DL, TBB, Cond);
289   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
290   return 2;
291 }
292 
293 // Find the original register that VReg is copied from.
removeCopies(const MachineRegisterInfo & MRI,unsigned VReg)294 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
295   while (TargetRegisterInfo::isVirtualRegister(VReg)) {
296     const MachineInstr *DefMI = MRI.getVRegDef(VReg);
297     if (!DefMI->isFullCopy())
298       return VReg;
299     VReg = DefMI->getOperand(1).getReg();
300   }
301   return VReg;
302 }
303 
304 // Determine if VReg is defined by an instruction that can be folded into a
305 // csel instruction. If so, return the folded opcode, and the replacement
306 // register.
canFoldIntoCSel(const MachineRegisterInfo & MRI,unsigned VReg,unsigned * NewVReg=nullptr)307 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
308                                 unsigned *NewVReg = nullptr) {
309   VReg = removeCopies(MRI, VReg);
310   if (!TargetRegisterInfo::isVirtualRegister(VReg))
311     return 0;
312 
313   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
314   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
315   unsigned Opc = 0;
316   unsigned SrcOpNum = 0;
317   switch (DefMI->getOpcode()) {
318   case AArch64::ADDSXri:
319   case AArch64::ADDSWri:
320     // if NZCV is used, do not fold.
321     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
322       return 0;
323   // fall-through to ADDXri and ADDWri.
324   case AArch64::ADDXri:
325   case AArch64::ADDWri:
326     // add x, 1 -> csinc.
327     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
328         DefMI->getOperand(3).getImm() != 0)
329       return 0;
330     SrcOpNum = 1;
331     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
332     break;
333 
334   case AArch64::ORNXrr:
335   case AArch64::ORNWrr: {
336     // not x -> csinv, represented as orn dst, xzr, src.
337     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
338     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
339       return 0;
340     SrcOpNum = 2;
341     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
342     break;
343   }
344 
345   case AArch64::SUBSXrr:
346   case AArch64::SUBSWrr:
347     // if NZCV is used, do not fold.
348     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
349       return 0;
350   // fall-through to SUBXrr and SUBWrr.
351   case AArch64::SUBXrr:
352   case AArch64::SUBWrr: {
353     // neg x -> csneg, represented as sub dst, xzr, src.
354     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
355     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
356       return 0;
357     SrcOpNum = 2;
358     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
359     break;
360   }
361   default:
362     return 0;
363   }
364   assert(Opc && SrcOpNum && "Missing parameters");
365 
366   if (NewVReg)
367     *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
368   return Opc;
369 }
370 
canInsertSelect(const MachineBasicBlock & MBB,const SmallVectorImpl<MachineOperand> & Cond,unsigned TrueReg,unsigned FalseReg,int & CondCycles,int & TrueCycles,int & FalseCycles) const371 bool AArch64InstrInfo::canInsertSelect(
372     const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond,
373     unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
374     int &FalseCycles) const {
375   // Check register classes.
376   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
377   const TargetRegisterClass *RC =
378       RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
379   if (!RC)
380     return false;
381 
382   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
383   unsigned ExtraCondLat = Cond.size() != 1;
384 
385   // GPRs are handled by csel.
386   // FIXME: Fold in x+1, -x, and ~x when applicable.
387   if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
388       AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
389     // Single-cycle csel, csinc, csinv, and csneg.
390     CondCycles = 1 + ExtraCondLat;
391     TrueCycles = FalseCycles = 1;
392     if (canFoldIntoCSel(MRI, TrueReg))
393       TrueCycles = 0;
394     else if (canFoldIntoCSel(MRI, FalseReg))
395       FalseCycles = 0;
396     return true;
397   }
398 
399   // Scalar floating point is handled by fcsel.
400   // FIXME: Form fabs, fmin, and fmax when applicable.
401   if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
402       AArch64::FPR32RegClass.hasSubClassEq(RC)) {
403     CondCycles = 5 + ExtraCondLat;
404     TrueCycles = FalseCycles = 2;
405     return true;
406   }
407 
408   // Can't do vectors.
409   return false;
410 }
411 
insertSelect(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,DebugLoc DL,unsigned DstReg,const SmallVectorImpl<MachineOperand> & Cond,unsigned TrueReg,unsigned FalseReg) const412 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
413                                     MachineBasicBlock::iterator I, DebugLoc DL,
414                                     unsigned DstReg,
415                                     const SmallVectorImpl<MachineOperand> &Cond,
416                                     unsigned TrueReg, unsigned FalseReg) const {
417   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
418 
419   // Parse the condition code, see parseCondBranch() above.
420   AArch64CC::CondCode CC;
421   switch (Cond.size()) {
422   default:
423     llvm_unreachable("Unknown condition opcode in Cond");
424   case 1: // b.cc
425     CC = AArch64CC::CondCode(Cond[0].getImm());
426     break;
427   case 3: { // cbz/cbnz
428     // We must insert a compare against 0.
429     bool Is64Bit;
430     switch (Cond[1].getImm()) {
431     default:
432       llvm_unreachable("Unknown branch opcode in Cond");
433     case AArch64::CBZW:
434       Is64Bit = 0;
435       CC = AArch64CC::EQ;
436       break;
437     case AArch64::CBZX:
438       Is64Bit = 1;
439       CC = AArch64CC::EQ;
440       break;
441     case AArch64::CBNZW:
442       Is64Bit = 0;
443       CC = AArch64CC::NE;
444       break;
445     case AArch64::CBNZX:
446       Is64Bit = 1;
447       CC = AArch64CC::NE;
448       break;
449     }
450     unsigned SrcReg = Cond[2].getReg();
451     if (Is64Bit) {
452       // cmp reg, #0 is actually subs xzr, reg, #0.
453       MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
454       BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
455           .addReg(SrcReg)
456           .addImm(0)
457           .addImm(0);
458     } else {
459       MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
460       BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
461           .addReg(SrcReg)
462           .addImm(0)
463           .addImm(0);
464     }
465     break;
466   }
467   case 4: { // tbz/tbnz
468     // We must insert a tst instruction.
469     switch (Cond[1].getImm()) {
470     default:
471       llvm_unreachable("Unknown branch opcode in Cond");
472     case AArch64::TBZW:
473     case AArch64::TBZX:
474       CC = AArch64CC::EQ;
475       break;
476     case AArch64::TBNZW:
477     case AArch64::TBNZX:
478       CC = AArch64CC::NE;
479       break;
480     }
481     // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
482     if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
483       BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
484           .addReg(Cond[2].getReg())
485           .addImm(
486               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
487     else
488       BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
489           .addReg(Cond[2].getReg())
490           .addImm(
491               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
492     break;
493   }
494   }
495 
496   unsigned Opc = 0;
497   const TargetRegisterClass *RC = nullptr;
498   bool TryFold = false;
499   if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
500     RC = &AArch64::GPR64RegClass;
501     Opc = AArch64::CSELXr;
502     TryFold = true;
503   } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
504     RC = &AArch64::GPR32RegClass;
505     Opc = AArch64::CSELWr;
506     TryFold = true;
507   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
508     RC = &AArch64::FPR64RegClass;
509     Opc = AArch64::FCSELDrrr;
510   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
511     RC = &AArch64::FPR32RegClass;
512     Opc = AArch64::FCSELSrrr;
513   }
514   assert(RC && "Unsupported regclass");
515 
516   // Try folding simple instructions into the csel.
517   if (TryFold) {
518     unsigned NewVReg = 0;
519     unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
520     if (FoldedOpc) {
521       // The folded opcodes csinc, csinc and csneg apply the operation to
522       // FalseReg, so we need to invert the condition.
523       CC = AArch64CC::getInvertedCondCode(CC);
524       TrueReg = FalseReg;
525     } else
526       FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
527 
528     // Fold the operation. Leave any dead instructions for DCE to clean up.
529     if (FoldedOpc) {
530       FalseReg = NewVReg;
531       Opc = FoldedOpc;
532       // The extends the live range of NewVReg.
533       MRI.clearKillFlags(NewVReg);
534     }
535   }
536 
537   // Pull all virtual register into the appropriate class.
538   MRI.constrainRegClass(TrueReg, RC);
539   MRI.constrainRegClass(FalseReg, RC);
540 
541   // Insert the csel.
542   BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
543       CC);
544 }
545 
546 // FIXME: this implementation should be micro-architecture dependent, so a
547 // micro-architecture target hook should be introduced here in future.
isAsCheapAsAMove(const MachineInstr * MI) const548 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
549   if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53())
550     return MI->isAsCheapAsAMove();
551 
552   switch (MI->getOpcode()) {
553   default:
554     return false;
555 
556   // add/sub on register without shift
557   case AArch64::ADDWri:
558   case AArch64::ADDXri:
559   case AArch64::SUBWri:
560   case AArch64::SUBXri:
561     return (MI->getOperand(3).getImm() == 0);
562 
563   // logical ops on immediate
564   case AArch64::ANDWri:
565   case AArch64::ANDXri:
566   case AArch64::EORWri:
567   case AArch64::EORXri:
568   case AArch64::ORRWri:
569   case AArch64::ORRXri:
570     return true;
571 
572   // logical ops on register without shift
573   case AArch64::ANDWrr:
574   case AArch64::ANDXrr:
575   case AArch64::BICWrr:
576   case AArch64::BICXrr:
577   case AArch64::EONWrr:
578   case AArch64::EONXrr:
579   case AArch64::EORWrr:
580   case AArch64::EORXrr:
581   case AArch64::ORNWrr:
582   case AArch64::ORNXrr:
583   case AArch64::ORRWrr:
584   case AArch64::ORRXrr:
585     return true;
586   }
587 
588   llvm_unreachable("Unknown opcode to check as cheap as a move!");
589 }
590 
isCoalescableExtInstr(const MachineInstr & MI,unsigned & SrcReg,unsigned & DstReg,unsigned & SubIdx) const591 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
592                                              unsigned &SrcReg, unsigned &DstReg,
593                                              unsigned &SubIdx) const {
594   switch (MI.getOpcode()) {
595   default:
596     return false;
597   case AArch64::SBFMXri: // aka sxtw
598   case AArch64::UBFMXri: // aka uxtw
599     // Check for the 32 -> 64 bit extension case, these instructions can do
600     // much more.
601     if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
602       return false;
603     // This is a signed or unsigned 32 -> 64 bit extension.
604     SrcReg = MI.getOperand(1).getReg();
605     DstReg = MI.getOperand(0).getReg();
606     SubIdx = AArch64::sub_32;
607     return true;
608   }
609 }
610 
611 bool
areMemAccessesTriviallyDisjoint(MachineInstr * MIa,MachineInstr * MIb,AliasAnalysis * AA) const612 AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
613                                                   MachineInstr *MIb,
614                                                   AliasAnalysis *AA) const {
615   const TargetRegisterInfo *TRI = &getRegisterInfo();
616   unsigned BaseRegA = 0, BaseRegB = 0;
617   int OffsetA = 0, OffsetB = 0;
618   int WidthA = 0, WidthB = 0;
619 
620   assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
621          "MIa must be a store or a load");
622   assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
623          "MIb must be a store or a load");
624 
625   if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
626       MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
627     return false;
628 
629   // Retrieve the base register, offset from the base register and width. Width
630   // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).  If
631   // base registers are identical, and the offset of a lower memory access +
632   // the width doesn't overlap the offset of a higher memory access,
633   // then the memory accesses are different.
634   if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
635       getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
636     if (BaseRegA == BaseRegB) {
637       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
638       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
639       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
640       if (LowOffset + LowWidth <= HighOffset)
641         return true;
642     }
643   }
644   return false;
645 }
646 
647 /// analyzeCompare - For a comparison instruction, return the source registers
648 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
649 /// Return true if the comparison instruction can be analyzed.
analyzeCompare(const MachineInstr * MI,unsigned & SrcReg,unsigned & SrcReg2,int & CmpMask,int & CmpValue) const650 bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
651                                       unsigned &SrcReg2, int &CmpMask,
652                                       int &CmpValue) const {
653   switch (MI->getOpcode()) {
654   default:
655     break;
656   case AArch64::SUBSWrr:
657   case AArch64::SUBSWrs:
658   case AArch64::SUBSWrx:
659   case AArch64::SUBSXrr:
660   case AArch64::SUBSXrs:
661   case AArch64::SUBSXrx:
662   case AArch64::ADDSWrr:
663   case AArch64::ADDSWrs:
664   case AArch64::ADDSWrx:
665   case AArch64::ADDSXrr:
666   case AArch64::ADDSXrs:
667   case AArch64::ADDSXrx:
668     // Replace SUBSWrr with SUBWrr if NZCV is not used.
669     SrcReg = MI->getOperand(1).getReg();
670     SrcReg2 = MI->getOperand(2).getReg();
671     CmpMask = ~0;
672     CmpValue = 0;
673     return true;
674   case AArch64::SUBSWri:
675   case AArch64::ADDSWri:
676   case AArch64::SUBSXri:
677   case AArch64::ADDSXri:
678     SrcReg = MI->getOperand(1).getReg();
679     SrcReg2 = 0;
680     CmpMask = ~0;
681     // FIXME: In order to convert CmpValue to 0 or 1
682     CmpValue = (MI->getOperand(2).getImm() != 0);
683     return true;
684   case AArch64::ANDSWri:
685   case AArch64::ANDSXri:
686     // ANDS does not use the same encoding scheme as the others xxxS
687     // instructions.
688     SrcReg = MI->getOperand(1).getReg();
689     SrcReg2 = 0;
690     CmpMask = ~0;
691     // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
692     // while the type of CmpValue is int. When converting uint64_t to int,
693     // the high 32 bits of uint64_t will be lost.
694     // In fact it causes a bug in spec2006-483.xalancbmk
695     // CmpValue is only used to compare with zero in OptimizeCompareInstr
696     CmpValue = (AArch64_AM::decodeLogicalImmediate(
697                     MI->getOperand(2).getImm(),
698                     MI->getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0);
699     return true;
700   }
701 
702   return false;
703 }
704 
UpdateOperandRegClass(MachineInstr * Instr)705 static bool UpdateOperandRegClass(MachineInstr *Instr) {
706   MachineBasicBlock *MBB = Instr->getParent();
707   assert(MBB && "Can't get MachineBasicBlock here");
708   MachineFunction *MF = MBB->getParent();
709   assert(MF && "Can't get MachineFunction here");
710   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
711   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
712   MachineRegisterInfo *MRI = &MF->getRegInfo();
713 
714   for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
715        ++OpIdx) {
716     MachineOperand &MO = Instr->getOperand(OpIdx);
717     const TargetRegisterClass *OpRegCstraints =
718         Instr->getRegClassConstraint(OpIdx, TII, TRI);
719 
720     // If there's no constraint, there's nothing to do.
721     if (!OpRegCstraints)
722       continue;
723     // If the operand is a frame index, there's nothing to do here.
724     // A frame index operand will resolve correctly during PEI.
725     if (MO.isFI())
726       continue;
727 
728     assert(MO.isReg() &&
729            "Operand has register constraints without being a register!");
730 
731     unsigned Reg = MO.getReg();
732     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
733       if (!OpRegCstraints->contains(Reg))
734         return false;
735     } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
736                !MRI->constrainRegClass(Reg, OpRegCstraints))
737       return false;
738   }
739 
740   return true;
741 }
742 
743 /// \brief Return the opcode that does not set flags when possible - otherwise
744 /// return the original opcode. The caller is responsible to do the actual
745 /// substitution and legality checking.
convertFlagSettingOpcode(const MachineInstr * MI)746 static unsigned convertFlagSettingOpcode(const MachineInstr *MI) {
747   // Don't convert all compare instructions, because for some the zero register
748   // encoding becomes the sp register.
749   bool MIDefinesZeroReg = false;
750   if (MI->definesRegister(AArch64::WZR) || MI->definesRegister(AArch64::XZR))
751     MIDefinesZeroReg = true;
752 
753   switch (MI->getOpcode()) {
754   default:
755     return MI->getOpcode();
756   case AArch64::ADDSWrr:
757     return AArch64::ADDWrr;
758   case AArch64::ADDSWri:
759     return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
760   case AArch64::ADDSWrs:
761     return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
762   case AArch64::ADDSWrx:
763     return AArch64::ADDWrx;
764   case AArch64::ADDSXrr:
765     return AArch64::ADDXrr;
766   case AArch64::ADDSXri:
767     return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
768   case AArch64::ADDSXrs:
769     return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
770   case AArch64::ADDSXrx:
771     return AArch64::ADDXrx;
772   case AArch64::SUBSWrr:
773     return AArch64::SUBWrr;
774   case AArch64::SUBSWri:
775     return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
776   case AArch64::SUBSWrs:
777     return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
778   case AArch64::SUBSWrx:
779     return AArch64::SUBWrx;
780   case AArch64::SUBSXrr:
781     return AArch64::SUBXrr;
782   case AArch64::SUBSXri:
783     return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
784   case AArch64::SUBSXrs:
785     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
786   case AArch64::SUBSXrx:
787     return AArch64::SUBXrx;
788   }
789 }
790 
791 /// True when condition code could be modified on the instruction
792 /// trace starting at from and ending at to.
modifiesConditionCode(MachineInstr * From,MachineInstr * To,const bool CheckOnlyCCWrites,const TargetRegisterInfo * TRI)793 static bool modifiesConditionCode(MachineInstr *From, MachineInstr *To,
794                                   const bool CheckOnlyCCWrites,
795                                   const TargetRegisterInfo *TRI) {
796   // We iterate backward starting \p To until we hit \p From
797   MachineBasicBlock::iterator I = To, E = From, B = To->getParent()->begin();
798 
799   // Early exit if To is at the beginning of the BB.
800   if (I == B)
801     return true;
802 
803   // Check whether the definition of SrcReg is in the same basic block as
804   // Compare. If not, assume the condition code gets modified on some path.
805   if (To->getParent() != From->getParent())
806     return true;
807 
808   // Check that NZCV isn't set on the trace.
809   for (--I; I != E; --I) {
810     const MachineInstr &Instr = *I;
811 
812     if (Instr.modifiesRegister(AArch64::NZCV, TRI) ||
813         (!CheckOnlyCCWrites && Instr.readsRegister(AArch64::NZCV, TRI)))
814       // This instruction modifies or uses NZCV after the one we want to
815       // change.
816       return true;
817     if (I == B)
818       // We currently don't allow the instruction trace to cross basic
819       // block boundaries
820       return true;
821   }
822   return false;
823 }
824 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
825 /// comparison into one that sets the zero bit in the flags register.
optimizeCompareInstr(MachineInstr * CmpInstr,unsigned SrcReg,unsigned SrcReg2,int CmpMask,int CmpValue,const MachineRegisterInfo * MRI) const826 bool AArch64InstrInfo::optimizeCompareInstr(
827     MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
828     int CmpValue, const MachineRegisterInfo *MRI) const {
829 
830   // Replace SUBSWrr with SUBWrr if NZCV is not used.
831   int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
832   if (Cmp_NZCV != -1) {
833     if (CmpInstr->definesRegister(AArch64::WZR) ||
834         CmpInstr->definesRegister(AArch64::XZR)) {
835       CmpInstr->eraseFromParent();
836       return true;
837     }
838     unsigned Opc = CmpInstr->getOpcode();
839     unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
840     if (NewOpc == Opc)
841       return false;
842     const MCInstrDesc &MCID = get(NewOpc);
843     CmpInstr->setDesc(MCID);
844     CmpInstr->RemoveOperand(Cmp_NZCV);
845     bool succeeded = UpdateOperandRegClass(CmpInstr);
846     (void)succeeded;
847     assert(succeeded && "Some operands reg class are incompatible!");
848     return true;
849   }
850 
851   // Continue only if we have a "ri" where immediate is zero.
852   // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
853   // function.
854   assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
855   if (CmpValue != 0 || SrcReg2 != 0)
856     return false;
857 
858   // CmpInstr is a Compare instruction if destination register is not used.
859   if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
860     return false;
861 
862   // Get the unique definition of SrcReg.
863   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
864   if (!MI)
865     return false;
866 
867   bool CheckOnlyCCWrites = false;
868   const TargetRegisterInfo *TRI = &getRegisterInfo();
869   if (modifiesConditionCode(MI, CmpInstr, CheckOnlyCCWrites, TRI))
870     return false;
871 
872   unsigned NewOpc = MI->getOpcode();
873   switch (MI->getOpcode()) {
874   default:
875     return false;
876   case AArch64::ADDSWrr:
877   case AArch64::ADDSWri:
878   case AArch64::ADDSXrr:
879   case AArch64::ADDSXri:
880   case AArch64::SUBSWrr:
881   case AArch64::SUBSWri:
882   case AArch64::SUBSXrr:
883   case AArch64::SUBSXri:
884     break;
885   case AArch64::ADDWrr:    NewOpc = AArch64::ADDSWrr; break;
886   case AArch64::ADDWri:    NewOpc = AArch64::ADDSWri; break;
887   case AArch64::ADDXrr:    NewOpc = AArch64::ADDSXrr; break;
888   case AArch64::ADDXri:    NewOpc = AArch64::ADDSXri; break;
889   case AArch64::ADCWr:     NewOpc = AArch64::ADCSWr; break;
890   case AArch64::ADCXr:     NewOpc = AArch64::ADCSXr; break;
891   case AArch64::SUBWrr:    NewOpc = AArch64::SUBSWrr; break;
892   case AArch64::SUBWri:    NewOpc = AArch64::SUBSWri; break;
893   case AArch64::SUBXrr:    NewOpc = AArch64::SUBSXrr; break;
894   case AArch64::SUBXri:    NewOpc = AArch64::SUBSXri; break;
895   case AArch64::SBCWr:     NewOpc = AArch64::SBCSWr; break;
896   case AArch64::SBCXr:     NewOpc = AArch64::SBCSXr; break;
897   case AArch64::ANDWri:    NewOpc = AArch64::ANDSWri; break;
898   case AArch64::ANDXri:    NewOpc = AArch64::ANDSXri; break;
899   }
900 
901   // Scan forward for the use of NZCV.
902   // When checking against MI: if it's a conditional code requires
903   // checking of V bit, then this is not safe to do.
904   // It is safe to remove CmpInstr if NZCV is redefined or killed.
905   // If we are done with the basic block, we need to check whether NZCV is
906   // live-out.
907   bool IsSafe = false;
908   for (MachineBasicBlock::iterator I = CmpInstr,
909                                    E = CmpInstr->getParent()->end();
910        !IsSafe && ++I != E;) {
911     const MachineInstr &Instr = *I;
912     for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO;
913          ++IO) {
914       const MachineOperand &MO = Instr.getOperand(IO);
915       if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) {
916         IsSafe = true;
917         break;
918       }
919       if (!MO.isReg() || MO.getReg() != AArch64::NZCV)
920         continue;
921       if (MO.isDef()) {
922         IsSafe = true;
923         break;
924       }
925 
926       // Decode the condition code.
927       unsigned Opc = Instr.getOpcode();
928       AArch64CC::CondCode CC;
929       switch (Opc) {
930       default:
931         return false;
932       case AArch64::Bcc:
933         CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm();
934         break;
935       case AArch64::CSINVWr:
936       case AArch64::CSINVXr:
937       case AArch64::CSINCWr:
938       case AArch64::CSINCXr:
939       case AArch64::CSELWr:
940       case AArch64::CSELXr:
941       case AArch64::CSNEGWr:
942       case AArch64::CSNEGXr:
943       case AArch64::FCSELSrrr:
944       case AArch64::FCSELDrrr:
945         CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm();
946         break;
947       }
948 
949       // It is not safe to remove Compare instruction if Overflow(V) is used.
950       switch (CC) {
951       default:
952         // NZCV can be used multiple times, we should continue.
953         break;
954       case AArch64CC::VS:
955       case AArch64CC::VC:
956       case AArch64CC::GE:
957       case AArch64CC::LT:
958       case AArch64CC::GT:
959       case AArch64CC::LE:
960         return false;
961       }
962     }
963   }
964 
965   // If NZCV is not killed nor re-defined, we should check whether it is
966   // live-out. If it is live-out, do not optimize.
967   if (!IsSafe) {
968     MachineBasicBlock *ParentBlock = CmpInstr->getParent();
969     for (auto *MBB : ParentBlock->successors())
970       if (MBB->isLiveIn(AArch64::NZCV))
971         return false;
972   }
973 
974   // Update the instruction to set NZCV.
975   MI->setDesc(get(NewOpc));
976   CmpInstr->eraseFromParent();
977   bool succeeded = UpdateOperandRegClass(MI);
978   (void)succeeded;
979   assert(succeeded && "Some operands reg class are incompatible!");
980   MI->addRegisterDefined(AArch64::NZCV, TRI);
981   return true;
982 }
983 
984 bool
expandPostRAPseudo(MachineBasicBlock::iterator MI) const985 AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
986   if (MI->getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
987     return false;
988 
989   MachineBasicBlock &MBB = *MI->getParent();
990   DebugLoc DL = MI->getDebugLoc();
991   unsigned Reg = MI->getOperand(0).getReg();
992   const GlobalValue *GV =
993       cast<GlobalValue>((*MI->memoperands_begin())->getValue());
994   const TargetMachine &TM = MBB.getParent()->getTarget();
995   unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
996   const unsigned char MO_NC = AArch64II::MO_NC;
997 
998   if ((OpFlags & AArch64II::MO_GOT) != 0) {
999     BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1000         .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
1001     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1002         .addReg(Reg, RegState::Kill).addImm(0)
1003         .addMemOperand(*MI->memoperands_begin());
1004   } else if (TM.getCodeModel() == CodeModel::Large) {
1005     BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1006         .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
1007     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1008         .addReg(Reg, RegState::Kill)
1009         .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
1010     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1011         .addReg(Reg, RegState::Kill)
1012         .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
1013     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1014         .addReg(Reg, RegState::Kill)
1015         .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
1016     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1017         .addReg(Reg, RegState::Kill).addImm(0)
1018         .addMemOperand(*MI->memoperands_begin());
1019   } else {
1020     BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1021         .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1022     unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1023     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1024         .addReg(Reg, RegState::Kill)
1025         .addGlobalAddress(GV, 0, LoFlags)
1026         .addMemOperand(*MI->memoperands_begin());
1027   }
1028 
1029   MBB.erase(MI);
1030 
1031   return true;
1032 }
1033 
1034 /// Return true if this is this instruction has a non-zero immediate
hasShiftedReg(const MachineInstr * MI) const1035 bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
1036   switch (MI->getOpcode()) {
1037   default:
1038     break;
1039   case AArch64::ADDSWrs:
1040   case AArch64::ADDSXrs:
1041   case AArch64::ADDWrs:
1042   case AArch64::ADDXrs:
1043   case AArch64::ANDSWrs:
1044   case AArch64::ANDSXrs:
1045   case AArch64::ANDWrs:
1046   case AArch64::ANDXrs:
1047   case AArch64::BICSWrs:
1048   case AArch64::BICSXrs:
1049   case AArch64::BICWrs:
1050   case AArch64::BICXrs:
1051   case AArch64::CRC32Brr:
1052   case AArch64::CRC32CBrr:
1053   case AArch64::CRC32CHrr:
1054   case AArch64::CRC32CWrr:
1055   case AArch64::CRC32CXrr:
1056   case AArch64::CRC32Hrr:
1057   case AArch64::CRC32Wrr:
1058   case AArch64::CRC32Xrr:
1059   case AArch64::EONWrs:
1060   case AArch64::EONXrs:
1061   case AArch64::EORWrs:
1062   case AArch64::EORXrs:
1063   case AArch64::ORNWrs:
1064   case AArch64::ORNXrs:
1065   case AArch64::ORRWrs:
1066   case AArch64::ORRXrs:
1067   case AArch64::SUBSWrs:
1068   case AArch64::SUBSXrs:
1069   case AArch64::SUBWrs:
1070   case AArch64::SUBXrs:
1071     if (MI->getOperand(3).isImm()) {
1072       unsigned val = MI->getOperand(3).getImm();
1073       return (val != 0);
1074     }
1075     break;
1076   }
1077   return false;
1078 }
1079 
1080 /// Return true if this is this instruction has a non-zero immediate
hasExtendedReg(const MachineInstr * MI) const1081 bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const {
1082   switch (MI->getOpcode()) {
1083   default:
1084     break;
1085   case AArch64::ADDSWrx:
1086   case AArch64::ADDSXrx:
1087   case AArch64::ADDSXrx64:
1088   case AArch64::ADDWrx:
1089   case AArch64::ADDXrx:
1090   case AArch64::ADDXrx64:
1091   case AArch64::SUBSWrx:
1092   case AArch64::SUBSXrx:
1093   case AArch64::SUBSXrx64:
1094   case AArch64::SUBWrx:
1095   case AArch64::SUBXrx:
1096   case AArch64::SUBXrx64:
1097     if (MI->getOperand(3).isImm()) {
1098       unsigned val = MI->getOperand(3).getImm();
1099       return (val != 0);
1100     }
1101     break;
1102   }
1103 
1104   return false;
1105 }
1106 
1107 // Return true if this instruction simply sets its single destination register
1108 // to zero. This is equivalent to a register rename of the zero-register.
isGPRZero(const MachineInstr * MI) const1109 bool AArch64InstrInfo::isGPRZero(const MachineInstr *MI) const {
1110   switch (MI->getOpcode()) {
1111   default:
1112     break;
1113   case AArch64::MOVZWi:
1114   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1115     if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) {
1116       assert(MI->getDesc().getNumOperands() == 3 &&
1117              MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1118       return true;
1119     }
1120     break;
1121   case AArch64::ANDWri: // and Rd, Rzr, #imm
1122     return MI->getOperand(1).getReg() == AArch64::WZR;
1123   case AArch64::ANDXri:
1124     return MI->getOperand(1).getReg() == AArch64::XZR;
1125   case TargetOpcode::COPY:
1126     return MI->getOperand(1).getReg() == AArch64::WZR;
1127   }
1128   return false;
1129 }
1130 
1131 // Return true if this instruction simply renames a general register without
1132 // modifying bits.
isGPRCopy(const MachineInstr * MI) const1133 bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
1134   switch (MI->getOpcode()) {
1135   default:
1136     break;
1137   case TargetOpcode::COPY: {
1138     // GPR32 copies will by lowered to ORRXrs
1139     unsigned DstReg = MI->getOperand(0).getReg();
1140     return (AArch64::GPR32RegClass.contains(DstReg) ||
1141             AArch64::GPR64RegClass.contains(DstReg));
1142   }
1143   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1144     if (MI->getOperand(1).getReg() == AArch64::XZR) {
1145       assert(MI->getDesc().getNumOperands() == 4 &&
1146              MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1147       return true;
1148     }
1149     break;
1150   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1151     if (MI->getOperand(2).getImm() == 0) {
1152       assert(MI->getDesc().getNumOperands() == 4 &&
1153              MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1154       return true;
1155     }
1156     break;
1157   }
1158   return false;
1159 }
1160 
1161 // Return true if this instruction simply renames a general register without
1162 // modifying bits.
isFPRCopy(const MachineInstr * MI) const1163 bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
1164   switch (MI->getOpcode()) {
1165   default:
1166     break;
1167   case TargetOpcode::COPY: {
1168     // FPR64 copies will by lowered to ORR.16b
1169     unsigned DstReg = MI->getOperand(0).getReg();
1170     return (AArch64::FPR64RegClass.contains(DstReg) ||
1171             AArch64::FPR128RegClass.contains(DstReg));
1172   }
1173   case AArch64::ORRv16i8:
1174     if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
1175       assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() &&
1176              "invalid ORRv16i8 operands");
1177       return true;
1178     }
1179     break;
1180   }
1181   return false;
1182 }
1183 
isLoadFromStackSlot(const MachineInstr * MI,int & FrameIndex) const1184 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
1185                                                int &FrameIndex) const {
1186   switch (MI->getOpcode()) {
1187   default:
1188     break;
1189   case AArch64::LDRWui:
1190   case AArch64::LDRXui:
1191   case AArch64::LDRBui:
1192   case AArch64::LDRHui:
1193   case AArch64::LDRSui:
1194   case AArch64::LDRDui:
1195   case AArch64::LDRQui:
1196     if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
1197         MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
1198       FrameIndex = MI->getOperand(1).getIndex();
1199       return MI->getOperand(0).getReg();
1200     }
1201     break;
1202   }
1203 
1204   return 0;
1205 }
1206 
isStoreToStackSlot(const MachineInstr * MI,int & FrameIndex) const1207 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
1208                                               int &FrameIndex) const {
1209   switch (MI->getOpcode()) {
1210   default:
1211     break;
1212   case AArch64::STRWui:
1213   case AArch64::STRXui:
1214   case AArch64::STRBui:
1215   case AArch64::STRHui:
1216   case AArch64::STRSui:
1217   case AArch64::STRDui:
1218   case AArch64::STRQui:
1219     if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
1220         MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
1221       FrameIndex = MI->getOperand(1).getIndex();
1222       return MI->getOperand(0).getReg();
1223     }
1224     break;
1225   }
1226   return 0;
1227 }
1228 
1229 /// Return true if this is load/store scales or extends its register offset.
1230 /// This refers to scaling a dynamic index as opposed to scaled immediates.
1231 /// MI should be a memory op that allows scaled addressing.
isScaledAddr(const MachineInstr * MI) const1232 bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const {
1233   switch (MI->getOpcode()) {
1234   default:
1235     break;
1236   case AArch64::LDRBBroW:
1237   case AArch64::LDRBroW:
1238   case AArch64::LDRDroW:
1239   case AArch64::LDRHHroW:
1240   case AArch64::LDRHroW:
1241   case AArch64::LDRQroW:
1242   case AArch64::LDRSBWroW:
1243   case AArch64::LDRSBXroW:
1244   case AArch64::LDRSHWroW:
1245   case AArch64::LDRSHXroW:
1246   case AArch64::LDRSWroW:
1247   case AArch64::LDRSroW:
1248   case AArch64::LDRWroW:
1249   case AArch64::LDRXroW:
1250   case AArch64::STRBBroW:
1251   case AArch64::STRBroW:
1252   case AArch64::STRDroW:
1253   case AArch64::STRHHroW:
1254   case AArch64::STRHroW:
1255   case AArch64::STRQroW:
1256   case AArch64::STRSroW:
1257   case AArch64::STRWroW:
1258   case AArch64::STRXroW:
1259   case AArch64::LDRBBroX:
1260   case AArch64::LDRBroX:
1261   case AArch64::LDRDroX:
1262   case AArch64::LDRHHroX:
1263   case AArch64::LDRHroX:
1264   case AArch64::LDRQroX:
1265   case AArch64::LDRSBWroX:
1266   case AArch64::LDRSBXroX:
1267   case AArch64::LDRSHWroX:
1268   case AArch64::LDRSHXroX:
1269   case AArch64::LDRSWroX:
1270   case AArch64::LDRSroX:
1271   case AArch64::LDRWroX:
1272   case AArch64::LDRXroX:
1273   case AArch64::STRBBroX:
1274   case AArch64::STRBroX:
1275   case AArch64::STRDroX:
1276   case AArch64::STRHHroX:
1277   case AArch64::STRHroX:
1278   case AArch64::STRQroX:
1279   case AArch64::STRSroX:
1280   case AArch64::STRWroX:
1281   case AArch64::STRXroX:
1282 
1283     unsigned Val = MI->getOperand(3).getImm();
1284     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
1285     return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
1286   }
1287   return false;
1288 }
1289 
1290 /// Check all MachineMemOperands for a hint to suppress pairing.
isLdStPairSuppressed(const MachineInstr * MI) const1291 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const {
1292   assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
1293          "Too many target MO flags");
1294   for (auto *MM : MI->memoperands()) {
1295     if (MM->getFlags() &
1296         (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) {
1297       return true;
1298     }
1299   }
1300   return false;
1301 }
1302 
1303 /// Set a flag on the first MachineMemOperand to suppress pairing.
suppressLdStPair(MachineInstr * MI) const1304 void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const {
1305   if (MI->memoperands_empty())
1306     return;
1307 
1308   assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
1309          "Too many target MO flags");
1310   (*MI->memoperands_begin())
1311       ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit);
1312 }
1313 
1314 bool
getLdStBaseRegImmOfs(MachineInstr * LdSt,unsigned & BaseReg,unsigned & Offset,const TargetRegisterInfo * TRI) const1315 AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
1316                                        unsigned &Offset,
1317                                        const TargetRegisterInfo *TRI) const {
1318   switch (LdSt->getOpcode()) {
1319   default:
1320     return false;
1321   case AArch64::STRSui:
1322   case AArch64::STRDui:
1323   case AArch64::STRQui:
1324   case AArch64::STRXui:
1325   case AArch64::STRWui:
1326   case AArch64::LDRSui:
1327   case AArch64::LDRDui:
1328   case AArch64::LDRQui:
1329   case AArch64::LDRXui:
1330   case AArch64::LDRWui:
1331     if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
1332       return false;
1333     BaseReg = LdSt->getOperand(1).getReg();
1334     MachineFunction &MF = *LdSt->getParent()->getParent();
1335     unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize();
1336     Offset = LdSt->getOperand(2).getImm() * Width;
1337     return true;
1338   };
1339 }
1340 
getLdStBaseRegImmOfsWidth(MachineInstr * LdSt,unsigned & BaseReg,int & Offset,int & Width,const TargetRegisterInfo * TRI) const1341 bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
1342     MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
1343     const TargetRegisterInfo *TRI) const {
1344   // Handle only loads/stores with base register followed by immediate offset.
1345   if (LdSt->getNumOperands() != 3)
1346     return false;
1347   if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
1348     return false;
1349 
1350   // Offset is calculated as the immediate operand multiplied by the scaling factor.
1351   // Unscaled instructions have scaling factor set to 1.
1352   int Scale = 0;
1353   switch (LdSt->getOpcode()) {
1354   default:
1355     return false;
1356   case AArch64::LDURQi:
1357   case AArch64::STURQi:
1358     Width = 16;
1359     Scale = 1;
1360     break;
1361   case AArch64::LDURXi:
1362   case AArch64::LDURDi:
1363   case AArch64::STURXi:
1364   case AArch64::STURDi:
1365     Width = 8;
1366     Scale = 1;
1367     break;
1368   case AArch64::LDURWi:
1369   case AArch64::LDURSi:
1370   case AArch64::LDURSWi:
1371   case AArch64::STURWi:
1372   case AArch64::STURSi:
1373     Width = 4;
1374     Scale = 1;
1375     break;
1376   case AArch64::LDURHi:
1377   case AArch64::LDURHHi:
1378   case AArch64::LDURSHXi:
1379   case AArch64::LDURSHWi:
1380   case AArch64::STURHi:
1381   case AArch64::STURHHi:
1382     Width = 2;
1383     Scale = 1;
1384     break;
1385   case AArch64::LDURBi:
1386   case AArch64::LDURBBi:
1387   case AArch64::LDURSBXi:
1388   case AArch64::LDURSBWi:
1389   case AArch64::STURBi:
1390   case AArch64::STURBBi:
1391     Width = 1;
1392     Scale = 1;
1393     break;
1394   case AArch64::LDRXui:
1395   case AArch64::STRXui:
1396     Scale = Width = 8;
1397     break;
1398   case AArch64::LDRWui:
1399   case AArch64::STRWui:
1400     Scale = Width = 4;
1401     break;
1402   case AArch64::LDRBui:
1403   case AArch64::STRBui:
1404     Scale = Width = 1;
1405     break;
1406   case AArch64::LDRHui:
1407   case AArch64::STRHui:
1408     Scale = Width = 2;
1409     break;
1410   case AArch64::LDRSui:
1411   case AArch64::STRSui:
1412     Scale = Width = 4;
1413     break;
1414   case AArch64::LDRDui:
1415   case AArch64::STRDui:
1416     Scale = Width = 8;
1417     break;
1418   case AArch64::LDRQui:
1419   case AArch64::STRQui:
1420     Scale = Width = 16;
1421     break;
1422   case AArch64::LDRBBui:
1423   case AArch64::STRBBui:
1424     Scale = Width = 1;
1425     break;
1426   case AArch64::LDRHHui:
1427   case AArch64::STRHHui:
1428     Scale = Width = 2;
1429     break;
1430   };
1431 
1432   BaseReg = LdSt->getOperand(1).getReg();
1433   Offset = LdSt->getOperand(2).getImm() * Scale;
1434   return true;
1435 }
1436 
1437 /// Detect opportunities for ldp/stp formation.
1438 ///
1439 /// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
shouldClusterLoads(MachineInstr * FirstLdSt,MachineInstr * SecondLdSt,unsigned NumLoads) const1440 bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
1441                                           MachineInstr *SecondLdSt,
1442                                           unsigned NumLoads) const {
1443   // Only cluster up to a single pair.
1444   if (NumLoads > 1)
1445     return false;
1446   if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode())
1447     return false;
1448   // getLdStBaseRegImmOfs guarantees that oper 2 isImm.
1449   unsigned Ofs1 = FirstLdSt->getOperand(2).getImm();
1450   // Allow 6 bits of positive range.
1451   if (Ofs1 > 64)
1452     return false;
1453   // The caller should already have ordered First/SecondLdSt by offset.
1454   unsigned Ofs2 = SecondLdSt->getOperand(2).getImm();
1455   return Ofs1 + 1 == Ofs2;
1456 }
1457 
shouldScheduleAdjacent(MachineInstr * First,MachineInstr * Second) const1458 bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
1459                                               MachineInstr *Second) const {
1460   // Cyclone can fuse CMN, CMP followed by Bcc.
1461 
1462   // FIXME: B0 can also fuse:
1463   // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ.
1464   if (Second->getOpcode() != AArch64::Bcc)
1465     return false;
1466   switch (First->getOpcode()) {
1467   default:
1468     return false;
1469   case AArch64::SUBSWri:
1470   case AArch64::ADDSWri:
1471   case AArch64::ANDSWri:
1472   case AArch64::SUBSXri:
1473   case AArch64::ADDSXri:
1474   case AArch64::ANDSXri:
1475     return true;
1476   }
1477 }
1478 
emitFrameIndexDebugValue(MachineFunction & MF,int FrameIx,uint64_t Offset,const MDNode * Var,const MDNode * Expr,DebugLoc DL) const1479 MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
1480     MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
1481     const MDNode *Expr, DebugLoc DL) const {
1482   MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
1483                                 .addFrameIndex(FrameIx)
1484                                 .addImm(0)
1485                                 .addImm(Offset)
1486                                 .addMetadata(Var)
1487                                 .addMetadata(Expr);
1488   return &*MIB;
1489 }
1490 
/// Append a register operand to \p MIB, optionally narrowed to a
/// sub-register.
///
/// With \p SubIdx == 0 the register is added as is. Otherwise a physical
/// register is first resolved to its concrete sub-register through \p TRI,
/// while any other register is added with \p SubIdx as its sub-register
/// index. \p State is passed through as the operand flags.
static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
                                            unsigned Reg, unsigned SubIdx,
                                            unsigned State,
                                            const TargetRegisterInfo *TRI) {
  if (SubIdx == 0)
    return MIB.addReg(Reg, State);

  if (!TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(Reg, State, SubIdx);

  const unsigned PhysSubReg = TRI->getSubReg(Reg, SubIdx);
  return MIB.addReg(PhysSubReg, State);
}
1502 
/// Return true if copying a tuple of \p NumRegs registers element by element
/// in ascending order would overwrite a source element before it is read.
///
/// That is the case when the destination starts fewer than \p NumRegs
/// positions after the source, with positions counted modulo 32.
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // Unsigned subtraction wraps, so the remainder modulo 32 is the
  // non-negative distance from source to destination.
  const unsigned Distance = (DestReg - SrcReg) % 32u;
  return Distance < NumRegs;
}
1509 
copyPhysRegTuple(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,DebugLoc DL,unsigned DestReg,unsigned SrcReg,bool KillSrc,unsigned Opcode,llvm::ArrayRef<unsigned> Indices) const1510 void AArch64InstrInfo::copyPhysRegTuple(
1511     MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
1512     unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
1513     llvm::ArrayRef<unsigned> Indices) const {
1514   assert(Subtarget.hasNEON() &&
1515          "Unexpected register copy without NEON");
1516   const TargetRegisterInfo *TRI = &getRegisterInfo();
1517   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
1518   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
1519   unsigned NumRegs = Indices.size();
1520 
1521   int SubReg = 0, End = NumRegs, Incr = 1;
1522   if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
1523     SubReg = NumRegs - 1;
1524     End = -1;
1525     Incr = -1;
1526   }
1527 
1528   for (; SubReg != End; SubReg += Incr) {
1529     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
1530     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
1531     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
1532     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
1533   }
1534 }
1535 
copyPhysReg(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,DebugLoc DL,unsigned DestReg,unsigned SrcReg,bool KillSrc) const1536 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
1537                                    MachineBasicBlock::iterator I, DebugLoc DL,
1538                                    unsigned DestReg, unsigned SrcReg,
1539                                    bool KillSrc) const {
1540   if (AArch64::GPR32spRegClass.contains(DestReg) &&
1541       (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
1542     const TargetRegisterInfo *TRI = &getRegisterInfo();
1543 
1544     if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
1545       // If either operand is WSP, expand to ADD #0.
1546       if (Subtarget.hasZeroCycleRegMove()) {
1547         // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
1548         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
1549                                                      &AArch64::GPR64spRegClass);
1550         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
1551                                                     &AArch64::GPR64spRegClass);
1552         // This instruction is reading and writing X registers.  This may upset
1553         // the register scavenger and machine verifier, so we need to indicate
1554         // that we are reading an undefined value from SrcRegX, but a proper
1555         // value from SrcReg.
1556         BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
1557             .addReg(SrcRegX, RegState::Undef)
1558             .addImm(0)
1559             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
1560             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
1561       } else {
1562         BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
1563             .addReg(SrcReg, getKillRegState(KillSrc))
1564             .addImm(0)
1565             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1566       }
1567     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
1568       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
1569           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1570     } else {
1571       if (Subtarget.hasZeroCycleRegMove()) {
1572         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
1573         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
1574                                                      &AArch64::GPR64spRegClass);
1575         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
1576                                                     &AArch64::GPR64spRegClass);
1577         // This instruction is reading and writing X registers.  This may upset
1578         // the register scavenger and machine verifier, so we need to indicate
1579         // that we are reading an undefined value from SrcRegX, but a proper
1580         // value from SrcReg.
1581         BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
1582             .addReg(AArch64::XZR)
1583             .addReg(SrcRegX, RegState::Undef)
1584             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
1585       } else {
1586         // Otherwise, expand to ORR WZR.
1587         BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
1588             .addReg(AArch64::WZR)
1589             .addReg(SrcReg, getKillRegState(KillSrc));
1590       }
1591     }
1592     return;
1593   }
1594 
1595   if (AArch64::GPR64spRegClass.contains(DestReg) &&
1596       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
1597     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
1598       // If either operand is SP, expand to ADD #0.
1599       BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
1600           .addReg(SrcReg, getKillRegState(KillSrc))
1601           .addImm(0)
1602           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1603     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
1604       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
1605           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1606     } else {
1607       // Otherwise, expand to ORR XZR.
1608       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
1609           .addReg(AArch64::XZR)
1610           .addReg(SrcReg, getKillRegState(KillSrc));
1611     }
1612     return;
1613   }
1614 
1615   // Copy a DDDD register quad by copying the individual sub-registers.
1616   if (AArch64::DDDDRegClass.contains(DestReg) &&
1617       AArch64::DDDDRegClass.contains(SrcReg)) {
1618     static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
1619                                         AArch64::dsub2, AArch64::dsub3 };
1620     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
1621                      Indices);
1622     return;
1623   }
1624 
1625   // Copy a DDD register triple by copying the individual sub-registers.
1626   if (AArch64::DDDRegClass.contains(DestReg) &&
1627       AArch64::DDDRegClass.contains(SrcReg)) {
1628     static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
1629                                         AArch64::dsub2 };
1630     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
1631                      Indices);
1632     return;
1633   }
1634 
1635   // Copy a DD register pair by copying the individual sub-registers.
1636   if (AArch64::DDRegClass.contains(DestReg) &&
1637       AArch64::DDRegClass.contains(SrcReg)) {
1638     static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
1639     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
1640                      Indices);
1641     return;
1642   }
1643 
1644   // Copy a QQQQ register quad by copying the individual sub-registers.
1645   if (AArch64::QQQQRegClass.contains(DestReg) &&
1646       AArch64::QQQQRegClass.contains(SrcReg)) {
1647     static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
1648                                         AArch64::qsub2, AArch64::qsub3 };
1649     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
1650                      Indices);
1651     return;
1652   }
1653 
1654   // Copy a QQQ register triple by copying the individual sub-registers.
1655   if (AArch64::QQQRegClass.contains(DestReg) &&
1656       AArch64::QQQRegClass.contains(SrcReg)) {
1657     static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
1658                                         AArch64::qsub2 };
1659     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
1660                      Indices);
1661     return;
1662   }
1663 
1664   // Copy a QQ register pair by copying the individual sub-registers.
1665   if (AArch64::QQRegClass.contains(DestReg) &&
1666       AArch64::QQRegClass.contains(SrcReg)) {
1667     static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
1668     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
1669                      Indices);
1670     return;
1671   }
1672 
1673   if (AArch64::FPR128RegClass.contains(DestReg) &&
1674       AArch64::FPR128RegClass.contains(SrcReg)) {
1675     if(Subtarget.hasNEON()) {
1676       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
1677           .addReg(SrcReg)
1678           .addReg(SrcReg, getKillRegState(KillSrc));
1679     } else {
1680       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
1681         .addReg(AArch64::SP, RegState::Define)
1682         .addReg(SrcReg, getKillRegState(KillSrc))
1683         .addReg(AArch64::SP)
1684         .addImm(-16);
1685       BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
1686         .addReg(AArch64::SP, RegState::Define)
1687         .addReg(DestReg, RegState::Define)
1688         .addReg(AArch64::SP)
1689         .addImm(16);
1690     }
1691     return;
1692   }
1693 
1694   if (AArch64::FPR64RegClass.contains(DestReg) &&
1695       AArch64::FPR64RegClass.contains(SrcReg)) {
1696     if(Subtarget.hasNEON()) {
1697       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
1698                                        &AArch64::FPR128RegClass);
1699       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
1700                                       &AArch64::FPR128RegClass);
1701       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
1702           .addReg(SrcReg)
1703           .addReg(SrcReg, getKillRegState(KillSrc));
1704     } else {
1705       BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
1706           .addReg(SrcReg, getKillRegState(KillSrc));
1707     }
1708     return;
1709   }
1710 
1711   if (AArch64::FPR32RegClass.contains(DestReg) &&
1712       AArch64::FPR32RegClass.contains(SrcReg)) {
1713     if(Subtarget.hasNEON()) {
1714       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
1715                                        &AArch64::FPR128RegClass);
1716       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
1717                                       &AArch64::FPR128RegClass);
1718       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
1719           .addReg(SrcReg)
1720           .addReg(SrcReg, getKillRegState(KillSrc));
1721     } else {
1722       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
1723           .addReg(SrcReg, getKillRegState(KillSrc));
1724     }
1725     return;
1726   }
1727 
1728   if (AArch64::FPR16RegClass.contains(DestReg) &&
1729       AArch64::FPR16RegClass.contains(SrcReg)) {
1730     if(Subtarget.hasNEON()) {
1731       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
1732                                        &AArch64::FPR128RegClass);
1733       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
1734                                       &AArch64::FPR128RegClass);
1735       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
1736           .addReg(SrcReg)
1737           .addReg(SrcReg, getKillRegState(KillSrc));
1738     } else {
1739       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
1740                                        &AArch64::FPR32RegClass);
1741       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
1742                                       &AArch64::FPR32RegClass);
1743       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
1744           .addReg(SrcReg, getKillRegState(KillSrc));
1745     }
1746     return;
1747   }
1748 
1749   if (AArch64::FPR8RegClass.contains(DestReg) &&
1750       AArch64::FPR8RegClass.contains(SrcReg)) {
1751     if(Subtarget.hasNEON()) {
1752       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
1753                                        &AArch64::FPR128RegClass);
1754       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
1755                                       &AArch64::FPR128RegClass);
1756       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
1757           .addReg(SrcReg)
1758           .addReg(SrcReg, getKillRegState(KillSrc));
1759     } else {
1760       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
1761                                        &AArch64::FPR32RegClass);
1762       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
1763                                       &AArch64::FPR32RegClass);
1764       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
1765           .addReg(SrcReg, getKillRegState(KillSrc));
1766     }
1767     return;
1768   }
1769 
1770   // Copies between GPR64 and FPR64.
1771   if (AArch64::FPR64RegClass.contains(DestReg) &&
1772       AArch64::GPR64RegClass.contains(SrcReg)) {
1773     BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
1774         .addReg(SrcReg, getKillRegState(KillSrc));
1775     return;
1776   }
1777   if (AArch64::GPR64RegClass.contains(DestReg) &&
1778       AArch64::FPR64RegClass.contains(SrcReg)) {
1779     BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
1780         .addReg(SrcReg, getKillRegState(KillSrc));
1781     return;
1782   }
1783   // Copies between GPR32 and FPR32.
1784   if (AArch64::FPR32RegClass.contains(DestReg) &&
1785       AArch64::GPR32RegClass.contains(SrcReg)) {
1786     BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
1787         .addReg(SrcReg, getKillRegState(KillSrc));
1788     return;
1789   }
1790   if (AArch64::GPR32RegClass.contains(DestReg) &&
1791       AArch64::FPR32RegClass.contains(SrcReg)) {
1792     BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
1793         .addReg(SrcReg, getKillRegState(KillSrc));
1794     return;
1795   }
1796 
1797   if (DestReg == AArch64::NZCV) {
1798     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
1799     BuildMI(MBB, I, DL, get(AArch64::MSR))
1800       .addImm(AArch64SysReg::NZCV)
1801       .addReg(SrcReg, getKillRegState(KillSrc))
1802       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
1803     return;
1804   }
1805 
1806   if (SrcReg == AArch64::NZCV) {
1807     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
1808     BuildMI(MBB, I, DL, get(AArch64::MRS))
1809       .addReg(DestReg)
1810       .addImm(AArch64SysReg::NZCV)
1811       .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
1812     return;
1813   }
1814 
1815   llvm_unreachable("unimplemented reg-to-reg copy");
1816 }
1817 
storeRegToStackSlot(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,unsigned SrcReg,bool isKill,int FI,const TargetRegisterClass * RC,const TargetRegisterInfo * TRI) const1818 void AArch64InstrInfo::storeRegToStackSlot(
1819     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
1820     bool isKill, int FI, const TargetRegisterClass *RC,
1821     const TargetRegisterInfo *TRI) const {
1822   DebugLoc DL;
1823   if (MBBI != MBB.end())
1824     DL = MBBI->getDebugLoc();
1825   MachineFunction &MF = *MBB.getParent();
1826   MachineFrameInfo &MFI = *MF.getFrameInfo();
1827   unsigned Align = MFI.getObjectAlignment(FI);
1828 
1829   MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
1830   MachineMemOperand *MMO = MF.getMachineMemOperand(
1831       PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
1832   unsigned Opc = 0;
1833   bool Offset = true;
1834   switch (RC->getSize()) {
1835   case 1:
1836     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
1837       Opc = AArch64::STRBui;
1838     break;
1839   case 2:
1840     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
1841       Opc = AArch64::STRHui;
1842     break;
1843   case 4:
1844     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
1845       Opc = AArch64::STRWui;
1846       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
1847         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
1848       else
1849         assert(SrcReg != AArch64::WSP);
1850     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
1851       Opc = AArch64::STRSui;
1852     break;
1853   case 8:
1854     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
1855       Opc = AArch64::STRXui;
1856       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
1857         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
1858       else
1859         assert(SrcReg != AArch64::SP);
1860     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
1861       Opc = AArch64::STRDui;
1862     break;
1863   case 16:
1864     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
1865       Opc = AArch64::STRQui;
1866     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
1867       assert(Subtarget.hasNEON() &&
1868              "Unexpected register store without NEON");
1869       Opc = AArch64::ST1Twov1d, Offset = false;
1870     }
1871     break;
1872   case 24:
1873     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
1874       assert(Subtarget.hasNEON() &&
1875              "Unexpected register store without NEON");
1876       Opc = AArch64::ST1Threev1d, Offset = false;
1877     }
1878     break;
1879   case 32:
1880     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
1881       assert(Subtarget.hasNEON() &&
1882              "Unexpected register store without NEON");
1883       Opc = AArch64::ST1Fourv1d, Offset = false;
1884     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
1885       assert(Subtarget.hasNEON() &&
1886              "Unexpected register store without NEON");
1887       Opc = AArch64::ST1Twov2d, Offset = false;
1888     }
1889     break;
1890   case 48:
1891     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
1892       assert(Subtarget.hasNEON() &&
1893              "Unexpected register store without NEON");
1894       Opc = AArch64::ST1Threev2d, Offset = false;
1895     }
1896     break;
1897   case 64:
1898     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
1899       assert(Subtarget.hasNEON() &&
1900              "Unexpected register store without NEON");
1901       Opc = AArch64::ST1Fourv2d, Offset = false;
1902     }
1903     break;
1904   }
1905   assert(Opc && "Unknown register class");
1906 
1907   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
1908                                       .addReg(SrcReg, getKillRegState(isKill))
1909                                       .addFrameIndex(FI);
1910 
1911   if (Offset)
1912     MI.addImm(0);
1913   MI.addMemOperand(MMO);
1914 }
1915 
loadRegFromStackSlot(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,unsigned DestReg,int FI,const TargetRegisterClass * RC,const TargetRegisterInfo * TRI) const1916 void AArch64InstrInfo::loadRegFromStackSlot(
1917     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
1918     int FI, const TargetRegisterClass *RC,
1919     const TargetRegisterInfo *TRI) const {
1920   DebugLoc DL;
1921   if (MBBI != MBB.end())
1922     DL = MBBI->getDebugLoc();
1923   MachineFunction &MF = *MBB.getParent();
1924   MachineFrameInfo &MFI = *MF.getFrameInfo();
1925   unsigned Align = MFI.getObjectAlignment(FI);
1926   MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
1927   MachineMemOperand *MMO = MF.getMachineMemOperand(
1928       PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
1929 
1930   unsigned Opc = 0;
1931   bool Offset = true;
1932   switch (RC->getSize()) {
1933   case 1:
1934     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
1935       Opc = AArch64::LDRBui;
1936     break;
1937   case 2:
1938     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
1939       Opc = AArch64::LDRHui;
1940     break;
1941   case 4:
1942     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
1943       Opc = AArch64::LDRWui;
1944       if (TargetRegisterInfo::isVirtualRegister(DestReg))
1945         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
1946       else
1947         assert(DestReg != AArch64::WSP);
1948     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
1949       Opc = AArch64::LDRSui;
1950     break;
1951   case 8:
1952     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
1953       Opc = AArch64::LDRXui;
1954       if (TargetRegisterInfo::isVirtualRegister(DestReg))
1955         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
1956       else
1957         assert(DestReg != AArch64::SP);
1958     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
1959       Opc = AArch64::LDRDui;
1960     break;
1961   case 16:
1962     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
1963       Opc = AArch64::LDRQui;
1964     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
1965       assert(Subtarget.hasNEON() &&
1966              "Unexpected register load without NEON");
1967       Opc = AArch64::LD1Twov1d, Offset = false;
1968     }
1969     break;
1970   case 24:
1971     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
1972       assert(Subtarget.hasNEON() &&
1973              "Unexpected register load without NEON");
1974       Opc = AArch64::LD1Threev1d, Offset = false;
1975     }
1976     break;
1977   case 32:
1978     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
1979       assert(Subtarget.hasNEON() &&
1980              "Unexpected register load without NEON");
1981       Opc = AArch64::LD1Fourv1d, Offset = false;
1982     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
1983       assert(Subtarget.hasNEON() &&
1984              "Unexpected register load without NEON");
1985       Opc = AArch64::LD1Twov2d, Offset = false;
1986     }
1987     break;
1988   case 48:
1989     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
1990       assert(Subtarget.hasNEON() &&
1991              "Unexpected register load without NEON");
1992       Opc = AArch64::LD1Threev2d, Offset = false;
1993     }
1994     break;
1995   case 64:
1996     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
1997       assert(Subtarget.hasNEON() &&
1998              "Unexpected register load without NEON");
1999       Opc = AArch64::LD1Fourv2d, Offset = false;
2000     }
2001     break;
2002   }
2003   assert(Opc && "Unknown register class");
2004 
2005   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2006                                       .addReg(DestReg, getDefRegState(true))
2007                                       .addFrameIndex(FI);
2008   if (Offset)
2009     MI.addImm(0);
2010   MI.addMemOperand(MMO);
2011 }
2012 
emitFrameOffset(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc DL,unsigned DestReg,unsigned SrcReg,int Offset,const TargetInstrInfo * TII,MachineInstr::MIFlag Flag,bool SetNZCV)2013 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2014                            MachineBasicBlock::iterator MBBI, DebugLoc DL,
2015                            unsigned DestReg, unsigned SrcReg, int Offset,
2016                            const TargetInstrInfo *TII,
2017                            MachineInstr::MIFlag Flag, bool SetNZCV) {
2018   if (DestReg == SrcReg && Offset == 0)
2019     return;
2020 
2021   bool isSub = Offset < 0;
2022   if (isSub)
2023     Offset = -Offset;
2024 
2025   // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2026   // scratch register.  If DestReg is a virtual register, use it as the
2027   // scratch register; otherwise, create a new virtual register (to be
2028   // replaced by the scavenger at the end of PEI).  That case can be optimized
2029   // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2030   // register can be loaded with offset%8 and the add/sub can use an extending
2031   // instruction with LSL#3.
2032   // Currently the function handles any offsets but generates a poor sequence
2033   // of code.
2034   //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2035 
2036   unsigned Opc;
2037   if (SetNZCV)
2038     Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2039   else
2040     Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2041   const unsigned MaxEncoding = 0xfff;
2042   const unsigned ShiftSize = 12;
2043   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
2044   while (((unsigned)Offset) >= (1 << ShiftSize)) {
2045     unsigned ThisVal;
2046     if (((unsigned)Offset) > MaxEncodableValue) {
2047       ThisVal = MaxEncodableValue;
2048     } else {
2049       ThisVal = Offset & MaxEncodableValue;
2050     }
2051     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2052            "Encoding cannot handle value that big");
2053     BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2054         .addReg(SrcReg)
2055         .addImm(ThisVal >> ShiftSize)
2056         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2057         .setMIFlag(Flag);
2058 
2059     SrcReg = DestReg;
2060     Offset -= ThisVal;
2061     if (Offset == 0)
2062       return;
2063   }
2064   BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2065       .addReg(SrcReg)
2066       .addImm(Offset)
2067       .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2068       .setMIFlag(Flag);
2069 }
2070 
foldMemoryOperandImpl(MachineFunction & MF,MachineInstr * MI,ArrayRef<unsigned> Ops,int FrameIndex) const2071 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
2072                                                       MachineInstr *MI,
2073                                                       ArrayRef<unsigned> Ops,
2074                                                       int FrameIndex) const {
2075   // This is a bit of a hack. Consider this instruction:
2076   //
2077   //   %vreg0<def> = COPY %SP; GPR64all:%vreg0
2078   //
2079   // We explicitly chose GPR64all for the virtual register so such a copy might
2080   // be eliminated by RegisterCoalescer. However, that may not be possible, and
2081   // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
2082   // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
2083   //
2084   // To prevent that, we are going to constrain the %vreg0 register class here.
2085   //
2086   // <rdar://problem/11522048>
2087   //
2088   if (MI->isCopy()) {
2089     unsigned DstReg = MI->getOperand(0).getReg();
2090     unsigned SrcReg = MI->getOperand(1).getReg();
2091     if (SrcReg == AArch64::SP &&
2092         TargetRegisterInfo::isVirtualRegister(DstReg)) {
2093       MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
2094       return nullptr;
2095     }
2096     if (DstReg == AArch64::SP &&
2097         TargetRegisterInfo::isVirtualRegister(SrcReg)) {
2098       MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2099       return nullptr;
2100     }
2101   }
2102 
2103   // Cannot fold.
2104   return nullptr;
2105 }
2106 
isAArch64FrameOffsetLegal(const MachineInstr & MI,int & Offset,bool * OutUseUnscaledOp,unsigned * OutUnscaledOp,int * EmittableOffset)2107 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
2108                                     bool *OutUseUnscaledOp,
2109                                     unsigned *OutUnscaledOp,
2110                                     int *EmittableOffset) {
2111   int Scale = 1;
2112   bool IsSigned = false;
2113   // The ImmIdx should be changed case by case if it is not 2.
2114   unsigned ImmIdx = 2;
2115   unsigned UnscaledOp = 0;
2116   // Set output values in case of early exit.
2117   if (EmittableOffset)
2118     *EmittableOffset = 0;
2119   if (OutUseUnscaledOp)
2120     *OutUseUnscaledOp = false;
2121   if (OutUnscaledOp)
2122     *OutUnscaledOp = 0;
2123   switch (MI.getOpcode()) {
2124   default:
2125     llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
2126   // Vector spills/fills can't take an immediate offset.
2127   case AArch64::LD1Twov2d:
2128   case AArch64::LD1Threev2d:
2129   case AArch64::LD1Fourv2d:
2130   case AArch64::LD1Twov1d:
2131   case AArch64::LD1Threev1d:
2132   case AArch64::LD1Fourv1d:
2133   case AArch64::ST1Twov2d:
2134   case AArch64::ST1Threev2d:
2135   case AArch64::ST1Fourv2d:
2136   case AArch64::ST1Twov1d:
2137   case AArch64::ST1Threev1d:
2138   case AArch64::ST1Fourv1d:
2139     return AArch64FrameOffsetCannotUpdate;
2140   case AArch64::PRFMui:
2141     Scale = 8;
2142     UnscaledOp = AArch64::PRFUMi;
2143     break;
2144   case AArch64::LDRXui:
2145     Scale = 8;
2146     UnscaledOp = AArch64::LDURXi;
2147     break;
2148   case AArch64::LDRWui:
2149     Scale = 4;
2150     UnscaledOp = AArch64::LDURWi;
2151     break;
2152   case AArch64::LDRBui:
2153     Scale = 1;
2154     UnscaledOp = AArch64::LDURBi;
2155     break;
2156   case AArch64::LDRHui:
2157     Scale = 2;
2158     UnscaledOp = AArch64::LDURHi;
2159     break;
2160   case AArch64::LDRSui:
2161     Scale = 4;
2162     UnscaledOp = AArch64::LDURSi;
2163     break;
2164   case AArch64::LDRDui:
2165     Scale = 8;
2166     UnscaledOp = AArch64::LDURDi;
2167     break;
2168   case AArch64::LDRQui:
2169     Scale = 16;
2170     UnscaledOp = AArch64::LDURQi;
2171     break;
2172   case AArch64::LDRBBui:
2173     Scale = 1;
2174     UnscaledOp = AArch64::LDURBBi;
2175     break;
2176   case AArch64::LDRHHui:
2177     Scale = 2;
2178     UnscaledOp = AArch64::LDURHHi;
2179     break;
2180   case AArch64::LDRSBXui:
2181     Scale = 1;
2182     UnscaledOp = AArch64::LDURSBXi;
2183     break;
2184   case AArch64::LDRSBWui:
2185     Scale = 1;
2186     UnscaledOp = AArch64::LDURSBWi;
2187     break;
2188   case AArch64::LDRSHXui:
2189     Scale = 2;
2190     UnscaledOp = AArch64::LDURSHXi;
2191     break;
2192   case AArch64::LDRSHWui:
2193     Scale = 2;
2194     UnscaledOp = AArch64::LDURSHWi;
2195     break;
2196   case AArch64::LDRSWui:
2197     Scale = 4;
2198     UnscaledOp = AArch64::LDURSWi;
2199     break;
2200 
2201   case AArch64::STRXui:
2202     Scale = 8;
2203     UnscaledOp = AArch64::STURXi;
2204     break;
2205   case AArch64::STRWui:
2206     Scale = 4;
2207     UnscaledOp = AArch64::STURWi;
2208     break;
2209   case AArch64::STRBui:
2210     Scale = 1;
2211     UnscaledOp = AArch64::STURBi;
2212     break;
2213   case AArch64::STRHui:
2214     Scale = 2;
2215     UnscaledOp = AArch64::STURHi;
2216     break;
2217   case AArch64::STRSui:
2218     Scale = 4;
2219     UnscaledOp = AArch64::STURSi;
2220     break;
2221   case AArch64::STRDui:
2222     Scale = 8;
2223     UnscaledOp = AArch64::STURDi;
2224     break;
2225   case AArch64::STRQui:
2226     Scale = 16;
2227     UnscaledOp = AArch64::STURQi;
2228     break;
2229   case AArch64::STRBBui:
2230     Scale = 1;
2231     UnscaledOp = AArch64::STURBBi;
2232     break;
2233   case AArch64::STRHHui:
2234     Scale = 2;
2235     UnscaledOp = AArch64::STURHHi;
2236     break;
2237 
2238   case AArch64::LDPXi:
2239   case AArch64::LDPDi:
2240   case AArch64::STPXi:
2241   case AArch64::STPDi:
2242     IsSigned = true;
2243     Scale = 8;
2244     break;
2245   case AArch64::LDPQi:
2246   case AArch64::STPQi:
2247     IsSigned = true;
2248     Scale = 16;
2249     break;
2250   case AArch64::LDPWi:
2251   case AArch64::LDPSi:
2252   case AArch64::STPWi:
2253   case AArch64::STPSi:
2254     IsSigned = true;
2255     Scale = 4;
2256     break;
2257 
2258   case AArch64::LDURXi:
2259   case AArch64::LDURWi:
2260   case AArch64::LDURBi:
2261   case AArch64::LDURHi:
2262   case AArch64::LDURSi:
2263   case AArch64::LDURDi:
2264   case AArch64::LDURQi:
2265   case AArch64::LDURHHi:
2266   case AArch64::LDURBBi:
2267   case AArch64::LDURSBXi:
2268   case AArch64::LDURSBWi:
2269   case AArch64::LDURSHXi:
2270   case AArch64::LDURSHWi:
2271   case AArch64::LDURSWi:
2272   case AArch64::STURXi:
2273   case AArch64::STURWi:
2274   case AArch64::STURBi:
2275   case AArch64::STURHi:
2276   case AArch64::STURSi:
2277   case AArch64::STURDi:
2278   case AArch64::STURQi:
2279   case AArch64::STURBBi:
2280   case AArch64::STURHHi:
2281     Scale = 1;
2282     break;
2283   }
2284 
2285   Offset += MI.getOperand(ImmIdx).getImm() * Scale;
2286 
2287   bool useUnscaledOp = false;
2288   // If the offset doesn't match the scale, we rewrite the instruction to
2289   // use the unscaled instruction instead. Likewise, if we have a negative
2290   // offset (and have an unscaled op to use).
2291   if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
2292     useUnscaledOp = true;
2293 
2294   // Use an unscaled addressing mode if the instruction has a negative offset
2295   // (or if the instruction is already using an unscaled addressing mode).
2296   unsigned MaskBits;
2297   if (IsSigned) {
2298     // ldp/stp instructions.
2299     MaskBits = 7;
2300     Offset /= Scale;
2301   } else if (UnscaledOp == 0 || useUnscaledOp) {
2302     MaskBits = 9;
2303     IsSigned = true;
2304     Scale = 1;
2305   } else {
2306     MaskBits = 12;
2307     IsSigned = false;
2308     Offset /= Scale;
2309   }
2310 
2311   // Attempt to fold address computation.
2312   int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
2313   int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
2314   if (Offset >= MinOff && Offset <= MaxOff) {
2315     if (EmittableOffset)
2316       *EmittableOffset = Offset;
2317     Offset = 0;
2318   } else {
2319     int NewOff = Offset < 0 ? MinOff : MaxOff;
2320     if (EmittableOffset)
2321       *EmittableOffset = NewOff;
2322     Offset = (Offset - NewOff) * Scale;
2323   }
2324   if (OutUseUnscaledOp)
2325     *OutUseUnscaledOp = useUnscaledOp;
2326   if (OutUnscaledOp)
2327     *OutUnscaledOp = UnscaledOp;
2328   return AArch64FrameOffsetCanUpdate |
2329          (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
2330 }
2331 
rewriteAArch64FrameIndex(MachineInstr & MI,unsigned FrameRegIdx,unsigned FrameReg,int & Offset,const AArch64InstrInfo * TII)2332 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2333                                     unsigned FrameReg, int &Offset,
2334                                     const AArch64InstrInfo *TII) {
2335   unsigned Opcode = MI.getOpcode();
2336   unsigned ImmIdx = FrameRegIdx + 1;
2337 
2338   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
2339     Offset += MI.getOperand(ImmIdx).getImm();
2340     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
2341                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
2342                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
2343     MI.eraseFromParent();
2344     Offset = 0;
2345     return true;
2346   }
2347 
2348   int NewOffset;
2349   unsigned UnscaledOp;
2350   bool UseUnscaledOp;
2351   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
2352                                          &UnscaledOp, &NewOffset);
2353   if (Status & AArch64FrameOffsetCanUpdate) {
2354     if (Status & AArch64FrameOffsetIsLegal)
2355       // Replace the FrameIndex with FrameReg.
2356       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2357     if (UseUnscaledOp)
2358       MI.setDesc(TII->get(UnscaledOp));
2359 
2360     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
2361     return Offset == 0;
2362   }
2363 
2364   return false;
2365 }
2366 
getNoopForMachoTarget(MCInst & NopInst) const2367 void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
2368   NopInst.setOpcode(AArch64::HINT);
2369   NopInst.addOperand(MCOperand::CreateImm(0));
2370 }
2371 /// useMachineCombiner - return true when a target supports MachineCombiner
useMachineCombiner() const2372 bool AArch64InstrInfo::useMachineCombiner() const {
2373   // AArch64 supports the combiner
2374   return true;
2375 }
2376 //
2377 // True when Opc sets flag
isCombineInstrSettingFlag(unsigned Opc)2378 static bool isCombineInstrSettingFlag(unsigned Opc) {
2379   switch (Opc) {
2380   case AArch64::ADDSWrr:
2381   case AArch64::ADDSWri:
2382   case AArch64::ADDSXrr:
2383   case AArch64::ADDSXri:
2384   case AArch64::SUBSWrr:
2385   case AArch64::SUBSXrr:
2386   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
2387   case AArch64::SUBSWri:
2388   case AArch64::SUBSXri:
2389     return true;
2390   default:
2391     break;
2392   }
2393   return false;
2394 }
2395 //
2396 // 32b Opcodes that can be combined with a MUL
isCombineInstrCandidate32(unsigned Opc)2397 static bool isCombineInstrCandidate32(unsigned Opc) {
2398   switch (Opc) {
2399   case AArch64::ADDWrr:
2400   case AArch64::ADDWri:
2401   case AArch64::SUBWrr:
2402   case AArch64::ADDSWrr:
2403   case AArch64::ADDSWri:
2404   case AArch64::SUBSWrr:
2405   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
2406   case AArch64::SUBWri:
2407   case AArch64::SUBSWri:
2408     return true;
2409   default:
2410     break;
2411   }
2412   return false;
2413 }
2414 //
2415 // 64b Opcodes that can be combined with a MUL
isCombineInstrCandidate64(unsigned Opc)2416 static bool isCombineInstrCandidate64(unsigned Opc) {
2417   switch (Opc) {
2418   case AArch64::ADDXrr:
2419   case AArch64::ADDXri:
2420   case AArch64::SUBXrr:
2421   case AArch64::ADDSXrr:
2422   case AArch64::ADDSXri:
2423   case AArch64::SUBSXrr:
2424   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
2425   case AArch64::SUBXri:
2426   case AArch64::SUBSXri:
2427     return true;
2428   default:
2429     break;
2430   }
2431   return false;
2432 }
2433 //
2434 // Opcodes that can be combined with a MUL
isCombineInstrCandidate(unsigned Opc)2435 static bool isCombineInstrCandidate(unsigned Opc) {
2436   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
2437 }
2438 
canCombineWithMUL(MachineBasicBlock & MBB,MachineOperand & MO,unsigned MulOpc,unsigned ZeroReg)2439 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
2440                               unsigned MulOpc, unsigned ZeroReg) {
2441   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2442   MachineInstr *MI = nullptr;
2443   // We need a virtual register definition.
2444   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
2445     MI = MRI.getUniqueVRegDef(MO.getReg());
2446   // And it needs to be in the trace (otherwise, it won't have a depth).
2447   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != MulOpc)
2448     return false;
2449 
2450   assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
2451          MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
2452          MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
2453 
2454   // The third input reg must be zero.
2455   if (MI->getOperand(3).getReg() != ZeroReg)
2456     return false;
2457 
2458   // Must only used by the user we combine with.
2459   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2460     return false;
2461 
2462   return true;
2463 }
2464 
2465 /// hasPattern - return true when there is potentially a faster code sequence
2466 /// for an instruction chain ending in \p Root. All potential patterns are
2467 /// listed
2468 /// in the \p Pattern vector. Pattern should be sorted in priority order since
2469 /// the pattern evaluator stops checking as soon as it finds a faster sequence.
2470 
hasPattern(MachineInstr & Root,SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> & Pattern) const2471 bool AArch64InstrInfo::hasPattern(
2472     MachineInstr &Root,
2473     SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const {
2474   unsigned Opc = Root.getOpcode();
2475   MachineBasicBlock &MBB = *Root.getParent();
2476   bool Found = false;
2477 
2478   if (!isCombineInstrCandidate(Opc))
2479     return 0;
2480   if (isCombineInstrSettingFlag(Opc)) {
2481     int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
2482     // When NZCV is live bail out.
2483     if (Cmp_NZCV == -1)
2484       return 0;
2485     unsigned NewOpc = convertFlagSettingOpcode(&Root);
2486     // When opcode can't change bail out.
2487     // CHECKME: do we miss any cases for opcode conversion?
2488     if (NewOpc == Opc)
2489       return 0;
2490     Opc = NewOpc;
2491   }
2492 
2493   switch (Opc) {
2494   default:
2495     break;
2496   case AArch64::ADDWrr:
2497     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
2498            "ADDWrr does not have register operands");
2499     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
2500                           AArch64::WZR)) {
2501       Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP1);
2502       Found = true;
2503     }
2504     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
2505                           AArch64::WZR)) {
2506       Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP2);
2507       Found = true;
2508     }
2509     break;
2510   case AArch64::ADDXrr:
2511     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
2512                           AArch64::XZR)) {
2513       Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP1);
2514       Found = true;
2515     }
2516     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
2517                           AArch64::XZR)) {
2518       Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP2);
2519       Found = true;
2520     }
2521     break;
2522   case AArch64::SUBWrr:
2523     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
2524                           AArch64::WZR)) {
2525       Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP1);
2526       Found = true;
2527     }
2528     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
2529                           AArch64::WZR)) {
2530       Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP2);
2531       Found = true;
2532     }
2533     break;
2534   case AArch64::SUBXrr:
2535     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
2536                           AArch64::XZR)) {
2537       Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP1);
2538       Found = true;
2539     }
2540     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
2541                           AArch64::XZR)) {
2542       Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP2);
2543       Found = true;
2544     }
2545     break;
2546   case AArch64::ADDWri:
2547     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
2548                           AArch64::WZR)) {
2549       Pattern.push_back(MachineCombinerPattern::MC_MULADDWI_OP1);
2550       Found = true;
2551     }
2552     break;
2553   case AArch64::ADDXri:
2554     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
2555                           AArch64::XZR)) {
2556       Pattern.push_back(MachineCombinerPattern::MC_MULADDXI_OP1);
2557       Found = true;
2558     }
2559     break;
2560   case AArch64::SUBWri:
2561     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
2562                           AArch64::WZR)) {
2563       Pattern.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1);
2564       Found = true;
2565     }
2566     break;
2567   case AArch64::SUBXri:
2568     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
2569                           AArch64::XZR)) {
2570       Pattern.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1);
2571       Found = true;
2572     }
2573     break;
2574   }
2575   return Found;
2576 }
2577 
2578 /// genMadd - Generate madd instruction and combine mul and add.
2579 /// Example:
2580 ///  MUL I=A,B,0
2581 ///  ADD R,I,C
2582 ///  ==> MADD R,A,B,C
2583 /// \param Root is the ADD instruction
2584 /// \param [out] InsInstrs is a vector of machine instructions and will
2585 /// contain the generated madd instruction
2586 /// \param IdxMulOpd is index of operand in Root that is the result of
2587 /// the MUL. In the example above IdxMulOpd is 1.
2588 /// \param MaddOpc the opcode fo the madd instruction
genMadd(MachineFunction & MF,MachineRegisterInfo & MRI,const TargetInstrInfo * TII,MachineInstr & Root,SmallVectorImpl<MachineInstr * > & InsInstrs,unsigned IdxMulOpd,unsigned MaddOpc,const TargetRegisterClass * RC)2589 static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI,
2590                              const TargetInstrInfo *TII, MachineInstr &Root,
2591                              SmallVectorImpl<MachineInstr *> &InsInstrs,
2592                              unsigned IdxMulOpd, unsigned MaddOpc,
2593                              const TargetRegisterClass *RC) {
2594   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
2595 
2596   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
2597   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
2598   unsigned ResultReg = Root.getOperand(0).getReg();
2599   unsigned SrcReg0 = MUL->getOperand(1).getReg();
2600   bool Src0IsKill = MUL->getOperand(1).isKill();
2601   unsigned SrcReg1 = MUL->getOperand(2).getReg();
2602   bool Src1IsKill = MUL->getOperand(2).isKill();
2603   unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
2604   bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
2605 
2606   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
2607     MRI.constrainRegClass(ResultReg, RC);
2608   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
2609     MRI.constrainRegClass(SrcReg0, RC);
2610   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
2611     MRI.constrainRegClass(SrcReg1, RC);
2612   if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
2613     MRI.constrainRegClass(SrcReg2, RC);
2614 
2615   MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
2616                                     ResultReg)
2617                                 .addReg(SrcReg0, getKillRegState(Src0IsKill))
2618                                 .addReg(SrcReg1, getKillRegState(Src1IsKill))
2619                                 .addReg(SrcReg2, getKillRegState(Src2IsKill));
2620   // Insert the MADD
2621   InsInstrs.push_back(MIB);
2622   return MUL;
2623 }
2624 
2625 /// genMaddR - Generate madd instruction and combine mul and add using
2626 /// an extra virtual register
2627 /// Example - an ADD intermediate needs to be stored in a register:
2628 ///   MUL I=A,B,0
2629 ///   ADD R,I,Imm
2630 ///   ==> ORR  V, ZR, Imm
2631 ///   ==> MADD R,A,B,V
2632 /// \param Root is the ADD instruction
2633 /// \param [out] InsInstrs is a vector of machine instructions and will
2634 /// contain the generated madd instruction
2635 /// \param IdxMulOpd is index of operand in Root that is the result of
2636 /// the MUL. In the example above IdxMulOpd is 1.
2637 /// \param MaddOpc the opcode fo the madd instruction
2638 /// \param VR is a virtual register that holds the value of an ADD operand
2639 /// (V in the example above).
genMaddR(MachineFunction & MF,MachineRegisterInfo & MRI,const TargetInstrInfo * TII,MachineInstr & Root,SmallVectorImpl<MachineInstr * > & InsInstrs,unsigned IdxMulOpd,unsigned MaddOpc,unsigned VR,const TargetRegisterClass * RC)2640 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
2641                               const TargetInstrInfo *TII, MachineInstr &Root,
2642                               SmallVectorImpl<MachineInstr *> &InsInstrs,
2643                               unsigned IdxMulOpd, unsigned MaddOpc,
2644                               unsigned VR, const TargetRegisterClass *RC) {
2645   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
2646 
2647   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
2648   unsigned ResultReg = Root.getOperand(0).getReg();
2649   unsigned SrcReg0 = MUL->getOperand(1).getReg();
2650   bool Src0IsKill = MUL->getOperand(1).isKill();
2651   unsigned SrcReg1 = MUL->getOperand(2).getReg();
2652   bool Src1IsKill = MUL->getOperand(2).isKill();
2653 
2654   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
2655     MRI.constrainRegClass(ResultReg, RC);
2656   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
2657     MRI.constrainRegClass(SrcReg0, RC);
2658   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
2659     MRI.constrainRegClass(SrcReg1, RC);
2660   if (TargetRegisterInfo::isVirtualRegister(VR))
2661     MRI.constrainRegClass(VR, RC);
2662 
2663   MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
2664                                     ResultReg)
2665                                 .addReg(SrcReg0, getKillRegState(Src0IsKill))
2666                                 .addReg(SrcReg1, getKillRegState(Src1IsKill))
2667                                 .addReg(VR);
2668   // Insert the MADD
2669   InsInstrs.push_back(MIB);
2670   return MUL;
2671 }
2672 
2673 /// genAlternativeCodeSequence - when hasPattern() finds a pattern
2674 /// this function generates the instructions that could replace the
2675 /// original code sequence
genAlternativeCodeSequence(MachineInstr & Root,MachineCombinerPattern::MC_PATTERN Pattern,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs,DenseMap<unsigned,unsigned> & InstrIdxForVirtReg) const2676 void AArch64InstrInfo::genAlternativeCodeSequence(
2677     MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
2678     SmallVectorImpl<MachineInstr *> &InsInstrs,
2679     SmallVectorImpl<MachineInstr *> &DelInstrs,
2680     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
2681   MachineBasicBlock &MBB = *Root.getParent();
2682   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2683   MachineFunction &MF = *MBB.getParent();
2684   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
2685 
2686   MachineInstr *MUL;
2687   const TargetRegisterClass *RC;
2688   unsigned Opc;
2689   switch (Pattern) {
2690   default:
2691     // signal error.
2692     break;
2693   case MachineCombinerPattern::MC_MULADDW_OP1:
2694   case MachineCombinerPattern::MC_MULADDX_OP1:
2695     // MUL I=A,B,0
2696     // ADD R,I,C
2697     // ==> MADD R,A,B,C
2698     // --- Create(MADD);
2699     if (Pattern == MachineCombinerPattern::MC_MULADDW_OP1) {
2700       Opc = AArch64::MADDWrrr;
2701       RC = &AArch64::GPR32RegClass;
2702     } else {
2703       Opc = AArch64::MADDXrrr;
2704       RC = &AArch64::GPR64RegClass;
2705     }
2706     MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
2707     break;
2708   case MachineCombinerPattern::MC_MULADDW_OP2:
2709   case MachineCombinerPattern::MC_MULADDX_OP2:
2710     // MUL I=A,B,0
2711     // ADD R,C,I
2712     // ==> MADD R,A,B,C
2713     // --- Create(MADD);
2714     if (Pattern == MachineCombinerPattern::MC_MULADDW_OP2) {
2715       Opc = AArch64::MADDWrrr;
2716       RC = &AArch64::GPR32RegClass;
2717     } else {
2718       Opc = AArch64::MADDXrrr;
2719       RC = &AArch64::GPR64RegClass;
2720     }
2721     MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
2722     break;
2723   case MachineCombinerPattern::MC_MULADDWI_OP1:
2724   case MachineCombinerPattern::MC_MULADDXI_OP1: {
2725     // MUL I=A,B,0
2726     // ADD R,I,Imm
2727     // ==> ORR  V, ZR, Imm
2728     // ==> MADD R,A,B,V
2729     // --- Create(MADD);
2730     const TargetRegisterClass *OrrRC;
2731     unsigned BitSize, OrrOpc, ZeroReg;
2732     if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) {
2733       OrrOpc = AArch64::ORRWri;
2734       OrrRC = &AArch64::GPR32spRegClass;
2735       BitSize = 32;
2736       ZeroReg = AArch64::WZR;
2737       Opc = AArch64::MADDWrrr;
2738       RC = &AArch64::GPR32RegClass;
2739     } else {
2740       OrrOpc = AArch64::ORRXri;
2741       OrrRC = &AArch64::GPR64spRegClass;
2742       BitSize = 64;
2743       ZeroReg = AArch64::XZR;
2744       Opc = AArch64::MADDXrrr;
2745       RC = &AArch64::GPR64RegClass;
2746     }
2747     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
2748     uint64_t Imm = Root.getOperand(2).getImm();
2749 
2750     if (Root.getOperand(3).isImm()) {
2751       unsigned Val = Root.getOperand(3).getImm();
2752       Imm = Imm << Val;
2753     }
2754     uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
2755     uint64_t Encoding;
2756     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
2757       MachineInstrBuilder MIB1 =
2758           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
2759               .addReg(ZeroReg)
2760               .addImm(Encoding);
2761       InsInstrs.push_back(MIB1);
2762       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
2763       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
2764     }
2765     break;
2766   }
2767   case MachineCombinerPattern::MC_MULSUBW_OP1:
2768   case MachineCombinerPattern::MC_MULSUBX_OP1: {
2769     // MUL I=A,B,0
2770     // SUB R,I, C
2771     // ==> SUB  V, 0, C
2772     // ==> MADD R,A,B,V // = -C + A*B
2773     // --- Create(MADD);
2774     const TargetRegisterClass *SubRC;
2775     unsigned SubOpc, ZeroReg;
2776     if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) {
2777       SubOpc = AArch64::SUBWrr;
2778       SubRC = &AArch64::GPR32spRegClass;
2779       ZeroReg = AArch64::WZR;
2780       Opc = AArch64::MADDWrrr;
2781       RC = &AArch64::GPR32RegClass;
2782     } else {
2783       SubOpc = AArch64::SUBXrr;
2784       SubRC = &AArch64::GPR64spRegClass;
2785       ZeroReg = AArch64::XZR;
2786       Opc = AArch64::MADDXrrr;
2787       RC = &AArch64::GPR64RegClass;
2788     }
2789     unsigned NewVR = MRI.createVirtualRegister(SubRC);
2790     // SUB NewVR, 0, C
2791     MachineInstrBuilder MIB1 =
2792         BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
2793             .addReg(ZeroReg)
2794             .addOperand(Root.getOperand(2));
2795     InsInstrs.push_back(MIB1);
2796     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
2797     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
2798     break;
2799   }
2800   case MachineCombinerPattern::MC_MULSUBW_OP2:
2801   case MachineCombinerPattern::MC_MULSUBX_OP2:
2802     // MUL I=A,B,0
2803     // SUB R,C,I
2804     // ==> MSUB R,A,B,C (computes C - A*B)
2805     // --- Create(MSUB);
2806     if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP2) {
2807       Opc = AArch64::MSUBWrrr;
2808       RC = &AArch64::GPR32RegClass;
2809     } else {
2810       Opc = AArch64::MSUBXrrr;
2811       RC = &AArch64::GPR64RegClass;
2812     }
2813     MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
2814     break;
2815   case MachineCombinerPattern::MC_MULSUBWI_OP1:
2816   case MachineCombinerPattern::MC_MULSUBXI_OP1: {
2817     // MUL I=A,B,0
2818     // SUB R,I, Imm
2819     // ==> ORR  V, ZR, -Imm
2820     // ==> MADD R,A,B,V // = -Imm + A*B
2821     // --- Create(MADD);
2822     const TargetRegisterClass *OrrRC;
2823     unsigned BitSize, OrrOpc, ZeroReg;
2824     if (Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1) {
2825       OrrOpc = AArch64::ORRWri;
2826       OrrRC = &AArch64::GPR32spRegClass;
2827       BitSize = 32;
2828       ZeroReg = AArch64::WZR;
2829       Opc = AArch64::MADDWrrr;
2830       RC = &AArch64::GPR32RegClass;
2831     } else {
2832       OrrOpc = AArch64::ORRXri;
2833       OrrRC = &AArch64::GPR64spRegClass;
2834       BitSize = 64;
2835       ZeroReg = AArch64::XZR;
2836       Opc = AArch64::MADDXrrr;
2837       RC = &AArch64::GPR64RegClass;
2838     }
2839     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
2840     int Imm = Root.getOperand(2).getImm();
2841     if (Root.getOperand(3).isImm()) {
2842       unsigned Val = Root.getOperand(3).getImm();
2843       Imm = Imm << Val;
2844     }
2845     uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize);
2846     uint64_t Encoding;
2847     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
2848       MachineInstrBuilder MIB1 =
2849           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
2850               .addReg(ZeroReg)
2851               .addImm(Encoding);
2852       InsInstrs.push_back(MIB1);
2853       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
2854       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
2855     }
2856     break;
2857   }
2858   } // end switch (Pattern)
2859   // Record MUL and ADD/SUB for deletion
2860   DelInstrs.push_back(MUL);
2861   DelInstrs.push_back(&Root);
2862 
2863   return;
2864 }
2865 
2866 /// \brief Replace csincr-branch sequence by simple conditional branch
2867 ///
2868 /// Examples:
2869 /// 1.
2870 ///   csinc  w9, wzr, wzr, <condition code>
2871 ///   tbnz   w9, #0, 0x44
2872 /// to
2873 ///   b.<inverted condition code>
2874 ///
2875 /// 2.
2876 ///   csinc w9, wzr, wzr, <condition code>
2877 ///   tbz   w9, #0, 0x44
2878 /// to
2879 ///   b.<condition code>
2880 ///
2881 /// \param  MI Conditional Branch
2882 /// \return True when the simple conditional branch is generated
2883 ///
optimizeCondBranch(MachineInstr * MI) const2884 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
2885   bool IsNegativeBranch = false;
2886   bool IsTestAndBranch = false;
2887   unsigned TargetBBInMI = 0;
2888   switch (MI->getOpcode()) {
2889   default:
2890     llvm_unreachable("Unknown branch instruction?");
2891   case AArch64::Bcc:
2892     return false;
2893   case AArch64::CBZW:
2894   case AArch64::CBZX:
2895     TargetBBInMI = 1;
2896     break;
2897   case AArch64::CBNZW:
2898   case AArch64::CBNZX:
2899     TargetBBInMI = 1;
2900     IsNegativeBranch = true;
2901     break;
2902   case AArch64::TBZW:
2903   case AArch64::TBZX:
2904     TargetBBInMI = 2;
2905     IsTestAndBranch = true;
2906     break;
2907   case AArch64::TBNZW:
2908   case AArch64::TBNZX:
2909     TargetBBInMI = 2;
2910     IsNegativeBranch = true;
2911     IsTestAndBranch = true;
2912     break;
2913   }
2914   // So we increment a zero register and test for bits other
2915   // than bit 0? Conservatively bail out in case the verifier
2916   // missed this case.
2917   if (IsTestAndBranch && MI->getOperand(1).getImm())
2918     return false;
2919 
2920   // Find Definition.
2921   assert(MI->getParent() && "Incomplete machine instruciton\n");
2922   MachineBasicBlock *MBB = MI->getParent();
2923   MachineFunction *MF = MBB->getParent();
2924   MachineRegisterInfo *MRI = &MF->getRegInfo();
2925   unsigned VReg = MI->getOperand(0).getReg();
2926   if (!TargetRegisterInfo::isVirtualRegister(VReg))
2927     return false;
2928 
2929   MachineInstr *DefMI = MRI->getVRegDef(VReg);
2930 
2931   // Look for CSINC
2932   if (!(DefMI->getOpcode() == AArch64::CSINCWr &&
2933         DefMI->getOperand(1).getReg() == AArch64::WZR &&
2934         DefMI->getOperand(2).getReg() == AArch64::WZR) &&
2935       !(DefMI->getOpcode() == AArch64::CSINCXr &&
2936         DefMI->getOperand(1).getReg() == AArch64::XZR &&
2937         DefMI->getOperand(2).getReg() == AArch64::XZR))
2938     return false;
2939 
2940   if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
2941     return false;
2942 
2943   AArch64CC::CondCode CC =
2944       (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
2945   bool CheckOnlyCCWrites = true;
2946   // Convert only when the condition code is not modified between
2947   // the CSINC and the branch. The CC may be used by other
2948   // instructions in between.
2949   if (modifiesConditionCode(DefMI, MI, CheckOnlyCCWrites, &getRegisterInfo()))
2950     return false;
2951   MachineBasicBlock &RefToMBB = *MBB;
2952   MachineBasicBlock *TBB = MI->getOperand(TargetBBInMI).getMBB();
2953   DebugLoc DL = MI->getDebugLoc();
2954   if (IsNegativeBranch)
2955     CC = AArch64CC::getInvertedCondCode(CC);
2956   BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
2957   MI->eraseFromParent();
2958   return true;
2959 }
2960