//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;
using namespace MIPatternMatch;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

  void setupMF(MachineFunction &MF, GISelKnownBits &KB,
               CodeGenCoverage &CoverageInfo) override {
    InstructionSelector::setupMF(MF, KB, CoverageInfo);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    MFReturnAddr = Register();

    processPHIs(MF);
  }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.
  // Returns true if the instruction was modified.
  bool preISelLower(MachineInstr &I);

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I) const;

  // Do some preprocessing of G_PHIs before we begin selection.
  void processPHIs(MachineFunction &MF);

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  bool contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI);

  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  ///@{
  /// Helper functions for selectCompareBranch.
  bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
                                    MachineIRBuilder &MIB) const;
  bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
                                  MachineBasicBlock *DstMBB,
                                  MachineIRBuilder &MIB) const;
  ///@}

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
                              MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;

  unsigned emitConstantPoolEntry(const Constant *CPVal,
                                 MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;

  // Emit an integer compare between LHS and RHS, which checks for Predicate.
  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                   MachineOperand &Predicate,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a floating point comparison between \p LHS and \p RHS.
  MachineInstr *emitFPCompare(Register LHS, Register RHS,
                              MachineIRBuilder &MIRBuilder) const;

  MachineInstr *emitInstr(unsigned Opcode,
                          std::initializer_list<llvm::DstOp> DstOps,
                          std::initializer_list<llvm::SrcOp> SrcOps,
                          MachineIRBuilder &MIRBuilder,
                          const ComplexRendererFns &RenderFns = None) const;
  /// Helper function to emit an add or sub instruction.
  ///
  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
  /// in a specific order.
  ///
  /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
  ///
  /// \code
  ///   const std::array<std::array<unsigned, 2>, 5> Table {
  ///    {{AArch64::ADDXri, AArch64::ADDWri},
  ///     {AArch64::ADDXrs, AArch64::ADDWrs},
  ///     {AArch64::ADDXrr, AArch64::ADDWrr},
  ///     {AArch64::SUBXri, AArch64::SUBWri},
  ///     {AArch64::ADDXrx, AArch64::ADDWrx}}};
  /// \endcode
  ///
  /// Each row in the table corresponds to a different addressing mode. Each
  /// column corresponds to a different register size.
  ///
  /// \attention Rows must be structured as follows:
  ///   - Row 0: The ri opcode variants
  ///   - Row 1: The rs opcode variants
  ///   - Row 2: The rr opcode variants
  ///   - Row 3: The ri opcode variants for negative immediates
  ///   - Row 4: The rx opcode variants
  ///
  /// \attention Columns must be structured as follows:
  ///   - Column 0: The 64-bit opcode variants
  ///   - Column 1: The 32-bit opcode variants
  ///
  /// \p Dst is the destination register of the binop to emit.
  /// \p LHS is the left-hand operand of the binop to emit.
  /// \p RHS is the right-hand operand of the binop to emit.
  MachineInstr *emitAddSub(
      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
      Register Dst, MachineOperand &LHS, MachineOperand &RHS,
      MachineIRBuilder &MIRBuilder) const;
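  // A caller such as emitADD below would presumably pass an ADD-flavored
  // table of this shape; a 64-bit register-register add would then pick
  // row 2, column 0 of that table (AArch64::ADDXrr).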
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                        MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
                           AArch64CC::CondCode CC,
                           MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using a FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return a nullptr.
  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) const;

  /// Emit a CSet for an integer compare.
  ///
  /// \p DefReg is expected to be a 32-bit scalar register.
  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder) const;
  /// Emit a CSet for a FP compare.
  ///
  /// \p Dst is expected to be a 32-bit scalar register.
  MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
                                MachineIRBuilder &MIRBuilder) const;

  /// Emit the overflow op for \p Opcode.
  ///
  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
  /// G_USUBO, etc.
  std::pair<MachineInstr *, AArch64CC::CondCode>
  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
                 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;

  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
  /// \p IsNegative is true if the test should be "not zero".
  /// This will also optimize the test bit instruction when possible.
  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
                            MachineBasicBlock *DstMBB,
                            MachineIRBuilder &MIB) const;

  /// Emit a CB(N)Z instruction which branches to \p DestMBB.
  MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
                        MachineBasicBlock *DestMBB,
                        MachineIRBuilder &MIB) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }
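
  // The wrappers above bind a fixed access size to the shared
  // selectAddrModeUnscaled() helper; the numeric suffix is the access width
  // in bits, so e.g. selectAddrModeUnscaled64 passes an 8-byte size.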

  /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
  /// from complex pattern matchers like selectAddrModeIndexed().
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
                                          MachineRegisterInfo &MRI) const;

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }
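
  // Likewise, the Width template parameter is the access width in bits; e.g.
  // selectAddrModeIndexed<64> forwards an 8-byte size (Width / 8) to the
  // overload above.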

  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;

  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
  /// or not a shift + extend should be folded into an addressing mode. Returns
  /// None when this is not profitable or possible.
  ComplexRendererFns
  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
                    MachineOperand &Offset, unsigned SizeInBytes,
                    bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

  ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }

  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    // TODO: selectShiftedRegister should allow for rotates on logical shifts.
    // For now, make them the same. The only difference between the two is that
    // logical shifts are allowed to fold in rotates. Otherwise, these are
    // functionally the same.
    return selectShiftedRegister(Root);
  }

  /// Given an extend instruction, determine the correct shift-extend type for
  /// that instruction.
  ///
  /// If the instruction is going to be used in a load or store, pass
  /// \p IsLoadStore = true.
  AArch64_AM::ShiftExtendType
  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                       bool IsLoadStore = false) const;

  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
  ///
  /// \returns Either \p Reg if no change was necessary, or the new register
  /// created by moving \p Reg.
  ///
  /// Note: This uses emitCopy right now.
  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
                              MachineIRBuilder &MIB) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned OpFlags) const;

  // Optimization methods.
  bool tryOptSelect(MachineInstr &MI) const;
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;

  /// Return true if \p MI is a load or store of \p NumBytes bytes.
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
  /// register zeroed out. In other words, the result of MI has been explicitly
  /// zero extended.
  bool isDef32(const MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

  bool ProduceNonFlagSettingCondBr = false;

  // Some cached values used during selection.
  // We use LR as a live-in register, and we keep track of it here as it can be
  // clobbered by calls.
  Register MFReturnAddr;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}
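
// For example, a 16-bit value on the FPR bank maps to FPR16, while any GPR
// request of 32 bits or fewer maps to GPR32, since there is no narrower GPR
// class.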

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC != &AArch64::FPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Returns the minimum size the given register bank can hold.
static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
    return 32;
  case AArch64::FPRRegBankID:
    return 8;
  default:
    llvm_unreachable("Tried to get minimum size for unknown register bank.");
  }
}

static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
  auto &MI = *Root.getParent();
  auto &MBB = *MI.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();
  uint64_t Immed;
  if (Root.isImm())
    Immed = Root.getImm();
  else if (Root.isCImm())
    Immed = Root.getCImm()->getZExtValue();
  else if (Root.isReg()) {
    auto ValAndVReg =
        getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
    if (!ValAndVReg)
      return None;
    Immed = ValAndVReg->Value;
  } else
    return None;
  return Immed;
}
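
// For instance, a Root operand that is a vreg defined (possibly through
// copies) by G_CONSTANT i64 42 yields 42; a non-constant register operand
// yields None.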

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - all operands are not in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical registers operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!Register::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}
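
// Example: selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 64)
// returns AArch64::LSLVXr; any combination not listed above falls through and
// returns the generic opcode unchanged.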

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize.  This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}
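
// For instance, a 32-bit G_LOAD whose value lives on the GPR bank becomes
// AArch64::LDRWui, while the same-width load on the FPR bank becomes
// AArch64::LDRSui; OpSize is the memory access size in bits.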

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, the number of
       // bits may not exactly match.
       (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// to \p *To.
///
/// E.g "To = COPY SrcReg:SubReg"
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
                       const RegisterBankInfo &RBI, Register SrcReg,
                       const TargetRegisterClass *To, unsigned SubReg) {
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  assert(SubReg && "Expected a valid subregister");

  MachineIRBuilder MIB(I);
  auto SubRegCopy =
      MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple helpers below, for making sure that the copy we produce is valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert((I.isCopy() ||
            (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
             !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
           "No phys reg on generic operator!");
    bool ValidCopy = true;
#ifndef NDEBUG
    ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
    assert(ValidCopy && "Invalid copy.");
#endif
    return ValidCopy;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
    unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(DstRC, TRI, SubReg);

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
      copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SrcRC, TRI, SubReg);

      Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(SubReg);
      MachineOperand &RegOp = I.getOperand(1);
      RegOp.setReg(PromoteReg);

      // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
      KnownValid = true;
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (Register::isPhysicalRegister(DstReg))
      return CheckCopy();
  }
  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}
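
// Example: a G_SITOFP from s32 to s64 (DstSize 64, SrcSize 32) selects
// AArch64::SCVTFUWDri, and a G_FPTOSI from s64 to s32 selects
// AArch64::FCVTZSUWDr; unhandled combinations return the generic opcode.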

MachineInstr *
AArch64InstructionSelector::emitSelect(Register Dst, Register True,
                                       Register False, AArch64CC::CondCode CC,
                                       MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?");
  LLT Ty = MRI.getType(True);
  if (Ty.isVector())
    return nullptr;
  const unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) &&
         "Expected 32 bit or 64 bit select only?");
  const bool Is32Bit = Size == 32;
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
    constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
    return &*FCSel;
  }

  // By default, we'll try and emit a CSEL.
  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
  bool Optimized = false;
  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
                                 &Optimized](Register &Reg, Register &OtherReg,
                                             bool Invert) {
    if (Optimized)
      return false;

    // Attempt to fold:
    //
    // %sub = G_SUB 0, %x
    // %select = G_SELECT cc, %reg, %sub
    //
    // Into:
    // %select = CSNEG %reg, %x, cc
    Register MatchReg;
    if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %xor = G_XOR %x, -1
    // %select = G_SELECT cc, %reg, %xor
    //
    // Into:
    // %select = CSINV %reg, %x, cc
    if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %add = G_ADD %x, 1
    // %select = G_SELECT cc, %reg, %add
    //
    // Into:
    // %select = CSINC %reg, %x, cc
    if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) {
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    return false;
  };

  // Helper lambda which tries to use CSINC/CSINV for the instruction when its
  // true/false values are constants.
  // FIXME: All of these patterns already exist in tablegen. We should be
  // able to import these.
  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
                          &Optimized]() {
    if (Optimized)
      return false;
    auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
    auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
    if (!TrueCst && !FalseCst)
      return false;

    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      auto T = TrueCst->Value;
      auto F = FalseCst->Value;

      if (T == 0 && F == 1) {
        // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = ZReg;
        False = ZReg;
        return true;
      }

      if (T == 0 && F == -1) {
        // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = ZReg;
        False = ZReg;
        return true;
      }
    }

    if (TrueCst) {
      auto T = TrueCst->Value;
      if (T == 1) {
        // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
        return true;
      }

      if (T == -1) {
        // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
        return true;
      }
    }

    if (FalseCst) {
      auto F = FalseCst->Value;
      if (F == 1) {
        // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        False = ZReg;
        return true;
      }

      if (F == -1) {
        // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        False = ZReg;
        return true;
      }
    }
    return false;
  };

  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
  Optimized |= TryOptSelectCst();
  auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
  return &*SelectInst;
}
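
// Putting the folds above together: G_SELECT cc, %t, (G_SUB 0, %x) becomes
// CSNEG %t, %x, cc, and G_SELECT cc, 1, %f becomes CSINC %f, wzr/xzr with the
// inverted condition; when nothing matches, a plain CSEL is emitted.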

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}
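
// CondCode2 stays AArch64CC::AL except for FCMP_ONE and FCMP_UEQ, which need
// two condition codes because a single AArch64 condition cannot express them
// (e.g. "ordered and not equal" is tested as MI-or-GT).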
1260 
1261 /// Return a register which can be used as a bit to test in a TB(N)Z.
getTestBitReg(Register Reg,uint64_t & Bit,bool & Invert,MachineRegisterInfo & MRI)1262 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1263                               MachineRegisterInfo &MRI) {
1264   assert(Reg.isValid() && "Expected valid register!");
1265   while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1266     unsigned Opc = MI->getOpcode();
1267 
1268     if (!MI->getOperand(0).isReg() ||
1269         !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1270       break;
1271 
1272     // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1273     //
1274     // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1275     // on the truncated x is the same as the bit number on x.
1276     if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1277         Opc == TargetOpcode::G_TRUNC) {
1278       Register NextReg = MI->getOperand(1).getReg();
1279       // Did we find something worth folding?
1280       if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1281         break;
1282 
1283       // NextReg is worth folding. Keep looking.
1284       Reg = NextReg;
1285       continue;
1286     }
1287 
1288     // Attempt to find a suitable operation with a constant on one side.
1289     Optional<uint64_t> C;
1290     Register TestReg;
1291     switch (Opc) {
1292     default:
1293       break;
1294     case TargetOpcode::G_AND:
1295     case TargetOpcode::G_XOR: {
1296       TestReg = MI->getOperand(1).getReg();
1297       Register ConstantReg = MI->getOperand(2).getReg();
1298       auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1299       if (!VRegAndVal) {
1300         // AND commutes, check the other side for a constant.
1301         // FIXME: Can we canonicalize the constant so that it's always on the
1302         // same side at some point earlier?
1303         std::swap(ConstantReg, TestReg);
1304         VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1305       }
1306       if (VRegAndVal)
1307         C = VRegAndVal->Value;
1308       break;
1309     }
1310     case TargetOpcode::G_ASHR:
1311     case TargetOpcode::G_LSHR:
1312     case TargetOpcode::G_SHL: {
1313       TestReg = MI->getOperand(1).getReg();
1314       auto VRegAndVal =
1315           getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1316       if (VRegAndVal)
1317         C = VRegAndVal->Value;
1318       break;
1319     }
1320     }
1321 
1322     // Didn't find a constant or viable register. Bail out of the loop.
1323     if (!C || !TestReg.isValid())
1324       break;
1325 
1326     // We found a suitable instruction with a constant. Check to see if we can
1327     // walk through the instruction.
1328     Register NextReg;
1329     unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1330     switch (Opc) {
1331     default:
1332       break;
1333     case TargetOpcode::G_AND:
1334       // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
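      // e.g. (tbz (and x, 0b0100), 2) -> (tbz x, 2), since bit 2 of the mask
      // is set.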
1335       if ((*C >> Bit) & 1)
1336         NextReg = TestReg;
1337       break;
1338     case TargetOpcode::G_SHL:
1339       // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1340       // the type of the register.
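      // e.g. (tbz (shl x, 3), 5) -> (tbz x, 2): bit 5 of (x << 3) is bit 2 of x.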
1341       if (*C <= Bit && (Bit - *C) < TestRegSize) {
1342         NextReg = TestReg;
1343         Bit = Bit - *C;
1344       }
1345       break;
1346     case TargetOpcode::G_ASHR:
1347       // (tbz (ashr x, c), b) -> (tbz x, b+c), or (tbz x, msb) if b+c is >= the
1348       // number of bits in x (ashr replicates the sign bit).
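      // e.g. (tbz (ashr x, 3), 2) -> (tbz x, 5); for a 32-bit x,
      // (tbz (ashr x, 16), 20) -> (tbz x, 31), since bit 36 doesn't exist.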
1349       NextReg = TestReg;
1350       Bit = Bit + *C;
1351       if (Bit >= TestRegSize)
1352         Bit = TestRegSize - 1;
1353       break;
1354     case TargetOpcode::G_LSHR:
1355       // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
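      // e.g. (tbz (lshr x, 2), 3) -> (tbz x, 5).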
1356       if ((Bit + *C) < TestRegSize) {
1357         NextReg = TestReg;
1358         Bit = Bit + *C;
1359       }
1360       break;
1361     case TargetOpcode::G_XOR:
1362       // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1363       // appropriate.
1364       //
1365       // e.g. If x' = xor x, c, and the b-th bit is set in c then
1366       //
1367       // tbz x', b -> tbnz x, b
1368       //
1369       // Because x' only has the b-th bit set if x does not.
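      // e.g. (tbz (xor x, 0b0100), 2) -> (tbnz x, 2).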
1370       if ((*C >> Bit) & 1)
1371         Invert = !Invert;
1372       NextReg = TestReg;
1373       break;
1374     }
1375 
1376     // Check if we found anything worth folding.
1377     if (!NextReg.isValid())
1378       return Reg;
1379     Reg = NextReg;
1380   }
1381 
1382   return Reg;
1383 }
1384 
1385 MachineInstr *AArch64InstructionSelector::emitTestBit(
1386     Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1387     MachineIRBuilder &MIB) const {
1388   assert(TestReg.isValid());
1389   assert(ProduceNonFlagSettingCondBr &&
1390          "Cannot emit TB(N)Z with speculation tracking!");
1391   MachineRegisterInfo &MRI = *MIB.getMRI();
1392 
1393   // Attempt to optimize the test bit by walking over instructions.
1394   TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1395   LLT Ty = MRI.getType(TestReg);
1396   unsigned Size = Ty.getSizeInBits();
1397   assert(!Ty.isVector() && "Expected a scalar!");
1398   assert(Bit < 64 && "Bit is too large!");
1399 
1400   // If the test register isn't the width we need (a W register when testing
1401   // bits 0-31, an X register otherwise), move it to the right register class.
1402   bool UseWReg = Bit < 32;
1403   unsigned NecessarySize = UseWReg ? 32 : 64;
1404   if (Size != NecessarySize)
1405     TestReg = moveScalarRegClass(
1406         TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1407         MIB);
1408 
1409   static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1410                                           {AArch64::TBZW, AArch64::TBNZW}};
1411   unsigned Opc = OpcTable[UseWReg][IsNegative];
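  // e.g. testing bit 40 (an X-register bit) with IsNegative set picks TBNZX;
  // testing bit 3 with IsNegative clear picks TBZW.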
1412   auto TestBitMI =
1413       MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1414   constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1415   return &*TestBitMI;
1416 }
1417 
1418 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1419     MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1420     MachineIRBuilder &MIB) const {
1421   assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1422   // Given something like this:
1423   //
1424   //  %x = ...Something...
1425   //  %one = G_CONSTANT i64 1
1426   //  %zero = G_CONSTANT i64 0
1427   //  %and = G_AND %x, %one
1428   //  %cmp = G_ICMP intpred(ne), %and, %zero
1429   //  %cmp_trunc = G_TRUNC %cmp
1430   //  G_BRCOND %cmp_trunc, %bb.3
1431   //
1432   // We want to try and fold the AND into the G_BRCOND and produce either a
1433   // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1434   //
1435   // In this case, we'd get
1436   //
1437   // TBNZ %x %bb.3
1438   //
1439 
1440   // Check if the AND has a constant on its RHS which we can use as a mask.
1441   // If it's a power of 2, then it's the same as checking a specific bit.
1442   // (e.g., ANDing with 8 == ANDing with 0b1000 == testing if bit 3 is set)
1443   auto MaybeBit = getConstantVRegValWithLookThrough(
1444       AndInst.getOperand(2).getReg(), *MIB.getMRI());
1445   if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
1446     return false;
1447 
1448   uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
1449   Register TestReg = AndInst.getOperand(1).getReg();
1450 
1451   // Emit a TB(N)Z.
1452   emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1453   return true;
1454 }
1455 
1456 MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1457                                                   bool IsNegative,
1458                                                   MachineBasicBlock *DestMBB,
1459                                                   MachineIRBuilder &MIB) const {
1460   assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1461   MachineRegisterInfo &MRI = *MIB.getMRI();
1462   assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1463              AArch64::GPRRegBankID &&
1464          "Expected GPRs only?");
1465   auto Ty = MRI.getType(CompareReg);
1466   unsigned Width = Ty.getSizeInBits();
1467   assert(!Ty.isVector() && "Expected scalar only?");
1468   assert(Width <= 64 && "Expected width to be at most 64?");
1469   static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1470                                           {AArch64::CBNZW, AArch64::CBNZX}};
1471   unsigned Opc = OpcTable[IsNegative][Width == 64];
1472   auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1473   constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1474   return &*BranchMI;
1475 }
1476 
1477 bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1478     MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1479   assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1480   assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1481   // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1482   // totally clean.  Some of them require two branches to implement.
1483   emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB);
1484   AArch64CC::CondCode CC1, CC2;
1485   changeFCMPPredToAArch64CC(
1486       static_cast<CmpInst::Predicate>(FCmp.getOperand(1).getPredicate()), CC1,
1487       CC2);
1488   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1489   MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1490   if (CC2 != AArch64CC::AL)
1491     MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1492   I.eraseFromParent();
1493   return true;
1494 }
1495 
1496 bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1497     MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1498   assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1499   assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1500   // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1501   //
1502   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1503   // instructions will not be produced, as they are conditional branch
1504   // instructions that do not set flags.
1505   if (!ProduceNonFlagSettingCondBr)
1506     return false;
1507 
1508   MachineRegisterInfo &MRI = *MIB.getMRI();
1509   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1510   auto Pred =
1511       static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1512   Register LHS = ICmp.getOperand(2).getReg();
1513   Register RHS = ICmp.getOperand(3).getReg();
1514 
1515   // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1516   auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1517   MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1518 
1519   // When we can emit a TB(N)Z, prefer that.
1520   //
1521   // Handle non-commutative condition codes first.
1522   // Note that we don't want to do this when we have a G_AND because it can
1523   // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1524   if (VRegAndVal && !AndInst) {
1525     int64_t C = VRegAndVal->Value;
1526 
1527     // For a signed x > -1 comparison, it's enough to test that the msb of x
1528     // is zero.
1529     if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1530       uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1531       emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1532       I.eraseFromParent();
1533       return true;
1534     }
1535 
1536     // For a signed x < 0 comparison, it's enough to test that the msb of x
1537     // is set.
1538     if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1539       uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1540       emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1541       I.eraseFromParent();
1542       return true;
1543     }
1544   }
1545 
1546   // Attempt to handle commutative condition codes. Right now, that's only
1547   // eq/ne.
1548   if (ICmpInst::isEquality(Pred)) {
1549     if (!VRegAndVal) {
1550       std::swap(RHS, LHS);
1551       VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1552       AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1553     }
1554 
1555     if (VRegAndVal && VRegAndVal->Value == 0) {
1556       // If there's a G_AND feeding into this branch, try to fold it away by
1557       // emitting a TB(N)Z instead.
1558       //
1559       // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1560       // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1561       // would be redundant.
1562       if (AndInst &&
1563           tryOptAndIntoCompareBranch(
1564               *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1565         I.eraseFromParent();
1566         return true;
1567       }
1568 
1569       // Otherwise, try to emit a CB(N)Z instead.
1570       auto LHSTy = MRI.getType(LHS);
1571       if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1572         emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1573         I.eraseFromParent();
1574         return true;
1575       }
1576     }
1577   }
1578 
1579   return false;
1580 }
1581 
1582 bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1583     MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1584   assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1585   assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1586   if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1587     return true;
1588 
1589   // Couldn't optimize. Emit a compare + a Bcc.
1590   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1591   auto PredOp = ICmp.getOperand(1);
1592   emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1593   const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1594       static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1595   MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1596   I.eraseFromParent();
1597   return true;
1598 }
1599 
1600 bool AArch64InstructionSelector::selectCompareBranch(
1601     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1602   Register CondReg = I.getOperand(0).getReg();
1603   MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1604   if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1605     CondReg = CCMI->getOperand(1).getReg();
1606     CCMI = MRI.getVRegDef(CondReg);
1607   }
1608 
1609   // Try to select the G_BRCOND using whatever is feeding the condition if
1610   // possible.
1611   MachineIRBuilder MIB(I);
1612   unsigned CCMIOpc = CCMI->getOpcode();
1613   if (CCMIOpc == TargetOpcode::G_FCMP)
1614     return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1615   if (CCMIOpc == TargetOpcode::G_ICMP)
1616     return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1617 
1618   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1619   // instructions will not be produced, as they are conditional branch
1620   // instructions that do not set flags.
1621   if (ProduceNonFlagSettingCondBr) {
1622     emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1623                 I.getOperand(1).getMBB(), MIB);
1624     I.eraseFromParent();
1625     return true;
1626   }
1627 
1628   // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1629   auto TstMI =
1630       MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1631   constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1632   auto Bcc = MIB.buildInstr(AArch64::Bcc)
1633                  .addImm(AArch64CC::EQ)
1634                  .addMBB(I.getOperand(1).getMBB());
1635   I.eraseFromParent();
1636   return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1637 }
1638 
1639 /// Returns the element immediate value of a vector shift operand if found.
1640 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1641 static Optional<int64_t> getVectorShiftImm(Register Reg,
1642                                            MachineRegisterInfo &MRI) {
1643   assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1644   MachineInstr *OpMI = MRI.getVRegDef(Reg);
1645   assert(OpMI && "Expected to find a vreg def for vector shift operand");
1646   if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
1647     return None;
1648 
1649   // Check all operands are identical immediates.
1650   int64_t ImmVal = 0;
1651   for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
1652     auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
1653     if (!VRegAndVal)
1654       return None;
1655 
1656     if (Idx == 1)
1657       ImmVal = VRegAndVal->Value;
1658     if (ImmVal != VRegAndVal->Value)
1659       return None;
1660   }
1661 
1662   return ImmVal;
1663 }
1664 
1665 /// Matches and returns the shift immediate value for a SHL instruction given
1666 /// a shift operand.
1667 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1668   Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1669   if (!ShiftImm)
1670     return None;
1671   // Check the immediate is in range for a SHL.
1672   int64_t Imm = *ShiftImm;
1673   if (Imm < 0)
1674     return None;
1675   switch (SrcTy.getElementType().getSizeInBits()) {
1676   default:
1677     LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift\n");
1678     return None;
1679   case 8:
1680     if (Imm > 7)
1681       return None;
1682     break;
1683   case 16:
1684     if (Imm > 15)
1685       return None;
1686     break;
1687   case 32:
1688     if (Imm > 31)
1689       return None;
1690     break;
1691   case 64:
1692     if (Imm > 63)
1693       return None;
1694     break;
1695   }
1696   return Imm;
1697 }
1698 
1699 bool AArch64InstructionSelector::selectVectorSHL(
1700     MachineInstr &I, MachineRegisterInfo &MRI) const {
1701   assert(I.getOpcode() == TargetOpcode::G_SHL);
1702   Register DstReg = I.getOperand(0).getReg();
1703   const LLT Ty = MRI.getType(DstReg);
1704   Register Src1Reg = I.getOperand(1).getReg();
1705   Register Src2Reg = I.getOperand(2).getReg();
1706 
1707   if (!Ty.isVector())
1708     return false;
1709 
1710   // Check if we have a vector of constants on RHS that we can select as the
1711   // immediate form.
1712   Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1713 
1714   unsigned Opc = 0;
1715   if (Ty == LLT::vector(2, 64)) {
1716     Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1717   } else if (Ty == LLT::vector(4, 32)) {
1718     Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1719   } else if (Ty == LLT::vector(2, 32)) {
1720     Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1721   } else if (Ty == LLT::vector(4, 16)) {
1722     Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1723   } else if (Ty == LLT::vector(8, 16)) {
1724     Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1725   } else if (Ty == LLT::vector(16, 8)) {
1726     Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1727   } else if (Ty == LLT::vector(8, 8)) {
1728     Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1729   } else {
1730     LLVM_DEBUG(dbgs() << "Unhandled G_SHL type\n");
1731     return false;
1732   }
1733 
1734   MachineIRBuilder MIB(I);
1735   auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1736   if (ImmVal)
1737     Shl.addImm(*ImmVal);
1738   else
1739     Shl.addUse(Src2Reg);
1740   constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1741   I.eraseFromParent();
1742   return true;
1743 }
1744 
1745 bool AArch64InstructionSelector::selectVectorAshrLshr(
1746     MachineInstr &I, MachineRegisterInfo &MRI) const {
1747   assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1748          I.getOpcode() == TargetOpcode::G_LSHR);
1749   Register DstReg = I.getOperand(0).getReg();
1750   const LLT Ty = MRI.getType(DstReg);
1751   Register Src1Reg = I.getOperand(1).getReg();
1752   Register Src2Reg = I.getOperand(2).getReg();
1753 
1754   if (!Ty.isVector())
1755     return false;
1756 
1757   bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1758 
1759   // We expect the immediate case to be lowered in the PostLegalCombiner to
1760   // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1761 
1762   // There is no vector shift right by register instruction. Instead, the
1763   // shift left by register instructions take a signed shift amount, where a
1764   // negative value specifies a right shift.
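  // e.g. a v4s32 G_LSHR by a v4s32 amount becomes a NEGv4i32 of the shift
  // amount followed by USHLv4i32, shifting each lane left by a negative
  // (i.e. right) amount.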
1765 
1766   unsigned Opc = 0;
1767   unsigned NegOpc = 0;
1768   const TargetRegisterClass *RC =
1769       getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1770   if (Ty == LLT::vector(2, 64)) {
1771     Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1772     NegOpc = AArch64::NEGv2i64;
1773   } else if (Ty == LLT::vector(4, 32)) {
1774     Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1775     NegOpc = AArch64::NEGv4i32;
1776   } else if (Ty == LLT::vector(2, 32)) {
1777     Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1778     NegOpc = AArch64::NEGv2i32;
1779   } else if (Ty == LLT::vector(4, 16)) {
1780     Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1781     NegOpc = AArch64::NEGv4i16;
1782   } else if (Ty == LLT::vector(8, 16)) {
1783     Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1784     NegOpc = AArch64::NEGv8i16;
1785   } else if (Ty == LLT::vector(16, 8)) {
1786     Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1787     NegOpc = AArch64::NEGv16i8;
1788   } else if (Ty == LLT::vector(8, 8)) {
1789     Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1790     NegOpc = AArch64::NEGv8i8;
1791   } else {
1792     LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type\n");
1793     return false;
1794   }
1795 
1796   MachineIRBuilder MIB(I);
1797   auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1798   constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1799   auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1800   constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1801   I.eraseFromParent();
1802   return true;
1803 }
1804 
1805 bool AArch64InstructionSelector::selectVaStartAAPCS(
1806     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1807   return false;
1808 }
1809 
1810 bool AArch64InstructionSelector::selectVaStartDarwin(
1811     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1812   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1813   Register ListReg = I.getOperand(0).getReg();
1814 
1815   Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1816 
1817   auto MIB =
1818       BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1819           .addDef(ArgsAddrReg)
1820           .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1821           .addImm(0)
1822           .addImm(0);
1823 
1824   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1825 
1826   MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1827             .addUse(ArgsAddrReg)
1828             .addUse(ListReg)
1829             .addImm(0)
1830             .addMemOperand(*I.memoperands_begin());
1831 
1832   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1833   I.eraseFromParent();
1834   return true;
1835 }
1836 
1837 void AArch64InstructionSelector::materializeLargeCMVal(
1838     MachineInstr &I, const Value *V, unsigned OpFlags) const {
1839   MachineBasicBlock &MBB = *I.getParent();
1840   MachineFunction &MF = *MBB.getParent();
1841   MachineRegisterInfo &MRI = MF.getRegInfo();
1842   MachineIRBuilder MIB(I);
1843 
1844   auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1845   MovZ->addOperand(MF, I.getOperand(1));
1846   MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1847                                      AArch64II::MO_NC);
1848   MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1849   constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1850 
1851   auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1852                        Register ForceDstReg) {
1853     Register DstReg = ForceDstReg
1854                           ? ForceDstReg
1855                           : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1856     auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1857     if (auto *GV = dyn_cast<GlobalValue>(V)) {
1858       MovI->addOperand(MF, MachineOperand::CreateGA(
1859                                GV, MovZ->getOperand(1).getOffset(), Flags));
1860     } else {
1861       MovI->addOperand(
1862           MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1863                                        MovZ->getOperand(1).getOffset(), Flags));
1864     }
1865     MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1866     constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1867     return DstReg;
1868   };
1869   Register DstReg = BuildMovK(MovZ.getReg(0),
1870                               AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1871   DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1872   BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1873   return;
1874 }
1875 
1876 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1877   MachineBasicBlock &MBB = *I.getParent();
1878   MachineFunction &MF = *MBB.getParent();
1879   MachineRegisterInfo &MRI = MF.getRegInfo();
1880 
1881   switch (I.getOpcode()) {
1882   case TargetOpcode::G_SHL:
1883   case TargetOpcode::G_ASHR:
1884   case TargetOpcode::G_LSHR: {
1885     // These shifts are legalized to have 64 bit shift amounts because we want
1886     // to take advantage of the existing imported selection patterns that assume
1887     // the immediates are s64s. However, if the shifted type is 32 bits and for
1888     // some reason we receive input GMIR that has an s64 shift amount that's not
1889     // a G_CONSTANT, insert a truncate so that we can still select the s32
1890     // register-register variant.
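    // e.g. for %d:(s32) = G_SHL %x:(s32), %amt:(s64) where %amt is not a
    // G_CONSTANT, the COPY built below reads %amt's sub_32 subregister, giving
    // an s32 amount that the register-register patterns can use.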
1891     Register SrcReg = I.getOperand(1).getReg();
1892     Register ShiftReg = I.getOperand(2).getReg();
1893     const LLT ShiftTy = MRI.getType(ShiftReg);
1894     const LLT SrcTy = MRI.getType(SrcReg);
1895     if (SrcTy.isVector())
1896       return false;
1897     assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1898     if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1899       return false;
1900     auto *AmtMI = MRI.getVRegDef(ShiftReg);
1901     assert(AmtMI && "could not find a vreg definition for shift amount");
1902     if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1903       // Insert a subregister copy to implement a 64->32 trunc
1904       MachineIRBuilder MIB(I);
1905       auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1906                        .addReg(ShiftReg, 0, AArch64::sub_32);
1907       MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1908       I.getOperand(2).setReg(Trunc.getReg(0));
1909     }
1910     return true;
1911   }
1912   case TargetOpcode::G_STORE:
1913     return contractCrossBankCopyIntoStore(I, MRI);
1914   case TargetOpcode::G_PTR_ADD:
1915     return convertPtrAddToAdd(I, MRI);
1916   case TargetOpcode::G_LOAD: {
1917     // For scalar loads of pointers, we try to convert the dest type from p0
1918     // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1919     // conversion, this should be ok because all users should have been
1920     // selected already, so the type doesn't matter for them.
1921     Register DstReg = I.getOperand(0).getReg();
1922     const LLT DstTy = MRI.getType(DstReg);
1923     if (!DstTy.isPointer())
1924       return false;
1925     MRI.setType(DstReg, LLT::scalar(64));
1926     return true;
1927   }
1928   case AArch64::G_DUP: {
1929     // Convert the type from p0 to s64 to help selection.
1930     LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1931     if (!DstTy.getElementType().isPointer())
1932       return false;
1933     MachineIRBuilder MIB(I);
1934     auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
1935     MRI.setType(I.getOperand(0).getReg(),
1936                 DstTy.changeElementType(LLT::scalar(64)));
1937     MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1938     I.getOperand(1).setReg(NewSrc.getReg(0));
1939     return true;
1940   }
1941   default:
1942     return false;
1943   }
1944 }
1945 
1946 /// This lowering tries to look for G_PTR_ADD instructions and then converts
1947 /// them to a standard G_ADD with a COPY on the source.
1948 ///
1949 /// The motivation behind this is to expose the add semantics to the imported
1950 /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
1951 /// because the selector works bottom up, uses before defs. By the time we
1952 /// end up trying to select a G_PTR_ADD, we should have already attempted to
1953 /// fold this into addressing modes and were therefore unsuccessful.
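///
/// For example, %dst:gpr(p0) = G_PTR_ADD %base(p0), %off(s64) becomes
/// %intbase:gpr(s64) = G_PTRTOINT %base(p0) followed by
/// %dst(s64) = G_ADD %intbase, %off.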
1954 bool AArch64InstructionSelector::convertPtrAddToAdd(
1955     MachineInstr &I, MachineRegisterInfo &MRI) {
1956   assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1957   Register DstReg = I.getOperand(0).getReg();
1958   Register AddOp1Reg = I.getOperand(1).getReg();
1959   const LLT PtrTy = MRI.getType(DstReg);
1960   if (PtrTy.getAddressSpace() != 0)
1961     return false;
1962 
1963   MachineIRBuilder MIB(I);
1964   const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
1965   auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
1966   // Set regbanks on the registers.
1967   if (PtrTy.isVector())
1968     MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
1969   else
1970     MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1971 
1972   // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
1973   // %dst(intty) = G_ADD %intbase, off
1974   I.setDesc(TII.get(TargetOpcode::G_ADD));
1975   MRI.setType(DstReg, CastPtrTy);
1976   I.getOperand(1).setReg(PtrToInt.getReg(0));
1977   if (!select(*PtrToInt)) {
1978     LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd\n");
1979     return false;
1980   }
1981 
1982   // Also take the opportunity here to try to do some optimization.
1983   // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
1984   Register NegatedReg;
1985   if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
1986     return true;
1987   I.getOperand(2).setReg(NegatedReg);
1988   I.setDesc(TII.get(TargetOpcode::G_SUB));
1989   return true;
1990 }
1991 
1992 bool AArch64InstructionSelector::earlySelectSHL(
1993     MachineInstr &I, MachineRegisterInfo &MRI) const {
1994   // We try to match the immediate variant of LSL, which is actually an alias
1995   // for a special case of UBFM. Otherwise, we fall back to the imported
1996   // selector which will match the register variant.
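  // e.g. lsl x0, x1, #3 is the alias of ubfm x0, x1, #61, #60
  // (immr = (64 - 3) % 64, imms = 63 - 3).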
1997   assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1998   const auto &MO = I.getOperand(2);
1999   auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
2000   if (!VRegAndVal)
2001     return false;
2002 
2003   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2004   if (DstTy.isVector())
2005     return false;
2006   bool Is64Bit = DstTy.getSizeInBits() == 64;
2007   auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2008   auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2009   MachineIRBuilder MIB(I);
2010 
2011   if (!Imm1Fn || !Imm2Fn)
2012     return false;
2013 
2014   auto NewI =
2015       MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2016                      {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2017 
2018   for (auto &RenderFn : *Imm1Fn)
2019     RenderFn(NewI);
2020   for (auto &RenderFn : *Imm2Fn)
2021     RenderFn(NewI);
2022 
2023   I.eraseFromParent();
2024   return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2025 }
2026 
2027 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2028     MachineInstr &I, MachineRegisterInfo &MRI) {
2029   assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2030   // If we're storing a scalar, it doesn't matter what register bank that
2031   // scalar is on. All that matters is the size.
2032   //
2033   // So, if we see something like this (with a 32-bit scalar as an example):
2034   //
2035   // %x:gpr(s32) = ... something ...
2036   // %y:fpr(s32) = COPY %x:gpr(s32)
2037   // G_STORE %y:fpr(s32)
2038   //
2039   // We can fix this up into something like this:
2040   //
2041   // G_STORE %x:gpr(s32)
2042   //
2043   // And then continue the selection process normally.
2044   Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2045   if (!DefDstReg.isValid())
2046     return false;
2047   LLT DefDstTy = MRI.getType(DefDstReg);
2048   Register StoreSrcReg = I.getOperand(0).getReg();
2049   LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2050 
2051   // If we get something strange like a physical register, then we shouldn't
2052   // go any further.
2053   if (!DefDstTy.isValid())
2054     return false;
2055 
2056   // Are the source and dst types the same size?
2057   if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2058     return false;
2059 
2060   if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2061       RBI.getRegBank(DefDstReg, MRI, TRI))
2062     return false;
2063 
2064   // We have a cross-bank copy, which is entering a store. Let's fold it.
2065   I.getOperand(0).setReg(DefDstReg);
2066   return true;
2067 }
2068 
2069 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
2070   assert(I.getParent() && "Instruction should be in a basic block!");
2071   assert(I.getParent()->getParent() && "Instruction should be in a function!");
2072 
2073   MachineBasicBlock &MBB = *I.getParent();
2074   MachineFunction &MF = *MBB.getParent();
2075   MachineRegisterInfo &MRI = MF.getRegInfo();
2076 
2077   switch (I.getOpcode()) {
2078   case TargetOpcode::G_BR: {
2079     // If the branch jumps to the fallthrough block, don't bother emitting it.
2080     // Only do this at -O0, where it gives a good code size improvement. When
2081     // optimizations are enabled, we want to leave this choice to
2082     // MachineBlockPlacement.
2083     bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
2084     if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
2085       return false;
2086     I.eraseFromParent();
2087     return true;
2088   }
2089   case TargetOpcode::G_SHL:
2090     return earlySelectSHL(I, MRI);
2091   case TargetOpcode::G_CONSTANT: {
2092     bool IsZero = false;
2093     if (I.getOperand(1).isCImm())
2094       IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2095     else if (I.getOperand(1).isImm())
2096       IsZero = I.getOperand(1).getImm() == 0;
2097 
2098     if (!IsZero)
2099       return false;
2100 
2101     Register DefReg = I.getOperand(0).getReg();
2102     LLT Ty = MRI.getType(DefReg);
2103     if (Ty.getSizeInBits() == 64) {
2104       I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2105       RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2106     } else if (Ty.getSizeInBits() == 32) {
2107       I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2108       RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2109     } else
2110       return false;
2111 
2112     I.setDesc(TII.get(TargetOpcode::COPY));
2113     return true;
2114   }
2115   default:
2116     return false;
2117   }
2118 }
2119 
2120 bool AArch64InstructionSelector::select(MachineInstr &I) {
2121   assert(I.getParent() && "Instruction should be in a basic block!");
2122   assert(I.getParent()->getParent() && "Instruction should be in a function!");
2123 
2124   MachineBasicBlock &MBB = *I.getParent();
2125   MachineFunction &MF = *MBB.getParent();
2126   MachineRegisterInfo &MRI = MF.getRegInfo();
2127 
2128   const AArch64Subtarget *Subtarget =
2129       &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2130   if (Subtarget->requiresStrictAlign()) {
2131     // We don't support this feature yet.
2132     LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2133     return false;
2134   }
2135 
2136   unsigned Opcode = I.getOpcode();
2137   // G_PHI requires same handling as PHI
2138   if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2139     // Certain non-generic instructions also need some special handling.
2140 
2141     if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2142       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2143 
2144     if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2145       const Register DefReg = I.getOperand(0).getReg();
2146       const LLT DefTy = MRI.getType(DefReg);
2147 
2148       const RegClassOrRegBank &RegClassOrBank =
2149         MRI.getRegClassOrRegBank(DefReg);
2150 
2151       const TargetRegisterClass *DefRC
2152         = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2153       if (!DefRC) {
2154         if (!DefTy.isValid()) {
2155           LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2156           return false;
2157         }
2158         const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2159         DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2160         if (!DefRC) {
2161           LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2162           return false;
2163         }
2164       }
2165 
2166       I.setDesc(TII.get(TargetOpcode::PHI));
2167 
2168       return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2169     }
2170 
2171     if (I.isCopy())
2172       return selectCopy(I, TII, MRI, TRI, RBI);
2173 
2174     return true;
2175   }
2176 
2177 
2178   if (I.getNumOperands() != I.getNumExplicitOperands()) {
2179     LLVM_DEBUG(
2180         dbgs() << "Generic instruction has unexpected implicit operands\n");
2181     return false;
2182   }
2183 
2184   // Try to do some lowering before we start instruction selecting. These
2185   // lowerings are purely transformations on the input G_MIR and so selection
2186   // must continue after any modification of the instruction.
2187   if (preISelLower(I)) {
2188     Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2189   }
2190 
2191   // There may be patterns where the importer can't deal with them optimally,
2192   // but does select it to a suboptimal sequence so our custom C++ selection
2193   // code later never has a chance to work on it. Therefore, we have an early
2194   // selection attempt here to give priority to certain selection routines
2195   // over the imported ones.
2196   if (earlySelect(I))
2197     return true;
2198 
2199   if (selectImpl(I, *CoverageInfo))
2200     return true;
2201 
2202   LLT Ty =
2203       I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2204 
2205   MachineIRBuilder MIB(I);
2206 
2207   switch (Opcode) {
2208   case TargetOpcode::G_BRCOND:
2209     return selectCompareBranch(I, MF, MRI);
2210 
2211   case TargetOpcode::G_BRINDIRECT: {
2212     I.setDesc(TII.get(AArch64::BR));
2213     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2214   }
2215 
2216   case TargetOpcode::G_BRJT:
2217     return selectBrJT(I, MRI);
2218 
2219   case AArch64::G_ADD_LOW: {
2220     // This op may have been separated from its ADRP companion by the localizer
2221     // or some other code motion pass. Given that many CPUs will try to
2222     // macro-fuse these operations anyway, select this into a MOVaddr pseudo,
2223     // which will later be expanded into an ADRP+ADD pair after scheduling.
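    // e.g. after expansion this becomes something like:
    //   adrp x0, sym
    //   add  x0, x0, :lo12:sym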
2224     MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2225     if (BaseMI->getOpcode() != AArch64::ADRP) {
2226       I.setDesc(TII.get(AArch64::ADDXri));
2227       I.addOperand(MachineOperand::CreateImm(0));
2228       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2229     }
2230     assert(TM.getCodeModel() == CodeModel::Small &&
2231            "Expected small code model");
2232     MachineIRBuilder MIB(I);
2233     auto Op1 = BaseMI->getOperand(1);
2234     auto Op2 = I.getOperand(2);
2235     auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2236                        .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2237                                          Op1.getTargetFlags())
2238                        .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2239                                          Op2.getTargetFlags());
2240     I.eraseFromParent();
2241     return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2242   }
2243 
2244   case TargetOpcode::G_BSWAP: {
2245     // Handle vector types for G_BSWAP directly.
2246     Register DstReg = I.getOperand(0).getReg();
2247     LLT DstTy = MRI.getType(DstReg);
2248 
2249     // We should only get vector types here; everything else is handled by the
2250     // importer right now.
2251     if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2252       LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2253       return false;
2254     }
2255 
2256     // Only handle 4 and 2 element vectors for now.
2257     // TODO: 16-bit elements.
2258     unsigned NumElts = DstTy.getNumElements();
2259     if (NumElts != 4 && NumElts != 2) {
2260       LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2261       return false;
2262     }
2263 
2264     // Choose the correct opcode for the supported types. Right now, that's
2265     // v2s32, v4s32, and v2s64.
2266     unsigned Opc = 0;
2267     unsigned EltSize = DstTy.getElementType().getSizeInBits();
2268     if (EltSize == 32)
2269       Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2270                                           : AArch64::REV32v16i8;
2271     else if (EltSize == 64)
2272       Opc = AArch64::REV64v16i8;
2273 
2274     // We should always get something by the time we get here...
2275     assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2276 
2277     I.setDesc(TII.get(Opc));
2278     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2279   }
2280 
2281   case TargetOpcode::G_FCONSTANT:
2282   case TargetOpcode::G_CONSTANT: {
2283     const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2284 
2285     const LLT s8 = LLT::scalar(8);
2286     const LLT s16 = LLT::scalar(16);
2287     const LLT s32 = LLT::scalar(32);
2288     const LLT s64 = LLT::scalar(64);
2289     const LLT p0 = LLT::pointer(0, 64);
2290 
2291     const Register DefReg = I.getOperand(0).getReg();
2292     const LLT DefTy = MRI.getType(DefReg);
2293     const unsigned DefSize = DefTy.getSizeInBits();
2294     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2295 
2296     // FIXME: Redundant check, but even less readable when factored out.
2297     if (isFP) {
2298       if (Ty != s32 && Ty != s64) {
2299         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2300                           << " constant, expected: " << s32 << " or " << s64
2301                           << '\n');
2302         return false;
2303       }
2304 
2305       if (RB.getID() != AArch64::FPRRegBankID) {
2306         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2307                           << " constant on bank: " << RB
2308                           << ", expected: FPR\n");
2309         return false;
2310       }
2311 
2312       // The case when we have 0.0 is covered by tablegen. Reject it here so we
2313       // can be sure tablegen works correctly and isn't rescued by this code.
2314       if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
2315         return false;
2316     } else {
2317       // s32 and s64 are covered by tablegen.
2318       if (Ty != p0 && Ty != s8 && Ty != s16) {
2319         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2320                           << " constant, expected: " << s32 << ", " << s64
2321                           << ", or " << p0 << '\n');
2322         return false;
2323       }
2324 
2325       if (RB.getID() != AArch64::GPRRegBankID) {
2326         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2327                           << " constant on bank: " << RB
2328                           << ", expected: GPR\n");
2329         return false;
2330       }
2331     }
2332 
2333     // We allow G_CONSTANT of types < 32b.
2334     const unsigned MovOpc =
2335         DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2336 
2337     if (isFP) {
2338       // Either emit a FMOV, or emit a copy to emit a normal mov.
2339       const TargetRegisterClass &GPRRC =
2340           DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
2341       const TargetRegisterClass &FPRRC =
2342           DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
2343 
2344       // Can we use a FMOV instruction to represent the immediate?
2345       if (emitFMovForFConstant(I, MRI))
2346         return true;
2347 
2348       // For 64b values, emit a constant pool load instead.
2349       if (DefSize == 64) {
2350         auto *FPImm = I.getOperand(1).getFPImm();
2351         MachineIRBuilder MIB(I);
2352         auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2353         if (!LoadMI) {
2354           LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2355           return false;
2356         }
2357         MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2358         I.eraseFromParent();
2359         return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2360       }
2361 
2362       // Nope. Emit a copy and use a normal mov instead.
2363       const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
2364       MachineOperand &RegOp = I.getOperand(0);
2365       RegOp.setReg(DefGPRReg);
2366       MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2367       MIB.buildCopy({DefReg}, {DefGPRReg});
2368 
2369       if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2370         LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2371         return false;
2372       }
2373 
2374       MachineOperand &ImmOp = I.getOperand(1);
2375       // FIXME: Is going through int64_t always correct?
2376       ImmOp.ChangeToImmediate(
2377           ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2378     } else if (I.getOperand(1).isCImm()) {
2379       uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2380       I.getOperand(1).ChangeToImmediate(Val);
2381     } else if (I.getOperand(1).isImm()) {
2382       uint64_t Val = I.getOperand(1).getImm();
2383       I.getOperand(1).ChangeToImmediate(Val);
2384     }
2385 
2386     I.setDesc(TII.get(MovOpc));
2387     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2388     return true;
2389   }
2390   case TargetOpcode::G_EXTRACT: {
2391     Register DstReg = I.getOperand(0).getReg();
2392     Register SrcReg = I.getOperand(1).getReg();
2393     LLT SrcTy = MRI.getType(SrcReg);
2394     LLT DstTy = MRI.getType(DstReg);
2395     (void)DstTy;
2396     unsigned SrcSize = SrcTy.getSizeInBits();
2397 
2398     if (SrcTy.getSizeInBits() > 64) {
2399       // This should be an extract of an s128, which is like a vector extract.
2400       if (SrcTy.getSizeInBits() != 128)
2401         return false;
2402       // Only support extracting 64 bits from an s128 at the moment.
2403       if (DstTy.getSizeInBits() != 64)
2404         return false;
2405 
2406       const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2407       const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2408       // Check we have the right regbank always.
2409       assert(SrcRB.getID() == AArch64::FPRRegBankID &&
2410              DstRB.getID() == AArch64::FPRRegBankID &&
2411              "Wrong extract regbank!");
2412       (void)SrcRB;
2413 
2414       // Emit the same code as a vector extract.
2415       // Offset must be a multiple of 64.
2416       unsigned Offset = I.getOperand(2).getImm();
2417       if (Offset % 64 != 0)
2418         return false;
2419       unsigned LaneIdx = Offset / 64;
2420       MachineIRBuilder MIB(I);
2421       MachineInstr *Extract = emitExtractVectorElt(
2422           DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2423       if (!Extract)
2424         return false;
2425       I.eraseFromParent();
2426       return true;
2427     }
2428 
2429     I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2430     MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2431                                       Ty.getSizeInBits() - 1);
2432 
2433     if (SrcSize < 64) {
2434       assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2435              "unexpected G_EXTRACT types");
2436       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2437     }
2438 
2439     DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2440     MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2441     MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2442         .addReg(DstReg, 0, AArch64::sub_32);
2443     RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2444                                  AArch64::GPR32RegClass, MRI);
2445     I.getOperand(0).setReg(DstReg);
2446 
2447     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2448   }
2449 
2450   case TargetOpcode::G_INSERT: {
2451     LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2452     LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2453     unsigned DstSize = DstTy.getSizeInBits();
2454     // Larger inserts are vectors, same-size ones should be something else by
2455     // now (split up or turned into COPYs).
2456     if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2457       return false;
2458 
2459     I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2460     unsigned LSB = I.getOperand(3).getImm();
2461     unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2462     I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2463     MachineInstrBuilder(MF, I).addImm(Width - 1);
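    // e.g. inserting an s16 at bit 8 of an s32 gives BFMWri with
    // immr = (32 - 8) % 32 = 24 and imms = 16 - 1 = 15, i.e. the
    // bfi w0, w1, #8, #16 alias.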
2464 
2465     if (DstSize < 64) {
2466       assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2467              "unexpected G_INSERT types");
2468       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2469     }
2470 
2471     Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2472     BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2473             TII.get(AArch64::SUBREG_TO_REG))
2474         .addDef(SrcReg)
2475         .addImm(0)
2476         .addUse(I.getOperand(2).getReg())
2477         .addImm(AArch64::sub_32);
2478     RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2479                                  AArch64::GPR32RegClass, MRI);
2480     I.getOperand(2).setReg(SrcReg);
2481 
2482     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2483   }
2484   case TargetOpcode::G_FRAME_INDEX: {
2485     // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2486     if (Ty != LLT::pointer(0, 64)) {
2487       LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2488                         << ", expected: " << LLT::pointer(0, 64) << '\n');
2489       return false;
2490     }
2491     I.setDesc(TII.get(AArch64::ADDXri));
2492 
2493     // MOs for a #0 shifted immediate.
2494     I.addOperand(MachineOperand::CreateImm(0));
2495     I.addOperand(MachineOperand::CreateImm(0));
2496 
2497     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2498   }
2499 
2500   case TargetOpcode::G_GLOBAL_VALUE: {
2501     auto GV = I.getOperand(1).getGlobal();
2502     if (GV->isThreadLocal())
2503       return selectTLSGlobalValue(I, MRI);
2504 
2505     unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2506     if (OpFlags & AArch64II::MO_GOT) {
2507       I.setDesc(TII.get(AArch64::LOADgot));
2508       I.getOperand(1).setTargetFlags(OpFlags);
2509     } else if (TM.getCodeModel() == CodeModel::Large) {
2510       // Materialize the global using movz/movk instructions.
2511       materializeLargeCMVal(I, GV, OpFlags);
2512       I.eraseFromParent();
2513       return true;
2514     } else if (TM.getCodeModel() == CodeModel::Tiny) {
2515       I.setDesc(TII.get(AArch64::ADR));
2516       I.getOperand(1).setTargetFlags(OpFlags);
2517     } else {
2518       I.setDesc(TII.get(AArch64::MOVaddr));
2519       I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2520       MachineInstrBuilder MIB(MF, I);
2521       MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2522                            OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2523     }
2524     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2525   }
2526 
2527   case TargetOpcode::G_ZEXTLOAD:
2528   case TargetOpcode::G_LOAD:
2529   case TargetOpcode::G_STORE: {
2530     bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2531     MachineIRBuilder MIB(I);
2532 
2533     LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
2534 
2535     if (PtrTy != LLT::pointer(0, 64)) {
2536       LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2537                         << ", expected: " << LLT::pointer(0, 64) << '\n');
2538       return false;
2539     }
2540 
2541     auto &MemOp = **I.memoperands_begin();
2542     uint64_t MemSizeInBytes = MemOp.getSize();
2543     if (MemOp.isAtomic()) {
2544       // For now we just support s8 acquire loads to be able to compile stack
2545       // protector code.
2546       if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
2547           MemSizeInBytes == 1) {
2548         I.setDesc(TII.get(AArch64::LDARB));
2549         return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2550       }
2551       LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
2552       return false;
2553     }
2554     unsigned MemSizeInBits = MemSizeInBytes * 8;
2555 
2556 #ifndef NDEBUG
2557     const Register PtrReg = I.getOperand(1).getReg();
2558     const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2559     // Sanity-check the pointer register.
2560     assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2561            "Load/Store pointer operand isn't a GPR");
2562     assert(MRI.getType(PtrReg).isPointer() &&
2563            "Load/Store pointer operand isn't a pointer");
2564 #endif
2565 
2566     const Register ValReg = I.getOperand(0).getReg();
2567     const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2568 
2569     // Helper lambda for partially selecting I. Either returns the original
2570     // instruction with an updated opcode, or a new instruction.
2571     auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2572       bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
2573       const unsigned NewOpc =
2574           selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2575       if (NewOpc == I.getOpcode())
2576         return nullptr;
2577       // Check if we can fold anything into the addressing mode.
2578       auto AddrModeFns =
2579           selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2580       if (!AddrModeFns) {
2581         // Can't fold anything. Use the original instruction.
2582         I.setDesc(TII.get(NewOpc));
2583         I.addOperand(MachineOperand::CreateImm(0));
2584         return &I;
2585       }
2586 
2587       // Folded something. Create a new instruction and return it.
2588       auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2589       IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
2590       NewInst.cloneMemRefs(I);
2591       for (auto &Fn : *AddrModeFns)
2592         Fn(NewInst);
2593       I.eraseFromParent();
2594       return &*NewInst;
2595     };
2596 
2597     MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2598     if (!LoadStore)
2599       return false;
2600 
2601     // If we're storing a 0, use WZR/XZR.
2602     if (Opcode == TargetOpcode::G_STORE) {
2603       auto CVal = getConstantVRegValWithLookThrough(
2604           LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
2605           /*HandleFConstants = */ false);
2606       if (CVal && CVal->Value == 0) {
2607         switch (LoadStore->getOpcode()) {
2608         case AArch64::STRWui:
2609         case AArch64::STRHHui:
2610         case AArch64::STRBBui:
2611           LoadStore->getOperand(0).setReg(AArch64::WZR);
2612           break;
2613         case AArch64::STRXui:
2614           LoadStore->getOperand(0).setReg(AArch64::XZR);
2615           break;
2616         }
2617       }
2618     }
2619 
2620     if (IsZExtLoad) {
2621       // The zextload from a smaller type to i32 should be handled by the
2622       // importer.
2623       if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2624         return false;
2625       // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2626       // and zero_extend with SUBREG_TO_REG.
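      // e.g. a 32-bit zextload to s64 ends up roughly as:
      //   %ld:gpr32 = LDRWui %ptr, 0
      //   %dst:gpr64all = SUBREG_TO_REG 0, %ld, %subreg.sub_32
      // where writing the W register implicitly zeroes the upper 32 bits.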
2627       Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2628       Register DstReg = LoadStore->getOperand(0).getReg();
2629       LoadStore->getOperand(0).setReg(LdReg);
2630 
2631       MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2632       MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2633           .addImm(0)
2634           .addUse(LdReg)
2635           .addImm(AArch64::sub_32);
2636       constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2637       return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2638                                           MRI);
2639     }
2640     return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2641   }
2642 
2643   case TargetOpcode::G_SMULH:
2644   case TargetOpcode::G_UMULH: {
2645     // Reject the various things we don't support yet.
2646     if (unsupportedBinOp(I, RBI, MRI, TRI))
2647       return false;
2648 
2649     const Register DefReg = I.getOperand(0).getReg();
2650     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2651 
2652     if (RB.getID() != AArch64::GPRRegBankID) {
2653       LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2654       return false;
2655     }
2656 
2657     if (Ty != LLT::scalar(64)) {
2658       LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2659                         << ", expected: " << LLT::scalar(64) << '\n');
2660       return false;
2661     }
2662 
2663     unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2664                                                              : AArch64::UMULHrr;
2665     I.setDesc(TII.get(NewOpc));
2666 
2667     // Now that we selected an opcode, we need to constrain the register
2668     // operands to use appropriate classes.
2669     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2670   }
2671   case TargetOpcode::G_LSHR:
2672   case TargetOpcode::G_ASHR:
2673     if (MRI.getType(I.getOperand(0).getReg()).isVector())
2674       return selectVectorAshrLshr(I, MRI);
2675     LLVM_FALLTHROUGH;
2676   case TargetOpcode::G_SHL:
2677     if (Opcode == TargetOpcode::G_SHL &&
2678         MRI.getType(I.getOperand(0).getReg()).isVector())
2679       return selectVectorSHL(I, MRI);
2680     LLVM_FALLTHROUGH;
2681   case TargetOpcode::G_FADD:
2682   case TargetOpcode::G_FSUB:
2683   case TargetOpcode::G_FMUL:
2684   case TargetOpcode::G_FDIV:
2685   case TargetOpcode::G_OR: {
2686     // Reject the various things we don't support yet.
2687     if (unsupportedBinOp(I, RBI, MRI, TRI))
2688       return false;
2689 
2690     const unsigned OpSize = Ty.getSizeInBits();
2691 
2692     const Register DefReg = I.getOperand(0).getReg();
2693     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2694 
2695     const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2696     if (NewOpc == I.getOpcode())
2697       return false;
2698 
2699     I.setDesc(TII.get(NewOpc));
2700     // FIXME: Should the type be always reset in setDesc?
2701 
2702     // Now that we selected an opcode, we need to constrain the register
2703     // operands to use appropriate classes.
2704     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2705   }
2706 
2707   case TargetOpcode::G_PTR_ADD: {
2708     MachineIRBuilder MIRBuilder(I);
2709     emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
2710             MIRBuilder);
2711     I.eraseFromParent();
2712     return true;
2713   }
2714   case TargetOpcode::G_SADDO:
2715   case TargetOpcode::G_UADDO:
2716   case TargetOpcode::G_SSUBO: {
2717     // Emit the operation and get the correct condition code.
2718     MachineIRBuilder MIRBuilder(I);
2719     auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
2720                                   I.getOperand(2), I.getOperand(3), MIRBuilder);
2721 
2722     // Now, put the overflow result in the register given by the first operand
2723     // to the overflow op. CSINC increments the result when the predicate is
2724     // false, so to get the increment when the condition is true, we pass CSINC
2725     // the inverse of the condition code returned by emitOverflowOp.
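    // For example, for a G_UADDO the add sets the carry flag on unsigned
    // overflow (condition HS), so we emit CSINC Wd, WZR, WZR, LO, which
    // produces 1 exactly when the carry is set (this is the CSET alias).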
2726     Register ZReg = AArch64::WZR;
2727     auto CsetMI = MIRBuilder
2728                       .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2729                                   {ZReg, ZReg})
2730                       .addImm(getInvertedCondCode(OpAndCC.second));
2731     constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2732     I.eraseFromParent();
2733     return true;
2734   }
2735 
2736   case TargetOpcode::G_PTRMASK: {
2737     Register MaskReg = I.getOperand(2).getReg();
2738     Optional<int64_t> MaskVal = getConstantVRegVal(MaskReg, MRI);
2739     // TODO: Implement arbitrary cases
2740     if (!MaskVal || !isShiftedMask_64(*MaskVal))
2741       return false;
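    // For example, masking a pointer with 0xFFFFFFFFFFFFFFF0 (align down to 16
    // bytes) is a shifted mask, so it can be selected as a single ANDXri with
    // the corresponding encoded logical immediate.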
2742 
2743     uint64_t Mask = *MaskVal;
2744     I.setDesc(TII.get(AArch64::ANDXri));
2745     I.getOperand(2).ChangeToImmediate(
2746         AArch64_AM::encodeLogicalImmediate(Mask, 64));
2747 
2748     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2749   }
2750   case TargetOpcode::G_PTRTOINT:
2751   case TargetOpcode::G_TRUNC: {
2752     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2753     const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2754 
2755     const Register DstReg = I.getOperand(0).getReg();
2756     const Register SrcReg = I.getOperand(1).getReg();
2757 
2758     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2759     const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2760 
2761     if (DstRB.getID() != SrcRB.getID()) {
2762       LLVM_DEBUG(
2763           dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
2764       return false;
2765     }
2766 
2767     if (DstRB.getID() == AArch64::GPRRegBankID) {
2768       const TargetRegisterClass *DstRC =
2769           getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2770       if (!DstRC)
2771         return false;
2772 
2773       const TargetRegisterClass *SrcRC =
2774           getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2775       if (!SrcRC)
2776         return false;
2777 
2778       if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2779           !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2780         LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
2781         return false;
2782       }
2783 
2784       if (DstRC == SrcRC) {
2785         // Nothing to be done
2786       } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2787                  SrcTy == LLT::scalar(64)) {
2788         llvm_unreachable("TableGen can import this case");
2789         return false;
2790       } else if (DstRC == &AArch64::GPR32RegClass &&
2791                  SrcRC == &AArch64::GPR64RegClass) {
2792         I.getOperand(1).setSubReg(AArch64::sub_32);
2793       } else {
2794         LLVM_DEBUG(
2795             dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2796         return false;
2797       }
2798 
2799       I.setDesc(TII.get(TargetOpcode::COPY));
2800       return true;
2801     } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2802       if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2803         I.setDesc(TII.get(AArch64::XTNv4i16));
2804         constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2805         return true;
2806       }
2807 
2808       if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2809         MachineIRBuilder MIB(I);
2810         MachineInstr *Extract = emitExtractVectorElt(
2811             DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2812         if (!Extract)
2813           return false;
2814         I.eraseFromParent();
2815         return true;
2816       }
2817 
2818       // We might have a vector G_PTRTOINT, in which case just emit a COPY.
2819       if (Opcode == TargetOpcode::G_PTRTOINT) {
2820         assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
2821         I.setDesc(TII.get(TargetOpcode::COPY));
2822         return true;
2823       }
2824     }
2825 
2826     return false;
2827   }
2828 
2829   case TargetOpcode::G_ANYEXT: {
2830     const Register DstReg = I.getOperand(0).getReg();
2831     const Register SrcReg = I.getOperand(1).getReg();
2832 
2833     const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2834     if (RBDst.getID() != AArch64::GPRRegBankID) {
2835       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2836                         << ", expected: GPR\n");
2837       return false;
2838     }
2839 
2840     const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2841     if (RBSrc.getID() != AArch64::GPRRegBankID) {
2842       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2843                         << ", expected: GPR\n");
2844       return false;
2845     }
2846 
2847     const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2848 
2849     if (DstSize == 0) {
2850       LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2851       return false;
2852     }
2853 
2854     if (DstSize != 64 && DstSize > 32) {
2855       LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2856                         << ", expected: 32 or 64\n");
2857       return false;
2858     }
2859     // At this point G_ANYEXT is just like a plain COPY, but we need
2860     // to explicitly form the 64-bit value if any.
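    // This is done with SUBREG_TO_REG, which puts the 32-bit source into the
    // sub_32 lane of a fresh 64-bit register; since G_ANYEXT leaves the upper
    // bits unspecified, no real extension instruction is needed.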
2861     if (DstSize > 32) {
2862       Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2863       BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2864           .addDef(ExtSrc)
2865           .addImm(0)
2866           .addUse(SrcReg)
2867           .addImm(AArch64::sub_32);
2868       I.getOperand(1).setReg(ExtSrc);
2869     }
2870     return selectCopy(I, TII, MRI, TRI, RBI);
2871   }
2872 
2873   case TargetOpcode::G_ZEXT:
2874   case TargetOpcode::G_SEXT_INREG:
2875   case TargetOpcode::G_SEXT: {
2876     unsigned Opcode = I.getOpcode();
2877     const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
2878     const Register DefReg = I.getOperand(0).getReg();
2879     Register SrcReg = I.getOperand(1).getReg();
2880     const LLT DstTy = MRI.getType(DefReg);
2881     const LLT SrcTy = MRI.getType(SrcReg);
2882     unsigned DstSize = DstTy.getSizeInBits();
2883     unsigned SrcSize = SrcTy.getSizeInBits();
2884 
2885     // SEXT_INREG has the same src reg size as dst, the size of the value to be
2886     // extended is encoded in the imm.
2887     if (Opcode == TargetOpcode::G_SEXT_INREG)
2888       SrcSize = I.getOperand(2).getImm();
2889 
2890     if (DstTy.isVector())
2891       return false; // Should be handled by imported patterns.
2892 
2893     assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2894                AArch64::GPRRegBankID &&
2895            "Unexpected ext regbank");
2896 
2897     MachineIRBuilder MIB(I);
2898     MachineInstr *ExtI;
2899 
2900     // If we're extending the result of a load whose destination type is
2901     // smaller than 32 bits, this zext is redundant. GPR32 is the smallest
2902     // GPR register on AArch64 and all loads which are smaller automatically
2903     // zero-extend the upper bits. E.g.
2904     // %v(s8) = G_LOAD %p, :: (load 1)
2905     // %v2(s32) = G_ZEXT %v(s8)
2906     if (!IsSigned) {
2907       auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2908       bool IsGPR =
2909           RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
2910       if (LoadMI && IsGPR) {
2911         const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2912         unsigned BytesLoaded = MemOp->getSize();
2913         if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2914           return selectCopy(I, TII, MRI, TRI, RBI);
2915       }
2916 
2917       // If we are zero extending from 32 bits to 64 bits, it's possible that
2918       // the instruction implicitly does the zero extend for us. In that case,
2919       // we can just emit a SUBREG_TO_REG.
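      // For example, W-register instructions such as a 32-bit ADD already
      // clear bits 63:32 of the full X register, so something like
      //   %a(s32) = G_ADD %x, %y
      //   %d(s64) = G_ZEXT %a(s32)
      // only needs a SUBREG_TO_REG to express the widening; isDef32 is meant
      // to check that the defining instruction has this property.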
2920       if (IsGPR && SrcSize == 32 && DstSize == 64) {
2921         // Unlike with the G_LOAD case, we don't want to look through copies
2922         // here.
2923         MachineInstr *Def = MRI.getVRegDef(SrcReg);
2924         if (Def && isDef32(*Def)) {
2925           MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
2926               .addImm(0)
2927               .addUse(SrcReg)
2928               .addImm(AArch64::sub_32);
2929 
2930           if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
2931                                             MRI)) {
2932             LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
2933             return false;
2934           }
2935 
2936           if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2937                                             MRI)) {
2938             LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
2939             return false;
2940           }
2941 
2942           I.eraseFromParent();
2943           return true;
2944         }
2945       }
2946     }
2947 
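    // Otherwise the extend is done with a bitfield move: SBFM (sign) or UBFM
    // (zero) extracting bits [SrcSize-1:0]. For example, a G_SEXT from s8 to
    // s64 becomes SBFMXri %dst, %src, 0, 7, which is the SXTB alias.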
2948     if (DstSize == 64) {
2949       if (Opcode != TargetOpcode::G_SEXT_INREG) {
2950         // FIXME: Can we avoid manually doing this?
2951         if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2952                                           MRI)) {
2953           LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2954                             << " operand\n");
2955           return false;
2956         }
2957         SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
2958                                 {&AArch64::GPR64RegClass}, {})
2959                      .addImm(0)
2960                      .addUse(SrcReg)
2961                      .addImm(AArch64::sub_32)
2962                      .getReg(0);
2963       }
2964 
2965       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2966                              {DefReg}, {SrcReg})
2967                   .addImm(0)
2968                   .addImm(SrcSize - 1);
2969     } else if (DstSize <= 32) {
2970       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2971                              {DefReg}, {SrcReg})
2972                   .addImm(0)
2973                   .addImm(SrcSize - 1);
2974     } else {
2975       return false;
2976     }
2977 
2978     constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2979     I.eraseFromParent();
2980     return true;
2981   }
2982 
2983   case TargetOpcode::G_SITOFP:
2984   case TargetOpcode::G_UITOFP:
2985   case TargetOpcode::G_FPTOSI:
2986   case TargetOpcode::G_FPTOUI: {
2987     const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2988               SrcTy = MRI.getType(I.getOperand(1).getReg());
2989     const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2990     if (NewOpc == Opcode)
2991       return false;
2992 
2993     I.setDesc(TII.get(NewOpc));
2994     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2995 
2996     return true;
2997   }
2998 
2999   case TargetOpcode::G_FREEZE:
3000     return selectCopy(I, TII, MRI, TRI, RBI);
3001 
3002   case TargetOpcode::G_INTTOPTR:
3003     // The importer is currently unable to import pointer types since they
3004     // didn't exist in SelectionDAG.
3005     return selectCopy(I, TII, MRI, TRI, RBI);
3006 
3007   case TargetOpcode::G_BITCAST:
3008     // Imported SelectionDAG rules can handle every bitcast except those that
3009     // bitcast from a type to the same type. Ideally, these shouldn't occur
3010     // but we might not run an optimizer that deletes them. The other exception
3011     // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3012     // of them.
3013     return selectCopy(I, TII, MRI, TRI, RBI);
3014 
3015   case TargetOpcode::G_SELECT: {
3016     if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
3017       LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
3018                         << ", expected: " << LLT::scalar(1) << '\n');
3019       return false;
3020     }
3021 
3022     const Register CondReg = I.getOperand(1).getReg();
3023     const Register TReg = I.getOperand(2).getReg();
3024     const Register FReg = I.getOperand(3).getReg();
3025 
3026     if (tryOptSelect(I))
3027       return true;
3028 
3029     // Make sure to use an unused vreg instead of wzr, so that the peephole
3030     // optimizations will be able to optimize these.
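    // The ANDS against the encoded immediate 1 tests bit 0 of the s1 condition
    // and sets NZCV; emitSelect below then selects on NE, i.e. "condition bit
    // was set".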
3031     MachineIRBuilder MIB(I);
3032     Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3033     auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3034                      .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3035     constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3036     if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
3037       return false;
3038     I.eraseFromParent();
3039     return true;
3040   }
3041   case TargetOpcode::G_ICMP: {
3042     if (Ty.isVector())
3043       return selectVectorICmp(I, MRI);
3044 
3045     if (Ty != LLT::scalar(32)) {
3046       LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3047                         << ", expected: " << LLT::scalar(32) << '\n');
3048       return false;
3049     }
3050 
3051     MachineIRBuilder MIRBuilder(I);
3052     auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3053     emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
3054                        MIRBuilder);
3055     emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
3056     I.eraseFromParent();
3057     return true;
3058   }
3059 
3060   case TargetOpcode::G_FCMP: {
3061     MachineIRBuilder MIRBuilder(I);
3062     CmpInst::Predicate Pred =
3063         static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3064     if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
3065                        MIRBuilder) ||
3066         !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
3067       return false;
3068     I.eraseFromParent();
3069     return true;
3070   }
3071   case TargetOpcode::G_VASTART:
3072     return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3073                                 : selectVaStartAAPCS(I, MF, MRI);
3074   case TargetOpcode::G_INTRINSIC:
3075     return selectIntrinsic(I, MRI);
3076   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3077     return selectIntrinsicWithSideEffects(I, MRI);
3078   case TargetOpcode::G_IMPLICIT_DEF: {
3079     I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3080     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3081     const Register DstReg = I.getOperand(0).getReg();
3082     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3083     const TargetRegisterClass *DstRC =
3084         getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3085     RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3086     return true;
3087   }
3088   case TargetOpcode::G_BLOCK_ADDR: {
3089     if (TM.getCodeModel() == CodeModel::Large) {
3090       materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3091       I.eraseFromParent();
3092       return true;
3093     } else {
3094       I.setDesc(TII.get(AArch64::MOVaddrBA));
3095       auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3096                            I.getOperand(0).getReg())
3097                        .addBlockAddress(I.getOperand(1).getBlockAddress(),
3098                                         /* Offset */ 0, AArch64II::MO_PAGE)
3099                        .addBlockAddress(
3100                            I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3101                            AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3102       I.eraseFromParent();
3103       return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3104     }
3105   }
3106   case AArch64::G_DUP: {
3107     // When the scalar operand of G_DUP is an s8/s16 gpr, it can't be selected
3108     // by the imported patterns, so do it manually here. Avoiding the s16 gpr in
3109     // the first place is difficult: adding an anyext at register bank selection
3110     // (RBS) to work around it could pessimize the fpr case. Manual selection is
3111     // the most robust solution for now.
3112     Register SrcReg = I.getOperand(1).getReg();
3113     if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
3114       return false; // We expect the fpr regbank case to be imported.
3115     LLT SrcTy = MRI.getType(SrcReg);
3116     if (SrcTy.getSizeInBits() == 16)
3117       I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3118     else if (SrcTy.getSizeInBits() == 8)
3119       I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3120     else
3121       return false;
3122     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3123   }
3124   case TargetOpcode::G_INTRINSIC_TRUNC:
3125     return selectIntrinsicTrunc(I, MRI);
3126   case TargetOpcode::G_INTRINSIC_ROUND:
3127     return selectIntrinsicRound(I, MRI);
3128   case TargetOpcode::G_BUILD_VECTOR:
3129     return selectBuildVector(I, MRI);
3130   case TargetOpcode::G_MERGE_VALUES:
3131     return selectMergeValues(I, MRI);
3132   case TargetOpcode::G_UNMERGE_VALUES:
3133     return selectUnmergeValues(I, MRI);
3134   case TargetOpcode::G_SHUFFLE_VECTOR:
3135     return selectShuffleVector(I, MRI);
3136   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3137     return selectExtractElt(I, MRI);
3138   case TargetOpcode::G_INSERT_VECTOR_ELT:
3139     return selectInsertElt(I, MRI);
3140   case TargetOpcode::G_CONCAT_VECTORS:
3141     return selectConcatVectors(I, MRI);
3142   case TargetOpcode::G_JUMP_TABLE:
3143     return selectJumpTable(I, MRI);
3144   case TargetOpcode::G_VECREDUCE_FADD:
3145   case TargetOpcode::G_VECREDUCE_ADD:
3146     return selectReduction(I, MRI);
3147   }
3148 
3149   return false;
3150 }
3151 
3152 bool AArch64InstructionSelector::selectReduction(
3153     MachineInstr &I, MachineRegisterInfo &MRI) const {
3154   Register VecReg = I.getOperand(1).getReg();
3155   LLT VecTy = MRI.getType(VecReg);
3156   if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3157     unsigned Opc = 0;
3158     if (VecTy == LLT::vector(16, 8))
3159       Opc = AArch64::ADDVv16i8v;
3160     else if (VecTy == LLT::vector(8, 16))
3161       Opc = AArch64::ADDVv8i16v;
3162     else if (VecTy == LLT::vector(4, 32))
3163       Opc = AArch64::ADDVv4i32v;
3164     else if (VecTy == LLT::vector(2, 64))
3165       Opc = AArch64::ADDPv2i64p;
3166     else {
3167       LLVM_DEBUG(dbgs() << "Unhandled type for add reduction\n");
3168       return false;
3169     }
3170     I.setDesc(TII.get(Opc));
3171     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3172   }
3173 
3174   if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3175     unsigned Opc = 0;
3176     if (VecTy == LLT::vector(2, 32))
3177       Opc = AArch64::FADDPv2i32p;
3178     else if (VecTy == LLT::vector(2, 64))
3179       Opc = AArch64::FADDPv2i64p;
3180     else {
3181       LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction\n");
3182       return false;
3183     }
3184     I.setDesc(TII.get(Opc));
3185     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3186   }
3187   return false;
3188 }
3189 
3190 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3191                                             MachineRegisterInfo &MRI) const {
3192   assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3193   Register JTAddr = I.getOperand(0).getReg();
3194   unsigned JTI = I.getOperand(1).getIndex();
3195   Register Index = I.getOperand(2).getReg();
3196   MachineIRBuilder MIB(I);
3197 
3198   Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3199   Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3200 
3201   MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
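  // The jump table is recorded as using 4-byte entries to match the
  // JumpTableDest32 pseudo below, which is later expanded (post-selection)
  // into the sequence that loads the 4-byte entry and adds it to the table
  // address before the BR.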
3202   auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3203                                       {TargetReg, ScratchReg}, {JTAddr, Index})
3204                            .addJumpTableIndex(JTI);
3205   // Build the indirect branch.
3206   MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3207   I.eraseFromParent();
3208   return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3209 }
3210 
3211 bool AArch64InstructionSelector::selectJumpTable(
3212     MachineInstr &I, MachineRegisterInfo &MRI) const {
3213   assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3214   assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3215 
3216   Register DstReg = I.getOperand(0).getReg();
3217   unsigned JTI = I.getOperand(1).getIndex();
3218   // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3219   MachineIRBuilder MIB(I);
3220   auto MovMI =
3221     MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3222           .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3223           .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3224   I.eraseFromParent();
3225   return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3226 }
3227 
3228 bool AArch64InstructionSelector::selectTLSGlobalValue(
3229     MachineInstr &I, MachineRegisterInfo &MRI) const {
3230   if (!STI.isTargetMachO())
3231     return false;
3232   MachineFunction &MF = *I.getParent()->getParent();
3233   MF.getFrameInfo().setAdjustsStack(true);
3234 
3235   const GlobalValue &GV = *I.getOperand(1).getGlobal();
3236   MachineIRBuilder MIB(I);
3237 
3238   auto LoadGOT =
3239       MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3240           .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3241 
3242   auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3243                              {LoadGOT.getReg(0)})
3244                   .addImm(0);
3245 
3246   MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3247   // TLS calls preserve all registers except those that absolutely must be
3248   // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3249   // silly).
3250   MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3251       .addUse(AArch64::X0, RegState::Implicit)
3252       .addDef(AArch64::X0, RegState::Implicit)
3253       .addRegMask(TRI.getTLSCallPreservedMask());
3254 
3255   MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3256   RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3257                                MRI);
3258   I.eraseFromParent();
3259   return true;
3260 }
3261 
3262 bool AArch64InstructionSelector::selectIntrinsicTrunc(
3263     MachineInstr &I, MachineRegisterInfo &MRI) const {
3264   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3265 
3266   // Select the correct opcode.
3267   unsigned Opc = 0;
3268   if (!SrcTy.isVector()) {
3269     switch (SrcTy.getSizeInBits()) {
3270     default:
3271     case 16:
3272       Opc = AArch64::FRINTZHr;
3273       break;
3274     case 32:
3275       Opc = AArch64::FRINTZSr;
3276       break;
3277     case 64:
3278       Opc = AArch64::FRINTZDr;
3279       break;
3280     }
3281   } else {
3282     unsigned NumElts = SrcTy.getNumElements();
3283     switch (SrcTy.getElementType().getSizeInBits()) {
3284     default:
3285       break;
3286     case 16:
3287       if (NumElts == 4)
3288         Opc = AArch64::FRINTZv4f16;
3289       else if (NumElts == 8)
3290         Opc = AArch64::FRINTZv8f16;
3291       break;
3292     case 32:
3293       if (NumElts == 2)
3294         Opc = AArch64::FRINTZv2f32;
3295       else if (NumElts == 4)
3296         Opc = AArch64::FRINTZv4f32;
3297       break;
3298     case 64:
3299       if (NumElts == 2)
3300         Opc = AArch64::FRINTZv2f64;
3301       break;
3302     }
3303   }
3304 
3305   if (!Opc) {
3306     // Didn't get an opcode above, bail.
3307     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3308     return false;
3309   }
3310 
3311   // Legalization would have set us up perfectly for this; we just need to
3312   // set the opcode and move on.
3313   I.setDesc(TII.get(Opc));
3314   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3315 }
3316 
3317 bool AArch64InstructionSelector::selectIntrinsicRound(
3318     MachineInstr &I, MachineRegisterInfo &MRI) const {
3319   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3320 
3321   // Select the correct opcode.
3322   unsigned Opc = 0;
3323   if (!SrcTy.isVector()) {
3324     switch (SrcTy.getSizeInBits()) {
3325     default:
3326     case 16:
3327       Opc = AArch64::FRINTAHr;
3328       break;
3329     case 32:
3330       Opc = AArch64::FRINTASr;
3331       break;
3332     case 64:
3333       Opc = AArch64::FRINTADr;
3334       break;
3335     }
3336   } else {
3337     unsigned NumElts = SrcTy.getNumElements();
3338     switch (SrcTy.getElementType().getSizeInBits()) {
3339     default:
3340       break;
3341     case 16:
3342       if (NumElts == 4)
3343         Opc = AArch64::FRINTAv4f16;
3344       else if (NumElts == 8)
3345         Opc = AArch64::FRINTAv8f16;
3346       break;
3347     case 32:
3348       if (NumElts == 2)
3349         Opc = AArch64::FRINTAv2f32;
3350       else if (NumElts == 4)
3351         Opc = AArch64::FRINTAv4f32;
3352       break;
3353     case 64:
3354       if (NumElts == 2)
3355         Opc = AArch64::FRINTAv2f64;
3356       break;
3357     }
3358   }
3359 
3360   if (!Opc) {
3361     // Didn't get an opcode above, bail.
3362     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3363     return false;
3364   }
3365 
3366   // Legalization would have set us up perfectly for this; we just need to
3367   // set the opcode and move on.
3368   I.setDesc(TII.get(Opc));
3369   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3370 }
3371 
3372 bool AArch64InstructionSelector::selectVectorICmp(
3373     MachineInstr &I, MachineRegisterInfo &MRI) const {
3374   Register DstReg = I.getOperand(0).getReg();
3375   LLT DstTy = MRI.getType(DstReg);
3376   Register SrcReg = I.getOperand(2).getReg();
3377   Register Src2Reg = I.getOperand(3).getReg();
3378   LLT SrcTy = MRI.getType(SrcReg);
3379 
3380   unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3381   unsigned NumElts = DstTy.getNumElements();
3382 
3383   // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3384   // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3385   // Third index is cc opcode:
3386   // 0 == eq
3387   // 1 == ugt
3388   // 2 == uge
3389   // 3 == ult
3390   // 4 == ule
3391   // 5 == sgt
3392   // 6 == sge
3393   // 7 == slt
3394   // 8 == sle
3395   // ne is done by negating 'eq' result.
3396 
3397   // The table below assumes that for some comparisons the operands will be
3398   // commuted.
3399   // ult op == commute + ugt op
3400   // ule op == commute + uge op
3401   // slt op == commute + sgt op
3402   // sle op == commute + sge op
3403   unsigned PredIdx = 0;
3404   bool SwapOperands = false;
3405   CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3406   switch (Pred) {
3407   case CmpInst::ICMP_NE:
3408   case CmpInst::ICMP_EQ:
3409     PredIdx = 0;
3410     break;
3411   case CmpInst::ICMP_UGT:
3412     PredIdx = 1;
3413     break;
3414   case CmpInst::ICMP_UGE:
3415     PredIdx = 2;
3416     break;
3417   case CmpInst::ICMP_ULT:
3418     PredIdx = 3;
3419     SwapOperands = true;
3420     break;
3421   case CmpInst::ICMP_ULE:
3422     PredIdx = 4;
3423     SwapOperands = true;
3424     break;
3425   case CmpInst::ICMP_SGT:
3426     PredIdx = 5;
3427     break;
3428   case CmpInst::ICMP_SGE:
3429     PredIdx = 6;
3430     break;
3431   case CmpInst::ICMP_SLT:
3432     PredIdx = 7;
3433     SwapOperands = true;
3434     break;
3435   case CmpInst::ICMP_SLE:
3436     PredIdx = 8;
3437     SwapOperands = true;
3438     break;
3439   default:
3440     llvm_unreachable("Unhandled icmp predicate");
3441     return false;
3442   }
3443 
3444   // This table should be generated by TableGen once we have a GISel-native
3445   // tablegen selector.
3446 
3447   static const unsigned OpcTable[4][4][9] = {
3448       {
3449           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3450            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3451            0 /* invalid */},
3452           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3453            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3454            0 /* invalid */},
3455           {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3456            AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3457            AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3458           {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3459            AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3460            AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3461       },
3462       {
3463           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3464            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3465            0 /* invalid */},
3466           {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3467            AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3468            AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3469           {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3470            AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3471            AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3472           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3473            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3474            0 /* invalid */}
3475       },
3476       {
3477           {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3478            AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3479            AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3480           {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3481            AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3482            AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3483           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3484            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3485            0 /* invalid */},
3486           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3487            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3488            0 /* invalid */}
3489       },
3490       {
3491           {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3492            AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3493            AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3494           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3495            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3496            0 /* invalid */},
3497           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3498            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3499            0 /* invalid */},
3500           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3501            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3502            0 /* invalid */}
3503       },
3504   };
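  // Index the table as [element-size][vector-width][predicate]. For example, a
  // v4s32 compare gives EltIdx = Log2_32(32 / 8) = 2 and NumEltsIdx =
  // Log2_32(4 / 2) = 1, selecting the CMxxv4i32 row.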
3505   unsigned EltIdx = Log2_32(SrcEltSize / 8);
3506   unsigned NumEltsIdx = Log2_32(NumElts / 2);
3507   unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3508   if (!Opc) {
3509     LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode\n");
3510     return false;
3511   }
3512 
3513   const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3514   const TargetRegisterClass *SrcRC =
3515       getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3516   if (!SrcRC) {
3517     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3518     return false;
3519   }
3520 
3521   unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3522   if (SrcTy.getSizeInBits() == 128)
3523     NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3524 
3525   if (SwapOperands)
3526     std::swap(SrcReg, Src2Reg);
3527 
3528   MachineIRBuilder MIB(I);
3529   auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3530   constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3531 
3532   // Invert if we had a 'ne' cc.
3533   if (NotOpc) {
3534     Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3535     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3536   } else {
3537     MIB.buildCopy(DstReg, Cmp.getReg(0));
3538   }
3539   RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3540   I.eraseFromParent();
3541   return true;
3542 }
3543 
3544 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3545     unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3546     MachineIRBuilder &MIRBuilder) const {
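  // Widen a scalar register to a vector register class by inserting it into
  // the low lane of an IMPLICIT_DEF. For example, an s32 FPR becomes the ssub
  // subregister of an undefined FPR128, with the remaining lanes undefined.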
3547   auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3548 
3549   auto BuildFn = [&](unsigned SubregIndex) {
3550     auto Ins =
3551         MIRBuilder
3552             .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3553             .addImm(SubregIndex);
3554     constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3555     constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3556     return &*Ins;
3557   };
3558 
3559   switch (EltSize) {
3560   case 16:
3561     return BuildFn(AArch64::hsub);
3562   case 32:
3563     return BuildFn(AArch64::ssub);
3564   case 64:
3565     return BuildFn(AArch64::dsub);
3566   default:
3567     return nullptr;
3568   }
3569 }
3570 
3571 bool AArch64InstructionSelector::selectMergeValues(
3572     MachineInstr &I, MachineRegisterInfo &MRI) const {
3573   assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3574   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3575   const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3576   assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3577   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3578 
3579   if (I.getNumOperands() != 3)
3580     return false;
3581 
3582   // Merging 2 s64s into an s128.
3583   if (DstTy == LLT::scalar(128)) {
3584     if (SrcTy.getSizeInBits() != 64)
3585       return false;
3586     MachineIRBuilder MIB(I);
3587     Register DstReg = I.getOperand(0).getReg();
3588     Register Src1Reg = I.getOperand(1).getReg();
3589     Register Src2Reg = I.getOperand(2).getReg();
3590     auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3591     MachineInstr *InsMI =
3592         emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3593     if (!InsMI)
3594       return false;
3595     MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3596                                           Src2Reg, /* LaneIdx */ 1, RB, MIB);
3597     if (!Ins2MI)
3598       return false;
3599     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3600     constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3601     I.eraseFromParent();
3602     return true;
3603   }
3604 
3605   if (RB.getID() != AArch64::GPRRegBankID)
3606     return false;
3607 
3608   if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3609     return false;
3610 
3611   auto *DstRC = &AArch64::GPR64RegClass;
3612   Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3613   MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3614                                     TII.get(TargetOpcode::SUBREG_TO_REG))
3615                                 .addDef(SubToRegDef)
3616                                 .addImm(0)
3617                                 .addUse(I.getOperand(1).getReg())
3618                                 .addImm(AArch64::sub_32);
3619   Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3620   // Need to anyext the second scalar before we can use bfm
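  // The BFMXri below (immr = 32, imms = 31) then inserts the low 32 bits of
  // the second scalar into bits [63:32] of the first, so the merge result has
  // operand 1 in the low half and operand 2 in the high half.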
3621   MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3622                                     TII.get(TargetOpcode::SUBREG_TO_REG))
3623                                 .addDef(SubToRegDef2)
3624                                 .addImm(0)
3625                                 .addUse(I.getOperand(2).getReg())
3626                                 .addImm(AArch64::sub_32);
3627   MachineInstr &BFM =
3628       *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3629            .addDef(I.getOperand(0).getReg())
3630            .addUse(SubToRegDef)
3631            .addUse(SubToRegDef2)
3632            .addImm(32)
3633            .addImm(31);
3634   constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3635   constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3636   constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3637   I.eraseFromParent();
3638   return true;
3639 }
3640 
3641 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3642                               const unsigned EltSize) {
3643   // Choose a lane copy opcode and subregister based off of the size of the
3644   // vector's elements.
3645   switch (EltSize) {
3646   case 16:
3647     CopyOpc = AArch64::CPYi16;
3648     ExtractSubReg = AArch64::hsub;
3649     break;
3650   case 32:
3651     CopyOpc = AArch64::CPYi32;
3652     ExtractSubReg = AArch64::ssub;
3653     break;
3654   case 64:
3655     CopyOpc = AArch64::CPYi64;
3656     ExtractSubReg = AArch64::dsub;
3657     break;
3658   default:
3659     // Unknown size, bail out.
3660     LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3661     return false;
3662   }
3663   return true;
3664 }
3665 
3666 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3667     Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3668     Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3669   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3670   unsigned CopyOpc = 0;
3671   unsigned ExtractSubReg = 0;
3672   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3673     LLVM_DEBUG(
3674         dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3675     return nullptr;
3676   }
3677 
3678   const TargetRegisterClass *DstRC =
3679       getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3680   if (!DstRC) {
3681     LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3682     return nullptr;
3683   }
3684 
3685   const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3686   const LLT &VecTy = MRI.getType(VecReg);
3687   const TargetRegisterClass *VecRC =
3688       getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3689   if (!VecRC) {
3690     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3691     return nullptr;
3692   }
3693 
3694   // The register that we're going to copy into.
3695   Register InsertReg = VecReg;
3696   if (!DstReg)
3697     DstReg = MRI.createVirtualRegister(DstRC);
3698   // If the lane index is 0, we just use a subregister COPY.
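  // For example, extracting lane 0 of a v4s32 is just a COPY of the ssub
  // subregister of the source vector register.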
3699   if (LaneIdx == 0) {
3700     auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3701                     .addReg(VecReg, 0, ExtractSubReg);
3702     RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3703     return &*Copy;
3704   }
3705 
3706   // Lane copies require 128-bit wide registers. If we're dealing with an
3707   // unpacked vector, then we need to move up to that width. Insert an implicit
3708   // def and a subregister insert to get us there.
3709   if (VecTy.getSizeInBits() != 128) {
3710     MachineInstr *ScalarToVector = emitScalarToVector(
3711         VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3712     if (!ScalarToVector)
3713       return nullptr;
3714     InsertReg = ScalarToVector->getOperand(0).getReg();
3715   }
3716 
3717   MachineInstr *LaneCopyMI =
3718       MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3719   constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3720 
3721   // Make sure that we actually constrain the initial copy.
3722   RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3723   return LaneCopyMI;
3724 }
3725 
3726 bool AArch64InstructionSelector::selectExtractElt(
3727     MachineInstr &I, MachineRegisterInfo &MRI) const {
3728   assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3729          "unexpected opcode!");
3730   Register DstReg = I.getOperand(0).getReg();
3731   const LLT NarrowTy = MRI.getType(DstReg);
3732   const Register SrcReg = I.getOperand(1).getReg();
3733   const LLT WideTy = MRI.getType(SrcReg);
3734   (void)WideTy;
3735   assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3736          "source register size too small!");
3737   assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3738 
3739   // Need the lane index to determine the correct copy opcode.
3740   MachineOperand &LaneIdxOp = I.getOperand(2);
3741   assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3742 
3743   if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3744     LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3745     return false;
3746   }
3747 
3748   // Find the index to extract from.
3749   auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3750   if (!VRegAndVal)
3751     return false;
3752   unsigned LaneIdx = VRegAndVal->Value;
3753 
3754   MachineIRBuilder MIRBuilder(I);
3755 
3756   const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3757   MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3758                                                LaneIdx, MIRBuilder);
3759   if (!Extract)
3760     return false;
3761 
3762   I.eraseFromParent();
3763   return true;
3764 }
3765 
3766 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3767     MachineInstr &I, MachineRegisterInfo &MRI) const {
3768   unsigned NumElts = I.getNumOperands() - 1;
3769   Register SrcReg = I.getOperand(NumElts).getReg();
3770   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3771   const LLT SrcTy = MRI.getType(SrcReg);
3772 
3773   assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3774   if (SrcTy.getSizeInBits() > 128) {
3775     LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge\n");
3776     return false;
3777   }
3778 
3779   MachineIRBuilder MIB(I);
3780 
3781   // We implement a split vector operation by treating the sub-vectors as
3782   // scalars and extracting them.
3783   const RegisterBank &DstRB =
3784       *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3785   for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3786     Register Dst = I.getOperand(OpIdx).getReg();
3787     MachineInstr *Extract =
3788         emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3789     if (!Extract)
3790       return false;
3791   }
3792   I.eraseFromParent();
3793   return true;
3794 }
3795 
3796 bool AArch64InstructionSelector::selectUnmergeValues(
3797     MachineInstr &I, MachineRegisterInfo &MRI) const {
3798   assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3799          "unexpected opcode");
3800 
3801   // TODO: Handle unmerging into GPRs and from scalars to scalars.
3802   if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3803           AArch64::FPRRegBankID ||
3804       RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3805           AArch64::FPRRegBankID) {
3806     LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar is "
3807                          "currently unsupported.\n");
3808     return false;
3809   }
3810 
3811   // The last operand is the vector source register, and every other operand is
3812   // a register to unpack into.
3813   unsigned NumElts = I.getNumOperands() - 1;
3814   Register SrcReg = I.getOperand(NumElts).getReg();
3815   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3816   const LLT WideTy = MRI.getType(SrcReg);
3817   (void)WideTy;
3818   assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3819          "can only unmerge from vector or s128 types!");
3820   assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3821          "source register size too small!");
3822 
3823   if (!NarrowTy.isScalar())
3824     return selectSplitVectorUnmerge(I, MRI);
3825 
3826   MachineIRBuilder MIB(I);
3827 
3828   // Choose a lane copy opcode and subregister based off of the size of the
3829   // vector's elements.
3830   unsigned CopyOpc = 0;
3831   unsigned ExtractSubReg = 0;
3832   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3833     return false;
3834 
3835   // Set up for the lane copies.
3836   MachineBasicBlock &MBB = *I.getParent();
3837 
3838   // Stores the registers we'll be copying from.
3839   SmallVector<Register, 4> InsertRegs;
3840 
3841   // We'll use the first register twice, so we only need NumElts-1 registers.
3842   unsigned NumInsertRegs = NumElts - 1;
3843 
3844   // If our elements fit into exactly 128 bits, then we can copy from the source
3845   // directly. Otherwise, we need to do a bit of setup with some subregister
3846   // inserts.
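  // For example, unmerging a v4s32 into four s32s copies lane 0 out as an ssub
  // subregister copy and lanes 1-3 with CPYi32 lane copies, all reading the
  // (already 128-bit wide) source register.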
3847   if (NarrowTy.getSizeInBits() * NumElts == 128) {
3848     InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3849   } else {
3850     // No. We have to perform subregister inserts. For each insert, create an
3851     // implicit def and a subregister insert, and save the register we create.
3852     for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
3853       Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3854       MachineInstr &ImpDefMI =
3855           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
3856                    ImpDefReg);
3857 
3858       // Now, create the subregister insert from SrcReg.
3859       Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3860       MachineInstr &InsMI =
3861           *BuildMI(MBB, I, I.getDebugLoc(),
3862                    TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3863                .addUse(ImpDefReg)
3864                .addUse(SrcReg)
3865                .addImm(AArch64::dsub);
3866 
3867       constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3868       constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3869 
3870       // Save the register so that we can copy from it after.
3871       InsertRegs.push_back(InsertReg);
3872     }
3873   }
3874 
3875   // Now that we've created any necessary subregister inserts, we can
3876   // create the copies.
3877   //
3878   // Perform the first copy separately as a subregister copy.
3879   Register CopyTo = I.getOperand(0).getReg();
3880   auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3881                        .addReg(InsertRegs[0], 0, ExtractSubReg);
3882   constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3883 
3884   // Now, perform the remaining copies as vector lane copies.
3885   unsigned LaneIdx = 1;
3886   for (Register InsReg : InsertRegs) {
3887     Register CopyTo = I.getOperand(LaneIdx).getReg();
3888     MachineInstr &CopyInst =
3889         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3890              .addUse(InsReg)
3891              .addImm(LaneIdx);
3892     constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3893     ++LaneIdx;
3894   }
3895 
3896   // Separately constrain the first copy's destination. Because of the
3897   // limitation in constrainOperandRegClass, we can't guarantee that this will
3898   // actually be constrained. So, do it ourselves using the second operand.
3899   const TargetRegisterClass *RC =
3900       MRI.getRegClassOrNull(I.getOperand(1).getReg());
3901   if (!RC) {
3902     LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3903     return false;
3904   }
3905 
3906   RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3907   I.eraseFromParent();
3908   return true;
3909 }
3910 
3911 bool AArch64InstructionSelector::selectConcatVectors(
3912     MachineInstr &I, MachineRegisterInfo &MRI) const {
3913   assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3914          "Unexpected opcode");
3915   Register Dst = I.getOperand(0).getReg();
3916   Register Op1 = I.getOperand(1).getReg();
3917   Register Op2 = I.getOperand(2).getReg();
3918   MachineIRBuilder MIRBuilder(I);
3919   MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3920   if (!ConcatMI)
3921     return false;
3922   I.eraseFromParent();
3923   return true;
3924 }
3925 
3926 unsigned
3927 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
3928                                                   MachineFunction &MF) const {
3929   Type *CPTy = CPVal->getType();
3930   Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
3931 
3932   MachineConstantPool *MCP = MF.getConstantPool();
3933   return MCP->getConstantPoolIndex(CPVal, Alignment);
3934 }
3935 
3936 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3937     const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3938   unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
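  // The constant is materialized with the usual ADRP + scaled-offset load
  // pair, roughly:
  //   adrp xN, .LCPIx_y
  //   ldr  qD, [xN, :lo12:.LCPIx_y]
  // with the load width chosen from the constant's store size below.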
3939 
3940   auto Adrp =
3941       MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3942           .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3943 
3944   MachineInstr *LoadMI = nullptr;
3945   switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3946   case 16:
3947     LoadMI =
3948         &*MIRBuilder
3949               .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3950               .addConstantPoolIndex(CPIdx, 0,
3951                                     AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3952     break;
3953   case 8:
3954     LoadMI = &*MIRBuilder
3955                  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3956                  .addConstantPoolIndex(
3957                      CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3958     break;
3959   default:
3960     LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3961                       << *CPVal->getType());
3962     return nullptr;
3963   }
3964   constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3965   constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3966   return LoadMI;
3967 }
3968 
3969 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
3970 /// size and RB.
3971 static std::pair<unsigned, unsigned>
3972 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3973   unsigned Opc, SubregIdx;
3974   if (RB.getID() == AArch64::GPRRegBankID) {
3975     if (EltSize == 16) {
3976       Opc = AArch64::INSvi16gpr;
3977       SubregIdx = AArch64::ssub;
3978     } else if (EltSize == 32) {
3979       Opc = AArch64::INSvi32gpr;
3980       SubregIdx = AArch64::ssub;
3981     } else if (EltSize == 64) {
3982       Opc = AArch64::INSvi64gpr;
3983       SubregIdx = AArch64::dsub;
3984     } else {
3985       llvm_unreachable("invalid elt size!");
3986     }
3987   } else {
3988     if (EltSize == 8) {
3989       Opc = AArch64::INSvi8lane;
3990       SubregIdx = AArch64::bsub;
3991     } else if (EltSize == 16) {
3992       Opc = AArch64::INSvi16lane;
3993       SubregIdx = AArch64::hsub;
3994     } else if (EltSize == 32) {
3995       Opc = AArch64::INSvi32lane;
3996       SubregIdx = AArch64::ssub;
3997     } else if (EltSize == 64) {
3998       Opc = AArch64::INSvi64lane;
3999       SubregIdx = AArch64::dsub;
4000     } else {
4001       llvm_unreachable("invalid elt size!");
4002     }
4003   }
4004   return std::make_pair(Opc, SubregIdx);
4005 }
4006 
4007 MachineInstr *AArch64InstructionSelector::emitInstr(
4008     unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4009     std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4010     const ComplexRendererFns &RenderFns) const {
4011   assert(Opcode && "Expected an opcode?");
4012   assert(!isPreISelGenericOpcode(Opcode) &&
4013          "Function should only be used to produce selected instructions!");
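  // RenderFns, when present, are the operand-renderer callbacks produced by a
  // ComplexPattern-style selector (e.g. selectArithImmed); each one appends
  // its extra immediate/shift operands to the partially built instruction.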
4014   auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4015   if (RenderFns)
4016     for (auto &Fn : *RenderFns)
4017       Fn(MI);
4018   constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4019   return &*MI;
4020 }
4021 
4022 MachineInstr *AArch64InstructionSelector::emitAddSub(
4023     const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4024     Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4025     MachineIRBuilder &MIRBuilder) const {
4026   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4027   assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4028   auto Ty = MRI.getType(LHS.getReg());
4029   assert(!Ty.isVector() && "Expected a scalar or pointer?");
4030   unsigned Size = Ty.getSizeInBits();
4031   assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4032   bool Is32Bit = Size == 32;
4033 
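  // AddrModeAndSizeToOpcode is indexed as [form][Is32Bit]:
  //   [0] = INSTRri (positive immediate),   [1] = INSTRrs (shifted register),
  //   [2] = INSTRrr (plain register),       [3] = inverted-op INSTRri (negated
  //   immediate),                           [4] = INSTRrx (extended register).
  // Column 0 holds the 64-bit opcode, column 1 the 32-bit one.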
4034   // INSTRri form with positive arithmetic immediate.
4035   if (auto Fns = selectArithImmed(RHS))
4036     return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4037                      MIRBuilder, Fns);
4038 
4039   // INSTRri form with negative arithmetic immediate.
4040   if (auto Fns = selectNegArithImmed(RHS))
4041     return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4042                      MIRBuilder, Fns);
4043 
4044   // INSTRrx form.
4045   if (auto Fns = selectArithExtendedRegister(RHS))
4046     return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4047                      MIRBuilder, Fns);
4048 
4049   // INSTRrs form.
4050   if (auto Fns = selectShiftedRegister(RHS))
4051     return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4052                      MIRBuilder, Fns);
4053   return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4054                    MIRBuilder);
4055 }
4056 
4057 MachineInstr *
4058 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4059                                     MachineOperand &RHS,
4060                                     MachineIRBuilder &MIRBuilder) const {
4061   const std::array<std::array<unsigned, 2>, 5> OpcTable{
4062       {{AArch64::ADDXri, AArch64::ADDWri},
4063        {AArch64::ADDXrs, AArch64::ADDWrs},
4064        {AArch64::ADDXrr, AArch64::ADDWrr},
4065        {AArch64::SUBXri, AArch64::SUBWri},
4066        {AArch64::ADDXrx, AArch64::ADDWrx}}};
4067   return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4068 }
4069 
4070 MachineInstr *
4071 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4072                                      MachineOperand &RHS,
4073                                      MachineIRBuilder &MIRBuilder) const {
4074   const std::array<std::array<unsigned, 2>, 5> OpcTable{
4075       {{AArch64::ADDSXri, AArch64::ADDSWri},
4076        {AArch64::ADDSXrs, AArch64::ADDSWrs},
4077        {AArch64::ADDSXrr, AArch64::ADDSWrr},
4078        {AArch64::SUBSXri, AArch64::SUBSWri},
4079        {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4080   return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4081 }
4082 
4083 MachineInstr *
4084 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4085                                      MachineOperand &RHS,
4086                                      MachineIRBuilder &MIRBuilder) const {
4087   const std::array<std::array<unsigned, 2>, 5> OpcTable{
4088       {{AArch64::SUBSXri, AArch64::SUBSWri},
4089        {AArch64::SUBSXrs, AArch64::SUBSWrs},
4090        {AArch64::SUBSXrr, AArch64::SUBSWrr},
4091        {AArch64::ADDSXri, AArch64::ADDSWri},
4092        {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4093   return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4094 }
4095 
4096 MachineInstr *
4097 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4098                                     MachineIRBuilder &MIRBuilder) const {
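  // CMN is just an ADDS whose value result is discarded: emit the ADDS into a
  // scratch vreg of the right size so that only the NZCV flags are consumed.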
4099   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4100   bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4101   auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4102   return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4103 }
4104 
4105 MachineInstr *
4106 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4107                                     MachineIRBuilder &MIRBuilder) const {
4108   assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4109   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4110   LLT Ty = MRI.getType(LHS.getReg());
4111   unsigned RegSize = Ty.getSizeInBits();
4112   bool Is32Bit = (RegSize == 32);
4113   const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4114                                    {AArch64::ANDSXrs, AArch64::ANDSWrs},
4115                                    {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4116   // ANDS needs a logical immediate for its immediate form. Check if we can
4117   // fold one in.
4118   if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4119     if (AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize)) {
4120       auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4121       TstMI.addImm(
4122           AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
4123       constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4124       return &*TstMI;
4125     }
4126   }
4127 
4128   if (auto Fns = selectLogicalShiftedRegister(RHS))
4129     return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4130   return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4131 }
4132 
4133 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4134     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4135     MachineIRBuilder &MIRBuilder) const {
4136   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4137   assert(Predicate.isPredicate() && "Expected predicate?");
4138   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4139   LLT CmpTy = MRI.getType(LHS.getReg());
4140   assert(!CmpTy.isVector() && "Expected scalar or pointer");
4141   unsigned Size = CmpTy.getSizeInBits();
4142   (void)Size;
4143   assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4144   // Fold the compare into a cmn or tst if possible.
4145   if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4146     return FoldCmp;
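  // Otherwise emit a SUBS; a CMP is just a SUBS whose result is discarded.
  // Callers consume the NZCV flags this sets, and the cloned destination vreg
  // simply mirrors LHS's register class/bank.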
4147   auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4148   return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4149 }
4150 
4151 MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4152     Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4153   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4154 #ifndef NDEBUG
4155   LLT Ty = MRI.getType(Dst);
4156   assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4157          "Expected a 32-bit scalar register?");
4158 #endif
4159   const Register ZeroReg = AArch64::WZR;
4160   auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
4161     auto CSet =
4162         MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
4163             .addImm(getInvertedCondCode(CC));
4164     constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
4165     return &*CSet;
4166   };
4167 
4168   AArch64CC::CondCode CC1, CC2;
4169   changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4170   if (CC2 == AArch64CC::AL)
4171     return EmitCSet(Dst, CC1);
4172 
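  // Some FP predicates (e.g. ONE, UEQ) map to two AArch64 condition codes.
  // Emit a CSET for each and OR the two results together.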
4173   const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4174   Register Def1Reg = MRI.createVirtualRegister(RC);
4175   Register Def2Reg = MRI.createVirtualRegister(RC);
4176   EmitCSet(Def1Reg, CC1);
4177   EmitCSet(Def2Reg, CC2);
4178   auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4179   constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4180   return &*OrMI;
4181 }
4182 
4183 MachineInstr *
4184 AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4185                                           MachineIRBuilder &MIRBuilder) const {
4186   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4187   LLT Ty = MRI.getType(LHS);
4188   if (Ty.isVector())
4189     return nullptr;
4190   unsigned OpSize = Ty.getSizeInBits();
4191   if (OpSize != 32 && OpSize != 64)
4192     return nullptr;
4193 
4194   // If this is a compare against +0.0, then we don't have
4195   // to explicitly materialize a constant.
4196   const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4197   bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4198   unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4199                               {AArch64::FCMPSri, AArch64::FCMPDri}};
4200   unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4201 
4202   // Partially build the compare. Decide if we need to add a use for the
4203   // third operand based on whether we're comparing against 0.0.
4204   auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4205   if (!ShouldUseImm)
4206     CmpMI.addUse(RHS);
4207   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4208   return &*CmpMI;
4209 }
4210 
4211 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4212     Optional<Register> Dst, Register Op1, Register Op2,
4213     MachineIRBuilder &MIRBuilder) const {
4214   // We implement a vector concat by:
4215   // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4216   // 2. Insert the upper vector into the destination's upper element
4217   // TODO: some of this code is common with G_BUILD_VECTOR handling.
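  // For example, concatenating two 64-bit vectors (say <2 x s32>) widens Op1
  // into the low half of a 128-bit register and then inserts Op2 into lane 1
  // (the high 64 bits) with an INSvi64lane.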
4218   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4219 
4220   const LLT Op1Ty = MRI.getType(Op1);
4221   const LLT Op2Ty = MRI.getType(Op2);
4222 
4223   if (Op1Ty != Op2Ty) {
4224     LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4225     return nullptr;
4226   }
4227   assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4228 
4229   if (Op1Ty.getSizeInBits() >= 128) {
4230     LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4231     return nullptr;
4232   }
4233 
4234   // At the moment we just support 64 bit vector concats.
4235   if (Op1Ty.getSizeInBits() != 64) {
4236     LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4237     return nullptr;
4238   }
4239 
4240   const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4241   const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4242   const TargetRegisterClass *DstRC =
4243       getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4244 
4245   MachineInstr *WidenedOp1 =
4246       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4247   MachineInstr *WidenedOp2 =
4248       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4249   if (!WidenedOp1 || !WidenedOp2) {
4250     LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4251     return nullptr;
4252   }
4253 
4254   // Now do the insert of the upper element.
4255   unsigned InsertOpc, InsSubRegIdx;
4256   std::tie(InsertOpc, InsSubRegIdx) =
4257       getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4258 
4259   if (!Dst)
4260     Dst = MRI.createVirtualRegister(DstRC);
4261   auto InsElt =
4262       MIRBuilder
4263           .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4264           .addImm(1) /* Lane index */
4265           .addUse(WidenedOp2->getOperand(0).getReg())
4266           .addImm(0);
4267   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4268   return &*InsElt;
4269 }
4270 
4271 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
4272     MachineInstr &I, MachineRegisterInfo &MRI) const {
4273   assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
4274          "Expected a G_FCONSTANT!");
4275   MachineOperand &ImmOp = I.getOperand(1);
4276   unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
4277 
4278   // Only handle 32 and 64 bit defs for now.
4279   if (DefSize != 32 && DefSize != 64)
4280     return nullptr;
4281 
4282   // Don't handle null values using FMOV.
4283   if (ImmOp.getFPImm()->isNullValue())
4284     return nullptr;
4285 
4286   // Get the immediate representation for the FMOV.
4287   const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
4288   int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
4289                           : AArch64_AM::getFP64Imm(ImmValAPF);
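  // These return the 8-bit FMOV immediate encoding, or -1 if the value is not
  // representable that way (e.g. 1.0 and 0.5 are encodable, 0.1 is not).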
4290 
4291   // If this is -1, it means the immediate can't be represented as the requested
4292   // floating point value. Bail.
4293   if (Imm == -1)
4294     return nullptr;
4295 
4296   // Update MI to represent the new FMOV instruction, constrain it, and return.
4297   ImmOp.ChangeToImmediate(Imm);
4298   unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
4299   I.setDesc(TII.get(MovOpc));
4300   constrainSelectedInstRegOperands(I, TII, TRI, RBI);
4301   return &I;
4302 }
4303 
4304 MachineInstr *
4305 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
4306                                      MachineIRBuilder &MIRBuilder) const {
4307   // CSINC increments the result when the predicate is false. Invert it.
4308   const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
4309       CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
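  // cset wD, cc is an alias of csinc wD, wzr, wzr, invert(cc): the result is 0
  // when the inverted condition holds, and wzr + 1 == 1 otherwise, i.e. when
  // cc itself holds.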
4310   auto I = MIRBuilder
4311                .buildInstr(AArch64::CSINCWr, {DefReg},
4312                            {Register(AArch64::WZR), Register(AArch64::WZR)})
4313                .addImm(InvCC);
4314   constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
4315   return &*I;
4316 }
4317 
4318 std::pair<MachineInstr *, AArch64CC::CondCode>
4319 AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4320                                            MachineOperand &LHS,
4321                                            MachineOperand &RHS,
4322                                            MachineIRBuilder &MIRBuilder) const {
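  // The overflow result is read back from NZCV after the flag-setting op:
  // VS (V set) for signed overflow, HS (C set) for unsigned add carry-out.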
4323   switch (Opcode) {
4324   default:
4325     llvm_unreachable("Unexpected opcode!");
4326   case TargetOpcode::G_SADDO:
4327     return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4328   case TargetOpcode::G_UADDO:
4329     return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4330   case TargetOpcode::G_SSUBO:
4331     return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4332   }
4333 }
4334 
4335 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
4336   MachineIRBuilder MIB(I);
4337   MachineRegisterInfo &MRI = *MIB.getMRI();
4338   // We want to recognize this pattern:
4339   //
4340   // $z = G_FCMP pred, $x, $y
4341   // ...
4342   // $w = G_SELECT $z, $a, $b
4343   //
4344   // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4345   // some copies/truncs in between.)
4346   //
4347   // If we see this, then we can emit something like this:
4348   //
4349   // fcmp $x, $y
4350   // fcsel $w, $a, $b, pred
4351   //
4352   // Rather than emitting both of the rather long sequences in the standard
4353   // G_FCMP/G_SELECT select methods.
4354 
4355   // First, check if the condition is defined by a compare.
4356   MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4357   while (CondDef) {
4358     // We can only fold if all of the defs have one use.
4359     Register CondDefReg = CondDef->getOperand(0).getReg();
4360     if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4361       // Unless it's another select.
4362       for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4363         if (CondDef == &UI)
4364           continue;
4365         if (UI.getOpcode() != TargetOpcode::G_SELECT)
4366           return false;
4367       }
4368     }
4369 
4370     // We can skip over G_TRUNC since the condition is 1-bit.
4371     // Truncating/extending can have no impact on the value.
4372     unsigned Opc = CondDef->getOpcode();
4373     if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4374       break;
4375 
4376     // Can't see past copies from physregs.
4377     if (Opc == TargetOpcode::COPY &&
4378         Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4379       return false;
4380 
4381     CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4382   }
4383 
4384   // Is the condition defined by a compare?
4385   if (!CondDef)
4386     return false;
4387 
4388   unsigned CondOpc = CondDef->getOpcode();
4389   if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4390     return false;
4391 
4392   AArch64CC::CondCode CondCode;
4393   if (CondOpc == TargetOpcode::G_ICMP) {
4394     auto Pred =
4395         static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4396     CondCode = changeICMPPredToAArch64CC(Pred);
4397     emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4398                        CondDef->getOperand(1), MIB);
4399   } else {
4400     // Get the condition code for the select.
4401     auto Pred =
4402         static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4403     AArch64CC::CondCode CondCode2;
4404     changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4405 
4406     // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4407     // instructions to emit the comparison.
4408     // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4409     // unnecessary.
4410     if (CondCode2 != AArch64CC::AL)
4411       return false;
4412 
4413     if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4414                        CondDef->getOperand(3).getReg(), MIB)) {
4415       LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4416       return false;
4417     }
4418   }
4419 
4420   // Emit the select.
4421   emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4422              I.getOperand(3).getReg(), CondCode, MIB);
4423   I.eraseFromParent();
4424   return true;
4425 }
4426 
4427 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4428     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4429     MachineIRBuilder &MIRBuilder) const {
4430   assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4431          "Unexpected MachineOperand");
4432   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4433   // We want to find this sort of thing:
4434   // x = G_SUB 0, y
4435   // G_ICMP z, x
4436   //
4437   // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4438   // e.g:
4439   //
4440   // cmn z, y
4441 
4442   // Helper lambda to detect the subtract followed by the compare.
4443   // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
4444   auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
4445     if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
4446       return false;
4447 
4448     // Need to make sure NZCV is the same at the end of the transformation.
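    // CMN sets flags as for an addition, so C and V can differ from the
    // equivalent subtraction; only the Z flag (EQ/NE) is guaranteed to match.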
4449     if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
4450       return false;
4451 
4456     // Make sure that we're getting
4457     // x = G_SUB 0, y
4458     auto ValAndVReg =
4459         getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
4460     if (!ValAndVReg || ValAndVReg->Value != 0)
4461       return false;
4462 
4463     // This can safely be represented as a CMN.
4464     return true;
4465   };
4466 
4467   // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4468   MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4469   MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4470   CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
4471   const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
4472 
4473   // Given this:
4474   //
4475   // x = G_SUB 0, y
4476   // G_ICMP x, z
4477   //
4478   // Produce this:
4479   //
4480   // cmn y, z
4481   if (IsCMN(LHSDef, CC))
4482     return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4483 
4484   // Same idea here, but with the RHS of the compare instead:
4485   //
4486   // Given this:
4487   //
4488   // x = G_SUB 0, y
4489   // G_ICMP z, x
4490   //
4491   // Produce this:
4492   //
4493   // cmn z, y
4494   if (IsCMN(RHSDef, CC))
4495     return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4496 
4497   // Given this:
4498   //
4499   // z = G_AND x, y
4500   // G_ICMP z, 0
4501   //
4502   // Produce this if the compare is signed:
4503   //
4504   // tst x, y
4505   if (!CmpInst::isUnsigned(P) && LHSDef &&
4506       LHSDef->getOpcode() == TargetOpcode::G_AND) {
4507     // Make sure that the RHS is 0.
4508     auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4509     if (!ValAndVReg || ValAndVReg->Value != 0)
4510       return nullptr;
4511 
4512     return emitTST(LHSDef->getOperand(1),
4513                    LHSDef->getOperand(2), MIRBuilder);
4514   }
4515 
4516   return nullptr;
4517 }
4518 
4519 bool AArch64InstructionSelector::selectShuffleVector(
4520     MachineInstr &I, MachineRegisterInfo &MRI) const {
4521   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4522   Register Src1Reg = I.getOperand(1).getReg();
4523   const LLT Src1Ty = MRI.getType(Src1Reg);
4524   Register Src2Reg = I.getOperand(2).getReg();
4525   const LLT Src2Ty = MRI.getType(Src2Reg);
4526   ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4527 
4528   MachineBasicBlock &MBB = *I.getParent();
4529   MachineFunction &MF = *MBB.getParent();
4530   LLVMContext &Ctx = MF.getFunction().getContext();
4531 
4532   // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4533   // it originated from a <1 x T> type. Those should have been lowered into
4534   // G_BUILD_VECTOR earlier.
4535   if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4536     LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4537     return false;
4538   }
4539 
4540   unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4541 
4542   SmallVector<Constant *, 64> CstIdxs;
4543   for (int Val : Mask) {
4544     // For now, any undef indexes we'll just assume to be 0. This should be
4545     // optimized in future, e.g. to select DUP etc.
4546     Val = Val < 0 ? 0 : Val;
4547     for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4548       unsigned Offset = Byte + Val * BytesPerElt;
4549       CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4550     }
4551   }
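  // CstIdxs now holds one byte index per destination byte, selecting bytes out
  // of the concatenated source registers in the order TBL expects.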
4552 
4553   MachineIRBuilder MIRBuilder(I);
4554 
4555   // Use a constant pool to load the index vector for TBL.
4556   Constant *CPVal = ConstantVector::get(CstIdxs);
4557   MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
4558   if (!IndexLoad) {
4559     LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
4560     return false;
4561   }
4562 
4563   if (DstTy.getSizeInBits() != 128) {
4564     assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4565     // This case can be done with TBL1.
4566     MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
4567     if (!Concat) {
4568       LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
4569       return false;
4570     }
4571 
4572     // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
4573     IndexLoad =
4574         emitScalarToVector(64, &AArch64::FPR128RegClass,
4575                            IndexLoad->getOperand(0).getReg(), MIRBuilder);
4576 
4577     auto TBL1 = MIRBuilder.buildInstr(
4578         AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4579         {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4580     constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4581 
4582     auto Copy =
4583         MIRBuilder
4584             .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4585             .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4586     RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4587     I.eraseFromParent();
4588     return true;
4589   }
4590 
4591   // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4592   // Q registers for regalloc.
4593   auto RegSeq = MIRBuilder
4594                     .buildInstr(TargetOpcode::REG_SEQUENCE,
4595                                 {&AArch64::QQRegClass}, {Src1Reg})
4596                     .addImm(AArch64::qsub0)
4597                     .addUse(Src2Reg)
4598                     .addImm(AArch64::qsub1);
4599 
4600   auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4601                                     {RegSeq, IndexLoad->getOperand(0)});
4602   constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
4603   constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4604   I.eraseFromParent();
4605   return true;
4606 }
4607 
4608 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4609     Optional<Register> DstReg, Register SrcReg, Register EltReg,
4610     unsigned LaneIdx, const RegisterBank &RB,
4611     MachineIRBuilder &MIRBuilder) const {
4612   MachineInstr *InsElt = nullptr;
4613   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4614   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4615 
4616   // Create a register to define with the insert if one wasn't passed in.
4617   if (!DstReg)
4618     DstReg = MRI.createVirtualRegister(DstRC);
4619 
4620   unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4621   unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4622 
4623   if (RB.getID() == AArch64::FPRRegBankID) {
4624     auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4625     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4626                  .addImm(LaneIdx)
4627                  .addUse(InsSub->getOperand(0).getReg())
4628                  .addImm(0);
4629   } else {
4630     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4631                  .addImm(LaneIdx)
4632                  .addUse(EltReg);
4633   }
4634 
4635   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4636   return InsElt;
4637 }
4638 
4639 bool AArch64InstructionSelector::selectInsertElt(
4640     MachineInstr &I, MachineRegisterInfo &MRI) const {
4641   assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4642 
4643   // Get information on the destination.
4644   Register DstReg = I.getOperand(0).getReg();
4645   const LLT DstTy = MRI.getType(DstReg);
4646   unsigned VecSize = DstTy.getSizeInBits();
4647 
4648   // Get information on the element we want to insert into the destination.
4649   Register EltReg = I.getOperand(2).getReg();
4650   const LLT EltTy = MRI.getType(EltReg);
4651   unsigned EltSize = EltTy.getSizeInBits();
4652   if (EltSize < 16 || EltSize > 64)
4653     return false; // Don't support all element types yet.
4654 
4655   // Find the definition of the index. Bail out if it's not defined by a
4656   // G_CONSTANT.
4657   Register IdxReg = I.getOperand(3).getReg();
4658   auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
4659   if (!VRegAndVal)
4660     return false;
4661   unsigned LaneIdx = VRegAndVal->Value;
4662 
4663   // Perform the lane insert.
4664   Register SrcReg = I.getOperand(1).getReg();
4665   const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4666   MachineIRBuilder MIRBuilder(I);
4667 
4668   if (VecSize < 128) {
4669     // If the vector we're inserting into is smaller than 128 bits, widen it
4670     // to 128 to do the insert.
4671     MachineInstr *ScalarToVec = emitScalarToVector(
4672         VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
4673     if (!ScalarToVec)
4674       return false;
4675     SrcReg = ScalarToVec->getOperand(0).getReg();
4676   }
4677 
4678   // Create an insert into a new FPR128 register.
4679   // Note that if our vector is already 128 bits, we end up emitting an extra
4680   // register.
4681   MachineInstr *InsMI =
4682       emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
4683 
4684   if (VecSize < 128) {
4685     // If we had to widen to perform the insert, then we have to demote back to
4686     // the original size to get the result we want.
4687     Register DemoteVec = InsMI->getOperand(0).getReg();
4688     const TargetRegisterClass *RC =
4689         getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4690     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4691       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4692       return false;
4693     }
4694     unsigned SubReg = 0;
4695     if (!getSubRegForClass(RC, TRI, SubReg))
4696       return false;
4697     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4698       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
4699                         << ")\n");
4700       return false;
4701     }
4702     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4703         .addReg(DemoteVec, 0, SubReg);
4704     RBI.constrainGenericRegister(DstReg, *RC, MRI);
4705   } else {
4706     // No widening needed.
4707     InsMI->getOperand(0).setReg(DstReg);
4708     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4709   }
4710 
4711   I.eraseFromParent();
4712   return true;
4713 }
4714 
4715 bool AArch64InstructionSelector::tryOptConstantBuildVec(
4716     MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
4717   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4718   unsigned DstSize = DstTy.getSizeInBits();
4719   assert(DstSize <= 128 && "Unexpected build_vec type!");
4720   if (DstSize < 32)
4721     return false;
4722   // Check if we're building a constant vector, in which case we want to
4723   // generate a constant pool load instead of a vector insert sequence.
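  // e.g. a <4 x s32> G_BUILD_VECTOR of G_CONSTANTs can become a single ADRP +
  // LDRQui of a materialized constant (see emitLoadFromConstantPool) instead
  // of a scalar-to-vector plus three lane inserts.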
4724   SmallVector<Constant *, 16> Csts;
4725   for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
4726     // Try to find G_CONSTANT or G_FCONSTANT
4727     auto *OpMI =
4728         getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
4729     if (OpMI)
4730       Csts.emplace_back(
4731           const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
4732     else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
4733                                   I.getOperand(Idx).getReg(), MRI)))
4734       Csts.emplace_back(
4735           const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
4736     else
4737       return false;
4738   }
4739   Constant *CV = ConstantVector::get(Csts);
4740   MachineIRBuilder MIB(I);
4741   if (CV->isNullValue()) {
4742     // Until the importer can support immAllZerosV in pattern leaf nodes,
4743     // select a zero move manually here.
4744     Register DstReg = I.getOperand(0).getReg();
4745     if (DstSize == 128) {
4746       auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
4747       I.eraseFromParent();
4748       return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
4749     } else if (DstSize == 64) {
4750       auto Mov =
4751           MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
4752               .addImm(0);
4753       MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4754           .addReg(Mov.getReg(0), 0, AArch64::dsub);
4755       I.eraseFromParent();
4756       return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
4757     }
4758   }
4759   auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
4760   if (!CPLoad) {
4761     LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector");
4762     return false;
4763   }
4764   MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
4765   RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4766                                *MRI.getRegClass(CPLoad->getOperand(0).getReg()),
4767                                MRI);
4768   I.eraseFromParent();
4769   return true;
4770 }
4771 
4772 bool AArch64InstructionSelector::selectBuildVector(
4773     MachineInstr &I, MachineRegisterInfo &MRI) const {
4774   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4775   // Until we port more of the optimized selections, for now just use a vector
4776   // insert sequence.
4777   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4778   const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
4779   unsigned EltSize = EltTy.getSizeInBits();
4780 
4781   if (tryOptConstantBuildVec(I, DstTy, MRI))
4782     return true;
4783   if (EltSize < 16 || EltSize > 64)
4784     return false; // Don't support all element types yet.
4785   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4786   MachineIRBuilder MIRBuilder(I);
4787 
4788   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4789   MachineInstr *ScalarToVec =
4790       emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
4791                          I.getOperand(1).getReg(), MIRBuilder);
4792   if (!ScalarToVec)
4793     return false;
4794 
4795   Register DstVec = ScalarToVec->getOperand(0).getReg();
4796   unsigned DstSize = DstTy.getSizeInBits();
4797 
4798   // Keep track of the last MI we inserted. Later on, we might be able to save
4799   // a copy using it.
4800   MachineInstr *PrevMI = nullptr;
4801   for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
4802     // Note that if we don't do a subregister copy, we can end up making an
4803     // extra register.
4804     PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
4805                               MIRBuilder);
4806     DstVec = PrevMI->getOperand(0).getReg();
4807   }
4808 
4809   // If DstTy's size in bits is less than 128, then emit a subregister copy
4810   // from DstVec to the last register we've defined.
4811   if (DstSize < 128) {
4812     // Force this to be FPR using the destination vector.
4813     const TargetRegisterClass *RC =
4814         getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
4815     if (!RC)
4816       return false;
4817     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4818       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4819       return false;
4820     }
4821 
4822     unsigned SubReg = 0;
4823     if (!getSubRegForClass(RC, TRI, SubReg))
4824       return false;
4825     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4826       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
4827                         << ")\n");
4828       return false;
4829     }
4830 
4831     Register Reg = MRI.createVirtualRegister(RC);
4832     Register DstReg = I.getOperand(0).getReg();
4833 
4834     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4835         .addReg(DstVec, 0, SubReg);
4836     MachineOperand &RegOp = I.getOperand(1);
4837     RegOp.setReg(Reg);
4838     RBI.constrainGenericRegister(DstReg, *RC, MRI);
4839   } else {
4840     // We don't need a subregister copy. Save a copy by re-using the
4841     // destination register on the final insert.
4842     assert(PrevMI && "PrevMI was null?");
4843     PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
4844     constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
4845   }
4846 
4847   I.eraseFromParent();
4848   return true;
4849 }
4850 
4851 /// Helper function to find an intrinsic ID on a MachineInstr. Returns the
4852 /// ID if it exists, and 0 otherwise.
4853 static unsigned findIntrinsicID(MachineInstr &I) {
4854   auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
4855     return Op.isIntrinsicID();
4856   });
4857   if (IntrinOp == I.operands_end())
4858     return 0;
4859   return IntrinOp->getIntrinsicID();
4860 }
4861 
4862 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
4863     MachineInstr &I, MachineRegisterInfo &MRI) const {
4864   // Find the intrinsic ID.
4865   unsigned IntrinID = findIntrinsicID(I);
4866   if (!IntrinID)
4867     return false;
4868   MachineIRBuilder MIRBuilder(I);
4869 
4870   // Select the instruction.
4871   switch (IntrinID) {
4872   default:
4873     return false;
4874   case Intrinsic::trap:
4875     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
4876     break;
4877   case Intrinsic::debugtrap:
4878     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
4879     break;
4880   case Intrinsic::ubsantrap:
4881     MIRBuilder.buildInstr(AArch64::BRK, {}, {})
4882         .addImm(I.getOperand(1).getImm() | ('U' << 8));
4883     break;
4884   }
4885 
4886   I.eraseFromParent();
4887   return true;
4888 }
4889 
4890 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
4891                                                  MachineRegisterInfo &MRI) {
4892   unsigned IntrinID = findIntrinsicID(I);
4893   if (!IntrinID)
4894     return false;
4895   MachineIRBuilder MIRBuilder(I);
4896 
4897   switch (IntrinID) {
4898   default:
4899     break;
4900   case Intrinsic::aarch64_crypto_sha1h: {
4901     Register DstReg = I.getOperand(0).getReg();
4902     Register SrcReg = I.getOperand(2).getReg();
4903 
4904     // FIXME: Should this be an assert?
4905     if (MRI.getType(DstReg).getSizeInBits() != 32 ||
4906         MRI.getType(SrcReg).getSizeInBits() != 32)
4907       return false;
4908 
4909     // The operation has to happen on FPRs. Set up some new FPR registers for
4910     // the source and destination if they are on GPRs.
4911     if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4912       SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4913       MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
4914 
4915       // Make sure the copy ends up getting constrained properly.
4916       RBI.constrainGenericRegister(I.getOperand(2).getReg(),
4917                                    AArch64::GPR32RegClass, MRI);
4918     }
4919 
4920     if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
4921       DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4922 
4923     // Actually insert the instruction.
4924     auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
4925     constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
4926 
4927     // Did we create a new register for the destination?
4928     if (DstReg != I.getOperand(0).getReg()) {
4929       // Yep. Copy the result of the instruction back into the original
4930       // destination.
4931       MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4932       RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4933                                    AArch64::GPR32RegClass, MRI);
4934     }
4935 
4936     I.eraseFromParent();
4937     return true;
4938   }
4939   case Intrinsic::frameaddress:
4940   case Intrinsic::returnaddress: {
4941     MachineFunction &MF = *I.getParent()->getParent();
4942     MachineFrameInfo &MFI = MF.getFrameInfo();
4943 
4944     unsigned Depth = I.getOperand(2).getImm();
4945     Register DstReg = I.getOperand(0).getReg();
4946     RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
4947 
4948     if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
4949       if (!MFReturnAddr) {
4950         // Insert the copy from LR/X30 into the entry block, before it can be
4951         // clobbered by anything.
4952         MFI.setReturnAddressIsTaken(true);
4953         MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
4954                                                 AArch64::GPR64RegClass);
4955       }
4956 
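      // Strip any pointer authentication code from the return address: use
      // XPACI directly on v8.3a+, otherwise the XPACLRI pseudo, which
      // implicitly operates on LR (hence the copies through LR below).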
4957       if (STI.hasV8_3aOps()) {
4958         MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
4959       } else {
4960         MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
4961         MIRBuilder.buildInstr(AArch64::XPACLRI);
4962         MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
4963       }
4964 
4965       I.eraseFromParent();
4966       return true;
4967     }
4968 
4969     MFI.setFrameAddressIsTaken(true);
4970     Register FrameAddr(AArch64::FP);
4971     while (Depth--) {
4972       Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
4973       auto Ldr =
4974           MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
4975               .addImm(0);
4976       constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
4977       FrameAddr = NextFrame;
4978     }
4979 
4980     if (IntrinID == Intrinsic::frameaddress)
4981       MIRBuilder.buildCopy({DstReg}, {FrameAddr});
4982     else {
4983       MFI.setReturnAddressIsTaken(true);
4984 
4985       if (STI.hasV8_3aOps()) {
4986         Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4987         MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
4988         MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
4989       } else {
4990         MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
4991         MIRBuilder.buildInstr(AArch64::XPACLRI);
4992         MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
4993       }
4994     }
4995 
4996     I.eraseFromParent();
4997     return true;
4998   }
4999   }
5000   return false;
5001 }
5002 
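/// The ShiftA_*/ShiftB_* renderers below compute the two immediates of a
/// bitfield-move encoding of a constant left shift; for 32 bits,
///   lsl wD, wN, #sh  ==  ubfm wD, wN, #((32 - sh) & 31), #(31 - sh),
/// which is what selectShiftA_32 and selectShiftB_32 return (and likewise for
/// the 64-bit variants). They appear to correspond to the shift_a/shift_b
/// immediate transforms used by the imported shift patterns.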
5003 InstructionSelector::ComplexRendererFns
5004 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
5005   auto MaybeImmed = getImmedFromMO(Root);
5006   if (MaybeImmed == None || *MaybeImmed > 31)
5007     return None;
5008   uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
5009   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5010 }
5011 
5012 InstructionSelector::ComplexRendererFns
5013 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
5014   auto MaybeImmed = getImmedFromMO(Root);
5015   if (MaybeImmed == None || *MaybeImmed > 31)
5016     return None;
5017   uint64_t Enc = 31 - *MaybeImmed;
5018   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5019 }
5020 
5021 InstructionSelector::ComplexRendererFns
5022 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
5023   auto MaybeImmed = getImmedFromMO(Root);
5024   if (MaybeImmed == None || *MaybeImmed > 63)
5025     return None;
5026   uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
5027   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5028 }
5029 
5030 InstructionSelector::ComplexRendererFns
5031 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
5032   auto MaybeImmed = getImmedFromMO(Root);
5033   if (MaybeImmed == None || *MaybeImmed > 63)
5034     return None;
5035   uint64_t Enc = 63 - *MaybeImmed;
5036   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5037 }
5038 
5039 /// Helper to select an immediate value that can be represented as a 12-bit
5040 /// value shifted left by either 0 or 12. If it is possible to do so, return
5041 /// the immediate and shift value. If not, return None.
5042 ///
5043 /// Used by selectArithImmed and selectNegArithImmed.
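/// For example, 0x123 is returned as {imm = 0x123, shift = LSL #0} and
/// 0x123000 as {imm = 0x123, shift = LSL #12}, while 0x123456 returns None.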
5044 InstructionSelector::ComplexRendererFns
5045 AArch64InstructionSelector::select12BitValueWithLeftShift(
5046     uint64_t Immed) const {
5047   unsigned ShiftAmt;
5048   if (Immed >> 12 == 0) {
5049     ShiftAmt = 0;
5050   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
5051     ShiftAmt = 12;
5052     Immed = Immed >> 12;
5053   } else
5054     return None;
5055 
5056   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
5057   return {{
5058       [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
5059       [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
5060   }};
5061 }
5062 
5063 /// SelectArithImmed - Select an immediate value that can be represented as
5064 /// a 12-bit value shifted left by either 0 or 12.  If so, return true with
5065 /// Val set to the 12-bit value and Shift set to the shifter operand.
5066 InstructionSelector::ComplexRendererFns
5067 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
5068   // This function is called from the addsub_shifted_imm ComplexPattern,
5069   // which lists [imm] as the list of opcode it's interested in, however
5070   // we still need to check whether the operand is actually an immediate
5071   // here because the ComplexPattern opcode list is only used in
5072   // root-level opcode matching.
5073   auto MaybeImmed = getImmedFromMO(Root);
5074   if (MaybeImmed == None)
5075     return None;
5076   return select12BitValueWithLeftShift(*MaybeImmed);
5077 }
5078 
5079 /// SelectNegArithImmed - As above, but negates the value before trying to
5080 /// select it.
5081 InstructionSelector::ComplexRendererFns
5082 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
5083   // We need a register here, because we need to know if we have a 64 or 32
5084   // bit immediate.
5085   if (!Root.isReg())
5086     return None;
5087   auto MaybeImmed = getImmedFromMO(Root);
5088   if (MaybeImmed == None)
5089     return None;
5090   uint64_t Immed = *MaybeImmed;
5091 
5092   // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
5093   // have the opposite effect on the C flag, so this pattern mustn't match under
5094   // those circumstances.
5095   if (Immed == 0)
5096     return None;
5097 
5098   // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
5099   // the root.
5100   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5101   if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
5102     Immed = ~((uint32_t)Immed) + 1;
5103   else
5104     Immed = ~Immed + 1ULL;
5105 
5106   if (Immed & 0xFFFFFFFFFF000000ULL)
5107     return None;
5108 
5109   Immed &= 0xFFFFFFULL;
5110   return select12BitValueWithLeftShift(Immed);
5111 }
5112 
5113 /// Return true if it is worth folding MI into an extended register. That is,
5114 /// if it's safe to pull it into the addressing mode of a load or store as a
5115 /// shift.
5116 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
5117     MachineInstr &MI, const MachineRegisterInfo &MRI) const {
5118   // Always fold if there is one use, or if we're optimizing for size.
5119   Register DefReg = MI.getOperand(0).getReg();
5120   if (MRI.hasOneNonDBGUse(DefReg) ||
5121       MI.getParent()->getParent()->getFunction().hasMinSize())
5122     return true;
5123 
5124   // It's better to avoid folding and recomputing shifts when we don't have a
5125   // fastpath.
5126   if (!STI.hasLSLFast())
5127     return false;
5128 
5129   // We have a fastpath, so folding a shift in and potentially computing it
5130   // many times may be beneficial. Check if this is only used in memory ops.
5131   // If it is, then we should fold.
5132   return all_of(MRI.use_nodbg_instructions(DefReg),
5133                 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
5134 }
5135 
5136 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
5137   switch (Type) {
5138   case AArch64_AM::SXTB:
5139   case AArch64_AM::SXTH:
5140   case AArch64_AM::SXTW:
5141     return true;
5142   default:
5143     return false;
5144   }
5145 }
5146 
5147 InstructionSelector::ComplexRendererFns
5148 AArch64InstructionSelector::selectExtendedSHL(
5149     MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
5150     unsigned SizeInBytes, bool WantsExt) const {
5151   assert(Base.isReg() && "Expected base to be a register operand");
5152   assert(Offset.isReg() && "Expected offset to be a register operand");
5153 
5154   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5155   MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
5156   if (!OffsetInst)
5157     return None;
5158 
5159   unsigned OffsetOpc = OffsetInst->getOpcode();
5160   bool LookedThroughZExt = false;
5161   if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
5162     // Try to look through a ZEXT.
5163     if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
5164       return None;
5165 
5166     OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
5167     OffsetOpc = OffsetInst->getOpcode();
5168     LookedThroughZExt = true;
5169 
5170     if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
5171       return None;
5172   }
5173   // Make sure that the memory op is a valid size.
5174   int64_t LegalShiftVal = Log2_32(SizeInBytes);
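  // e.g. a 64-bit (8-byte) access gives LegalShiftVal == 3, matching the
  // "lsl #3" the addressing mode can fold; byte accesses (shift 0) bail below.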
5175   if (LegalShiftVal == 0)
5176     return None;
5177   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5178     return None;
5179 
5180   // Now, try to find the specific G_CONSTANT. Start by assuming that the
5181   // register we will offset is the LHS, and the register containing the
5182   // constant is the RHS.
5183   Register OffsetReg = OffsetInst->getOperand(1).getReg();
5184   Register ConstantReg = OffsetInst->getOperand(2).getReg();
5185   auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5186   if (!ValAndVReg) {
5187     // We didn't get a constant on the RHS. If the opcode is a shift, then
5188     // we're done.
5189     if (OffsetOpc == TargetOpcode::G_SHL)
5190       return None;
5191 
5192     // If we have a G_MUL, we can use either register. Try looking at the RHS.
5193     std::swap(OffsetReg, ConstantReg);
5194     ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5195     if (!ValAndVReg)
5196       return None;
5197   }
5198 
5199   // The value must fit into 3 bits, and must be positive. Make sure that is
5200   // true.
5201   int64_t ImmVal = ValAndVReg->Value;
5202 
5203   // Since we're going to pull this into a shift, the constant value must be
5204   // a power of 2. If we got a multiply, then we need to check this.
5205   if (OffsetOpc == TargetOpcode::G_MUL) {
5206     if (!isPowerOf2_32(ImmVal))
5207       return None;
5208 
5209     // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
5210     ImmVal = Log2_32(ImmVal);
5211   }
5212 
5213   if ((ImmVal & 0x7) != ImmVal)
5214     return None;
5215 
5216   // We are only allowed to shift by LegalShiftVal. This shift value is built
5217   // into the instruction, so we can't just use whatever we want.
5218   if (ImmVal != LegalShiftVal)
5219     return None;
5220 
5221   unsigned SignExtend = 0;
5222   if (WantsExt) {
5223     // Check if the offset is defined by an extend, unless we looked through a
5224     // G_ZEXT earlier.
5225     if (!LookedThroughZExt) {
5226       MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
5227       auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
5228       if (Ext == AArch64_AM::InvalidShiftExtend)
5229         return None;
5230 
5231       SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
5232       // We only support SXTW for signed extension here.
5233       if (SignExtend && Ext != AArch64_AM::SXTW)
5234         return None;
5235       OffsetReg = ExtInst->getOperand(1).getReg();
5236     }
5237 
5238     // Need a 32-bit wide register here.
5239     MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
5240     OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
5241   }
5242 
5243   // We can use the LHS of the GEP as the base, and the LHS of the shift as an
5244   // offset. Signify that we are shifting by setting the shift flag to 1.
5245   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
5246            [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
5247            [=](MachineInstrBuilder &MIB) {
5248              // Need to add both immediates here to make sure that they are both
5249              // added to the instruction.
5250              MIB.addImm(SignExtend);
5251              MIB.addImm(1);
5252            }}};
5253 }
5254 
5255 /// This is used for computing addresses like this:
5256 ///
5257 /// ldr x1, [x2, x3, lsl #3]
5258 ///
5259 /// Where x2 is the base register, and x3 is an offset register. The shift-left
5260 /// amount is a constant fixed by this particular load instruction: it is
5261 /// always the log base-2 of the element size, so for the 8-byte load above
5262 /// we will only ever see a 3 here.
5263 InstructionSelector::ComplexRendererFns
5264 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
5265     MachineOperand &Root, unsigned SizeInBytes) const {
5266   if (!Root.isReg())
5267     return None;
5268   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5269 
5270   // We want to find something like this:
5271   //
5272   // val = G_CONSTANT LegalShiftVal
5273   // shift = G_SHL off_reg val
5274   // ptr = G_PTR_ADD base_reg shift
5275   // x = G_LOAD ptr
5276   //
5277   // And fold it into this addressing mode:
5278   //
5279   // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
5280 
5281   // Check if we can find the G_PTR_ADD.
5282   MachineInstr *PtrAdd =
5283       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5284   if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5285     return None;
5286 
5287   // Now, try to match an opcode which will match our specific offset.
5288   // We want a G_SHL or a G_MUL.
5289   MachineInstr *OffsetInst =
5290       getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
5291   return selectExtendedSHL(Root, PtrAdd->getOperand(1),
5292                            OffsetInst->getOperand(0), SizeInBytes,
5293                            /*WantsExt=*/false);
5294 }
5295 
5296 /// This is used for computing addresses like this:
5297 ///
5298 /// ldr x1, [x2, x3]
5299 ///
5300 /// Where x2 is the base register, and x3 is an offset register.
5301 ///
5302 /// When it is possible and profitable to fold a G_PTR_ADD into the address
5303 /// calculation, this will do so. Otherwise, it will return None.
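///
/// A minimal sketch of the MIR this matches (register names are assumptions
/// for illustration only):
///
///   %ptr:gpr(p0) = G_PTR_ADD %base:gpr(p0), %off:gpr(s64)
///   %val:gpr(s64) = G_LOAD %ptr
///
/// which can then be selected as: ldr x1, [x2, x3]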
5304 InstructionSelector::ComplexRendererFns
5305 AArch64InstructionSelector::selectAddrModeRegisterOffset(
5306     MachineOperand &Root) const {
5307   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5308 
5309   // We need a GEP.
5310   MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
5311   if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
5312     return None;
5313 
5314   // If this is used more than once, let's not bother folding.
5315   // TODO: Check if they are memory ops. If they are, then we can still fold
5316   // without having to recompute anything.
5317   if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
5318     return None;
5319 
5320   // Base is the GEP's LHS, offset is its RHS.
5321   return {{[=](MachineInstrBuilder &MIB) {
5322              MIB.addUse(Gep->getOperand(1).getReg());
5323            },
5324            [=](MachineInstrBuilder &MIB) {
5325              MIB.addUse(Gep->getOperand(2).getReg());
5326            },
5327            [=](MachineInstrBuilder &MIB) {
5328              // Need to add both immediates here to make sure that they are both
5329              // added to the instruction.
5330              MIB.addImm(0);
5331              MIB.addImm(0);
5332            }}};
5333 }
5334 
5335 /// This is intended to be equivalent to selectAddrModeXRO in
5336 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
5337 InstructionSelector::ComplexRendererFns
5338 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
5339                                               unsigned SizeInBytes) const {
5340   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5341   if (!Root.isReg())
5342     return None;
5343   MachineInstr *PtrAdd =
5344       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5345   if (!PtrAdd)
5346     return None;
5347 
5348   // Check for an immediate which cannot be encoded in the [base + imm]
5349   // addressing mode, and which can't be encoded in an add/sub. If this
5350   // happens, we'll end up with code like:
5351   //
5352   // mov x0, wide
5353   // add x1, base, x0
5354   // ldr x2, [x1, x0]
5355   //
5356   // In this situation, we can use the [base, xreg] addressing mode to save an
5357   // add/sub:
5358   //
5359   // mov x0, wide
5360   // ldr x2, [base, x0]
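  //
  // As a rough worked example (offset value assumed for illustration): for an
  // 8-byte load with an offset of 100000 (0x186a0), the offset is neither a
  // valid scaled immediate (it is >= 0x1000 << 3) nor encodable by a single
  // add/sub, so materializing it once and using [base, xreg] saves the add.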
5361   auto ValAndVReg =
5362       getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
5363   if (ValAndVReg) {
5364     unsigned Scale = Log2_32(SizeInBytes);
5365     int64_t ImmOff = ValAndVReg->Value;
5366 
5367     // Skip immediates that can be selected in the load/store addressing
5368     // mode.
5369     if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
5370         ImmOff < (0x1000 << Scale))
5371       return None;
5372 
5373     // Helper lambda to decide whether or not it is preferable to emit an add.
5374     auto isPreferredADD = [](int64_t ImmOff) {
5375       // Constants in [0x0, 0xfff] can be encoded in an add.
5376       if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
5377         return true;
5378 
5379       // Can it be encoded in an add lsl #12?
5380       if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
5381         return false;
5382 
5383       // It can be encoded in an add lsl #12, but we may not want to. If it is
5384       // possible to select this as a single movz, then prefer that. A single
5385       // movz is faster than an add with a shift.
5386       return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
5387              (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
5388     };
5389 
5390     // If the immediate can be encoded in a single add/sub, then bail out.
5391     if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
5392       return None;
5393   }
5394 
5395   // Try to fold shifts into the addressing mode.
5396   auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
5397   if (AddrModeFns)
5398     return AddrModeFns;
5399 
5400   // If that doesn't work, see if it's possible to fold in registers from
5401   // a GEP.
5402   return selectAddrModeRegisterOffset(Root);
5403 }
5404 
5405 /// This is used for computing addresses like this:
5406 ///
5407 /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
5408 ///
5409 /// Where we have a 64-bit base register, a 32-bit offset register, and an
5410 /// extend (which may or may not be signed).
5411 InstructionSelector::ComplexRendererFns
5412 AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
5413                                               unsigned SizeInBytes) const {
5414   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5415 
5416   MachineInstr *PtrAdd =
5417       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5418   if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5419     return None;
5420 
5421   MachineOperand &LHS = PtrAdd->getOperand(1);
5422   MachineOperand &RHS = PtrAdd->getOperand(2);
5423   MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
5424 
5425   // The first case is the same as selectAddrModeXRO, except we need an extend.
5426   // In this case, we try to find a shift and extend, and fold them into the
5427   // addressing mode.
5428   //
5429   // E.g.
5430   //
5431   // off_reg = G_Z/S/ANYEXT ext_reg
5432   // val = G_CONSTANT LegalShiftVal
5433   // shift = G_SHL off_reg val
5434   // ptr = G_PTR_ADD base_reg shift
5435   // x = G_LOAD ptr
5436   //
5437   // In this case we can get a load like this:
5438   //
5439   // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
5440   auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
5441                                        SizeInBytes, /*WantsExt=*/true);
5442   if (ExtendedShl)
5443     return ExtendedShl;
5444 
5445   // There was no shift. We can still try to fold in a G_Z/S/ANYEXT on its own.
5446   //
5447   // e.g.
5448   // ldr something, [base_reg, ext_reg, sxtw]
5449   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5450     return None;
5451 
5452   // Check if this is an extend. We'll get an extend type if it is.
5453   AArch64_AM::ShiftExtendType Ext =
5454       getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
5455   if (Ext == AArch64_AM::InvalidShiftExtend)
5456     return None;
5457 
5458   // Need a 32-bit wide register.
5459   MachineIRBuilder MIB(*PtrAdd);
5460   Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
5461                                        AArch64::GPR32RegClass, MIB);
5462   unsigned SignExtend = Ext == AArch64_AM::SXTW;
5463 
5464   // Base is LHS, offset is ExtReg.
5465   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
5466            [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5467            [=](MachineInstrBuilder &MIB) {
5468              MIB.addImm(SignExtend);
5469              MIB.addImm(0);
5470            }}};
5471 }
5472 
5473 /// Select a "register plus unscaled signed 9-bit immediate" address.  This
5474 /// should only match when there is an offset that is not valid for a scaled
5475 /// immediate addressing mode.  The "Size" argument is the size in bytes of the
5476 /// memory reference, which is needed here to know what is valid for a scaled
5477 /// immediate.
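///
/// For instance (illustrative only): with Size == 4, an offset of 3 is not a
/// multiple of 4 and so cannot be a scaled immediate, but it does fit in a
/// signed 9-bit unscaled offset, giving something like: ldur w0, [x1, #3]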
5478 InstructionSelector::ComplexRendererFns
5479 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
5480                                                    unsigned Size) const {
5481   MachineRegisterInfo &MRI =
5482       Root.getParent()->getParent()->getParent()->getRegInfo();
5483 
5484   if (!Root.isReg())
5485     return None;
5486 
5487   if (!isBaseWithConstantOffset(Root, MRI))
5488     return None;
5489 
5490   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5491   if (!RootDef)
5492     return None;
5493 
5494   MachineOperand &OffImm = RootDef->getOperand(2);
5495   if (!OffImm.isReg())
5496     return None;
5497   MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
5498   if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
5499     return None;
5500   int64_t RHSC;
5501   MachineOperand &RHSOp1 = RHS->getOperand(1);
5502   if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
5503     return None;
5504   RHSC = RHSOp1.getCImm()->getSExtValue();
5505 
5506   // If the offset is valid as a scaled immediate, don't match here.
5507   if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
5508     return None;
5509   if (RHSC >= -256 && RHSC < 256) {
5510     MachineOperand &Base = RootDef->getOperand(1);
5511     return {{
5512         [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
5513         [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
5514     }};
5515   }
5516   return None;
5517 }
5518 
5519 InstructionSelector::ComplexRendererFns
5520 AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
5521                                                  unsigned Size,
5522                                                  MachineRegisterInfo &MRI) const {
5523   if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
5524     return None;
5525   MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
5526   if (Adrp.getOpcode() != AArch64::ADRP)
5527     return None;
5528 
5529   // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
5530   // TODO: Need to check GV's offset % size if doing offset folding into globals.
5531   assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
5532   auto GV = Adrp.getOperand(1).getGlobal();
5533   if (GV->isThreadLocal())
5534     return None;
5535 
5536   auto &MF = *RootDef.getParent()->getParent();
5537   if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
5538     return None;
5539 
5540   unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
5541   MachineIRBuilder MIRBuilder(RootDef);
5542   Register AdrpReg = Adrp.getOperand(0).getReg();
5543   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
5544            [=](MachineInstrBuilder &MIB) {
5545              MIB.addGlobalAddress(GV, /* Offset */ 0,
5546                                   OpFlags | AArch64II::MO_PAGEOFF |
5547                                       AArch64II::MO_NC);
5548            }}};
5549 }
5550 
5551 /// Select a "register plus scaled unsigned 12-bit immediate" address.  The
5552 /// "Size" argument is the size in bytes of the memory reference, which
5553 /// determines the scale.
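///
/// As an illustrative example (values assumed): with Size == 8, an offset of
/// 16 is folded as the scaled immediate 16 >> 3 == 2, so the access can be
/// selected as something like: ldr x0, [x1, #16]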
5554 InstructionSelector::ComplexRendererFns
5555 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
5556                                                   unsigned Size) const {
5557   MachineFunction &MF = *Root.getParent()->getParent()->getParent();
5558   MachineRegisterInfo &MRI = MF.getRegInfo();
5559 
5560   if (!Root.isReg())
5561     return None;
5562 
5563   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5564   if (!RootDef)
5565     return None;
5566 
5567   if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
5568     return {{
5569         [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
5570         [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5571     }};
5572   }
5573 
5574   CodeModel::Model CM = MF.getTarget().getCodeModel();
5575   // Check if we can fold in the ADD of small code model ADRP + ADD address.
5576   if (CM == CodeModel::Small) {
5577     auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
5578     if (OpFns)
5579       return OpFns;
5580   }
5581 
5582   if (isBaseWithConstantOffset(Root, MRI)) {
5583     MachineOperand &LHS = RootDef->getOperand(1);
5584     MachineOperand &RHS = RootDef->getOperand(2);
5585     MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
5586     MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
5587     if (LHSDef && RHSDef) {
5588       int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
5589       unsigned Scale = Log2_32(Size);
5590       if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
5591         if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
5592           return {{
5593               [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
5594               [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5595           }};
5596 
5597         return {{
5598             [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
5599             [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5600         }};
5601       }
5602     }
5603   }
5604 
5605   // Before falling back to our general case, check if the unscaled
5606   // instructions can handle this. If so, that's preferable.
5607   if (selectAddrModeUnscaled(Root, Size).hasValue())
5608     return None;
5609 
5610   return {{
5611       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
5612       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5613   }};
5614 }
5615 
5616 /// Given a shift instruction, return the correct shift type for that
5617 /// instruction.
5618 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
5619   // TODO: Handle AArch64_AM::ROR
5620   switch (MI.getOpcode()) {
5621   default:
5622     return AArch64_AM::InvalidShiftExtend;
5623   case TargetOpcode::G_SHL:
5624     return AArch64_AM::LSL;
5625   case TargetOpcode::G_LSHR:
5626     return AArch64_AM::LSR;
5627   case TargetOpcode::G_ASHR:
5628     return AArch64_AM::ASR;
5629   }
5630 }
5631 
5632 /// Select a "shifted register" operand. If the value is not shifted, set the
5633 /// shift operand to a default value of "lsl 0".
5634 ///
5635 /// TODO: Allow shifted register to be rotated in logical instructions.
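///
/// A sketch of what this folds (illustrative MIR, names assumed):
///
///   %c = G_CONSTANT i64 3
///   %shl = G_SHL %x, %c
///   %res = G_ADD %y, %shl
///
/// so the add can be selected as: add x0, x1, x2, lsl #3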
5636 InstructionSelector::ComplexRendererFns
5637 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
5638   if (!Root.isReg())
5639     return None;
5640   MachineRegisterInfo &MRI =
5641       Root.getParent()->getParent()->getParent()->getRegInfo();
5642 
5643   // Check if the operand is defined by an instruction which corresponds to
5644   // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
5645   //
5646   // TODO: Handle AArch64_AM::ROR for logical instructions.
5647   MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
5648   if (!ShiftInst)
5649     return None;
5650   AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
5651   if (ShType == AArch64_AM::InvalidShiftExtend)
5652     return None;
5653   if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
5654     return None;
5655 
5656   // Need an immediate on the RHS.
5657   MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
5658   auto Immed = getImmedFromMO(ShiftRHS);
5659   if (!Immed)
5660     return None;
5661 
5662   // We have something that we can fold. Fold in the shift's LHS and RHS into
5663   // the instruction.
5664   MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
5665   Register ShiftReg = ShiftLHS.getReg();
5666 
5667   unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
5668   unsigned Val = *Immed & (NumBits - 1);
5669   unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
5670 
5671   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
5672            [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
5673 }
5674 
5675 AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
5676     MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
5677   unsigned Opc = MI.getOpcode();
5678 
5679   // Handle explicit extend instructions first.
5680   if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
5681     unsigned Size;
5682     if (Opc == TargetOpcode::G_SEXT)
5683       Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5684     else
5685       Size = MI.getOperand(2).getImm();
5686     assert(Size != 64 && "Extend from 64 bits?");
5687     switch (Size) {
5688     case 8:
5689       return AArch64_AM::SXTB;
5690     case 16:
5691       return AArch64_AM::SXTH;
5692     case 32:
5693       return AArch64_AM::SXTW;
5694     default:
5695       return AArch64_AM::InvalidShiftExtend;
5696     }
5697   }
5698 
5699   if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
5700     unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5701     assert(Size != 64 && "Extend from 64 bits?");
5702     switch (Size) {
5703     case 8:
5704       return AArch64_AM::UXTB;
5705     case 16:
5706       return AArch64_AM::UXTH;
5707     case 32:
5708       return AArch64_AM::UXTW;
5709     default:
5710       return AArch64_AM::InvalidShiftExtend;
5711     }
5712   }
5713 
5714   // Don't have an explicit extend. Try to handle a G_AND with a constant mask
5715   // on the RHS.
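  //
  // For example (illustrative): a G_AND with mask 0xFFFF behaves like a UXTH
  // when used as an arithmetic extend, but is not treated as an extend for
  // load/store addressing (see the switch below).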
5716   if (Opc != TargetOpcode::G_AND)
5717     return AArch64_AM::InvalidShiftExtend;
5718 
5719   Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
5720   if (!MaybeAndMask)
5721     return AArch64_AM::InvalidShiftExtend;
5722   uint64_t AndMask = *MaybeAndMask;
5723   switch (AndMask) {
5724   default:
5725     return AArch64_AM::InvalidShiftExtend;
5726   case 0xFF:
5727     return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
5728   case 0xFFFF:
5729     return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
5730   case 0xFFFFFFFF:
5731     return AArch64_AM::UXTW;
5732   }
5733 }
5734 
5735 Register AArch64InstructionSelector::moveScalarRegClass(
5736     Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
5737   MachineRegisterInfo &MRI = *MIB.getMRI();
5738   auto Ty = MRI.getType(Reg);
5739   assert(!Ty.isVector() && "Expected scalars only!");
5740   if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
5741     return Reg;
5742 
5743   // Create a copy and immediately select it.
5744   // FIXME: We should have an emitCopy function?
5745   auto Copy = MIB.buildCopy({&RC}, {Reg});
5746   selectCopy(*Copy, TII, MRI, TRI, RBI);
5747   return Copy.getReg(0);
5748 }
5749 
5750 /// Select an "extended register" operand. This operand folds in an extend
5751 /// followed by an optional left shift.
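///
/// A minimal sketch of a foldable pattern (illustrative, names assumed):
///
///   %ext:gpr(s64) = G_SEXT %w:gpr(s32)
///   %c:gpr(s64) = G_CONSTANT i64 2
///   %off:gpr(s64) = G_SHL %ext, %c
///   %sum:gpr(s64) = G_ADD %base, %off
///
/// where the extend and shift become an operand like: add x0, x1, w2, sxtw #2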
5752 InstructionSelector::ComplexRendererFns
5753 AArch64InstructionSelector::selectArithExtendedRegister(
5754     MachineOperand &Root) const {
5755   if (!Root.isReg())
5756     return None;
5757   MachineRegisterInfo &MRI =
5758       Root.getParent()->getParent()->getParent()->getRegInfo();
5759 
5760   uint64_t ShiftVal = 0;
5761   Register ExtReg;
5762   AArch64_AM::ShiftExtendType Ext;
5763   MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
5764   if (!RootDef)
5765     return None;
5766 
5767   if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
5768     return None;
5769 
5770   // Check if we can fold a shift and an extend.
5771   if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
5772     // Look for a constant on the RHS of the shift.
5773     MachineOperand &RHS = RootDef->getOperand(2);
5774     Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
5775     if (!MaybeShiftVal)
5776       return None;
5777     ShiftVal = *MaybeShiftVal;
5778     if (ShiftVal > 4)
5779       return None;
5780     // Look for a valid extend instruction on the LHS of the shift.
5781     MachineOperand &LHS = RootDef->getOperand(1);
5782     MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5783     if (!ExtDef)
5784       return None;
5785     Ext = getExtendTypeForInst(*ExtDef, MRI);
5786     if (Ext == AArch64_AM::InvalidShiftExtend)
5787       return None;
5788     ExtReg = ExtDef->getOperand(1).getReg();
5789   } else {
5790     // Didn't get a shift. Try just folding an extend.
5791     Ext = getExtendTypeForInst(*RootDef, MRI);
5792     if (Ext == AArch64_AM::InvalidShiftExtend)
5793       return None;
5794     ExtReg = RootDef->getOperand(1).getReg();
5795 
5796     // If we have a 32 bit instruction which zeroes out the high half of a
5797     // register, we get an implicit zero extend for free. Check if we have one.
5798     // FIXME: We actually emit the extend right now even though we don't have
5799     // to.
5800     if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
5801       MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
5802       if (ExtInst && isDef32(*ExtInst))
5803         return None;
5804     }
5805   }
5806 
5807   // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
5808   // copy.
5809   MachineIRBuilder MIB(*RootDef);
5810   ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
5811 
5812   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5813            [=](MachineInstrBuilder &MIB) {
5814              MIB.addImm(getArithExtendImm(Ext, ShiftVal));
5815            }}};
5816 }
5817 
5818 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
5819                                                 const MachineInstr &MI,
5820                                                 int OpIdx) const {
5821   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5822   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5823          "Expected G_CONSTANT");
5824   Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
5825   assert(CstVal && "Expected constant value");
5826   MIB.addImm(CstVal.getValue());
5827 }
5828 
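// Render a 32-bit logical (bitmask) immediate. As an illustrative example
// (assumed values): 0x00ff00ff is a repeating bitmask pattern and can be
// encoded, whereas an arbitrary constant like 0x12345678 cannot; such
// constants are expected to have been rejected before this renderer runs.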
5829 void AArch64InstructionSelector::renderLogicalImm32(
5830   MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
5831   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5832          "Expected G_CONSTANT");
5833   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
5834   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
5835   MIB.addImm(Enc);
5836 }
5837 
5838 void AArch64InstructionSelector::renderLogicalImm64(
5839   MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
5840   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5841          "Expected G_CONSTANT");
5842   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
5843   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
5844   MIB.addImm(Enc);
5845 }
5846 
5847 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
5848     const MachineInstr &MI, unsigned NumBytes) const {
5849   if (!MI.mayLoadOrStore())
5850     return false;
5851   assert(MI.hasOneMemOperand() &&
5852          "Expected load/store to have only one mem op!");
5853   return (*MI.memoperands_begin())->getSize() == NumBytes;
5854 }
5855 
5856 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
5857   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5858   if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
5859     return false;
5860 
5861   // Only return true if we know the operation will zero-out the high half of
5862   // the 64-bit register. Truncates can be subregister copies, which don't
5863   // zero out the high bits. Copies and other copy-like instructions can be
5864   // fed by truncates, or could be lowered as subregister copies.
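  //
  // For example (illustrative): a 32-bit G_ADD selected to ADDWrr writes a W
  // register and therefore zeroes bits [63:32] of the containing X register,
  // whereas a G_TRUNC may be selected as a plain subregister copy and leaves
  // whatever was in the high bits of the source in place.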
5865   switch (MI.getOpcode()) {
5866   default:
5867     return true;
5868   case TargetOpcode::COPY:
5869   case TargetOpcode::G_BITCAST:
5870   case TargetOpcode::G_TRUNC:
5871   case TargetOpcode::G_PHI:
5872     return false;
5873   }
5874 }
5875 
5876 
5877 // Perform fixups on the given PHI instruction's operands to force them all
5878 // to be the same as the destination regbank.
5879 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
5880                             const AArch64RegisterBankInfo &RBI) {
5881   assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
5882   Register DstReg = MI.getOperand(0).getReg();
5883   const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
5884   assert(DstRB && "Expected PHI dst to have regbank assigned");
5885   MachineIRBuilder MIB(MI);
5886 
5887   // Go through each operand and ensure it has the same regbank.
5888   for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5889     MachineOperand &MO = MI.getOperand(OpIdx);
5890     if (!MO.isReg())
5891       continue;
5892     Register OpReg = MO.getReg();
5893     const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
5894     if (RB != DstRB) {
5895       // Insert a cross-bank copy.
5896       auto *OpDef = MRI.getVRegDef(OpReg);
5897       const LLT &Ty = MRI.getType(OpReg);
5898       MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator()));
5899       auto Copy = MIB.buildCopy(Ty, OpReg);
5900       MRI.setRegBank(Copy.getReg(0), *DstRB);
5901       MO.setReg(Copy.getReg(0));
5902     }
5903   }
5904 }
5905 
5906 void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
5907   // We're looking for PHIs; build a list first so we don't invalidate iterators.
5908   MachineRegisterInfo &MRI = MF.getRegInfo();
5909   SmallVector<MachineInstr *, 32> Phis;
5910   for (auto &BB : MF) {
5911     for (auto &MI : BB) {
5912       if (MI.getOpcode() == TargetOpcode::G_PHI)
5913         Phis.emplace_back(&MI);
5914     }
5915   }
5916 
5917   for (auto *MI : Phis) {
5918     // We need to do some work here if the operand types are < 16 bit and they
5919     // are split across fpr/gpr banks. Since all types <32b on gpr
5920     // end up being assigned gpr32 regclasses, we can end up with PHIs here
5921     // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
5922     // be selecting heterogeneous regbanks for operands if possible, but we
5923     // still need to be able to deal with it here.
5924     //
5925     // To fix this, if we have a gpr-bank operand < 32b in size and at least
5926     // one other operand is on the fpr bank, then we add cross-bank copies
5927     // to homogenize the operand banks. For simplicity the bank that we choose
5928     // to settle on is whatever bank the def operand has. For example:
5929     //
5930     // %endbb:
5931     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
5932     //  =>
5933     // %bb2:
5934     //   ...
5935     //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
5936     //   ...
5937     // %endbb:
5938     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
5939     bool HasGPROp = false, HasFPROp = false;
5940     for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
5941       const auto &MO = MI->getOperand(OpIdx);
5942       if (!MO.isReg())
5943         continue;
5944       const LLT &Ty = MRI.getType(MO.getReg());
5945       if (!Ty.isValid() || !Ty.isScalar())
5946         break;
5947       if (Ty.getSizeInBits() >= 32)
5948         break;
5949       const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
5950       // If for some reason we don't have a regbank yet, don't try anything.
5951       if (!RB)
5952         break;
5953 
5954       if (RB->getID() == AArch64::GPRRegBankID)
5955         HasGPROp = true;
5956       else
5957         HasFPROp = true;
5958     }
5959     // We have heterogeneous regbanks, so we need to fix them up.
5960     if (HasGPROp && HasFPROp)
5961       fixupPHIOpBanks(*MI, MRI, RBI);
5962   }
5963 }
5964 
5965 namespace llvm {
5966 InstructionSelector *
5967 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
5968                                  AArch64Subtarget &Subtarget,
5969                                  AArch64RegisterBankInfo &RBI) {
5970   return new AArch64InstructionSelector(TM, Subtarget, RBI);
5971 }
5972 }
5973