1 //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the AArch64 specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64Subtarget.h"
15 
16 #include "AArch64.h"
17 #include "AArch64InstrInfo.h"
18 #include "AArch64PBQPRegAlloc.h"
19 #include "AArch64TargetMachine.h"
20 
21 #include "AArch64CallLowering.h"
22 #include "AArch64LegalizerInfo.h"
23 #include "AArch64RegisterBankInfo.h"
24 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
25 #include "llvm/CodeGen/MachineScheduler.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/Support/TargetParser.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "aarch64-subtarget"
32 
33 #define GET_SUBTARGETINFO_CTOR
34 #define GET_SUBTARGETINFO_TARGET_DESC
35 #include "AArch64GenSubtargetInfo.inc"
36 
37 static cl::opt<bool>
38 EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
39                      "converter pass"), cl::init(true), cl::Hidden);
40 
41 // If OS supports TBI, use this flag to enable it.
42 static cl::opt<bool>
43 UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
44                          "an address is ignored"), cl::init(false), cl::Hidden);
45 
46 static cl::opt<bool>
47     UseNonLazyBind("aarch64-enable-nonlazybind",
48                    cl::desc("Call nonlazybind functions via direct GOT load"),
49                    cl::init(false), cl::Hidden);
50 
51 AArch64Subtarget &
initializeSubtargetDependencies(StringRef FS,StringRef CPUString)52 AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
53                                                   StringRef CPUString) {
54   // Determine default and user-specified characteristics
55 
56   if (CPUString.empty())
57     CPUString = "generic";
58 
59   ParseSubtargetFeatures(CPUString, FS);
60   initializeProperties();
61 
62   return *this;
63 }
64 
initializeProperties()65 void AArch64Subtarget::initializeProperties() {
66   // Initialize CPU specific properties. We should add a tablegen feature for
67   // this in the future so we can specify it together with the subtarget
68   // features.
69   switch (ARMProcFamily) {
70   case Cyclone:
71     CacheLineSize = 64;
72     PrefetchDistance = 280;
73     MinPrefetchStride = 2048;
74     MaxPrefetchIterationsAhead = 3;
75     break;
76   case CortexA57:
77     MaxInterleaveFactor = 4;
78     PrefFunctionAlignment = 4;
79     break;
80   case ExynosM1:
81     MaxInterleaveFactor = 4;
82     MaxJumpTableSize = 8;
83     PrefFunctionAlignment = 4;
84     PrefLoopAlignment = 3;
85     break;
86   case ExynosM3:
87     MaxInterleaveFactor = 4;
88     MaxJumpTableSize = 20;
89     PrefFunctionAlignment = 5;
90     PrefLoopAlignment = 4;
91     break;
92   case Falkor:
93     MaxInterleaveFactor = 4;
94     // FIXME: remove this to enable 64-bit SLP if performance looks good.
95     MinVectorRegisterBitWidth = 128;
96     CacheLineSize = 128;
97     PrefetchDistance = 820;
98     MinPrefetchStride = 2048;
99     MaxPrefetchIterationsAhead = 8;
100     break;
101   case Saphira:
102     MaxInterleaveFactor = 4;
103     // FIXME: remove this to enable 64-bit SLP if performance looks good.
104     MinVectorRegisterBitWidth = 128;
105     break;
106   case Kryo:
107     MaxInterleaveFactor = 4;
108     VectorInsertExtractBaseCost = 2;
109     CacheLineSize = 128;
110     PrefetchDistance = 740;
111     MinPrefetchStride = 1024;
112     MaxPrefetchIterationsAhead = 11;
113     // FIXME: remove this to enable 64-bit SLP if performance looks good.
114     MinVectorRegisterBitWidth = 128;
115     break;
116   case ThunderX2T99:
117     CacheLineSize = 64;
118     PrefFunctionAlignment = 3;
119     PrefLoopAlignment = 2;
120     MaxInterleaveFactor = 4;
121     PrefetchDistance = 128;
122     MinPrefetchStride = 1024;
123     MaxPrefetchIterationsAhead = 4;
124     // FIXME: remove this to enable 64-bit SLP if performance looks good.
125     MinVectorRegisterBitWidth = 128;
126     break;
127   case ThunderX:
128   case ThunderXT88:
129   case ThunderXT81:
130   case ThunderXT83:
131     CacheLineSize = 128;
132     PrefFunctionAlignment = 3;
133     PrefLoopAlignment = 2;
134     // FIXME: remove this to enable 64-bit SLP if performance looks good.
135     MinVectorRegisterBitWidth = 128;
136     break;
137   case CortexA35: break;
138   case CortexA53:
139     PrefFunctionAlignment = 3;
140     break;
141   case CortexA55: break;
142   case CortexA72:
143   case CortexA73:
144   case CortexA75:
145     PrefFunctionAlignment = 4;
146     break;
147   case Others: break;
148   }
149 }
150 
AArch64Subtarget(const Triple & TT,const std::string & CPU,const std::string & FS,const TargetMachine & TM,bool LittleEndian)151 AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
152                                    const std::string &FS,
153                                    const TargetMachine &TM, bool LittleEndian)
154     : AArch64GenSubtargetInfo(TT, CPU, FS),
155       ReserveX18(AArch64::isX18ReservedByDefault(TT)), IsLittle(LittleEndian),
156       TargetTriple(TT), FrameLowering(),
157       InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
158       TLInfo(TM, *this) {
159   CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
160   Legalizer.reset(new AArch64LegalizerInfo(*this));
161 
162   auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
163 
164   // FIXME: At this point, we can't rely on Subtarget having RBI.
165   // It's awkward to mix passing RBI and the Subtarget; should we pass
166   // TII/TRI as well?
167   InstSelector.reset(createAArch64InstructionSelector(
168       *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
169 
170   RegBankInfo.reset(RBI);
171 }
172 
getCallLowering() const173 const CallLowering *AArch64Subtarget::getCallLowering() const {
174   return CallLoweringInfo.get();
175 }
176 
getInstructionSelector() const177 const InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
178   return InstSelector.get();
179 }
180 
getLegalizerInfo() const181 const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
182   return Legalizer.get();
183 }
184 
getRegBankInfo() const185 const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
186   return RegBankInfo.get();
187 }
188 
189 /// Find the target operand flags that describe how a global value should be
190 /// referenced for the current subtarget.
191 unsigned char
ClassifyGlobalReference(const GlobalValue * GV,const TargetMachine & TM) const192 AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
193                                           const TargetMachine &TM) const {
194   // MachO large model always goes via a GOT, simply to get a single 8-byte
195   // absolute relocation on all global addresses.
196   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
197     return AArch64II::MO_GOT;
198 
199   unsigned Flags = GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
200                                                   : AArch64II::MO_NO_FLAG;
201 
202   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
203     return AArch64II::MO_GOT | Flags;
204 
205   // The small code model's direct accesses use ADRP, which cannot
206   // necessarily produce the value 0 (if the code is above 4GB).
207   if (useSmallAddressing() && GV->hasExternalWeakLinkage())
208     return AArch64II::MO_GOT | Flags;
209 
210   return Flags;
211 }
212 
classifyGlobalFunctionReference(const GlobalValue * GV,const TargetMachine & TM) const213 unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
214     const GlobalValue *GV, const TargetMachine &TM) const {
215   // MachO large model always goes via a GOT, because we don't have the
216   // relocations available to do anything else..
217   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
218       !GV->hasInternalLinkage())
219     return AArch64II::MO_GOT;
220 
221   // NonLazyBind goes via GOT unless we know it's available locally.
222   auto *F = dyn_cast<Function>(GV);
223   if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
224       !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
225     return AArch64II::MO_GOT;
226 
227   return AArch64II::MO_NO_FLAG;
228 }
229 
overrideSchedPolicy(MachineSchedPolicy & Policy,unsigned NumRegionInstrs) const230 void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
231                                            unsigned NumRegionInstrs) const {
232   // LNT run (at least on Cyclone) showed reasonably significant gains for
233   // bi-directional scheduling. 253.perlbmk.
234   Policy.OnlyTopDown = false;
235   Policy.OnlyBottomUp = false;
236   // Enabling or Disabling the latency heuristic is a close call: It seems to
237   // help nearly no benchmark on out-of-order architectures, on the other hand
238   // it regresses register pressure on a few benchmarking.
239   Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
240 }
241 
enableEarlyIfConversion() const242 bool AArch64Subtarget::enableEarlyIfConversion() const {
243   return EnableEarlyIfConvert;
244 }
245 
supportsAddressTopByteIgnored() const246 bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
247   if (!UseAddressTopByteIgnored)
248     return false;
249 
250   if (TargetTriple.isiOS()) {
251     unsigned Major, Minor, Micro;
252     TargetTriple.getiOSVersion(Major, Minor, Micro);
253     return Major >= 8;
254   }
255 
256   return false;
257 }
258 
259 std::unique_ptr<PBQPRAConstraint>
getCustomPBQPConstraints() const260 AArch64Subtarget::getCustomPBQPConstraints() const {
261   return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
262 }
263 
mirFileLoaded(MachineFunction & MF) const264 void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
265   // We usually compute max call frame size after ISel. Do the computation now
266   // if the .mir file didn't specify it. Note that this will probably give you
267   // bogus values after PEI has eliminated the callframe setup/destroy pseudo
268   // instructions, specify explicitely if you need it to be correct.
269   MachineFrameInfo &MFI = MF.getFrameInfo();
270   if (!MFI.isMaxCallFrameSizeComputed())
271     MFI.computeMaxCallFrameSize(MF);
272 }
273