1 //===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file declares the X86 specific subclass of TargetSubtargetInfo. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H 15 #define LLVM_LIB_TARGET_X86_X86SUBTARGET_H 16 17 #include "X86FrameLowering.h" 18 #include "X86ISelLowering.h" 19 #include "X86InstrInfo.h" 20 #include "X86SelectionDAGInfo.h" 21 #include "llvm/ADT/Triple.h" 22 #include "llvm/IR/CallingConv.h" 23 #include "llvm/Target/TargetSubtargetInfo.h" 24 #include <string> 25 26 #define GET_SUBTARGETINFO_HEADER 27 #include "X86GenSubtargetInfo.inc" 28 29 namespace llvm { 30 class GlobalValue; 31 class StringRef; 32 class TargetMachine; 33 34 /// The X86 backend supports a number of different styles of PIC. 35 /// 36 namespace PICStyles { 37 enum Style { 38 StubPIC, // Used on i386-darwin in -fPIC mode. 39 StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode. 40 GOT, // Used on many 32-bit unices in -fPIC mode. 41 RIPRel, // Used on X86-64 when not in -static mode. 42 None // Set when in -static mode (not PIC or DynamicNoPIC mode). 43 }; 44 } 45 46 class X86Subtarget final : public X86GenSubtargetInfo { 47 48 protected: 49 enum X86SSEEnum { 50 NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F 51 }; 52 53 enum X863DNowEnum { 54 NoThreeDNow, ThreeDNow, ThreeDNowA 55 }; 56 57 enum X86ProcFamilyEnum { 58 Others, IntelAtom, IntelSLM 59 }; 60 61 /// X86 processor family: Intel Atom, and others 62 X86ProcFamilyEnum X86ProcFamily; 63 64 /// Which PIC style to use 65 PICStyles::Style PICStyle; 66 67 /// MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported. 68 X86SSEEnum X86SSELevel; 69 70 /// 3DNow, 3DNow Athlon, or none supported. 71 X863DNowEnum X863DNowLevel; 72 73 /// True if this processor has conditional move instructions 74 /// (generally pentium pro+). 75 bool HasCMov; 76 77 /// True if the processor supports X86-64 instructions. 78 bool HasX86_64; 79 80 /// True if the processor supports POPCNT. 81 bool HasPOPCNT; 82 83 /// True if the processor supports SSE4A instructions. 84 bool HasSSE4A; 85 86 /// Target has AES instructions 87 bool HasAES; 88 89 /// Target has carry-less multiplication 90 bool HasPCLMUL; 91 92 /// Target has 3-operand fused multiply-add 93 bool HasFMA; 94 95 /// Target has 4-operand fused multiply-add 96 bool HasFMA4; 97 98 /// Target has XOP instructions 99 bool HasXOP; 100 101 /// Target has TBM instructions. 102 bool HasTBM; 103 104 /// True if the processor has the MOVBE instruction. 105 bool HasMOVBE; 106 107 /// True if the processor has the RDRAND instruction. 108 bool HasRDRAND; 109 110 /// Processor has 16-bit floating point conversion instructions. 111 bool HasF16C; 112 113 /// Processor has FS/GS base insturctions. 114 bool HasFSGSBase; 115 116 /// Processor has LZCNT instruction. 117 bool HasLZCNT; 118 119 /// Processor has BMI1 instructions. 120 bool HasBMI; 121 122 /// Processor has BMI2 instructions. 123 bool HasBMI2; 124 125 /// Processor has RTM instructions. 126 bool HasRTM; 127 128 /// Processor has HLE. 129 bool HasHLE; 130 131 /// Processor has ADX instructions. 132 bool HasADX; 133 134 /// Processor has SHA instructions. 135 bool HasSHA; 136 137 /// Processor has PRFCHW instructions. 138 bool HasPRFCHW; 139 140 /// Processor has RDSEED instructions. 141 bool HasRDSEED; 142 143 /// True if BT (bit test) of memory instructions are slow. 144 bool IsBTMemSlow; 145 146 /// True if SHLD instructions are slow. 147 bool IsSHLDSlow; 148 149 /// True if unaligned memory access is fast. 150 bool IsUAMemFast; 151 152 /// True if unaligned 32-byte memory accesses are slow. 153 bool IsUAMem32Slow; 154 155 /// True if SSE operations can have unaligned memory operands. 156 /// This may require setting a configuration bit in the processor. 157 bool HasSSEUnalignedMem; 158 159 /// True if this processor has the CMPXCHG16B instruction; 160 /// this is true for most x86-64 chips, but not the first AMD chips. 161 bool HasCmpxchg16b; 162 163 /// True if the LEA instruction should be used for adjusting 164 /// the stack pointer. This is an optimization for Intel Atom processors. 165 bool UseLeaForSP; 166 167 /// True if 8-bit divisions are significantly faster than 168 /// 32-bit divisions and should be used when possible. 169 bool HasSlowDivide32; 170 171 /// True if 16-bit divides are significantly faster than 172 /// 64-bit divisions and should be used when possible. 173 bool HasSlowDivide64; 174 175 /// True if the short functions should be padded to prevent 176 /// a stall when returning too early. 177 bool PadShortFunctions; 178 179 /// True if the Calls with memory reference should be converted 180 /// to a register-based indirect call. 181 bool CallRegIndirect; 182 183 /// True if the LEA instruction inputs have to be ready at address generation 184 /// (AG) time. 185 bool LEAUsesAG; 186 187 /// True if the LEA instruction with certain arguments is slow 188 bool SlowLEA; 189 190 /// True if INC and DEC instructions are slow when writing to flags 191 bool SlowIncDec; 192 193 /// Use the RSQRT* instructions to optimize square root calculations. 194 /// For this to be profitable, the cost of FSQRT and FDIV must be 195 /// substantially higher than normal FP ops like FADD and FMUL. 196 bool UseSqrtEst; 197 198 /// Use the RCP* instructions to optimize FP division calculations. 199 /// For this to be profitable, the cost of FDIV must be 200 /// substantially higher than normal FP ops like FADD and FMUL. 201 bool UseReciprocalEst; 202 203 /// Processor has AVX-512 PreFetch Instructions 204 bool HasPFI; 205 206 /// Processor has AVX-512 Exponential and Reciprocal Instructions 207 bool HasERI; 208 209 /// Processor has AVX-512 Conflict Detection Instructions 210 bool HasCDI; 211 212 /// Processor has AVX-512 Doubleword and Quadword instructions 213 bool HasDQI; 214 215 /// Processor has AVX-512 Byte and Word instructions 216 bool HasBWI; 217 218 /// Processor has AVX-512 Vector Length eXtenstions 219 bool HasVLX; 220 221 /// The minimum alignment known to hold of the stack frame on 222 /// entry to the function and which must be maintained by every function. 223 unsigned stackAlignment; 224 225 /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops. 226 /// 227 unsigned MaxInlineSizeThreshold; 228 229 /// What processor and OS we're targeting. 230 Triple TargetTriple; 231 232 /// Instruction itineraries for scheduling 233 InstrItineraryData InstrItins; 234 235 private: 236 237 /// Override the stack alignment. 238 unsigned StackAlignOverride; 239 240 /// True if compiling for 64-bit, false for 16-bit or 32-bit. 241 bool In64BitMode; 242 243 /// True if compiling for 32-bit, false for 16-bit or 64-bit. 244 bool In32BitMode; 245 246 /// True if compiling for 16-bit, false for 32-bit or 64-bit. 247 bool In16BitMode; 248 249 X86SelectionDAGInfo TSInfo; 250 // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which 251 // X86TargetLowering needs. 252 X86InstrInfo InstrInfo; 253 X86TargetLowering TLInfo; 254 X86FrameLowering FrameLowering; 255 256 public: 257 /// This constructor initializes the data members to match that 258 /// of the specified triple. 259 /// 260 X86Subtarget(const std::string &TT, const std::string &CPU, 261 const std::string &FS, const X86TargetMachine &TM, 262 unsigned StackAlignOverride); 263 getTargetLowering()264 const X86TargetLowering *getTargetLowering() const override { 265 return &TLInfo; 266 } getInstrInfo()267 const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; } getFrameLowering()268 const X86FrameLowering *getFrameLowering() const override { 269 return &FrameLowering; 270 } getSelectionDAGInfo()271 const X86SelectionDAGInfo *getSelectionDAGInfo() const override { 272 return &TSInfo; 273 } getRegisterInfo()274 const X86RegisterInfo *getRegisterInfo() const override { 275 return &getInstrInfo()->getRegisterInfo(); 276 } 277 278 /// Returns the minimum alignment known to hold of the 279 /// stack frame on entry to the function and which must be maintained by every 280 /// function for this subtarget. getStackAlignment()281 unsigned getStackAlignment() const { return stackAlignment; } 282 283 /// Returns the maximum memset / memcpy size 284 /// that still makes it profitable to inline the call. getMaxInlineSizeThreshold()285 unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; } 286 287 /// ParseSubtargetFeatures - Parses features string setting specified 288 /// subtarget options. Definition of function is auto generated by tblgen. 289 void ParseSubtargetFeatures(StringRef CPU, StringRef FS); 290 291 private: 292 /// Initialize the full set of dependencies so we can use an initializer 293 /// list for X86Subtarget. 294 X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); 295 void initializeEnvironment(); 296 void initSubtargetFeatures(StringRef CPU, StringRef FS); 297 public: 298 /// Is this x86_64? (disregarding specific ABI / programming model) is64Bit()299 bool is64Bit() const { 300 return In64BitMode; 301 } 302 is32Bit()303 bool is32Bit() const { 304 return In32BitMode; 305 } 306 is16Bit()307 bool is16Bit() const { 308 return In16BitMode; 309 } 310 311 /// Is this x86_64 with the ILP32 programming model (x32 ABI)? isTarget64BitILP32()312 bool isTarget64BitILP32() const { 313 return In64BitMode && (TargetTriple.getEnvironment() == Triple::GNUX32 || 314 TargetTriple.isOSNaCl()); 315 } 316 317 /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)? isTarget64BitLP64()318 bool isTarget64BitLP64() const { 319 return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32 && 320 !TargetTriple.isOSNaCl()); 321 } 322 getPICStyle()323 PICStyles::Style getPICStyle() const { return PICStyle; } setPICStyle(PICStyles::Style Style)324 void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } 325 hasCMov()326 bool hasCMov() const { return HasCMov; } hasMMX()327 bool hasMMX() const { return X86SSELevel >= MMX; } hasSSE1()328 bool hasSSE1() const { return X86SSELevel >= SSE1; } hasSSE2()329 bool hasSSE2() const { return X86SSELevel >= SSE2; } hasSSE3()330 bool hasSSE3() const { return X86SSELevel >= SSE3; } hasSSSE3()331 bool hasSSSE3() const { return X86SSELevel >= SSSE3; } hasSSE41()332 bool hasSSE41() const { return X86SSELevel >= SSE41; } hasSSE42()333 bool hasSSE42() const { return X86SSELevel >= SSE42; } hasAVX()334 bool hasAVX() const { return X86SSELevel >= AVX; } hasAVX2()335 bool hasAVX2() const { return X86SSELevel >= AVX2; } hasAVX512()336 bool hasAVX512() const { return X86SSELevel >= AVX512F; } hasFp256()337 bool hasFp256() const { return hasAVX(); } hasInt256()338 bool hasInt256() const { return hasAVX2(); } hasSSE4A()339 bool hasSSE4A() const { return HasSSE4A; } has3DNow()340 bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } has3DNowA()341 bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } hasPOPCNT()342 bool hasPOPCNT() const { return HasPOPCNT; } hasAES()343 bool hasAES() const { return HasAES; } hasPCLMUL()344 bool hasPCLMUL() const { return HasPCLMUL; } hasFMA()345 bool hasFMA() const { return HasFMA; } 346 // FIXME: Favor FMA when both are enabled. Is this the right thing to do? hasFMA4()347 bool hasFMA4() const { return HasFMA4 && !HasFMA; } hasXOP()348 bool hasXOP() const { return HasXOP; } hasTBM()349 bool hasTBM() const { return HasTBM; } hasMOVBE()350 bool hasMOVBE() const { return HasMOVBE; } hasRDRAND()351 bool hasRDRAND() const { return HasRDRAND; } hasF16C()352 bool hasF16C() const { return HasF16C; } hasFSGSBase()353 bool hasFSGSBase() const { return HasFSGSBase; } hasLZCNT()354 bool hasLZCNT() const { return HasLZCNT; } hasBMI()355 bool hasBMI() const { return HasBMI; } hasBMI2()356 bool hasBMI2() const { return HasBMI2; } hasRTM()357 bool hasRTM() const { return HasRTM; } hasHLE()358 bool hasHLE() const { return HasHLE; } hasADX()359 bool hasADX() const { return HasADX; } hasSHA()360 bool hasSHA() const { return HasSHA; } hasPRFCHW()361 bool hasPRFCHW() const { return HasPRFCHW; } hasRDSEED()362 bool hasRDSEED() const { return HasRDSEED; } isBTMemSlow()363 bool isBTMemSlow() const { return IsBTMemSlow; } isSHLDSlow()364 bool isSHLDSlow() const { return IsSHLDSlow; } isUnalignedMemAccessFast()365 bool isUnalignedMemAccessFast() const { return IsUAMemFast; } isUnalignedMem32Slow()366 bool isUnalignedMem32Slow() const { return IsUAMem32Slow; } hasSSEUnalignedMem()367 bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; } hasCmpxchg16b()368 bool hasCmpxchg16b() const { return HasCmpxchg16b; } useLeaForSP()369 bool useLeaForSP() const { return UseLeaForSP; } hasSlowDivide32()370 bool hasSlowDivide32() const { return HasSlowDivide32; } hasSlowDivide64()371 bool hasSlowDivide64() const { return HasSlowDivide64; } padShortFunctions()372 bool padShortFunctions() const { return PadShortFunctions; } callRegIndirect()373 bool callRegIndirect() const { return CallRegIndirect; } LEAusesAG()374 bool LEAusesAG() const { return LEAUsesAG; } slowLEA()375 bool slowLEA() const { return SlowLEA; } slowIncDec()376 bool slowIncDec() const { return SlowIncDec; } useSqrtEst()377 bool useSqrtEst() const { return UseSqrtEst; } useReciprocalEst()378 bool useReciprocalEst() const { return UseReciprocalEst; } hasCDI()379 bool hasCDI() const { return HasCDI; } hasPFI()380 bool hasPFI() const { return HasPFI; } hasERI()381 bool hasERI() const { return HasERI; } hasDQI()382 bool hasDQI() const { return HasDQI; } hasBWI()383 bool hasBWI() const { return HasBWI; } hasVLX()384 bool hasVLX() const { return HasVLX; } 385 isAtom()386 bool isAtom() const { return X86ProcFamily == IntelAtom; } isSLM()387 bool isSLM() const { return X86ProcFamily == IntelSLM; } 388 getTargetTriple()389 const Triple &getTargetTriple() const { return TargetTriple; } 390 isTargetDarwin()391 bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } isTargetFreeBSD()392 bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); } isTargetDragonFly()393 bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); } isTargetSolaris()394 bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); } isTargetPS4()395 bool isTargetPS4() const { return TargetTriple.isPS4(); } 396 isTargetELF()397 bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } isTargetCOFF()398 bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } isTargetMachO()399 bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } 400 isTargetLinux()401 bool isTargetLinux() const { return TargetTriple.isOSLinux(); } isTargetNaCl()402 bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } isTargetNaCl32()403 bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); } isTargetNaCl64()404 bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); } 405 isTargetWindowsMSVC()406 bool isTargetWindowsMSVC() const { 407 return TargetTriple.isWindowsMSVCEnvironment(); 408 } 409 isTargetKnownWindowsMSVC()410 bool isTargetKnownWindowsMSVC() const { 411 return TargetTriple.isKnownWindowsMSVCEnvironment(); 412 } 413 isTargetWindowsCygwin()414 bool isTargetWindowsCygwin() const { 415 return TargetTriple.isWindowsCygwinEnvironment(); 416 } 417 isTargetWindowsGNU()418 bool isTargetWindowsGNU() const { 419 return TargetTriple.isWindowsGNUEnvironment(); 420 } 421 isTargetWindowsItanium()422 bool isTargetWindowsItanium() const { 423 return TargetTriple.isWindowsItaniumEnvironment(); 424 } 425 isTargetCygMing()426 bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); } 427 isOSWindows()428 bool isOSWindows() const { return TargetTriple.isOSWindows(); } 429 isTargetWin64()430 bool isTargetWin64() const { 431 return In64BitMode && TargetTriple.isOSWindows(); 432 } 433 isTargetWin32()434 bool isTargetWin32() const { 435 return !In64BitMode && (isTargetCygMing() || isTargetKnownWindowsMSVC()); 436 } 437 isPICStyleSet()438 bool isPICStyleSet() const { return PICStyle != PICStyles::None; } isPICStyleGOT()439 bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; } isPICStyleRIPRel()440 bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; } 441 isPICStyleStubPIC()442 bool isPICStyleStubPIC() const { 443 return PICStyle == PICStyles::StubPIC; 444 } 445 isPICStyleStubNoDynamic()446 bool isPICStyleStubNoDynamic() const { 447 return PICStyle == PICStyles::StubDynamicNoPIC; 448 } isPICStyleStubAny()449 bool isPICStyleStubAny() const { 450 return PICStyle == PICStyles::StubDynamicNoPIC || 451 PICStyle == PICStyles::StubPIC; 452 } 453 isCallingConvWin64(CallingConv::ID CC)454 bool isCallingConvWin64(CallingConv::ID CC) const { 455 return (isTargetWin64() && CC != CallingConv::X86_64_SysV) || 456 CC == CallingConv::X86_64_Win64; 457 } 458 459 /// ClassifyGlobalReference - Classify a global variable reference for the 460 /// current subtarget according to how we should reference it in a non-pcrel 461 /// context. 462 unsigned char ClassifyGlobalReference(const GlobalValue *GV, 463 const TargetMachine &TM)const; 464 465 /// Classify a blockaddress reference for the current subtarget according to 466 /// how we should reference it in a non-pcrel context. 467 unsigned char ClassifyBlockAddressReference() const; 468 469 /// Return true if the subtarget allows calls to immediate address. 470 bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const; 471 472 /// This function returns the name of a function which has an interface 473 /// like the non-standard bzero function, if such a function exists on 474 /// the current subtarget and it is considered prefereable over 475 /// memset with zero passed as the second argument. Otherwise it 476 /// returns null. 477 const char *getBZeroEntry() const; 478 479 /// This function returns true if the target has sincos() routine in its 480 /// compiler runtime or math libraries. 481 bool hasSinCos() const; 482 483 /// Enable the MachineScheduler pass for all X86 subtargets. enableMachineScheduler()484 bool enableMachineScheduler() const override { return true; } 485 486 bool enableEarlyIfConversion() const override; 487 488 /// Return the instruction itineraries based on the subtarget selection. getInstrItineraryData()489 const InstrItineraryData *getInstrItineraryData() const override { 490 return &InstrItins; 491 } 492 getAntiDepBreakMode()493 AntiDepBreakMode getAntiDepBreakMode() const override { 494 return TargetSubtargetInfo::ANTIDEP_CRITICAL; 495 } 496 }; 497 498 } // End llvm namespace 499 500 #endif 501