//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file describes a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

namespace llvm {

class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;
class Type;
class Value;
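
// These implementations are not meant to be constructed directly; they are
// normally created by the target machine's getTargetTransformInfo() hook.
// A minimal sketch of that wiring (assuming the usual GCNTargetMachine /
// R600TargetMachine overrides in AMDGPUTargetMachine.cpp):
//
//   TargetTransformInfo
//   GCNTargetMachine::getTargetTransformInfo(const Function &F) {
//     return TargetTransformInfo(GCNTTIImpl(this, F));
//   }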

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()),
        TargetTriple(TM->getTargetTriple()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphicsShader;

  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,
    AMDGPU::FeatureDebuggerEmitPrologue,
    AMDGPU::FeatureDebuggerInsertNops,

    // Property of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,

    // Perf-tuning features
    AMDGPU::FeatureFastFMAF32,
    AMDGPU::HalfRate64Ops
  };

  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }
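
  // For illustration: on a subtarget without HalfRate64Ops, get64BitInstrCost()
  // yields getQuarterRateInstrCost() (3 * TCC_Basic); with HalfRate64Ops it
  // yields getHalfRateInstrCost() (2 * TCC_Basic). The cost queries declared
  // below are expected to scale these per-rate helpers by the number of
  // legalized instructions an IR operation expands to.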

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()),
        ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
        TLI(ST->getTargetLowering()),
        CommonTTI(TM, F),
        IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
    if (IsGraphicsShader)
      return -1;
    return ST->hasFlatAddressSpace() ?
      ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
  }

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 9; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
};

class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()),
        ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
        TLI(ST->getTargetLowering()),
        CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H