//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"

namespace llvm {

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
      : DL(Arg.DL) {}
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
    switch (Opcode) {
    default:
      // By default, just classify everything as 'basic'.
      return TTI::TCC_Basic;

    case Instruction::GetElementPtr:
      llvm_unreachable("Use getGEPCost for GEP operations!");

    case Instruction::BitCast:
      assert(OpTy && "Cast instructions must provide the operand type");
      if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return TTI::TCC_Free;

      // Otherwise, the default basic cost is used.
      return TTI::TCC_Basic;

    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      return TTI::TCC_Expensive;

    case Instruction::IntToPtr: {
      // An inttoptr cast is free so long as the input is a legal integer type
      // which doesn't contain values outside the range of a pointer.
      unsigned OpSize = OpTy->getScalarSizeInBits();
      if (DL.isLegalInteger(OpSize) &&
          OpSize <= DL.getPointerTypeSizeInBits(Ty))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::PtrToInt: {
      // A ptrtoint cast is free so long as the result is large enough to store
      // the pointer, and a legal integer type.
      unsigned DestSize = Ty->getScalarSizeInBits();
      if (DL.isLegalInteger(DestSize) &&
          DestSize >= DL.getPointerTypeSizeInBits(OpTy))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::Trunc:
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty)))
        return TTI::TCC_Free;

      return TTI::TCC_Basic;
    }
  }

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) {
    JTSize = 0;
    return SI.getNumCases();
  }

  int getExtCost(const Instruction *I, const Value *Src) {
    return TTI::TCC_Basic;
  }

  unsigned getCallCost(FunctionType *FTy, int NumArgs) {
    assert(FTy && "FunctionType must be provided to this routine.");

    // The target-independent implementation just measures the size of the
    // function by approximating that each argument will take on average one
    // instruction to prepare.

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = FTy->getNumParams();

    return TTI::TCC_Basic * (NumArgs + 1);
  }

  unsigned getInliningThresholdMultiplier() { return 1; }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> ParamTys) {
    switch (IID) {
    default:
      // Intrinsics rarely (if ever) have normal argument setup constraints.
      // Model them as having a basic instruction cost.
      // FIXME: This is wrong for libc intrinsics.
      return TTI::TCC_Basic;

    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_param:
    case Intrinsic::coro_subfn_addr:
      // These intrinsics don't actually represent code after lowering.
      return TTI::TCC_Free;
    }
  }

  bool hasBranchDivergence() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    return -1;
  }

  bool isLoweredToCall(const Function *F) {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &) {}

  bool isLegalAddImmediate(int64_t Imm) { return false; }

  bool isLegalICmpImmediate(int64_t Imm) { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool canMacroFuseCmp() { return false; }

  bool shouldFavorPostInc() const { return false; }

  bool isLegalMaskedStore(Type *DataType) { return false; }

  bool isLegalMaskedLoad(Type *DataType) { return false; }

  bool isLegalMaskedScatter(Type *DataType) { return false; }

  bool isLegalMaskedGather(Type *DataType) { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }

  bool prefersVectorizedAddressing() { return true; }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                              Scale, AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }

  bool isProfitableToHoist(Instruction *I) { return true; }

  bool useAA() { return false; }

  bool isTypeLegal(Type *Ty) { return false; }

  unsigned getJumpBufAlignment() { return 0; }

  unsigned getJumpBufSize() { return 0; }

  bool shouldBuildLookupTables() { return true; }
  bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }

  bool useColdCCForColdCall(Function &F) { return false; }

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
    return 0;
  }

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) { return 0; }

  bool supportsEfficientVectorElementLoadStore() { return false; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }

  const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
      bool IsZeroCmp) const {
    return nullptr;
  }

  bool enableInterleavedAccessVectorization() { return false; }

  bool isFPVectorizationPotentiallyUnsafe() { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth,
                                      unsigned AddressSpace,
                                      unsigned Alignment,
                                      bool *Fast) { return false; }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) { return false; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }

  unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty) {
    return 0;
  }

  unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }

  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                         Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                         Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(bool Vector) { return 8; }

  unsigned getRegisterBitWidth(bool Vector) const { return 32; }

  unsigned getMinVectorRegisterBitWidth() { return 128; }

  bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }

  unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader) {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() { return 0; }

  llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned> getCacheAssociativity(
      TargetTransformInfo::CacheLevel Level) {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() { return 0; }

  unsigned getMinPrefetchStride() { return 1; }

  unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

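  // Note: the cost hooks below return a conservative flat cost of one unit
  // (or zero) so this base class remains usable without any target knowledge.
  // Concrete targets are expected to refine these values, typically by
  // deriving from BasicTTIImplBase, which layers type-legalization-aware
  // costing on top of this mix-in.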

  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                  TTI::OperandValueKind Opd1Info,
                                  TTI::OperandValueKind Opd2Info,
                                  TTI::OperandValueProperties Opd1PropInfo,
                                  TTI::OperandValueProperties Opd2PropInfo,
                                  ArrayRef<const Value *> Args) {
    return 1;
  }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
                          Type *SubTp) {
    return 1;
  }

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                            const Instruction *I) { return 1; }

  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                    VectorType *VecTy, unsigned Index) {
    return 1;
  }

  unsigned getCFInstrCost(unsigned Opcode) { return 1; }

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                              const Instruction *I) {
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    return 1;
  }

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace, const Instruction *I) {
    return 1;
  }

  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                 unsigned AddressSpace) {
    return 1;
  }

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                                  bool VariableMask,
                                  unsigned Alignment) {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                      unsigned Factor,
                                      ArrayRef<unsigned> Indices,
                                      unsigned Alignment,
                                      unsigned AddressSpace) {
    return 1;
  }

  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                 ArrayRef<Type *> Tys, FastMathFlags FMF,
                                 unsigned ScalarizationCostPassed) {
    return 1;
  }
  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                 ArrayRef<Value *> Args, FastMathFlags FMF,
                                 unsigned VF) {
    return 1;
  }

  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
    return 1;
  }

  unsigned getNumberOfParts(Type *Tp) { return 0; }

  unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                     const SCEV *) {
    return 0;
  }

  unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }

  unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; }

  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) {
    return nullptr;
  }

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign, unsigned DestAlign) const {
    return Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const {
    for (unsigned i = 0; i != RemainingBytes; ++i)
      OpsOut.push_back(Type::getInt8Ty(Context));
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const {
    return true;
  }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In case of a vector we need to pick the max of the min required
      // sizes of the elements.
      auto *VT = cast<VectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the total vector width divided by the
      // number of elements in the vector.
      unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;
          // If one element is signed then the whole vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an int constant element; fall back to the max required size.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getCallCost;

  unsigned getCallCost(const Function *F, int NumArgs) {
    assert(F && "A concrete function must be provided to this routine.");

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = F->arg_size();

    if (Intrinsic::ID IID = F->getIntrinsicID()) {
      FunctionType *FTy = F->getFunctionType();
      SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
      return static_cast<T *>(this)
          ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
    }

    if (!static_cast<T *>(this)->isLoweredToCall(F))
      return TTI::TCC_Basic; // Give a basic cost if it will be lowered
                             // directly.

    return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs);
  }

  unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments) {
    // Simply delegate to generic handling of the call.
    // FIXME: We should use instsimplify or something else to catch calls which
    // will constant fold with these arguments.
    return static_cast<T *>(this)->getCallCost(F, Arguments.size());
  }

  using BaseT::getGEPCost;

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    const GlobalValue *BaseGV = nullptr;
    if (Ptr != nullptr) {
      // TODO: will remove this when pointers have an opaque type.
      assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
                 PointeeType &&
             "explicit pointee type doesn't match operand's pointee type");
      BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    }
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand,
    // the basis, therefore TargetType is a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always splat or scalar constant
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    // Assumes the address space is 0 when Ptr is nullptr.
    unsigned AS =
        (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }

  using BaseT::getIntrinsicCost;

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<const Value *> Arguments) {
    // Delegate to the generic intrinsic handling code. This mostly provides an
    // opportunity for targets to (for example) special case the cost of
    // certain intrinsics based on constants used as arguments.
    SmallVector<Type *, 8> ParamTys;
    ParamTys.reserve(Arguments.size());
    for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
      ParamTys.push_back(Arguments[Idx]->getType());
    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys);
  }

  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
    if (isa<PHINode>(U))
      return TTI::TCC_Free; // Model all PHI nodes as free.

    // Static alloca doesn't generate target instructions.
    if (auto *A = dyn_cast<AllocaInst>(U))
      if (A->isStaticAlloca())
        return TTI::TCC_Free;

    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
      return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                GEP->getPointerOperand(),
                                                Operands.drop_front());
    }

    if (auto CS = ImmutableCallSite(U)) {
      const Function *F = CS.getCalledFunction();
      if (!F) {
        // Just use the called value type.
        Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
        return static_cast<T *>(this)
            ->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
      }

      SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
      return static_cast<T *>(this)->getCallCost(F, Arguments);
    }

    if (const CastInst *CI = dyn_cast<CastInst>(U)) {
      // Result of a cmp instruction is often extended (to be used by other
      // cmp instructions, logical or return instructions). These are usually
      // a nop on most sane targets.
      if (isa<CmpInst>(CI->getOperand(0)))
        return TTI::TCC_Free;
      if (isa<SExtInst>(CI) || isa<ZExtInst>(CI) || isa<FPExtInst>(CI))
        return static_cast<T *>(this)->getExtCost(CI, Operands.back());
    }

    return static_cast<T *>(this)->getOperationCost(
        Operator::getOpcode(U), U->getType(),
        U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
  }

  int getInstructionLatency(const Instruction *I) {
    SmallVector<const Value *, 4> Operands(I->value_op_begin(),
                                           I->value_op_end());
    if (getUserCost(I, Operands) == TTI::TCC_Free)
      return 0;

    if (isa<LoadInst>(I))
      return 4;

    Type *DstTy = I->getType();

    // Usually an intrinsic is a simple instruction.
    // A real function call is much slower.
    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))
        return 40;
      // Some intrinsics return a value and a flag; we use the value type
      // to decide its latency.
      if (StructType *StructTy = dyn_cast<StructType>(DstTy))
        DstTy = StructTy->getElementType(0);
      // Fall through to simple instructions.
    }

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
    if (DstTy->isFloatingPointTy())
      return 3;

    return 1;
  }
};

} // end namespace llvm

#endif // LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H