//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides a helper that implements much of the TTI interface in
/// terms of the target-independent code generator and TargetLowering
/// interfaces.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
#define LLVM_CODEGEN_BASICTTIIMPL_H

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"

namespace llvm {

extern cl::opt<unsigned> PartialUnrollingThreshold;

/// \brief Base class which can be used to help build a TTI implementation.
///
/// This class provides as much implementation of the TTI interface as is
/// possible using the target independent parts of the code generator.
///
/// In order to subclass it, your class must implement a getST() method to
/// return the subtarget, and a getTLI() method to return the target lowering.
/// We need these methods implemented in the derived class so that this class
/// doesn't have to duplicate storage for them.
template <typename T>
class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
private:
  typedef TargetTransformInfoImplCRTPBase<T> BaseT;
  typedef TargetTransformInfo TTI;

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the result needs to be inserted and/or extracted from vectors.
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
    assert(Ty->isVectorTy() && "Can only scalarize vectors");
    unsigned Cost = 0;

    for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
      if (Insert)
        Cost += static_cast<T *>(this)
                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
      if (Extract)
        Cost += static_cast<T *>(this)
                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
    }

    return Cost;
  }

  /// Estimate the cost overhead of SK_Alternate shuffle.
  unsigned getAltShuffleOverhead(Type *Ty) {
    assert(Ty->isVectorTy() && "Can only shuffle vectors");
    unsigned Cost = 0;
    // The shuffle cost is the cost of extracting each element from its source
    // vector plus the cost of inserting it into the result vector.
    //
    // E.g. a <4 x float> alternate shuffle has the mask <0,5,2,7>, i.e. we
    // extract index 0 of the first vector, index 1 of the second vector,
    // index 2 of the first vector and index 3 of the second vector, and
    // insert them at indices <0,1,2,3> of the result vector.
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
      Cost += static_cast<T *>(this)
                  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
      Cost += static_cast<T *>(this)
                  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
    }
    return Cost;
  }
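  // Illustrative example: for a <4 x float> alternate shuffle, assuming each
  // insert and extract costs 1 (the default getVectorInstrCost below returns
  // the legalization factor of the scalar type), the loop above charges
  // 4 * (1 + 1) = 8.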
  /// \brief Local query method delegates up to T which *must* implement this!
  const TargetSubtargetInfo *getST() const {
    return static_cast<const T *>(this)->getST();
  }

  /// \brief Local query method delegates up to T which *must* implement this!
  const TargetLoweringBase *getTLI() const {
    return static_cast<const T *>(this)->getTLI();
  }

protected:
  explicit BasicTTIImplBase(const TargetMachine *TM)
      : BaseT(TM->getDataLayout()) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  BasicTTIImplBase(const BasicTTIImplBase &Arg)
      : BaseT(static_cast<const BaseT &>(Arg)) {}
  BasicTTIImplBase(BasicTTIImplBase &&Arg)
      : BaseT(std::move(static_cast<BaseT &>(Arg))) {}
  BasicTTIImplBase &operator=(const BasicTTIImplBase &RHS) {
    BaseT::operator=(static_cast<const BaseT &>(RHS));
    return *this;
  }
  BasicTTIImplBase &operator=(BasicTTIImplBase &&RHS) {
    BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
    return *this;
  }

  /// \name Scalar TTI Implementations
  /// @{

  bool hasBranchDivergence() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isLegalAddImmediate(int64_t imm) {
    return getTLI()->isLegalAddImmediate(imm);
  }

  bool isLegalICmpImmediate(int64_t imm) {
    return getTLI()->isLegalICmpImmediate(imm);
  }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->isLegalAddressingMode(AM, Ty);
  }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->getScalingFactorCost(AM, Ty);
  }

  bool isTruncateFree(Type *Ty1, Type *Ty2) {
    return getTLI()->isTruncateFree(Ty1, Ty2);
  }

  bool isProfitableToHoist(Instruction *I) {
    return getTLI()->isProfitableToHoist(I);
  }

  bool isTypeLegal(Type *Ty) {
    EVT VT = getTLI()->getValueType(Ty);
    return getTLI()->isTypeLegal(VT);
  }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<const Value *> Arguments) {
    return BaseT::getIntrinsicCost(IID, RetTy, Arguments);
  }
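  // Illustrative note on the addressing-mode helpers above: an access of the
  // form GV + 16 + 4 * Idx would be queried as BaseGV = GV, BaseOffset = 16,
  // HasBaseReg = false and Scale = 4; whether that mode is legal, and what it
  // costs, is decided entirely by the target's TargetLowering.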
  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> ParamTys) {
    if (IID == Intrinsic::cttz) {
      if (getTLI()->isCheapToSpeculateCttz())
        return TargetTransformInfo::TCC_Basic;
      return TargetTransformInfo::TCC_Expensive;
    }

    if (IID == Intrinsic::ctlz) {
      if (getTLI()->isCheapToSpeculateCtlz())
        return TargetTransformInfo::TCC_Basic;
      return TargetTransformInfo::TCC_Expensive;
    }

    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
  }

  unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }

  unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }

  bool shouldBuildLookupTables() {
    const TargetLoweringBase *TLI = getTLI();
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
           TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
  }

  bool haveFastSqrt(Type *Ty) {
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(Ty);
    return TLI->isTypeLegal(VT) &&
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
  }

  unsigned getFPOpCost(Type *Ty) {
    // By default, FP instructions are no more expensive since they are
    // implemented in HW. Target specific TTI can override this.
    return TargetTransformInfo::TCC_Basic;
  }

  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
    const TargetLoweringBase *TLI = getTLI();
    switch (Opcode) {
    default:
      break;
    case Instruction::Trunc: {
      if (TLI->isTruncateFree(OpTy, Ty))
        return TargetTransformInfo::TCC_Free;
      return TargetTransformInfo::TCC_Basic;
    }
    case Instruction::ZExt: {
      if (TLI->isZExtFree(OpTy, Ty))
        return TargetTransformInfo::TCC_Free;
      return TargetTransformInfo::TCC_Basic;
    }
    }

    return BaseT::getOperationCost(Opcode, Ty, OpTy);
  }
  void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) {
    // This unrolling functionality is target independent, but to provide some
    // motivation for its intended use, for x86:

    // According to the Intel 64 and IA-32 Architectures Optimization Reference
    // Manual, Intel Core models and later have a loop stream detector (and
    // associated uop queue) that can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
    //    taken, and none of them may be calls.
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.

    // According to the Software Optimization Guide for AMD Family 15h
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
    // and loop buffer which can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have fewer than 16 branches.
    //  - The loop must have less than 40 uops in all executed loop branches.

    // The number of taken branches in a loop is hard to estimate here, and
    // benchmarking has revealed that it is better not to be conservative when
    // estimating the branch count. As a result, we'll ignore the branch limits
    // until someone finds a case where it matters in practice.

    unsigned MaxOps;
    const TargetSubtargetInfo *ST = getST();
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
      MaxOps = PartialUnrollingThreshold;
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
    else
      return;

    // Scan the loop: don't unroll loops with calls.
    for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
         ++I) {
      BasicBlock *BB = *I;

      for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
        if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
          ImmutableCallSite CS(J);
          if (const Function *F = CS.getCalledFunction()) {
            if (!static_cast<T *>(this)->isLoweredToCall(F))
              continue;
          }

          return;
        }
    }

    // Enable runtime and partial unrolling up to the specified size.
    UP.Partial = UP.Runtime = true;
    UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
  }

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(bool Vector) { return 1; }

  unsigned getRegisterBitWidth(bool Vector) { return 32; }

  unsigned getMaxInterleaveFactor() { return 1; }

  unsigned getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None) {
    // Check if any of the operands are vector operands.
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);

    bool IsFloat = Ty->getScalarType()->isFloatingPointTy();
    // Assume that floating point arithmetic operations cost twice as much as
    // integer operations.
    unsigned OpCost = (IsFloat ? 2 : 1);

    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1.
      // If the type is split to multiple registers, assume that there is some
      // overhead to this.
      // TODO: Once we have extract/insert subvector cost we need to use them.
      if (LT.first > 1)
        return LT.first * 2 * OpCost;
      return LT.first * 1 * OpCost;
    }

    if (!TLI->isOperationExpand(ISD, LT.second)) {
      // If the operation is custom lowered then assume that the code is twice
      // as expensive.
      return LT.first * 2 * OpCost;
    }

    // Else, assume that we need to scalarize this op.
    if (Ty->isVectorTy()) {
      unsigned Num = Ty->getVectorNumElements();
      unsigned Cost = static_cast<T *>(this)
                          ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(Ty, true, true) + Num * Cost;
    }

    // We don't know anything about this scalar instruction.
    return OpCost;
  }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp) {
    if (Kind == TTI::SK_Alternate) {
      return getAltShuffleOverhead(Tp);
    }
    return 1;
  }
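  // Illustrative example: an integer add on <8 x i32>, on a target whose
  // widest legal vector is 128 bits, legalizes to two v4i32 operations
  // (LT.first == 2). The operation is legal and OpCost is 1, so
  // getArithmeticInstrCost above returns 2 * 2 * 1 = 4; had the operation
  // required scalarization, the estimate would instead be the insert/extract
  // overhead plus eight scalar adds.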
  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src);
    std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst);

    // Check for NOOP conversions.
    if (SrcLT.first == DstLT.first &&
        SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {

      // Bitcasts between types that are legalized to the same type are free.
      if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
        return 0;
    }

    if (Opcode == Instruction::Trunc &&
        TLI->isTruncateFree(SrcLT.second, DstLT.second))
      return 0;

    if (Opcode == Instruction::ZExt &&
        TLI->isZExtFree(SrcLT.second, DstLT.second))
      return 0;

    // If the cast is marked as legal (or promote) then assume low cost.
    if (SrcLT.first == DstLT.first &&
        TLI->isOperationLegalOrPromote(ISD, DstLT.second))
      return 1;

    // Handle scalar conversions.
    if (!Src->isVectorTy() && !Dst->isVectorTy()) {

      // Scalar bitcasts are usually free.
      if (Opcode == Instruction::BitCast)
        return 0;

      // Just check the op cost. If the operation is legal then assume it
      // costs 1.
      if (!TLI->isOperationExpand(ISD, DstLT.second))
        return 1;

      // Assume that illegal scalar instructions are expensive.
      return 4;
    }

    // Check vector-to-vector casts.
    if (Dst->isVectorTy() && Src->isVectorTy()) {

      // If the cast is between same-sized registers, then the check is simple.
      if (SrcLT.first == DstLT.first &&
          SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {

        // Assume that Zext is done using AND.
        if (Opcode == Instruction::ZExt)
          return 1;

        // Assume that sext is done using SHL and SRA.
        if (Opcode == Instruction::SExt)
          return 2;

        // Just check the op cost. If the operation is legal then assume it
        // costs 1 and multiply by the type-legalization overhead.
        if (!TLI->isOperationExpand(ISD, DstLT.second))
          return SrcLT.first * 1;
      }

      // If we are converting vectors and the operation is illegal, or
      // if the vectors are legalized to different types, estimate the
      // scalarization costs.
      unsigned Num = Dst->getVectorNumElements();
      unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
          Opcode, Dst->getScalarType(), Src->getScalarType());

      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(Dst, true, true) + Num * Cost;
    }

    // We already handled vector-to-vector and scalar-to-scalar conversions.
    // This is where we handle bitcasts between vectors and scalars. We need to
    // assume that the conversion is scalarized in one way or another.
    if (Opcode == Instruction::BitCast)
      // Illegal bitcasts are done by storing and loading from a stack slot.
      return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
                                : 0) +
             (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
                                : 0);

    llvm_unreachable("Unhandled cast");
  }

  unsigned getCFInstrCost(unsigned Opcode) {
    // Branches are assumed to be predicted.
    return 0;
  }
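  // Illustrative example: a bitcast from <4 x i32> to <2 x i64>, where both
  // types legalize to a single 128-bit register, hits the no-op check at the
  // top of getCastInstrCost above and costs 0; a sign extension between
  // same-sized legal registers that is not itself directly legal is modeled
  // as a SHL plus a SRA and costs 2.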
  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // Selects on vectors are actually vector selects.
    if (ISD == ISD::SELECT) {
      assert(CondTy && "CondTy must exist");
      if (CondTy->isVectorTy())
        ISD = ISD::VSELECT;
    }

    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);

    if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
        !TLI->isOperationExpand(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1. Multiply
      // by the type-legalization overhead.
      return LT.first * 1;
    }

    // Otherwise, assume that the operation is scalarized.
    if (ValTy->isVectorTy()) {
      unsigned Num = ValTy->getVectorNumElements();
      if (CondTy)
        CondTy = CondTy->getScalarType();
      unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
          Opcode, ValTy->getScalarType(), CondTy);

      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
    }

    // Unknown scalar opcode.
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    std::pair<unsigned, MVT> LT =
        getTLI()->getTypeLegalizationCost(Val->getScalarType());

    return LT.first;
  }

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace) {
    assert(!Src->isVoidTy() && "Invalid type");
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src);

    // Assume that all loads of legal types cost 1.
    unsigned Cost = LT.first;

    if (Src->isVectorTy() &&
        Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
      // This is a vector load that legalizes to a larger type than the vector
      // itself. Unless the corresponding extending load or truncating store is
      // legal, this will scalarize.
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
      EVT MemVT = getTLI()->getValueType(Src, true);
      if (MemVT.isSimple() && MemVT != MVT::Other) {
        if (Opcode == Instruction::Store)
          LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT());
        else
          LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
      }

      if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
        // This is a vector load/store for some illegal type that is scalarized.
        // We must account for the cost of building or decomposing the vector.
        Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
                                         Opcode == Instruction::Store);
      }
    }

    return Cost;
  }
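  // Illustrative example: a load of <4 x i8> on a target that legalizes the
  // value to a 128-bit <4 x i32> register is an extending load; unless the
  // target marks that extending load Legal or Custom, getMemoryOpCost above
  // adds the cost of inserting the four loaded elements on top of the base
  // cost LT.first.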
  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                                 ArrayRef<Type *> Tys) {
    unsigned ISD = 0;
    switch (IID) {
    default: {
      // Assume that we need to scalarize this intrinsic.
      unsigned ScalarizationCost = 0;
      unsigned ScalarCalls = 1;
      Type *ScalarRetTy = RetTy;
      if (RetTy->isVectorTy()) {
        ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
        ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
        ScalarRetTy = RetTy->getScalarType();
      }
      SmallVector<Type *, 4> ScalarTys;
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
        Type *Ty = Tys[i];
        if (Ty->isVectorTy()) {
          ScalarizationCost += getScalarizationOverhead(Ty, false, true);
          ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
          Ty = Ty->getScalarType();
        }
        ScalarTys.push_back(Ty);
      }
      if (ScalarCalls == 1)
        return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.

      unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
          IID, ScalarRetTy, ScalarTys);

      return ScalarCalls * ScalarCost + ScalarizationCost;
    }
    // Look for intrinsics that can be lowered directly or turned into a scalar
    // intrinsic call.
    case Intrinsic::sqrt:
      ISD = ISD::FSQRT;
      break;
    case Intrinsic::sin:
      ISD = ISD::FSIN;
      break;
    case Intrinsic::cos:
      ISD = ISD::FCOS;
      break;
    case Intrinsic::exp:
      ISD = ISD::FEXP;
      break;
    case Intrinsic::exp2:
      ISD = ISD::FEXP2;
      break;
    case Intrinsic::log:
      ISD = ISD::FLOG;
      break;
    case Intrinsic::log10:
      ISD = ISD::FLOG10;
      break;
    case Intrinsic::log2:
      ISD = ISD::FLOG2;
      break;
    case Intrinsic::fabs:
      ISD = ISD::FABS;
      break;
    case Intrinsic::minnum:
      ISD = ISD::FMINNUM;
      break;
    case Intrinsic::maxnum:
      ISD = ISD::FMAXNUM;
      break;
    case Intrinsic::copysign:
      ISD = ISD::FCOPYSIGN;
      break;
    case Intrinsic::floor:
      ISD = ISD::FFLOOR;
      break;
    case Intrinsic::ceil:
      ISD = ISD::FCEIL;
      break;
    case Intrinsic::trunc:
      ISD = ISD::FTRUNC;
      break;
    case Intrinsic::nearbyint:
      ISD = ISD::FNEARBYINT;
      break;
    case Intrinsic::rint:
      ISD = ISD::FRINT;
      break;
    case Intrinsic::round:
      ISD = ISD::FROUND;
      break;
    case Intrinsic::pow:
      ISD = ISD::FPOW;
      break;
    case Intrinsic::fma:
      ISD = ISD::FMA;
      break;
    case Intrinsic::fmuladd:
      ISD = ISD::FMA;
      break;
    // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      return 0;
    case Intrinsic::masked_store:
      return static_cast<T *>(this)
          ->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, 0);
    case Intrinsic::masked_load:
      return static_cast<T *>(this)
          ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
    }

    const TargetLoweringBase *TLI = getTLI();
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);

    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1.
      // If the type is split to multiple registers, assume that there is some
      // overhead to this.
      // TODO: Once we have extract/insert subvector cost we need to use them.
      if (LT.first > 1)
        return LT.first * 2;
      return LT.first * 1;
    }

    if (!TLI->isOperationExpand(ISD, LT.second)) {
      // If the operation is custom lowered then assume that the code is twice
      // as expensive.
      return LT.first * 2;
    }
    // If we can't lower fmuladd into an FMA, estimate the cost as a floating
    // point mul followed by an add.
    if (IID == Intrinsic::fmuladd)
      return static_cast<T *>(this)
                 ->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
             static_cast<T *>(this)
                 ->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);

    // Else, assume that we need to scalarize this intrinsic. For math builtins
    // this will emit a costly libcall, adding call overhead and spills. Make it
    // very expensive.
    if (RetTy->isVectorTy()) {
      unsigned ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
      unsigned ScalarCalls = RetTy->getVectorNumElements();
      SmallVector<Type *, 4> ScalarTys;
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
        Type *Ty = Tys[i];
        if (Ty->isVectorTy())
          Ty = Ty->getScalarType();
        ScalarTys.push_back(Ty);
      }
      unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
          IID, RetTy->getScalarType(), ScalarTys);
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
        if (Tys[i]->isVectorTy()) {
          ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
          ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
        }
      }

      return ScalarCalls * ScalarCost + ScalarizationCost;
    }

    // This is going to be turned into a library call, make it expensive.
    return 10;
  }

  /// \brief Compute the cost of the given call instruction.
  ///
  /// Compute the cost of calling function F with return type RetTy and
  /// argument types Tys. F might be nullptr, in which case the cost of an
  /// arbitrary call with the specified signature will be returned.
  /// This is used, for instance, when we estimate a call of a vector
  /// counterpart of the given function.
  /// \param F Called function, might be nullptr.
  /// \param RetTy Return value type.
  /// \param Tys Argument types.
  /// \returns The cost of Call instruction.
  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
    return 10;
  }

  unsigned getNumberOfParts(Type *Tp) {
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp);
    return LT.first;
  }

  unsigned getAddressComputationCost(Type *Ty, bool IsComplex) { return 0; }

  unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) {
    assert(Ty->isVectorTy() && "Expect a vector type");
    unsigned NumVecElts = Ty->getVectorNumElements();
    unsigned NumReduxLevels = Log2_32(NumVecElts);
    unsigned ArithCost =
        NumReduxLevels *
        static_cast<T *>(this)->getArithmeticInstrCost(Opcode, Ty);
    // Assume the pairwise shuffles add a cost.
    unsigned ShuffleCost =
        NumReduxLevels * (IsPairwise + 1) *
        static_cast<T *>(this)
            ->getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts / 2, Ty);
    return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
  }
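  // Illustrative example: an fadd reduction of <8 x float> has
  // NumReduxLevels == Log2_32(8) == 3, so the estimate is three vector fadds,
  // three shuffles (six when IsPairwise), plus the per-lane extraction
  // overhead charged by getScalarizationOverhead.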
  /// @}
};

/// \brief Concrete BasicTTIImpl that can be used if no further customization
/// is needed.
class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
  typedef BasicTTIImplBase<BasicTTIImpl> BaseT;
  friend class BasicTTIImplBase<BasicTTIImpl>;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit BasicTTIImpl(const TargetMachine *ST, Function &F);

  // Provide value semantics. MSVC requires that we spell all of these out.
  BasicTTIImpl(const BasicTTIImpl &Arg)
      : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
  BasicTTIImpl(BasicTTIImpl &&Arg)
      : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
        TLI(std::move(Arg.TLI)) {}
  BasicTTIImpl &operator=(const BasicTTIImpl &RHS) {
    BaseT::operator=(static_cast<const BaseT &>(RHS));
    ST = RHS.ST;
    TLI = RHS.TLI;
    return *this;
  }
  BasicTTIImpl &operator=(BasicTTIImpl &&RHS) {
    BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
    ST = std::move(RHS.ST);
    TLI = std::move(RHS.TLI);
    return *this;
  }
};

} // end namespace llvm

#endif