//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"

namespace llvm {

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
      : DL(Arg.DL) {}
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

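  /// Classify \p Opcode into one of the coarse TCC_* cost buckets using only
  /// the DataLayout. For example, assuming a DataLayout where i64 is a legal
  /// integer type and pointers are 64 bits wide, a ptrtoint to i64 is reported
  /// as TTI::TCC_Free, while any integer or floating-point division is
  /// reported as TTI::TCC_Expensive.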
  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
    switch (Opcode) {
    default:
      // By default, just classify everything as 'basic'.
      return TTI::TCC_Basic;

    case Instruction::GetElementPtr:
      llvm_unreachable("Use getGEPCost for GEP operations!");

    case Instruction::BitCast:
      assert(OpTy && "Cast instructions must provide the operand type");
      if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return TTI::TCC_Free;

      // Otherwise, the default basic cost is used.
      return TTI::TCC_Basic;

    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      return TTI::TCC_Expensive;

    case Instruction::IntToPtr: {
      // An inttoptr cast is free so long as the input is a legal integer type
      // which doesn't contain values outside the range of a pointer.
      unsigned OpSize = OpTy->getScalarSizeInBits();
      if (DL.isLegalInteger(OpSize) &&
          OpSize <= DL.getPointerTypeSizeInBits(Ty))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::PtrToInt: {
      // A ptrtoint cast is free so long as the result is a legal integer type
      // that is large enough to hold the pointer.
      unsigned DestSize = Ty->getScalarSizeInBits();
      if (DL.isLegalInteger(DestSize) &&
          DestSize >= DL.getPointerTypeSizeInBits(OpTy))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::Trunc:
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty)))
        return TTI::TCC_Free;

      return TTI::TCC_Basic;
    }
  }

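  /// In this base model a GEP is free when every index is a constant, on the
  /// assumption that it folds into the addressing modes of its users, and
  /// costs one basic operation otherwise.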
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) {
    JTSize = 0;
    return SI.getNumCases();
  }

  int getExtCost(const Instruction *I, const Value *Src) {
    return TTI::TCC_Basic;
  }

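  /// Estimate the cost of a call from its argument count alone: one basic
  /// operation per explicit argument plus one for the call itself, so, for
  /// example, a call with three arguments is modeled as 4 * TCC_Basic.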
  unsigned getCallCost(FunctionType *FTy, int NumArgs) {
    assert(FTy && "FunctionType must be provided to this routine.");

    // The target-independent implementation just measures the size of the
    // function by approximating that each argument will take on average one
    // instruction to prepare.

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = FTy->getNumParams();

    return TTI::TCC_Basic * (NumArgs + 1);
  }

  unsigned getInliningThresholdMultiplier() { return 1; }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> ParamTys) {
    switch (IID) {
    default:
      // Intrinsics rarely (if ever) have normal argument setup constraints.
      // Model them as having a basic instruction cost.
      // FIXME: This is wrong for libc intrinsics.
      return TTI::TCC_Basic;

    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_param:
    case Intrinsic::coro_subfn_addr:
      // These intrinsics don't actually represent code after lowering.
      return TTI::TCC_Free;
    }
  }

  bool hasBranchDivergence() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    return -1;
  }

  bool isLoweredToCall(const Function *F) {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &) {}

  bool isLegalAddImmediate(int64_t Imm) { return false; }

  bool isLegalICmpImmediate(int64_t Imm) { return false; }

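  /// With the conservative defaults below, only plain "reg" addressing (no
  /// base GV, zero offset, no scale) and "reg + reg" addressing (a scale of 1)
  /// are accepted; targets override this to describe their real addressing
  /// modes.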
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

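  /// LSR cost tuples are compared lexicographically via std::tie, so the
  /// register count dominates, followed by AddRec cost, IV multiplications,
  /// base adds, scale cost, immediate cost, and finally setup cost.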
  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool canMacroFuseCmp() { return false; }

  bool shouldFavorPostInc() const { return false; }

  bool isLegalMaskedStore(Type *DataType) { return false; }

  bool isLegalMaskedLoad(Type *DataType) { return false; }

  bool isLegalMaskedScatter(Type *DataType) { return false; }

  bool isLegalMaskedGather(Type *DataType) { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }

  bool prefersVectorizedAddressing() { return true; }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                              Scale, AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }

  bool isProfitableToHoist(Instruction *I) { return true; }

  bool useAA() { return false; }

  bool isTypeLegal(Type *Ty) { return false; }

  unsigned getJumpBufAlignment() { return 0; }

  unsigned getJumpBufSize() { return 0; }

  bool shouldBuildLookupTables() { return true; }
  bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }

  bool useColdCCForColdCall(Function &F) { return false; }

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
    return 0;
  }

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) { return 0; }

  bool supportsEfficientVectorElementLoadStore() { return false; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }

  const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
      bool IsZeroCmp) const {
    return nullptr;
  }

  bool enableInterleavedAccessVectorization() { return false; }

  bool isFPVectorizationPotentiallyUnsafe() { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth,
                                      unsigned AddressSpace,
                                      unsigned Alignment,
                                      bool *Fast) { return false; }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) { return false; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }

  unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty) {
    return 0;
  }

  unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }

  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                         Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                         Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(bool Vector) { return 8; }

  unsigned getRegisterBitWidth(bool Vector) const { return 32; }

  unsigned getMinVectorRegisterBitWidth() { return 128; }

  bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }

  unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader) {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() { return 0; }

  llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned> getCacheAssociativity(
    TargetTransformInfo::CacheLevel Level) {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() { return 0; }

  unsigned getMinPrefetchStride() { return 1; }

  unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                  TTI::OperandValueKind Opd1Info,
                                  TTI::OperandValueKind Opd2Info,
                                  TTI::OperandValueProperties Opd1PropInfo,
                                  TTI::OperandValueProperties Opd2PropInfo,
                                  ArrayRef<const Value *> Args) {
    return 1;
  }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
                          Type *SubTp) {
    return 1;
  }

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                            const Instruction *I) { return 1; }

  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                    VectorType *VecTy, unsigned Index) {
    return 1;
  }

  unsigned getCFInstrCost(unsigned Opcode) { return 1; }

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                              const Instruction *I) {
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    return 1;
  }

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace, const Instruction *I) {
    return 1;
  }

  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                 unsigned AddressSpace) {
    return 1;
  }

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                                  bool VariableMask,
                                  unsigned Alignment) {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                      unsigned Factor,
                                      ArrayRef<unsigned> Indices,
                                      unsigned Alignment,
                                      unsigned AddressSpace) {
    return 1;
  }

  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                 ArrayRef<Type *> Tys, FastMathFlags FMF,
                                 unsigned ScalarizationCostPassed) {
    return 1;
  }
  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
            ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
    return 1;
  }

  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
    return 1;
  }

  unsigned getNumberOfParts(Type *Tp) { return 0; }

  unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                     const SCEV *) {
    return 0;
  }

  unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }

  unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; }

  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h.
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) {
    return nullptr;
  }

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign, unsigned DestAlign) const {
    return Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const {
    for (unsigned i = 0; i != RemainingBytes; ++i)
      OpsOut.push_back(Type::getInt8Ty(Context));
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const {
    return true;
  }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
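  // For example, a ConstantInt i32 with value 255 reports 8 bits with
  // isSigned == false, while an i32 with value -4 reports 2 bits with
  // isSigned == true.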
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In case of a vector we need to pick the max of the min required sizes
      // of the elements.
      auto *VT = cast<VectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the size of a single element: the total
      // vector width divided by the number of elements in the vector.
      unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;
          // If one element is signed then the whole vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an integer constant element; be conservative.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getCallCost;

  unsigned getCallCost(const Function *F, int NumArgs) {
    assert(F && "A concrete function must be provided to this routine.");

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = F->arg_size();

    if (Intrinsic::ID IID = F->getIntrinsicID()) {
      FunctionType *FTy = F->getFunctionType();
      SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
      return static_cast<T *>(this)
          ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
    }

    if (!static_cast<T *>(this)->isLoweredToCall(F))
      return TTI::TCC_Basic; // Give a basic cost if it will be lowered
                             // directly.

    return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs);
  }

  unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments) {
    // Simply delegate to generic handling of the call.
    // FIXME: We should use instsimplify or something else to catch calls which
    // will constant fold with these arguments.
    return static_cast<T *>(this)->getCallCost(F, Arguments.size());
  }

  using BaseT::getGEPCost;

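  /// Fold all constant GEP indices into a single byte offset and treat at most
  /// one non-constant index as a scaled register, then ask
  /// isLegalAddressingMode whether the result folds into an addressing mode.
  /// If it does, the GEP is modeled as free; otherwise it costs one basic
  /// operation.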
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    const GlobalValue *BaseGV = nullptr;
    if (Ptr != nullptr) {
      // TODO: will remove this when pointers have an opaque type.
      assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
                 PointeeType &&
             "explicit pointee type doesn't match operand's pointee type");
      BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    }
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has only a single operand,
    // the base pointer; TargetType is then a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of a scalar GEP with a constant index and the
      // cost of a vector GEP with a splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always a splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs a scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    // Assumes the address space is 0 when Ptr is nullptr.
    unsigned AS =
        (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }

  using BaseT::getIntrinsicCost;

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<const Value *> Arguments) {
    // Delegate to the generic intrinsic handling code. This mostly provides an
    // opportunity for targets to (for example) special case the cost of
    // certain intrinsics based on constants used as arguments.
    SmallVector<Type *, 8> ParamTys;
    ParamTys.reserve(Arguments.size());
    for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
      ParamTys.push_back(Arguments[Idx]->getType());
    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys);
  }

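  /// Rough cost of an arbitrary user: PHIs and static allocas are free; GEPs,
  /// call sites and extensions are dispatched to their dedicated hooks above;
  /// everything else falls back to getOperationCost on its opcode.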
  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
    if (isa<PHINode>(U))
      return TTI::TCC_Free; // Model all PHI nodes as free.

    // Static alloca doesn't generate target instructions.
    if (auto *A = dyn_cast<AllocaInst>(U))
      if (A->isStaticAlloca())
        return TTI::TCC_Free;

    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
      return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                GEP->getPointerOperand(),
                                                Operands.drop_front());
    }

    if (auto CS = ImmutableCallSite(U)) {
      const Function *F = CS.getCalledFunction();
      if (!F) {
        // Just use the called value type.
        Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
        return static_cast<T *>(this)
            ->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
      }

      SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
      return static_cast<T *>(this)->getCallCost(F, Arguments);
    }

    if (const CastInst *CI = dyn_cast<CastInst>(U)) {
      // The result of a cmp instruction is often extended (to be used by other
      // cmp instructions, logical or return instructions). These are usually
      // no-ops on most sane targets.
      if (isa<CmpInst>(CI->getOperand(0)))
        return TTI::TCC_Free;
      if (isa<SExtInst>(CI) || isa<ZExtInst>(CI) || isa<FPExtInst>(CI))
        return static_cast<T *>(this)->getExtCost(CI, Operands.back());
    }

    return static_cast<T *>(this)->getOperationCost(
        Operator::getOpcode(U), U->getType(),
        U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
  }

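  /// Very rough latency model: anything getUserCost considers free has a
  /// latency of 0, loads 4, real function calls 40, scalar or vector
  /// floating-point results 3, and everything else 1.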
  int getInstructionLatency(const Instruction *I) {
    SmallVector<const Value *, 4> Operands(I->value_op_begin(),
                                           I->value_op_end());
    if (getUserCost(I, Operands) == TTI::TCC_Free)
      return 0;

    if (isa<LoadInst>(I))
      return 4;

    Type *DstTy = I->getType();

    // Usually an intrinsic is a simple instruction.
    // A real function call is much slower.
    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))
        return 40;
      // Some intrinsics return a value and a flag; we use the value type
      // to decide its latency.
      if (StructType *StructTy = dyn_cast<StructType>(DstTy))
        DstTy = StructTy->getElementType(0);
      // Fall through to simple instructions.
    }

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
    if (DstTy->isFloatingPointTy())
      return 3;

    return 1;
  }
};

} // namespace llvm

#endif