1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// \file
10 /// This pass exposes codegen information to IR-level passes. Every
11 /// transformation that uses codegen information is broken into three parts:
12 /// 1. The IR-level analysis pass.
13 /// 2. The IR-level transformation interface which provides the needed
14 ///    information.
15 /// 3. Codegen-level implementation which uses target-specific hooks.
16 ///
17 /// This file defines #2, which is the interface that IR-level transformations
18 /// use for querying the codegen.
19 ///
20 //===----------------------------------------------------------------------===//
21 
22 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
24 
25 #include "llvm/ADT/Optional.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/Operator.h"
29 #include "llvm/IR/PassManager.h"
30 #include "llvm/Pass.h"
31 #include "llvm/Support/DataTypes.h"
32 #include <functional>
33 
34 namespace llvm {
35 
36 class Function;
37 class GlobalValue;
38 class Loop;
39 class Type;
40 class User;
41 class Value;
42 
/// \brief Information about a load/store intrinsic defined by the target.
///
/// A default-constructed object has every flag cleared, \c MatchingId and
/// \c NumMemRefs set to zero, and a null \c PtrVal.
struct MemIntrinsicInfo {
  /// True if the intrinsic may read memory.
  bool ReadMem = false;
  /// True if the intrinsic may write memory.
  bool WriteMem = false;
  /// True only if this memory operation is non-volatile, non-atomic, and
  /// unordered.  (See LoadInst/StoreInst for details on each)
  bool IsSimple = false;
  /// Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;
  /// NOTE(review): presumably the number of memory references made by the
  /// intrinsic; confirm against the targets that fill this in.
  int NumMemRefs = 0;
  /// The pointer the intrinsic accesses. NOTE(review): inferred from the
  /// getTgtMemIntrinsic documentation; confirm.
  Value *PtrVal = nullptr;
};
58 
59 /// \brief This pass provides access to the codegen interfaces that are needed
60 /// for IR-level transformations.
61 class TargetTransformInfo {
62 public:
63   /// \brief Construct a TTI object using a type implementing the \c Concept
64   /// API below.
65   ///
66   /// This is used by targets to construct a TTI wrapping their target-specific
67   /// implementaion that encodes appropriate costs for their target.
68   template <typename T> TargetTransformInfo(T Impl);
69 
70   /// \brief Construct a baseline TTI object using a minimal implementation of
71   /// the \c Concept API below.
72   ///
73   /// The TTI implementation will reflect the information in the DataLayout
74   /// provided if non-null.
75   explicit TargetTransformInfo(const DataLayout &DL);
76 
77   // Provide move semantics.
78   TargetTransformInfo(TargetTransformInfo &&Arg);
79   TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
80 
81   // We need to define the destructor out-of-line to define our sub-classes
82   // out-of-line.
83   ~TargetTransformInfo();
84 
85   /// \brief Handle the invalidation of this information.
86   ///
87   /// When used as a result of \c TargetIRAnalysis this method will be called
88   /// when the function this was computed for changes. When it returns false,
89   /// the information is preserved across those changes.
invalidate(Function &,const PreservedAnalyses &)90   bool invalidate(Function &, const PreservedAnalyses &) {
91     // FIXME: We should probably in some way ensure that the subtarget
92     // information for a function hasn't changed.
93     return false;
94   }
95 
96   /// \name Generic Target Information
97   /// @{
98 
99   /// \brief Underlying constants for 'cost' values in this interface.
100   ///
101   /// Many APIs in this interface return a cost. This enum defines the
102   /// fundamental values that should be used to interpret (and produce) those
103   /// costs. The costs are returned as an int rather than a member of this
104   /// enumeration because it is expected that the cost of one IR instruction
105   /// may have a multiplicative factor to it or otherwise won't fit directly
106   /// into the enum. Moreover, it is common to sum or average costs which works
107   /// better as simple integral values. Thus this enum only provides constants.
108   /// Also note that the returned costs are signed integers to make it natural
109   /// to add, subtract, and test with zero (a common boundary condition). It is
110   /// not expected that 2^32 is a realistic cost to be modeling at any point.
111   ///
112   /// Note that these costs should usually reflect the intersection of code-size
113   /// cost and execution cost. A free instruction is typically one that folds
114   /// into another instruction. For example, reg-to-reg moves can often be
115   /// skipped by renaming the registers in the CPU, but they still are encoded
116   /// and thus wouldn't be considered 'free' here.
117   enum TargetCostConstants {
118     TCC_Free = 0,     ///< Expected to fold away in lowering.
119     TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
120     TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
121   };
122 
123   /// \brief Estimate the cost of a specific operation when lowered.
124   ///
125   /// Note that this is designed to work on an arbitrary synthetic opcode, and
126   /// thus work for hypothetical queries before an instruction has even been
127   /// formed. However, this does *not* work for GEPs, and must not be called
128   /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
129   /// analyzing a GEP's cost required more information.
130   ///
131   /// Typically only the result type is required, and the operand type can be
132   /// omitted. However, if the opcode is one of the cast instructions, the
133   /// operand type is required.
134   ///
135   /// The returned cost is defined in terms of \c TargetCostConstants, see its
136   /// comments for a detailed explanation of the cost values.
137   int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
138 
139   /// \brief Estimate the cost of a GEP operation when lowered.
140   ///
141   /// The contract for this function is the same as \c getOperationCost except
142   /// that it supports an interface that provides extra information specific to
143   /// the GEP operation.
144   int getGEPCost(Type *PointeeType, const Value *Ptr,
145                  ArrayRef<const Value *> Operands) const;
146 
147   /// \brief Estimate the cost of a function call when lowered.
148   ///
149   /// The contract for this is the same as \c getOperationCost except that it
150   /// supports an interface that provides extra information specific to call
151   /// instructions.
152   ///
153   /// This is the most basic query for estimating call cost: it only knows the
154   /// function type and (potentially) the number of arguments at the call site.
155   /// The latter is only interesting for varargs function types.
156   int getCallCost(FunctionType *FTy, int NumArgs = -1) const;
157 
158   /// \brief Estimate the cost of calling a specific function when lowered.
159   ///
160   /// This overload adds the ability to reason about the particular function
161   /// being called in the event it is a library call with special lowering.
162   int getCallCost(const Function *F, int NumArgs = -1) const;
163 
164   /// \brief Estimate the cost of calling a specific function when lowered.
165   ///
166   /// This overload allows specifying a set of candidate argument values.
167   int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
168 
169   /// \returns A value by which our inlining threshold should be multiplied.
170   /// This is primarily used to bump up the inlining threshold wholesale on
171   /// targets where calls are unusually expensive.
172   ///
173   /// TODO: This is a rather blunt instrument.  Perhaps altering the costs of
174   /// individual classes of instructions would be better.
175   unsigned getInliningThresholdMultiplier() const;
176 
177   /// \brief Estimate the cost of an intrinsic when lowered.
178   ///
179   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
180   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
181                        ArrayRef<Type *> ParamTys) const;
182 
183   /// \brief Estimate the cost of an intrinsic when lowered.
184   ///
185   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
186   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
187                        ArrayRef<const Value *> Arguments) const;
188 
189   /// \brief Estimate the cost of a given IR user when lowered.
190   ///
191   /// This can estimate the cost of either a ConstantExpr or Instruction when
192   /// lowered. It has two primary advantages over the \c getOperationCost and
193   /// \c getGEPCost above, and one significant disadvantage: it can only be
194   /// used when the IR construct has already been formed.
195   ///
196   /// The advantages are that it can inspect the SSA use graph to reason more
197   /// accurately about the cost. For example, all-constant-GEPs can often be
198   /// folded into a load or other instruction, but if they are used in some
199   /// other context they may not be folded. This routine can distinguish such
200   /// cases.
201   ///
202   /// The returned cost is defined in terms of \c TargetCostConstants, see its
203   /// comments for a detailed explanation of the cost values.
204   int getUserCost(const User *U) const;
205 
206   /// \brief Return true if branch divergence exists.
207   ///
208   /// Branch divergence has a significantly negative impact on GPU performance
209   /// when threads in the same wavefront take different paths due to conditional
210   /// branches.
211   bool hasBranchDivergence() const;
212 
213   /// \brief Returns whether V is a source of divergence.
214   ///
215   /// This function provides the target-dependent information for
216   /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
217   /// builds the dependency graph, and then runs the reachability algorithm
218   /// starting with the sources of divergence.
219   bool isSourceOfDivergence(const Value *V) const;
220 
221   /// \brief Test whether calls to a function lower to actual program function
222   /// calls.
223   ///
224   /// The idea is to test whether the program is likely to require a 'call'
225   /// instruction or equivalent in order to call the given function.
226   ///
227   /// FIXME: It's not clear that this is a good or useful query API. Client's
228   /// should probably move to simpler cost metrics using the above.
229   /// Alternatively, we could split the cost interface into distinct code-size
230   /// and execution-speed costs. This would allow modelling the core of this
231   /// query more accurately as a call is a single small instruction, but
232   /// incurs significant execution cost.
233   bool isLoweredToCall(const Function *F) const;
234 
235   /// Parameters that control the generic loop unrolling transformation.
236   struct UnrollingPreferences {
237     /// The cost threshold for the unrolled loop. Should be relative to the
238     /// getUserCost values returned by this API, and the expectation is that
239     /// the unrolled loop's instructions when run through that interface should
240     /// not exceed this cost. However, this is only an estimate. Also, specific
241     /// loops may be unrolled even with a cost above this threshold if deemed
242     /// profitable. Set this to UINT_MAX to disable the loop body cost
243     /// restriction.
244     unsigned Threshold;
245     /// If complete unrolling will reduce the cost of the loop below its
246     /// expected dynamic cost while rolled by this percentage, apply a discount
247     /// (below) to its unrolled cost.
248     unsigned PercentDynamicCostSavedThreshold;
249     /// The discount applied to the unrolled cost when the *dynamic* cost
250     /// savings of unrolling exceed the \c PercentDynamicCostSavedThreshold.
251     unsigned DynamicCostSavingsDiscount;
252     /// The cost threshold for the unrolled loop when optimizing for size (set
253     /// to UINT_MAX to disable).
254     unsigned OptSizeThreshold;
255     /// The cost threshold for the unrolled loop, like Threshold, but used
256     /// for partial/runtime unrolling (set to UINT_MAX to disable).
257     unsigned PartialThreshold;
258     /// The cost threshold for the unrolled loop when optimizing for size, like
259     /// OptSizeThreshold, but used for partial/runtime unrolling (set to
260     /// UINT_MAX to disable).
261     unsigned PartialOptSizeThreshold;
262     /// A forced unrolling factor (the number of concatenated bodies of the
263     /// original loop in the unrolled loop body). When set to 0, the unrolling
264     /// transformation will select an unrolling factor based on the current cost
265     /// threshold and other factors.
266     unsigned Count;
267     // Set the maximum unrolling factor. The unrolling factor may be selected
268     // using the appropriate cost threshold, but may not exceed this number
269     // (set to UINT_MAX to disable). This does not apply in cases where the
270     // loop is being fully unrolled.
271     unsigned MaxCount;
272     /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
273     /// applies even if full unrolling is selected. This allows a target to fall
274     /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
275     unsigned FullUnrollMaxCount;
276     /// Allow partial unrolling (unrolling of loops to expand the size of the
277     /// loop body, not only to eliminate small constant-trip-count loops).
278     bool Partial;
279     /// Allow runtime unrolling (unrolling of loops to expand the size of the
280     /// loop body even when the number of loop iterations is not known at
281     /// compile time).
282     bool Runtime;
283     /// Allow generation of a loop remainder (extra iterations after unroll).
284     bool AllowRemainder;
285     /// Allow emitting expensive instructions (such as divisions) when computing
286     /// the trip count of a loop for runtime unrolling.
287     bool AllowExpensiveTripCount;
288     /// Apply loop unroll on any kind of loop
289     /// (mainly to loops that fail runtime unrolling).
290     bool Force;
291   };
292 
293   /// \brief Get target-customized preferences for the generic loop unrolling
294   /// transformation. The caller will initialize UP with the current
295   /// target-independent defaults.
296   void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
297 
298   /// @}
299 
300   /// \name Scalar Target Information
301   /// @{
302 
303   /// \brief Flags indicating the kind of support for population count.
304   ///
305   /// Compared to the SW implementation, HW support is supposed to
306   /// significantly boost the performance when the population is dense, and it
307   /// may or may not degrade performance if the population is sparse. A HW
308   /// support is considered as "Fast" if it can outperform, or is on a par
309   /// with, SW implementation when the population is sparse; otherwise, it is
310   /// considered as "Slow".
311   enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
312 
313   /// \brief Return true if the specified immediate is legal add immediate, that
314   /// is the target has add instructions which can add a register with the
315   /// immediate without having to materialize the immediate into a register.
316   bool isLegalAddImmediate(int64_t Imm) const;
317 
318   /// \brief Return true if the specified immediate is legal icmp immediate,
319   /// that is the target has icmp instructions which can compare a register
320   /// against the immediate without having to materialize the immediate into a
321   /// register.
322   bool isLegalICmpImmediate(int64_t Imm) const;
323 
324   /// \brief Return true if the addressing mode represented by AM is legal for
325   /// this target, for a load/store of the specified type.
326   /// The type may be VoidTy, in which case only return true if the addressing
327   /// mode is legal for a load/store of any legal type.
328   /// TODO: Handle pre/postinc as well.
329   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
330                              bool HasBaseReg, int64_t Scale,
331                              unsigned AddrSpace = 0) const;
332 
333   /// \brief Return true if the target supports masked load/store
334   /// AVX2 and AVX-512 targets allow masks for consecutive load and store
335   bool isLegalMaskedStore(Type *DataType) const;
336   bool isLegalMaskedLoad(Type *DataType) const;
337 
338   /// \brief Return true if the target supports masked gather/scatter
339   /// AVX-512 fully supports gather and scatter for vectors with 32 and 64
340   /// bits scalar type.
341   bool isLegalMaskedScatter(Type *DataType) const;
342   bool isLegalMaskedGather(Type *DataType) const;
343 
344   /// \brief Return the cost of the scaling factor used in the addressing
345   /// mode represented by AM for this target, for a load/store
346   /// of the specified type.
347   /// If the AM is supported, the return value must be >= 0.
348   /// If the AM is not supported, it returns a negative value.
349   /// TODO: Handle pre/postinc as well.
350   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
351                            bool HasBaseReg, int64_t Scale,
352                            unsigned AddrSpace = 0) const;
353 
354   /// \brief Return true if it's free to truncate a value of type Ty1 to type
355   /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
356   /// by referencing its sub-register AX.
357   bool isTruncateFree(Type *Ty1, Type *Ty2) const;
358 
359   /// \brief Return true if it is profitable to hoist instruction in the
360   /// then/else to before if.
361   bool isProfitableToHoist(Instruction *I) const;
362 
363   /// \brief Return true if this type is legal.
364   bool isTypeLegal(Type *Ty) const;
365 
366   /// \brief Returns the target's jmp_buf alignment in bytes.
367   unsigned getJumpBufAlignment() const;
368 
369   /// \brief Returns the target's jmp_buf size in bytes.
370   unsigned getJumpBufSize() const;
371 
372   /// \brief Return true if switches should be turned into lookup tables for the
373   /// target.
374   bool shouldBuildLookupTables() const;
375 
376   /// \brief Don't restrict interleaved unrolling to small loops.
377   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
378 
379   /// \brief Enable matching of interleaved access groups.
380   bool enableInterleavedAccessVectorization() const;
381 
382   /// \brief Indicate that it is potentially unsafe to automatically vectorize
383   /// floating-point operations because the semantics of vector and scalar
384   /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
385   /// does not support IEEE-754 denormal numbers, while depending on the
386   /// platform, scalar floating-point math does.
387   /// This applies to floating-point math operations and calls, not memory
388   /// operations, shuffles, or casts.
389   bool isFPVectorizationPotentiallyUnsafe() const;
390 
391   /// \brief Determine if the target supports unaligned memory accesses.
392   bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace = 0,
393                                       unsigned Alignment = 1,
394                                       bool *Fast = nullptr) const;
395 
396   /// \brief Return hardware support for population count.
397   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
398 
399   /// \brief Return true if the hardware has a fast square-root instruction.
400   bool haveFastSqrt(Type *Ty) const;
401 
402   /// \brief Return the expected cost of supporting the floating point operation
403   /// of the specified type.
404   int getFPOpCost(Type *Ty) const;
405 
406   /// \brief Return the expected cost of materializing for the given integer
407   /// immediate of the specified type.
408   int getIntImmCost(const APInt &Imm, Type *Ty) const;
409 
410   /// \brief Return the expected cost of materialization for the given integer
411   /// immediate of the specified type for a given instruction. The cost can be
412   /// zero if the immediate can be folded into the specified instruction.
413   int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
414                     Type *Ty) const;
415   int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
416                     Type *Ty) const;
417 
418   /// \brief Return the expected cost for the given integer when optimising
419   /// for size. This is different than the other integer immediate cost
420   /// functions in that it is subtarget agnostic. This is useful when you e.g.
421   /// target one ISA such as Aarch32 but smaller encodings could be possible
422   /// with another such as Thumb. This return value is used as a penalty when
423   /// the total costs for a constant is calculated (the bigger the cost, the
424   /// more beneficial constant hoisting is).
425   int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
426                             Type *Ty) const;
427   /// @}
428 
429   /// \name Vector Target Information
430   /// @{
431 
432   /// \brief The various kinds of shuffle patterns for vector queries.
433   enum ShuffleKind {
434     SK_Broadcast,       ///< Broadcast element 0 to all other elements.
435     SK_Reverse,         ///< Reverse the order of the vector.
436     SK_Alternate,       ///< Choose alternate elements from vector.
437     SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
438     SK_ExtractSubvector ///< ExtractSubvector Index indicates start offset.
439   };
440 
441   /// \brief Additional information about an operand's possible values.
442   enum OperandValueKind {
443     OK_AnyValue,               // Operand can have any value.
444     OK_UniformValue,           // Operand is uniform (splat of a value).
445     OK_UniformConstantValue,   // Operand is uniform constant.
446     OK_NonUniformConstantValue // Operand is a non uniform constant value.
447   };
448 
449   /// \brief Additional properties of an operand's values.
450   enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
451 
452   /// \return The number of scalar or vector registers that the target has.
453   /// If 'Vectors' is true, it returns the number of vector registers. If it is
454   /// set to false, it returns the number of scalar registers.
455   unsigned getNumberOfRegisters(bool Vector) const;
456 
457   /// \return The width of the largest scalar or vector register type.
458   unsigned getRegisterBitWidth(bool Vector) const;
459 
460   /// \return The bitwidth of the largest vector type that should be used to
461   /// load/store in the given address space.
462   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
463 
464   /// \return The size of a cache line in bytes.
465   unsigned getCacheLineSize() const;
466 
467   /// \return How much before a load we should place the prefetch instruction.
468   /// This is currently measured in number of instructions.
469   unsigned getPrefetchDistance() const;
470 
471   /// \return Some HW prefetchers can handle accesses up to a certain constant
472   /// stride.  This is the minimum stride in bytes where it makes sense to start
473   /// adding SW prefetches.  The default is 1, i.e. prefetch with any stride.
474   unsigned getMinPrefetchStride() const;
475 
476   /// \return The maximum number of iterations to prefetch ahead.  If the
477   /// required number of iterations is more than this number, no prefetching is
478   /// performed.
479   unsigned getMaxPrefetchIterationsAhead() const;
480 
481   /// \return The maximum interleave factor that any transform should try to
482   /// perform for this target. This number depends on the level of parallelism
483   /// and the number of execution units in the CPU.
484   unsigned getMaxInterleaveFactor(unsigned VF) const;
485 
486   /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
487   int getArithmeticInstrCost(
488       unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
489       OperandValueKind Opd2Info = OK_AnyValue,
490       OperandValueProperties Opd1PropInfo = OP_None,
491       OperandValueProperties Opd2PropInfo = OP_None) const;
492 
493   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
494   /// The index and subtype parameters are used by the subvector insertion and
495   /// extraction shuffle kinds.
496   int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
497                      Type *SubTp = nullptr) const;
498 
499   /// \return The expected cost of cast instructions, such as bitcast, trunc,
500   /// zext, etc.
501   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const;
502 
503   /// \return The expected cost of a sign- or zero-extended vector extract. Use
504   /// -1 to indicate that there is no information about the index value.
505   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
506                                unsigned Index = -1) const;
507 
508   /// \return The expected cost of control-flow related instructions such as
509   /// Phi, Ret, Br.
510   int getCFInstrCost(unsigned Opcode) const;
511 
512   /// \returns The expected cost of compare and select instructions.
513   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
514                          Type *CondTy = nullptr) const;
515 
516   /// \return The expected cost of vector Insert and Extract.
517   /// Use -1 to indicate that there is no information on the index value.
518   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
519 
520   /// \return The cost of Load and Store instructions.
521   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
522                       unsigned AddressSpace) const;
523 
524   /// \return The cost of masked Load and Store instructions.
525   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
526                             unsigned AddressSpace) const;
527 
528   /// \return The cost of Gather or Scatter operation
529   /// \p Opcode - is a type of memory access Load or Store
530   /// \p DataTy - a vector type of the data to be loaded or stored
531   /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
532   /// \p VariableMask - true when the memory access is predicated with a mask
533   ///                   that is not a compile-time constant
534   /// \p Alignment - alignment of single element
535   int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
536                              bool VariableMask, unsigned Alignment) const;
537 
538   /// \return The cost of the interleaved memory operation.
539   /// \p Opcode is the memory operation code
540   /// \p VecTy is the vector type of the interleaved access.
541   /// \p Factor is the interleave factor
542   /// \p Indices is the indices for interleaved load members (as interleaved
543   ///    load allows gaps)
544   /// \p Alignment is the alignment of the memory operation
545   /// \p AddressSpace is address space of the pointer.
546   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
547                                  ArrayRef<unsigned> Indices, unsigned Alignment,
548                                  unsigned AddressSpace) const;
549 
550   /// \brief Calculate the cost of performing a vector reduction.
551   ///
552   /// This is the cost of reducing the vector value of type \p Ty to a scalar
553   /// value using the operation denoted by \p Opcode. The form of the reduction
554   /// can either be a pairwise reduction or a reduction that splits the vector
555   /// at every reduction level.
556   ///
557   /// Pairwise:
558   ///  (v0, v1, v2, v3)
559   ///  ((v0+v1), (v2, v3), undef, undef)
560   /// Split:
561   ///  (v0, v1, v2, v3)
562   ///  ((v0+v2), (v1+v3), undef, undef)
563   int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const;
564 
565   /// \returns The cost of Intrinsic instructions. Types analysis only.
566   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
567                             ArrayRef<Type *> Tys, FastMathFlags FMF) const;
568 
569   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
570   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
571                             ArrayRef<Value *> Args, FastMathFlags FMF) const;
572 
573   /// \returns The cost of Call instructions.
574   int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
575 
576   /// \returns The number of pieces into which the provided type must be
577   /// split during legalization. Zero is returned when the answer is unknown.
578   unsigned getNumberOfParts(Type *Tp) const;
579 
580   /// \returns The cost of the address computation. For most targets this can be
581   /// merged into the instruction indexing mode. Some targets might want to
582   /// distinguish between address computation for memory operations on vector
583   /// types and scalar types. Such targets should override this function.
584   /// The 'IsComplex' parameter is a hint that the address computation is likely
585   /// to involve multiple instructions and as such unlikely to be merged into
586   /// the address indexing mode.
587   int getAddressComputationCost(Type *Ty, bool IsComplex = false) const;
588 
589   /// \returns The cost, if any, of keeping values of the given types alive
590   /// over a callsite.
591   ///
592   /// Some types may require the use of register classes that do not have
593   /// any callee-saved registers, so would require a spill and fill.
594   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
595 
596   /// \returns True if the intrinsic is a supported memory intrinsic.  Info
597   /// will contain additional information - whether the intrinsic may write
598   /// or read to memory, volatility and the pointer.  Info is undefined
599   /// if false is returned.
600   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
601 
602   /// \returns A value which is the result of the given memory intrinsic.  New
603   /// instructions may be created to extract the result from the given intrinsic
604   /// memory operation.  Returns nullptr if the target cannot create a result
605   /// from the given intrinsic.
606   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
607                                            Type *ExpectedType) const;
608 
609   /// \returns True if the two functions have compatible attributes for inlining
610   /// purposes.
611   bool areInlineCompatible(const Function *Caller,
612                            const Function *Callee) const;
613 
614   /// @}
615 
616 private:
617   /// \brief The abstract base class used to type erase specific TTI
618   /// implementations.
619   class Concept;
620 
621   /// \brief The template model for the base class which wraps a concrete
622   /// implementation in a type erased interface.
623   template <typename T> class Model;
624 
625   std::unique_ptr<Concept> TTIImpl;
626 };
627 
628 class TargetTransformInfo::Concept {
629 public:
630   virtual ~Concept() = 0;
631   virtual const DataLayout &getDataLayout() const = 0;
632   virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
633   virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
634                          ArrayRef<const Value *> Operands) = 0;
635   virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
636   virtual int getCallCost(const Function *F, int NumArgs) = 0;
637   virtual int getCallCost(const Function *F,
638                           ArrayRef<const Value *> Arguments) = 0;
639   virtual unsigned getInliningThresholdMultiplier() = 0;
640   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
641                                ArrayRef<Type *> ParamTys) = 0;
642   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
643                                ArrayRef<const Value *> Arguments) = 0;
644   virtual int getUserCost(const User *U) = 0;
645   virtual bool hasBranchDivergence() = 0;
646   virtual bool isSourceOfDivergence(const Value *V) = 0;
647   virtual bool isLoweredToCall(const Function *F) = 0;
648   virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;
649   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
650   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
651   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
652                                      int64_t BaseOffset, bool HasBaseReg,
653                                      int64_t Scale,
654                                      unsigned AddrSpace) = 0;
655   virtual bool isLegalMaskedStore(Type *DataType) = 0;
656   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
657   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
658   virtual bool isLegalMaskedGather(Type *DataType) = 0;
659   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
660                                    int64_t BaseOffset, bool HasBaseReg,
661                                    int64_t Scale, unsigned AddrSpace) = 0;
662   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
663   virtual bool isProfitableToHoist(Instruction *I) = 0;
664   virtual bool isTypeLegal(Type *Ty) = 0;
665   virtual unsigned getJumpBufAlignment() = 0;
666   virtual unsigned getJumpBufSize() = 0;
667   virtual bool shouldBuildLookupTables() = 0;
668   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
669   virtual bool enableInterleavedAccessVectorization() = 0;
670   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
671   virtual bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
672                                               unsigned AddressSpace,
673                                               unsigned Alignment,
674                                               bool *Fast) = 0;
675   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
676   virtual bool haveFastSqrt(Type *Ty) = 0;
677   virtual int getFPOpCost(Type *Ty) = 0;
678   virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
679                                     Type *Ty) = 0;
680   virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
681   virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
682                             Type *Ty) = 0;
683   virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
684                             Type *Ty) = 0;
685   virtual unsigned getNumberOfRegisters(bool Vector) = 0;
686   virtual unsigned getRegisterBitWidth(bool Vector) = 0;
687   virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) = 0;
688   virtual unsigned getCacheLineSize() = 0;
689   virtual unsigned getPrefetchDistance() = 0;
690   virtual unsigned getMinPrefetchStride() = 0;
691   virtual unsigned getMaxPrefetchIterationsAhead() = 0;
692   virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
693   virtual unsigned
694   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
695                          OperandValueKind Opd2Info,
696                          OperandValueProperties Opd1PropInfo,
697                          OperandValueProperties Opd2PropInfo) = 0;
698   virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
699                              Type *SubTp) = 0;
700   virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
701   virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
702                                        VectorType *VecTy, unsigned Index) = 0;
703   virtual int getCFInstrCost(unsigned Opcode) = 0;
704   virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
705                                  Type *CondTy) = 0;
706   virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
707                                  unsigned Index) = 0;
708   virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
709                               unsigned AddressSpace) = 0;
710   virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
711                                     unsigned Alignment,
712                                     unsigned AddressSpace) = 0;
713   virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
714                                      Value *Ptr, bool VariableMask,
715                                      unsigned Alignment) = 0;
716   virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
717                                          unsigned Factor,
718                                          ArrayRef<unsigned> Indices,
719                                          unsigned Alignment,
720                                          unsigned AddressSpace) = 0;
721   virtual int getReductionCost(unsigned Opcode, Type *Ty,
722                                bool IsPairwiseForm) = 0;
723   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
724                                     ArrayRef<Type *> Tys,
725                                     FastMathFlags FMF) = 0;
726   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
727                                     ArrayRef<Value *> Args,
728                                     FastMathFlags FMF) = 0;
729   virtual int getCallInstrCost(Function *F, Type *RetTy,
730                                ArrayRef<Type *> Tys) = 0;
731   virtual unsigned getNumberOfParts(Type *Tp) = 0;
732   virtual int getAddressComputationCost(Type *Ty, bool IsComplex) = 0;
733   virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
734   virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
735                                   MemIntrinsicInfo &Info) = 0;
736   virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
737                                                    Type *ExpectedType) = 0;
738   virtual bool areInlineCompatible(const Function *Caller,
739                                    const Function *Callee) const = 0;
740 };
741 
742 template <typename T>
743 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
744   T Impl;
745 
746 public:
Model(T Impl)747   Model(T Impl) : Impl(std::move(Impl)) {}
~Model()748   ~Model() override {}
749 
getDataLayout()750   const DataLayout &getDataLayout() const override {
751     return Impl.getDataLayout();
752   }
753 
getOperationCost(unsigned Opcode,Type * Ty,Type * OpTy)754   int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
755     return Impl.getOperationCost(Opcode, Ty, OpTy);
756   }
getGEPCost(Type * PointeeType,const Value * Ptr,ArrayRef<const Value * > Operands)757   int getGEPCost(Type *PointeeType, const Value *Ptr,
758                  ArrayRef<const Value *> Operands) override {
759     return Impl.getGEPCost(PointeeType, Ptr, Operands);
760   }
getCallCost(FunctionType * FTy,int NumArgs)761   int getCallCost(FunctionType *FTy, int NumArgs) override {
762     return Impl.getCallCost(FTy, NumArgs);
763   }
getCallCost(const Function * F,int NumArgs)764   int getCallCost(const Function *F, int NumArgs) override {
765     return Impl.getCallCost(F, NumArgs);
766   }
getCallCost(const Function * F,ArrayRef<const Value * > Arguments)767   int getCallCost(const Function *F,
768                   ArrayRef<const Value *> Arguments) override {
769     return Impl.getCallCost(F, Arguments);
770   }
getInliningThresholdMultiplier()771   unsigned getInliningThresholdMultiplier() override {
772     return Impl.getInliningThresholdMultiplier();
773   }
getIntrinsicCost(Intrinsic::ID IID,Type * RetTy,ArrayRef<Type * > ParamTys)774   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
775                        ArrayRef<Type *> ParamTys) override {
776     return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
777   }
getIntrinsicCost(Intrinsic::ID IID,Type * RetTy,ArrayRef<const Value * > Arguments)778   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
779                        ArrayRef<const Value *> Arguments) override {
780     return Impl.getIntrinsicCost(IID, RetTy, Arguments);
781   }
getUserCost(const User * U)782   int getUserCost(const User *U) override { return Impl.getUserCost(U); }
hasBranchDivergence()783   bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
isSourceOfDivergence(const Value * V)784   bool isSourceOfDivergence(const Value *V) override {
785     return Impl.isSourceOfDivergence(V);
786   }
isLoweredToCall(const Function * F)787   bool isLoweredToCall(const Function *F) override {
788     return Impl.isLoweredToCall(F);
789   }
getUnrollingPreferences(Loop * L,UnrollingPreferences & UP)790   void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override {
791     return Impl.getUnrollingPreferences(L, UP);
792   }
isLegalAddImmediate(int64_t Imm)793   bool isLegalAddImmediate(int64_t Imm) override {
794     return Impl.isLegalAddImmediate(Imm);
795   }
isLegalICmpImmediate(int64_t Imm)796   bool isLegalICmpImmediate(int64_t Imm) override {
797     return Impl.isLegalICmpImmediate(Imm);
798   }
isLegalAddressingMode(Type * Ty,GlobalValue * BaseGV,int64_t BaseOffset,bool HasBaseReg,int64_t Scale,unsigned AddrSpace)799   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
800                              bool HasBaseReg, int64_t Scale,
801                              unsigned AddrSpace) override {
802     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
803                                       Scale, AddrSpace);
804   }
isLegalMaskedStore(Type * DataType)805   bool isLegalMaskedStore(Type *DataType) override {
806     return Impl.isLegalMaskedStore(DataType);
807   }
isLegalMaskedLoad(Type * DataType)808   bool isLegalMaskedLoad(Type *DataType) override {
809     return Impl.isLegalMaskedLoad(DataType);
810   }
isLegalMaskedScatter(Type * DataType)811   bool isLegalMaskedScatter(Type *DataType) override {
812     return Impl.isLegalMaskedScatter(DataType);
813   }
isLegalMaskedGather(Type * DataType)814   bool isLegalMaskedGather(Type *DataType) override {
815     return Impl.isLegalMaskedGather(DataType);
816   }
getScalingFactorCost(Type * Ty,GlobalValue * BaseGV,int64_t BaseOffset,bool HasBaseReg,int64_t Scale,unsigned AddrSpace)817   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
818                            bool HasBaseReg, int64_t Scale,
819                            unsigned AddrSpace) override {
820     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
821                                      Scale, AddrSpace);
822   }
isTruncateFree(Type * Ty1,Type * Ty2)823   bool isTruncateFree(Type *Ty1, Type *Ty2) override {
824     return Impl.isTruncateFree(Ty1, Ty2);
825   }
isProfitableToHoist(Instruction * I)826   bool isProfitableToHoist(Instruction *I) override {
827     return Impl.isProfitableToHoist(I);
828   }
isTypeLegal(Type * Ty)829   bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
getJumpBufAlignment()830   unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
getJumpBufSize()831   unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
shouldBuildLookupTables()832   bool shouldBuildLookupTables() override {
833     return Impl.shouldBuildLookupTables();
834   }
enableAggressiveInterleaving(bool LoopHasReductions)835   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
836     return Impl.enableAggressiveInterleaving(LoopHasReductions);
837   }
enableInterleavedAccessVectorization()838   bool enableInterleavedAccessVectorization() override {
839     return Impl.enableInterleavedAccessVectorization();
840   }
isFPVectorizationPotentiallyUnsafe()841   bool isFPVectorizationPotentiallyUnsafe() override {
842     return Impl.isFPVectorizationPotentiallyUnsafe();
843   }
allowsMisalignedMemoryAccesses(unsigned BitWidth,unsigned AddressSpace,unsigned Alignment,bool * Fast)844   bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
845                                       unsigned Alignment, bool *Fast) override {
846     return Impl.allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
847                                                Alignment, Fast);
848   }
getPopcntSupport(unsigned IntTyWidthInBit)849   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
850     return Impl.getPopcntSupport(IntTyWidthInBit);
851   }
haveFastSqrt(Type * Ty)852   bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
853 
getFPOpCost(Type * Ty)854   int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
855 
getIntImmCodeSizeCost(unsigned Opc,unsigned Idx,const APInt & Imm,Type * Ty)856   int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
857                             Type *Ty) override {
858     return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
859   }
getIntImmCost(const APInt & Imm,Type * Ty)860   int getIntImmCost(const APInt &Imm, Type *Ty) override {
861     return Impl.getIntImmCost(Imm, Ty);
862   }
getIntImmCost(unsigned Opc,unsigned Idx,const APInt & Imm,Type * Ty)863   int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
864                     Type *Ty) override {
865     return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
866   }
getIntImmCost(Intrinsic::ID IID,unsigned Idx,const APInt & Imm,Type * Ty)867   int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
868                     Type *Ty) override {
869     return Impl.getIntImmCost(IID, Idx, Imm, Ty);
870   }
getNumberOfRegisters(bool Vector)871   unsigned getNumberOfRegisters(bool Vector) override {
872     return Impl.getNumberOfRegisters(Vector);
873   }
getRegisterBitWidth(bool Vector)874   unsigned getRegisterBitWidth(bool Vector) override {
875     return Impl.getRegisterBitWidth(Vector);
876   }
877 
getLoadStoreVecRegBitWidth(unsigned AddrSpace)878   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) override {
879     return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
880   }
881 
getCacheLineSize()882   unsigned getCacheLineSize() override {
883     return Impl.getCacheLineSize();
884   }
getPrefetchDistance()885   unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
getMinPrefetchStride()886   unsigned getMinPrefetchStride() override {
887     return Impl.getMinPrefetchStride();
888   }
getMaxPrefetchIterationsAhead()889   unsigned getMaxPrefetchIterationsAhead() override {
890     return Impl.getMaxPrefetchIterationsAhead();
891   }
getMaxInterleaveFactor(unsigned VF)892   unsigned getMaxInterleaveFactor(unsigned VF) override {
893     return Impl.getMaxInterleaveFactor(VF);
894   }
895   unsigned
getArithmeticInstrCost(unsigned Opcode,Type * Ty,OperandValueKind Opd1Info,OperandValueKind Opd2Info,OperandValueProperties Opd1PropInfo,OperandValueProperties Opd2PropInfo)896   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
897                          OperandValueKind Opd2Info,
898                          OperandValueProperties Opd1PropInfo,
899                          OperandValueProperties Opd2PropInfo) override {
900     return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
901                                        Opd1PropInfo, Opd2PropInfo);
902   }
getShuffleCost(ShuffleKind Kind,Type * Tp,int Index,Type * SubTp)903   int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
904                      Type *SubTp) override {
905     return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
906   }
getCastInstrCost(unsigned Opcode,Type * Dst,Type * Src)907   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override {
908     return Impl.getCastInstrCost(Opcode, Dst, Src);
909   }
getExtractWithExtendCost(unsigned Opcode,Type * Dst,VectorType * VecTy,unsigned Index)910   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
911                                unsigned Index) override {
912     return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
913   }
getCFInstrCost(unsigned Opcode)914   int getCFInstrCost(unsigned Opcode) override {
915     return Impl.getCFInstrCost(Opcode);
916   }
getCmpSelInstrCost(unsigned Opcode,Type * ValTy,Type * CondTy)917   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) override {
918     return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy);
919   }
getVectorInstrCost(unsigned Opcode,Type * Val,unsigned Index)920   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
921     return Impl.getVectorInstrCost(Opcode, Val, Index);
922   }
getMemoryOpCost(unsigned Opcode,Type * Src,unsigned Alignment,unsigned AddressSpace)923   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
924                       unsigned AddressSpace) override {
925     return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
926   }
getMaskedMemoryOpCost(unsigned Opcode,Type * Src,unsigned Alignment,unsigned AddressSpace)927   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
928                             unsigned AddressSpace) override {
929     return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
930   }
getGatherScatterOpCost(unsigned Opcode,Type * DataTy,Value * Ptr,bool VariableMask,unsigned Alignment)931   int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
932                              Value *Ptr, bool VariableMask,
933                              unsigned Alignment) override {
934     return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
935                                        Alignment);
936   }
getInterleavedMemoryOpCost(unsigned Opcode,Type * VecTy,unsigned Factor,ArrayRef<unsigned> Indices,unsigned Alignment,unsigned AddressSpace)937   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
938                                  ArrayRef<unsigned> Indices, unsigned Alignment,
939                                  unsigned AddressSpace) override {
940     return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
941                                            Alignment, AddressSpace);
942   }
getReductionCost(unsigned Opcode,Type * Ty,bool IsPairwiseForm)943   int getReductionCost(unsigned Opcode, Type *Ty,
944                        bool IsPairwiseForm) override {
945     return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
946   }
getIntrinsicInstrCost(Intrinsic::ID ID,Type * RetTy,ArrayRef<Type * > Tys,FastMathFlags FMF)947   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
948                             FastMathFlags FMF) override {
949     return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
950   }
getIntrinsicInstrCost(Intrinsic::ID ID,Type * RetTy,ArrayRef<Value * > Args,FastMathFlags FMF)951   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
952                             ArrayRef<Value *> Args,
953                             FastMathFlags FMF) override {
954     return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF);
955   }
getCallInstrCost(Function * F,Type * RetTy,ArrayRef<Type * > Tys)956   int getCallInstrCost(Function *F, Type *RetTy,
957                        ArrayRef<Type *> Tys) override {
958     return Impl.getCallInstrCost(F, RetTy, Tys);
959   }
getNumberOfParts(Type * Tp)960   unsigned getNumberOfParts(Type *Tp) override {
961     return Impl.getNumberOfParts(Tp);
962   }
getAddressComputationCost(Type * Ty,bool IsComplex)963   int getAddressComputationCost(Type *Ty, bool IsComplex) override {
964     return Impl.getAddressComputationCost(Ty, IsComplex);
965   }
getCostOfKeepingLiveOverCall(ArrayRef<Type * > Tys)966   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
967     return Impl.getCostOfKeepingLiveOverCall(Tys);
968   }
getTgtMemIntrinsic(IntrinsicInst * Inst,MemIntrinsicInfo & Info)969   bool getTgtMemIntrinsic(IntrinsicInst *Inst,
970                           MemIntrinsicInfo &Info) override {
971     return Impl.getTgtMemIntrinsic(Inst, Info);
972   }
getOrCreateResultFromMemIntrinsic(IntrinsicInst * Inst,Type * ExpectedType)973   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
974                                            Type *ExpectedType) override {
975     return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
976   }
areInlineCompatible(const Function * Caller,const Function * Callee)977   bool areInlineCompatible(const Function *Caller,
978                            const Function *Callee) const override {
979     return Impl.areInlineCompatible(Caller, Callee);
980   }
981 };
982 
983 template <typename T>
TargetTransformInfo(T Impl)984 TargetTransformInfo::TargetTransformInfo(T Impl)
985     : TTIImpl(new Model<T>(Impl)) {}
986 
987 /// \brief Analysis pass providing the \c TargetTransformInfo.
988 ///
989 /// The core idea of the TargetIRAnalysis is to expose an interface through
990 /// which LLVM targets can analyze and provide information about the middle
991 /// end's target-independent IR. This supports use cases such as target-aware
992 /// cost modeling of IR constructs.
993 ///
994 /// This is a function analysis because much of the cost modeling for targets
995 /// is done in a subtarget specific way and LLVM supports compiling different
996 /// functions targeting different subtargets in order to support runtime
997 /// dispatch according to the observed subtarget.
998 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
999 public:
1000   typedef TargetTransformInfo Result;
1001 
1002   /// \brief Default construct a target IR analysis.
1003   ///
1004   /// This will use the module's datalayout to construct a baseline
1005   /// conservative TTI result.
1006   TargetIRAnalysis();
1007 
1008   /// \brief Construct an IR analysis pass around a target-provide callback.
1009   ///
1010   /// The callback will be called with a particular function for which the TTI
1011   /// is needed and must return a TTI object for that function.
1012   TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
1013 
1014   // Value semantics. We spell out the constructors for MSVC.
TargetIRAnalysis(const TargetIRAnalysis & Arg)1015   TargetIRAnalysis(const TargetIRAnalysis &Arg)
1016       : TTICallback(Arg.TTICallback) {}
TargetIRAnalysis(TargetIRAnalysis && Arg)1017   TargetIRAnalysis(TargetIRAnalysis &&Arg)
1018       : TTICallback(std::move(Arg.TTICallback)) {}
1019   TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
1020     TTICallback = RHS.TTICallback;
1021     return *this;
1022   }
1023   TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
1024     TTICallback = std::move(RHS.TTICallback);
1025     return *this;
1026   }
1027 
1028   Result run(const Function &F, AnalysisManager<Function> &);
1029 
1030 private:
1031   friend AnalysisInfoMixin<TargetIRAnalysis>;
1032   static char PassID;
1033 
1034   /// \brief The callback used to produce a result.
1035   ///
1036   /// We use a completely opaque callback so that targets can provide whatever
1037   /// mechanism they desire for constructing the TTI for a given function.
1038   ///
1039   /// FIXME: Should we really use std::function? It's relatively inefficient.
1040   /// It might be possible to arrange for even stateful callbacks to outlive
1041   /// the analysis and thus use a function_ref which would be lighter weight.
1042   /// This may also be less error prone as the callback is likely to reference
1043   /// the external TargetMachine, and that reference needs to never dangle.
1044   std::function<Result(const Function &)> TTICallback;
1045 
1046   /// \brief Helper function used as the callback in the default constructor.
1047   static Result getDefaultTTI(const Function &F);
1048 };
1049 
1050 /// \brief Wrapper pass for TargetTransformInfo.
1051 ///
1052 /// This pass can be constructed from a TTI object which it stores internally
1053 /// and is queried by passes.
1054 class TargetTransformInfoWrapperPass : public ImmutablePass {
1055   TargetIRAnalysis TIRA;
1056   Optional<TargetTransformInfo> TTI;
1057 
1058   virtual void anchor();
1059 
1060 public:
1061   static char ID;
1062 
1063   /// \brief We must provide a default constructor for the pass but it should
1064   /// never be used.
1065   ///
1066   /// Use the constructor below or call one of the creation routines.
1067   TargetTransformInfoWrapperPass();
1068 
1069   explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1070 
1071   TargetTransformInfo &getTTI(const Function &F);
1072 };
1073 
1074 /// \brief Create an analysis pass wrapper around a TTI object.
1075 ///
1076 /// This analysis pass just holds the TTI instance and makes it available to
1077 /// clients.
1078 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1079 
1080 } // End llvm namespace
1081 
1082 #endif
1083