1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// This file defines the LoopVectorizationLegality class. Original code 12 /// in Loop Vectorizer has been moved out to its own file for modularity 13 /// and reusability. 14 /// 15 /// Currently, it works for innermost loop vectorization. Extending this to 16 /// outer loop vectorization is a TODO item. 17 /// 18 /// Also provides: 19 /// 1) LoopVectorizeHints class which keeps a number of loop annotations 20 /// locally for easy look up. It has the ability to write them back as 21 /// loop metadata, upon request. 22 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose 23 /// of reporting useful failure to vectorize message. 24 // 25 //===----------------------------------------------------------------------===// 26 27 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H 28 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H 29 30 #include "llvm/ADT/MapVector.h" 31 #include "llvm/Analysis/LoopAccessAnalysis.h" 32 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 33 #include "llvm/Transforms/Utils/LoopUtils.h" 34 35 namespace llvm { 36 37 /// Create an analysis remark that explains why vectorization failed 38 /// 39 /// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p 40 /// RemarkName is the identifier for the remark. If \p I is passed it is an 41 /// instruction that prevents vectorization. Otherwise \p TheLoop is used for 42 /// the location of the remark. \return the remark object that can be 43 /// streamed to. 44 OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, 45 StringRef RemarkName, 46 Loop *TheLoop, 47 Instruction *I = nullptr); 48 49 /// Utility class for getting and setting loop vectorizer hints in the form 50 /// of loop metadata. 51 /// This class keeps a number of loop annotations locally (as member variables) 52 /// and can, upon request, write them back as metadata on the loop. It will 53 /// initially scan the loop for existing metadata, and will update the local 54 /// values based on information in the loop. 55 /// We cannot write all values to metadata, as the mere presence of some info, 56 /// for example 'force', means a decision has been made. So, we need to be 57 /// careful NOT to add them if the user hasn't specifically asked so. 58 class LoopVectorizeHints { 59 enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED }; 60 61 /// Hint - associates name and validation with the hint value. 62 struct Hint { 63 const char *Name; 64 unsigned Value; // This may have to change for non-numeric values. 65 HintKind Kind; 66 HintHint67 Hint(const char *Name, unsigned Value, HintKind Kind) 68 : Name(Name), Value(Value), Kind(Kind) {} 69 70 bool validate(unsigned Val); 71 }; 72 73 /// Vectorization width. 74 Hint Width; 75 76 /// Vectorization interleave factor. 77 Hint Interleave; 78 79 /// Vectorization forced 80 Hint Force; 81 82 /// Already Vectorized 83 Hint IsVectorized; 84 85 /// Return the loop metadata prefix. Prefix()86 static StringRef Prefix() { return "llvm.loop."; } 87 88 /// True if there is any unsafe math in the loop. 89 bool PotentiallyUnsafe = false; 90 91 public: 92 enum ForceKind { 93 FK_Undefined = -1, ///< Not selected. 94 FK_Disabled = 0, ///< Forcing disabled. 95 FK_Enabled = 1, ///< Forcing enabled. 96 }; 97 98 LoopVectorizeHints(const Loop *L, bool DisableInterleaving, 99 OptimizationRemarkEmitter &ORE); 100 101 /// Mark the loop L as already vectorized by setting the width to 1. setAlreadyVectorized()102 void setAlreadyVectorized() { 103 IsVectorized.Value = 1; 104 Hint Hints[] = {IsVectorized}; 105 writeHintsToMetadata(Hints); 106 } 107 108 bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const; 109 110 /// Dumps all the hint information. 111 void emitRemarkWithHints() const; 112 getWidth()113 unsigned getWidth() const { return Width.Value; } getInterleave()114 unsigned getInterleave() const { return Interleave.Value; } getIsVectorized()115 unsigned getIsVectorized() const { return IsVectorized.Value; } getForce()116 enum ForceKind getForce() const { return (ForceKind)Force.Value; } 117 118 /// If hints are provided that force vectorization, use the AlwaysPrint 119 /// pass name to force the frontend to print the diagnostic. 120 const char *vectorizeAnalysisPassName() const; 121 allowReordering()122 bool allowReordering() const { 123 // When enabling loop hints are provided we allow the vectorizer to change 124 // the order of operations that is given by the scalar loop. This is not 125 // enabled by default because can be unsafe or inefficient. For example, 126 // reordering floating-point operations will change the way round-off 127 // error accumulates in the loop. 128 return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1; 129 } 130 isPotentiallyUnsafe()131 bool isPotentiallyUnsafe() const { 132 // Avoid FP vectorization if the target is unsure about proper support. 133 // This may be related to the SIMD unit in the target not handling 134 // IEEE 754 FP ops properly, or bad single-to-double promotions. 135 // Otherwise, a sequence of vectorized loops, even without reduction, 136 // could lead to different end results on the destination vectors. 137 return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe; 138 } 139 setPotentiallyUnsafe()140 void setPotentiallyUnsafe() { PotentiallyUnsafe = true; } 141 142 private: 143 /// Find hints specified in the loop metadata and update local values. 144 void getHintsFromMetadata(); 145 146 /// Checks string hint with one operand and set value if valid. 147 void setHint(StringRef Name, Metadata *Arg); 148 149 /// Create a new hint from name / value pair. 150 MDNode *createHintMetadata(StringRef Name, unsigned V) const; 151 152 /// Matches metadata with hint name. 153 bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes); 154 155 /// Sets current hints into loop metadata, keeping other values intact. 156 void writeHintsToMetadata(ArrayRef<Hint> HintTypes); 157 158 /// The loop these hints belong to. 159 const Loop *TheLoop; 160 161 /// Interface to emit optimization remarks. 162 OptimizationRemarkEmitter &ORE; 163 }; 164 165 /// This holds vectorization requirements that must be verified late in 166 /// the process. The requirements are set by legalize and costmodel. Once 167 /// vectorization has been determined to be possible and profitable the 168 /// requirements can be verified by looking for metadata or compiler options. 169 /// For example, some loops require FP commutativity which is only allowed if 170 /// vectorization is explicitly specified or if the fast-math compiler option 171 /// has been provided. 172 /// Late evaluation of these requirements allows helpful diagnostics to be 173 /// composed that tells the user what need to be done to vectorize the loop. For 174 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late 175 /// evaluation should be used only when diagnostics can generated that can be 176 /// followed by a non-expert user. 177 class LoopVectorizationRequirements { 178 public: LoopVectorizationRequirements(OptimizationRemarkEmitter & ORE)179 LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {} 180 addUnsafeAlgebraInst(Instruction * I)181 void addUnsafeAlgebraInst(Instruction *I) { 182 // First unsafe algebra instruction. 183 if (!UnsafeAlgebraInst) 184 UnsafeAlgebraInst = I; 185 } 186 addRuntimePointerChecks(unsigned Num)187 void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; } 188 189 bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints); 190 191 private: 192 unsigned NumRuntimePointerChecks = 0; 193 Instruction *UnsafeAlgebraInst = nullptr; 194 195 /// Interface to emit optimization remarks. 196 OptimizationRemarkEmitter &ORE; 197 }; 198 199 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and 200 /// to what vectorization factor. 201 /// This class does not look at the profitability of vectorization, only the 202 /// legality. This class has two main kinds of checks: 203 /// * Memory checks - The code in canVectorizeMemory checks if vectorization 204 /// will change the order of memory accesses in a way that will change the 205 /// correctness of the program. 206 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory 207 /// checks for a number of different conditions, such as the availability of a 208 /// single induction variable, that all types are supported and vectorize-able, 209 /// etc. This code reflects the capabilities of InnerLoopVectorizer. 210 /// This class is also used by InnerLoopVectorizer for identifying 211 /// induction variable and the different reduction variables. 212 class LoopVectorizationLegality { 213 public: LoopVectorizationLegality(Loop * L,PredicatedScalarEvolution & PSE,DominatorTree * DT,TargetLibraryInfo * TLI,AliasAnalysis * AA,Function * F,std::function<const LoopAccessInfo & (Loop &)> * GetLAA,LoopInfo * LI,OptimizationRemarkEmitter * ORE,LoopVectorizationRequirements * R,LoopVectorizeHints * H,DemandedBits * DB,AssumptionCache * AC)214 LoopVectorizationLegality( 215 Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, 216 TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F, 217 std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI, 218 OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, 219 LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC) 220 : TheLoop(L), LI(LI), PSE(PSE), TLI(TLI), DT(DT), GetLAA(GetLAA), 221 ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {} 222 223 /// ReductionList contains the reduction descriptors for all 224 /// of the reductions that were found in the loop. 225 using ReductionList = DenseMap<PHINode *, RecurrenceDescriptor>; 226 227 /// InductionList saves induction variables and maps them to the 228 /// induction descriptor. 229 using InductionList = MapVector<PHINode *, InductionDescriptor>; 230 231 /// RecurrenceSet contains the phi nodes that are recurrences other than 232 /// inductions and reductions. 233 using RecurrenceSet = SmallPtrSet<const PHINode *, 8>; 234 235 /// Returns true if it is legal to vectorize this loop. 236 /// This does not mean that it is profitable to vectorize this 237 /// loop, only that it is legal to do so. 238 /// Temporarily taking UseVPlanNativePath parameter. If true, take 239 /// the new code path being implemented for outer loop vectorization 240 /// (should be functional for inner loop vectorization) based on VPlan. 241 /// If false, good old LV code. 242 bool canVectorize(bool UseVPlanNativePath); 243 244 /// Returns the primary induction variable. getPrimaryInduction()245 PHINode *getPrimaryInduction() { return PrimaryInduction; } 246 247 /// Returns the reduction variables found in the loop. getReductionVars()248 ReductionList *getReductionVars() { return &Reductions; } 249 250 /// Returns the induction variables found in the loop. getInductionVars()251 InductionList *getInductionVars() { return &Inductions; } 252 253 /// Return the first-order recurrences found in the loop. getFirstOrderRecurrences()254 RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; } 255 256 /// Return the set of instructions to sink to handle first-order recurrences. getSinkAfter()257 DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; } 258 259 /// Returns the widest induction type. getWidestInductionType()260 Type *getWidestInductionType() { return WidestIndTy; } 261 262 /// Returns True if V is a Phi node of an induction variable in this loop. 263 bool isInductionPhi(const Value *V); 264 265 /// Returns True if V is a cast that is part of an induction def-use chain, 266 /// and had been proven to be redundant under a runtime guard (in other 267 /// words, the cast has the same SCEV expression as the induction phi). 268 bool isCastedInductionVariable(const Value *V); 269 270 /// Returns True if V can be considered as an induction variable in this 271 /// loop. V can be the induction phi, or some redundant cast in the def-use 272 /// chain of the inducion phi. 273 bool isInductionVariable(const Value *V); 274 275 /// Returns True if PN is a reduction variable in this loop. isReductionVariable(PHINode * PN)276 bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); } 277 278 /// Returns True if Phi is a first-order recurrence in this loop. 279 bool isFirstOrderRecurrence(const PHINode *Phi); 280 281 /// Return true if the block BB needs to be predicated in order for the loop 282 /// to be vectorized. 283 bool blockNeedsPredication(BasicBlock *BB); 284 285 /// Check if this pointer is consecutive when vectorizing. This happens 286 /// when the last index of the GEP is the induction variable, or that the 287 /// pointer itself is an induction variable. 288 /// This check allows us to vectorize A[idx] into a wide load/store. 289 /// Returns: 290 /// 0 - Stride is unknown or non-consecutive. 291 /// 1 - Address is consecutive. 292 /// -1 - Address is consecutive, and decreasing. 293 /// NOTE: This method must only be used before modifying the original scalar 294 /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965). 295 int isConsecutivePtr(Value *Ptr); 296 297 /// Returns true if the value V is uniform within the loop. 298 bool isUniform(Value *V); 299 300 /// Returns the information that we collected about runtime memory check. getRuntimePointerChecking()301 const RuntimePointerChecking *getRuntimePointerChecking() const { 302 return LAI->getRuntimePointerChecking(); 303 } 304 getLAI()305 const LoopAccessInfo *getLAI() const { return LAI; } 306 getMaxSafeDepDistBytes()307 unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } 308 getMaxSafeRegisterWidth()309 uint64_t getMaxSafeRegisterWidth() const { 310 return LAI->getDepChecker().getMaxSafeRegisterWidth(); 311 } 312 hasStride(Value * V)313 bool hasStride(Value *V) { return LAI->hasStride(V); } 314 315 /// Returns true if vector representation of the instruction \p I 316 /// requires mask. isMaskRequired(const Instruction * I)317 bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); } 318 getNumStores()319 unsigned getNumStores() const { return LAI->getNumStores(); } getNumLoads()320 unsigned getNumLoads() const { return LAI->getNumLoads(); } 321 322 // Returns true if the NoNaN attribute is set on the function. hasFunNoNaNAttr()323 bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; } 324 325 private: 326 /// Return true if the pre-header, exiting and latch blocks of \p Lp and all 327 /// its nested loops are considered legal for vectorization. These legal 328 /// checks are common for inner and outer loop vectorization. 329 /// Temporarily taking UseVPlanNativePath parameter. If true, take 330 /// the new code path being implemented for outer loop vectorization 331 /// (should be functional for inner loop vectorization) based on VPlan. 332 /// If false, good old LV code. 333 bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath); 334 335 /// Return true if the pre-header, exiting and latch blocks of \p Lp 336 /// (non-recursive) are considered legal for vectorization. 337 /// Temporarily taking UseVPlanNativePath parameter. If true, take 338 /// the new code path being implemented for outer loop vectorization 339 /// (should be functional for inner loop vectorization) based on VPlan. 340 /// If false, good old LV code. 341 bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath); 342 343 /// Check if a single basic block loop is vectorizable. 344 /// At this point we know that this is a loop with a constant trip count 345 /// and we only need to check individual instructions. 346 bool canVectorizeInstrs(); 347 348 /// When we vectorize loops we may change the order in which 349 /// we read and write from memory. This method checks if it is 350 /// legal to vectorize the code, considering only memory constrains. 351 /// Returns true if the loop is vectorizable 352 bool canVectorizeMemory(); 353 354 /// Return true if we can vectorize this loop using the IF-conversion 355 /// transformation. 356 bool canVectorizeWithIfConvert(); 357 358 /// Return true if we can vectorize this outer loop. The method performs 359 /// specific checks for outer loop vectorization. 360 bool canVectorizeOuterLoop(); 361 362 /// Return true if all of the instructions in the block can be speculatively 363 /// executed. \p SafePtrs is a list of addresses that are known to be legal 364 /// and we know that we can read from them without segfault. 365 bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs); 366 367 /// Updates the vectorization state by adding \p Phi to the inductions list. 368 /// This can set \p Phi as the main induction of the loop if \p Phi is a 369 /// better choice for the main induction than the existing one. 370 void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID, 371 SmallPtrSetImpl<Value *> &AllowedExit); 372 373 /// Create an analysis remark that explains why vectorization failed 374 /// 375 /// \p RemarkName is the identifier for the remark. If \p I is passed it is 376 /// an instruction that prevents vectorization. Otherwise the loop is used 377 /// for the location of the remark. \return the remark object that can be 378 /// streamed to. 379 OptimizationRemarkAnalysis 380 createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const { 381 return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(), 382 RemarkName, TheLoop, I); 383 } 384 385 /// If an access has a symbolic strides, this maps the pointer value to 386 /// the stride symbol. getSymbolicStrides()387 const ValueToValueMap *getSymbolicStrides() { 388 // FIXME: Currently, the set of symbolic strides is sometimes queried before 389 // it's collected. This happens from canVectorizeWithIfConvert, when the 390 // pointer is checked to reference consecutive elements suitable for a 391 // masked access. 392 return LAI ? &LAI->getSymbolicStrides() : nullptr; 393 } 394 395 /// The loop that we evaluate. 396 Loop *TheLoop; 397 398 /// Loop Info analysis. 399 LoopInfo *LI; 400 401 /// A wrapper around ScalarEvolution used to add runtime SCEV checks. 402 /// Applies dynamic knowledge to simplify SCEV expressions in the context 403 /// of existing SCEV assumptions. The analysis will also add a minimal set 404 /// of new predicates if this is required to enable vectorization and 405 /// unrolling. 406 PredicatedScalarEvolution &PSE; 407 408 /// Target Library Info. 409 TargetLibraryInfo *TLI; 410 411 /// Dominator Tree. 412 DominatorTree *DT; 413 414 // LoopAccess analysis. 415 std::function<const LoopAccessInfo &(Loop &)> *GetLAA; 416 417 // And the loop-accesses info corresponding to this loop. This pointer is 418 // null until canVectorizeMemory sets it up. 419 const LoopAccessInfo *LAI = nullptr; 420 421 /// Interface to emit optimization remarks. 422 OptimizationRemarkEmitter *ORE; 423 424 // --- vectorization state --- // 425 426 /// Holds the primary induction variable. This is the counter of the 427 /// loop. 428 PHINode *PrimaryInduction = nullptr; 429 430 /// Holds the reduction variables. 431 ReductionList Reductions; 432 433 /// Holds all of the induction variables that we found in the loop. 434 /// Notice that inductions don't need to start at zero and that induction 435 /// variables can be pointers. 436 InductionList Inductions; 437 438 /// Holds all the casts that participate in the update chain of the induction 439 /// variables, and that have been proven to be redundant (possibly under a 440 /// runtime guard). These casts can be ignored when creating the vectorized 441 /// loop body. 442 SmallPtrSet<Instruction *, 4> InductionCastsToIgnore; 443 444 /// Holds the phi nodes that are first-order recurrences. 445 RecurrenceSet FirstOrderRecurrences; 446 447 /// Holds instructions that need to sink past other instructions to handle 448 /// first-order recurrences. 449 DenseMap<Instruction *, Instruction *> SinkAfter; 450 451 /// Holds the widest induction type encountered. 452 Type *WidestIndTy = nullptr; 453 454 /// Allowed outside users. This holds the induction and reduction 455 /// vars which can be accessed from outside the loop. 456 SmallPtrSet<Value *, 4> AllowedExit; 457 458 /// Can we assume the absence of NaNs. 459 bool HasFunNoNaNAttr = false; 460 461 /// Vectorization requirements that will go through late-evaluation. 462 LoopVectorizationRequirements *Requirements; 463 464 /// Used to emit an analysis of any legality issues. 465 LoopVectorizeHints *Hints; 466 467 /// The demanded bits analsyis is used to compute the minimum type size in 468 /// which a reduction can be computed. 469 DemandedBits *DB; 470 471 /// The assumption cache analysis is used to compute the minimum type size in 472 /// which a reduction can be computed. 473 AssumptionCache *AC; 474 475 /// While vectorizing these instructions we have to generate a 476 /// call to the appropriate masked intrinsic 477 SmallPtrSet<const Instruction *, 8> MaskedOp; 478 }; 479 480 } // namespace llvm 481 482 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H 483