1 //===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file contains the declarations of the Vectorization Plan base classes:
11 /// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12 ///    VPBlockBase, together implementing a Hierarchical CFG;
13 /// 2. Specializations of GraphTraits that allow VPBlockBase graphs to be
14 ///    treated as proper graphs for generic algorithms;
15 /// 3. Pure virtual VPRecipeBase serving as the base class for recipes contained
16 ///    within VPBasicBlocks;
17 /// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18 ///    instruction;
19 /// 5. The VPlan class holding a candidate for vectorization;
20 /// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21 /// These are documented in docs/VectorizationPlan.rst.
22 //
23 //===----------------------------------------------------------------------===//
24 
25 #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26 #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27 
28 #include "VPlanLoopInfo.h"
29 #include "VPlanValue.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/DepthFirstIterator.h"
32 #include "llvm/ADT/GraphTraits.h"
33 #include "llvm/ADT/Optional.h"
34 #include "llvm/ADT/SmallBitVector.h"
35 #include "llvm/ADT/SmallPtrSet.h"
36 #include "llvm/ADT/SmallSet.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/ADT/Twine.h"
39 #include "llvm/ADT/ilist.h"
40 #include "llvm/ADT/ilist_node.h"
41 #include "llvm/Analysis/VectorUtils.h"
42 #include "llvm/IR/IRBuilder.h"
43 #include <algorithm>
44 #include <cassert>
45 #include <cstddef>
46 #include <map>
47 #include <string>
48 
49 namespace llvm {
50 
51 class BasicBlock;
52 class DominatorTree;
53 class InnerLoopVectorizer;
54 class LoopInfo;
55 class raw_ostream;
56 class RecurrenceDescriptor;
57 class Value;
58 class VPBasicBlock;
59 class VPRegionBlock;
60 class VPlan;
61 class VPlanSlp;
62 
63 /// A range of powers-of-2 vectorization factors with fixed start and
64 /// adjustable end. The range includes start and excludes end, e.g.,:
65 /// [1, 9) = {1, 2, 4, 8}
66 struct VFRange {
67   // A power of 2.
68   const ElementCount Start;
69 
70   // Need not be a power of 2. If End <= Start range is empty.
71   ElementCount End;
72 
isEmptyVFRange73   bool isEmpty() const {
74     return End.getKnownMinValue() <= Start.getKnownMinValue();
75   }
76 
VFRangeVFRange77   VFRange(const ElementCount &Start, const ElementCount &End)
78       : Start(Start), End(End) {
79     assert(Start.isScalable() == End.isScalable() &&
80            "Both Start and End should have the same scalable flag");
81     assert(isPowerOf2_32(Start.getKnownMinValue()) &&
82            "Expected Start to be a power of 2");
83   }
84 };
85 
86 using VPlanPtr = std::unique_ptr<VPlan>;
87 
88 /// In what follows, the term "input IR" refers to code that is fed into the
89 /// vectorizer whereas the term "output IR" refers to code that is generated by
90 /// the vectorizer.
91 
/// VPIteration represents a single point in the iteration space of the output
/// (vectorized and/or unrolled) IR loop. It is a plain (Part, Lane) pair.
struct VPIteration {
  /// Unroll part index, in [0..UF), where UF is the unroll factor.
  unsigned Part;

  /// Vector lane index, in [0..VF), where VF is the vectorization factor.
  unsigned Lane;
};
101 
102 /// This is a helper struct for maintaining vectorization state. It's used for
103 /// mapping values from the original loop to their corresponding values in
104 /// the new loop. Two mappings are maintained: one for vectorized values and
105 /// one for scalarized values. Vectorized values are represented with UF
106 /// vector values in the new loop, and scalarized values are represented with
107 /// UF x VF scalar values in the new loop. UF and VF are the unroll and
108 /// vectorization factors, respectively.
109 ///
110 /// Entries can be added to either map with setVectorValue and setScalarValue,
111 /// which assert that an entry was not already added before. If an entry is to
112 /// replace an existing one, call resetVectorValue and resetScalarValue. This is
113 /// currently needed to modify the mapped values during "fix-up" operations that
114 /// occur once the first phase of widening is complete. These operations include
115 /// type truncation and the second phase of recurrence widening.
116 ///
117 /// Entries from either map can be retrieved using the getVectorValue and
118 /// getScalarValue functions, which assert that the desired value exists.
119 struct VectorizerValueMap {
120   friend struct VPTransformState;
121 
122 private:
123   /// The unroll factor. Each entry in the vector map contains UF vector values.
124   unsigned UF;
125 
126   /// The vectorization factor. Each entry in the scalar map contains UF x VF
127   /// scalar values.
128   ElementCount VF;
129 
130   /// The vector and scalar map storage. We use std::map and not DenseMap
131   /// because insertions to DenseMap invalidate its iterators.
132   using VectorParts = SmallVector<Value *, 2>;
133   using ScalarParts = SmallVector<SmallVector<Value *, 4>, 2>;
134   std::map<Value *, VectorParts> VectorMapStorage;
135   std::map<Value *, ScalarParts> ScalarMapStorage;
136 
137 public:
138   /// Construct an empty map with the given unroll and vectorization factors.
VectorizerValueMapVectorizerValueMap139   VectorizerValueMap(unsigned UF, ElementCount VF) : UF(UF), VF(VF) {}
140 
141   /// \return True if the map has any vector entry for \p Key.
hasAnyVectorValueVectorizerValueMap142   bool hasAnyVectorValue(Value *Key) const {
143     return VectorMapStorage.count(Key);
144   }
145 
146   /// \return True if the map has a vector entry for \p Key and \p Part.
hasVectorValueVectorizerValueMap147   bool hasVectorValue(Value *Key, unsigned Part) const {
148     assert(Part < UF && "Queried Vector Part is too large.");
149     if (!hasAnyVectorValue(Key))
150       return false;
151     const VectorParts &Entry = VectorMapStorage.find(Key)->second;
152     assert(Entry.size() == UF && "VectorParts has wrong dimensions.");
153     return Entry[Part] != nullptr;
154   }
155 
156   /// \return True if the map has any scalar entry for \p Key.
hasAnyScalarValueVectorizerValueMap157   bool hasAnyScalarValue(Value *Key) const {
158     return ScalarMapStorage.count(Key);
159   }
160 
161   /// \return True if the map has a scalar entry for \p Key and \p Instance.
hasScalarValueVectorizerValueMap162   bool hasScalarValue(Value *Key, const VPIteration &Instance) const {
163     assert(Instance.Part < UF && "Queried Scalar Part is too large.");
164     assert(Instance.Lane < VF.getKnownMinValue() &&
165            "Queried Scalar Lane is too large.");
166 
167     if (!hasAnyScalarValue(Key))
168       return false;
169     const ScalarParts &Entry = ScalarMapStorage.find(Key)->second;
170     assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");
171     assert(Entry[Instance.Part].size() == VF.getKnownMinValue() &&
172            "ScalarParts has wrong dimensions.");
173     return Entry[Instance.Part][Instance.Lane] != nullptr;
174   }
175 
176   /// Retrieve the existing vector value that corresponds to \p Key and
177   /// \p Part.
getVectorValueVectorizerValueMap178   Value *getVectorValue(Value *Key, unsigned Part) {
179     assert(hasVectorValue(Key, Part) && "Getting non-existent value.");
180     return VectorMapStorage[Key][Part];
181   }
182 
183   /// Retrieve the existing scalar value that corresponds to \p Key and
184   /// \p Instance.
getScalarValueVectorizerValueMap185   Value *getScalarValue(Value *Key, const VPIteration &Instance) {
186     assert(hasScalarValue(Key, Instance) && "Getting non-existent value.");
187     return ScalarMapStorage[Key][Instance.Part][Instance.Lane];
188   }
189 
190   /// Set a vector value associated with \p Key and \p Part. Assumes such a
191   /// value is not already set. If it is, use resetVectorValue() instead.
setVectorValueVectorizerValueMap192   void setVectorValue(Value *Key, unsigned Part, Value *Vector) {
193     assert(!hasVectorValue(Key, Part) && "Vector value already set for part");
194     if (!VectorMapStorage.count(Key)) {
195       VectorParts Entry(UF);
196       VectorMapStorage[Key] = Entry;
197     }
198     VectorMapStorage[Key][Part] = Vector;
199   }
200 
201   /// Set a scalar value associated with \p Key and \p Instance. Assumes such a
202   /// value is not already set.
setScalarValueVectorizerValueMap203   void setScalarValue(Value *Key, const VPIteration &Instance, Value *Scalar) {
204     assert(!hasScalarValue(Key, Instance) && "Scalar value already set");
205     if (!ScalarMapStorage.count(Key)) {
206       ScalarParts Entry(UF);
207       // TODO: Consider storing uniform values only per-part, as they occupy
208       //       lane 0 only, keeping the other VF-1 redundant entries null.
209       for (unsigned Part = 0; Part < UF; ++Part)
210         Entry[Part].resize(VF.getKnownMinValue(), nullptr);
211       ScalarMapStorage[Key] = Entry;
212     }
213     ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
214   }
215 
216   /// Reset the vector value associated with \p Key for the given \p Part.
217   /// This function can be used to update values that have already been
218   /// vectorized. This is the case for "fix-up" operations including type
219   /// truncation and the second phase of recurrence vectorization.
resetVectorValueVectorizerValueMap220   void resetVectorValue(Value *Key, unsigned Part, Value *Vector) {
221     assert(hasVectorValue(Key, Part) && "Vector value not set for part");
222     VectorMapStorage[Key][Part] = Vector;
223   }
224 
225   /// Reset the scalar value associated with \p Key for \p Part and \p Lane.
226   /// This function can be used to update values that have already been
227   /// scalarized. This is the case for "fix-up" operations including scalar phi
228   /// nodes for scalarized and predicated instructions.
resetScalarValueVectorizerValueMap229   void resetScalarValue(Value *Key, const VPIteration &Instance,
230                         Value *Scalar) {
231     assert(hasScalarValue(Key, Instance) &&
232            "Scalar value not set for part and lane");
233     ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
234   }
235 };
236 
237 /// This class is used to enable the VPlan to invoke a method of ILV. This is
238 /// needed until the method is refactored out of ILV and becomes reusable.
239 struct VPCallback {
~VPCallbackVPCallback240   virtual ~VPCallback() {}
241   virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0;
242   virtual Value *getOrCreateScalarValue(Value *V,
243                                         const VPIteration &Instance) = 0;
244 };
245 
246 /// VPTransformState holds information passed down when "executing" a VPlan,
247 /// needed for generating the output IR.
248 struct VPTransformState {
VPTransformStateVPTransformState249   VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
250                    DominatorTree *DT, IRBuilder<> &Builder,
251                    VectorizerValueMap &ValueMap, InnerLoopVectorizer *ILV,
252                    VPCallback &Callback)
253       : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder),
254         ValueMap(ValueMap), ILV(ILV), Callback(Callback) {}
255 
256   /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
257   ElementCount VF;
258   unsigned UF;
259 
260   /// Hold the indices to generate specific scalar instructions. Null indicates
261   /// that all instances are to be generated, using either scalar or vector
262   /// instructions.
263   Optional<VPIteration> Instance;
264 
265   struct DataState {
266     /// A type for vectorized values in the new loop. Each value from the
267     /// original loop, when vectorized, is represented by UF vector values in
268     /// the new unrolled loop, where UF is the unroll factor.
269     typedef SmallVector<Value *, 2> PerPartValuesTy;
270 
271     DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;
272   } Data;
273 
274   /// Get the generated Value for a given VPValue and a given Part. Note that
275   /// as some Defs are still created by ILV and managed in its ValueMap, this
276   /// method will delegate the call to ILV in such cases in order to provide
277   /// callers a consistent API.
278   /// \see set.
getVPTransformState279   Value *get(VPValue *Def, unsigned Part) {
280     // If Values have been set for this Def return the one relevant for \p Part.
281     if (Data.PerPartOutput.count(Def))
282       return Data.PerPartOutput[Def][Part];
283     // Def is managed by ILV: bring the Values from ValueMap.
284     return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part);
285   }
286 
287   /// Get the generated Value for a given VPValue and given Part and Lane.
getVPTransformState288   Value *get(VPValue *Def, const VPIteration &Instance) {
289     // If the Def is managed directly by VPTransformState, extract the lane from
290     // the relevant part. Note that currently only VPInstructions and external
291     // defs are managed by VPTransformState. Other Defs are still created by ILV
292     // and managed in its ValueMap. For those this method currently just
293     // delegates the call to ILV below.
294     if (Data.PerPartOutput.count(Def)) {
295       auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
296       if (!VecPart->getType()->isVectorTy()) {
297         assert(Instance.Lane == 0 && "cannot get lane > 0 for scalar");
298         return VecPart;
299       }
300       // TODO: Cache created scalar values.
301       return Builder.CreateExtractElement(VecPart,
302                                           Builder.getInt32(Instance.Lane));
303     }
304 
305     return Callback.getOrCreateScalarValue(VPValue2Value[Def], Instance);
306   }
307 
308   /// Set the generated Value for a given VPValue and a given Part.
setVPTransformState309   void set(VPValue *Def, Value *V, unsigned Part) {
310     if (!Data.PerPartOutput.count(Def)) {
311       DataState::PerPartValuesTy Entry(UF);
312       Data.PerPartOutput[Def] = Entry;
313     }
314     Data.PerPartOutput[Def][Part] = V;
315   }
316   void set(VPValue *Def, Value *IRDef, Value *V, unsigned Part);
317 
318   /// Hold state information used when constructing the CFG of the output IR,
319   /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
320   struct CFGState {
321     /// The previous VPBasicBlock visited. Initially set to null.
322     VPBasicBlock *PrevVPBB = nullptr;
323 
324     /// The previous IR BasicBlock created or used. Initially set to the new
325     /// header BasicBlock.
326     BasicBlock *PrevBB = nullptr;
327 
328     /// The last IR BasicBlock in the output IR. Set to the new latch
329     /// BasicBlock, used for placing the newly created BasicBlocks.
330     BasicBlock *LastBB = nullptr;
331 
332     /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
333     /// of replication, maps the BasicBlock of the last replica created.
334     SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
335 
336     /// Vector of VPBasicBlocks whose terminator instruction needs to be fixed
337     /// up at the end of vector code generation.
338     SmallVector<VPBasicBlock *, 8> VPBBsToFix;
339 
340     CFGState() = default;
341   } CFG;
342 
343   /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
344   LoopInfo *LI;
345 
346   /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
347   DominatorTree *DT;
348 
349   /// Hold a reference to the IRBuilder used to generate output IR code.
350   IRBuilder<> &Builder;
351 
352   /// Hold a reference to the Value state information used when generating the
353   /// Values of the output IR.
354   VectorizerValueMap &ValueMap;
355 
356   /// Hold a reference to a mapping between VPValues in VPlan and original
357   /// Values they correspond to.
358   VPValue2ValueTy VPValue2Value;
359 
360   /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
361   Value *CanonicalIV = nullptr;
362 
363   /// Hold the trip count of the scalar loop.
364   Value *TripCount = nullptr;
365 
366   /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
367   InnerLoopVectorizer *ILV;
368 
369   VPCallback &Callback;
370 };
371 
372 /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
373 /// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
374 class VPBlockBase {
375   friend class VPBlockUtils;
376 
377   const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
378 
379   /// An optional name for the block.
380   std::string Name;
381 
382   /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
383   /// it is a topmost VPBlockBase.
384   VPRegionBlock *Parent = nullptr;
385 
386   /// List of predecessor blocks.
387   SmallVector<VPBlockBase *, 1> Predecessors;
388 
389   /// List of successor blocks.
390   SmallVector<VPBlockBase *, 1> Successors;
391 
392   /// Successor selector, null for zero or single successor blocks.
393   VPValue *CondBit = nullptr;
394 
395   /// Current block predicate - null if the block does not need a predicate.
396   VPValue *Predicate = nullptr;
397 
398   /// VPlan containing the block. Can only be set on the entry block of the
399   /// plan.
400   VPlan *Plan = nullptr;
401 
402   /// Add \p Successor as the last successor to this block.
appendSuccessor(VPBlockBase * Successor)403   void appendSuccessor(VPBlockBase *Successor) {
404     assert(Successor && "Cannot add nullptr successor!");
405     Successors.push_back(Successor);
406   }
407 
408   /// Add \p Predecessor as the last predecessor to this block.
appendPredecessor(VPBlockBase * Predecessor)409   void appendPredecessor(VPBlockBase *Predecessor) {
410     assert(Predecessor && "Cannot add nullptr predecessor!");
411     Predecessors.push_back(Predecessor);
412   }
413 
414   /// Remove \p Predecessor from the predecessors of this block.
removePredecessor(VPBlockBase * Predecessor)415   void removePredecessor(VPBlockBase *Predecessor) {
416     auto Pos = std::find(Predecessors.begin(), Predecessors.end(), Predecessor);
417     assert(Pos && "Predecessor does not exist");
418     Predecessors.erase(Pos);
419   }
420 
421   /// Remove \p Successor from the successors of this block.
removeSuccessor(VPBlockBase * Successor)422   void removeSuccessor(VPBlockBase *Successor) {
423     auto Pos = std::find(Successors.begin(), Successors.end(), Successor);
424     assert(Pos && "Successor does not exist");
425     Successors.erase(Pos);
426   }
427 
428 protected:
VPBlockBase(const unsigned char SC,const std::string & N)429   VPBlockBase(const unsigned char SC, const std::string &N)
430       : SubclassID(SC), Name(N) {}
431 
432 public:
433   /// An enumeration for keeping track of the concrete subclass of VPBlockBase
434   /// that are actually instantiated. Values of this enumeration are kept in the
435   /// SubclassID field of the VPBlockBase objects. They are used for concrete
436   /// type identification.
437   using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };
438 
439   using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;
440 
441   virtual ~VPBlockBase() = default;
442 
getName()443   const std::string &getName() const { return Name; }
444 
setName(const Twine & newName)445   void setName(const Twine &newName) { Name = newName.str(); }
446 
447   /// \return an ID for the concrete type of this object.
448   /// This is used to implement the classof checks. This should not be used
449   /// for any other purpose, as the values may change as LLVM evolves.
getVPBlockID()450   unsigned getVPBlockID() const { return SubclassID; }
451 
getParent()452   VPRegionBlock *getParent() { return Parent; }
getParent()453   const VPRegionBlock *getParent() const { return Parent; }
454 
455   /// \return A pointer to the plan containing the current block.
456   VPlan *getPlan();
457   const VPlan *getPlan() const;
458 
459   /// Sets the pointer of the plan containing the block. The block must be the
460   /// entry block into the VPlan.
461   void setPlan(VPlan *ParentPlan);
462 
setParent(VPRegionBlock * P)463   void setParent(VPRegionBlock *P) { Parent = P; }
464 
465   /// \return the VPBasicBlock that is the entry of this VPBlockBase,
466   /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
467   /// VPBlockBase is a VPBasicBlock, it is returned.
468   const VPBasicBlock *getEntryBasicBlock() const;
469   VPBasicBlock *getEntryBasicBlock();
470 
471   /// \return the VPBasicBlock that is the exit of this VPBlockBase,
472   /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
473   /// VPBlockBase is a VPBasicBlock, it is returned.
474   const VPBasicBlock *getExitBasicBlock() const;
475   VPBasicBlock *getExitBasicBlock();
476 
getSuccessors()477   const VPBlocksTy &getSuccessors() const { return Successors; }
getSuccessors()478   VPBlocksTy &getSuccessors() { return Successors; }
479 
getPredecessors()480   const VPBlocksTy &getPredecessors() const { return Predecessors; }
getPredecessors()481   VPBlocksTy &getPredecessors() { return Predecessors; }
482 
483   /// \return the successor of this VPBlockBase if it has a single successor.
484   /// Otherwise return a null pointer.
getSingleSuccessor()485   VPBlockBase *getSingleSuccessor() const {
486     return (Successors.size() == 1 ? *Successors.begin() : nullptr);
487   }
488 
489   /// \return the predecessor of this VPBlockBase if it has a single
490   /// predecessor. Otherwise return a null pointer.
getSinglePredecessor()491   VPBlockBase *getSinglePredecessor() const {
492     return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
493   }
494 
getNumSuccessors()495   size_t getNumSuccessors() const { return Successors.size(); }
getNumPredecessors()496   size_t getNumPredecessors() const { return Predecessors.size(); }
497 
498   /// An Enclosing Block of a block B is any block containing B, including B
499   /// itself. \return the closest enclosing block starting from "this", which
500   /// has successors. \return the root enclosing block if all enclosing blocks
501   /// have no successors.
502   VPBlockBase *getEnclosingBlockWithSuccessors();
503 
504   /// \return the closest enclosing block starting from "this", which has
505   /// predecessors. \return the root enclosing block if all enclosing blocks
506   /// have no predecessors.
507   VPBlockBase *getEnclosingBlockWithPredecessors();
508 
509   /// \return the successors either attached directly to this VPBlockBase or, if
510   /// this VPBlockBase is the exit block of a VPRegionBlock and has no
511   /// successors of its own, search recursively for the first enclosing
512   /// VPRegionBlock that has successors and return them. If no such
513   /// VPRegionBlock exists, return the (empty) successors of the topmost
514   /// VPBlockBase reached.
getHierarchicalSuccessors()515   const VPBlocksTy &getHierarchicalSuccessors() {
516     return getEnclosingBlockWithSuccessors()->getSuccessors();
517   }
518 
519   /// \return the hierarchical successor of this VPBlockBase if it has a single
520   /// hierarchical successor. Otherwise return a null pointer.
getSingleHierarchicalSuccessor()521   VPBlockBase *getSingleHierarchicalSuccessor() {
522     return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
523   }
524 
525   /// \return the predecessors either attached directly to this VPBlockBase or,
526   /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
527   /// predecessors of its own, search recursively for the first enclosing
528   /// VPRegionBlock that has predecessors and return them. If no such
529   /// VPRegionBlock exists, return the (empty) predecessors of the topmost
530   /// VPBlockBase reached.
getHierarchicalPredecessors()531   const VPBlocksTy &getHierarchicalPredecessors() {
532     return getEnclosingBlockWithPredecessors()->getPredecessors();
533   }
534 
535   /// \return the hierarchical predecessor of this VPBlockBase if it has a
536   /// single hierarchical predecessor. Otherwise return a null pointer.
getSingleHierarchicalPredecessor()537   VPBlockBase *getSingleHierarchicalPredecessor() {
538     return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
539   }
540 
541   /// \return the condition bit selecting the successor.
getCondBit()542   VPValue *getCondBit() { return CondBit; }
543 
getCondBit()544   const VPValue *getCondBit() const { return CondBit; }
545 
setCondBit(VPValue * CV)546   void setCondBit(VPValue *CV) { CondBit = CV; }
547 
getPredicate()548   VPValue *getPredicate() { return Predicate; }
549 
getPredicate()550   const VPValue *getPredicate() const { return Predicate; }
551 
setPredicate(VPValue * Pred)552   void setPredicate(VPValue *Pred) { Predicate = Pred; }
553 
554   /// Set a given VPBlockBase \p Successor as the single successor of this
555   /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
556   /// This VPBlockBase must have no successors.
setOneSuccessor(VPBlockBase * Successor)557   void setOneSuccessor(VPBlockBase *Successor) {
558     assert(Successors.empty() && "Setting one successor when others exist.");
559     appendSuccessor(Successor);
560   }
561 
562   /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
563   /// successors of this VPBlockBase. \p Condition is set as the successor
564   /// selector. This VPBlockBase is not added as predecessor of \p IfTrue or \p
565   /// IfFalse. This VPBlockBase must have no successors.
setTwoSuccessors(VPBlockBase * IfTrue,VPBlockBase * IfFalse,VPValue * Condition)566   void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
567                         VPValue *Condition) {
568     assert(Successors.empty() && "Setting two successors when others exist.");
569     assert(Condition && "Setting two successors without condition!");
570     CondBit = Condition;
571     appendSuccessor(IfTrue);
572     appendSuccessor(IfFalse);
573   }
574 
575   /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
576   /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
577   /// as successor of any VPBasicBlock in \p NewPreds.
setPredecessors(ArrayRef<VPBlockBase * > NewPreds)578   void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
579     assert(Predecessors.empty() && "Block predecessors already set.");
580     for (auto *Pred : NewPreds)
581       appendPredecessor(Pred);
582   }
583 
584   /// Remove all the predecessor of this block.
clearPredecessors()585   void clearPredecessors() { Predecessors.clear(); }
586 
587   /// Remove all the successors of this block and set to null its condition bit
clearSuccessors()588   void clearSuccessors() {
589     Successors.clear();
590     CondBit = nullptr;
591   }
592 
593   /// The method which generates the output IR that correspond to this
594   /// VPBlockBase, thereby "executing" the VPlan.
595   virtual void execute(struct VPTransformState *State) = 0;
596 
597   /// Delete all blocks reachable from a given VPBlockBase, inclusive.
598   static void deleteCFG(VPBlockBase *Entry);
599 
printAsOperand(raw_ostream & OS,bool PrintType)600   void printAsOperand(raw_ostream &OS, bool PrintType) const {
601     OS << getName();
602   }
603 
print(raw_ostream & OS)604   void print(raw_ostream &OS) const {
605     // TODO: Only printing VPBB name for now since we only have dot printing
606     // support for VPInstructions/Recipes.
607     printAsOperand(OS, false);
608   }
609 
610   /// Return true if it is legal to hoist instructions into this block.
isLegalToHoistInto()611   bool isLegalToHoistInto() {
612     // There are currently no constraints that prevent an instruction to be
613     // hoisted into a VPBlockBase.
614     return true;
615   }
616 
617   /// Replace all operands of VPUsers in the block with \p NewValue and also
618   /// replaces all uses of VPValues defined in the block with NewValue.
619   virtual void dropAllReferences(VPValue *NewValue) = 0;
620 };
621 
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. A recipe lives in exactly one VPBasicBlock, linked through
/// an intrusive list (ilist_node_with_parent).
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
  // VPBasicBlock and VPBlockUtils manage recipe insertion/removal and need
  // access to the private Parent pointer below.
  friend VPBasicBlock;
  friend class VPBlockUtils;

  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

public:
  /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
  /// that is actually instantiated. Values of this enumeration are kept in the
  /// SubclassID field of the VPRecipeBase objects. They are used for concrete
  /// type identification.
  using VPRecipeTy = enum {
    VPBlendSC,
    VPBranchOnMaskSC,
    VPInstructionSC,
    VPInterleaveSC,
    VPPredInstPHISC,
    VPReductionSC,
    VPReplicateSC,
    VPWidenCallSC,
    VPWidenCanonicalIVSC,
    VPWidenGEPSC,
    VPWidenIntOrFpInductionSC,
    VPWidenMemoryInstructionSC,
    VPWidenPHISC,
    VPWidenSC,
    VPWidenSelectSC
  };

  /// Construct a recipe of the concrete kind identified by \p SC.
  VPRecipeBase(const unsigned char SC) : SubclassID(SC) {}
  virtual ~VPRecipeBase() = default;

  /// \return an ID for the concrete type of this object.
  /// This is used to implement the classof checks. This should not be used
  /// for any other purpose, as the values may change as LLVM evolves.
  unsigned getVPRecipeID() const { return SubclassID; }

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(struct VPTransformState &State) = 0;

  /// Each recipe prints itself.
  virtual void print(raw_ostream &O, const Twine &Indent,
                     VPSlotTracker &SlotTracker) const = 0;

  /// Dump the recipe to stderr (for debugging).
  void dump() const;

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// Returns a pointer to a VPUser, if the recipe inherits from VPUser or
  /// nullptr otherwise.
  VPUser *toVPUser();

  /// Returns a pointer to a VPValue, if the recipe inherits from VPValue or
  /// nullptr otherwise.
  VPValue *toVPValue();
  const VPValue *toVPValue() const;

  /// Returns the underlying instruction, if the recipe is a VPValue or nullptr
  /// otherwise. Also null when the recipe is a VPValue with no underlying IR
  /// value attached.
  Instruction *getUnderlyingInstr() {
    if (auto *VPV = toVPValue())
      return cast_or_null<Instruction>(VPV->getUnderlyingValue());
    return nullptr;
  }
  const Instruction *getUnderlyingInstr() const {
    if (auto *VPV = toVPValue())
      return cast_or_null<Instruction>(VPV->getUnderlyingValue());
    return nullptr;
  }
};
722 
classof(const VPRecipeBase * Recipe)723 inline bool VPUser::classof(const VPRecipeBase *Recipe) {
724   return Recipe->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
725          Recipe->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
726          Recipe->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
727          Recipe->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC ||
728          Recipe->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
729          Recipe->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
730          Recipe->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
731          Recipe->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
732          Recipe->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
733          Recipe->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC ||
734          Recipe->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
735 }
736 
/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While as any Recipe it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPUser, public VPValue, public VPRecipeBase {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions. The values
  /// start past the last LLVM IR opcode so the two ranges never overlap.
  enum {
    Not = Instruction::OtherOpsEnd + 1,
    ICmpULE,
    SLPLoad,
    SLPStore,
    ActiveLaneMask,
  };

private:
  typedef unsigned char OpcodeTy;
  OpcodeTy Opcode;

  /// Utility method serving execute(): generates a single instance of the
  /// modeled instruction for unroll part \p Part.
  void generateInstruction(VPTransformState &State, unsigned Part);

protected:
  /// Record \p I as the IR instruction this VPInstruction models.
  void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }

public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
      : VPUser(Operands), VPValue(VPValue::VPVInstructionSC),
        VPRecipeBase(VPRecipeBase::VPInstructionSC), Opcode(Opcode) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands)
      : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands)) {}

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVInstructionSC;
  }

  /// Create a fresh VPInstruction with the same opcode and operands. The
  /// clone is not inserted into any VPBasicBlock.
  VPInstruction *clone() const {
    SmallVector<VPValue *, 2> Operands(operands());
    return new VPInstruction(Opcode, Operands);
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC;
  }

  unsigned getOpcode() const { return Opcode; }

  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

  /// Print the Recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Print the VPInstruction.
  void print(raw_ostream &O) const;
  void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;

  /// Return true if this instruction may modify memory.
  bool mayWriteToMemory() const {
    // TODO: we can use attributes of the called function to rule out memory
    //       modifications.
    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
           Opcode == Instruction::Invoke || Opcode == SLPStore;
  }

  /// Return true if this VPInstruction defines a value that other recipes may
  /// use, i.e. its opcode is not one of the known result-less instructions.
  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively assume that calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::Br:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
      return false;
    default:
      return true;
    }
  }
};
831 
/// VPWidenRecipe is a recipe for producing a copy of vector type its
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
class VPWidenRecipe : public VPRecipeBase, public VPValue, public VPUser {
public:
  /// \p I is the scalar ingredient to widen; \p Operands are its operands as
  /// VPValues. \p I is also recorded as the recipe's underlying IR value.
  template <typename IterT>
  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
      : VPRecipeBase(VPRecipeBase::VPWidenSC), VPValue(VPValue::VPVWidenSC, &I),
        VPUser(Operands) {}

  ~VPWidenRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPWidenSC;
  }
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVWidenSC;
  }

  /// Produce widened copies of all Ingredients.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};
859 
860 /// A recipe for widening Call instructions.
861 class VPWidenCallRecipe : public VPRecipeBase, public VPValue, public VPUser {
862 
863 public:
864   template <typename IterT>
VPWidenCallRecipe(CallInst & I,iterator_range<IterT> CallArguments)865   VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments)
866       : VPRecipeBase(VPRecipeBase::VPWidenCallSC),
867         VPValue(VPValue::VPVWidenCallSC, &I), VPUser(CallArguments) {}
868 
869   ~VPWidenCallRecipe() override = default;
870 
871   /// Method to support type inquiry through isa, cast, and dyn_cast.
classof(const VPRecipeBase * V)872   static inline bool classof(const VPRecipeBase *V) {
873     return V->getVPRecipeID() == VPRecipeBase::VPWidenCallSC;
874   }
875 
876   /// Produce a widened version of the call instruction.
877   void execute(VPTransformState &State) override;
878 
879   /// Print the recipe.
880   void print(raw_ostream &O, const Twine &Indent,
881              VPSlotTracker &SlotTracker) const override;
882 };
883 
884 /// A recipe for widening select instructions.
885 class VPWidenSelectRecipe : public VPRecipeBase, public VPValue, public VPUser {
886 
887   /// Is the condition of the select loop invariant?
888   bool InvariantCond;
889 
890 public:
891   template <typename IterT>
VPWidenSelectRecipe(SelectInst & I,iterator_range<IterT> Operands,bool InvariantCond)892   VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands,
893                       bool InvariantCond)
894       : VPRecipeBase(VPRecipeBase::VPWidenSelectSC),
895         VPValue(VPValue::VPVWidenSelectSC, &I), VPUser(Operands),
896         InvariantCond(InvariantCond) {}
897 
898   ~VPWidenSelectRecipe() override = default;
899 
900   /// Method to support type inquiry through isa, cast, and dyn_cast.
classof(const VPRecipeBase * V)901   static inline bool classof(const VPRecipeBase *V) {
902     return V->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC;
903   }
904 
905   /// Produce a widened version of the select instruction.
906   void execute(VPTransformState &State) override;
907 
908   /// Print the recipe.
909   void print(raw_ostream &O, const Twine &Indent,
910              VPSlotTracker &SlotTracker) const override;
911 };
912 
913 /// A recipe for handling GEP instructions.
914 class VPWidenGEPRecipe : public VPRecipeBase, public VPValue, public VPUser {
915   bool IsPtrLoopInvariant;
916   SmallBitVector IsIndexLoopInvariant;
917 
918 public:
919   template <typename IterT>
VPWidenGEPRecipe(GetElementPtrInst * GEP,iterator_range<IterT> Operands)920   VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
921       : VPRecipeBase(VPRecipeBase::VPWidenGEPSC), VPValue(VPWidenGEPSC, GEP),
922         VPUser(Operands), IsIndexLoopInvariant(GEP->getNumIndices(), false) {}
923 
924   template <typename IterT>
VPWidenGEPRecipe(GetElementPtrInst * GEP,iterator_range<IterT> Operands,Loop * OrigLoop)925   VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands,
926                    Loop *OrigLoop)
927       : VPRecipeBase(VPRecipeBase::VPWidenGEPSC),
928         VPValue(VPValue::VPVWidenGEPSC, GEP), VPUser(Operands),
929         IsIndexLoopInvariant(GEP->getNumIndices(), false) {
930     IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
931     for (auto Index : enumerate(GEP->indices()))
932       IsIndexLoopInvariant[Index.index()] =
933           OrigLoop->isLoopInvariant(Index.value().get());
934   }
935   ~VPWidenGEPRecipe() override = default;
936 
937   /// Method to support type inquiry through isa, cast, and dyn_cast.
classof(const VPRecipeBase * V)938   static inline bool classof(const VPRecipeBase *V) {
939     return V->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC;
940   }
941 
942   /// Generate the gep nodes.
943   void execute(VPTransformState &State) override;
944 
945   /// Print the recipe.
946   void print(raw_ostream &O, const Twine &Indent,
947              VPSlotTracker &SlotTracker) const override;
948 };
949 
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector and scalar values.
class VPWidenIntOrFpInductionRecipe : public VPRecipeBase {
  /// The induction phi being widened.
  PHINode *IV;
  /// Optional truncate of the induction; nullptr when the induction is used
  /// at its original width.
  TruncInst *Trunc;

public:
  VPWidenIntOrFpInductionRecipe(PHINode *IV, TruncInst *Trunc = nullptr)
      : VPRecipeBase(VPWidenIntOrFpInductionSC), IV(IV), Trunc(Trunc) {}
  ~VPWidenIntOrFpInductionRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC;
  }

  /// Generate the vectorized and scalarized versions of the phi node as
  /// needed by their users.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};
974 
/// A recipe for handling all phi nodes except for integer and FP inductions.
class VPWidenPHIRecipe : public VPRecipeBase {
  /// The phi node being widened.
  PHINode *Phi;

public:
  VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {}
  ~VPWidenPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPWidenPHISC;
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};
995 
/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class VPBlendRecipe : public VPRecipeBase, public VPUser {
  /// The original phi this blend replaces.
  PHINode *Phi;

public:
  /// The blend operation is a User of the incoming values and of their
  /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
  /// might be incoming with a full mask for which there is no VPValue.
  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
      : VPRecipeBase(VPBlendSC), VPUser(Operands), Phi(Phi) {
    assert(Operands.size() > 0 &&
           ((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
           "Expected either a single incoming value or a positive even number "
           "of operands");
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPBlendSC;
  }

  /// Return the number of incoming values, taking into account that a single
  /// incoming value has no mask (hence the +1 before dividing by 2).
  unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }

  /// Return incoming value number \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); }

  /// Return mask number \p Idx.
  VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};
1035 
1036 /// VPInterleaveRecipe is a recipe for transforming an interleave group of load
1037 /// or stores into one wide load/store and shuffles. The first operand of a
1038 /// VPInterleave recipe is the address, followed by the stored values, followed
1039 /// by an optional mask.
1040 class VPInterleaveRecipe : public VPRecipeBase, public VPUser {
1041   const InterleaveGroup<Instruction> *IG;
1042 
1043   bool HasMask = false;
1044 
1045 public:
VPInterleaveRecipe(const InterleaveGroup<Instruction> * IG,VPValue * Addr,ArrayRef<VPValue * > StoredValues,VPValue * Mask)1046   VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
1047                      ArrayRef<VPValue *> StoredValues, VPValue *Mask)
1048       : VPRecipeBase(VPInterleaveSC), VPUser({Addr}), IG(IG) {
1049     for (auto *SV : StoredValues)
1050       addOperand(SV);
1051     if (Mask) {
1052       HasMask = true;
1053       addOperand(Mask);
1054     }
1055   }
1056   ~VPInterleaveRecipe() override = default;
1057 
1058   /// Method to support type inquiry through isa, cast, and dyn_cast.
classof(const VPRecipeBase * V)1059   static inline bool classof(const VPRecipeBase *V) {
1060     return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC;
1061   }
1062 
1063   /// Return the address accessed by this recipe.
getAddr()1064   VPValue *getAddr() const {
1065     return getOperand(0); // Address is the 1st, mandatory operand.
1066   }
1067 
1068   /// Return the mask used by this recipe. Note that a full mask is represented
1069   /// by a nullptr.
getMask()1070   VPValue *getMask() const {
1071     // Mask is optional and therefore the last, currently 2nd operand.
1072     return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
1073   }
1074 
1075   /// Return the VPValues stored by this interleave group. If it is a load
1076   /// interleave group, return an empty ArrayRef.
getStoredValues()1077   ArrayRef<VPValue *> getStoredValues() const {
1078     // The first operand is the address, followed by the stored values, followed
1079     // by an optional mask.
1080     return ArrayRef<VPValue *>(op_begin(), getNumOperands())
1081         .slice(1, getNumOperands() - (HasMask ? 2 : 1));
1082   }
1083 
1084   /// Generate the wide load or store, and shuffles.
1085   void execute(VPTransformState &State) override;
1086 
1087   /// Print the recipe.
1088   void print(raw_ostream &O, const Twine &Indent,
1089              VPSlotTracker &SlotTracker) const override;
1090 
getInterleaveGroup()1091   const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
1092 };
1093 
/// A recipe to represent inloop reduction operations, performing a reduction on
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPRecipeBase, public VPValue, public VPUser {
  /// The recurrence descriptor for the reduction in question.
  RecurrenceDescriptor *RdxDesc;
  /// Fast math flags to use for the resulting reduction operation.
  bool NoNaN;
  /// Pointer to the TTI, needed to create the target reduction
  const TargetTransformInfo *TTI;

public:
  /// \p CondOp is optional: when non-null it is appended as a third operand
  /// (see getCondOp()).
  VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp,
                    VPValue *VecOp, VPValue *CondOp, bool NoNaN,
                    const TargetTransformInfo *TTI)
      : VPRecipeBase(VPRecipeBase::VPReductionSC),
        VPValue(VPValue::VPVReductionSC, I), VPUser({ChainOp, VecOp}),
        RdxDesc(R), NoNaN(NoNaN), TTI(TTI) {
    if (CondOp)
      addOperand(CondOp);
  }

  ~VPReductionRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVReductionSC;
  }
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPReductionSC;
  }

  /// Generate the reduction in the loop
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// The VPValue of the scalar Chain being accumulated.
  VPValue *getChainOp() const { return getOperand(0); }
  /// The VPValue of the vector value to be reduced.
  VPValue *getVecOp() const { return getOperand(1); }
  /// The VPValue of the condition for the block; nullptr when no condition
  /// operand was supplied at construction.
  VPValue *getCondOp() const {
    return getNumOperands() > 2 ? getOperand(2) : nullptr;
  }
};
1142 
/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to be
/// uniform only one copy, per lane zero, will be generated.
class VPReplicateRecipe : public VPRecipeBase, public VPUser, public VPValue {
  /// Indicator if only a single replica per lane is needed.
  bool IsUniform;

  /// Indicator if the replicas are also predicated.
  bool IsPredicated;

  /// Indicator if the scalar values should also be packed into a vector.
  bool AlsoPack;

public:
  template <typename IterT>
  VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
                    bool IsUniform, bool IsPredicated = false)
      : VPRecipeBase(VPReplicateSC), VPUser(Operands),
        VPValue(VPVReplicateSC, I), IsUniform(IsUniform),
        IsPredicated(IsPredicated) {
    // Retain the previous behavior of predicateInstructions(), where an
    // insert-element of a predicated instruction got hoisted into the
    // predicated basic block iff it was its only user. This is achieved by
    // having predicated instructions also pack their values into a vector by
    // default unless they have a replicated user which uses their scalar value.
    AlsoPack = IsPredicated && !I->use_empty();
  }

  ~VPReplicateRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPReplicateSC;
  }

  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVReplicateSC;
  }

  /// Generate replicas of the desired Ingredient. Replicas will be generated
  /// for all parts and lanes unless a specific part and lane are specified in
  /// the \p State.
  void execute(VPTransformState &State) override;

  /// Override the default AlsoPack decision made in the constructor.
  void setAlsoPack(bool Pack) { AlsoPack = Pack; }

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  bool isUniform() const { return IsUniform; }
};
1196 
/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase, public VPUser {
public:
  /// \p BlockInMask may be nullptr, which represents an all-one mask; in that
  /// case the recipe has no operands.
  VPBranchOnMaskRecipe(VPValue *BlockInMask) : VPRecipeBase(VPBranchOnMaskSC) {
    if (BlockInMask) // nullptr means all-one mask.
      addOperand(BlockInMask);
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC;
  }

  /// Generate the extraction of the appropriate bit from the block mask and the
  /// conditional branch.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override {
    O << " +\n" << Indent << "\"BRANCH-ON-MASK ";
    if (VPValue *Mask = getMask())
      Mask->print(O, SlotTracker);
    else
      O << " All-One";
    O << "\\l\"";
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
    // Mask is optional.
    return getNumOperands() == 1 ? getOperand(0) : nullptr;
  }
};
1233 
/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
class VPPredInstPHIRecipe : public VPRecipeBase, public VPUser {

public:
  /// Construct a VPPredInstPHIRecipe given \p PredV, the predicated value
  /// that needs phi nodes after merging back from a Branch-on-Mask.
  VPPredInstPHIRecipe(VPValue *PredV)
      : VPRecipeBase(VPPredInstPHISC), VPUser(PredV) {}
  ~VPPredInstPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPPredInstPHISC;
  }

  /// Generates phi nodes for live-outs as needed to retain SSA form.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};
1260 
/// A Recipe for widening load/store operations.
/// The recipe uses the following VPValues:
/// - For load: Address, optional mask
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
class VPWidenMemoryInstructionRecipe : public VPRecipeBase,
                                       public VPValue,
                                       public VPUser {

  /// Append \p Mask as the last operand; a nullptr mask (all-one) adds no
  /// operand at all.
  void setMask(VPValue *Mask) {
    if (!Mask)
      return;
    addOperand(Mask);
  }

  /// A mask, when present, is always the last operand: operand 2 of a store
  /// (addr, value, mask) or operand 1 of a load (addr, mask).
  bool isMasked() const {
    return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
  }

public:
  VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask)
      : VPRecipeBase(VPWidenMemoryInstructionSC),
        VPValue(VPValue::VPVMemoryInstructionSC, &Load), VPUser({Addr}) {
    setMask(Mask);
  }

  VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
                                 VPValue *StoredValue, VPValue *Mask)
      : VPRecipeBase(VPWidenMemoryInstructionSC),
        VPValue(VPValue::VPVMemoryInstructionSC, &Store),
        VPUser({Addr, StoredValue}) {
    setMask(Mask);
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
  }

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last operand.
    return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Returns true if this recipe is a store.
  bool isStore() const { return isa<StoreInst>(getUnderlyingInstr()); }

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const {
    assert(isStore() && "Stored value only available for store instructions");
    return getOperand(1); // Stored value is the 2nd, mandatory operand.
  }

  /// Generate the wide load/store.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};
1329 
/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPRecipeBase {
  /// A VPValue representing the canonical vector IV. Owned by the recipe
  /// itself (by value), not allocated separately.
  VPValue Val;

public:
  VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC) {}
  ~VPWidenCanonicalIVRecipe() override = default;

  /// Return the VPValue representing the canonical vector induction variable of
  /// the vector loop.
  const VPValue *getVPValue() const { return &Val; }
  VPValue *getVPValue() { return &Val; }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPWidenCanonicalIVSC;
  }

  /// Generate a canonical vector induction variable of the vector loop, with
  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
  /// step = <VF*UF, VF*UF, ..., VF*UF>.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};
1358 
1359 /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
1360 /// holds a sequence of zero or more VPRecipe's each representing a sequence of
1361 /// output IR instructions.
1362 class VPBasicBlock : public VPBlockBase {
1363 public:
1364   using RecipeListTy = iplist<VPRecipeBase>;
1365 
1366 private:
1367   /// The VPRecipes held in the order of output instructions to generate.
1368   RecipeListTy Recipes;
1369 
1370 public:
1371   VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
1372       : VPBlockBase(VPBasicBlockSC, Name.str()) {
1373     if (Recipe)
1374       appendRecipe(Recipe);
1375   }
1376 
~VPBasicBlock()1377   ~VPBasicBlock() override { Recipes.clear(); }
1378 
1379   /// Instruction iterators...
1380   using iterator = RecipeListTy::iterator;
1381   using const_iterator = RecipeListTy::const_iterator;
1382   using reverse_iterator = RecipeListTy::reverse_iterator;
1383   using const_reverse_iterator = RecipeListTy::const_reverse_iterator;
1384 
1385   //===--------------------------------------------------------------------===//
1386   /// Recipe iterator methods
1387   ///
begin()1388   inline iterator begin() { return Recipes.begin(); }
begin()1389   inline const_iterator begin() const { return Recipes.begin(); }
end()1390   inline iterator end() { return Recipes.end(); }
end()1391   inline const_iterator end() const { return Recipes.end(); }
1392 
rbegin()1393   inline reverse_iterator rbegin() { return Recipes.rbegin(); }
rbegin()1394   inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
rend()1395   inline reverse_iterator rend() { return Recipes.rend(); }
rend()1396   inline const_reverse_iterator rend() const { return Recipes.rend(); }
1397 
size()1398   inline size_t size() const { return Recipes.size(); }
empty()1399   inline bool empty() const { return Recipes.empty(); }
front()1400   inline const VPRecipeBase &front() const { return Recipes.front(); }
front()1401   inline VPRecipeBase &front() { return Recipes.front(); }
back()1402   inline const VPRecipeBase &back() const { return Recipes.back(); }
back()1403   inline VPRecipeBase &back() { return Recipes.back(); }
1404 
1405   /// Returns a reference to the list of recipes.
getRecipeList()1406   RecipeListTy &getRecipeList() { return Recipes; }
1407 
1408   /// Returns a pointer to a member of the recipe list.
getSublistAccess(VPRecipeBase *)1409   static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
1410     return &VPBasicBlock::Recipes;
1411   }
1412 
  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
  }
1417 
insert(VPRecipeBase * Recipe,iterator InsertPt)1418   void insert(VPRecipeBase *Recipe, iterator InsertPt) {
1419     assert(Recipe && "No recipe to append.");
1420     assert(!Recipe->Parent && "Recipe already in VPlan");
1421     Recipe->Parent = this;
1422     Recipes.insert(InsertPt, Recipe);
1423   }
1424 
1425   /// Augment the existing recipes of a VPBasicBlock with an additional
1426   /// \p Recipe as the last recipe.
appendRecipe(VPRecipeBase * Recipe)1427   void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
1428 
  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(struct VPTransformState *State) override;

  /// Return the position of the first non-phi node recipe in the block.
  iterator getFirstNonPhi();

  // Replaces references held by this block's recipes with \p NewValue;
  // used during teardown to sever def-use edges before deletion (see ~VPlan).
  void dropAllReferences(VPValue *NewValue) override;

private:
  /// Create an IR BasicBlock to hold the output instructions generated by this
  /// VPBasicBlock, and return it. Update the CFGState accordingly.
  BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
};
1443 
1444 /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
1445 /// which form a Single-Entry-Single-Exit subgraph of the output IR CFG.
1446 /// A VPRegionBlock may indicate that its contents are to be replicated several
1447 /// times. This is designed to support predicated scalarization, in which a
1448 /// scalar if-then code structure needs to be generated VF * UF times. Having
1449 /// this replication indicator helps to keep a single model for multiple
1450 /// candidate VF's. The actual replication takes place only once the desired VF
1451 /// and UF have been determined.
1452 class VPRegionBlock : public VPBlockBase {
1453   /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
1454   VPBlockBase *Entry;
1455 
1456   /// Hold the Single Exit of the SESE region modelled by the VPRegionBlock.
1457   VPBlockBase *Exit;
1458 
1459   /// An indicator whether this region is to generate multiple replicated
1460   /// instances of output IR corresponding to its VPBlockBases.
1461   bool IsReplicator;
1462 
1463 public:
1464   VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exit,
1465                 const std::string &Name = "", bool IsReplicator = false)
VPBlockBase(VPRegionBlockSC,Name)1466       : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exit(Exit),
1467         IsReplicator(IsReplicator) {
1468     assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
1469     assert(Exit->getSuccessors().empty() && "Exit block has successors.");
1470     Entry->setParent(this);
1471     Exit->setParent(this);
1472   }
1473   VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
VPBlockBase(VPRegionBlockSC,Name)1474       : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exit(nullptr),
1475         IsReplicator(IsReplicator) {}
1476 
~VPRegionBlock()1477   ~VPRegionBlock() override {
1478     if (Entry) {
1479       VPValue DummyValue;
1480       Entry->dropAllReferences(&DummyValue);
1481       deleteCFG(Entry);
1482     }
1483   }
1484 
1485   /// Method to support type inquiry through isa, cast, and dyn_cast.
classof(const VPBlockBase * V)1486   static inline bool classof(const VPBlockBase *V) {
1487     return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
1488   }
1489 
getEntry()1490   const VPBlockBase *getEntry() const { return Entry; }
getEntry()1491   VPBlockBase *getEntry() { return Entry; }
1492 
1493   /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
1494   /// EntryBlock must have no predecessors.
setEntry(VPBlockBase * EntryBlock)1495   void setEntry(VPBlockBase *EntryBlock) {
1496     assert(EntryBlock->getPredecessors().empty() &&
1497            "Entry block cannot have predecessors.");
1498     Entry = EntryBlock;
1499     EntryBlock->setParent(this);
1500   }
1501 
1502   // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a
1503   // specific interface of llvm::Function, instead of using
1504   // GraphTraints::getEntryNode. We should add a new template parameter to
1505   // DominatorTreeBase representing the Graph type.
front()1506   VPBlockBase &front() const { return *Entry; }
1507 
getExit()1508   const VPBlockBase *getExit() const { return Exit; }
getExit()1509   VPBlockBase *getExit() { return Exit; }
1510 
1511   /// Set \p ExitBlock as the exit VPBlockBase of this VPRegionBlock. \p
1512   /// ExitBlock must have no successors.
setExit(VPBlockBase * ExitBlock)1513   void setExit(VPBlockBase *ExitBlock) {
1514     assert(ExitBlock->getSuccessors().empty() &&
1515            "Exit block cannot have successors.");
1516     Exit = ExitBlock;
1517     ExitBlock->setParent(this);
1518   }
1519 
1520   /// An indicator whether this region is to generate multiple replicated
1521   /// instances of output IR corresponding to its VPBlockBases.
isReplicator()1522   bool isReplicator() const { return IsReplicator; }
1523 
1524   /// The method which generates the output IR instructions that correspond to
1525   /// this VPRegionBlock, thereby "executing" the VPlan.
1526   void execute(struct VPTransformState *State) override;
1527 
1528   void dropAllReferences(VPValue *NewValue) override;
1529 };
1530 
1531 //===----------------------------------------------------------------------===//
1532 // GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs     //
1533 //===----------------------------------------------------------------------===//
1534 
// The following set of template specializations implement GraphTraits to treat
// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note
// that VPBlockBase traits don't recurse into VPRegionBlocks, i.e., if the
// VPBlockBase is a VPRegionBlock, this specialization provides access to its
// successors/predecessors but not to the blocks inside the region.
1540 
// GraphTraits over a mutable VPBlockBase: children are the block's
// successors; the region interior is intentionally not traversed.
template <> struct GraphTraits<VPBlockBase *> {
  using NodeRef = VPBlockBase *;
  using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;

  static NodeRef getEntryNode(NodeRef N) { return N; }

  static inline ChildIteratorType child_begin(NodeRef N) {
    return N->getSuccessors().begin();
  }

  static inline ChildIteratorType child_end(NodeRef N) {
    return N->getSuccessors().end();
  }
};
1555 
// Const counterpart of the VPBlockBase traits above.
template <> struct GraphTraits<const VPBlockBase *> {
  using NodeRef = const VPBlockBase *;
  using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::const_iterator;

  static NodeRef getEntryNode(NodeRef N) { return N; }

  static inline ChildIteratorType child_begin(NodeRef N) {
    return N->getSuccessors().begin();
  }

  static inline ChildIteratorType child_end(NodeRef N) {
    return N->getSuccessors().end();
  }
};
1570 
// Inverse order specialization for VPBasicBlocks. Predecessors are used instead
// of successors for the inverse traversal.
template <> struct GraphTraits<Inverse<VPBlockBase *>> {
  using NodeRef = VPBlockBase *;
  using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;

  static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }

  static inline ChildIteratorType child_begin(NodeRef N) {
    return N->getPredecessors().begin();
  }

  static inline ChildIteratorType child_end(NodeRef N) {
    return N->getPredecessors().end();
  }
};
1587 
// The following set of template specializations implement GraphTraits to
// treat VPRegionBlock as a graph and recurse inside its nodes. It's important
// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases
// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so
// there won't be automatic recursion into other VPBlockBases that turn to be
// VPRegionBlocks.

template <>
struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> {
  using GraphRef = VPRegionBlock *;
  using nodes_iterator = df_iterator<NodeRef>;

  // The region is entered through its single entry block.
  static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }

  static nodes_iterator nodes_begin(GraphRef N) {
    return nodes_iterator::begin(N->getEntry());
  }

  static nodes_iterator nodes_end(GraphRef N) {
    // df_iterator::end() returns an empty iterator so the node used doesn't
    // matter.
    return nodes_iterator::end(N);
  }
};
1612 
// Const counterpart of the VPRegionBlock traits above.
template <>
struct GraphTraits<const VPRegionBlock *>
    : public GraphTraits<const VPBlockBase *> {
  using GraphRef = const VPRegionBlock *;
  using nodes_iterator = df_iterator<NodeRef>;

  static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }

  static nodes_iterator nodes_begin(GraphRef N) {
    return nodes_iterator::begin(N->getEntry());
  }

  static nodes_iterator nodes_end(GraphRef N) {
    // df_iterator::end() returns an empty iterator so the node used doesn't
    // matter.
    return nodes_iterator::end(N);
  }
};
1631 
// Inverse traversal of a region: enter through the region's exit block and
// walk predecessor edges (inherited from the Inverse VPBlockBase traits).
template <>
struct GraphTraits<Inverse<VPRegionBlock *>>
    : public GraphTraits<Inverse<VPBlockBase *>> {
  using GraphRef = VPRegionBlock *;
  using nodes_iterator = df_iterator<NodeRef>;

  static NodeRef getEntryNode(Inverse<GraphRef> N) {
    return N.Graph->getExit();
  }

  static nodes_iterator nodes_begin(GraphRef N) {
    return nodes_iterator::begin(N->getExit());
  }

  static nodes_iterator nodes_end(GraphRef N) {
    // df_iterator::end() returns an empty iterator so the node used doesn't
    // matter.
    return nodes_iterator::end(N);
  }
};
1652 
1653 /// VPlan models a candidate for vectorization, encoding various decisions take
1654 /// to produce efficient output IR, including which branches, basic-blocks and
1655 /// output IR instructions to generate, and their cost. VPlan holds a
1656 /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
1657 /// VPBlock.
1658 class VPlan {
1659   friend class VPlanPrinter;
1660   friend class VPSlotTracker;
1661 
1662   /// Hold the single entry to the Hierarchical CFG of the VPlan.
1663   VPBlockBase *Entry;
1664 
1665   /// Holds the VFs applicable to this VPlan.
1666   SmallSetVector<ElementCount, 2> VFs;
1667 
1668   /// Holds the name of the VPlan, for printing.
1669   std::string Name;
1670 
1671   /// Holds all the external definitions created for this VPlan.
1672   // TODO: Introduce a specific representation for external definitions in
1673   // VPlan. External definitions must be immutable and hold a pointer to its
1674   // underlying IR that will be used to implement its structural comparison
1675   // (operators '==' and '<').
1676   SmallPtrSet<VPValue *, 16> VPExternalDefs;
1677 
1678   /// Represents the backedge taken count of the original loop, for folding
1679   /// the tail.
1680   VPValue *BackedgeTakenCount = nullptr;
1681 
1682   /// Holds a mapping between Values and their corresponding VPValue inside
1683   /// VPlan.
1684   Value2VPValueTy Value2VPValue;
1685 
1686   /// Contains all VPValues that been allocated by addVPValue directly and need
1687   /// to be free when the plan's destructor is called.
1688   SmallVector<VPValue *, 16> VPValuesToFree;
1689 
1690   /// Holds the VPLoopInfo analysis for this VPlan.
1691   VPLoopInfo VPLInfo;
1692 
1693   /// Holds the condition bit values built during VPInstruction to VPRecipe transformation.
1694   SmallVector<VPValue *, 4> VPCBVs;
1695 
1696 public:
1697   VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {
1698     if (Entry)
1699       Entry->setPlan(this);
1700   }
1701 
1702   ~VPlan() {
1703     if (Entry) {
1704       VPValue DummyValue;
1705       for (VPBlockBase *Block : depth_first(Entry))
1706         Block->dropAllReferences(&DummyValue);
1707 
1708       VPBlockBase::deleteCFG(Entry);
1709     }
1710     for (VPValue *VPV : VPValuesToFree)
1711       delete VPV;
1712     if (BackedgeTakenCount)
1713       delete BackedgeTakenCount;
1714     for (VPValue *Def : VPExternalDefs)
1715       delete Def;
1716     for (VPValue *CBV : VPCBVs)
1717       delete CBV;
1718   }
1719 
1720   /// Generate the IR code for this VPlan.
1721   void execute(struct VPTransformState *State);
1722 
1723   VPBlockBase *getEntry() { return Entry; }
1724   const VPBlockBase *getEntry() const { return Entry; }
1725 
1726   VPBlockBase *setEntry(VPBlockBase *Block) {
1727     Entry = Block;
1728     Block->setPlan(this);
1729     return Entry;
1730   }
1731 
1732   /// The backedge taken count of the original loop.
1733   VPValue *getOrCreateBackedgeTakenCount() {
1734     if (!BackedgeTakenCount)
1735       BackedgeTakenCount = new VPValue();
1736     return BackedgeTakenCount;
1737   }
1738 
1739   void addVF(ElementCount VF) { VFs.insert(VF); }
1740 
1741   bool hasVF(ElementCount VF) { return VFs.count(VF); }
1742 
1743   const std::string &getName() const { return Name; }
1744 
1745   void setName(const Twine &newName) { Name = newName.str(); }
1746 
1747   /// Add \p VPVal to the pool of external definitions if it's not already
1748   /// in the pool.
1749   void addExternalDef(VPValue *VPVal) {
1750     VPExternalDefs.insert(VPVal);
1751   }
1752 
1753   /// Add \p CBV to the vector of condition bit values.
1754   void addCBV(VPValue *CBV) {
1755     VPCBVs.push_back(CBV);
1756   }
1757 
1758   void addVPValue(Value *V) {
1759     assert(V && "Trying to add a null Value to VPlan");
1760     assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
1761     VPValue *VPV = new VPValue(V);
1762     Value2VPValue[V] = VPV;
1763     VPValuesToFree.push_back(VPV);
1764   }
1765 
1766   void addVPValue(Value *V, VPValue *VPV) {
1767     assert(V && "Trying to add a null Value to VPlan");
1768     assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
1769     Value2VPValue[V] = VPV;
1770   }
1771 
1772   VPValue *getVPValue(Value *V) {
1773     assert(V && "Trying to get the VPValue of a null Value");
1774     assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
1775     return Value2VPValue[V];
1776   }
1777 
1778   VPValue *getOrAddVPValue(Value *V) {
1779     assert(V && "Trying to get or add the VPValue of a null Value");
1780     if (!Value2VPValue.count(V))
1781       addVPValue(V);
1782     return getVPValue(V);
1783   }
1784 
1785   void removeVPValueFor(Value *V) { Value2VPValue.erase(V); }
1786 
1787   /// Return the VPLoopInfo analysis for this VPlan.
1788   VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
1789   const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
1790 
1791   /// Dump the plan to stderr (for debugging).
1792   void dump() const;
1793 
1794   /// Returns a range mapping the values the range \p Operands to their
1795   /// corresponding VPValues.
1796   iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>>
1797   mapToVPValues(User::op_range Operands) {
1798     std::function<VPValue *(Value *)> Fn = [this](Value *Op) {
1799       return getOrAddVPValue(Op);
1800     };
1801     return map_range(Operands, Fn);
1802   }
1803 
1804 private:
1805   /// Add to the given dominator tree the header block and every new basic block
1806   /// that was created between it and the latch block, inclusive.
1807   static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
1808                                   BasicBlock *LoopPreHeaderBB,
1809                                   BasicBlock *LoopExitBB);
1810 };
1811 
1812 /// VPlanPrinter prints a given VPlan to a given output stream. The printing is
1813 /// indented and follows the dot format.
1814 class VPlanPrinter {
1815   friend inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan);
1816   friend inline raw_ostream &operator<<(raw_ostream &OS,
1817                                         const struct VPlanIngredient &I);
1818 
1819 private:
1820   raw_ostream &OS;
1821   const VPlan &Plan;
1822   unsigned Depth = 0;
1823   unsigned TabWidth = 2;
1824   std::string Indent;
1825   unsigned BID = 0;
1826   SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
1827 
1828   VPSlotTracker SlotTracker;
1829 
1830   VPlanPrinter(raw_ostream &O, const VPlan &P)
1831       : OS(O), Plan(P), SlotTracker(&P) {}
1832 
1833   /// Handle indentation.
1834   void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
1835 
1836   /// Print a given \p Block of the Plan.
1837   void dumpBlock(const VPBlockBase *Block);
1838 
1839   /// Print the information related to the CFG edges going out of a given
1840   /// \p Block, followed by printing the successor blocks themselves.
1841   void dumpEdges(const VPBlockBase *Block);
1842 
1843   /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
1844   /// its successor blocks.
1845   void dumpBasicBlock(const VPBasicBlock *BasicBlock);
1846 
1847   /// Print a given \p Region of the Plan.
1848   void dumpRegion(const VPRegionBlock *Region);
1849 
1850   unsigned getOrCreateBID(const VPBlockBase *Block) {
1851     return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
1852   }
1853 
1854   const Twine getOrCreateName(const VPBlockBase *Block);
1855 
1856   const Twine getUID(const VPBlockBase *Block);
1857 
1858   /// Print the information related to a CFG edge between two VPBlockBases.
1859   void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
1860                 const Twine &Label);
1861 
1862   void dump();
1863 
1864   static void printAsIngredient(raw_ostream &O, const Value *V);
1865 };
1866 
/// Lightweight wrapper marking an IR Value for "ingredient"-style printing
/// via the operator<< overload below.
struct VPlanIngredient {
  const Value *V;

  VPlanIngredient(const Value *V) : V(V) {}
};
1872 
// Print an ingredient-wrapped IR Value via VPlanPrinter.
inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
  VPlanPrinter::printAsIngredient(OS, I.V);
  return OS;
}

// Print a whole VPlan in dot format (see VPlanPrinter).
inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
  VPlanPrinter Printer(OS, Plan);
  Printer.dump();
  return OS;
}
1883 
1884 //===----------------------------------------------------------------------===//
1885 // VPlan Utilities
1886 //===----------------------------------------------------------------------===//
1887 
1888 /// Class that provides utilities for VPBlockBases in VPlan.
1889 class VPBlockUtils {
1890 public:
1891   VPBlockUtils() = delete;
1892 
1893   /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
1894   /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
1895   /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. If \p BlockPtr
1896   /// has more than one successor, its conditional bit is propagated to \p
1897   /// NewBlock. \p NewBlock must have neither successors nor predecessors.
1898   static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
1899     assert(NewBlock->getSuccessors().empty() &&
1900            "Can't insert new block with successors.");
1901     // TODO: move successors from BlockPtr to NewBlock when this functionality
1902     // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr
1903     // already has successors.
1904     BlockPtr->setOneSuccessor(NewBlock);
1905     NewBlock->setPredecessors({BlockPtr});
1906     NewBlock->setParent(BlockPtr->getParent());
1907   }
1908 
1909   /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
1910   /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p
1911   /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
1912   /// parent to \p IfTrue and \p IfFalse. \p Condition is set as the successor
1913   /// selector. \p BlockPtr must have no successors and \p IfTrue and \p IfFalse
1914   /// must have neither successors nor predecessors.
1915   static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
1916                                    VPValue *Condition, VPBlockBase *BlockPtr) {
1917     assert(IfTrue->getSuccessors().empty() &&
1918            "Can't insert IfTrue with successors.");
1919     assert(IfFalse->getSuccessors().empty() &&
1920            "Can't insert IfFalse with successors.");
1921     BlockPtr->setTwoSuccessors(IfTrue, IfFalse, Condition);
1922     IfTrue->setPredecessors({BlockPtr});
1923     IfFalse->setPredecessors({BlockPtr});
1924     IfTrue->setParent(BlockPtr->getParent());
1925     IfFalse->setParent(BlockPtr->getParent());
1926   }
1927 
1928   /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
1929   /// the successors of \p From and \p From to the predecessors of \p To. Both
1930   /// VPBlockBases must have the same parent, which can be null. Both
1931   /// VPBlockBases can be already connected to other VPBlockBases.
1932   static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
1933     assert((From->getParent() == To->getParent()) &&
1934            "Can't connect two block with different parents");
1935     assert(From->getNumSuccessors() < 2 &&
1936            "Blocks can't have more than two successors.");
1937     From->appendSuccessor(To);
1938     To->appendPredecessor(From);
1939   }
1940 
1941   /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
1942   /// from the successors of \p From and \p From from the predecessors of \p To.
1943   static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
1944     assert(To && "Successor to disconnect is null.");
1945     From->removeSuccessor(To);
1946     To->removePredecessor(From);
1947   }
1948 
1949   /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge.
1950   static bool isBackEdge(const VPBlockBase *FromBlock,
1951                          const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {
1952     assert(FromBlock->getParent() == ToBlock->getParent() &&
1953            FromBlock->getParent() && "Must be in same region");
1954     const VPLoop *FromLoop = VPLI->getLoopFor(FromBlock);
1955     const VPLoop *ToLoop = VPLI->getLoopFor(ToBlock);
1956     if (!FromLoop || !ToLoop || FromLoop != ToLoop)
1957       return false;
1958 
1959     // A back-edge is a branch from the loop latch to its header.
1960     return ToLoop->isLoopLatch(FromBlock) && ToBlock == ToLoop->getHeader();
1961   }
1962 
1963   /// Returns true if \p Block is a loop latch
1964   static bool blockIsLoopLatch(const VPBlockBase *Block,
1965                                const VPLoopInfo *VPLInfo) {
1966     if (const VPLoop *ParentVPL = VPLInfo->getLoopFor(Block))
1967       return ParentVPL->isLoopLatch(Block);
1968 
1969     return false;
1970   }
1971 
1972   /// Count and return the number of succesors of \p PredBlock excluding any
1973   /// backedges.
1974   static unsigned countSuccessorsNoBE(VPBlockBase *PredBlock,
1975                                       VPLoopInfo *VPLI) {
1976     unsigned Count = 0;
1977     for (VPBlockBase *SuccBlock : PredBlock->getSuccessors()) {
1978       if (!VPBlockUtils::isBackEdge(PredBlock, SuccBlock, VPLI))
1979         Count++;
1980     }
1981     return Count;
1982   }
1983 };
1984 
1985 class VPInterleavedAccessInfo {
1986   DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
1987       InterleaveGroupMap;
1988 
1989   /// Type for mapping of instruction based interleave groups to VPInstruction
1990   /// interleave groups
1991   using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
1992                              InterleaveGroup<VPInstruction> *>;
1993 
1994   /// Recursively \p Region and populate VPlan based interleave groups based on
1995   /// \p IAI.
1996   void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
1997                    InterleavedAccessInfo &IAI);
1998   /// Recursively traverse \p Block and populate VPlan based interleave groups
1999   /// based on \p IAI.
2000   void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
2001                   InterleavedAccessInfo &IAI);
2002 
2003 public:
2004   VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);
2005 
2006   ~VPInterleavedAccessInfo() {
2007     SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
2008     // Avoid releasing a pointer twice.
2009     for (auto &I : InterleaveGroupMap)
2010       DelSet.insert(I.second);
2011     for (auto *Ptr : DelSet)
2012       delete Ptr;
2013   }
2014 
2015   /// Get the interleave group that \p Instr belongs to.
2016   ///
2017   /// \returns nullptr if doesn't have such group.
2018   InterleaveGroup<VPInstruction> *
2019   getInterleaveGroup(VPInstruction *Instr) const {
2020     if (InterleaveGroupMap.count(Instr))
2021       return InterleaveGroupMap.find(Instr)->second;
2022     return nullptr;
2023   }
2024 };
2025 
/// Class that maps (parts of) an existing VPlan to trees of combined
/// VPInstructions.
class VPlanSlp {
  // Classification of a candidate bundle while building the tree.
  enum class OpMode { Failed, Load, Opcode };

  /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
  /// DenseMap keys.
  struct BundleDenseMapInfo {
    // Sentinel bundles use the impossible pointer values -1/-2, mirroring
    // the pointer DenseMapInfo convention for empty/tombstone keys.
    static SmallVector<VPValue *, 4> getEmptyKey() {
      return {reinterpret_cast<VPValue *>(-1)};
    }

    static SmallVector<VPValue *, 4> getTombstoneKey() {
      return {reinterpret_cast<VPValue *>(-2)};
    }

    static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
    }

    static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
                        const SmallVector<VPValue *, 4> &RHS) {
      return LHS == RHS;
    }
  };

  /// Mapping of values in the original VPlan to a combined VPInstruction.
  DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
      BundleToCombined;

  VPInterleavedAccessInfo &IAI;

  /// Basic block to operate on. For now, only instructions in a single BB are
  /// considered.
  const VPBasicBlock &BB;

  /// Indicates whether we managed to combine all visited instructions or not.
  bool CompletelySLP = true;

  /// Width of the widest combined bundle in bits.
  unsigned WidestBundleBits = 0;

  // A multi-node operand: the combined instruction plus its operand bundle.
  using MultiNodeOpTy =
      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;

  // Input operand bundles for the current multi node. Each multi node operand
  // bundle contains values not matching the multi node's opcode. They will
  // be reordered in reorderMultiNodeOps, once we completed building a
  // multi node.
  SmallVector<MultiNodeOpTy, 4> MultiNodeOps;

  /// Indicates whether we are building a multi node currently.
  bool MultiNodeActive = false;

  /// Check if we can vectorize Operands together.
  bool areVectorizable(ArrayRef<VPValue *> Operands) const;

  /// Add combined instruction \p New for the bundle \p Operands.
  void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);

  /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
  VPInstruction *markFailed();

  /// Reorder operands in the multi node to maximize sequential memory access
  /// and commutative operations.
  SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();

  /// Choose the best candidate to use for the lane after \p Last. The set of
  /// candidates to choose from are values with an opcode matching \p Last's
  /// or loads consecutive to \p Last.
  std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
                                       SmallPtrSetImpl<VPValue *> &Candidates,
                                       VPInterleavedAccessInfo &IAI);

  /// Print bundle \p Values to dbgs().
  void dumpBundle(ArrayRef<VPValue *> Values);

public:
  VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}

  ~VPlanSlp() = default;

  /// Tries to build an SLP tree rooted at \p Operands and returns a
  /// VPInstruction combining \p Operands, if they can be combined.
  VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);

  /// Return the width of the widest combined bundle in bits.
  unsigned getWidestBundleBits() const { return WidestBundleBits; }

  /// Return true if all visited instruction can be combined.
  bool isCompletelySLP() const { return CompletelySLP; }
};
2118 } // end namespace llvm
2119 
2120 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
2121