//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of the Vectorization Plan base classes:
/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
/// VPBlockBase, together implementing a Hierarchical CFG;
/// 2. Specializations of GraphTraits that allow VPBlockBase graphs to be
/// treated as proper graphs for generic algorithms;
/// 3. Pure virtual VPRecipeBase serving as the base class for recipes contained
/// within VPBasicBlocks;
/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
/// instruction;
/// 5. The VPlan class holding a candidate for vectorization;
/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H

#include "VPlanLoopInfo.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <map>
#include <string>

namespace llvm {

class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class LoopInfo;
class raw_ostream;
class RecurrenceDescriptor;
class Value;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
class VPlanSlp;

/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.:
/// [1, 9) = {1, 2, 4, 8}
struct VFRange {
  // A power of 2.
  const ElementCount Start;

  // Need not be a power of 2. If End <= Start range is empty.
  ElementCount End;

  bool isEmpty() const {
    return End.getKnownMinValue() <= Start.getKnownMinValue();
  }

  VFRange(const ElementCount &Start, const ElementCount &End)
      : Start(Start), End(End) {
    assert(Start.isScalable() == End.isScalable() &&
           "Both Start and End should have the same scalable flag");
    assert(isPowerOf2_32(Start.getKnownMinValue()) &&
           "Expected Start to be a power of 2");
  }
};
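
// Illustrative sketch (not part of the original header): a VFRange with
// Start=1 and End=9 covers the fixed vectorization factors {1, 2, 4, 8};
// clamping End at or below Start empties the range.
//
//   VFRange Range(ElementCount::getFixed(1), ElementCount::getFixed(9));
//   assert(!Range.isEmpty());
//   Range.End = ElementCount::getFixed(1);
//   assert(Range.isEmpty());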

using VPlanPtr = std::unique_ptr<VPlan>;

/// In what follows, the term "input IR" refers to code that is fed into the
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.

/// VPIteration represents a single point in the iteration space of the output
/// (vectorized and/or unrolled) IR loop.
struct VPIteration {
  /// in [0..UF)
  unsigned Part;

  /// in [0..VF)
  unsigned Lane;
};

/// This is a helper struct for maintaining vectorization state. It's used for
/// mapping values from the original loop to their corresponding values in
/// the new loop. Two mappings are maintained: one for vectorized values and
/// one for scalarized values. Vectorized values are represented with UF
/// vector values in the new loop, and scalarized values are represented with
/// UF x VF scalar values in the new loop. UF and VF are the unroll and
/// vectorization factors, respectively.
///
/// Entries can be added to either map with setVectorValue and setScalarValue,
/// which assert that an entry was not already added before. If an entry is to
/// replace an existing one, call resetVectorValue and resetScalarValue. This is
/// currently needed to modify the mapped values during "fix-up" operations that
/// occur once the first phase of widening is complete. These operations include
/// type truncation and the second phase of recurrence widening.
///
/// Entries from either map can be retrieved using the getVectorValue and
/// getScalarValue functions, which assert that the desired value exists.
struct VectorizerValueMap {
  friend struct VPTransformState;

private:
  /// The unroll factor. Each entry in the vector map contains UF vector values.
  unsigned UF;

  /// The vectorization factor. Each entry in the scalar map contains UF x VF
  /// scalar values.
  ElementCount VF;

  /// The vector and scalar map storage. We use std::map and not DenseMap
  /// because insertions to DenseMap invalidate its iterators.
  using VectorParts = SmallVector<Value *, 2>;
  using ScalarParts = SmallVector<SmallVector<Value *, 4>, 2>;
  std::map<Value *, VectorParts> VectorMapStorage;
  std::map<Value *, ScalarParts> ScalarMapStorage;

public:
  /// Construct an empty map with the given unroll and vectorization factors.
  VectorizerValueMap(unsigned UF, ElementCount VF) : UF(UF), VF(VF) {}

  /// \return True if the map has any vector entry for \p Key.
  bool hasAnyVectorValue(Value *Key) const {
    return VectorMapStorage.count(Key);
  }

  /// \return True if the map has a vector entry for \p Key and \p Part.
  bool hasVectorValue(Value *Key, unsigned Part) const {
    assert(Part < UF && "Queried Vector Part is too large.");
    if (!hasAnyVectorValue(Key))
      return false;
    const VectorParts &Entry = VectorMapStorage.find(Key)->second;
    assert(Entry.size() == UF && "VectorParts has wrong dimensions.");
    return Entry[Part] != nullptr;
  }

  /// \return True if the map has any scalar entry for \p Key.
  bool hasAnyScalarValue(Value *Key) const {
    return ScalarMapStorage.count(Key);
  }

  /// \return True if the map has a scalar entry for \p Key and \p Instance.
  bool hasScalarValue(Value *Key, const VPIteration &Instance) const {
    assert(Instance.Part < UF && "Queried Scalar Part is too large.");
    assert(Instance.Lane < VF.getKnownMinValue() &&
           "Queried Scalar Lane is too large.");

    if (!hasAnyScalarValue(Key))
      return false;
    const ScalarParts &Entry = ScalarMapStorage.find(Key)->second;
    assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");
    assert(Entry[Instance.Part].size() == VF.getKnownMinValue() &&
           "ScalarParts has wrong dimensions.");
    return Entry[Instance.Part][Instance.Lane] != nullptr;
  }

  /// Retrieve the existing vector value that corresponds to \p Key and
  /// \p Part.
  Value *getVectorValue(Value *Key, unsigned Part) {
    assert(hasVectorValue(Key, Part) && "Getting non-existent value.");
    return VectorMapStorage[Key][Part];
  }

  /// Retrieve the existing scalar value that corresponds to \p Key and
  /// \p Instance.
  Value *getScalarValue(Value *Key, const VPIteration &Instance) {
    assert(hasScalarValue(Key, Instance) && "Getting non-existent value.");
    return ScalarMapStorage[Key][Instance.Part][Instance.Lane];
  }

  /// Set a vector value associated with \p Key and \p Part. Assumes such a
  /// value is not already set. If it is, use resetVectorValue() instead.
  void setVectorValue(Value *Key, unsigned Part, Value *Vector) {
    assert(!hasVectorValue(Key, Part) && "Vector value already set for part");
    if (!VectorMapStorage.count(Key)) {
      VectorParts Entry(UF);
      VectorMapStorage[Key] = Entry;
    }
    VectorMapStorage[Key][Part] = Vector;
  }

  /// Set a scalar value associated with \p Key and \p Instance. Assumes such a
  /// value is not already set.
  void setScalarValue(Value *Key, const VPIteration &Instance, Value *Scalar) {
    assert(!hasScalarValue(Key, Instance) && "Scalar value already set");
    if (!ScalarMapStorage.count(Key)) {
      ScalarParts Entry(UF);
      // TODO: Consider storing uniform values only per-part, as they occupy
      // lane 0 only, keeping the other VF-1 redundant entries null.
      for (unsigned Part = 0; Part < UF; ++Part)
        Entry[Part].resize(VF.getKnownMinValue(), nullptr);
      ScalarMapStorage[Key] = Entry;
    }
    ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
  }

  /// Reset the vector value associated with \p Key for the given \p Part.
  /// This function can be used to update values that have already been
  /// vectorized. This is the case for "fix-up" operations including type
  /// truncation and the second phase of recurrence vectorization.
  void resetVectorValue(Value *Key, unsigned Part, Value *Vector) {
    assert(hasVectorValue(Key, Part) && "Vector value not set for part");
    VectorMapStorage[Key][Part] = Vector;
  }

  /// Reset the scalar value associated with \p Key for \p Part and \p Lane.
  /// This function can be used to update values that have already been
  /// scalarized. This is the case for "fix-up" operations including scalar phi
  /// nodes for scalarized and predicated instructions.
  void resetScalarValue(Value *Key, const VPIteration &Instance,
                        Value *Scalar) {
    assert(hasScalarValue(Key, Instance) &&
           "Scalar value not set for part and lane");
    ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
  }
};
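
// Illustrative sketch (assumption, not from the original header): with UF=2,
// each original Value maps to two vector parts, and each scalar lane is keyed
// by a VPIteration {Part, Lane}. Orig, WideV0, WideV1 and ScalarV are
// hypothetical Value pointers used only for illustration.
//
//   VectorizerValueMap Map(/*UF=*/2, ElementCount::getFixed(4));
//   Map.setVectorValue(Orig, /*Part=*/0, WideV0); // first unrolled copy
//   Map.setVectorValue(Orig, /*Part=*/1, WideV1); // second unrolled copy
//   Value *V = Map.getVectorValue(Orig, 1);       // == WideV1
//   Map.setScalarValue(Orig, {/*Part=*/0, /*Lane=*/3}, ScalarV);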

/// This class is used to enable the VPlan to invoke a method of ILV. This is
/// needed until the method is refactored out of ILV and becomes reusable.
struct VPCallback {
  virtual ~VPCallback() {}
  virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0;
  virtual Value *getOrCreateScalarValue(Value *V,
                                        const VPIteration &Instance) = 0;
};

/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
  VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
                   DominatorTree *DT, IRBuilder<> &Builder,
                   VectorizerValueMap &ValueMap, InnerLoopVectorizer *ILV,
                   VPCallback &Callback)
      : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder),
        ValueMap(ValueMap), ILV(ILV), Callback(Callback) {}

  /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
  ElementCount VF;
  unsigned UF;

  /// Hold the indices to generate specific scalar instructions. Null indicates
  /// that all instances are to be generated, using either scalar or vector
  /// instructions.
  Optional<VPIteration> Instance;

  struct DataState {
    /// A type for vectorized values in the new loop. Each value from the
    /// original loop, when vectorized, is represented by UF vector values in
    /// the new unrolled loop, where UF is the unroll factor.
    typedef SmallVector<Value *, 2> PerPartValuesTy;

    DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;
  } Data;

  /// Get the generated Value for a given VPValue and a given Part. Note that
  /// as some Defs are still created by ILV and managed in its ValueMap, this
  /// method will delegate the call to ILV in such cases in order to provide
  /// callers a consistent API.
  /// \see set.
  Value *get(VPValue *Def, unsigned Part) {
    // If Values have been set for this Def return the one relevant for \p Part.
    if (Data.PerPartOutput.count(Def))
      return Data.PerPartOutput[Def][Part];
    // Def is managed by ILV: bring the Values from ValueMap.
    return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part);
  }

  /// Get the generated Value for a given VPValue and given Part and Lane.
  Value *get(VPValue *Def, const VPIteration &Instance) {
    // If the Def is managed directly by VPTransformState, extract the lane from
    // the relevant part. Note that currently only VPInstructions and external
    // defs are managed by VPTransformState. Other Defs are still created by ILV
    // and managed in its ValueMap. For those this method currently just
    // delegates the call to ILV below.
    if (Data.PerPartOutput.count(Def)) {
      auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
      if (!VecPart->getType()->isVectorTy()) {
        assert(Instance.Lane == 0 && "cannot get lane > 0 for scalar");
        return VecPart;
      }
      // TODO: Cache created scalar values.
      return Builder.CreateExtractElement(VecPart,
                                          Builder.getInt32(Instance.Lane));
    }

    return Callback.getOrCreateScalarValue(VPValue2Value[Def], Instance);
  }

  /// Set the generated Value for a given VPValue and a given Part.
  void set(VPValue *Def, Value *V, unsigned Part) {
    if (!Data.PerPartOutput.count(Def)) {
      DataState::PerPartValuesTy Entry(UF);
      Data.PerPartOutput[Def] = Entry;
    }
    Data.PerPartOutput[Def][Part] = V;
  }
  void set(VPValue *Def, Value *IRDef, Value *V, unsigned Part);

  /// Hold state information used when constructing the CFG of the output IR,
  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
  struct CFGState {
    /// The previous VPBasicBlock visited. Initially set to null.
    VPBasicBlock *PrevVPBB = nullptr;

    /// The previous IR BasicBlock created or used. Initially set to the new
    /// header BasicBlock.
    BasicBlock *PrevBB = nullptr;

    /// The last IR BasicBlock in the output IR. Set to the new latch
    /// BasicBlock, used for placing the newly created BasicBlocks.
    BasicBlock *LastBB = nullptr;

    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
    /// of replication, maps the BasicBlock of the last replica created.
    SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;

    /// Vector of VPBasicBlocks whose terminator instruction needs to be fixed
    /// up at the end of vector code generation.
    SmallVector<VPBasicBlock *, 8> VPBBsToFix;

    CFGState() = default;
  } CFG;

  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
  LoopInfo *LI;

  /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
  DominatorTree *DT;

  /// Hold a reference to the IRBuilder used to generate output IR code.
  IRBuilder<> &Builder;

  /// Hold a reference to the Value state information used when generating the
  /// Values of the output IR.
  VectorizerValueMap &ValueMap;

  /// Hold a reference to a mapping between VPValues in VPlan and original
  /// Values they correspond to.
  VPValue2ValueTy VPValue2Value;

  /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
  Value *CanonicalIV = nullptr;

  /// Hold the trip count of the scalar loop.
  Value *TripCount = nullptr;

  /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
  InnerLoopVectorizer *ILV;

  VPCallback &Callback;
};
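
// Illustrative sketch (assumption, not from the original header): a recipe's
// execute() typically reads its operands and records its results once per
// unroll part through this state. "SomeRecipe" and its operand are
// hypothetical names used only for illustration.
//
//   void SomeRecipe::execute(VPTransformState &State) {
//     for (unsigned Part = 0; Part < State.UF; ++Part) {
//       Value *Op = State.get(getOperand(0), Part); // operand for this part
//       Value *Res = State.Builder.CreateFNeg(Op);  // emit output IR
//       State.set(this, Res, Part);                 // record result per part
//     }
//   }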

/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class VPBlockBase {
  friend class VPBlockUtils;

  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).

  /// An optional name for the block.
  std::string Name;

  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
  /// it is a topmost VPBlockBase.
  VPRegionBlock *Parent = nullptr;

  /// List of predecessor blocks.
  SmallVector<VPBlockBase *, 1> Predecessors;

  /// List of successor blocks.
  SmallVector<VPBlockBase *, 1> Successors;

  /// Successor selector, null for zero or single successor blocks.
  VPValue *CondBit = nullptr;

  /// Current block predicate - null if the block does not need a predicate.
  VPValue *Predicate = nullptr;

  /// VPlan containing the block. Can only be set on the entry block of the
  /// plan.
  VPlan *Plan = nullptr;

  /// Add \p Successor as the last successor to this block.
  void appendSuccessor(VPBlockBase *Successor) {
    assert(Successor && "Cannot add nullptr successor!");
    Successors.push_back(Successor);
  }

  /// Add \p Predecessor as the last predecessor to this block.
  void appendPredecessor(VPBlockBase *Predecessor) {
    assert(Predecessor && "Cannot add nullptr predecessor!");
    Predecessors.push_back(Predecessor);
  }

  /// Remove \p Predecessor from the predecessors of this block.
  void removePredecessor(VPBlockBase *Predecessor) {
    auto Pos = std::find(Predecessors.begin(), Predecessors.end(), Predecessor);
    assert(Pos != Predecessors.end() && "Predecessor does not exist");
    Predecessors.erase(Pos);
  }

  /// Remove \p Successor from the successors of this block.
  void removeSuccessor(VPBlockBase *Successor) {
    auto Pos = std::find(Successors.begin(), Successors.end(), Successor);
    assert(Pos != Successors.end() && "Successor does not exist");
    Successors.erase(Pos);
  }

protected:
  VPBlockBase(const unsigned char SC, const std::string &N)
      : SubclassID(SC), Name(N) {}

public:
  /// An enumeration for keeping track of the concrete subclass of VPBlockBase
  /// that is actually instantiated. Values of this enumeration are kept in the
  /// SubclassID field of the VPBlockBase objects. They are used for concrete
  /// type identification.
  using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };

  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;

  virtual ~VPBlockBase() = default;

  const std::string &getName() const { return Name; }

  void setName(const Twine &newName) { Name = newName.str(); }

  /// \return an ID for the concrete type of this object.
  /// This is used to implement the classof checks. This should not be used
  /// for any other purpose, as the values may change as LLVM evolves.
  unsigned getVPBlockID() const { return SubclassID; }

  VPRegionBlock *getParent() { return Parent; }
  const VPRegionBlock *getParent() const { return Parent; }

  /// \return A pointer to the plan containing the current block.
  VPlan *getPlan();
  const VPlan *getPlan() const;

  /// Sets the pointer of the plan containing the block. The block must be the
  /// entry block into the VPlan.
  void setPlan(VPlan *ParentPlan);

  void setParent(VPRegionBlock *P) { Parent = P; }

  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getEntryBasicBlock() const;
  VPBasicBlock *getEntryBasicBlock();

  /// \return the VPBasicBlock that is the exit of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getExitBasicBlock() const;
  VPBasicBlock *getExitBasicBlock();

  const VPBlocksTy &getSuccessors() const { return Successors; }
  VPBlocksTy &getSuccessors() { return Successors; }

  const VPBlocksTy &getPredecessors() const { return Predecessors; }
  VPBlocksTy &getPredecessors() { return Predecessors; }

  /// \return the successor of this VPBlockBase if it has a single successor.
  /// Otherwise return a null pointer.
  VPBlockBase *getSingleSuccessor() const {
    return (Successors.size() == 1 ? *Successors.begin() : nullptr);
  }

  /// \return the predecessor of this VPBlockBase if it has a single
  /// predecessor. Otherwise return a null pointer.
  VPBlockBase *getSinglePredecessor() const {
    return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
  }

  size_t getNumSuccessors() const { return Successors.size(); }
  size_t getNumPredecessors() const { return Predecessors.size(); }

  /// An Enclosing Block of a block B is any block containing B, including B
  /// itself. \return the closest enclosing block starting from "this", which
  /// has successors. \return the root enclosing block if all enclosing blocks
  /// have no successors.
  VPBlockBase *getEnclosingBlockWithSuccessors();

  /// \return the closest enclosing block starting from "this", which has
  /// predecessors. \return the root enclosing block if all enclosing blocks
  /// have no predecessors.
  VPBlockBase *getEnclosingBlockWithPredecessors();

  /// \return the successors either attached directly to this VPBlockBase or, if
  /// this VPBlockBase is the exit block of a VPRegionBlock and has no
  /// successors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has successors and return them. If no such
  /// VPRegionBlock exists, return the (empty) successors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalSuccessors() {
    return getEnclosingBlockWithSuccessors()->getSuccessors();
  }

  /// \return the hierarchical successor of this VPBlockBase if it has a single
  /// hierarchical successor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalSuccessor() {
    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
  }

  /// \return the predecessors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
  /// predecessors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has predecessors and return them. If no such
  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalPredecessors() {
    return getEnclosingBlockWithPredecessors()->getPredecessors();
  }

  /// \return the hierarchical predecessor of this VPBlockBase if it has a
  /// single hierarchical predecessor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalPredecessor() {
    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
  }

  /// \return the condition bit selecting the successor.
  VPValue *getCondBit() { return CondBit; }

  const VPValue *getCondBit() const { return CondBit; }

  void setCondBit(VPValue *CV) { CondBit = CV; }

  VPValue *getPredicate() { return Predicate; }

  const VPValue *getPredicate() const { return Predicate; }

  void setPredicate(VPValue *Pred) { Predicate = Pred; }

  /// Set a given VPBlockBase \p Successor as the single successor of this
  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
  /// This VPBlockBase must have no successors.
  void setOneSuccessor(VPBlockBase *Successor) {
    assert(Successors.empty() && "Setting one successor when others exist.");
    appendSuccessor(Successor);
  }

  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
  /// successors of this VPBlockBase. \p Condition is set as the successor
  /// selector. This VPBlockBase is not added as predecessor of \p IfTrue or \p
  /// IfFalse. This VPBlockBase must have no successors.
  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
                        VPValue *Condition) {
    assert(Successors.empty() && "Setting two successors when others exist.");
    assert(Condition && "Setting two successors without condition!");
    CondBit = Condition;
    appendSuccessor(IfTrue);
    appendSuccessor(IfFalse);
  }

  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
  /// as successor of any VPBasicBlock in \p NewPreds.
  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
    assert(Predecessors.empty() && "Block predecessors already set.");
    for (auto *Pred : NewPreds)
      appendPredecessor(Pred);
  }

  /// Remove all the predecessors of this block.
  void clearPredecessors() { Predecessors.clear(); }

  /// Remove all the successors of this block and set its condition bit to null.
  void clearSuccessors() {
    Successors.clear();
    CondBit = nullptr;
  }

  /// The method which generates the output IR that corresponds to this
  /// VPBlockBase, thereby "executing" the VPlan.
  virtual void execute(struct VPTransformState *State) = 0;

  /// Delete all blocks reachable from a given VPBlockBase, inclusive.
  static void deleteCFG(VPBlockBase *Entry);

  void printAsOperand(raw_ostream &OS, bool PrintType) const {
    OS << getName();
  }

  void print(raw_ostream &OS) const {
    // TODO: Only printing VPBB name for now since we only have dot printing
    // support for VPInstructions/Recipes.
    printAsOperand(OS, false);
  }

  /// Return true if it is legal to hoist instructions into this block.
  bool isLegalToHoistInto() {
    // There are currently no constraints that prevent an instruction from
    // being hoisted into a VPBlockBase.
    return true;
  }

  /// Replace all operands of VPUsers in the block with \p NewValue and also
  /// replaces all uses of VPValues defined in the block with NewValue.
  virtual void dropAllReferences(VPValue *NewValue) = 0;
};
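
// Illustrative sketch (assumption, not from the original header): wiring a
// small diamond of blocks with the raw setters. Note these setters only set
// one direction each; CondVPV is a hypothetical VPValue used as the selector.
//
//   VPBasicBlock *Entry = new VPBasicBlock("entry");
//   VPBasicBlock *Then = new VPBasicBlock("then");
//   VPBasicBlock *Else = new VPBasicBlock("else");
//   Entry->setTwoSuccessors(Then, Else, CondVPV);
//   Then->setPredecessors({Entry});
//   Else->setPredecessors({Entry});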

/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
  friend VPBasicBlock;
  friend class VPBlockUtils;

  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

public:
  /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
  /// that is actually instantiated. Values of this enumeration are kept in the
  /// SubclassID field of the VPRecipeBase objects. They are used for concrete
  /// type identification.
  using VPRecipeTy = enum {
    VPBlendSC,
    VPBranchOnMaskSC,
    VPInstructionSC,
    VPInterleaveSC,
    VPPredInstPHISC,
    VPReductionSC,
    VPReplicateSC,
    VPWidenCallSC,
    VPWidenCanonicalIVSC,
    VPWidenGEPSC,
    VPWidenIntOrFpInductionSC,
    VPWidenMemoryInstructionSC,
    VPWidenPHISC,
    VPWidenSC,
    VPWidenSelectSC
  };

  VPRecipeBase(const unsigned char SC) : SubclassID(SC) {}
  virtual ~VPRecipeBase() = default;

  /// \return an ID for the concrete type of this object.
  /// This is used to implement the classof checks. This should not be used
  /// for any other purpose, as the values may change as LLVM evolves.
  unsigned getVPRecipeID() const { return SubclassID; }

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(struct VPTransformState &State) = 0;

  /// Each recipe prints itself.
  virtual void print(raw_ostream &O, const Twine &Indent,
                     VPSlotTracker &SlotTracker) const = 0;

  /// Dump the recipe to stderr (for debugging).
  void dump() const;

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// Returns a pointer to a VPUser, if the recipe inherits from VPUser or
  /// nullptr otherwise.
  VPUser *toVPUser();

  /// Returns a pointer to a VPValue, if the recipe inherits from VPValue or
  /// nullptr otherwise.
  VPValue *toVPValue();
  const VPValue *toVPValue() const;

  /// Returns the underlying instruction, if the recipe is a VPValue or nullptr
  /// otherwise.
  Instruction *getUnderlyingInstr() {
    if (auto *VPV = toVPValue())
      return cast_or_null<Instruction>(VPV->getUnderlyingValue());
    return nullptr;
  }
  const Instruction *getUnderlyingInstr() const {
    if (auto *VPV = toVPValue())
      return cast_or_null<Instruction>(VPV->getUnderlyingValue());
    return nullptr;
  }
};
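
// Illustrative sketch (assumption, not from the original header): recipes are
// intrusive list nodes, so they can be relocated between VPBasicBlocks without
// reallocation. "NewR", "Anchor" and "Other" are hypothetical recipes.
//
//   NewR->insertAfter(Anchor);           // link into Anchor's VPBasicBlock
//   NewR->moveAfter(Other);              // unlink and re-link elsewhere
//   auto It = NewR->eraseFromParent();   // unlink and delete; It points to
//                                        // the following recipe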

inline bool VPUser::classof(const VPRecipeBase *Recipe) {
  return Recipe->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
         Recipe->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
         Recipe->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
         Recipe->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC ||
         Recipe->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
         Recipe->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
         Recipe->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
         Recipe->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
         Recipe->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
         Recipe->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC ||
         Recipe->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
}

/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While, as any Recipe, it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPUser, public VPValue, public VPRecipeBase {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
  enum {
    Not = Instruction::OtherOpsEnd + 1,
    ICmpULE,
    SLPLoad,
    SLPStore,
    ActiveLaneMask,
  };

private:
  typedef unsigned char OpcodeTy;
  OpcodeTy Opcode;

  /// Utility method serving execute(): generates a single instance of the
  /// modeled instruction.
  void generateInstruction(VPTransformState &State, unsigned Part);

protected:
  void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }

public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
      : VPUser(Operands), VPValue(VPValue::VPVInstructionSC),
        VPRecipeBase(VPRecipeBase::VPInstructionSC), Opcode(Opcode) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands)
      : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands)) {}

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVInstructionSC;
  }

  VPInstruction *clone() const {
    SmallVector<VPValue *, 2> Operands(operands());
    return new VPInstruction(Opcode, Operands);
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC;
  }

  unsigned getOpcode() const { return Opcode; }

  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

  /// Print the Recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Print the VPInstruction.
  void print(raw_ostream &O) const;
  void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;

  /// Return true if this instruction may modify memory.
  bool mayWriteToMemory() const {
    // TODO: we can use attributes of the called function to rule out memory
    // modifications.
    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
           Opcode == Instruction::Invoke || Opcode == SLPStore;
  }

  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively assume calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::Br:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
      return false;
    default:
      return true;
    }
  }
};
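
// Illustrative sketch (assumption, not from the original header): building
// VPlan-native instructions over existing VPValues A and B (hypothetical) and
// appending the result to a VPBasicBlock VPBB (also hypothetical).
//
//   VPInstruction *Cmp = new VPInstruction(VPInstruction::ICmpULE, {A, B});
//   VPInstruction *Neg = new VPInstruction(VPInstruction::Not, {Cmp});
//   VPBB->appendRecipe(Cmp);
//   VPBB->appendRecipe(Neg);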

/// VPWidenRecipe is a recipe for producing a widened (vector) copy of its
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
class VPWidenRecipe : public VPRecipeBase, public VPValue, public VPUser {
public:
  template <typename IterT>
  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
      : VPRecipeBase(VPRecipeBase::VPWidenSC), VPValue(VPValue::VPVWidenSC, &I),
        VPUser(Operands) {}

  ~VPWidenRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPWidenSC;
  }
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVWidenSC;
  }

  /// Produce widened copies of all Ingredients.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};

/// A recipe for widening Call instructions.
class VPWidenCallRecipe : public VPRecipeBase, public VPValue, public VPUser {

public:
  template <typename IterT>
  VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments)
      : VPRecipeBase(VPRecipeBase::VPWidenCallSC),
        VPValue(VPValue::VPVWidenCallSC, &I), VPUser(CallArguments) {}

  ~VPWidenCallRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPWidenCallSC;
  }

  /// Produce a widened version of the call instruction.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};

/// A recipe for widening select instructions.
class VPWidenSelectRecipe : public VPRecipeBase, public VPValue, public VPUser {

  /// Is the condition of the select loop invariant?
  bool InvariantCond;

public:
  template <typename IterT>
  VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands,
                      bool InvariantCond)
      : VPRecipeBase(VPRecipeBase::VPWidenSelectSC),
        VPValue(VPValue::VPVWidenSelectSC, &I), VPUser(Operands),
        InvariantCond(InvariantCond) {}

  ~VPWidenSelectRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC;
  }

  /// Produce a widened version of the select instruction.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};

/// A recipe for handling GEP instructions.
class VPWidenGEPRecipe : public VPRecipeBase, public VPValue, public VPUser {
  bool IsPtrLoopInvariant;
  SmallBitVector IsIndexLoopInvariant;

public:
  template <typename IterT>
  VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
      : VPRecipeBase(VPRecipeBase::VPWidenGEPSC),
        VPValue(VPValue::VPVWidenGEPSC, GEP), VPUser(Operands),
        IsIndexLoopInvariant(GEP->getNumIndices(), false) {}

  template <typename IterT>
  VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands,
                   Loop *OrigLoop)
      : VPRecipeBase(VPRecipeBase::VPWidenGEPSC),
        VPValue(VPValue::VPVWidenGEPSC, GEP), VPUser(Operands),
        IsIndexLoopInvariant(GEP->getNumIndices(), false) {
    IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
    for (auto Index : enumerate(GEP->indices()))
      IsIndexLoopInvariant[Index.index()] =
          OrigLoop->isLoopInvariant(Index.value().get());
  }
  ~VPWidenGEPRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC;
  }

  /// Generate the gep nodes.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector and scalar values.
class VPWidenIntOrFpInductionRecipe : public VPRecipeBase {
  PHINode *IV;
  TruncInst *Trunc;

public:
  VPWidenIntOrFpInductionRecipe(PHINode *IV, TruncInst *Trunc = nullptr)
      : VPRecipeBase(VPWidenIntOrFpInductionSC), IV(IV), Trunc(Trunc) {}
  ~VPWidenIntOrFpInductionRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC;
  }

  /// Generate the vectorized and scalarized versions of the phi node as
  /// needed by their users.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};

/// A recipe for handling all phi nodes except for integer and FP inductions.
class VPWidenPHIRecipe : public VPRecipeBase {
  PHINode *Phi;

public:
  VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {}
  ~VPWidenPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPWidenPHISC;
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};

/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class VPBlendRecipe : public VPRecipeBase, public VPUser {
  PHINode *Phi;

public:
  /// The blend operation is a User of the incoming values and of their
  /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
  /// might be incoming with a full mask for which there is no VPValue.
  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
      : VPRecipeBase(VPBlendSC), VPUser(Operands), Phi(Phi) {
    assert(Operands.size() > 0 &&
           ((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
           "Expected either a single incoming value or a positive even number "
           "of operands");
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPBlendSC;
  }

  /// Return the number of incoming values, taking into account that a single
  /// incoming value has no mask.
  unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }

  /// Return incoming value number \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); }

  /// Return mask number \p Idx.
  VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};
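
// Illustrative sketch (assumption, not from the original header): for a phi
// with two predicated incoming values, the operands are laid out pair-wise as
// [I0, M0, I1, M1]. In0, Mask0, In1, Mask1 and Phi are hypothetical.
//
//   VPBlendRecipe *Blend = new VPBlendRecipe(Phi, {In0, Mask0, In1, Mask1});
//   Blend->getNumIncomingValues(); // 2
//   Blend->getIncomingValue(1);    // In1
//   Blend->getMask(1);             // Mask1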

/// VPInterleaveRecipe is a recipe for transforming an interleave group of
/// loads or stores into one wide load/store and shuffles. The first operand of
/// a VPInterleave recipe is the address, followed by the stored values,
/// followed by an optional mask.
class VPInterleaveRecipe : public VPRecipeBase, public VPUser {
  const InterleaveGroup<Instruction> *IG;

  bool HasMask = false;

public:
  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
                     ArrayRef<VPValue *> StoredValues, VPValue *Mask)
      : VPRecipeBase(VPInterleaveSC), VPUser({Addr}), IG(IG) {
    for (auto *SV : StoredValues)
      addOperand(SV);
    if (Mask) {
      HasMask = true;
      addOperand(Mask);
    }
  }
  ~VPInterleaveRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC;
  }

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last operand.
    return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Return the VPValues stored by this interleave group. If it is a load
  /// interleave group, return an empty ArrayRef.
  ArrayRef<VPValue *> getStoredValues() const {
    // The first operand is the address, followed by the stored values, followed
    // by an optional mask.
    return ArrayRef<VPValue *>(op_begin(), getNumOperands())
        .slice(1, getNumOperands() - (HasMask ? 2 : 1));
  }

  /// Generate the wide load or store, and shuffles.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
};
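
// Illustrative sketch (assumption, not from the original header): a masked
// store interleave group with two members lays its operands out as
// [Addr, Stored0, Stored1, Mask]. IG, Addr, Stored0, Stored1 and Mask are
// hypothetical names.
//
//   auto *IR = new VPInterleaveRecipe(IG, Addr, {Stored0, Stored1}, Mask);
//   IR->getAddr();                // Addr
//   IR->getStoredValues().size(); // 2
//   IR->getMask();                // Mask (nullptr for an unmasked group)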

/// A recipe to represent inloop reduction operations, performing a reduction on
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPRecipeBase, public VPValue, public VPUser {
  /// The recurrence descriptor for the reduction in question.
  RecurrenceDescriptor *RdxDesc;
  /// Fast math flags to use for the resulting reduction operation.
  bool NoNaN;
  /// Pointer to the TTI, needed to create the target reduction.
  const TargetTransformInfo *TTI;

public:
  VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp,
                    VPValue *VecOp, VPValue *CondOp, bool NoNaN,
                    const TargetTransformInfo *TTI)
      : VPRecipeBase(VPRecipeBase::VPReductionSC),
        VPValue(VPValue::VPVReductionSC, I), VPUser({ChainOp, VecOp}),
        RdxDesc(R), NoNaN(NoNaN), TTI(TTI) {
    if (CondOp)
      addOperand(CondOp);
  }

  ~VPReductionRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVReductionSC;
  }
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPReductionSC;
  }

  /// Generate the reduction in the loop.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// The VPValue of the scalar Chain being accumulated.
  VPValue *getChainOp() const { return getOperand(0); }
  /// The VPValue of the vector value to be reduced.
  VPValue *getVecOp() const { return getOperand(1); }
  /// The VPValue of the condition for the block.
  VPValue *getCondOp() const {
    return getNumOperands() > 2 ? getOperand(2) : nullptr;
  }
};
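
// Illustrative sketch (assumption, not from the original header): an in-loop
// reduction with a block condition uses the operand layout
// {ChainOp, VecOp, Condition}. RdxDesc, AddInst, Chain, Vec, Cond and TTI are
// hypothetical names.
//
//   auto *Red = new VPReductionRecipe(&RdxDesc, AddInst, Chain, Vec, Cond,
//                                     /*NoNaN=*/false, &TTI);
//   Red->getChainOp(); // Chain
//   Red->getVecOp();   // Vec
//   Red->getCondOp();  // Cond (nullptr when no condition operand was added)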

/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to be
/// uniform, only one copy, per lane zero, will be generated.
class VPReplicateRecipe : public VPRecipeBase, public VPUser, public VPValue {
  /// Indicator if only a single replica per lane is needed.
  bool IsUniform;

  /// Indicator if the replicas are also predicated.
  bool IsPredicated;

  /// Indicator if the scalar values should also be packed into a vector.
  bool AlsoPack;

public:
  template <typename IterT>
  VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
                    bool IsUniform, bool IsPredicated = false)
      : VPRecipeBase(VPReplicateSC), VPUser(Operands),
        VPValue(VPVReplicateSC, I), IsUniform(IsUniform),
        IsPredicated(IsPredicated) {
    // Retain the previous behavior of predicateInstructions(), where an
    // insert-element of a predicated instruction got hoisted into the
    // predicated basic block iff it was its only user. This is achieved by
    // having predicated instructions also pack their values into a vector by
    // default unless they have a replicated user which uses their scalar value.
    AlsoPack = IsPredicated && !I->use_empty();
  }

  ~VPReplicateRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPReplicateSC;
  }

  static inline bool classof(const VPValue *V) {
    return V->getVPValueID() == VPValue::VPVReplicateSC;
  }

  /// Generate replicas of the desired Ingredient. Replicas will be generated
  /// for all parts and lanes unless a specific part and lane are specified in
  /// the \p State.
  void execute(VPTransformState &State) override;

  void setAlsoPack(bool Pack) { AlsoPack = Pack; }

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  bool isUniform() const { return IsUniform; }
};

/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase, public VPUser {
public:
  VPBranchOnMaskRecipe(VPValue *BlockInMask) : VPRecipeBase(VPBranchOnMaskSC) {
    if (BlockInMask) // nullptr means all-one mask.
      addOperand(BlockInMask);
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC;
  }

  /// Generate the extraction of the appropriate bit from the block mask and the
  /// conditional branch.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override {
    O << " +\n" << Indent << "\"BRANCH-ON-MASK ";
    if (VPValue *Mask = getMask())
      Mask->print(O, SlotTracker);
    else
      O << " All-One";
    O << "\\l\"";
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
    // Mask is optional.
    return getNumOperands() == 1 ? getOperand(0) : nullptr;
  }
};

/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
class VPPredInstPHIRecipe : public VPRecipeBase, public VPUser {

public:
  /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
  /// nodes after merging back from a Branch-on-Mask.
  VPPredInstPHIRecipe(VPValue *PredV)
      : VPRecipeBase(VPPredInstPHISC), VPUser(PredV) {}
  ~VPPredInstPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPPredInstPHISC;
  }

  /// Generates phi nodes for live-outs as needed to retain SSA form.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};

/// A Recipe for widening load/store operations.
/// The recipe uses the following VPValues:
/// - For load: Address, optional mask
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
class VPWidenMemoryInstructionRecipe : public VPRecipeBase,
                                       public VPValue,
                                       public VPUser {

  void setMask(VPValue *Mask) {
    if (!Mask)
      return;
    addOperand(Mask);
  }

  bool isMasked() const {
    return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
  }

public:
  VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask)
      : VPRecipeBase(VPWidenMemoryInstructionSC),
        VPValue(VPValue::VPVMemoryInstructionSC, &Load), VPUser({Addr}) {
    setMask(Mask);
  }

  VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
                                 VPValue *StoredValue, VPValue *Mask)
      : VPRecipeBase(VPWidenMemoryInstructionSC),
        VPValue(VPValue::VPVMemoryInstructionSC, &Store),
        VPUser({Addr, StoredValue}) {
    setMask(Mask);
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *V) {
    return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
  }

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last operand.
    return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Returns true if this recipe is a store.
  bool isStore() const { return isa<StoreInst>(getUnderlyingInstr()); }

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const {
    assert(isStore() && "Stored value only available for store instructions");
    return getOperand(1); // Stored value is the 2nd, mandatory operand.
  }

  /// Generate the wide load/store.
  void execute(VPTransformState &State) override;

  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
};
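
// Illustrative sketch (assumption, not from the original header): the operand
// counts distinguish the cases handled by isMasked(). LoadI, StoreI, Addr,
// StoredVal and Mask are hypothetical names.
//
//   new VPWidenMemoryInstructionRecipe(*LoadI, Addr, nullptr);          // 1 op
//   new VPWidenMemoryInstructionRecipe(*LoadI, Addr, Mask);             // 2 ops
//   new VPWidenMemoryInstructionRecipe(*StoreI, Addr, StoredVal, Mask); // 3 ops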
1329
1330 /// A Recipe for widening the canonical induction variable of the vector loop.
1331 class VPWidenCanonicalIVRecipe : public VPRecipeBase {
1332 /// A VPValue representing the canonical vector IV.
1333 VPValue Val;
1334
1335 public:
1336 VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC) {}
1337 ~VPWidenCanonicalIVRecipe() override = default;
1338
1339 /// Return the VPValue representing the canonical vector induction variable of
1340 /// the vector loop.
1341 const VPValue *getVPValue() const { return &Val; }
1342 VPValue *getVPValue() { return &Val; }
1343
1344 /// Method to support type inquiry through isa, cast, and dyn_cast.
1345 static inline bool classof(const VPRecipeBase *V) {
1346 return V->getVPRecipeID() == VPRecipeBase::VPWidenCanonicalIVSC;
1347 }
1348
1349 /// Generate a canonical vector induction variable of the vector loop, with
1350 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
1351 /// step = <VF*UF, VF*UF, ..., VF*UF>.
1352 void execute(VPTransformState &State) override;
1353
1354 /// Print the recipe.
1355 void print(raw_ostream &O, const Twine &Indent,
1356 VPSlotTracker &SlotTracker) const override;
1357 };
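
// Worked example of the canonical IV generated above, assuming VF = 4 and
// UF = 2: Part 0 starts at <0, 1, 2, 3>, Part 1 starts at <4, 5, 6, 7>, and
// both parts are advanced by the splat step <8, 8, 8, 8> (= VF * UF) on each
// iteration of the vector loop.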
1358
1359 /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
1360 /// holds a sequence of zero or more VPRecipe's each representing a sequence of
1361 /// output IR instructions.
1362 class VPBasicBlock : public VPBlockBase {
1363 public:
1364 using RecipeListTy = iplist<VPRecipeBase>;
1365
1366 private:
1367 /// The VPRecipes held in the order of output instructions to generate.
1368 RecipeListTy Recipes;
1369
1370 public:
1371 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
1372 : VPBlockBase(VPBasicBlockSC, Name.str()) {
1373 if (Recipe)
1374 appendRecipe(Recipe);
1375 }
1376
1377 ~VPBasicBlock() override { Recipes.clear(); }
1378
1379 /// Instruction iterators...
1380 using iterator = RecipeListTy::iterator;
1381 using const_iterator = RecipeListTy::const_iterator;
1382 using reverse_iterator = RecipeListTy::reverse_iterator;
1383 using const_reverse_iterator = RecipeListTy::const_reverse_iterator;
1384
1385 //===--------------------------------------------------------------------===//
1386 /// Recipe iterator methods
1387 ///
1388 inline iterator begin() { return Recipes.begin(); }
1389 inline const_iterator begin() const { return Recipes.begin(); }
1390 inline iterator end() { return Recipes.end(); }
1391 inline const_iterator end() const { return Recipes.end(); }
1392
1393 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
1394 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
1395 inline reverse_iterator rend() { return Recipes.rend(); }
1396 inline const_reverse_iterator rend() const { return Recipes.rend(); }
1397
1398 inline size_t size() const { return Recipes.size(); }
1399 inline bool empty() const { return Recipes.empty(); }
1400 inline const VPRecipeBase &front() const { return Recipes.front(); }
1401 inline VPRecipeBase &front() { return Recipes.front(); }
1402 inline const VPRecipeBase &back() const { return Recipes.back(); }
1403 inline VPRecipeBase &back() { return Recipes.back(); }
1404
1405 /// Returns a reference to the list of recipes.
1406 RecipeListTy &getRecipeList() { return Recipes; }
1407
1408 /// Returns a pointer to a member of the recipe list.
1409 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
1410 return &VPBasicBlock::Recipes;
1411 }
1412
1413 /// Method to support type inquiry through isa, cast, and dyn_cast.
1414 static inline bool classof(const VPBlockBase *V) {
1415 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
1416 }
1417
1418 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
1419 assert(Recipe && "No recipe to append.");
1420 assert(!Recipe->Parent && "Recipe already in VPlan");
1421 Recipe->Parent = this;
1422 Recipes.insert(InsertPt, Recipe);
1423 }
1424
1425 /// Augment the existing recipes of a VPBasicBlock with an additional
1426 /// \p Recipe as the last recipe.
1427 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
1428
1429 /// The method which generates the output IR instructions that correspond to
1430 /// this VPBasicBlock, thereby "executing" the VPlan.
1431 void execute(struct VPTransformState *State) override;
1432
1433 /// Return the position of the first non-phi node recipe in the block.
1434 iterator getFirstNonPhi();
1435
1436 void dropAllReferences(VPValue *NewValue) override;
1437
1438 private:
1439 /// Create an IR BasicBlock to hold the output instructions generated by this
1440 /// VPBasicBlock, and return it. Update the CFGState accordingly.
1441 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
1442 };
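
// A minimal usage sketch for VPBasicBlock (illustrative; `WidenR` is a
// hypothetical recipe, and ownership of appended recipes passes to the block):
//
//   VPBasicBlock *VPBB = new VPBasicBlock("for.body");
//   VPBB->appendRecipe(WidenR);      // WidenR->getParent() == VPBB
//   for (VPRecipeBase &R : *VPBB)    // visit recipes in emission order
//     ;                              // e.g. inspect or print R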
1443
1444 /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
1445 /// which form a Single-Entry-Single-Exit subgraph of the output IR CFG.
1446 /// A VPRegionBlock may indicate that its contents are to be replicated several
1447 /// times. This is designed to support predicated scalarization, in which a
1448 /// scalar if-then code structure needs to be generated VF * UF times. Having
1449 /// this replication indicator helps to keep a single model for multiple
1450 /// candidate VF's. The actual replication takes place only once the desired VF
1451 /// and UF have been determined.
1452 class VPRegionBlock : public VPBlockBase {
1453 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
1454 VPBlockBase *Entry;
1455
1456 /// Hold the Single Exit of the SESE region modelled by the VPRegionBlock.
1457 VPBlockBase *Exit;
1458
1459 /// An indicator whether this region is to generate multiple replicated
1460 /// instances of output IR corresponding to its VPBlockBases.
1461 bool IsReplicator;
1462
1463 public:
1464 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exit,
1465 const std::string &Name = "", bool IsReplicator = false)
1466 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exit(Exit),
1467 IsReplicator(IsReplicator) {
1468 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
1469 assert(Exit->getSuccessors().empty() && "Exit block has successors.");
1470 Entry->setParent(this);
1471 Exit->setParent(this);
1472 }
1473 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
1474 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exit(nullptr),
1475 IsReplicator(IsReplicator) {}
1476
1477 ~VPRegionBlock() override {
1478 if (Entry) {
1479 VPValue DummyValue;
1480 Entry->dropAllReferences(&DummyValue);
1481 deleteCFG(Entry);
1482 }
1483 }
1484
1485 /// Method to support type inquiry through isa, cast, and dyn_cast.
1486 static inline bool classof(const VPBlockBase *V) {
1487 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
1488 }
1489
1490 const VPBlockBase *getEntry() const { return Entry; }
1491 VPBlockBase *getEntry() { return Entry; }
1492
1493 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
1494 /// EntryBlock must have no predecessors.
1495 void setEntry(VPBlockBase *EntryBlock) {
1496 assert(EntryBlock->getPredecessors().empty() &&
1497 "Entry block cannot have predecessors.");
1498 Entry = EntryBlock;
1499 EntryBlock->setParent(this);
1500 }
1501
1502 // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a
1503 // specific interface of llvm::Function, instead of using
1504 // GraphTraits::getEntryNode. We should add a new template parameter to
1505 // DominatorTreeBase representing the Graph type.
1506 VPBlockBase &front() const { return *Entry; }
1507
1508 const VPBlockBase *getExit() const { return Exit; }
1509 VPBlockBase *getExit() { return Exit; }
1510
1511 /// Set \p ExitBlock as the exit VPBlockBase of this VPRegionBlock. \p
1512 /// ExitBlock must have no successors.
1513 void setExit(VPBlockBase *ExitBlock) {
1514 assert(ExitBlock->getSuccessors().empty() &&
1515 "Exit block cannot have successors.");
1516 Exit = ExitBlock;
1517 ExitBlock->setParent(this);
1518 }
1519
1520 /// An indicator whether this region is to generate multiple replicated
1521 /// instances of output IR corresponding to its VPBlockBases.
1522 bool isReplicator() const { return IsReplicator; }
1523
1524 /// The method which generates the output IR instructions that correspond to
1525 /// this VPRegionBlock, thereby "executing" the VPlan.
1526 void execute(struct VPTransformState *State) override;
1527
1528 void dropAllReferences(VPValue *NewValue) override;
1529 };
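
// A minimal sketch of wiring up a replicating region (names hypothetical; the
// blocks between Entry and Exit are connected separately, e.g. via
// VPBlockUtils):
//
//   auto *PredEntry = new VPBasicBlock("pred.entry");
//   auto *PredExit = new VPBasicBlock("pred.exit");
//   auto *Region =
//       new VPRegionBlock(PredEntry, PredExit, "pred", /*IsReplicator=*/true);
//   // PredEntry/PredExit now have Region as their parent.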
1530
1531 //===----------------------------------------------------------------------===//
1532 // GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs //
1533 //===----------------------------------------------------------------------===//
1534
1535 // The following set of template specializations implement GraphTraits to treat
1536 // any VPBlockBase as a node in a graph of VPBlockBases. It's important to note
1537 // that VPBlockBase traits don't recurse into VPRegionBlocks, i.e., if the
1538 // VPBlockBase is a VPRegionBlock, this specialization provides access to its
1539 // successors/predecessors but not to the blocks inside the region.
1540
1541 template <> struct GraphTraits<VPBlockBase *> {
1542 using NodeRef = VPBlockBase *;
1543 using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
1544
1545 static NodeRef getEntryNode(NodeRef N) { return N; }
1546
1547 static inline ChildIteratorType child_begin(NodeRef N) {
1548 return N->getSuccessors().begin();
1549 }
1550
1551 static inline ChildIteratorType child_end(NodeRef N) {
1552 return N->getSuccessors().end();
1553 }
1554 };
1555
1556 template <> struct GraphTraits<const VPBlockBase *> {
1557 using NodeRef = const VPBlockBase *;
1558 using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::const_iterator;
1559
1560 static NodeRef getEntryNode(NodeRef N) { return N; }
1561
1562 static inline ChildIteratorType child_begin(NodeRef N) {
1563 return N->getSuccessors().begin();
1564 }
1565
1566 static inline ChildIteratorType child_end(NodeRef N) {
1567 return N->getSuccessors().end();
1568 }
1569 };
1570
1571 // Inverse order specialization for VPBlockBases. Predecessors are used instead
1572 // of successors for the inverse traversal.
1573 template <> struct GraphTraits<Inverse<VPBlockBase *>> {
1574 using NodeRef = VPBlockBase *;
1575 using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator;
1576
1577 static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }
1578
1579 static inline ChildIteratorType child_begin(NodeRef N) {
1580 return N->getPredecessors().begin();
1581 }
1582
1583 static inline ChildIteratorType child_end(NodeRef N) {
1584 return N->getPredecessors().end();
1585 }
1586 };
1587
1588 // The following set of template specializations implement GraphTraits to
1589 // treat VPRegionBlock as a graph and recurse inside its nodes. It's important
1590 // to note that the blocks inside the VPRegionBlock are treated as VPBlockBases
1591 // (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so
1592 // there won't be automatic recursion into other VPBlockBases that happen to
1593 // be VPRegionBlocks.
1594
1595 template <>
1596 struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> {
1597 using GraphRef = VPRegionBlock *;
1598 using nodes_iterator = df_iterator<NodeRef>;
1599
1600 static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
1601
1602 static nodes_iterator nodes_begin(GraphRef N) {
1603 return nodes_iterator::begin(N->getEntry());
1604 }
1605
1606 static nodes_iterator nodes_end(GraphRef N) {
1607 // df_iterator::end() returns an empty iterator so the node used doesn't
1608 // matter.
1609 return nodes_iterator::end(N);
1610 }
1611 };
1612
1613 template <>
1614 struct GraphTraits<const VPRegionBlock *>
1615 : public GraphTraits<const VPBlockBase *> {
1616 using GraphRef = const VPRegionBlock *;
1617 using nodes_iterator = df_iterator<NodeRef>;
1618
1619 static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); }
1620
1621 static nodes_iterator nodes_begin(GraphRef N) {
1622 return nodes_iterator::begin(N->getEntry());
1623 }
1624
1625 static nodes_iterator nodes_end(GraphRef N) {
1626 // df_iterator::end() returns an empty iterator so the node used doesn't
1627 // matter.
1628 return nodes_iterator::end(N);
1629 }
1630 };
1631
1632 template <>
1633 struct GraphTraits<Inverse<VPRegionBlock *>>
1634 : public GraphTraits<Inverse<VPBlockBase *>> {
1635 using GraphRef = VPRegionBlock *;
1636 using nodes_iterator = df_iterator<NodeRef>;
1637
1638 static NodeRef getEntryNode(Inverse<GraphRef> N) {
1639 return N.Graph->getExit();
1640 }
1641
1642 static nodes_iterator nodes_begin(GraphRef N) {
1643 return nodes_iterator::begin(N->getExit());
1644 }
1645
1646 static nodes_iterator nodes_end(GraphRef N) {
1647 // df_iterator::end() returns an empty iterator so the node used doesn't
1648 // matter.
1649 return nodes_iterator::end(N);
1650 }
1651 };
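
// With the specializations above, LLVM's generic graph algorithms apply
// directly to the hierarchical CFG. A sketch of a shallow traversal, in which
// a VPRegionBlock is visited as a single node and not entered:
//
//   for (VPBlockBase *Block : depth_first(Plan.getEntry()))
//     ; // e.g. record or print Block->getName()
//
// Traversing from Region->getEntry() instead walks the blocks inside that
// region (again without recursing into nested regions).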
1652
1653 /// VPlan models a candidate for vectorization, encoding various decisions taken
1654 /// to produce efficient output IR, including which branches, basic-blocks and
1655 /// output IR instructions to generate, and their cost. VPlan holds a
1656 /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
1657 /// VPBlock.
1658 class VPlan {
1659 friend class VPlanPrinter;
1660 friend class VPSlotTracker;
1661
1662 /// Hold the single entry to the Hierarchical CFG of the VPlan.
1663 VPBlockBase *Entry;
1664
1665 /// Holds the VFs applicable to this VPlan.
1666 SmallSetVector<ElementCount, 2> VFs;
1667
1668 /// Holds the name of the VPlan, for printing.
1669 std::string Name;
1670
1671 /// Holds all the external definitions created for this VPlan.
1672 // TODO: Introduce a specific representation for external definitions in
1673 // VPlan. External definitions must be immutable and hold a pointer to its
1674 // underlying IR that will be used to implement its structural comparison
1675 // (operators '==' and '<').
1676 SmallPtrSet<VPValue *, 16> VPExternalDefs;
1677
1678 /// Represents the backedge taken count of the original loop, for folding
1679 /// the tail.
1680 VPValue *BackedgeTakenCount = nullptr;
1681
1682 /// Holds a mapping between Values and their corresponding VPValue inside
1683 /// VPlan.
1684 Value2VPValueTy Value2VPValue;
1685
1686 /// Contains all VPValues that have been allocated by addVPValue directly and
1687 /// need to be freed when the plan's destructor is called.
1688 SmallVector<VPValue *, 16> VPValuesToFree;
1689
1690 /// Holds the VPLoopInfo analysis for this VPlan.
1691 VPLoopInfo VPLInfo;
1692
1693 /// Holds the condition bit values built during VPInstruction to VPRecipe transformation.
1694 SmallVector<VPValue *, 4> VPCBVs;
1695
1696 public:
1697 VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {
1698 if (Entry)
1699 Entry->setPlan(this);
1700 }
1701
1702 ~VPlan() {
1703 if (Entry) {
1704 VPValue DummyValue;
1705 for (VPBlockBase *Block : depth_first(Entry))
1706 Block->dropAllReferences(&DummyValue);
1707
1708 VPBlockBase::deleteCFG(Entry);
1709 }
1710 for (VPValue *VPV : VPValuesToFree)
1711 delete VPV;
1712 if (BackedgeTakenCount)
1713 delete BackedgeTakenCount;
1714 for (VPValue *Def : VPExternalDefs)
1715 delete Def;
1716 for (VPValue *CBV : VPCBVs)
1717 delete CBV;
1718 }
1719
1720 /// Generate the IR code for this VPlan.
1721 void execute(struct VPTransformState *State);
1722
1723 VPBlockBase *getEntry() { return Entry; }
1724 const VPBlockBase *getEntry() const { return Entry; }
1725
1726 VPBlockBase *setEntry(VPBlockBase *Block) {
1727 Entry = Block;
1728 Block->setPlan(this);
1729 return Entry;
1730 }
1731
1732 /// The backedge taken count of the original loop.
1733 VPValue *getOrCreateBackedgeTakenCount() {
1734 if (!BackedgeTakenCount)
1735 BackedgeTakenCount = new VPValue();
1736 return BackedgeTakenCount;
1737 }
1738
1739 void addVF(ElementCount VF) { VFs.insert(VF); }
1740
1741 bool hasVF(ElementCount VF) { return VFs.count(VF); }
1742
1743 const std::string &getName() const { return Name; }
1744
1745 void setName(const Twine &newName) { Name = newName.str(); }
1746
1747 /// Add \p VPVal to the pool of external definitions if it's not already
1748 /// in the pool.
1749 void addExternalDef(VPValue *VPVal) {
1750 VPExternalDefs.insert(VPVal);
1751 }
1752
1753 /// Add \p CBV to the vector of condition bit values.
1754 void addCBV(VPValue *CBV) {
1755 VPCBVs.push_back(CBV);
1756 }
1757
1758 void addVPValue(Value *V) {
1759 assert(V && "Trying to add a null Value to VPlan");
1760 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
1761 VPValue *VPV = new VPValue(V);
1762 Value2VPValue[V] = VPV;
1763 VPValuesToFree.push_back(VPV);
1764 }
1765
1766 void addVPValue(Value *V, VPValue *VPV) {
1767 assert(V && "Trying to add a null Value to VPlan");
1768 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
1769 Value2VPValue[V] = VPV;
1770 }
1771
1772 VPValue *getVPValue(Value *V) {
1773 assert(V && "Trying to get the VPValue of a null Value");
1774 assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
1775 return Value2VPValue[V];
1776 }
1777
1778 VPValue *getOrAddVPValue(Value *V) {
1779 assert(V && "Trying to get or add the VPValue of a null Value");
1780 if (!Value2VPValue.count(V))
1781 addVPValue(V);
1782 return getVPValue(V);
1783 }
1784
1785 void removeVPValueFor(Value *V) { Value2VPValue.erase(V); }
1786
1787 /// Return the VPLoopInfo analysis for this VPlan.
1788 VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
1789 const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
1790
1791 /// Dump the plan to stderr (for debugging).
1792 void dump() const;
1793
1794 /// Returns a range mapping the values in the range \p Operands to their
1795 /// corresponding VPValues.
1796 iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>>
1797 mapToVPValues(User::op_range Operands) {
1798 std::function<VPValue *(Value *)> Fn = [this](Value *Op) {
1799 return getOrAddVPValue(Op);
1800 };
1801 return map_range(Operands, Fn);
1802 }
1803
1804 private:
1805 /// Add to the given dominator tree the header block and every new basic block
1806 /// that was created between it and the latch block, inclusive.
1807 static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
1808 BasicBlock *LoopPreHeaderBB,
1809 BasicBlock *LoopExitBB);
1810 };
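
// A minimal construction sketch (illustrative; `SomeIRValue` is a hypothetical
// llvm::Value from the input IR):
//
//   VPBasicBlock *Header = new VPBasicBlock("vector.body");
//   auto Plan = std::make_unique<VPlan>(Header); // Plan now owns the H-CFG
//   Plan->addVF(ElementCount::getFixed(4));
//   Plan->setName("Initial VPlan");
//   VPValue *VPV = Plan->getOrAddVPValue(SomeIRValue);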
1811
1812 /// VPlanPrinter prints a given VPlan to a given output stream. The printing is
1813 /// indented and follows the dot format.
1814 class VPlanPrinter {
1815 friend inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan);
1816 friend inline raw_ostream &operator<<(raw_ostream &OS,
1817 const struct VPlanIngredient &I);
1818
1819 private:
1820 raw_ostream &OS;
1821 const VPlan &Plan;
1822 unsigned Depth = 0;
1823 unsigned TabWidth = 2;
1824 std::string Indent;
1825 unsigned BID = 0;
1826 SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
1827
1828 VPSlotTracker SlotTracker;
1829
1830 VPlanPrinter(raw_ostream &O, const VPlan &P)
1831 : OS(O), Plan(P), SlotTracker(&P) {}
1832
1833 /// Handle indentation.
1834 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
1835
1836 /// Print a given \p Block of the Plan.
1837 void dumpBlock(const VPBlockBase *Block);
1838
1839 /// Print the information related to the CFG edges going out of a given
1840 /// \p Block, followed by printing the successor blocks themselves.
1841 void dumpEdges(const VPBlockBase *Block);
1842
1843 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
1844 /// its successor blocks.
1845 void dumpBasicBlock(const VPBasicBlock *BasicBlock);
1846
1847 /// Print a given \p Region of the Plan.
1848 void dumpRegion(const VPRegionBlock *Region);
1849
1850 unsigned getOrCreateBID(const VPBlockBase *Block) {
1851 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
1852 }
1853
1854 const Twine getOrCreateName(const VPBlockBase *Block);
1855
1856 const Twine getUID(const VPBlockBase *Block);
1857
1858 /// Print the information related to a CFG edge between two VPBlockBases.
1859 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
1860 const Twine &Label);
1861
1862 void dump();
1863
1864 static void printAsIngredient(raw_ostream &O, const Value *V);
1865 };
1866
1867 struct VPlanIngredient {
1868 const Value *V;
1869
1870 VPlanIngredient(const Value *V) : V(V) {}
1871 };
1872
1873 inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
1874 VPlanPrinter::printAsIngredient(OS, I.V);
1875 return OS;
1876 }
1877
1878 inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
1879 VPlanPrinter Printer(OS, Plan);
1880 Printer.dump();
1881 return OS;
1882 }
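
// Since the printer emits dot format, a plan can be dumped and then rendered
// with Graphviz, e.g. (sketch):
//
//   errs() << *Plan;   // or Plan->dump() from a debugger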
1883
1884 //===----------------------------------------------------------------------===//
1885 // VPlan Utilities
1886 //===----------------------------------------------------------------------===//
1887
1888 /// Class that provides utilities for VPBlockBases in VPlan.
1889 class VPBlockUtils {
1890 public:
1891 VPBlockUtils() = delete;
1892
1893 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
1894 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
1895 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. If \p BlockPtr
1896 /// has more than one successor, its conditional bit is propagated to \p
1897 /// NewBlock. \p NewBlock must have neither successors nor predecessors.
1898 static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
1899 assert(NewBlock->getSuccessors().empty() &&
1900 "Can't insert new block with successors.");
1901 // TODO: move successors from BlockPtr to NewBlock when this functionality
1902 // is necessary. For now, setOneSuccessor will assert if BlockPtr
1903 // already has successors.
1904 BlockPtr->setOneSuccessor(NewBlock);
1905 NewBlock->setPredecessors({BlockPtr});
1906 NewBlock->setParent(BlockPtr->getParent());
1907 }
1908
1909 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
1910 /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
1911 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
1912 /// parent to \p IfTrue and \p IfFalse. \p Condition is set as the successor
1913 /// selector. \p BlockPtr must have no successors and \p IfTrue and \p IfFalse
1914 /// must have neither successors nor predecessors.
1915 static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
1916 VPValue *Condition, VPBlockBase *BlockPtr) {
1917 assert(IfTrue->getSuccessors().empty() &&
1918 "Can't insert IfTrue with successors.");
1919 assert(IfFalse->getSuccessors().empty() &&
1920 "Can't insert IfFalse with successors.");
1921 BlockPtr->setTwoSuccessors(IfTrue, IfFalse, Condition);
1922 IfTrue->setPredecessors({BlockPtr});
1923 IfFalse->setPredecessors({BlockPtr});
1924 IfTrue->setParent(BlockPtr->getParent());
1925 IfFalse->setParent(BlockPtr->getParent());
1926 }
1927
1928 /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
1929 /// the successors of \p From and \p From to the predecessors of \p To. Both
1930 /// VPBlockBases must have the same parent, which can be null. Both
1931 /// VPBlockBases can be already connected to other VPBlockBases.
1932 static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
1933 assert((From->getParent() == To->getParent()) &&
1934 "Can't connect two block with different parents");
1935 assert(From->getNumSuccessors() < 2 &&
1936 "Blocks can't have more than two successors.");
1937 From->appendSuccessor(To);
1938 To->appendPredecessor(From);
1939 }
1940
1941 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
1942 /// from the successors of \p From and \p From from the predecessors of \p To.
1943 static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
1944 assert(To && "Successor to disconnect is null.");
1945 From->removeSuccessor(To);
1946 To->removePredecessor(From);
1947 }
1948
1949 /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge.
1950 static bool isBackEdge(const VPBlockBase *FromBlock,
1951 const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {
1952 assert(FromBlock->getParent() == ToBlock->getParent() &&
1953 FromBlock->getParent() && "Must be in same region");
1954 const VPLoop *FromLoop = VPLI->getLoopFor(FromBlock);
1955 const VPLoop *ToLoop = VPLI->getLoopFor(ToBlock);
1956 if (!FromLoop || !ToLoop || FromLoop != ToLoop)
1957 return false;
1958
1959 // A back-edge is a branch from the loop latch to its header.
1960 return ToLoop->isLoopLatch(FromBlock) && ToBlock == ToLoop->getHeader();
1961 }
1962
1963 /// Returns true if \p Block is a loop latch.
1964 static bool blockIsLoopLatch(const VPBlockBase *Block,
1965 const VPLoopInfo *VPLInfo) {
1966 if (const VPLoop *ParentVPL = VPLInfo->getLoopFor(Block))
1967 return ParentVPL->isLoopLatch(Block);
1968
1969 return false;
1970 }
1971
1972 /// Count and return the number of successors of \p PredBlock excluding any
1973 /// backedges.
1974 static unsigned countSuccessorsNoBE(VPBlockBase *PredBlock,
1975 VPLoopInfo *VPLI) {
1976 unsigned Count = 0;
1977 for (VPBlockBase *SuccBlock : PredBlock->getSuccessors()) {
1978 if (!VPBlockUtils::isBackEdge(PredBlock, SuccBlock, VPLI))
1979 Count++;
1980 }
1981 return Count;
1982 }
1983 };
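
// A sketch of shaping a simple if-then-else diamond with these utilities
// (all block names hypothetical; `Cond` is the VPValue used as the successor
// selector):
//
//   VPBlockUtils::insertTwoBlocksAfter(ThenBB, ElseBB, Cond, EntryBB);
//   VPBlockUtils::insertBlockAfter(MergeBB, ThenBB);
//   VPBlockUtils::connectBlocks(ElseBB, MergeBB);
//   // EntryBB -> {ThenBB, ElseBB}, ThenBB -> MergeBB, ElseBB -> MergeBB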
1984
1985 class VPInterleavedAccessInfo {
1986 DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
1987 InterleaveGroupMap;
1988
1989 /// Type for mapping of instruction based interleave groups to VPInstruction
1990 /// interleave groups
1991 using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
1992 InterleaveGroup<VPInstruction> *>;
1993
1994 /// Recursively traverse \p Region and populate VPlan based interleave groups
1995 /// based on \p IAI.
1996 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
1997 InterleavedAccessInfo &IAI);
1998 /// Recursively traverse \p Block and populate VPlan based interleave groups
1999 /// based on \p IAI.
2000 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
2001 InterleavedAccessInfo &IAI);
2002
2003 public:
2004 VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);
2005
2006 ~VPInterleavedAccessInfo() {
2007 SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
2008 // Avoid releasing a pointer twice.
2009 for (auto &I : InterleaveGroupMap)
2010 DelSet.insert(I.second);
2011 for (auto *Ptr : DelSet)
2012 delete Ptr;
2013 }
2014
2015 /// Get the interleave group that \p Instr belongs to.
2016 ///
2017 /// \returns nullptr if \p Instr does not belong to any interleave group.
2018 InterleaveGroup<VPInstruction> *
2019 getInterleaveGroup(VPInstruction *Instr) const {
2020 if (InterleaveGroupMap.count(Instr))
2021 return InterleaveGroupMap.find(Instr)->second;
2022 return nullptr;
2023 }
2024 };
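
// Typical query pattern (sketch; `VPIAI` wraps a plan and the IR-level
// InterleavedAccessInfo, `VPInst` is a hypothetical VPInstruction):
//
//   VPInterleavedAccessInfo VPIAI(*Plan, IAI);
//   if (auto *IG = VPIAI.getInterleaveGroup(VPInst))
//     (void)IG->getFactor(); // interleave factor, as in InterleaveGroup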
2025
2026 /// Class that maps (parts of) an existing VPlan to trees of combined
2027 /// VPInstructions.
2028 class VPlanSlp {
2029 enum class OpMode { Failed, Load, Opcode };
2030
2031 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
2032 /// DenseMap keys.
2033 struct BundleDenseMapInfo {
2034 static SmallVector<VPValue *, 4> getEmptyKey() {
2035 return {reinterpret_cast<VPValue *>(-1)};
2036 }
2037
2038 static SmallVector<VPValue *, 4> getTombstoneKey() {
2039 return {reinterpret_cast<VPValue *>(-2)};
2040 }
2041
2042 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
2043 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
2044 }
2045
2046 static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
2047 const SmallVector<VPValue *, 4> &RHS) {
2048 return LHS == RHS;
2049 }
2050 };
2051
2052 /// Mapping of values in the original VPlan to a combined VPInstruction.
2053 DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
2054 BundleToCombined;
2055
2056 VPInterleavedAccessInfo &IAI;
2057
2058 /// Basic block to operate on. For now, only instructions in a single BB are
2059 /// considered.
2060 const VPBasicBlock &BB;
2061
2062 /// Indicates whether we managed to combine all visited instructions or not.
2063 bool CompletelySLP = true;
2064
2065 /// Width of the widest combined bundle in bits.
2066 unsigned WidestBundleBits = 0;
2067
2068 using MultiNodeOpTy =
2069 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
2070
2071 // Input operand bundles for the current multi node. Each multi node operand
2072 // bundle contains values not matching the multi node's opcode. They will
2073 // be reordered in reorderMultiNodeOps, once we have completed building a
2074 // multi node.
2075 SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
2076
2077 /// Indicates whether we are building a multi node currently.
2078 bool MultiNodeActive = false;
2079
2080 /// Check if we can vectorize Operands together.
2081 bool areVectorizable(ArrayRef<VPValue *> Operands) const;
2082
2083 /// Add combined instruction \p New for the bundle \p Operands.
2084 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
2085
2086 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
2087 VPInstruction *markFailed();
2088
2089 /// Reorder operands in the multi node to maximize sequential memory access
2090 /// and commutative operations.
2091 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
2092
2093 /// Choose the best candidate to use for the lane after \p Last. The set of
2094 /// candidates to choose from consists of values with an opcode matching \p Last's
2095 /// or loads consecutive to \p Last.
2096 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
2097 SmallPtrSetImpl<VPValue *> &Candidates,
2098 VPInterleavedAccessInfo &IAI);
2099
2100 /// Print bundle \p Values to dbgs().
2101 void dumpBundle(ArrayRef<VPValue *> Values);
2102
2103 public:
2104 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
2105
2106 ~VPlanSlp() = default;
2107
2108 /// Tries to build an SLP tree rooted at \p Operands and returns a
2109 /// VPInstruction combining \p Operands, if they can be combined.
2110 VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
2111
2112 /// Return the width of the widest combined bundle in bits.
2113 unsigned getWidestBundleBits() const { return WidestBundleBits; }
2114
2115 /// Return true if all visited instructions can be combined.
2116 bool isCompletelySLP() const { return CompletelySLP; }
2117 };
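
// A minimal sketch of driving the SLP combiner (names hypothetical; the seed
// values are expected to live in the VPBasicBlock handed to VPlanSlp):
//
//   VPlanSlp Slp(VPIAI, *VPBB);
//   SmallVector<VPValue *, 4> Seeds = {StoreA, StoreB};
//   if (VPInstruction *Combined = Slp.buildGraph(Seeds))
//     (void)Slp.getWidestBundleBits();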
2118 } // end namespace llvm
2119
2120 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
2121