1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Move instructions between statements.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "polly/ForwardOpTree.h"
14 #include "polly/Options.h"
15 #include "polly/ScopBuilder.h"
16 #include "polly/ScopInfo.h"
17 #include "polly/ScopPass.h"
18 #include "polly/Support/GICHelper.h"
19 #include "polly/Support/ISLOStream.h"
20 #include "polly/Support/ISLTools.h"
21 #include "polly/Support/VirtualInstruction.h"
22 #include "polly/ZoneAlgo.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Analysis/LoopInfo.h"
27 #include "llvm/Analysis/ValueTracking.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Value.h"
31 #include "llvm/InitializePasses.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Compiler.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "isl/ctx.h"
39 #include "isl/isl-noexceptions.h"
40 #include <cassert>
41 #include <memory>
42 
43 #define DEBUG_TYPE "polly-optree"
44 
45 using namespace llvm;
46 using namespace polly;
47 
48 static cl::opt<bool>
49     AnalyzeKnown("polly-optree-analyze-known",
50                  cl::desc("Analyze array contents for load forwarding"),
51                  cl::cat(PollyCategory), cl::init(true), cl::Hidden);
52 
53 static cl::opt<bool>
54     NormalizePHIs("polly-optree-normalize-phi",
55                   cl::desc("Replace PHIs by their incoming values"),
56                   cl::cat(PollyCategory), cl::init(false), cl::Hidden);
57 
58 static cl::opt<unsigned>
59     MaxOps("polly-optree-max-ops",
60            cl::desc("Maximum number of ISL operations to invest for known "
61                     "analysis; 0=no limit"),
62            cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
63 
64 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
65 STATISTIC(KnownOutOfQuota,
66           "Analyses aborted because max_operations was reached");
67 
68 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
69 STATISTIC(TotalKnownLoadsForwarded,
70           "Number of forwarded loads because their value was known");
71 STATISTIC(TotalReloads, "Number of reloaded values");
72 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
73 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
74 STATISTIC(TotalModifiedStmts,
75           "Number of statements with at least one forwarded tree");
76 
77 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
78 
79 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
80 STATISTIC(NumValueWritesInLoops,
81           "Number of scalar value writes nested in affine loops after OpTree");
82 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
83 STATISTIC(NumPHIWritesInLoops,
84           "Number of scalar phi writes nested in affine loops after OpTree");
85 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
86 STATISTIC(NumSingletonWritesInLoops,
87           "Number of singleton writes nested in affine loops after OpTree");
88 
89 namespace {
90 
91 /// The state of whether an operand tree was/can be forwarded.
92 ///
93 /// The items apply to an instructions and its operand tree with the instruction
94 /// as the root element. If the value in question is not an instruction in the
95 /// SCoP, it can be a leaf of an instruction's operand tree.
96 enum ForwardingDecision {
97   /// An uninitialized value.
98   FD_Unknown,
99 
100   /// The root instruction or value cannot be forwarded at all.
101   FD_CannotForward,
102 
103   /// The root instruction or value can be forwarded as a leaf of a larger
104   /// operand tree.
105   /// It does not make sense to move the value itself, it would just replace it
106   /// by a use of itself. For instance, a constant "5" used in a statement can
107   /// be forwarded, but it would just replace it by the same constant "5".
108   /// However, it makes sense to move as an operand of
109   ///
110   ///   %add = add 5, 5
111   ///
112   /// where "5" is moved as part of a larger operand tree. "5" would be placed
113   /// (disregarding for a moment that literal constants don't have a location
114   /// and can be used anywhere) into the same statement as %add would.
115   FD_CanForwardLeaf,
116 
117   /// The root instruction can be forwarded and doing so avoids a scalar
118   /// dependency.
119   ///
120   /// This can be either because the operand tree can be moved to the target
121   /// statement, or a memory access is redirected to read from a different
122   /// location.
123   FD_CanForwardProfitably,
124 
125   /// A forwarding method cannot be applied to the operand tree.
126   /// The difference to FD_CannotForward is that there might be other methods
127   /// that can handle it.
128   FD_NotApplicable
129 };
130 
131 /// Represents the evaluation of and action to taken when forwarding a value
132 /// from an operand tree.
133 struct ForwardingAction {
134   using KeyTy = std::pair<Value *, ScopStmt *>;
135 
136   /// Evaluation of forwarding a value.
137   ForwardingDecision Decision = FD_Unknown;
138 
139   /// Callback to execute the forwarding.
140   /// Returning true allows deleting the polly::MemoryAccess if the value is the
141   /// root of the operand tree (and its elimination the reason why the
142   /// forwarding is done). Return false if the MemoryAccess is reused or there
143   /// might be other users of the read accesses. In the letter case the
144   /// polly::SimplifyPass can remove dead MemoryAccesses.
__anon391836e40202__anon391836e40111::ForwardingAction145   std::function<bool()> Execute = []() -> bool {
146     llvm_unreachable("unspecified how to forward");
147   };
148 
149   /// Other values that need to be forwarded if this action is executed. Their
150   /// actions are executed after this one.
151   SmallVector<KeyTy, 4> Depends;
152 
153   /// Named ctor: The method creating this object does not apply to the kind of
154   /// value, but other methods may.
notApplicable__anon391836e40111::ForwardingAction155   static ForwardingAction notApplicable() {
156     ForwardingAction Result;
157     Result.Decision = FD_NotApplicable;
158     return Result;
159   }
160 
161   /// Named ctor: The value cannot be forwarded.
cannotForward__anon391836e40111::ForwardingAction162   static ForwardingAction cannotForward() {
163     ForwardingAction Result;
164     Result.Decision = FD_CannotForward;
165     return Result;
166   }
167 
168   /// Named ctor: The value can just be used without any preparation.
triviallyForwardable__anon391836e40111::ForwardingAction169   static ForwardingAction triviallyForwardable(bool IsProfitable, Value *Val) {
170     ForwardingAction Result;
171     Result.Decision =
172         IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf;
173     Result.Execute = [=]() {
174       LLVM_DEBUG(dbgs() << "    trivially forwarded: " << *Val << "\n");
175       return true;
176     };
177     return Result;
178   }
179 
180   /// Name ctor: The value can be forwarded by executing an action.
canForward__anon391836e40111::ForwardingAction181   static ForwardingAction canForward(std::function<bool()> Execute,
182                                      ArrayRef<KeyTy> Depends,
183                                      bool IsProfitable) {
184     ForwardingAction Result;
185     Result.Decision =
186         IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf;
187     Result.Execute = std::move(Execute);
188     Result.Depends.append(Depends.begin(), Depends.end());
189     return Result;
190   }
191 };
192 
193 /// Implementation of operand tree forwarding for a specific SCoP.
194 ///
195 /// For a statement that requires a scalar value (through a value read
196 /// MemoryAccess), see if its operand can be moved into the statement. If so,
197 /// the MemoryAccess is removed and the all the operand tree instructions are
198 /// moved into the statement. All original instructions are left in the source
199 /// statements. The simplification pass can clean these up.
200 class ForwardOpTreeImpl : ZoneAlgorithm {
201 private:
202   using MemoizationTy = DenseMap<ForwardingAction::KeyTy, ForwardingAction>;
203 
204   /// Scope guard to limit the number of isl operations for this pass.
205   IslMaxOperationsGuard &MaxOpGuard;
206 
207   /// How many instructions have been copied to other statements.
208   int NumInstructionsCopied = 0;
209 
210   /// Number of loads forwarded because their value was known.
211   int NumKnownLoadsForwarded = 0;
212 
213   /// Number of values reloaded from known array elements.
214   int NumReloads = 0;
215 
216   /// How many read-only accesses have been copied.
217   int NumReadOnlyCopied = 0;
218 
219   /// How many operand trees have been forwarded.
220   int NumForwardedTrees = 0;
221 
222   /// Number of statements with at least one forwarded operand tree.
223   int NumModifiedStmts = 0;
224 
225   /// Whether we carried out at least one change to the SCoP.
226   bool Modified = false;
227 
228   /// Cache of how to forward values.
229   /// The key of this map is the llvm::Value to be forwarded and the
230   /// polly::ScopStmt it is forwarded from. This is because the same llvm::Value
231   /// can evaluate differently depending on where it is evaluate. For instance,
232   /// a synthesizable Scev represents a recurrence with an loop but the loop's
233   /// exit value if evaluated after the loop.
234   /// The cached results are only valid for the current TargetStmt.
235   /// CHECKME: ScalarEvolution::getScevAtScope should take care for getting the
236   /// exit value when instantiated outside of the loop. The primary concern is
237   /// ambiguity when crossing PHI nodes, which currently is not supported.
238   MemoizationTy ForwardingActions;
239 
240   /// Contains the zones where array elements are known to contain a specific
241   /// value.
242   /// { [Element[] -> Zone[]] -> ValInst[] }
243   /// @see computeKnown()
244   isl::union_map Known;
245 
246   /// Translator for newly introduced ValInsts to already existing ValInsts such
247   /// that new introduced load instructions can reuse the Known analysis of its
248   /// original load. { ValInst[] -> ValInst[] }
249   isl::union_map Translator;
250 
251   /// Get list of array elements that do contain the same ValInst[] at Domain[].
252   ///
253   /// @param ValInst { Domain[] -> ValInst[] }
254   ///                The values for which we search for alternative locations,
255   ///                per statement instance.
256   ///
257   /// @return { Domain[] -> Element[] }
258   ///         For each statement instance, the array elements that contain the
259   ///         same ValInst.
findSameContentElements(isl::union_map ValInst)260   isl::union_map findSameContentElements(isl::union_map ValInst) {
261     assert(!ValInst.is_single_valued().is_false());
262 
263     // { Domain[] }
264     isl::union_set Domain = ValInst.domain();
265 
266     // { Domain[] -> Scatter[] }
267     isl::union_map Schedule = getScatterFor(Domain);
268 
269     // { Element[] -> [Scatter[] -> ValInst[]] }
270     isl::union_map MustKnownCurried =
271         convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
272 
273     // { [Domain[] -> ValInst[]] -> Scatter[] }
274     isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
275 
276     // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
277     isl::union_map SchedValDomVal =
278         DomValSched.range_product(ValInst.range_map()).reverse();
279 
280     // { Element[] -> [Domain[] -> ValInst[]] }
281     isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
282 
283     // { Domain[] -> Element[] }
284     isl::union_map MustKnownMap =
285         MustKnownInst.uncurry().domain().unwrap().reverse();
286     simplify(MustKnownMap);
287 
288     return MustKnownMap;
289   }
290 
291   /// Find a single array element for each statement instance, within a single
292   /// array.
293   ///
294   /// @param MustKnown { Domain[] -> Element[] }
295   ///                  Set of candidate array elements.
296   /// @param Domain    { Domain[] }
297   ///                  The statement instance for which we need elements for.
298   ///
299   /// @return { Domain[] -> Element[] }
300   ///         For each statement instance, an array element out of @p MustKnown.
301   ///         All array elements must be in the same array (Polly does not yet
302   ///         support reading from different accesses using the same
303   ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
304   ///         null.
singleLocation(isl::union_map MustKnown,isl::set Domain)305   isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
306     // { Domain[] -> Element[] }
307     isl::map Result;
308 
309     // MemoryAccesses can read only elements from a single array
310     // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
311     // Look through all spaces until we find one that contains at least the
312     // wanted statement instance.s
313     for (isl::map Map : MustKnown.get_map_list()) {
314       // Get the array this is accessing.
315       isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
316       ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
317 
318       // No support for generation of indirect array accesses.
319       if (SAI->getBasePtrOriginSAI())
320         continue;
321 
322       // Determine whether this map contains all wanted values.
323       isl::set MapDom = Map.domain();
324       if (!Domain.is_subset(MapDom).is_true())
325         continue;
326 
327       // There might be multiple array elements that contain the same value, but
328       // choose only one of them. lexmin is used because it returns a one-value
329       // mapping, we do not care about which one.
330       // TODO: Get the simplest access function.
331       Result = Map.lexmin();
332       break;
333     }
334 
335     return Result;
336   }
337 
338 public:
ForwardOpTreeImpl(Scop * S,LoopInfo * LI,IslMaxOperationsGuard & MaxOpGuard)339   ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
340       : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
341 
342   /// Compute the zones of known array element contents.
343   ///
344   /// @return True if the computed #Known is usable.
computeKnownValues()345   bool computeKnownValues() {
346     isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
347 
348     // Check that nothing strange occurs.
349     collectCompatibleElts();
350 
351     {
352       IslQuotaScope QuotaScope = MaxOpGuard.enter();
353 
354       computeCommon();
355       if (NormalizePHIs)
356         computeNormalizedPHIs();
357       Known = computeKnown(true, true);
358 
359       // Preexisting ValInsts use the known content analysis of themselves.
360       Translator = makeIdentityMap(Known.range(), false);
361     }
362 
363     if (!Known || !Translator || !NormalizeMap) {
364       assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
365       Known = nullptr;
366       Translator = nullptr;
367       NormalizeMap = nullptr;
368       LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
369       return false;
370     }
371 
372     KnownAnalyzed++;
373     LLVM_DEBUG(dbgs() << "All known: " << Known << "\n");
374 
375     return true;
376   }
377 
printStatistics(raw_ostream & OS,int Indent=0)378   void printStatistics(raw_ostream &OS, int Indent = 0) {
379     OS.indent(Indent) << "Statistics {\n";
380     OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
381                           << '\n';
382     OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
383                           << '\n';
384     OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
385     OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
386                           << '\n';
387     OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
388                           << '\n';
389     OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
390                           << NumModifiedStmts << '\n';
391     OS.indent(Indent) << "}\n";
392   }
393 
printStatements(raw_ostream & OS,int Indent=0) const394   void printStatements(raw_ostream &OS, int Indent = 0) const {
395     OS.indent(Indent) << "After statements {\n";
396     for (auto &Stmt : *S) {
397       OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
398       for (auto *MA : Stmt)
399         MA->print(OS);
400 
401       OS.indent(Indent + 12);
402       Stmt.printInstructions(OS);
403     }
404     OS.indent(Indent) << "}\n";
405   }
406 
407   /// Create a new MemoryAccess of type read and MemoryKind::Array.
408   ///
409   /// @param Stmt           The statement in which the access occurs.
410   /// @param LI             The instruction that does the access.
411   /// @param AccessRelation The array element that each statement instance
412   ///                       accesses.
413   ///
414   /// @param The newly created access.
makeReadArrayAccess(ScopStmt * Stmt,LoadInst * LI,isl::map AccessRelation)415   MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
416                                     isl::map AccessRelation) {
417     isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
418     ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
419 
420     // Create a dummy SCEV access, to be replaced anyway.
421     SmallVector<const SCEV *, 4> Sizes;
422     Sizes.reserve(SAI->getNumberOfDimensions());
423     SmallVector<const SCEV *, 4> Subscripts;
424     Subscripts.reserve(SAI->getNumberOfDimensions());
425     for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
426       Sizes.push_back(SAI->getDimensionSize(i));
427       Subscripts.push_back(nullptr);
428     }
429 
430     MemoryAccess *Access =
431         new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
432                          LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
433     S->addAccessFunction(Access);
434     Stmt->addAccess(Access, true);
435 
436     Access->setNewAccessRelation(AccessRelation);
437 
438     return Access;
439   }
440 
441   /// Forward a load by reading from an array element that contains the same
442   /// value. Typically the location it was loaded from.
443   ///
444   /// @param TargetStmt  The statement the operand tree will be copied to.
445   /// @param Inst        The (possibly speculatable) instruction to forward.
446   /// @param UseStmt     The statement that uses @p Inst.
447   /// @param UseLoop     The loop @p Inst is used in.
448   /// @param DefStmt     The statement @p Inst is defined in.
449   /// @param DefLoop     The loop which contains @p Inst.
450   ///
451   /// @return A ForwardingAction object describing the feasibility and
452   ///         profitability evaluation and the callback carrying-out the value
453   ///         forwarding.
forwardKnownLoad(ScopStmt * TargetStmt,Instruction * Inst,ScopStmt * UseStmt,Loop * UseLoop,ScopStmt * DefStmt,Loop * DefLoop)454   ForwardingAction forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
455                                     ScopStmt *UseStmt, Loop *UseLoop,
456                                     ScopStmt *DefStmt, Loop *DefLoop) {
457     // Cannot do anything without successful known analysis.
458     if (Known.is_null() || Translator.is_null() ||
459         MaxOpGuard.hasQuotaExceeded())
460       return ForwardingAction::notApplicable();
461 
462     LoadInst *LI = dyn_cast<LoadInst>(Inst);
463     if (!LI)
464       return ForwardingAction::notApplicable();
465 
466     ForwardingDecision OpDecision =
467         forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop);
468     switch (OpDecision) {
469     case FD_CanForwardProfitably:
470     case FD_CanForwardLeaf:
471       break;
472     case FD_CannotForward:
473       return ForwardingAction::cannotForward();
474     default:
475       llvm_unreachable("Shouldn't return this");
476     }
477 
478     MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
479     if (Access) {
480       // If the load is already in the statement, no forwarding is necessary.
481       // However, it might happen that the LoadInst is already present in the
482       // statement's instruction list. In that case we do as follows:
483       // - For the evaluation, we can trivially forward it as it is
484       //   benefit of forwarding an already present instruction.
485       // - For the execution, prepend the instruction (to make it
486       //   available to all instructions following in the instruction list), but
487       //   do not add another MemoryAccess.
488       auto ExecAction = [this, TargetStmt, LI, Access]() -> bool {
489         TargetStmt->prependInstruction(LI);
490         LLVM_DEBUG(
491             dbgs() << "    forwarded known load with preexisting MemoryAccess"
492                    << Access << "\n");
493         (void)Access;
494 
495         NumKnownLoadsForwarded++;
496         TotalKnownLoadsForwarded++;
497         return true;
498       };
499       return ForwardingAction::canForward(
500           ExecAction, {{LI->getPointerOperand(), DefStmt}}, true);
501     }
502 
503     // Allow the following Isl calculations (until we return the
504     // ForwardingAction, excluding the code inside the lambda that will be
505     // executed later) to fail.
506     IslQuotaScope QuotaScope = MaxOpGuard.enter();
507 
508     // { DomainDef[] -> ValInst[] }
509     isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
510     assert(!isNormalized(ExpectedVal).is_false() &&
511            "LoadInsts are always normalized");
512 
513     // { DomainUse[] -> DomainTarget[] }
514     isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
515 
516     // { DomainTarget[] -> ValInst[] }
517     isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
518     isl::union_map TranslatedExpectedVal =
519         isl::union_map(TargetExpectedVal).apply_range(Translator);
520 
521     // { DomainTarget[] -> Element[] }
522     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
523 
524     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
525     if (!SameVal)
526       return ForwardingAction::notApplicable();
527 
528     LLVM_DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
529                       << "\n");
530     LLVM_DEBUG(dbgs() << "      candidate elements where " << Candidates
531                       << "\n");
532 
533     // { ValInst[] }
534     isl::space ValInstSpace = ExpectedVal.get_space().range();
535 
536     // After adding a new load to the SCoP, also update the Known content
537     // about it. The new load will have a known ValInst of
538     // { [DomainTarget[] -> Value[]] }
539     // but which -- because it is a copy of it -- has same value as the
540     // { [DomainDef[] -> Value[]] }
541     // that it replicates. Instead of  cloning the known content of
542     // [DomainDef[] -> Value[]]
543     // for DomainTarget[], we add a 'translator' that maps
544     // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
545     // before comparing to the known content.
546     // TODO: 'Translator' could also be used to map PHINodes to their incoming
547     // ValInsts.
548     isl::map LocalTranslator;
549     if (!ValInstSpace.is_wrapping().is_false()) {
550       // { DefDomain[] -> Value[] }
551       isl::map ValInsts = ExpectedVal.range().unwrap();
552 
553       // { DefDomain[] }
554       isl::set DefDomain = ValInsts.domain();
555 
556       // { Value[] }
557       isl::space ValSpace = ValInstSpace.unwrap().range();
558 
559       // { Value[] -> Value[] }
560       isl::map ValToVal =
561           isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
562 
563       // { DomainDef[] -> DomainTarget[] }
564       isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
565 
566       // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
567       LocalTranslator = DefToTarget.reverse().product(ValToVal);
568       LLVM_DEBUG(dbgs() << "      local translator is " << LocalTranslator
569                         << "\n");
570 
571       if (!LocalTranslator)
572         return ForwardingAction::notApplicable();
573     }
574 
575     auto ExecAction = [this, TargetStmt, LI, SameVal,
576                        LocalTranslator]() -> bool {
577       TargetStmt->prependInstruction(LI);
578       MemoryAccess *Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
579       LLVM_DEBUG(dbgs() << "    forwarded known load with new MemoryAccess"
580                         << Access << "\n");
581       (void)Access;
582 
583       if (LocalTranslator)
584         Translator = Translator.add_map(LocalTranslator);
585 
586       NumKnownLoadsForwarded++;
587       TotalKnownLoadsForwarded++;
588       return true;
589     };
590     return ForwardingAction::canForward(
591         ExecAction, {{LI->getPointerOperand(), DefStmt}}, true);
592   }
593 
594   /// Forward a scalar by redirecting the access to an array element that stores
595   /// the same value.
596   ///
597   /// @param TargetStmt  The statement the operand tree will be copied to.
598   /// @param Inst        The scalar to forward.
599   /// @param UseStmt     The statement that uses @p Inst.
600   /// @param UseLoop     The loop @p Inst is used in.
601   /// @param DefStmt     The statement @p Inst is defined in.
602   /// @param DefLoop     The loop which contains @p Inst.
603   ///
604   /// @return A ForwardingAction object describing the feasibility and
605   ///         profitability evaluation and the callback carrying-out the value
606   ///         forwarding.
reloadKnownContent(ScopStmt * TargetStmt,Instruction * Inst,ScopStmt * UseStmt,Loop * UseLoop,ScopStmt * DefStmt,Loop * DefLoop)607   ForwardingAction reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
608                                       ScopStmt *UseStmt, Loop *UseLoop,
609                                       ScopStmt *DefStmt, Loop *DefLoop) {
610     // Cannot do anything without successful known analysis.
611     if (Known.is_null() || Translator.is_null() ||
612         MaxOpGuard.hasQuotaExceeded())
613       return ForwardingAction::notApplicable();
614 
615     // Don't spend too much time analyzing whether it can be reloaded.
616     IslQuotaScope QuotaScope = MaxOpGuard.enter();
617 
618     // { DomainDef[] -> ValInst[] }
619     isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
620 
621     // { DomainUse[] -> DomainTarget[] }
622     isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
623 
624     // { DomainTarget[] -> ValInst[] }
625     isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
626     isl::union_map TranslatedExpectedVal =
627         TargetExpectedVal.apply_range(Translator);
628 
629     // { DomainTarget[] -> Element[] }
630     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
631 
632     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
633     simplify(SameVal);
634     if (!SameVal)
635       return ForwardingAction::notApplicable();
636 
637     auto ExecAction = [this, TargetStmt, Inst, SameVal]() {
638       MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
639       if (!Access)
640         Access = TargetStmt->ensureValueRead(Inst);
641       Access->setNewAccessRelation(SameVal);
642 
643       LLVM_DEBUG(dbgs() << "    forwarded known content of " << *Inst
644                         << " which is " << SameVal << "\n");
645       TotalReloads++;
646       NumReloads++;
647       return false;
648     };
649 
650     return ForwardingAction::canForward(ExecAction, {}, true);
651   }
652 
653   /// Forwards a speculatively executable instruction.
654   ///
655   /// @param TargetStmt  The statement the operand tree will be copied to.
656   /// @param UseInst     The (possibly speculatable) instruction to forward.
657   /// @param DefStmt     The statement @p UseInst is defined in.
658   /// @param DefLoop     The loop which contains @p UseInst.
659   ///
660   /// @return A ForwardingAction object describing the feasibility and
661   ///         profitability evaluation and the callback carrying-out the value
662   ///         forwarding.
forwardSpeculatable(ScopStmt * TargetStmt,Instruction * UseInst,ScopStmt * DefStmt,Loop * DefLoop)663   ForwardingAction forwardSpeculatable(ScopStmt *TargetStmt,
664                                        Instruction *UseInst, ScopStmt *DefStmt,
665                                        Loop *DefLoop) {
666     // PHIs, unless synthesizable, are not yet supported.
667     if (isa<PHINode>(UseInst))
668       return ForwardingAction::notApplicable();
669 
670     // Compatible instructions must satisfy the following conditions:
671     // 1. Idempotent (instruction will be copied, not moved; although its
672     //    original instance might be removed by simplification)
673     // 2. Not access memory (There might be memory writes between)
674     // 3. Not cause undefined behaviour (we might copy to a location when the
675     //    original instruction was no executed; this is currently not possible
676     //    because we do not forward PHINodes)
677     // 4. Not leak memory if executed multiple times (i.e. malloc)
678     //
679     // Instruction::mayHaveSideEffects is not sufficient because it considers
680     // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
681     // not sufficient because it allows memory accesses.
682     if (mayBeMemoryDependent(*UseInst))
683       return ForwardingAction::notApplicable();
684 
685     SmallVector<ForwardingAction::KeyTy, 4> Depends;
686     Depends.reserve(UseInst->getNumOperands());
687     for (Value *OpVal : UseInst->operand_values()) {
688       ForwardingDecision OpDecision =
689           forwardTree(TargetStmt, OpVal, DefStmt, DefLoop);
690       switch (OpDecision) {
691       case FD_CannotForward:
692         return ForwardingAction::cannotForward();
693 
694       case FD_CanForwardLeaf:
695       case FD_CanForwardProfitably:
696         Depends.emplace_back(OpVal, DefStmt);
697         break;
698 
699       case FD_NotApplicable:
700       case FD_Unknown:
701         llvm_unreachable(
702             "forwardTree should never return FD_NotApplicable/FD_Unknown");
703       }
704     }
705 
706     auto ExecAction = [this, TargetStmt, UseInst]() {
707       // To ensure the right order, prepend this instruction before its
708       // operands. This ensures that its operands are inserted before the
709       // instruction using them.
710       TargetStmt->prependInstruction(UseInst);
711 
712       LLVM_DEBUG(dbgs() << "    forwarded speculable instruction: " << *UseInst
713                         << "\n");
714       NumInstructionsCopied++;
715       TotalInstructionsCopied++;
716       return true;
717     };
718     return ForwardingAction::canForward(ExecAction, Depends, true);
719   }
720 
721   /// Determines whether an operand tree can be forwarded and returns
722   /// instructions how to do so in the form of a ForwardingAction object.
723   ///
724   /// @param TargetStmt  The statement the operand tree will be copied to.
725   /// @param UseVal      The value (usually an instruction) which is root of an
726   ///                    operand tree.
727   /// @param UseStmt     The statement that uses @p UseVal.
728   /// @param UseLoop     The loop @p UseVal is used in.
729   ///
730   /// @return A ForwardingAction object describing the feasibility and
731   ///         profitability evaluation and the callback carrying-out the value
732   ///         forwarding.
forwardTreeImpl(ScopStmt * TargetStmt,Value * UseVal,ScopStmt * UseStmt,Loop * UseLoop)733   ForwardingAction forwardTreeImpl(ScopStmt *TargetStmt, Value *UseVal,
734                                    ScopStmt *UseStmt, Loop *UseLoop) {
735     ScopStmt *DefStmt = nullptr;
736     Loop *DefLoop = nullptr;
737 
738     // { DefDomain[] -> TargetDomain[] }
739     isl::map DefToTarget;
740 
741     VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
742     switch (VUse.getKind()) {
743     case VirtualUse::Constant:
744     case VirtualUse::Block:
745     case VirtualUse::Hoisted:
746       // These can be used anywhere without special considerations.
747       return ForwardingAction::triviallyForwardable(false, UseVal);
748 
749     case VirtualUse::Synthesizable: {
750       // Check if the value is synthesizable at the new location as well. This
751       // might be possible when leaving a loop for which ScalarEvolution is
752       // unable to derive the exit value for.
753       // TODO: If there is a LCSSA PHI at the loop exit, use that one.
754       // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
755       // do not forward past its loop header. This would require us to use a
756       // previous loop induction variable instead the current one. We currently
757       // do not allow forwarding PHI nodes, thus this should never occur (the
758       // only exception where no phi is necessary being an unreachable loop
759       // without edge from the outside).
760       VirtualUse TargetUse = VirtualUse::create(
761           S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
762       if (TargetUse.getKind() == VirtualUse::Synthesizable)
763         return ForwardingAction::triviallyForwardable(false, UseVal);
764 
765       LLVM_DEBUG(
766           dbgs() << "    Synthesizable would not be synthesizable anymore: "
767                  << *UseVal << "\n");
768       return ForwardingAction::cannotForward();
769     }
770 
771     case VirtualUse::ReadOnly: {
772       if (!ModelReadOnlyScalars)
773         return ForwardingAction::triviallyForwardable(false, UseVal);
774 
775       // If we model read-only scalars, we need to create a MemoryAccess for it.
776       auto ExecAction = [this, TargetStmt, UseVal]() {
777         TargetStmt->ensureValueRead(UseVal);
778 
779         LLVM_DEBUG(dbgs() << "    forwarded read-only value " << *UseVal
780                           << "\n");
781         NumReadOnlyCopied++;
782         TotalReadOnlyCopied++;
783 
784         // Note that we cannot return true here. With a operand tree
785         // depth of 0, UseVal is the use in TargetStmt that we try to replace.
786         // With -polly-analyze-read-only-scalars=true we would ensure the
787         // existence of a MemoryAccess (which already exists for a leaf) and be
788         // removed again by tryForwardTree because it's goal is to remove this
789         // scalar MemoryAccess. It interprets FD_CanForwardTree as the
790         // permission to do so.
791         return false;
792       };
793       return ForwardingAction::canForward(ExecAction, {}, false);
794     }
795 
796     case VirtualUse::Intra:
797       // Knowing that UseStmt and DefStmt are the same statement instance, just
798       // reuse the information about UseStmt for DefStmt
799       DefStmt = UseStmt;
800 
801       LLVM_FALLTHROUGH;
802     case VirtualUse::Inter:
803       Instruction *Inst = cast<Instruction>(UseVal);
804 
805       if (!DefStmt) {
806         DefStmt = S->getStmtFor(Inst);
807         if (!DefStmt)
808           return ForwardingAction::cannotForward();
809       }
810 
811       DefLoop = LI->getLoopFor(Inst->getParent());
812 
813       ForwardingAction SpeculativeResult =
814           forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop);
815       if (SpeculativeResult.Decision != FD_NotApplicable)
816         return SpeculativeResult;
817 
818       ForwardingAction KnownResult = forwardKnownLoad(
819           TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop);
820       if (KnownResult.Decision != FD_NotApplicable)
821         return KnownResult;
822 
823       ForwardingAction ReloadResult = reloadKnownContent(
824           TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop);
825       if (ReloadResult.Decision != FD_NotApplicable)
826         return ReloadResult;
827 
828       // When no method is found to forward the operand tree, we effectively
829       // cannot handle it.
830       LLVM_DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
831       return ForwardingAction::cannotForward();
832     }
833 
834     llvm_unreachable("Case unhandled");
835   }
836 
837   /// Determines whether an operand tree can be forwarded. Previous evaluations
838   /// are cached.
839   ///
840   /// @param TargetStmt  The statement the operand tree will be copied to.
841   /// @param UseVal      The value (usually an instruction) which is root of an
842   ///                    operand tree.
843   /// @param UseStmt     The statement that uses @p UseVal.
844   /// @param UseLoop     The loop @p UseVal is used in.
845   ///
846   /// @return FD_CannotForward        if @p UseVal cannot be forwarded.
847   ///         FD_CanForwardLeaf       if @p UseVal is forwardable, but not
848   ///                                 profitable.
849   ///         FD_CanForwardProfitably if @p UseVal is forwardable and useful to
850   ///                                 do.
forwardTree(ScopStmt * TargetStmt,Value * UseVal,ScopStmt * UseStmt,Loop * UseLoop)851   ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
852                                  ScopStmt *UseStmt, Loop *UseLoop) {
853     // Lookup any cached evaluation.
854     auto It = ForwardingActions.find({UseVal, UseStmt});
855     if (It != ForwardingActions.end())
856       return It->second.Decision;
857 
858     // Make a new evaluation.
859     ForwardingAction Action =
860         forwardTreeImpl(TargetStmt, UseVal, UseStmt, UseLoop);
861     ForwardingDecision Result = Action.Decision;
862 
863     // Remember for the next time.
864     assert(!ForwardingActions.count({UseVal, UseStmt}) &&
865            "circular dependency?");
866     ForwardingActions.insert({{UseVal, UseStmt}, std::move(Action)});
867 
868     return Result;
869   }
870 
871   /// Forward an operand tree using cached actions.
872   ///
873   /// @param Stmt   Statement the operand tree is moved into.
874   /// @param UseVal Root of the operand tree within @p Stmt.
875   /// @param RA     The MemoryAccess for @p UseVal that the forwarding intends
876   ///               to remove.
applyForwardingActions(ScopStmt * Stmt,Value * UseVal,MemoryAccess * RA)877   void applyForwardingActions(ScopStmt *Stmt, Value *UseVal, MemoryAccess *RA) {
878     using ChildItTy =
879         decltype(std::declval<ForwardingAction>().Depends.begin());
880     using EdgeTy = std::pair<ForwardingAction *, ChildItTy>;
881 
882     DenseSet<ForwardingAction::KeyTy> Visited;
883     SmallVector<EdgeTy, 32> Stack;
884     SmallVector<ForwardingAction *, 32> Ordered;
885 
886     // Seed the tree search using the root value.
887     assert(ForwardingActions.count({UseVal, Stmt}));
888     ForwardingAction *RootAction = &ForwardingActions[{UseVal, Stmt}];
889     Stack.emplace_back(RootAction, RootAction->Depends.begin());
890 
891     // Compute the postorder of the operand tree: all operands of an instruction
892     // must be visited before the instruction itself. As an additional
893     // requirement, the topological ordering must be 'compact': Any subtree node
894     // must not be interleaved with nodes from a non-shared subtree. This is
895     // because the same llvm::Instruction can be materialized multiple times as
896     // used at different ScopStmts which might be different values. Intersecting
897     // these lifetimes may result in miscompilations.
898     // FIXME: Intersecting lifetimes might still be possible for the roots
899     // themselves, since instructions are just prepended to a ScopStmt's
900     // instruction list.
901     while (!Stack.empty()) {
902       EdgeTy &Top = Stack.back();
903       ForwardingAction *TopAction = Top.first;
904       ChildItTy &TopEdge = Top.second;
905 
906       if (TopEdge == TopAction->Depends.end()) {
907         // Postorder sorting
908         Ordered.push_back(TopAction);
909         Stack.pop_back();
910         continue;
911       }
912       ForwardingAction::KeyTy Key = *TopEdge;
913 
914       // Next edge for this level
915       ++TopEdge;
916 
917       auto VisitIt = Visited.insert(Key);
918       if (!VisitIt.second)
919         continue;
920 
921       assert(ForwardingActions.count(Key) &&
922              "Must not insert new actions during execution phase");
923       ForwardingAction *ChildAction = &ForwardingActions[Key];
924       Stack.emplace_back(ChildAction, ChildAction->Depends.begin());
925     }
926 
927     // Actually, we need the reverse postorder because actions prepend new
928     // instructions. Therefore, the first one will always be the action for the
929     // operand tree's root.
930     assert(Ordered.back() == RootAction);
931     if (RootAction->Execute())
932       Stmt->removeSingleMemoryAccess(RA);
933     Ordered.pop_back();
934     for (auto DepAction : reverse(Ordered)) {
935       assert(DepAction->Decision != FD_Unknown &&
936              DepAction->Decision != FD_CannotForward);
937       assert(DepAction != RootAction);
938       DepAction->Execute();
939     }
940   }
941 
942   /// Try to forward an operand tree rooted in @p RA.
tryForwardTree(MemoryAccess * RA)943   bool tryForwardTree(MemoryAccess *RA) {
944     assert(RA->isLatestScalarKind());
945     LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
946 
947     ScopStmt *Stmt = RA->getStatement();
948     Loop *InLoop = Stmt->getSurroundingLoop();
949 
950     isl::map TargetToUse;
951     if (!Known.is_null()) {
952       isl::space DomSpace = Stmt->getDomainSpace();
953       TargetToUse =
954           isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
955     }
956 
957     ForwardingDecision Assessment =
958         forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop);
959 
960     // If considered feasible and profitable, forward it.
961     bool Changed = false;
962     if (Assessment == FD_CanForwardProfitably) {
963       applyForwardingActions(Stmt, RA->getAccessValue(), RA);
964       Changed = true;
965     }
966 
967     ForwardingActions.clear();
968     return Changed;
969   }
970 
971   /// Return which SCoP this instance is processing.
getScop() const972   Scop *getScop() const { return S; }
973 
974   /// Run the algorithm: Use value read accesses as operand tree roots and try
975   /// to forward them into the statement.
forwardOperandTrees()976   bool forwardOperandTrees() {
977     for (ScopStmt &Stmt : *S) {
978       bool StmtModified = false;
979 
980       // Because we are modifying the MemoryAccess list, collect them first to
981       // avoid iterator invalidation.
982       SmallVector<MemoryAccess *, 16> Accs(Stmt.begin(), Stmt.end());
983 
984       for (MemoryAccess *RA : Accs) {
985         if (!RA->isRead())
986           continue;
987         if (!RA->isLatestScalarKind())
988           continue;
989 
990         if (tryForwardTree(RA)) {
991           Modified = true;
992           StmtModified = true;
993           NumForwardedTrees++;
994           TotalForwardedTrees++;
995         }
996       }
997 
998       if (StmtModified) {
999         NumModifiedStmts++;
1000         TotalModifiedStmts++;
1001       }
1002     }
1003 
1004     if (Modified) {
1005       ScopsModified++;
1006       S->realignParams();
1007     }
1008     return Modified;
1009   }
1010 
1011   /// Print the pass result, performed transformations and the SCoP after the
1012   /// transformation.
print(raw_ostream & OS,int Indent=0)1013   void print(raw_ostream &OS, int Indent = 0) {
1014     printStatistics(OS, Indent);
1015 
1016     if (!Modified) {
1017       // This line can easily be checked in regression tests.
1018       OS << "ForwardOpTree executed, but did not modify anything\n";
1019       return;
1020     }
1021 
1022     printStatements(OS, Indent);
1023   }
1024 };
1025 
1026 /// Pass that redirects scalar reads to array elements that are known to contain
1027 /// the same value.
1028 ///
1029 /// This reduces the number of scalar accesses and therefore potentially
1030 /// increases the freedom of the scheduler. In the ideal case, all reads of a
1031 /// scalar definition are redirected (We currently do not care about removing
1032 /// the write in this case).  This is also useful for the main DeLICM pass as
1033 /// there are less scalars to be mapped.
1034 class ForwardOpTree : public ScopPass {
1035 private:
1036   /// The pass implementation, also holding per-scop data.
1037   std::unique_ptr<ForwardOpTreeImpl> Impl;
1038 
1039 public:
1040   static char ID;
1041 
ForwardOpTree()1042   explicit ForwardOpTree() : ScopPass(ID) {}
1043   ForwardOpTree(const ForwardOpTree &) = delete;
1044   ForwardOpTree &operator=(const ForwardOpTree &) = delete;
1045 
getAnalysisUsage(AnalysisUsage & AU) const1046   void getAnalysisUsage(AnalysisUsage &AU) const override {
1047     AU.addRequiredTransitive<ScopInfoRegionPass>();
1048     AU.addRequired<LoopInfoWrapperPass>();
1049     AU.setPreservesAll();
1050   }
1051 
runOnScop(Scop & S)1052   bool runOnScop(Scop &S) override {
1053     // Free resources for previous SCoP's computation, if not yet done.
1054     releaseMemory();
1055 
1056     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1057 
1058     {
1059       IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
1060       Impl = std::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
1061 
1062       if (AnalyzeKnown) {
1063         LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
1064         Impl->computeKnownValues();
1065       }
1066 
1067       LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
1068       Impl->forwardOperandTrees();
1069 
1070       if (MaxOpGuard.hasQuotaExceeded()) {
1071         LLVM_DEBUG(dbgs() << "Not all operations completed because of "
1072                              "max_operations exceeded\n");
1073         KnownOutOfQuota++;
1074       }
1075     }
1076 
1077     LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
1078     LLVM_DEBUG(dbgs() << S);
1079 
1080     // Update statistics
1081     auto ScopStats = S.getStatistics();
1082     NumValueWrites += ScopStats.NumValueWrites;
1083     NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
1084     NumPHIWrites += ScopStats.NumPHIWrites;
1085     NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
1086     NumSingletonWrites += ScopStats.NumSingletonWrites;
1087     NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
1088 
1089     return false;
1090   }
1091 
printScop(raw_ostream & OS,Scop & S) const1092   void printScop(raw_ostream &OS, Scop &S) const override {
1093     if (!Impl)
1094       return;
1095 
1096     assert(Impl->getScop() == &S);
1097     Impl->print(OS);
1098   }
1099 
releaseMemory()1100   void releaseMemory() override { Impl.reset(); }
1101 }; // class ForwardOpTree
1102 
1103 char ForwardOpTree::ID;
1104 } // namespace
1105 
createForwardOpTreePass()1106 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
1107 
1108 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
1109                       "Polly - Forward operand tree", false, false)
1110 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1111 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
1112                     "Polly - Forward operand tree", false, false)
1113