1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Move instructions between statements.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "polly/ForwardOpTree.h"
14 #include "polly/Options.h"
15 #include "polly/ScopBuilder.h"
16 #include "polly/ScopInfo.h"
17 #include "polly/ScopPass.h"
18 #include "polly/Support/GICHelper.h"
19 #include "polly/Support/ISLOStream.h"
20 #include "polly/Support/ISLTools.h"
21 #include "polly/Support/VirtualInstruction.h"
22 #include "polly/ZoneAlgo.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Analysis/LoopInfo.h"
27 #include "llvm/Analysis/ValueTracking.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Value.h"
31 #include "llvm/InitializePasses.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Compiler.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "isl/ctx.h"
39 #include "isl/isl-noexceptions.h"
40 #include <cassert>
41 #include <memory>
42
43 #define DEBUG_TYPE "polly-optree"
44
45 using namespace llvm;
46 using namespace polly;
47
48 static cl::opt<bool>
49 AnalyzeKnown("polly-optree-analyze-known",
50 cl::desc("Analyze array contents for load forwarding"),
51 cl::cat(PollyCategory), cl::init(true), cl::Hidden);
52
53 static cl::opt<bool>
54 NormalizePHIs("polly-optree-normalize-phi",
55 cl::desc("Replace PHIs by their incoming values"),
56 cl::cat(PollyCategory), cl::init(false), cl::Hidden);
57
58 static cl::opt<unsigned>
59 MaxOps("polly-optree-max-ops",
60 cl::desc("Maximum number of ISL operations to invest for known "
61 "analysis; 0=no limit"),
62 cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
63
64 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
65 STATISTIC(KnownOutOfQuota,
66 "Analyses aborted because max_operations was reached");
67
68 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
69 STATISTIC(TotalKnownLoadsForwarded,
70 "Number of forwarded loads because their value was known");
71 STATISTIC(TotalReloads, "Number of reloaded values");
72 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
73 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
74 STATISTIC(TotalModifiedStmts,
75 "Number of statements with at least one forwarded tree");
76
77 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
78
79 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
80 STATISTIC(NumValueWritesInLoops,
81 "Number of scalar value writes nested in affine loops after OpTree");
82 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
83 STATISTIC(NumPHIWritesInLoops,
84 "Number of scalar phi writes nested in affine loops after OpTree");
85 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
86 STATISTIC(NumSingletonWritesInLoops,
87 "Number of singleton writes nested in affine loops after OpTree");
88
89 namespace {
90
91 /// The state of whether an operand tree was/can be forwarded.
92 ///
93 /// The items apply to an instructions and its operand tree with the instruction
94 /// as the root element. If the value in question is not an instruction in the
95 /// SCoP, it can be a leaf of an instruction's operand tree.
96 enum ForwardingDecision {
97 /// An uninitialized value.
98 FD_Unknown,
99
100 /// The root instruction or value cannot be forwarded at all.
101 FD_CannotForward,
102
103 /// The root instruction or value can be forwarded as a leaf of a larger
104 /// operand tree.
105 /// It does not make sense to move the value itself, it would just replace it
106 /// by a use of itself. For instance, a constant "5" used in a statement can
107 /// be forwarded, but it would just replace it by the same constant "5".
108 /// However, it makes sense to move as an operand of
109 ///
110 /// %add = add 5, 5
111 ///
112 /// where "5" is moved as part of a larger operand tree. "5" would be placed
113 /// (disregarding for a moment that literal constants don't have a location
114 /// and can be used anywhere) into the same statement as %add would.
115 FD_CanForwardLeaf,
116
117 /// The root instruction can be forwarded and doing so avoids a scalar
118 /// dependency.
119 ///
120 /// This can be either because the operand tree can be moved to the target
121 /// statement, or a memory access is redirected to read from a different
122 /// location.
123 FD_CanForwardProfitably,
124
125 /// A forwarding method cannot be applied to the operand tree.
126 /// The difference to FD_CannotForward is that there might be other methods
127 /// that can handle it.
128 FD_NotApplicable
129 };
130
131 /// Represents the evaluation of and action to taken when forwarding a value
132 /// from an operand tree.
133 struct ForwardingAction {
134 using KeyTy = std::pair<Value *, ScopStmt *>;
135
136 /// Evaluation of forwarding a value.
137 ForwardingDecision Decision = FD_Unknown;
138
139 /// Callback to execute the forwarding.
140 /// Returning true allows deleting the polly::MemoryAccess if the value is the
141 /// root of the operand tree (and its elimination the reason why the
142 /// forwarding is done). Return false if the MemoryAccess is reused or there
143 /// might be other users of the read accesses. In the letter case the
144 /// polly::SimplifyPass can remove dead MemoryAccesses.
__anon391836e40202__anon391836e40111::ForwardingAction145 std::function<bool()> Execute = []() -> bool {
146 llvm_unreachable("unspecified how to forward");
147 };
148
149 /// Other values that need to be forwarded if this action is executed. Their
150 /// actions are executed after this one.
151 SmallVector<KeyTy, 4> Depends;
152
153 /// Named ctor: The method creating this object does not apply to the kind of
154 /// value, but other methods may.
notApplicable__anon391836e40111::ForwardingAction155 static ForwardingAction notApplicable() {
156 ForwardingAction Result;
157 Result.Decision = FD_NotApplicable;
158 return Result;
159 }
160
161 /// Named ctor: The value cannot be forwarded.
cannotForward__anon391836e40111::ForwardingAction162 static ForwardingAction cannotForward() {
163 ForwardingAction Result;
164 Result.Decision = FD_CannotForward;
165 return Result;
166 }
167
168 /// Named ctor: The value can just be used without any preparation.
triviallyForwardable__anon391836e40111::ForwardingAction169 static ForwardingAction triviallyForwardable(bool IsProfitable, Value *Val) {
170 ForwardingAction Result;
171 Result.Decision =
172 IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf;
173 Result.Execute = [=]() {
174 LLVM_DEBUG(dbgs() << " trivially forwarded: " << *Val << "\n");
175 return true;
176 };
177 return Result;
178 }
179
180 /// Name ctor: The value can be forwarded by executing an action.
canForward__anon391836e40111::ForwardingAction181 static ForwardingAction canForward(std::function<bool()> Execute,
182 ArrayRef<KeyTy> Depends,
183 bool IsProfitable) {
184 ForwardingAction Result;
185 Result.Decision =
186 IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf;
187 Result.Execute = std::move(Execute);
188 Result.Depends.append(Depends.begin(), Depends.end());
189 return Result;
190 }
191 };
192
193 /// Implementation of operand tree forwarding for a specific SCoP.
194 ///
195 /// For a statement that requires a scalar value (through a value read
196 /// MemoryAccess), see if its operand can be moved into the statement. If so,
197 /// the MemoryAccess is removed and the all the operand tree instructions are
198 /// moved into the statement. All original instructions are left in the source
199 /// statements. The simplification pass can clean these up.
200 class ForwardOpTreeImpl : ZoneAlgorithm {
201 private:
202 using MemoizationTy = DenseMap<ForwardingAction::KeyTy, ForwardingAction>;
203
204 /// Scope guard to limit the number of isl operations for this pass.
205 IslMaxOperationsGuard &MaxOpGuard;
206
207 /// How many instructions have been copied to other statements.
208 int NumInstructionsCopied = 0;
209
210 /// Number of loads forwarded because their value was known.
211 int NumKnownLoadsForwarded = 0;
212
213 /// Number of values reloaded from known array elements.
214 int NumReloads = 0;
215
216 /// How many read-only accesses have been copied.
217 int NumReadOnlyCopied = 0;
218
219 /// How many operand trees have been forwarded.
220 int NumForwardedTrees = 0;
221
222 /// Number of statements with at least one forwarded operand tree.
223 int NumModifiedStmts = 0;
224
225 /// Whether we carried out at least one change to the SCoP.
226 bool Modified = false;
227
228 /// Cache of how to forward values.
229 /// The key of this map is the llvm::Value to be forwarded and the
230 /// polly::ScopStmt it is forwarded from. This is because the same llvm::Value
231 /// can evaluate differently depending on where it is evaluate. For instance,
232 /// a synthesizable Scev represents a recurrence with an loop but the loop's
233 /// exit value if evaluated after the loop.
234 /// The cached results are only valid for the current TargetStmt.
235 /// CHECKME: ScalarEvolution::getScevAtScope should take care for getting the
236 /// exit value when instantiated outside of the loop. The primary concern is
237 /// ambiguity when crossing PHI nodes, which currently is not supported.
238 MemoizationTy ForwardingActions;
239
240 /// Contains the zones where array elements are known to contain a specific
241 /// value.
242 /// { [Element[] -> Zone[]] -> ValInst[] }
243 /// @see computeKnown()
244 isl::union_map Known;
245
246 /// Translator for newly introduced ValInsts to already existing ValInsts such
247 /// that new introduced load instructions can reuse the Known analysis of its
248 /// original load. { ValInst[] -> ValInst[] }
249 isl::union_map Translator;
250
251 /// Get list of array elements that do contain the same ValInst[] at Domain[].
252 ///
253 /// @param ValInst { Domain[] -> ValInst[] }
254 /// The values for which we search for alternative locations,
255 /// per statement instance.
256 ///
257 /// @return { Domain[] -> Element[] }
258 /// For each statement instance, the array elements that contain the
259 /// same ValInst.
findSameContentElements(isl::union_map ValInst)260 isl::union_map findSameContentElements(isl::union_map ValInst) {
261 assert(!ValInst.is_single_valued().is_false());
262
263 // { Domain[] }
264 isl::union_set Domain = ValInst.domain();
265
266 // { Domain[] -> Scatter[] }
267 isl::union_map Schedule = getScatterFor(Domain);
268
269 // { Element[] -> [Scatter[] -> ValInst[]] }
270 isl::union_map MustKnownCurried =
271 convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
272
273 // { [Domain[] -> ValInst[]] -> Scatter[] }
274 isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
275
276 // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
277 isl::union_map SchedValDomVal =
278 DomValSched.range_product(ValInst.range_map()).reverse();
279
280 // { Element[] -> [Domain[] -> ValInst[]] }
281 isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
282
283 // { Domain[] -> Element[] }
284 isl::union_map MustKnownMap =
285 MustKnownInst.uncurry().domain().unwrap().reverse();
286 simplify(MustKnownMap);
287
288 return MustKnownMap;
289 }
290
291 /// Find a single array element for each statement instance, within a single
292 /// array.
293 ///
294 /// @param MustKnown { Domain[] -> Element[] }
295 /// Set of candidate array elements.
296 /// @param Domain { Domain[] }
297 /// The statement instance for which we need elements for.
298 ///
299 /// @return { Domain[] -> Element[] }
300 /// For each statement instance, an array element out of @p MustKnown.
301 /// All array elements must be in the same array (Polly does not yet
302 /// support reading from different accesses using the same
303 /// MemoryAccess). If no mapping for all of @p Domain exists, returns
304 /// null.
singleLocation(isl::union_map MustKnown,isl::set Domain)305 isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
306 // { Domain[] -> Element[] }
307 isl::map Result;
308
309 // MemoryAccesses can read only elements from a single array
310 // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
311 // Look through all spaces until we find one that contains at least the
312 // wanted statement instance.s
313 for (isl::map Map : MustKnown.get_map_list()) {
314 // Get the array this is accessing.
315 isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
316 ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
317
318 // No support for generation of indirect array accesses.
319 if (SAI->getBasePtrOriginSAI())
320 continue;
321
322 // Determine whether this map contains all wanted values.
323 isl::set MapDom = Map.domain();
324 if (!Domain.is_subset(MapDom).is_true())
325 continue;
326
327 // There might be multiple array elements that contain the same value, but
328 // choose only one of them. lexmin is used because it returns a one-value
329 // mapping, we do not care about which one.
330 // TODO: Get the simplest access function.
331 Result = Map.lexmin();
332 break;
333 }
334
335 return Result;
336 }
337
338 public:
ForwardOpTreeImpl(Scop * S,LoopInfo * LI,IslMaxOperationsGuard & MaxOpGuard)339 ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
340 : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
341
342 /// Compute the zones of known array element contents.
343 ///
344 /// @return True if the computed #Known is usable.
computeKnownValues()345 bool computeKnownValues() {
346 isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
347
348 // Check that nothing strange occurs.
349 collectCompatibleElts();
350
351 {
352 IslQuotaScope QuotaScope = MaxOpGuard.enter();
353
354 computeCommon();
355 if (NormalizePHIs)
356 computeNormalizedPHIs();
357 Known = computeKnown(true, true);
358
359 // Preexisting ValInsts use the known content analysis of themselves.
360 Translator = makeIdentityMap(Known.range(), false);
361 }
362
363 if (!Known || !Translator || !NormalizeMap) {
364 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
365 Known = nullptr;
366 Translator = nullptr;
367 NormalizeMap = nullptr;
368 LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
369 return false;
370 }
371
372 KnownAnalyzed++;
373 LLVM_DEBUG(dbgs() << "All known: " << Known << "\n");
374
375 return true;
376 }
377
printStatistics(raw_ostream & OS,int Indent=0)378 void printStatistics(raw_ostream &OS, int Indent = 0) {
379 OS.indent(Indent) << "Statistics {\n";
380 OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
381 << '\n';
382 OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
383 << '\n';
384 OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
385 OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
386 << '\n';
387 OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
388 << '\n';
389 OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
390 << NumModifiedStmts << '\n';
391 OS.indent(Indent) << "}\n";
392 }
393
printStatements(raw_ostream & OS,int Indent=0) const394 void printStatements(raw_ostream &OS, int Indent = 0) const {
395 OS.indent(Indent) << "After statements {\n";
396 for (auto &Stmt : *S) {
397 OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
398 for (auto *MA : Stmt)
399 MA->print(OS);
400
401 OS.indent(Indent + 12);
402 Stmt.printInstructions(OS);
403 }
404 OS.indent(Indent) << "}\n";
405 }
406
407 /// Create a new MemoryAccess of type read and MemoryKind::Array.
408 ///
409 /// @param Stmt The statement in which the access occurs.
410 /// @param LI The instruction that does the access.
411 /// @param AccessRelation The array element that each statement instance
412 /// accesses.
413 ///
414 /// @param The newly created access.
makeReadArrayAccess(ScopStmt * Stmt,LoadInst * LI,isl::map AccessRelation)415 MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
416 isl::map AccessRelation) {
417 isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
418 ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
419
420 // Create a dummy SCEV access, to be replaced anyway.
421 SmallVector<const SCEV *, 4> Sizes;
422 Sizes.reserve(SAI->getNumberOfDimensions());
423 SmallVector<const SCEV *, 4> Subscripts;
424 Subscripts.reserve(SAI->getNumberOfDimensions());
425 for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
426 Sizes.push_back(SAI->getDimensionSize(i));
427 Subscripts.push_back(nullptr);
428 }
429
430 MemoryAccess *Access =
431 new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
432 LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
433 S->addAccessFunction(Access);
434 Stmt->addAccess(Access, true);
435
436 Access->setNewAccessRelation(AccessRelation);
437
438 return Access;
439 }
440
441 /// Forward a load by reading from an array element that contains the same
442 /// value. Typically the location it was loaded from.
443 ///
444 /// @param TargetStmt The statement the operand tree will be copied to.
445 /// @param Inst The (possibly speculatable) instruction to forward.
446 /// @param UseStmt The statement that uses @p Inst.
447 /// @param UseLoop The loop @p Inst is used in.
448 /// @param DefStmt The statement @p Inst is defined in.
449 /// @param DefLoop The loop which contains @p Inst.
450 ///
451 /// @return A ForwardingAction object describing the feasibility and
452 /// profitability evaluation and the callback carrying-out the value
453 /// forwarding.
forwardKnownLoad(ScopStmt * TargetStmt,Instruction * Inst,ScopStmt * UseStmt,Loop * UseLoop,ScopStmt * DefStmt,Loop * DefLoop)454 ForwardingAction forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
455 ScopStmt *UseStmt, Loop *UseLoop,
456 ScopStmt *DefStmt, Loop *DefLoop) {
457 // Cannot do anything without successful known analysis.
458 if (Known.is_null() || Translator.is_null() ||
459 MaxOpGuard.hasQuotaExceeded())
460 return ForwardingAction::notApplicable();
461
462 LoadInst *LI = dyn_cast<LoadInst>(Inst);
463 if (!LI)
464 return ForwardingAction::notApplicable();
465
466 ForwardingDecision OpDecision =
467 forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop);
468 switch (OpDecision) {
469 case FD_CanForwardProfitably:
470 case FD_CanForwardLeaf:
471 break;
472 case FD_CannotForward:
473 return ForwardingAction::cannotForward();
474 default:
475 llvm_unreachable("Shouldn't return this");
476 }
477
478 MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
479 if (Access) {
480 // If the load is already in the statement, no forwarding is necessary.
481 // However, it might happen that the LoadInst is already present in the
482 // statement's instruction list. In that case we do as follows:
483 // - For the evaluation, we can trivially forward it as it is
484 // benefit of forwarding an already present instruction.
485 // - For the execution, prepend the instruction (to make it
486 // available to all instructions following in the instruction list), but
487 // do not add another MemoryAccess.
488 auto ExecAction = [this, TargetStmt, LI, Access]() -> bool {
489 TargetStmt->prependInstruction(LI);
490 LLVM_DEBUG(
491 dbgs() << " forwarded known load with preexisting MemoryAccess"
492 << Access << "\n");
493 (void)Access;
494
495 NumKnownLoadsForwarded++;
496 TotalKnownLoadsForwarded++;
497 return true;
498 };
499 return ForwardingAction::canForward(
500 ExecAction, {{LI->getPointerOperand(), DefStmt}}, true);
501 }
502
503 // Allow the following Isl calculations (until we return the
504 // ForwardingAction, excluding the code inside the lambda that will be
505 // executed later) to fail.
506 IslQuotaScope QuotaScope = MaxOpGuard.enter();
507
508 // { DomainDef[] -> ValInst[] }
509 isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
510 assert(!isNormalized(ExpectedVal).is_false() &&
511 "LoadInsts are always normalized");
512
513 // { DomainUse[] -> DomainTarget[] }
514 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
515
516 // { DomainTarget[] -> ValInst[] }
517 isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
518 isl::union_map TranslatedExpectedVal =
519 isl::union_map(TargetExpectedVal).apply_range(Translator);
520
521 // { DomainTarget[] -> Element[] }
522 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
523
524 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
525 if (!SameVal)
526 return ForwardingAction::notApplicable();
527
528 LLVM_DEBUG(dbgs() << " expected values where " << TargetExpectedVal
529 << "\n");
530 LLVM_DEBUG(dbgs() << " candidate elements where " << Candidates
531 << "\n");
532
533 // { ValInst[] }
534 isl::space ValInstSpace = ExpectedVal.get_space().range();
535
536 // After adding a new load to the SCoP, also update the Known content
537 // about it. The new load will have a known ValInst of
538 // { [DomainTarget[] -> Value[]] }
539 // but which -- because it is a copy of it -- has same value as the
540 // { [DomainDef[] -> Value[]] }
541 // that it replicates. Instead of cloning the known content of
542 // [DomainDef[] -> Value[]]
543 // for DomainTarget[], we add a 'translator' that maps
544 // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
545 // before comparing to the known content.
546 // TODO: 'Translator' could also be used to map PHINodes to their incoming
547 // ValInsts.
548 isl::map LocalTranslator;
549 if (!ValInstSpace.is_wrapping().is_false()) {
550 // { DefDomain[] -> Value[] }
551 isl::map ValInsts = ExpectedVal.range().unwrap();
552
553 // { DefDomain[] }
554 isl::set DefDomain = ValInsts.domain();
555
556 // { Value[] }
557 isl::space ValSpace = ValInstSpace.unwrap().range();
558
559 // { Value[] -> Value[] }
560 isl::map ValToVal =
561 isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
562
563 // { DomainDef[] -> DomainTarget[] }
564 isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
565
566 // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
567 LocalTranslator = DefToTarget.reverse().product(ValToVal);
568 LLVM_DEBUG(dbgs() << " local translator is " << LocalTranslator
569 << "\n");
570
571 if (!LocalTranslator)
572 return ForwardingAction::notApplicable();
573 }
574
575 auto ExecAction = [this, TargetStmt, LI, SameVal,
576 LocalTranslator]() -> bool {
577 TargetStmt->prependInstruction(LI);
578 MemoryAccess *Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
579 LLVM_DEBUG(dbgs() << " forwarded known load with new MemoryAccess"
580 << Access << "\n");
581 (void)Access;
582
583 if (LocalTranslator)
584 Translator = Translator.add_map(LocalTranslator);
585
586 NumKnownLoadsForwarded++;
587 TotalKnownLoadsForwarded++;
588 return true;
589 };
590 return ForwardingAction::canForward(
591 ExecAction, {{LI->getPointerOperand(), DefStmt}}, true);
592 }
593
594 /// Forward a scalar by redirecting the access to an array element that stores
595 /// the same value.
596 ///
597 /// @param TargetStmt The statement the operand tree will be copied to.
598 /// @param Inst The scalar to forward.
599 /// @param UseStmt The statement that uses @p Inst.
600 /// @param UseLoop The loop @p Inst is used in.
601 /// @param DefStmt The statement @p Inst is defined in.
602 /// @param DefLoop The loop which contains @p Inst.
603 ///
604 /// @return A ForwardingAction object describing the feasibility and
605 /// profitability evaluation and the callback carrying-out the value
606 /// forwarding.
reloadKnownContent(ScopStmt * TargetStmt,Instruction * Inst,ScopStmt * UseStmt,Loop * UseLoop,ScopStmt * DefStmt,Loop * DefLoop)607 ForwardingAction reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
608 ScopStmt *UseStmt, Loop *UseLoop,
609 ScopStmt *DefStmt, Loop *DefLoop) {
610 // Cannot do anything without successful known analysis.
611 if (Known.is_null() || Translator.is_null() ||
612 MaxOpGuard.hasQuotaExceeded())
613 return ForwardingAction::notApplicable();
614
615 // Don't spend too much time analyzing whether it can be reloaded.
616 IslQuotaScope QuotaScope = MaxOpGuard.enter();
617
618 // { DomainDef[] -> ValInst[] }
619 isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
620
621 // { DomainUse[] -> DomainTarget[] }
622 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
623
624 // { DomainTarget[] -> ValInst[] }
625 isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
626 isl::union_map TranslatedExpectedVal =
627 TargetExpectedVal.apply_range(Translator);
628
629 // { DomainTarget[] -> Element[] }
630 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
631
632 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
633 simplify(SameVal);
634 if (!SameVal)
635 return ForwardingAction::notApplicable();
636
637 auto ExecAction = [this, TargetStmt, Inst, SameVal]() {
638 MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
639 if (!Access)
640 Access = TargetStmt->ensureValueRead(Inst);
641 Access->setNewAccessRelation(SameVal);
642
643 LLVM_DEBUG(dbgs() << " forwarded known content of " << *Inst
644 << " which is " << SameVal << "\n");
645 TotalReloads++;
646 NumReloads++;
647 return false;
648 };
649
650 return ForwardingAction::canForward(ExecAction, {}, true);
651 }
652
653 /// Forwards a speculatively executable instruction.
654 ///
655 /// @param TargetStmt The statement the operand tree will be copied to.
656 /// @param UseInst The (possibly speculatable) instruction to forward.
657 /// @param DefStmt The statement @p UseInst is defined in.
658 /// @param DefLoop The loop which contains @p UseInst.
659 ///
660 /// @return A ForwardingAction object describing the feasibility and
661 /// profitability evaluation and the callback carrying-out the value
662 /// forwarding.
forwardSpeculatable(ScopStmt * TargetStmt,Instruction * UseInst,ScopStmt * DefStmt,Loop * DefLoop)663 ForwardingAction forwardSpeculatable(ScopStmt *TargetStmt,
664 Instruction *UseInst, ScopStmt *DefStmt,
665 Loop *DefLoop) {
666 // PHIs, unless synthesizable, are not yet supported.
667 if (isa<PHINode>(UseInst))
668 return ForwardingAction::notApplicable();
669
670 // Compatible instructions must satisfy the following conditions:
671 // 1. Idempotent (instruction will be copied, not moved; although its
672 // original instance might be removed by simplification)
673 // 2. Not access memory (There might be memory writes between)
674 // 3. Not cause undefined behaviour (we might copy to a location when the
675 // original instruction was no executed; this is currently not possible
676 // because we do not forward PHINodes)
677 // 4. Not leak memory if executed multiple times (i.e. malloc)
678 //
679 // Instruction::mayHaveSideEffects is not sufficient because it considers
680 // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
681 // not sufficient because it allows memory accesses.
682 if (mayBeMemoryDependent(*UseInst))
683 return ForwardingAction::notApplicable();
684
685 SmallVector<ForwardingAction::KeyTy, 4> Depends;
686 Depends.reserve(UseInst->getNumOperands());
687 for (Value *OpVal : UseInst->operand_values()) {
688 ForwardingDecision OpDecision =
689 forwardTree(TargetStmt, OpVal, DefStmt, DefLoop);
690 switch (OpDecision) {
691 case FD_CannotForward:
692 return ForwardingAction::cannotForward();
693
694 case FD_CanForwardLeaf:
695 case FD_CanForwardProfitably:
696 Depends.emplace_back(OpVal, DefStmt);
697 break;
698
699 case FD_NotApplicable:
700 case FD_Unknown:
701 llvm_unreachable(
702 "forwardTree should never return FD_NotApplicable/FD_Unknown");
703 }
704 }
705
706 auto ExecAction = [this, TargetStmt, UseInst]() {
707 // To ensure the right order, prepend this instruction before its
708 // operands. This ensures that its operands are inserted before the
709 // instruction using them.
710 TargetStmt->prependInstruction(UseInst);
711
712 LLVM_DEBUG(dbgs() << " forwarded speculable instruction: " << *UseInst
713 << "\n");
714 NumInstructionsCopied++;
715 TotalInstructionsCopied++;
716 return true;
717 };
718 return ForwardingAction::canForward(ExecAction, Depends, true);
719 }
720
721 /// Determines whether an operand tree can be forwarded and returns
722 /// instructions how to do so in the form of a ForwardingAction object.
723 ///
724 /// @param TargetStmt The statement the operand tree will be copied to.
725 /// @param UseVal The value (usually an instruction) which is root of an
726 /// operand tree.
727 /// @param UseStmt The statement that uses @p UseVal.
728 /// @param UseLoop The loop @p UseVal is used in.
729 ///
730 /// @return A ForwardingAction object describing the feasibility and
731 /// profitability evaluation and the callback carrying-out the value
732 /// forwarding.
forwardTreeImpl(ScopStmt * TargetStmt,Value * UseVal,ScopStmt * UseStmt,Loop * UseLoop)733 ForwardingAction forwardTreeImpl(ScopStmt *TargetStmt, Value *UseVal,
734 ScopStmt *UseStmt, Loop *UseLoop) {
735 ScopStmt *DefStmt = nullptr;
736 Loop *DefLoop = nullptr;
737
738 // { DefDomain[] -> TargetDomain[] }
739 isl::map DefToTarget;
740
741 VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
742 switch (VUse.getKind()) {
743 case VirtualUse::Constant:
744 case VirtualUse::Block:
745 case VirtualUse::Hoisted:
746 // These can be used anywhere without special considerations.
747 return ForwardingAction::triviallyForwardable(false, UseVal);
748
749 case VirtualUse::Synthesizable: {
750 // Check if the value is synthesizable at the new location as well. This
751 // might be possible when leaving a loop for which ScalarEvolution is
752 // unable to derive the exit value for.
753 // TODO: If there is a LCSSA PHI at the loop exit, use that one.
754 // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
755 // do not forward past its loop header. This would require us to use a
756 // previous loop induction variable instead the current one. We currently
757 // do not allow forwarding PHI nodes, thus this should never occur (the
758 // only exception where no phi is necessary being an unreachable loop
759 // without edge from the outside).
760 VirtualUse TargetUse = VirtualUse::create(
761 S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
762 if (TargetUse.getKind() == VirtualUse::Synthesizable)
763 return ForwardingAction::triviallyForwardable(false, UseVal);
764
765 LLVM_DEBUG(
766 dbgs() << " Synthesizable would not be synthesizable anymore: "
767 << *UseVal << "\n");
768 return ForwardingAction::cannotForward();
769 }
770
771 case VirtualUse::ReadOnly: {
772 if (!ModelReadOnlyScalars)
773 return ForwardingAction::triviallyForwardable(false, UseVal);
774
775 // If we model read-only scalars, we need to create a MemoryAccess for it.
776 auto ExecAction = [this, TargetStmt, UseVal]() {
777 TargetStmt->ensureValueRead(UseVal);
778
779 LLVM_DEBUG(dbgs() << " forwarded read-only value " << *UseVal
780 << "\n");
781 NumReadOnlyCopied++;
782 TotalReadOnlyCopied++;
783
784 // Note that we cannot return true here. With a operand tree
785 // depth of 0, UseVal is the use in TargetStmt that we try to replace.
786 // With -polly-analyze-read-only-scalars=true we would ensure the
787 // existence of a MemoryAccess (which already exists for a leaf) and be
788 // removed again by tryForwardTree because it's goal is to remove this
789 // scalar MemoryAccess. It interprets FD_CanForwardTree as the
790 // permission to do so.
791 return false;
792 };
793 return ForwardingAction::canForward(ExecAction, {}, false);
794 }
795
796 case VirtualUse::Intra:
797 // Knowing that UseStmt and DefStmt are the same statement instance, just
798 // reuse the information about UseStmt for DefStmt
799 DefStmt = UseStmt;
800
801 LLVM_FALLTHROUGH;
802 case VirtualUse::Inter:
803 Instruction *Inst = cast<Instruction>(UseVal);
804
805 if (!DefStmt) {
806 DefStmt = S->getStmtFor(Inst);
807 if (!DefStmt)
808 return ForwardingAction::cannotForward();
809 }
810
811 DefLoop = LI->getLoopFor(Inst->getParent());
812
813 ForwardingAction SpeculativeResult =
814 forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop);
815 if (SpeculativeResult.Decision != FD_NotApplicable)
816 return SpeculativeResult;
817
818 ForwardingAction KnownResult = forwardKnownLoad(
819 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop);
820 if (KnownResult.Decision != FD_NotApplicable)
821 return KnownResult;
822
823 ForwardingAction ReloadResult = reloadKnownContent(
824 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop);
825 if (ReloadResult.Decision != FD_NotApplicable)
826 return ReloadResult;
827
828 // When no method is found to forward the operand tree, we effectively
829 // cannot handle it.
830 LLVM_DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n");
831 return ForwardingAction::cannotForward();
832 }
833
834 llvm_unreachable("Case unhandled");
835 }
836
837 /// Determines whether an operand tree can be forwarded. Previous evaluations
838 /// are cached.
839 ///
840 /// @param TargetStmt The statement the operand tree will be copied to.
841 /// @param UseVal The value (usually an instruction) which is root of an
842 /// operand tree.
843 /// @param UseStmt The statement that uses @p UseVal.
844 /// @param UseLoop The loop @p UseVal is used in.
845 ///
846 /// @return FD_CannotForward if @p UseVal cannot be forwarded.
847 /// FD_CanForwardLeaf if @p UseVal is forwardable, but not
848 /// profitable.
849 /// FD_CanForwardProfitably if @p UseVal is forwardable and useful to
850 /// do.
forwardTree(ScopStmt * TargetStmt,Value * UseVal,ScopStmt * UseStmt,Loop * UseLoop)851 ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
852 ScopStmt *UseStmt, Loop *UseLoop) {
853 // Lookup any cached evaluation.
854 auto It = ForwardingActions.find({UseVal, UseStmt});
855 if (It != ForwardingActions.end())
856 return It->second.Decision;
857
858 // Make a new evaluation.
859 ForwardingAction Action =
860 forwardTreeImpl(TargetStmt, UseVal, UseStmt, UseLoop);
861 ForwardingDecision Result = Action.Decision;
862
863 // Remember for the next time.
864 assert(!ForwardingActions.count({UseVal, UseStmt}) &&
865 "circular dependency?");
866 ForwardingActions.insert({{UseVal, UseStmt}, std::move(Action)});
867
868 return Result;
869 }
870
871 /// Forward an operand tree using cached actions.
872 ///
873 /// @param Stmt Statement the operand tree is moved into.
874 /// @param UseVal Root of the operand tree within @p Stmt.
875 /// @param RA The MemoryAccess for @p UseVal that the forwarding intends
876 /// to remove.
applyForwardingActions(ScopStmt * Stmt,Value * UseVal,MemoryAccess * RA)877 void applyForwardingActions(ScopStmt *Stmt, Value *UseVal, MemoryAccess *RA) {
878 using ChildItTy =
879 decltype(std::declval<ForwardingAction>().Depends.begin());
880 using EdgeTy = std::pair<ForwardingAction *, ChildItTy>;
881
882 DenseSet<ForwardingAction::KeyTy> Visited;
883 SmallVector<EdgeTy, 32> Stack;
884 SmallVector<ForwardingAction *, 32> Ordered;
885
886 // Seed the tree search using the root value.
887 assert(ForwardingActions.count({UseVal, Stmt}));
888 ForwardingAction *RootAction = &ForwardingActions[{UseVal, Stmt}];
889 Stack.emplace_back(RootAction, RootAction->Depends.begin());
890
891 // Compute the postorder of the operand tree: all operands of an instruction
892 // must be visited before the instruction itself. As an additional
893 // requirement, the topological ordering must be 'compact': Any subtree node
894 // must not be interleaved with nodes from a non-shared subtree. This is
895 // because the same llvm::Instruction can be materialized multiple times as
896 // used at different ScopStmts which might be different values. Intersecting
897 // these lifetimes may result in miscompilations.
898 // FIXME: Intersecting lifetimes might still be possible for the roots
899 // themselves, since instructions are just prepended to a ScopStmt's
900 // instruction list.
901 while (!Stack.empty()) {
902 EdgeTy &Top = Stack.back();
903 ForwardingAction *TopAction = Top.first;
904 ChildItTy &TopEdge = Top.second;
905
906 if (TopEdge == TopAction->Depends.end()) {
907 // Postorder sorting
908 Ordered.push_back(TopAction);
909 Stack.pop_back();
910 continue;
911 }
912 ForwardingAction::KeyTy Key = *TopEdge;
913
914 // Next edge for this level
915 ++TopEdge;
916
917 auto VisitIt = Visited.insert(Key);
918 if (!VisitIt.second)
919 continue;
920
921 assert(ForwardingActions.count(Key) &&
922 "Must not insert new actions during execution phase");
923 ForwardingAction *ChildAction = &ForwardingActions[Key];
924 Stack.emplace_back(ChildAction, ChildAction->Depends.begin());
925 }
926
927 // Actually, we need the reverse postorder because actions prepend new
928 // instructions. Therefore, the first one will always be the action for the
929 // operand tree's root.
930 assert(Ordered.back() == RootAction);
931 if (RootAction->Execute())
932 Stmt->removeSingleMemoryAccess(RA);
933 Ordered.pop_back();
934 for (auto DepAction : reverse(Ordered)) {
935 assert(DepAction->Decision != FD_Unknown &&
936 DepAction->Decision != FD_CannotForward);
937 assert(DepAction != RootAction);
938 DepAction->Execute();
939 }
940 }
941
942 /// Try to forward an operand tree rooted in @p RA.
tryForwardTree(MemoryAccess * RA)943 bool tryForwardTree(MemoryAccess *RA) {
944 assert(RA->isLatestScalarKind());
945 LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
946
947 ScopStmt *Stmt = RA->getStatement();
948 Loop *InLoop = Stmt->getSurroundingLoop();
949
950 isl::map TargetToUse;
951 if (!Known.is_null()) {
952 isl::space DomSpace = Stmt->getDomainSpace();
953 TargetToUse =
954 isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
955 }
956
957 ForwardingDecision Assessment =
958 forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop);
959
960 // If considered feasible and profitable, forward it.
961 bool Changed = false;
962 if (Assessment == FD_CanForwardProfitably) {
963 applyForwardingActions(Stmt, RA->getAccessValue(), RA);
964 Changed = true;
965 }
966
967 ForwardingActions.clear();
968 return Changed;
969 }
970
971 /// Return which SCoP this instance is processing.
getScop() const972 Scop *getScop() const { return S; }
973
974 /// Run the algorithm: Use value read accesses as operand tree roots and try
975 /// to forward them into the statement.
forwardOperandTrees()976 bool forwardOperandTrees() {
977 for (ScopStmt &Stmt : *S) {
978 bool StmtModified = false;
979
980 // Because we are modifying the MemoryAccess list, collect them first to
981 // avoid iterator invalidation.
982 SmallVector<MemoryAccess *, 16> Accs(Stmt.begin(), Stmt.end());
983
984 for (MemoryAccess *RA : Accs) {
985 if (!RA->isRead())
986 continue;
987 if (!RA->isLatestScalarKind())
988 continue;
989
990 if (tryForwardTree(RA)) {
991 Modified = true;
992 StmtModified = true;
993 NumForwardedTrees++;
994 TotalForwardedTrees++;
995 }
996 }
997
998 if (StmtModified) {
999 NumModifiedStmts++;
1000 TotalModifiedStmts++;
1001 }
1002 }
1003
1004 if (Modified) {
1005 ScopsModified++;
1006 S->realignParams();
1007 }
1008 return Modified;
1009 }
1010
1011 /// Print the pass result, performed transformations and the SCoP after the
1012 /// transformation.
print(raw_ostream & OS,int Indent=0)1013 void print(raw_ostream &OS, int Indent = 0) {
1014 printStatistics(OS, Indent);
1015
1016 if (!Modified) {
1017 // This line can easily be checked in regression tests.
1018 OS << "ForwardOpTree executed, but did not modify anything\n";
1019 return;
1020 }
1021
1022 printStatements(OS, Indent);
1023 }
1024 };
1025
1026 /// Pass that redirects scalar reads to array elements that are known to contain
1027 /// the same value.
1028 ///
1029 /// This reduces the number of scalar accesses and therefore potentially
1030 /// increases the freedom of the scheduler. In the ideal case, all reads of a
1031 /// scalar definition are redirected (We currently do not care about removing
1032 /// the write in this case). This is also useful for the main DeLICM pass as
1033 /// there are less scalars to be mapped.
1034 class ForwardOpTree : public ScopPass {
1035 private:
1036 /// The pass implementation, also holding per-scop data.
1037 std::unique_ptr<ForwardOpTreeImpl> Impl;
1038
1039 public:
1040 static char ID;
1041
ForwardOpTree()1042 explicit ForwardOpTree() : ScopPass(ID) {}
1043 ForwardOpTree(const ForwardOpTree &) = delete;
1044 ForwardOpTree &operator=(const ForwardOpTree &) = delete;
1045
getAnalysisUsage(AnalysisUsage & AU) const1046 void getAnalysisUsage(AnalysisUsage &AU) const override {
1047 AU.addRequiredTransitive<ScopInfoRegionPass>();
1048 AU.addRequired<LoopInfoWrapperPass>();
1049 AU.setPreservesAll();
1050 }
1051
runOnScop(Scop & S)1052 bool runOnScop(Scop &S) override {
1053 // Free resources for previous SCoP's computation, if not yet done.
1054 releaseMemory();
1055
1056 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1057
1058 {
1059 IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
1060 Impl = std::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
1061
1062 if (AnalyzeKnown) {
1063 LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
1064 Impl->computeKnownValues();
1065 }
1066
1067 LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
1068 Impl->forwardOperandTrees();
1069
1070 if (MaxOpGuard.hasQuotaExceeded()) {
1071 LLVM_DEBUG(dbgs() << "Not all operations completed because of "
1072 "max_operations exceeded\n");
1073 KnownOutOfQuota++;
1074 }
1075 }
1076
1077 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
1078 LLVM_DEBUG(dbgs() << S);
1079
1080 // Update statistics
1081 auto ScopStats = S.getStatistics();
1082 NumValueWrites += ScopStats.NumValueWrites;
1083 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
1084 NumPHIWrites += ScopStats.NumPHIWrites;
1085 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
1086 NumSingletonWrites += ScopStats.NumSingletonWrites;
1087 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
1088
1089 return false;
1090 }
1091
printScop(raw_ostream & OS,Scop & S) const1092 void printScop(raw_ostream &OS, Scop &S) const override {
1093 if (!Impl)
1094 return;
1095
1096 assert(Impl->getScop() == &S);
1097 Impl->print(OS);
1098 }
1099
releaseMemory()1100 void releaseMemory() override { Impl.reset(); }
1101 }; // class ForwardOpTree
1102
1103 char ForwardOpTree::ID;
1104 } // namespace
1105
createForwardOpTreePass()1106 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
1107
1108 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
1109 "Polly - Forward operand tree", false, false)
1110 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1111 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
1112 "Polly - Forward operand tree", false, false)
1113