1 //===-- MemorySSA.cpp - Memory SSA Builder---------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------===//
9 //
10 // This file implements the MemorySSA class.
11 //
12 //===----------------------------------------------------------------===//
13 #include "llvm/Transforms/Utils/MemorySSA.h"
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/ADT/DenseSet.h"
16 #include "llvm/ADT/DepthFirstIterator.h"
17 #include "llvm/ADT/GraphTraits.h"
18 #include "llvm/ADT/PostOrderIterator.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/AliasAnalysis.h"
24 #include "llvm/Analysis/CFG.h"
25 #include "llvm/Analysis/GlobalsModRef.h"
26 #include "llvm/Analysis/IteratedDominanceFrontier.h"
27 #include "llvm/Analysis/MemoryLocation.h"
28 #include "llvm/Analysis/PHITransAddr.h"
29 #include "llvm/IR/AssemblyAnnotationWriter.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/Dominators.h"
32 #include "llvm/IR/GlobalVariable.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/IntrinsicInst.h"
35 #include "llvm/IR/LLVMContext.h"
36 #include "llvm/IR/Metadata.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/PatternMatch.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/FormattedStream.h"
41 #include "llvm/Transforms/Scalar.h"
42 #include <algorithm>
43 
44 #define DEBUG_TYPE "memoryssa"
45 using namespace llvm;
46 STATISTIC(NumClobberCacheLookups, "Number of Memory SSA version cache lookups");
47 STATISTIC(NumClobberCacheHits, "Number of Memory SSA version cache hits");
48 STATISTIC(NumClobberCacheInserts, "Number of MemorySSA version cache inserts");
49 
50 INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
51                       true)
52 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
53 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
54 INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
55                     true)
56 
57 INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa",
58                       "Memory SSA Printer", false, false)
59 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
60 INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa",
61                     "Memory SSA Printer", false, false)
62 
63 static cl::opt<bool>
64     VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden,
65                     cl::desc("Verify MemorySSA in legacy printer pass."));
66 
67 namespace llvm {
68 /// \brief An assembly annotator class to print Memory SSA information in
69 /// comments.
70 class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter {
71   friend class MemorySSA;
72   const MemorySSA *MSSA;
73 
74 public:
MemorySSAAnnotatedWriter(const MemorySSA * M)75   MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {}
76 
emitBasicBlockStartAnnot(const BasicBlock * BB,formatted_raw_ostream & OS)77   virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
78                                         formatted_raw_ostream &OS) {
79     if (MemoryAccess *MA = MSSA->getMemoryAccess(BB))
80       OS << "; " << *MA << "\n";
81   }
82 
emitInstructionAnnot(const Instruction * I,formatted_raw_ostream & OS)83   virtual void emitInstructionAnnot(const Instruction *I,
84                                     formatted_raw_ostream &OS) {
85     if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
86       OS << "; " << *MA << "\n";
87   }
88 };
89 
90 /// \brief A MemorySSAWalker that does AA walks and caching of lookups to
91 /// disambiguate accesses.
92 ///
93 /// FIXME: The current implementation of this can take quadratic space in rare
94 /// cases. This can be fixed, but it is something to note until it is fixed.
95 ///
96 /// In order to trigger this behavior, you need to store to N distinct locations
97 /// (that AA can prove don't alias), perform M stores to other memory
98 /// locations that AA can prove don't alias any of the initial N locations, and
99 /// then load from all of the N locations. In this case, we insert M cache
100 /// entries for each of the N loads.
101 ///
102 /// For example:
103 /// define i32 @foo() {
104 ///   %a = alloca i32, align 4
105 ///   %b = alloca i32, align 4
106 ///   store i32 0, i32* %a, align 4
107 ///   store i32 0, i32* %b, align 4
108 ///
109 ///   ; Insert M stores to other memory that doesn't alias %a or %b here
110 ///
111 ///   %c = load i32, i32* %a, align 4 ; Caches M entries in
112 ///                                   ; CachedUpwardsClobberingAccess for the
113 ///                                   ; MemoryLocation %a
114 ///   %d = load i32, i32* %b, align 4 ; Caches M entries in
115 ///                                   ; CachedUpwardsClobberingAccess for the
116 ///                                   ; MemoryLocation %b
117 ///
118 ///   ; For completeness' sake, loading %a or %b again would not cache *another*
119 ///   ; M entries.
120 ///   %r = add i32 %c, %d
121 ///   ret i32 %r
122 /// }
123 class MemorySSA::CachingWalker final : public MemorySSAWalker {
124 public:
125   CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *);
126   ~CachingWalker() override;
127 
128   MemoryAccess *getClobberingMemoryAccess(const Instruction *) override;
129   MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
130                                           MemoryLocation &) override;
131   void invalidateInfo(MemoryAccess *) override;
132 
133 protected:
134   struct UpwardsMemoryQuery;
135   MemoryAccess *doCacheLookup(const MemoryAccess *, const UpwardsMemoryQuery &,
136                               const MemoryLocation &);
137 
138   void doCacheInsert(const MemoryAccess *, MemoryAccess *,
139                      const UpwardsMemoryQuery &, const MemoryLocation &);
140 
141   void doCacheRemove(const MemoryAccess *, const UpwardsMemoryQuery &,
142                      const MemoryLocation &);
143 
144 private:
145   MemoryAccessPair UpwardsDFSWalk(MemoryAccess *, const MemoryLocation &,
146                                   UpwardsMemoryQuery &, bool);
147   MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &);
148   bool instructionClobbersQuery(const MemoryDef *, UpwardsMemoryQuery &,
149                                 const MemoryLocation &Loc) const;
150   void verifyRemoved(MemoryAccess *);
151   SmallDenseMap<ConstMemoryAccessPair, MemoryAccess *>
152       CachedUpwardsClobberingAccess;
153   DenseMap<const MemoryAccess *, MemoryAccess *> CachedUpwardsClobberingCall;
154   AliasAnalysis *AA;
155   DominatorTree *DT;
156 };
157 }
158 
159 namespace {
160 struct RenamePassData {
161   DomTreeNode *DTN;
162   DomTreeNode::const_iterator ChildIt;
163   MemoryAccess *IncomingVal;
164 
RenamePassData__anonb93731900111::RenamePassData165   RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It,
166                  MemoryAccess *M)
167       : DTN(D), ChildIt(It), IncomingVal(M) {}
swap__anonb93731900111::RenamePassData168   void swap(RenamePassData &RHS) {
169     std::swap(DTN, RHS.DTN);
170     std::swap(ChildIt, RHS.ChildIt);
171     std::swap(IncomingVal, RHS.IncomingVal);
172   }
173 };
174 }
175 
176 namespace llvm {
177 /// \brief Rename a single basic block into MemorySSA form.
178 /// Uses the standard SSA renaming algorithm.
179 /// \returns The new incoming value.
renameBlock(BasicBlock * BB,MemoryAccess * IncomingVal)180 MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB,
181                                      MemoryAccess *IncomingVal) {
182   auto It = PerBlockAccesses.find(BB);
183   // Skip most processing if the list is empty.
184   if (It != PerBlockAccesses.end()) {
185     AccessList *Accesses = It->second.get();
186     for (MemoryAccess &L : *Accesses) {
187       switch (L.getValueID()) {
188       case Value::MemoryUseVal:
189         cast<MemoryUse>(&L)->setDefiningAccess(IncomingVal);
190         break;
191       case Value::MemoryDefVal:
192         // We can't legally optimize defs, because we only allow single
193         // memory phis/uses on operations, and if we optimize these, we can
194         // end up with multiple reaching defs. Uses do not have this
195         // problem, since they do not produce a value
196         cast<MemoryDef>(&L)->setDefiningAccess(IncomingVal);
197         IncomingVal = &L;
198         break;
199       case Value::MemoryPhiVal:
200         IncomingVal = &L;
201         break;
202       }
203     }
204   }
205 
206   // Pass through values to our successors
207   for (const BasicBlock *S : successors(BB)) {
208     auto It = PerBlockAccesses.find(S);
209     // Rename the phi nodes in our successor block
210     if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front()))
211       continue;
212     AccessList *Accesses = It->second.get();
213     auto *Phi = cast<MemoryPhi>(&Accesses->front());
214     Phi->addIncoming(IncomingVal, BB);
215   }
216 
217   return IncomingVal;
218 }
219 
220 /// \brief This is the standard SSA renaming algorithm.
221 ///
222 /// We walk the dominator tree in preorder, renaming accesses, and then filling
223 /// in phi nodes in our successors.
renamePass(DomTreeNode * Root,MemoryAccess * IncomingVal,SmallPtrSet<BasicBlock *,16> & Visited)224 void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal,
225                            SmallPtrSet<BasicBlock *, 16> &Visited) {
226   SmallVector<RenamePassData, 32> WorkStack;
227   IncomingVal = renameBlock(Root->getBlock(), IncomingVal);
228   WorkStack.push_back({Root, Root->begin(), IncomingVal});
229   Visited.insert(Root->getBlock());
230 
231   while (!WorkStack.empty()) {
232     DomTreeNode *Node = WorkStack.back().DTN;
233     DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt;
234     IncomingVal = WorkStack.back().IncomingVal;
235 
236     if (ChildIt == Node->end()) {
237       WorkStack.pop_back();
238     } else {
239       DomTreeNode *Child = *ChildIt;
240       ++WorkStack.back().ChildIt;
241       BasicBlock *BB = Child->getBlock();
242       Visited.insert(BB);
243       IncomingVal = renameBlock(BB, IncomingVal);
244       WorkStack.push_back({Child, Child->begin(), IncomingVal});
245     }
246   }
247 }
248 
249 /// \brief Compute dominator levels, used by the phi insertion algorithm above.
computeDomLevels(DenseMap<DomTreeNode *,unsigned> & DomLevels)250 void MemorySSA::computeDomLevels(DenseMap<DomTreeNode *, unsigned> &DomLevels) {
251   for (auto DFI = df_begin(DT->getRootNode()), DFE = df_end(DT->getRootNode());
252        DFI != DFE; ++DFI)
253     DomLevels[*DFI] = DFI.getPathLength() - 1;
254 }
255 
256 /// \brief This handles unreachable block accesses by deleting phi nodes in
257 /// unreachable blocks, and marking all other unreachable MemoryAccess's as
258 /// being uses of the live on entry definition.
markUnreachableAsLiveOnEntry(BasicBlock * BB)259 void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
260   assert(!DT->isReachableFromEntry(BB) &&
261          "Reachable block found while handling unreachable blocks");
262 
263   // Make sure phi nodes in our reachable successors end up with a
264   // LiveOnEntryDef for our incoming edge, even though our block is forward
265   // unreachable.  We could just disconnect these blocks from the CFG fully,
266   // but we do not right now.
267   for (const BasicBlock *S : successors(BB)) {
268     if (!DT->isReachableFromEntry(S))
269       continue;
270     auto It = PerBlockAccesses.find(S);
271     // Rename the phi nodes in our successor block
272     if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front()))
273       continue;
274     AccessList *Accesses = It->second.get();
275     auto *Phi = cast<MemoryPhi>(&Accesses->front());
276     Phi->addIncoming(LiveOnEntryDef.get(), BB);
277   }
278 
279   auto It = PerBlockAccesses.find(BB);
280   if (It == PerBlockAccesses.end())
281     return;
282 
283   auto &Accesses = It->second;
284   for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) {
285     auto Next = std::next(AI);
286     // If we have a phi, just remove it. We are going to replace all
287     // users with live on entry.
288     if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI))
289       UseOrDef->setDefiningAccess(LiveOnEntryDef.get());
290     else
291       Accesses->erase(AI);
292     AI = Next;
293   }
294 }
295 
MemorySSA(Function & Func,AliasAnalysis * AA,DominatorTree * DT)296 MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
297     : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
298       NextID(0) {
299   buildMemorySSA();
300 }
301 
MemorySSA(MemorySSA && MSSA)302 MemorySSA::MemorySSA(MemorySSA &&MSSA)
303     : AA(MSSA.AA), DT(MSSA.DT), F(MSSA.F),
304       ValueToMemoryAccess(std::move(MSSA.ValueToMemoryAccess)),
305       PerBlockAccesses(std::move(MSSA.PerBlockAccesses)),
306       LiveOnEntryDef(std::move(MSSA.LiveOnEntryDef)),
307       Walker(std::move(MSSA.Walker)), NextID(MSSA.NextID) {
308   // Update the Walker MSSA pointer so it doesn't point to the moved-from MSSA
309   // object any more.
310   Walker->MSSA = this;
311 }
312 
~MemorySSA()313 MemorySSA::~MemorySSA() {
314   // Drop all our references
315   for (const auto &Pair : PerBlockAccesses)
316     for (MemoryAccess &MA : *Pair.second)
317       MA.dropAllReferences();
318 }
319 
getOrCreateAccessList(const BasicBlock * BB)320 MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
321   auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
322 
323   if (Res.second)
324     Res.first->second = make_unique<AccessList>();
325   return Res.first->second.get();
326 }
327 
buildMemorySSA()328 void MemorySSA::buildMemorySSA() {
329   // We create an access to represent "live on entry", for things like
330   // arguments or users of globals, where the memory they use is defined before
331   // the beginning of the function. We do not actually insert it into the IR.
332   // We do not define a live on exit for the immediate uses, and thus our
333   // semantics do *not* imply that something with no immediate uses can simply
334   // be removed.
335   BasicBlock &StartingPoint = F.getEntryBlock();
336   LiveOnEntryDef = make_unique<MemoryDef>(F.getContext(), nullptr, nullptr,
337                                           &StartingPoint, NextID++);
338 
339   // We maintain lists of memory accesses per-block, trading memory for time. We
340   // could just look up the memory access for every possible instruction in the
341   // stream.
342   SmallPtrSet<BasicBlock *, 32> DefiningBlocks;
343   SmallPtrSet<BasicBlock *, 32> DefUseBlocks;
344   // Go through each block, figure out where defs occur, and chain together all
345   // the accesses.
346   for (BasicBlock &B : F) {
347     bool InsertIntoDef = false;
348     AccessList *Accesses = nullptr;
349     for (Instruction &I : B) {
350       MemoryUseOrDef *MUD = createNewAccess(&I);
351       if (!MUD)
352         continue;
353       InsertIntoDef |= isa<MemoryDef>(MUD);
354 
355       if (!Accesses)
356         Accesses = getOrCreateAccessList(&B);
357       Accesses->push_back(MUD);
358     }
359     if (InsertIntoDef)
360       DefiningBlocks.insert(&B);
361     if (Accesses)
362       DefUseBlocks.insert(&B);
363   }
364 
365   // Compute live-in.
366   // Live in is normally defined as "all the blocks on the path from each def to
367   // each of it's uses".
368   // MemoryDef's are implicit uses of previous state, so they are also uses.
369   // This means we don't really have def-only instructions.  The only
370   // MemoryDef's that are not really uses are those that are of the LiveOnEntry
371   // variable (because LiveOnEntry can reach anywhere, and every def is a
372   // must-kill of LiveOnEntry).
373   // In theory, you could precisely compute live-in by using alias-analysis to
374   // disambiguate defs and uses to see which really pair up with which.
375   // In practice, this would be really expensive and difficult. So we simply
376   // assume all defs are also uses that need to be kept live.
377   // Because of this, the end result of this live-in computation will be "the
378   // entire set of basic blocks that reach any use".
379 
380   SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
381   SmallVector<BasicBlock *, 64> LiveInBlockWorklist(DefUseBlocks.begin(),
382                                                     DefUseBlocks.end());
383   // Now that we have a set of blocks where a value is live-in, recursively add
384   // predecessors until we find the full region the value is live.
385   while (!LiveInBlockWorklist.empty()) {
386     BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
387 
388     // The block really is live in here, insert it into the set.  If already in
389     // the set, then it has already been processed.
390     if (!LiveInBlocks.insert(BB).second)
391       continue;
392 
393     // Since the value is live into BB, it is either defined in a predecessor or
394     // live into it to.
395     LiveInBlockWorklist.append(pred_begin(BB), pred_end(BB));
396   }
397 
398   // Determine where our MemoryPhi's should go
399   ForwardIDFCalculator IDFs(*DT);
400   IDFs.setDefiningBlocks(DefiningBlocks);
401   IDFs.setLiveInBlocks(LiveInBlocks);
402   SmallVector<BasicBlock *, 32> IDFBlocks;
403   IDFs.calculate(IDFBlocks);
404 
405   // Now place MemoryPhi nodes.
406   for (auto &BB : IDFBlocks) {
407     // Insert phi node
408     AccessList *Accesses = getOrCreateAccessList(BB);
409     MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
410     ValueToMemoryAccess.insert(std::make_pair(BB, Phi));
411     // Phi's always are placed at the front of the block.
412     Accesses->push_front(Phi);
413   }
414 
415   // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get
416   // filled in with all blocks.
417   SmallPtrSet<BasicBlock *, 16> Visited;
418   renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);
419 
420   MemorySSAWalker *Walker = getWalker();
421 
422   // Now optimize the MemoryUse's defining access to point to the nearest
423   // dominating clobbering def.
424   // This ensures that MemoryUse's that are killed by the same store are
425   // immediate users of that store, one of the invariants we guarantee.
426   for (auto DomNode : depth_first(DT)) {
427     BasicBlock *BB = DomNode->getBlock();
428     auto AI = PerBlockAccesses.find(BB);
429     if (AI == PerBlockAccesses.end())
430       continue;
431     AccessList *Accesses = AI->second.get();
432     for (auto &MA : *Accesses) {
433       if (auto *MU = dyn_cast<MemoryUse>(&MA)) {
434         Instruction *Inst = MU->getMemoryInst();
435         MU->setDefiningAccess(Walker->getClobberingMemoryAccess(Inst));
436       }
437     }
438   }
439 
440   // Mark the uses in unreachable blocks as live on entry, so that they go
441   // somewhere.
442   for (auto &BB : F)
443     if (!Visited.count(&BB))
444       markUnreachableAsLiveOnEntry(&BB);
445 }
446 
getWalker()447 MemorySSAWalker *MemorySSA::getWalker() {
448   if (Walker)
449     return Walker.get();
450 
451   Walker = make_unique<CachingWalker>(this, AA, DT);
452   return Walker.get();
453 }
454 
createMemoryPhi(BasicBlock * BB)455 MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
456   assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB");
457   AccessList *Accesses = getOrCreateAccessList(BB);
458   MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
459   ValueToMemoryAccess.insert(std::make_pair(BB, Phi));
460   // Phi's always are placed at the front of the block.
461   Accesses->push_front(Phi);
462   return Phi;
463 }
464 
createDefinedAccess(Instruction * I,MemoryAccess * Definition)465 MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
466                                                MemoryAccess *Definition) {
467   assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
468   MemoryUseOrDef *NewAccess = createNewAccess(I);
469   assert(
470       NewAccess != nullptr &&
471       "Tried to create a memory access for a non-memory touching instruction");
472   NewAccess->setDefiningAccess(Definition);
473   return NewAccess;
474 }
475 
createMemoryAccessInBB(Instruction * I,MemoryAccess * Definition,const BasicBlock * BB,InsertionPlace Point)476 MemoryAccess *MemorySSA::createMemoryAccessInBB(Instruction *I,
477                                                 MemoryAccess *Definition,
478                                                 const BasicBlock *BB,
479                                                 InsertionPlace Point) {
480   MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
481   auto *Accesses = getOrCreateAccessList(BB);
482   if (Point == Beginning) {
483     // It goes after any phi nodes
484     auto AI = std::find_if(
485         Accesses->begin(), Accesses->end(),
486         [](const MemoryAccess &MA) { return !isa<MemoryPhi>(MA); });
487 
488     Accesses->insert(AI, NewAccess);
489   } else {
490     Accesses->push_back(NewAccess);
491   }
492 
493   return NewAccess;
494 }
createMemoryAccessBefore(Instruction * I,MemoryAccess * Definition,MemoryAccess * InsertPt)495 MemoryAccess *MemorySSA::createMemoryAccessBefore(Instruction *I,
496                                                   MemoryAccess *Definition,
497                                                   MemoryAccess *InsertPt) {
498   assert(I->getParent() == InsertPt->getBlock() &&
499          "New and old access must be in the same block");
500   MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
501   auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
502   Accesses->insert(AccessList::iterator(InsertPt), NewAccess);
503   return NewAccess;
504 }
505 
createMemoryAccessAfter(Instruction * I,MemoryAccess * Definition,MemoryAccess * InsertPt)506 MemoryAccess *MemorySSA::createMemoryAccessAfter(Instruction *I,
507                                                  MemoryAccess *Definition,
508                                                  MemoryAccess *InsertPt) {
509   assert(I->getParent() == InsertPt->getBlock() &&
510          "New and old access must be in the same block");
511   MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
512   auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
513   Accesses->insertAfter(AccessList::iterator(InsertPt), NewAccess);
514   return NewAccess;
515 }
516 
517 /// \brief Helper function to create new memory accesses
createNewAccess(Instruction * I)518 MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
519   // The assume intrinsic has a control dependency which we model by claiming
520   // that it writes arbitrarily. Ignore that fake memory dependency here.
521   // FIXME: Replace this special casing with a more accurate modelling of
522   // assume's control dependency.
523   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
524     if (II->getIntrinsicID() == Intrinsic::assume)
525       return nullptr;
526 
527   // Find out what affect this instruction has on memory.
528   ModRefInfo ModRef = AA->getModRefInfo(I);
529   bool Def = bool(ModRef & MRI_Mod);
530   bool Use = bool(ModRef & MRI_Ref);
531 
532   // It's possible for an instruction to not modify memory at all. During
533   // construction, we ignore them.
534   if (!Def && !Use)
535     return nullptr;
536 
537   assert((Def || Use) &&
538          "Trying to create a memory access with a non-memory instruction");
539 
540   MemoryUseOrDef *MUD;
541   if (Def)
542     MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
543   else
544     MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
545   ValueToMemoryAccess.insert(std::make_pair(I, MUD));
546   return MUD;
547 }
548 
findDominatingDef(BasicBlock * UseBlock,enum InsertionPlace Where)549 MemoryAccess *MemorySSA::findDominatingDef(BasicBlock *UseBlock,
550                                            enum InsertionPlace Where) {
551   // Handle the initial case
552   if (Where == Beginning)
553     // The only thing that could define us at the beginning is a phi node
554     if (MemoryPhi *Phi = getMemoryAccess(UseBlock))
555       return Phi;
556 
557   DomTreeNode *CurrNode = DT->getNode(UseBlock);
558   // Need to be defined by our dominator
559   if (Where == Beginning)
560     CurrNode = CurrNode->getIDom();
561   Where = End;
562   while (CurrNode) {
563     auto It = PerBlockAccesses.find(CurrNode->getBlock());
564     if (It != PerBlockAccesses.end()) {
565       auto &Accesses = It->second;
566       for (MemoryAccess &RA : reverse(*Accesses)) {
567         if (isa<MemoryDef>(RA) || isa<MemoryPhi>(RA))
568           return &RA;
569       }
570     }
571     CurrNode = CurrNode->getIDom();
572   }
573   return LiveOnEntryDef.get();
574 }
575 
576 /// \brief Returns true if \p Replacer dominates \p Replacee .
dominatesUse(const MemoryAccess * Replacer,const MemoryAccess * Replacee) const577 bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
578                              const MemoryAccess *Replacee) const {
579   if (isa<MemoryUseOrDef>(Replacee))
580     return DT->dominates(Replacer->getBlock(), Replacee->getBlock());
581   const auto *MP = cast<MemoryPhi>(Replacee);
582   // For a phi node, the use occurs in the predecessor block of the phi node.
583   // Since we may occur multiple times in the phi node, we have to check each
584   // operand to ensure Replacer dominates each operand where Replacee occurs.
585   for (const Use &Arg : MP->operands()) {
586     if (Arg.get() != Replacee &&
587         !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg)))
588       return false;
589   }
590   return true;
591 }
592 
593 /// \brief If all arguments of a MemoryPHI are defined by the same incoming
594 /// argument, return that argument.
onlySingleValue(MemoryPhi * MP)595 static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
596   MemoryAccess *MA = nullptr;
597 
598   for (auto &Arg : MP->operands()) {
599     if (!MA)
600       MA = cast<MemoryAccess>(Arg);
601     else if (MA != Arg)
602       return nullptr;
603   }
604   return MA;
605 }
606 
607 /// \brief Properly remove \p MA from all of MemorySSA's lookup tables.
608 ///
609 /// Because of the way the intrusive list and use lists work, it is important to
610 /// do removal in the right order.
removeFromLookups(MemoryAccess * MA)611 void MemorySSA::removeFromLookups(MemoryAccess *MA) {
612   assert(MA->use_empty() &&
613          "Trying to remove memory access that still has uses");
614   if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA))
615     MUD->setDefiningAccess(nullptr);
616   // Invalidate our walker's cache if necessary
617   if (!isa<MemoryUse>(MA))
618     Walker->invalidateInfo(MA);
619   // The call below to erase will destroy MA, so we can't change the order we
620   // are doing things here
621   Value *MemoryInst;
622   if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
623     MemoryInst = MUD->getMemoryInst();
624   } else {
625     MemoryInst = MA->getBlock();
626   }
627   ValueToMemoryAccess.erase(MemoryInst);
628 
629   auto AccessIt = PerBlockAccesses.find(MA->getBlock());
630   std::unique_ptr<AccessList> &Accesses = AccessIt->second;
631   Accesses->erase(MA);
632   if (Accesses->empty())
633     PerBlockAccesses.erase(AccessIt);
634 }
635 
removeMemoryAccess(MemoryAccess * MA)636 void MemorySSA::removeMemoryAccess(MemoryAccess *MA) {
637   assert(!isLiveOnEntryDef(MA) && "Trying to remove the live on entry def");
638   // We can only delete phi nodes if they have no uses, or we can replace all
639   // uses with a single definition.
640   MemoryAccess *NewDefTarget = nullptr;
641   if (MemoryPhi *MP = dyn_cast<MemoryPhi>(MA)) {
642     // Note that it is sufficient to know that all edges of the phi node have
643     // the same argument.  If they do, by the definition of dominance frontiers
644     // (which we used to place this phi), that argument must dominate this phi,
645     // and thus, must dominate the phi's uses, and so we will not hit the assert
646     // below.
647     NewDefTarget = onlySingleValue(MP);
648     assert((NewDefTarget || MP->use_empty()) &&
649            "We can't delete this memory phi");
650   } else {
651     NewDefTarget = cast<MemoryUseOrDef>(MA)->getDefiningAccess();
652   }
653 
654   // Re-point the uses at our defining access
655   if (!MA->use_empty())
656     MA->replaceAllUsesWith(NewDefTarget);
657 
658   // The call below to erase will destroy MA, so we can't change the order we
659   // are doing things here
660   removeFromLookups(MA);
661 }
662 
print(raw_ostream & OS) const663 void MemorySSA::print(raw_ostream &OS) const {
664   MemorySSAAnnotatedWriter Writer(this);
665   F.print(OS, &Writer);
666 }
667 
dump() const668 void MemorySSA::dump() const {
669   MemorySSAAnnotatedWriter Writer(this);
670   F.print(dbgs(), &Writer);
671 }
672 
verifyMemorySSA() const673 void MemorySSA::verifyMemorySSA() const {
674   verifyDefUses(F);
675   verifyDomination(F);
676   verifyOrdering(F);
677 }
678 
679 /// \brief Verify that the order and existence of MemoryAccesses matches the
680 /// order and existence of memory affecting instructions.
verifyOrdering(Function & F) const681 void MemorySSA::verifyOrdering(Function &F) const {
682   // Walk all the blocks, comparing what the lookups think and what the access
683   // lists think, as well as the order in the blocks vs the order in the access
684   // lists.
685   SmallVector<MemoryAccess *, 32> ActualAccesses;
686   for (BasicBlock &B : F) {
687     const AccessList *AL = getBlockAccesses(&B);
688     MemoryAccess *Phi = getMemoryAccess(&B);
689     if (Phi)
690       ActualAccesses.push_back(Phi);
691     for (Instruction &I : B) {
692       MemoryAccess *MA = getMemoryAccess(&I);
693       assert((!MA || AL) && "We have memory affecting instructions "
694                             "in this block but they are not in the "
695                             "access list");
696       if (MA)
697         ActualAccesses.push_back(MA);
698     }
699     // Either we hit the assert, really have no accesses, or we have both
700     // accesses and an access list
701     if (!AL)
702       continue;
703     assert(AL->size() == ActualAccesses.size() &&
704            "We don't have the same number of accesses in the block as on the "
705            "access list");
706     auto ALI = AL->begin();
707     auto AAI = ActualAccesses.begin();
708     while (ALI != AL->end() && AAI != ActualAccesses.end()) {
709       assert(&*ALI == *AAI && "Not the same accesses in the same order");
710       ++ALI;
711       ++AAI;
712     }
713     ActualAccesses.clear();
714   }
715 }
716 
717 /// \brief Verify the domination properties of MemorySSA by checking that each
718 /// definition dominates all of its uses.
verifyDomination(Function & F) const719 void MemorySSA::verifyDomination(Function &F) const {
720   for (BasicBlock &B : F) {
721     // Phi nodes are attached to basic blocks
722     if (MemoryPhi *MP = getMemoryAccess(&B)) {
723       for (User *U : MP->users()) {
724         BasicBlock *UseBlock;
725         // Phi operands are used on edges, we simulate the right domination by
726         // acting as if the use occurred at the end of the predecessor block.
727         if (MemoryPhi *P = dyn_cast<MemoryPhi>(U)) {
728           for (const auto &Arg : P->operands()) {
729             if (Arg == MP) {
730               UseBlock = P->getIncomingBlock(Arg);
731               break;
732             }
733           }
734         } else {
735           UseBlock = cast<MemoryAccess>(U)->getBlock();
736         }
737         (void)UseBlock;
738         assert(DT->dominates(MP->getBlock(), UseBlock) &&
739                "Memory PHI does not dominate it's uses");
740       }
741     }
742 
743     for (Instruction &I : B) {
744       MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I));
745       if (!MD)
746         continue;
747 
748       for (User *U : MD->users()) {
749         BasicBlock *UseBlock;
750         (void)UseBlock;
751         // Things are allowed to flow to phi nodes over their predecessor edge.
752         if (auto *P = dyn_cast<MemoryPhi>(U)) {
753           for (const auto &Arg : P->operands()) {
754             if (Arg == MD) {
755               UseBlock = P->getIncomingBlock(Arg);
756               break;
757             }
758           }
759         } else {
760           UseBlock = cast<MemoryAccess>(U)->getBlock();
761         }
762         assert(DT->dominates(MD->getBlock(), UseBlock) &&
763                "Memory Def does not dominate it's uses");
764       }
765     }
766   }
767 }
768 
769 /// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use
770 /// appears in the use list of \p Def.
771 ///
772 /// llvm_unreachable is used instead of asserts because this may be called in
773 /// a build without asserts. In that case, we don't want this to turn into a
774 /// nop.
verifyUseInDefs(MemoryAccess * Def,MemoryAccess * Use) const775 void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
776   // The live on entry use may cause us to get a NULL def here
777   if (!Def) {
778     if (!isLiveOnEntryDef(Use))
779       llvm_unreachable("Null def but use not point to live on entry def");
780   } else if (std::find(Def->user_begin(), Def->user_end(), Use) ==
781              Def->user_end()) {
782     llvm_unreachable("Did not find use in def's use list");
783   }
784 }
785 
786 /// \brief Verify the immediate use information, by walking all the memory
787 /// accesses and verifying that, for each use, it appears in the
788 /// appropriate def's use list
verifyDefUses(Function & F) const789 void MemorySSA::verifyDefUses(Function &F) const {
790   for (BasicBlock &B : F) {
791     // Phi nodes are attached to basic blocks
792     if (MemoryPhi *Phi = getMemoryAccess(&B)) {
793       assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
794                                           pred_begin(&B), pred_end(&B))) &&
795              "Incomplete MemoryPhi Node");
796       for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
797         verifyUseInDefs(Phi->getIncomingValue(I), Phi);
798     }
799 
800     for (Instruction &I : B) {
801       if (MemoryAccess *MA = getMemoryAccess(&I)) {
802         assert(isa<MemoryUseOrDef>(MA) &&
803                "Found a phi node not attached to a bb");
804         verifyUseInDefs(cast<MemoryUseOrDef>(MA)->getDefiningAccess(), MA);
805       }
806     }
807   }
808 }
809 
getMemoryAccess(const Value * I) const810 MemoryAccess *MemorySSA::getMemoryAccess(const Value *I) const {
811   return ValueToMemoryAccess.lookup(I);
812 }
813 
getMemoryAccess(const BasicBlock * BB) const814 MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
815   return cast_or_null<MemoryPhi>(getMemoryAccess((const Value *)BB));
816 }
817 
818 /// \brief Determine, for two memory accesses in the same block,
819 /// whether \p Dominator dominates \p Dominatee.
820 /// \returns True if \p Dominator dominates \p Dominatee.
locallyDominates(const MemoryAccess * Dominator,const MemoryAccess * Dominatee) const821 bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
822                                  const MemoryAccess *Dominatee) const {
823 
824   assert((Dominator->getBlock() == Dominatee->getBlock()) &&
825          "Asking for local domination when accesses are in different blocks!");
826 
827   // A node dominates itself.
828   if (Dominatee == Dominator)
829     return true;
830 
831   // When Dominatee is defined on function entry, it is not dominated by another
832   // memory access.
833   if (isLiveOnEntryDef(Dominatee))
834     return false;
835 
836   // When Dominator is defined on function entry, it dominates the other memory
837   // access.
838   if (isLiveOnEntryDef(Dominator))
839     return true;
840 
841   // Get the access list for the block
842   const AccessList *AccessList = getBlockAccesses(Dominator->getBlock());
843   AccessList::const_reverse_iterator It(Dominator->getIterator());
844 
845   // If we hit the beginning of the access list before we hit dominatee, we must
846   // dominate it
847   return std::none_of(It, AccessList->rend(),
848                       [&](const MemoryAccess &MA) { return &MA == Dominatee; });
849 }
850 
851 const static char LiveOnEntryStr[] = "liveOnEntry";
852 
print(raw_ostream & OS) const853 void MemoryDef::print(raw_ostream &OS) const {
854   MemoryAccess *UO = getDefiningAccess();
855 
856   OS << getID() << " = MemoryDef(";
857   if (UO && UO->getID())
858     OS << UO->getID();
859   else
860     OS << LiveOnEntryStr;
861   OS << ')';
862 }
863 
print(raw_ostream & OS) const864 void MemoryPhi::print(raw_ostream &OS) const {
865   bool First = true;
866   OS << getID() << " = MemoryPhi(";
867   for (const auto &Op : operands()) {
868     BasicBlock *BB = getIncomingBlock(Op);
869     MemoryAccess *MA = cast<MemoryAccess>(Op);
870     if (!First)
871       OS << ',';
872     else
873       First = false;
874 
875     OS << '{';
876     if (BB->hasName())
877       OS << BB->getName();
878     else
879       BB->printAsOperand(OS, false);
880     OS << ',';
881     if (unsigned ID = MA->getID())
882       OS << ID;
883     else
884       OS << LiveOnEntryStr;
885     OS << '}';
886   }
887   OS << ')';
888 }
889 
~MemoryAccess()890 MemoryAccess::~MemoryAccess() {}
891 
print(raw_ostream & OS) const892 void MemoryUse::print(raw_ostream &OS) const {
893   MemoryAccess *UO = getDefiningAccess();
894   OS << "MemoryUse(";
895   if (UO && UO->getID())
896     OS << UO->getID();
897   else
898     OS << LiveOnEntryStr;
899   OS << ')';
900 }
901 
dump() const902 void MemoryAccess::dump() const {
903   print(dbgs());
904   dbgs() << "\n";
905 }
906 
907 char MemorySSAPrinterLegacyPass::ID = 0;
908 
MemorySSAPrinterLegacyPass()909 MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) {
910   initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
911 }
912 
getAnalysisUsage(AnalysisUsage & AU) const913 void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
914   AU.setPreservesAll();
915   AU.addRequired<MemorySSAWrapperPass>();
916   AU.addPreserved<MemorySSAWrapperPass>();
917 }
918 
runOnFunction(Function & F)919 bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) {
920   auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
921   MSSA.print(dbgs());
922   if (VerifyMemorySSA)
923     MSSA.verifyMemorySSA();
924   return false;
925 }
926 
927 char MemorySSAAnalysis::PassID;
928 
run(Function & F,AnalysisManager<Function> & AM)929 MemorySSA MemorySSAAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
930   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
931   auto &AA = AM.getResult<AAManager>(F);
932   return MemorySSA(F, &AA, &DT);
933 }
934 
run(Function & F,FunctionAnalysisManager & AM)935 PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
936                                             FunctionAnalysisManager &AM) {
937   OS << "MemorySSA for function: " << F.getName() << "\n";
938   AM.getResult<MemorySSAAnalysis>(F).print(OS);
939 
940   return PreservedAnalyses::all();
941 }
942 
run(Function & F,FunctionAnalysisManager & AM)943 PreservedAnalyses MemorySSAVerifierPass::run(Function &F,
944                                              FunctionAnalysisManager &AM) {
945   AM.getResult<MemorySSAAnalysis>(F).verifyMemorySSA();
946 
947   return PreservedAnalyses::all();
948 }
949 
950 char MemorySSAWrapperPass::ID = 0;
951 
MemorySSAWrapperPass()952 MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) {
953   initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry());
954 }
955 
releaseMemory()956 void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); }
957 
getAnalysisUsage(AnalysisUsage & AU) const958 void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
959   AU.setPreservesAll();
960   AU.addRequiredTransitive<DominatorTreeWrapperPass>();
961   AU.addRequiredTransitive<AAResultsWrapperPass>();
962 }
963 
runOnFunction(Function & F)964 bool MemorySSAWrapperPass::runOnFunction(Function &F) {
965   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
966   auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
967   MSSA.reset(new MemorySSA(F, &AA, &DT));
968   return false;
969 }
970 
verifyAnalysis() const971 void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); }
972 
print(raw_ostream & OS,const Module * M) const973 void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const {
974   MSSA->print(OS);
975 }
976 
MemorySSAWalker(MemorySSA * M)977 MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
978 
CachingWalker(MemorySSA * M,AliasAnalysis * A,DominatorTree * D)979 MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A,
980                                         DominatorTree *D)
981     : MemorySSAWalker(M), AA(A), DT(D) {}
982 
~CachingWalker()983 MemorySSA::CachingWalker::~CachingWalker() {}
984 
985 struct MemorySSA::CachingWalker::UpwardsMemoryQuery {
986   // True if we saw a phi whose predecessor was a backedge
987   bool SawBackedgePhi;
988   // True if our original query started off as a call
989   bool IsCall;
990   // The pointer location we started the query with. This will be empty if
991   // IsCall is true.
992   MemoryLocation StartingLoc;
993   // This is the instruction we were querying about.
994   const Instruction *Inst;
995   // Set of visited Instructions for this query.
996   DenseSet<MemoryAccessPair> Visited;
997   // Vector of visited call accesses for this query. This is separated out
998   // because you can always cache and lookup the result of call queries (IE when
999   // IsCall == true) for every call in the chain. The calls have no AA location
1000   // associated with them with them, and thus, no context dependence.
1001   SmallVector<const MemoryAccess *, 32> VisitedCalls;
1002   // The MemoryAccess we actually got called with, used to test local domination
1003   const MemoryAccess *OriginalAccess;
1004 
UpwardsMemoryQueryllvm::MemorySSA::CachingWalker::UpwardsMemoryQuery1005   UpwardsMemoryQuery()
1006       : SawBackedgePhi(false), IsCall(false), Inst(nullptr),
1007         OriginalAccess(nullptr) {}
1008 
UpwardsMemoryQueryllvm::MemorySSA::CachingWalker::UpwardsMemoryQuery1009   UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
1010       : SawBackedgePhi(false), IsCall(ImmutableCallSite(Inst)), Inst(Inst),
1011         OriginalAccess(Access) {}
1012 };
1013 
invalidateInfo(MemoryAccess * MA)1014 void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
1015 
1016   // TODO: We can do much better cache invalidation with differently stored
1017   // caches.  For now, for MemoryUses, we simply remove them
1018   // from the cache, and kill the entire call/non-call cache for everything
1019   // else.  The problem is for phis or defs, currently we'd need to follow use
1020   // chains down and invalidate anything below us in the chain that currently
1021   // terminates at this access.
1022 
1023   // See if this is a MemoryUse, if so, just remove the cached info. MemoryUse
1024   // is by definition never a barrier, so nothing in the cache could point to
1025   // this use. In that case, we only need invalidate the info for the use
1026   // itself.
1027 
1028   if (MemoryUse *MU = dyn_cast<MemoryUse>(MA)) {
1029     UpwardsMemoryQuery Q;
1030     Instruction *I = MU->getMemoryInst();
1031     Q.IsCall = bool(ImmutableCallSite(I));
1032     Q.Inst = I;
1033     if (!Q.IsCall)
1034       Q.StartingLoc = MemoryLocation::get(I);
1035     doCacheRemove(MA, Q, Q.StartingLoc);
1036   } else {
1037     // If it is not a use, the best we can do right now is destroy the cache.
1038     CachedUpwardsClobberingCall.clear();
1039     CachedUpwardsClobberingAccess.clear();
1040   }
1041 
1042 #ifdef EXPENSIVE_CHECKS
1043   // Run this only when expensive checks are enabled.
1044   verifyRemoved(MA);
1045 #endif
1046 }
1047 
doCacheRemove(const MemoryAccess * M,const UpwardsMemoryQuery & Q,const MemoryLocation & Loc)1048 void MemorySSA::CachingWalker::doCacheRemove(const MemoryAccess *M,
1049                                              const UpwardsMemoryQuery &Q,
1050                                              const MemoryLocation &Loc) {
1051   if (Q.IsCall)
1052     CachedUpwardsClobberingCall.erase(M);
1053   else
1054     CachedUpwardsClobberingAccess.erase({M, Loc});
1055 }
1056 
doCacheInsert(const MemoryAccess * M,MemoryAccess * Result,const UpwardsMemoryQuery & Q,const MemoryLocation & Loc)1057 void MemorySSA::CachingWalker::doCacheInsert(const MemoryAccess *M,
1058                                              MemoryAccess *Result,
1059                                              const UpwardsMemoryQuery &Q,
1060                                              const MemoryLocation &Loc) {
1061   // This is fine for Phis, since there are times where we can't optimize them.
1062   // Making a def its own clobber is never correct, though.
1063   assert((Result != M || isa<MemoryPhi>(M)) &&
1064          "Something can't clobber itself!");
1065   ++NumClobberCacheInserts;
1066   if (Q.IsCall)
1067     CachedUpwardsClobberingCall[M] = Result;
1068   else
1069     CachedUpwardsClobberingAccess[{M, Loc}] = Result;
1070 }
1071 
1072 MemoryAccess *
doCacheLookup(const MemoryAccess * M,const UpwardsMemoryQuery & Q,const MemoryLocation & Loc)1073 MemorySSA::CachingWalker::doCacheLookup(const MemoryAccess *M,
1074                                         const UpwardsMemoryQuery &Q,
1075                                         const MemoryLocation &Loc) {
1076   ++NumClobberCacheLookups;
1077   MemoryAccess *Result;
1078 
1079   if (Q.IsCall)
1080     Result = CachedUpwardsClobberingCall.lookup(M);
1081   else
1082     Result = CachedUpwardsClobberingAccess.lookup({M, Loc});
1083 
1084   if (Result)
1085     ++NumClobberCacheHits;
1086   return Result;
1087 }
1088 
instructionClobbersQuery(const MemoryDef * MD,UpwardsMemoryQuery & Q,const MemoryLocation & Loc) const1089 bool MemorySSA::CachingWalker::instructionClobbersQuery(
1090     const MemoryDef *MD, UpwardsMemoryQuery &Q,
1091     const MemoryLocation &Loc) const {
1092   Instruction *DefMemoryInst = MD->getMemoryInst();
1093   assert(DefMemoryInst && "Defining instruction not actually an instruction");
1094 
1095   if (!Q.IsCall)
1096     return AA->getModRefInfo(DefMemoryInst, Loc) & MRI_Mod;
1097 
1098   // If this is a call, mark it for caching
1099   if (ImmutableCallSite(DefMemoryInst))
1100     Q.VisitedCalls.push_back(MD);
1101   ModRefInfo I = AA->getModRefInfo(DefMemoryInst, ImmutableCallSite(Q.Inst));
1102   return I != MRI_NoModRef;
1103 }
1104 
UpwardsDFSWalk(MemoryAccess * StartingAccess,const MemoryLocation & Loc,UpwardsMemoryQuery & Q,bool FollowingBackedge)1105 MemoryAccessPair MemorySSA::CachingWalker::UpwardsDFSWalk(
1106     MemoryAccess *StartingAccess, const MemoryLocation &Loc,
1107     UpwardsMemoryQuery &Q, bool FollowingBackedge) {
1108   MemoryAccess *ModifyingAccess = nullptr;
1109 
1110   auto DFI = df_begin(StartingAccess);
1111   for (auto DFE = df_end(StartingAccess); DFI != DFE;) {
1112     MemoryAccess *CurrAccess = *DFI;
1113     if (MSSA->isLiveOnEntryDef(CurrAccess))
1114       return {CurrAccess, Loc};
1115     // If this is a MemoryDef, check whether it clobbers our current query. This
1116     // needs to be done before consulting the cache, because the cache reports
1117     // the clobber for CurrAccess. If CurrAccess is a clobber for this query,
1118     // and we ask the cache for information first, then we might skip this
1119     // clobber, which is bad.
1120     if (auto *MD = dyn_cast<MemoryDef>(CurrAccess)) {
1121       // If we hit the top, stop following this path.
1122       // While we can do lookups, we can't sanely do inserts here unless we were
1123       // to track everything we saw along the way, since we don't know where we
1124       // will stop.
1125       if (instructionClobbersQuery(MD, Q, Loc)) {
1126         ModifyingAccess = CurrAccess;
1127         break;
1128       }
1129     }
1130     if (auto CacheResult = doCacheLookup(CurrAccess, Q, Loc))
1131       return {CacheResult, Loc};
1132 
1133     // We need to know whether it is a phi so we can track backedges.
1134     // Otherwise, walk all upward defs.
1135     if (!isa<MemoryPhi>(CurrAccess)) {
1136       ++DFI;
1137       continue;
1138     }
1139 
1140 #ifndef NDEBUG
1141     // The loop below visits the phi's children for us. Because phis are the
1142     // only things with multiple edges, skipping the children should always lead
1143     // us to the end of the loop.
1144     //
1145     // Use a copy of DFI because skipChildren would kill our search stack, which
1146     // would make caching anything on the way back impossible.
1147     auto DFICopy = DFI;
1148     assert(DFICopy.skipChildren() == DFE &&
1149            "Skipping phi's children doesn't end the DFS?");
1150 #endif
1151 
1152     const MemoryAccessPair PHIPair(CurrAccess, Loc);
1153 
1154     // Don't try to optimize this phi again if we've already tried to do so.
1155     if (!Q.Visited.insert(PHIPair).second) {
1156       ModifyingAccess = CurrAccess;
1157       break;
1158     }
1159 
1160     std::size_t InitialVisitedCallSize = Q.VisitedCalls.size();
1161 
1162     // Recurse on PHI nodes, since we need to change locations.
1163     // TODO: Allow graphtraits on pairs, which would turn this whole function
1164     // into a normal single depth first walk.
1165     MemoryAccess *FirstDef = nullptr;
1166     for (auto MPI = upward_defs_begin(PHIPair), MPE = upward_defs_end();
1167          MPI != MPE; ++MPI) {
1168       bool Backedge =
1169           !FollowingBackedge &&
1170           DT->dominates(CurrAccess->getBlock(), MPI.getPhiArgBlock());
1171 
1172       MemoryAccessPair CurrentPair =
1173           UpwardsDFSWalk(MPI->first, MPI->second, Q, Backedge);
1174       // All the phi arguments should reach the same point if we can bypass
1175       // this phi. The alternative is that they hit this phi node, which
1176       // means we can skip this argument.
1177       if (FirstDef && CurrentPair.first != PHIPair.first &&
1178           CurrentPair.first != FirstDef) {
1179         ModifyingAccess = CurrAccess;
1180         break;
1181       }
1182 
1183       if (!FirstDef)
1184         FirstDef = CurrentPair.first;
1185     }
1186 
1187     // If we exited the loop early, go with the result it gave us.
1188     if (!ModifyingAccess) {
1189       assert(FirstDef && "Found a Phi with no upward defs?");
1190       ModifyingAccess = FirstDef;
1191     } else {
1192       // If we can't optimize this Phi, then we can't safely cache any of the
1193       // calls we visited when trying to optimize it. Wipe them out now.
1194       Q.VisitedCalls.resize(InitialVisitedCallSize);
1195     }
1196     break;
1197   }
1198 
1199   if (!ModifyingAccess)
1200     return {MSSA->getLiveOnEntryDef(), Q.StartingLoc};
1201 
1202   const BasicBlock *OriginalBlock = StartingAccess->getBlock();
1203   assert(DFI.getPathLength() > 0 && "We dropped our path?");
1204   unsigned N = DFI.getPathLength();
1205   // If we found a clobbering def, the last element in the path will be our
1206   // clobber, so we don't want to cache that to itself. OTOH, if we optimized a
1207   // phi, we can add the last thing in the path to the cache, since that won't
1208   // be the result.
1209   if (DFI.getPath(N - 1) == ModifyingAccess)
1210     --N;
1211   for (; N > 1; --N) {
1212     MemoryAccess *CacheAccess = DFI.getPath(N - 1);
1213     BasicBlock *CurrBlock = CacheAccess->getBlock();
1214     if (!FollowingBackedge)
1215       doCacheInsert(CacheAccess, ModifyingAccess, Q, Loc);
1216     if (DT->dominates(CurrBlock, OriginalBlock) &&
1217         (CurrBlock != OriginalBlock || !FollowingBackedge ||
1218          MSSA->locallyDominates(CacheAccess, StartingAccess)))
1219       break;
1220   }
1221 
1222   // Cache everything else on the way back. The caller should cache
1223   // StartingAccess for us.
1224   for (; N > 1; --N) {
1225     MemoryAccess *CacheAccess = DFI.getPath(N - 1);
1226     doCacheInsert(CacheAccess, ModifyingAccess, Q, Loc);
1227   }
1228   assert(Q.Visited.size() < 1000 && "Visited too much");
1229 
1230   return {ModifyingAccess, Loc};
1231 }
1232 
1233 /// \brief Walk the use-def chains starting at \p MA and find
1234 /// the MemoryAccess that actually clobbers Loc.
1235 ///
1236 /// \returns our clobbering memory access
getClobberingMemoryAccess(MemoryAccess * StartingAccess,UpwardsMemoryQuery & Q)1237 MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
1238     MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
1239   return UpwardsDFSWalk(StartingAccess, Q.StartingLoc, Q, false).first;
1240 }
1241 
getClobberingMemoryAccess(MemoryAccess * StartingAccess,MemoryLocation & Loc)1242 MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
1243     MemoryAccess *StartingAccess, MemoryLocation &Loc) {
1244   if (isa<MemoryPhi>(StartingAccess))
1245     return StartingAccess;
1246 
1247   auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess);
1248   if (MSSA->isLiveOnEntryDef(StartingUseOrDef))
1249     return StartingUseOrDef;
1250 
1251   Instruction *I = StartingUseOrDef->getMemoryInst();
1252 
1253   // Conservatively, fences are always clobbers, so don't perform the walk if we
1254   // hit a fence.
1255   if (isa<FenceInst>(I))
1256     return StartingUseOrDef;
1257 
1258   UpwardsMemoryQuery Q;
1259   Q.OriginalAccess = StartingUseOrDef;
1260   Q.StartingLoc = Loc;
1261   Q.Inst = StartingUseOrDef->getMemoryInst();
1262   Q.IsCall = false;
1263 
1264   if (auto CacheResult = doCacheLookup(StartingUseOrDef, Q, Q.StartingLoc))
1265     return CacheResult;
1266 
1267   // Unlike the other function, do not walk to the def of a def, because we are
1268   // handed something we already believe is the clobbering access.
1269   MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef)
1270                                      ? StartingUseOrDef->getDefiningAccess()
1271                                      : StartingUseOrDef;
1272 
1273   MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
1274   // Only cache this if it wouldn't make Clobber point to itself.
1275   if (Clobber != StartingAccess)
1276     doCacheInsert(Q.OriginalAccess, Clobber, Q, Q.StartingLoc);
1277   DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
1278   DEBUG(dbgs() << *StartingUseOrDef << "\n");
1279   DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
1280   DEBUG(dbgs() << *Clobber << "\n");
1281   return Clobber;
1282 }
1283 
1284 MemoryAccess *
getClobberingMemoryAccess(const Instruction * I)1285 MemorySSA::CachingWalker::getClobberingMemoryAccess(const Instruction *I) {
1286   // There should be no way to lookup an instruction and get a phi as the
1287   // access, since we only map BB's to PHI's. So, this must be a use or def.
1288   auto *StartingAccess = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(I));
1289 
1290   // We can't sanely do anything with a FenceInst, they conservatively
1291   // clobber all memory, and have no locations to get pointers from to
1292   // try to disambiguate
1293   if (isa<FenceInst>(I))
1294     return StartingAccess;
1295 
1296   UpwardsMemoryQuery Q;
1297   Q.OriginalAccess = StartingAccess;
1298   Q.IsCall = bool(ImmutableCallSite(I));
1299   if (!Q.IsCall)
1300     Q.StartingLoc = MemoryLocation::get(I);
1301   Q.Inst = I;
1302   if (auto CacheResult = doCacheLookup(StartingAccess, Q, Q.StartingLoc))
1303     return CacheResult;
1304 
1305   // Start with the thing we already think clobbers this location
1306   MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
1307 
1308   // At this point, DefiningAccess may be the live on entry def.
1309   // If it is, we will not get a better result.
1310   if (MSSA->isLiveOnEntryDef(DefiningAccess))
1311     return DefiningAccess;
1312 
1313   MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q);
1314   // DFS won't cache a result for DefiningAccess. So, if DefiningAccess isn't
1315   // our clobber, be sure that it gets a cache entry, too.
1316   if (Result != DefiningAccess)
1317     doCacheInsert(DefiningAccess, Result, Q, Q.StartingLoc);
1318   doCacheInsert(Q.OriginalAccess, Result, Q, Q.StartingLoc);
1319   // TODO: When this implementation is more mature, we may want to figure out
1320   // what this additional caching buys us. It's most likely A Good Thing.
1321   if (Q.IsCall)
1322     for (const MemoryAccess *MA : Q.VisitedCalls)
1323       if (MA != Result)
1324         doCacheInsert(MA, Result, Q, Q.StartingLoc);
1325 
1326   DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
1327   DEBUG(dbgs() << *DefiningAccess << "\n");
1328   DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
1329   DEBUG(dbgs() << *Result << "\n");
1330 
1331   return Result;
1332 }
1333 
1334 // Verify that MA doesn't exist in any of the caches.
verifyRemoved(MemoryAccess * MA)1335 void MemorySSA::CachingWalker::verifyRemoved(MemoryAccess *MA) {
1336 #ifndef NDEBUG
1337   for (auto &P : CachedUpwardsClobberingAccess)
1338     assert(P.first.first != MA && P.second != MA &&
1339            "Found removed MemoryAccess in cache.");
1340   for (auto &P : CachedUpwardsClobberingCall)
1341     assert(P.first != MA && P.second != MA &&
1342            "Found removed MemoryAccess in cache.");
1343 #endif // !NDEBUG
1344 }
1345 
1346 MemoryAccess *
getClobberingMemoryAccess(const Instruction * I)1347 DoNothingMemorySSAWalker::getClobberingMemoryAccess(const Instruction *I) {
1348   MemoryAccess *MA = MSSA->getMemoryAccess(I);
1349   if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
1350     return Use->getDefiningAccess();
1351   return MA;
1352 }
1353 
getClobberingMemoryAccess(MemoryAccess * StartingAccess,MemoryLocation &)1354 MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess(
1355     MemoryAccess *StartingAccess, MemoryLocation &) {
1356   if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess))
1357     return Use->getDefiningAccess();
1358   return StartingAccess;
1359 }
1360 }
1361