1 //===- CFLAndersAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a CFL-based, summary-based alias analysis algorithm. It
11 // differs from CFLSteensAliasAnalysis in its inclusion-based nature while
12 // CFLSteensAliasAnalysis is unification-based. This pass has worse performance
13 // than CFLSteensAliasAnalysis (the worst case complexity of
14 // CFLAndersAliasAnalysis is cubic, while the worst case complexity of
15 // CFLSteensAliasAnalysis is almost linear), but it is able to yield more
16 // precise analysis result. The precision of this analysis is roughly the same
17 // as that of an one level context-sensitive Andersen's algorithm.
18 //
19 // The algorithm used here is based on recursive state machine matching scheme
20 // proposed in "Demand-driven alias analysis for C" by Xin Zheng and Radu
21 // Rugina. The general idea is to extend the traditional transitive closure
22 // algorithm to perform CFL matching along the way: instead of recording
23 // "whether X is reachable from Y", we keep track of "whether X is reachable
24 // from Y at state Z", where the "state" field indicates where we are in the CFL
25 // matching process. To understand the matching better, it is advisable to have
26 // the state machine shown in Figure 3 of the paper available when reading the
27 // codes: all we do here is to selectively expand the transitive closure by
28 // discarding edges that are not recognized by the state machine.
29 //
30 // There are two differences between our current implementation and the one
31 // described in the paper:
32 // - Our algorithm eagerly computes all alias pairs after the CFLGraph is built,
33 // while in the paper the authors did the computation in a demand-driven
34 // fashion. We did not implement the demand-driven algorithm due to the
35 // additional coding complexity and higher memory profile, but if we found it
36 // necessary we may switch to it eventually.
37 // - In the paper the authors use a state machine that does not distinguish
38 // value reads from value writes. For example, if Y is reachable from X at state
39 // S3, it may be the case that X is written into Y, or it may be the case that
40 // there's a third value Z that writes into both X and Y. To make that
41 // distinction (which is crucial in building function summary as well as
42 // retrieving mod-ref info), we choose to duplicate some of the states in the
43 // paper's proposed state machine. The duplication does not change the set the
44 // machine accepts. Given a pair of reachable values, it only provides more
45 // detailed information on which value is being written into and which is being
46 // read from.
47 //
48 //===----------------------------------------------------------------------===//
49 
50 // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
51 // CFLAndersAA is interprocedural. This is *technically* A Bad Thing, because
52 // FunctionPasses are only allowed to inspect the Function that they're being
53 // run on. Realistically, this likely isn't a problem until we allow
54 // FunctionPasses to run concurrently.
55 
56 #include "llvm/Analysis/CFLAndersAliasAnalysis.h"
57 #include "AliasAnalysisSummary.h"
58 #include "CFLGraph.h"
59 #include "llvm/ADT/DenseMap.h"
60 #include "llvm/ADT/DenseMapInfo.h"
61 #include "llvm/ADT/DenseSet.h"
62 #include "llvm/ADT/None.h"
63 #include "llvm/ADT/Optional.h"
64 #include "llvm/ADT/STLExtras.h"
65 #include "llvm/ADT/SmallVector.h"
66 #include "llvm/ADT/iterator_range.h"
67 #include "llvm/Analysis/AliasAnalysis.h"
68 #include "llvm/Analysis/MemoryLocation.h"
69 #include "llvm/IR/Argument.h"
70 #include "llvm/IR/Function.h"
71 #include "llvm/IR/PassManager.h"
72 #include "llvm/IR/Type.h"
73 #include "llvm/Pass.h"
74 #include "llvm/Support/Casting.h"
75 #include "llvm/Support/Compiler.h"
76 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/raw_ostream.h"
78 #include <algorithm>
79 #include <bitset>
80 #include <cassert>
81 #include <cstddef>
82 #include <cstdint>
83 #include <functional>
84 #include <utility>
85 #include <vector>
86 
87 using namespace llvm;
88 using namespace llvm::cflaa;
89 
90 #define DEBUG_TYPE "cfl-anders-aa"
91 
CFLAndersAAResult(const TargetLibraryInfo & TLI)92 CFLAndersAAResult::CFLAndersAAResult(const TargetLibraryInfo &TLI) : TLI(TLI) {}
CFLAndersAAResult(CFLAndersAAResult && RHS)93 CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS)
94     : AAResultBase(std::move(RHS)), TLI(RHS.TLI) {}
95 CFLAndersAAResult::~CFLAndersAAResult() = default;
96 
97 namespace {
98 
99 enum class MatchState : uint8_t {
100   // The following state represents S1 in the paper.
101   FlowFromReadOnly = 0,
102   // The following two states together represent S2 in the paper.
103   // The 'NoReadWrite' suffix indicates that there exists an alias path that
104   // does not contain assignment and reverse assignment edges.
105   // The 'ReadOnly' suffix indicates that there exists an alias path that
106   // contains reverse assignment edges only.
107   FlowFromMemAliasNoReadWrite,
108   FlowFromMemAliasReadOnly,
109   // The following two states together represent S3 in the paper.
110   // The 'WriteOnly' suffix indicates that there exists an alias path that
111   // contains assignment edges only.
112   // The 'ReadWrite' suffix indicates that there exists an alias path that
113   // contains both assignment and reverse assignment edges. Note that if X and Y
114   // are reachable at 'ReadWrite' state, it does NOT mean X is both read from
115   // and written to Y. Instead, it means that a third value Z is written to both
116   // X and Y.
117   FlowToWriteOnly,
118   FlowToReadWrite,
119   // The following two states together represent S4 in the paper.
120   FlowToMemAliasWriteOnly,
121   FlowToMemAliasReadWrite,
122 };
123 
124 using StateSet = std::bitset<7>;
125 
126 const unsigned ReadOnlyStateMask =
127     (1U << static_cast<uint8_t>(MatchState::FlowFromReadOnly)) |
128     (1U << static_cast<uint8_t>(MatchState::FlowFromMemAliasReadOnly));
129 const unsigned WriteOnlyStateMask =
130     (1U << static_cast<uint8_t>(MatchState::FlowToWriteOnly)) |
131     (1U << static_cast<uint8_t>(MatchState::FlowToMemAliasWriteOnly));
132 
133 // A pair that consists of a value and an offset
134 struct OffsetValue {
135   const Value *Val;
136   int64_t Offset;
137 };
138 
operator ==(OffsetValue LHS,OffsetValue RHS)139 bool operator==(OffsetValue LHS, OffsetValue RHS) {
140   return LHS.Val == RHS.Val && LHS.Offset == RHS.Offset;
141 }
operator <(OffsetValue LHS,OffsetValue RHS)142 bool operator<(OffsetValue LHS, OffsetValue RHS) {
143   return std::less<const Value *>()(LHS.Val, RHS.Val) ||
144          (LHS.Val == RHS.Val && LHS.Offset < RHS.Offset);
145 }
146 
147 // A pair that consists of an InstantiatedValue and an offset
148 struct OffsetInstantiatedValue {
149   InstantiatedValue IVal;
150   int64_t Offset;
151 };
152 
operator ==(OffsetInstantiatedValue LHS,OffsetInstantiatedValue RHS)153 bool operator==(OffsetInstantiatedValue LHS, OffsetInstantiatedValue RHS) {
154   return LHS.IVal == RHS.IVal && LHS.Offset == RHS.Offset;
155 }
156 
157 // We use ReachabilitySet to keep track of value aliases (The nonterminal "V" in
158 // the paper) during the analysis.
159 class ReachabilitySet {
160   using ValueStateMap = DenseMap<InstantiatedValue, StateSet>;
161   using ValueReachMap = DenseMap<InstantiatedValue, ValueStateMap>;
162 
163   ValueReachMap ReachMap;
164 
165 public:
166   using const_valuestate_iterator = ValueStateMap::const_iterator;
167   using const_value_iterator = ValueReachMap::const_iterator;
168 
169   // Insert edge 'From->To' at state 'State'
insert(InstantiatedValue From,InstantiatedValue To,MatchState State)170   bool insert(InstantiatedValue From, InstantiatedValue To, MatchState State) {
171     assert(From != To);
172     auto &States = ReachMap[To][From];
173     auto Idx = static_cast<size_t>(State);
174     if (!States.test(Idx)) {
175       States.set(Idx);
176       return true;
177     }
178     return false;
179   }
180 
181   // Return the set of all ('From', 'State') pair for a given node 'To'
182   iterator_range<const_valuestate_iterator>
reachableValueAliases(InstantiatedValue V) const183   reachableValueAliases(InstantiatedValue V) const {
184     auto Itr = ReachMap.find(V);
185     if (Itr == ReachMap.end())
186       return make_range<const_valuestate_iterator>(const_valuestate_iterator(),
187                                                    const_valuestate_iterator());
188     return make_range<const_valuestate_iterator>(Itr->second.begin(),
189                                                  Itr->second.end());
190   }
191 
value_mappings() const192   iterator_range<const_value_iterator> value_mappings() const {
193     return make_range<const_value_iterator>(ReachMap.begin(), ReachMap.end());
194   }
195 };
196 
197 // We use AliasMemSet to keep track of all memory aliases (the nonterminal "M"
198 // in the paper) during the analysis.
199 class AliasMemSet {
200   using MemSet = DenseSet<InstantiatedValue>;
201   using MemMapType = DenseMap<InstantiatedValue, MemSet>;
202 
203   MemMapType MemMap;
204 
205 public:
206   using const_mem_iterator = MemSet::const_iterator;
207 
insert(InstantiatedValue LHS,InstantiatedValue RHS)208   bool insert(InstantiatedValue LHS, InstantiatedValue RHS) {
209     // Top-level values can never be memory aliases because one cannot take the
210     // addresses of them
211     assert(LHS.DerefLevel > 0 && RHS.DerefLevel > 0);
212     return MemMap[LHS].insert(RHS).second;
213   }
214 
getMemoryAliases(InstantiatedValue V) const215   const MemSet *getMemoryAliases(InstantiatedValue V) const {
216     auto Itr = MemMap.find(V);
217     if (Itr == MemMap.end())
218       return nullptr;
219     return &Itr->second;
220   }
221 };
222 
223 // We use AliasAttrMap to keep track of the AliasAttr of each node.
224 class AliasAttrMap {
225   using MapType = DenseMap<InstantiatedValue, AliasAttrs>;
226 
227   MapType AttrMap;
228 
229 public:
230   using const_iterator = MapType::const_iterator;
231 
add(InstantiatedValue V,AliasAttrs Attr)232   bool add(InstantiatedValue V, AliasAttrs Attr) {
233     auto &OldAttr = AttrMap[V];
234     auto NewAttr = OldAttr | Attr;
235     if (OldAttr == NewAttr)
236       return false;
237     OldAttr = NewAttr;
238     return true;
239   }
240 
getAttrs(InstantiatedValue V) const241   AliasAttrs getAttrs(InstantiatedValue V) const {
242     AliasAttrs Attr;
243     auto Itr = AttrMap.find(V);
244     if (Itr != AttrMap.end())
245       Attr = Itr->second;
246     return Attr;
247   }
248 
mappings() const249   iterator_range<const_iterator> mappings() const {
250     return make_range<const_iterator>(AttrMap.begin(), AttrMap.end());
251   }
252 };
253 
254 struct WorkListItem {
255   InstantiatedValue From;
256   InstantiatedValue To;
257   MatchState State;
258 };
259 
260 struct ValueSummary {
261   struct Record {
262     InterfaceValue IValue;
263     unsigned DerefLevel;
264   };
265   SmallVector<Record, 4> FromRecords, ToRecords;
266 };
267 
268 } // end anonymous namespace
269 
270 namespace llvm {
271 
272 // Specialize DenseMapInfo for OffsetValue.
273 template <> struct DenseMapInfo<OffsetValue> {
getEmptyKeyllvm::DenseMapInfo274   static OffsetValue getEmptyKey() {
275     return OffsetValue{DenseMapInfo<const Value *>::getEmptyKey(),
276                        DenseMapInfo<int64_t>::getEmptyKey()};
277   }
278 
getTombstoneKeyllvm::DenseMapInfo279   static OffsetValue getTombstoneKey() {
280     return OffsetValue{DenseMapInfo<const Value *>::getTombstoneKey(),
281                        DenseMapInfo<int64_t>::getEmptyKey()};
282   }
283 
getHashValuellvm::DenseMapInfo284   static unsigned getHashValue(const OffsetValue &OVal) {
285     return DenseMapInfo<std::pair<const Value *, int64_t>>::getHashValue(
286         std::make_pair(OVal.Val, OVal.Offset));
287   }
288 
isEqualllvm::DenseMapInfo289   static bool isEqual(const OffsetValue &LHS, const OffsetValue &RHS) {
290     return LHS == RHS;
291   }
292 };
293 
294 // Specialize DenseMapInfo for OffsetInstantiatedValue.
295 template <> struct DenseMapInfo<OffsetInstantiatedValue> {
getEmptyKeyllvm::DenseMapInfo296   static OffsetInstantiatedValue getEmptyKey() {
297     return OffsetInstantiatedValue{
298         DenseMapInfo<InstantiatedValue>::getEmptyKey(),
299         DenseMapInfo<int64_t>::getEmptyKey()};
300   }
301 
getTombstoneKeyllvm::DenseMapInfo302   static OffsetInstantiatedValue getTombstoneKey() {
303     return OffsetInstantiatedValue{
304         DenseMapInfo<InstantiatedValue>::getTombstoneKey(),
305         DenseMapInfo<int64_t>::getEmptyKey()};
306   }
307 
getHashValuellvm::DenseMapInfo308   static unsigned getHashValue(const OffsetInstantiatedValue &OVal) {
309     return DenseMapInfo<std::pair<InstantiatedValue, int64_t>>::getHashValue(
310         std::make_pair(OVal.IVal, OVal.Offset));
311   }
312 
isEqualllvm::DenseMapInfo313   static bool isEqual(const OffsetInstantiatedValue &LHS,
314                       const OffsetInstantiatedValue &RHS) {
315     return LHS == RHS;
316   }
317 };
318 
319 } // end namespace llvm
320 
321 class CFLAndersAAResult::FunctionInfo {
322   /// Map a value to other values that may alias it
323   /// Since the alias relation is symmetric, to save some space we assume values
324   /// are properly ordered: if a and b alias each other, and a < b, then b is in
325   /// AliasMap[a] but not vice versa.
326   DenseMap<const Value *, std::vector<OffsetValue>> AliasMap;
327 
328   /// Map a value to its corresponding AliasAttrs
329   DenseMap<const Value *, AliasAttrs> AttrMap;
330 
331   /// Summary of externally visible effects.
332   AliasSummary Summary;
333 
334   Optional<AliasAttrs> getAttrs(const Value *) const;
335 
336 public:
337   FunctionInfo(const Function &, const SmallVectorImpl<Value *> &,
338                const ReachabilitySet &, const AliasAttrMap &);
339 
340   bool mayAlias(const Value *, LocationSize, const Value *, LocationSize) const;
getAliasSummary() const341   const AliasSummary &getAliasSummary() const { return Summary; }
342 };
343 
hasReadOnlyState(StateSet Set)344 static bool hasReadOnlyState(StateSet Set) {
345   return (Set & StateSet(ReadOnlyStateMask)).any();
346 }
347 
hasWriteOnlyState(StateSet Set)348 static bool hasWriteOnlyState(StateSet Set) {
349   return (Set & StateSet(WriteOnlyStateMask)).any();
350 }
351 
352 static Optional<InterfaceValue>
getInterfaceValue(InstantiatedValue IValue,const SmallVectorImpl<Value * > & RetVals)353 getInterfaceValue(InstantiatedValue IValue,
354                   const SmallVectorImpl<Value *> &RetVals) {
355   auto Val = IValue.Val;
356 
357   Optional<unsigned> Index;
358   if (auto Arg = dyn_cast<Argument>(Val))
359     Index = Arg->getArgNo() + 1;
360   else if (is_contained(RetVals, Val))
361     Index = 0;
362 
363   if (Index)
364     return InterfaceValue{*Index, IValue.DerefLevel};
365   return None;
366 }
367 
populateAttrMap(DenseMap<const Value *,AliasAttrs> & AttrMap,const AliasAttrMap & AMap)368 static void populateAttrMap(DenseMap<const Value *, AliasAttrs> &AttrMap,
369                             const AliasAttrMap &AMap) {
370   for (const auto &Mapping : AMap.mappings()) {
371     auto IVal = Mapping.first;
372 
373     // Insert IVal into the map
374     auto &Attr = AttrMap[IVal.Val];
375     // AttrMap only cares about top-level values
376     if (IVal.DerefLevel == 0)
377       Attr |= Mapping.second;
378   }
379 }
380 
381 static void
populateAliasMap(DenseMap<const Value *,std::vector<OffsetValue>> & AliasMap,const ReachabilitySet & ReachSet)382 populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap,
383                  const ReachabilitySet &ReachSet) {
384   for (const auto &OuterMapping : ReachSet.value_mappings()) {
385     // AliasMap only cares about top-level values
386     if (OuterMapping.first.DerefLevel > 0)
387       continue;
388 
389     auto Val = OuterMapping.first.Val;
390     auto &AliasList = AliasMap[Val];
391     for (const auto &InnerMapping : OuterMapping.second) {
392       // Again, AliasMap only cares about top-level values
393       if (InnerMapping.first.DerefLevel == 0)
394         AliasList.push_back(OffsetValue{InnerMapping.first.Val, UnknownOffset});
395     }
396 
397     // Sort AliasList for faster lookup
398     llvm::sort(AliasList.begin(), AliasList.end());
399   }
400 }
401 
populateExternalRelations(SmallVectorImpl<ExternalRelation> & ExtRelations,const Function & Fn,const SmallVectorImpl<Value * > & RetVals,const ReachabilitySet & ReachSet)402 static void populateExternalRelations(
403     SmallVectorImpl<ExternalRelation> &ExtRelations, const Function &Fn,
404     const SmallVectorImpl<Value *> &RetVals, const ReachabilitySet &ReachSet) {
405   // If a function only returns one of its argument X, then X will be both an
406   // argument and a return value at the same time. This is an edge case that
407   // needs special handling here.
408   for (const auto &Arg : Fn.args()) {
409     if (is_contained(RetVals, &Arg)) {
410       auto ArgVal = InterfaceValue{Arg.getArgNo() + 1, 0};
411       auto RetVal = InterfaceValue{0, 0};
412       ExtRelations.push_back(ExternalRelation{ArgVal, RetVal, 0});
413     }
414   }
415 
416   // Below is the core summary construction logic.
417   // A naive solution of adding only the value aliases that are parameters or
418   // return values in ReachSet to the summary won't work: It is possible that a
419   // parameter P is written into an intermediate value I, and the function
420   // subsequently returns *I. In that case, *I is does not value alias anything
421   // in ReachSet, and the naive solution will miss a summary edge from (P, 1) to
422   // (I, 1).
423   // To account for the aforementioned case, we need to check each non-parameter
424   // and non-return value for the possibility of acting as an intermediate.
425   // 'ValueMap' here records, for each value, which InterfaceValues read from or
426   // write into it. If both the read list and the write list of a given value
427   // are non-empty, we know that a particular value is an intermidate and we
428   // need to add summary edges from the writes to the reads.
429   DenseMap<Value *, ValueSummary> ValueMap;
430   for (const auto &OuterMapping : ReachSet.value_mappings()) {
431     if (auto Dst = getInterfaceValue(OuterMapping.first, RetVals)) {
432       for (const auto &InnerMapping : OuterMapping.second) {
433         // If Src is a param/return value, we get a same-level assignment.
434         if (auto Src = getInterfaceValue(InnerMapping.first, RetVals)) {
435           // This may happen if both Dst and Src are return values
436           if (*Dst == *Src)
437             continue;
438 
439           if (hasReadOnlyState(InnerMapping.second))
440             ExtRelations.push_back(ExternalRelation{*Dst, *Src, UnknownOffset});
441           // No need to check for WriteOnly state, since ReachSet is symmetric
442         } else {
443           // If Src is not a param/return, add it to ValueMap
444           auto SrcIVal = InnerMapping.first;
445           if (hasReadOnlyState(InnerMapping.second))
446             ValueMap[SrcIVal.Val].FromRecords.push_back(
447                 ValueSummary::Record{*Dst, SrcIVal.DerefLevel});
448           if (hasWriteOnlyState(InnerMapping.second))
449             ValueMap[SrcIVal.Val].ToRecords.push_back(
450                 ValueSummary::Record{*Dst, SrcIVal.DerefLevel});
451         }
452       }
453     }
454   }
455 
456   for (const auto &Mapping : ValueMap) {
457     for (const auto &FromRecord : Mapping.second.FromRecords) {
458       for (const auto &ToRecord : Mapping.second.ToRecords) {
459         auto ToLevel = ToRecord.DerefLevel;
460         auto FromLevel = FromRecord.DerefLevel;
461         // Same-level assignments should have already been processed by now
462         if (ToLevel == FromLevel)
463           continue;
464 
465         auto SrcIndex = FromRecord.IValue.Index;
466         auto SrcLevel = FromRecord.IValue.DerefLevel;
467         auto DstIndex = ToRecord.IValue.Index;
468         auto DstLevel = ToRecord.IValue.DerefLevel;
469         if (ToLevel > FromLevel)
470           SrcLevel += ToLevel - FromLevel;
471         else
472           DstLevel += FromLevel - ToLevel;
473 
474         ExtRelations.push_back(ExternalRelation{
475             InterfaceValue{SrcIndex, SrcLevel},
476             InterfaceValue{DstIndex, DstLevel}, UnknownOffset});
477       }
478     }
479   }
480 
481   // Remove duplicates in ExtRelations
482   llvm::sort(ExtRelations.begin(), ExtRelations.end());
483   ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()),
484                      ExtRelations.end());
485 }
486 
populateExternalAttributes(SmallVectorImpl<ExternalAttribute> & ExtAttributes,const Function & Fn,const SmallVectorImpl<Value * > & RetVals,const AliasAttrMap & AMap)487 static void populateExternalAttributes(
488     SmallVectorImpl<ExternalAttribute> &ExtAttributes, const Function &Fn,
489     const SmallVectorImpl<Value *> &RetVals, const AliasAttrMap &AMap) {
490   for (const auto &Mapping : AMap.mappings()) {
491     if (auto IVal = getInterfaceValue(Mapping.first, RetVals)) {
492       auto Attr = getExternallyVisibleAttrs(Mapping.second);
493       if (Attr.any())
494         ExtAttributes.push_back(ExternalAttribute{*IVal, Attr});
495     }
496   }
497 }
498 
FunctionInfo(const Function & Fn,const SmallVectorImpl<Value * > & RetVals,const ReachabilitySet & ReachSet,const AliasAttrMap & AMap)499 CFLAndersAAResult::FunctionInfo::FunctionInfo(
500     const Function &Fn, const SmallVectorImpl<Value *> &RetVals,
501     const ReachabilitySet &ReachSet, const AliasAttrMap &AMap) {
502   populateAttrMap(AttrMap, AMap);
503   populateExternalAttributes(Summary.RetParamAttributes, Fn, RetVals, AMap);
504   populateAliasMap(AliasMap, ReachSet);
505   populateExternalRelations(Summary.RetParamRelations, Fn, RetVals, ReachSet);
506 }
507 
508 Optional<AliasAttrs>
getAttrs(const Value * V) const509 CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const {
510   assert(V != nullptr);
511 
512   auto Itr = AttrMap.find(V);
513   if (Itr != AttrMap.end())
514     return Itr->second;
515   return None;
516 }
517 
mayAlias(const Value * LHS,LocationSize LHSSize,const Value * RHS,LocationSize RHSSize) const518 bool CFLAndersAAResult::FunctionInfo::mayAlias(const Value *LHS,
519                                                LocationSize LHSSize,
520                                                const Value *RHS,
521                                                LocationSize RHSSize) const {
522   assert(LHS && RHS);
523 
524   // Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created
525   // after the analysis gets executed, and we want to be conservative in those
526   // cases.
527   auto MaybeAttrsA = getAttrs(LHS);
528   auto MaybeAttrsB = getAttrs(RHS);
529   if (!MaybeAttrsA || !MaybeAttrsB)
530     return true;
531 
532   // Check AliasAttrs before AliasMap lookup since it's cheaper
533   auto AttrsA = *MaybeAttrsA;
534   auto AttrsB = *MaybeAttrsB;
535   if (hasUnknownOrCallerAttr(AttrsA))
536     return AttrsB.any();
537   if (hasUnknownOrCallerAttr(AttrsB))
538     return AttrsA.any();
539   if (isGlobalOrArgAttr(AttrsA))
540     return isGlobalOrArgAttr(AttrsB);
541   if (isGlobalOrArgAttr(AttrsB))
542     return isGlobalOrArgAttr(AttrsA);
543 
544   // At this point both LHS and RHS should point to locally allocated objects
545 
546   auto Itr = AliasMap.find(LHS);
547   if (Itr != AliasMap.end()) {
548 
549     // Find out all (X, Offset) where X == RHS
550     auto Comparator = [](OffsetValue LHS, OffsetValue RHS) {
551       return std::less<const Value *>()(LHS.Val, RHS.Val);
552     };
553 #ifdef EXPENSIVE_CHECKS
554     assert(std::is_sorted(Itr->second.begin(), Itr->second.end(), Comparator));
555 #endif
556     auto RangePair = std::equal_range(Itr->second.begin(), Itr->second.end(),
557                                       OffsetValue{RHS, 0}, Comparator);
558 
559     if (RangePair.first != RangePair.second) {
560       // Be conservative about UnknownSize
561       if (LHSSize == MemoryLocation::UnknownSize ||
562           RHSSize == MemoryLocation::UnknownSize)
563         return true;
564 
565       for (const auto &OVal : make_range(RangePair)) {
566         // Be conservative about UnknownOffset
567         if (OVal.Offset == UnknownOffset)
568           return true;
569 
570         // We know that LHS aliases (RHS + OVal.Offset) if the control flow
571         // reaches here. The may-alias query essentially becomes integer
572         // range-overlap queries over two ranges [OVal.Offset, OVal.Offset +
573         // LHSSize) and [0, RHSSize).
574 
575         // Try to be conservative on super large offsets
576         if (LLVM_UNLIKELY(LHSSize > INT64_MAX || RHSSize > INT64_MAX))
577           return true;
578 
579         auto LHSStart = OVal.Offset;
580         // FIXME: Do we need to guard against integer overflow?
581         auto LHSEnd = OVal.Offset + static_cast<int64_t>(LHSSize);
582         auto RHSStart = 0;
583         auto RHSEnd = static_cast<int64_t>(RHSSize);
584         if (LHSEnd > RHSStart && LHSStart < RHSEnd)
585           return true;
586       }
587     }
588   }
589 
590   return false;
591 }
592 
propagate(InstantiatedValue From,InstantiatedValue To,MatchState State,ReachabilitySet & ReachSet,std::vector<WorkListItem> & WorkList)593 static void propagate(InstantiatedValue From, InstantiatedValue To,
594                       MatchState State, ReachabilitySet &ReachSet,
595                       std::vector<WorkListItem> &WorkList) {
596   if (From == To)
597     return;
598   if (ReachSet.insert(From, To, State))
599     WorkList.push_back(WorkListItem{From, To, State});
600 }
601 
initializeWorkList(std::vector<WorkListItem> & WorkList,ReachabilitySet & ReachSet,const CFLGraph & Graph)602 static void initializeWorkList(std::vector<WorkListItem> &WorkList,
603                                ReachabilitySet &ReachSet,
604                                const CFLGraph &Graph) {
605   for (const auto &Mapping : Graph.value_mappings()) {
606     auto Val = Mapping.first;
607     auto &ValueInfo = Mapping.second;
608     assert(ValueInfo.getNumLevels() > 0);
609 
610     // Insert all immediate assignment neighbors to the worklist
611     for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
612       auto Src = InstantiatedValue{Val, I};
613       // If there's an assignment edge from X to Y, it means Y is reachable from
614       // X at S2 and X is reachable from Y at S1
615       for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) {
616         propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet,
617                   WorkList);
618         propagate(Src, Edge.Other, MatchState::FlowToWriteOnly, ReachSet,
619                   WorkList);
620       }
621     }
622   }
623 }
624 
getNodeBelow(const CFLGraph & Graph,InstantiatedValue V)625 static Optional<InstantiatedValue> getNodeBelow(const CFLGraph &Graph,
626                                                 InstantiatedValue V) {
627   auto NodeBelow = InstantiatedValue{V.Val, V.DerefLevel + 1};
628   if (Graph.getNode(NodeBelow))
629     return NodeBelow;
630   return None;
631 }
632 
processWorkListItem(const WorkListItem & Item,const CFLGraph & Graph,ReachabilitySet & ReachSet,AliasMemSet & MemSet,std::vector<WorkListItem> & WorkList)633 static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph,
634                                 ReachabilitySet &ReachSet, AliasMemSet &MemSet,
635                                 std::vector<WorkListItem> &WorkList) {
636   auto FromNode = Item.From;
637   auto ToNode = Item.To;
638 
639   auto NodeInfo = Graph.getNode(ToNode);
640   assert(NodeInfo != nullptr);
641 
642   // TODO: propagate field offsets
643 
644   // FIXME: Here is a neat trick we can do: since both ReachSet and MemSet holds
645   // relations that are symmetric, we could actually cut the storage by half by
646   // sorting FromNode and ToNode before insertion happens.
647 
648   // The newly added value alias pair may potentially generate more memory
649   // alias pairs. Check for them here.
650   auto FromNodeBelow = getNodeBelow(Graph, FromNode);
651   auto ToNodeBelow = getNodeBelow(Graph, ToNode);
652   if (FromNodeBelow && ToNodeBelow &&
653       MemSet.insert(*FromNodeBelow, *ToNodeBelow)) {
654     propagate(*FromNodeBelow, *ToNodeBelow,
655               MatchState::FlowFromMemAliasNoReadWrite, ReachSet, WorkList);
656     for (const auto &Mapping : ReachSet.reachableValueAliases(*FromNodeBelow)) {
657       auto Src = Mapping.first;
658       auto MemAliasPropagate = [&](MatchState FromState, MatchState ToState) {
659         if (Mapping.second.test(static_cast<size_t>(FromState)))
660           propagate(Src, *ToNodeBelow, ToState, ReachSet, WorkList);
661       };
662 
663       MemAliasPropagate(MatchState::FlowFromReadOnly,
664                         MatchState::FlowFromMemAliasReadOnly);
665       MemAliasPropagate(MatchState::FlowToWriteOnly,
666                         MatchState::FlowToMemAliasWriteOnly);
667       MemAliasPropagate(MatchState::FlowToReadWrite,
668                         MatchState::FlowToMemAliasReadWrite);
669     }
670   }
671 
672   // This is the core of the state machine walking algorithm. We expand ReachSet
673   // based on which state we are at (which in turn dictates what edges we
674   // should examine)
675   // From a high-level point of view, the state machine here guarantees two
676   // properties:
677   // - If *X and *Y are memory aliases, then X and Y are value aliases
678   // - If Y is an alias of X, then reverse assignment edges (if there is any)
679   // should precede any assignment edges on the path from X to Y.
680   auto NextAssignState = [&](MatchState State) {
681     for (const auto &AssignEdge : NodeInfo->Edges)
682       propagate(FromNode, AssignEdge.Other, State, ReachSet, WorkList);
683   };
684   auto NextRevAssignState = [&](MatchState State) {
685     for (const auto &RevAssignEdge : NodeInfo->ReverseEdges)
686       propagate(FromNode, RevAssignEdge.Other, State, ReachSet, WorkList);
687   };
688   auto NextMemState = [&](MatchState State) {
689     if (auto AliasSet = MemSet.getMemoryAliases(ToNode)) {
690       for (const auto &MemAlias : *AliasSet)
691         propagate(FromNode, MemAlias, State, ReachSet, WorkList);
692     }
693   };
694 
695   switch (Item.State) {
696   case MatchState::FlowFromReadOnly:
697     NextRevAssignState(MatchState::FlowFromReadOnly);
698     NextAssignState(MatchState::FlowToReadWrite);
699     NextMemState(MatchState::FlowFromMemAliasReadOnly);
700     break;
701 
702   case MatchState::FlowFromMemAliasNoReadWrite:
703     NextRevAssignState(MatchState::FlowFromReadOnly);
704     NextAssignState(MatchState::FlowToWriteOnly);
705     break;
706 
707   case MatchState::FlowFromMemAliasReadOnly:
708     NextRevAssignState(MatchState::FlowFromReadOnly);
709     NextAssignState(MatchState::FlowToReadWrite);
710     break;
711 
712   case MatchState::FlowToWriteOnly:
713     NextAssignState(MatchState::FlowToWriteOnly);
714     NextMemState(MatchState::FlowToMemAliasWriteOnly);
715     break;
716 
717   case MatchState::FlowToReadWrite:
718     NextAssignState(MatchState::FlowToReadWrite);
719     NextMemState(MatchState::FlowToMemAliasReadWrite);
720     break;
721 
722   case MatchState::FlowToMemAliasWriteOnly:
723     NextAssignState(MatchState::FlowToWriteOnly);
724     break;
725 
726   case MatchState::FlowToMemAliasReadWrite:
727     NextAssignState(MatchState::FlowToReadWrite);
728     break;
729   }
730 }
731 
buildAttrMap(const CFLGraph & Graph,const ReachabilitySet & ReachSet)732 static AliasAttrMap buildAttrMap(const CFLGraph &Graph,
733                                  const ReachabilitySet &ReachSet) {
734   AliasAttrMap AttrMap;
735   std::vector<InstantiatedValue> WorkList, NextList;
736 
737   // Initialize each node with its original AliasAttrs in CFLGraph
738   for (const auto &Mapping : Graph.value_mappings()) {
739     auto Val = Mapping.first;
740     auto &ValueInfo = Mapping.second;
741     for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
742       auto Node = InstantiatedValue{Val, I};
743       AttrMap.add(Node, ValueInfo.getNodeInfoAtLevel(I).Attr);
744       WorkList.push_back(Node);
745     }
746   }
747 
748   while (!WorkList.empty()) {
749     for (const auto &Dst : WorkList) {
750       auto DstAttr = AttrMap.getAttrs(Dst);
751       if (DstAttr.none())
752         continue;
753 
754       // Propagate attr on the same level
755       for (const auto &Mapping : ReachSet.reachableValueAliases(Dst)) {
756         auto Src = Mapping.first;
757         if (AttrMap.add(Src, DstAttr))
758           NextList.push_back(Src);
759       }
760 
761       // Propagate attr to the levels below
762       auto DstBelow = getNodeBelow(Graph, Dst);
763       while (DstBelow) {
764         if (AttrMap.add(*DstBelow, DstAttr)) {
765           NextList.push_back(*DstBelow);
766           break;
767         }
768         DstBelow = getNodeBelow(Graph, *DstBelow);
769       }
770     }
771     WorkList.swap(NextList);
772     NextList.clear();
773   }
774 
775   return AttrMap;
776 }
777 
778 CFLAndersAAResult::FunctionInfo
buildInfoFrom(const Function & Fn)779 CFLAndersAAResult::buildInfoFrom(const Function &Fn) {
780   CFLGraphBuilder<CFLAndersAAResult> GraphBuilder(
781       *this, TLI,
782       // Cast away the constness here due to GraphBuilder's API requirement
783       const_cast<Function &>(Fn));
784   auto &Graph = GraphBuilder.getCFLGraph();
785 
786   ReachabilitySet ReachSet;
787   AliasMemSet MemSet;
788 
789   std::vector<WorkListItem> WorkList, NextList;
790   initializeWorkList(WorkList, ReachSet, Graph);
791   // TODO: make sure we don't stop before the fix point is reached
792   while (!WorkList.empty()) {
793     for (const auto &Item : WorkList)
794       processWorkListItem(Item, Graph, ReachSet, MemSet, NextList);
795 
796     NextList.swap(WorkList);
797     NextList.clear();
798   }
799 
800   // Now that we have all the reachability info, propagate AliasAttrs according
801   // to it
802   auto IValueAttrMap = buildAttrMap(Graph, ReachSet);
803 
804   return FunctionInfo(Fn, GraphBuilder.getReturnValues(), ReachSet,
805                       std::move(IValueAttrMap));
806 }
807 
scan(const Function & Fn)808 void CFLAndersAAResult::scan(const Function &Fn) {
809   auto InsertPair = Cache.insert(std::make_pair(&Fn, Optional<FunctionInfo>()));
810   (void)InsertPair;
811   assert(InsertPair.second &&
812          "Trying to scan a function that has already been cached");
813 
814   // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call
815   // may get evaluated after operator[], potentially triggering a DenseMap
816   // resize and invalidating the reference returned by operator[]
817   auto FunInfo = buildInfoFrom(Fn);
818   Cache[&Fn] = std::move(FunInfo);
819   Handles.emplace_front(const_cast<Function *>(&Fn), this);
820 }
821 
evict(const Function * Fn)822 void CFLAndersAAResult::evict(const Function *Fn) { Cache.erase(Fn); }
823 
824 const Optional<CFLAndersAAResult::FunctionInfo> &
ensureCached(const Function & Fn)825 CFLAndersAAResult::ensureCached(const Function &Fn) {
826   auto Iter = Cache.find(&Fn);
827   if (Iter == Cache.end()) {
828     scan(Fn);
829     Iter = Cache.find(&Fn);
830     assert(Iter != Cache.end());
831     assert(Iter->second.hasValue());
832   }
833   return Iter->second;
834 }
835 
getAliasSummary(const Function & Fn)836 const AliasSummary *CFLAndersAAResult::getAliasSummary(const Function &Fn) {
837   auto &FunInfo = ensureCached(Fn);
838   if (FunInfo.hasValue())
839     return &FunInfo->getAliasSummary();
840   else
841     return nullptr;
842 }
843 
query(const MemoryLocation & LocA,const MemoryLocation & LocB)844 AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA,
845                                      const MemoryLocation &LocB) {
846   auto *ValA = LocA.Ptr;
847   auto *ValB = LocB.Ptr;
848 
849   if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy())
850     return NoAlias;
851 
852   auto *Fn = parentFunctionOfValue(ValA);
853   if (!Fn) {
854     Fn = parentFunctionOfValue(ValB);
855     if (!Fn) {
856       // The only times this is known to happen are when globals + InlineAsm are
857       // involved
858       LLVM_DEBUG(
859           dbgs()
860           << "CFLAndersAA: could not extract parent function information.\n");
861       return MayAlias;
862     }
863   } else {
864     assert(!parentFunctionOfValue(ValB) || parentFunctionOfValue(ValB) == Fn);
865   }
866 
867   assert(Fn != nullptr);
868   auto &FunInfo = ensureCached(*Fn);
869 
870   // AliasMap lookup
871   if (FunInfo->mayAlias(ValA, LocA.Size, ValB, LocB.Size))
872     return MayAlias;
873   return NoAlias;
874 }
875 
alias(const MemoryLocation & LocA,const MemoryLocation & LocB)876 AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA,
877                                      const MemoryLocation &LocB) {
878   if (LocA.Ptr == LocB.Ptr)
879     return MustAlias;
880 
881   // Comparisons between global variables and other constants should be
882   // handled by BasicAA.
883   // CFLAndersAA may report NoAlias when comparing a GlobalValue and
884   // ConstantExpr, but every query needs to have at least one Value tied to a
885   // Function, and neither GlobalValues nor ConstantExprs are.
886   if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr))
887     return AAResultBase::alias(LocA, LocB);
888 
889   AliasResult QueryResult = query(LocA, LocB);
890   if (QueryResult == MayAlias)
891     return AAResultBase::alias(LocA, LocB);
892 
893   return QueryResult;
894 }
895 
896 AnalysisKey CFLAndersAA::Key;
897 
run(Function & F,FunctionAnalysisManager & AM)898 CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) {
899   return CFLAndersAAResult(AM.getResult<TargetLibraryAnalysis>(F));
900 }
901 
902 char CFLAndersAAWrapperPass::ID = 0;
903 INITIALIZE_PASS(CFLAndersAAWrapperPass, "cfl-anders-aa",
904                 "Inclusion-Based CFL Alias Analysis", false, true)
905 
createCFLAndersAAWrapperPass()906 ImmutablePass *llvm::createCFLAndersAAWrapperPass() {
907   return new CFLAndersAAWrapperPass();
908 }
909 
CFLAndersAAWrapperPass()910 CFLAndersAAWrapperPass::CFLAndersAAWrapperPass() : ImmutablePass(ID) {
911   initializeCFLAndersAAWrapperPassPass(*PassRegistry::getPassRegistry());
912 }
913 
initializePass()914 void CFLAndersAAWrapperPass::initializePass() {
915   auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
916   Result.reset(new CFLAndersAAResult(TLIWP.getTLI()));
917 }
918 
getAnalysisUsage(AnalysisUsage & AU) const919 void CFLAndersAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
920   AU.setPreservesAll();
921   AU.addRequired<TargetLibraryInfoWrapperPass>();
922 }
923