1 //- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ---*- C++-*-//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a CFL-base, summary-based alias analysis algorithm. It
11 // does not depend on types. The algorithm is a mixture of the one described in
12 // "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast
13 // algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by
14 // Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a
15 // graph of the uses of a variable, where each node is a memory location, and
16 // each edge is an action that happened on that memory location.  The "actions"
17 // can be one of Dereference, Reference, or Assign. The precision of this
18 // analysis is roughly the same as that of an one level context-sensitive
19 // Steensgaard's algorithm.
20 //
21 // Two variables are considered as aliasing iff you can reach one value's node
22 // from the other value's node and the language formed by concatenating all of
23 // the edge labels (actions) conforms to a context-free grammar.
24 //
25 // Because this algorithm requires a graph search on each query, we execute the
26 // algorithm outlined in "Fast algorithms..." (mentioned above)
27 // in order to transform the graph into sets of variables that may alias in
28 // ~nlogn time (n = number of variables), which makes queries take constant
29 // time.
30 //===----------------------------------------------------------------------===//
31 
32 // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
33 // CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because
34 // FunctionPasses are only allowed to inspect the Function that they're being
35 // run on. Realistically, this likely isn't a problem until we allow
36 // FunctionPasses to run concurrently.
37 
38 #include "llvm/Analysis/CFLSteensAliasAnalysis.h"
39 #include "CFLGraph.h"
40 #include "StratifiedSets.h"
41 #include "llvm/ADT/DenseMap.h"
42 #include "llvm/ADT/None.h"
43 #include "llvm/ADT/Optional.h"
44 #include "llvm/Analysis/TargetLibraryInfo.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/Pass.h"
48 #include "llvm/Support/Compiler.h"
49 #include "llvm/Support/Debug.h"
50 #include "llvm/Support/ErrorHandling.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include <algorithm>
53 #include <cassert>
54 #include <memory>
55 #include <tuple>
56 
57 using namespace llvm;
58 using namespace llvm::cflaa;
59 
60 #define DEBUG_TYPE "cfl-steens-aa"
61 
CFLSteensAAResult(const TargetLibraryInfo & TLI)62 CFLSteensAAResult::CFLSteensAAResult(const TargetLibraryInfo &TLI)
63     : AAResultBase(), TLI(TLI) {}
CFLSteensAAResult(CFLSteensAAResult && Arg)64 CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg)
65     : AAResultBase(std::move(Arg)), TLI(Arg.TLI) {}
~CFLSteensAAResult()66 CFLSteensAAResult::~CFLSteensAAResult() {}
67 
68 /// Information we have about a function and would like to keep around.
69 class CFLSteensAAResult::FunctionInfo {
70   StratifiedSets<InstantiatedValue> Sets;
71   AliasSummary Summary;
72 
73 public:
74   FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals,
75                StratifiedSets<InstantiatedValue> S);
76 
getStratifiedSets() const77   const StratifiedSets<InstantiatedValue> &getStratifiedSets() const {
78     return Sets;
79   }
getAliasSummary() const80   const AliasSummary &getAliasSummary() const { return Summary; }
81 };
82 
83 /// Try to go from a Value* to a Function*. Never returns nullptr.
84 static Optional<Function *> parentFunctionOfValue(Value *);
85 
86 const StratifiedIndex StratifiedLink::SetSentinel =
87     std::numeric_limits<StratifiedIndex>::max();
88 
89 //===----------------------------------------------------------------------===//
90 // Function declarations that require types defined in the namespace above
91 //===----------------------------------------------------------------------===//
92 
93 /// Determines whether it would be pointless to add the given Value to our sets.
94 static bool canSkipAddingToSets(Value *Val);
95 
parentFunctionOfValue(Value * Val)96 static Optional<Function *> parentFunctionOfValue(Value *Val) {
97   if (auto *Inst = dyn_cast<Instruction>(Val)) {
98     auto *Bb = Inst->getParent();
99     return Bb->getParent();
100   }
101 
102   if (auto *Arg = dyn_cast<Argument>(Val))
103     return Arg->getParent();
104   return None;
105 }
106 
canSkipAddingToSets(Value * Val)107 static bool canSkipAddingToSets(Value *Val) {
108   // Constants can share instances, which may falsely unify multiple
109   // sets, e.g. in
110   // store i32* null, i32** %ptr1
111   // store i32* null, i32** %ptr2
112   // clearly ptr1 and ptr2 should not be unified into the same set, so
113   // we should filter out the (potentially shared) instance to
114   // i32* null.
115   if (isa<Constant>(Val)) {
116     // TODO: Because all of these things are constant, we can determine whether
117     // the data is *actually* mutable at graph building time. This will probably
118     // come for free/cheap with offset awareness.
119     bool CanStoreMutableData = isa<GlobalValue>(Val) ||
120                                isa<ConstantExpr>(Val) ||
121                                isa<ConstantAggregate>(Val);
122     return !CanStoreMutableData;
123   }
124 
125   return false;
126 }
127 
FunctionInfo(Function & Fn,const SmallVectorImpl<Value * > & RetVals,StratifiedSets<InstantiatedValue> S)128 CFLSteensAAResult::FunctionInfo::FunctionInfo(
129     Function &Fn, const SmallVectorImpl<Value *> &RetVals,
130     StratifiedSets<InstantiatedValue> S)
131     : Sets(std::move(S)) {
132   // Historically, an arbitrary upper-bound of 50 args was selected. We may want
133   // to remove this if it doesn't really matter in practice.
134   if (Fn.arg_size() > MaxSupportedArgsInSummary)
135     return;
136 
137   DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap;
138 
139   // Our intention here is to record all InterfaceValues that share the same
140   // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we
141   // have its StratifiedIndex scanned here and check if the index is presented
142   // in InterfaceMap: if it is not, we add the correspondence to the map;
143   // otherwise, an aliasing relation is found and we add it to
144   // RetParamRelations.
145 
146   auto AddToRetParamRelations = [&](unsigned InterfaceIndex,
147                                     StratifiedIndex SetIndex) {
148     unsigned Level = 0;
149     while (true) {
150       InterfaceValue CurrValue{InterfaceIndex, Level};
151 
152       auto Itr = InterfaceMap.find(SetIndex);
153       if (Itr != InterfaceMap.end()) {
154         if (CurrValue != Itr->second)
155           Summary.RetParamRelations.push_back(
156               ExternalRelation{CurrValue, Itr->second});
157         break;
158       }
159 
160       auto &Link = Sets.getLink(SetIndex);
161       InterfaceMap.insert(std::make_pair(SetIndex, CurrValue));
162       auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs);
163       if (ExternalAttrs.any())
164         Summary.RetParamAttributes.push_back(
165             ExternalAttribute{CurrValue, ExternalAttrs});
166 
167       if (!Link.hasBelow())
168         break;
169 
170       ++Level;
171       SetIndex = Link.Below;
172     }
173   };
174 
175   // Populate RetParamRelations for return values
176   for (auto *RetVal : RetVals) {
177     assert(RetVal != nullptr);
178     assert(RetVal->getType()->isPointerTy());
179     auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0});
180     if (RetInfo.hasValue())
181       AddToRetParamRelations(0, RetInfo->Index);
182   }
183 
184   // Populate RetParamRelations for parameters
185   unsigned I = 0;
186   for (auto &Param : Fn.args()) {
187     if (Param.getType()->isPointerTy()) {
188       auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0});
189       if (ParamInfo.hasValue())
190         AddToRetParamRelations(I + 1, ParamInfo->Index);
191     }
192     ++I;
193   }
194 }
195 
196 // Builds the graph + StratifiedSets for a function.
buildSetsFrom(Function * Fn)197 CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) {
198   CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, TLI, *Fn);
199   StratifiedSetsBuilder<InstantiatedValue> SetBuilder;
200 
201   // Add all CFLGraph nodes and all Dereference edges to StratifiedSets
202   auto &Graph = GraphBuilder.getCFLGraph();
203   for (const auto &Mapping : Graph.value_mappings()) {
204     auto Val = Mapping.first;
205     if (canSkipAddingToSets(Val))
206       continue;
207     auto &ValueInfo = Mapping.second;
208 
209     assert(ValueInfo.getNumLevels() > 0);
210     SetBuilder.add(InstantiatedValue{Val, 0});
211     SetBuilder.noteAttributes(InstantiatedValue{Val, 0},
212                               ValueInfo.getNodeInfoAtLevel(0).Attr);
213     for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) {
214       SetBuilder.add(InstantiatedValue{Val, I + 1});
215       SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1},
216                                 ValueInfo.getNodeInfoAtLevel(I + 1).Attr);
217       SetBuilder.addBelow(InstantiatedValue{Val, I},
218                           InstantiatedValue{Val, I + 1});
219     }
220   }
221 
222   // Add all assign edges to StratifiedSets
223   for (const auto &Mapping : Graph.value_mappings()) {
224     auto Val = Mapping.first;
225     if (canSkipAddingToSets(Val))
226       continue;
227     auto &ValueInfo = Mapping.second;
228 
229     for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
230       auto Src = InstantiatedValue{Val, I};
231       for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges)
232         SetBuilder.addWith(Src, Edge.Other);
233     }
234   }
235 
236   return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build());
237 }
238 
scan(Function * Fn)239 void CFLSteensAAResult::scan(Function *Fn) {
240   auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
241   (void)InsertPair;
242   assert(InsertPair.second &&
243          "Trying to scan a function that has already been cached");
244 
245   // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call
246   // may get evaluated after operator[], potentially triggering a DenseMap
247   // resize and invalidating the reference returned by operator[]
248   auto FunInfo = buildSetsFrom(Fn);
249   Cache[Fn] = std::move(FunInfo);
250 
251   Handles.push_front(FunctionHandle(Fn, this));
252 }
253 
evict(Function * Fn)254 void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); }
255 
256 /// Ensures that the given function is available in the cache, and returns the
257 /// entry.
258 const Optional<CFLSteensAAResult::FunctionInfo> &
ensureCached(Function * Fn)259 CFLSteensAAResult::ensureCached(Function *Fn) {
260   auto Iter = Cache.find(Fn);
261   if (Iter == Cache.end()) {
262     scan(Fn);
263     Iter = Cache.find(Fn);
264     assert(Iter != Cache.end());
265     assert(Iter->second.hasValue());
266   }
267   return Iter->second;
268 }
269 
getAliasSummary(Function & Fn)270 const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) {
271   auto &FunInfo = ensureCached(&Fn);
272   if (FunInfo.hasValue())
273     return &FunInfo->getAliasSummary();
274   else
275     return nullptr;
276 }
277 
query(const MemoryLocation & LocA,const MemoryLocation & LocB)278 AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
279                                      const MemoryLocation &LocB) {
280   auto *ValA = const_cast<Value *>(LocA.Ptr);
281   auto *ValB = const_cast<Value *>(LocB.Ptr);
282 
283   if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy())
284     return NoAlias;
285 
286   Function *Fn = nullptr;
287   auto MaybeFnA = parentFunctionOfValue(ValA);
288   auto MaybeFnB = parentFunctionOfValue(ValB);
289   if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) {
290     // The only times this is known to happen are when globals + InlineAsm are
291     // involved
292     DEBUG(dbgs()
293           << "CFLSteensAA: could not extract parent function information.\n");
294     return MayAlias;
295   }
296 
297   if (MaybeFnA.hasValue()) {
298     Fn = *MaybeFnA;
299     assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) &&
300            "Interprocedural queries not supported");
301   } else {
302     Fn = *MaybeFnB;
303   }
304 
305   assert(Fn != nullptr);
306   auto &MaybeInfo = ensureCached(Fn);
307   assert(MaybeInfo.hasValue());
308 
309   auto &Sets = MaybeInfo->getStratifiedSets();
310   auto MaybeA = Sets.find(InstantiatedValue{ValA, 0});
311   if (!MaybeA.hasValue())
312     return MayAlias;
313 
314   auto MaybeB = Sets.find(InstantiatedValue{ValB, 0});
315   if (!MaybeB.hasValue())
316     return MayAlias;
317 
318   auto SetA = *MaybeA;
319   auto SetB = *MaybeB;
320   auto AttrsA = Sets.getLink(SetA.Index).Attrs;
321   auto AttrsB = Sets.getLink(SetB.Index).Attrs;
322 
323   // If both values are local (meaning the corresponding set has attribute
324   // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them:
325   // they may-alias each other if and only if they are in the same set.
326   // If at least one value is non-local (meaning it either is global/argument or
327   // it comes from unknown sources like integer cast), the situation becomes a
328   // bit more interesting. We follow three general rules described below:
329   // - Non-local values may alias each other
330   // - AttrNone values do not alias any non-local values
331   // - AttrEscaped do not alias globals/arguments, but they may alias
332   // AttrUnknown values
333   if (SetA.Index == SetB.Index)
334     return MayAlias;
335   if (AttrsA.none() || AttrsB.none())
336     return NoAlias;
337   if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB))
338     return MayAlias;
339   if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB))
340     return MayAlias;
341   return NoAlias;
342 }
343 
getArgModRefInfo(ImmutableCallSite CS,unsigned ArgIdx)344 ModRefInfo CFLSteensAAResult::getArgModRefInfo(ImmutableCallSite CS,
345                                                unsigned ArgIdx) {
346   if (auto CalledFunc = CS.getCalledFunction()) {
347     auto &MaybeInfo = ensureCached(const_cast<Function *>(CalledFunc));
348     if (!MaybeInfo.hasValue())
349       return MRI_ModRef;
350     auto &RetParamAttributes = MaybeInfo->getAliasSummary().RetParamAttributes;
351     auto &RetParamRelations = MaybeInfo->getAliasSummary().RetParamRelations;
352 
353     bool ArgAttributeIsWritten =
354         std::any_of(RetParamAttributes.begin(), RetParamAttributes.end(),
355                     [ArgIdx](const ExternalAttribute &ExtAttr) {
356                       return ExtAttr.IValue.Index == ArgIdx + 1;
357                     });
358     bool ArgIsAccessed =
359         std::any_of(RetParamRelations.begin(), RetParamRelations.end(),
360                     [ArgIdx](const ExternalRelation &ExtRelation) {
361                       return ExtRelation.To.Index == ArgIdx + 1 ||
362                              ExtRelation.From.Index == ArgIdx + 1;
363                     });
364 
365     return (!ArgIsAccessed && !ArgAttributeIsWritten) ? MRI_NoModRef
366                                                       : MRI_ModRef;
367   }
368 
369   return MRI_ModRef;
370 }
371 
372 FunctionModRefBehavior
getModRefBehavior(ImmutableCallSite CS)373 CFLSteensAAResult::getModRefBehavior(ImmutableCallSite CS) {
374   // If we know the callee, try analyzing it
375   if (auto CalledFunc = CS.getCalledFunction())
376     return getModRefBehavior(CalledFunc);
377 
378   // Otherwise, be conservative
379   return FMRB_UnknownModRefBehavior;
380 }
381 
getModRefBehavior(const Function * F)382 FunctionModRefBehavior CFLSteensAAResult::getModRefBehavior(const Function *F) {
383   assert(F != nullptr);
384 
385   // TODO: Remove the const_cast
386   auto &MaybeInfo = ensureCached(const_cast<Function *>(F));
387   if (!MaybeInfo.hasValue())
388     return FMRB_UnknownModRefBehavior;
389   auto &RetParamAttributes = MaybeInfo->getAliasSummary().RetParamAttributes;
390   auto &RetParamRelations = MaybeInfo->getAliasSummary().RetParamRelations;
391 
392   // First, if any argument is marked Escpaed, Unknown or Global, anything may
393   // happen to them and thus we can't draw any conclusion.
394   if (!RetParamAttributes.empty())
395     return FMRB_UnknownModRefBehavior;
396 
397   // Currently we don't (and can't) distinguish reads from writes in
398   // RetParamRelations. All we can say is whether there may be memory access or
399   // not.
400   if (RetParamRelations.empty())
401     return FMRB_DoesNotAccessMemory;
402 
403   // Check if something beyond argmem gets touched.
404   bool AccessArgMemoryOnly =
405       std::all_of(RetParamRelations.begin(), RetParamRelations.end(),
406                   [](const ExternalRelation &ExtRelation) {
407                     // Both DerefLevels has to be 0, since we don't know which
408                     // one is a read and which is a write.
409                     return ExtRelation.From.DerefLevel == 0 &&
410                            ExtRelation.To.DerefLevel == 0;
411                   });
412   return AccessArgMemoryOnly ? FMRB_OnlyAccessesArgumentPointees
413                              : FMRB_UnknownModRefBehavior;
414 }
415 
416 char CFLSteensAA::PassID;
417 
run(Function & F,AnalysisManager<Function> & AM)418 CFLSteensAAResult CFLSteensAA::run(Function &F, AnalysisManager<Function> &AM) {
419   return CFLSteensAAResult(AM.getResult<TargetLibraryAnalysis>(F));
420 }
421 
422 char CFLSteensAAWrapperPass::ID = 0;
423 INITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa",
424                 "Unification-Based CFL Alias Analysis", false, true)
425 
createCFLSteensAAWrapperPass()426 ImmutablePass *llvm::createCFLSteensAAWrapperPass() {
427   return new CFLSteensAAWrapperPass();
428 }
429 
CFLSteensAAWrapperPass()430 CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) {
431   initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry());
432 }
433 
initializePass()434 void CFLSteensAAWrapperPass::initializePass() {
435   auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
436   Result.reset(new CFLSteensAAResult(TLIWP.getTLI()));
437 }
438 
getAnalysisUsage(AnalysisUsage & AU) const439 void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
440   AU.setPreservesAll();
441   AU.addRequired<TargetLibraryInfoWrapperPass>();
442 }
443