1 //===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a CFL-base, summary-based alias analysis algorithm. It
11 // does not depend on types. The algorithm is a mixture of the one described in
12 // "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast
13 // algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by
14 // Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a
15 // graph of the uses of a variable, where each node is a memory location, and
16 // each edge is an action that happened on that memory location.  The "actions"
17 // can be one of Dereference, Reference, or Assign. The precision of this
18 // analysis is roughly the same as that of an one level context-sensitive
19 // Steensgaard's algorithm.
20 //
21 // Two variables are considered as aliasing iff you can reach one value's node
22 // from the other value's node and the language formed by concatenating all of
23 // the edge labels (actions) conforms to a context-free grammar.
24 //
25 // Because this algorithm requires a graph search on each query, we execute the
26 // algorithm outlined in "Fast algorithms..." (mentioned above)
27 // in order to transform the graph into sets of variables that may alias in
28 // ~nlogn time (n = number of variables), which makes queries take constant
29 // time.
30 //===----------------------------------------------------------------------===//
31 
32 // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
33 // CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because
34 // FunctionPasses are only allowed to inspect the Function that they're being
35 // run on. Realistically, this likely isn't a problem until we allow
36 // FunctionPasses to run concurrently.
37 
38 #include "llvm/Analysis/CFLSteensAliasAnalysis.h"
39 #include "AliasAnalysisSummary.h"
40 #include "CFLGraph.h"
41 #include "StratifiedSets.h"
42 #include "llvm/ADT/DenseMap.h"
43 #include "llvm/ADT/Optional.h"
44 #include "llvm/ADT/SmallVector.h"
45 #include "llvm/Analysis/TargetLibraryInfo.h"
46 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/Function.h"
48 #include "llvm/IR/Type.h"
49 #include "llvm/IR/Value.h"
50 #include "llvm/Pass.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/raw_ostream.h"
53 #include <algorithm>
54 #include <cassert>
55 #include <limits>
56 #include <memory>
57 #include <utility>
58 
59 using namespace llvm;
60 using namespace llvm::cflaa;
61 
62 #define DEBUG_TYPE "cfl-steens-aa"
63 
CFLSteensAAResult(const TargetLibraryInfo & TLI)64 CFLSteensAAResult::CFLSteensAAResult(const TargetLibraryInfo &TLI)
65     : AAResultBase(), TLI(TLI) {}
CFLSteensAAResult(CFLSteensAAResult && Arg)66 CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg)
67     : AAResultBase(std::move(Arg)), TLI(Arg.TLI) {}
68 CFLSteensAAResult::~CFLSteensAAResult() = default;
69 
70 /// Information we have about a function and would like to keep around.
71 class CFLSteensAAResult::FunctionInfo {
72   StratifiedSets<InstantiatedValue> Sets;
73   AliasSummary Summary;
74 
75 public:
76   FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals,
77                StratifiedSets<InstantiatedValue> S);
78 
getStratifiedSets() const79   const StratifiedSets<InstantiatedValue> &getStratifiedSets() const {
80     return Sets;
81   }
82 
getAliasSummary() const83   const AliasSummary &getAliasSummary() const { return Summary; }
84 };
85 
86 const StratifiedIndex StratifiedLink::SetSentinel =
87     std::numeric_limits<StratifiedIndex>::max();
88 
89 //===----------------------------------------------------------------------===//
90 // Function declarations that require types defined in the namespace above
91 //===----------------------------------------------------------------------===//
92 
93 /// Determines whether it would be pointless to add the given Value to our sets.
canSkipAddingToSets(Value * Val)94 static bool canSkipAddingToSets(Value *Val) {
95   // Constants can share instances, which may falsely unify multiple
96   // sets, e.g. in
97   // store i32* null, i32** %ptr1
98   // store i32* null, i32** %ptr2
99   // clearly ptr1 and ptr2 should not be unified into the same set, so
100   // we should filter out the (potentially shared) instance to
101   // i32* null.
102   if (isa<Constant>(Val)) {
103     // TODO: Because all of these things are constant, we can determine whether
104     // the data is *actually* mutable at graph building time. This will probably
105     // come for free/cheap with offset awareness.
106     bool CanStoreMutableData = isa<GlobalValue>(Val) ||
107                                isa<ConstantExpr>(Val) ||
108                                isa<ConstantAggregate>(Val);
109     return !CanStoreMutableData;
110   }
111 
112   return false;
113 }
114 
FunctionInfo(Function & Fn,const SmallVectorImpl<Value * > & RetVals,StratifiedSets<InstantiatedValue> S)115 CFLSteensAAResult::FunctionInfo::FunctionInfo(
116     Function &Fn, const SmallVectorImpl<Value *> &RetVals,
117     StratifiedSets<InstantiatedValue> S)
118     : Sets(std::move(S)) {
119   // Historically, an arbitrary upper-bound of 50 args was selected. We may want
120   // to remove this if it doesn't really matter in practice.
121   if (Fn.arg_size() > MaxSupportedArgsInSummary)
122     return;
123 
124   DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap;
125 
126   // Our intention here is to record all InterfaceValues that share the same
127   // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we
128   // have its StratifiedIndex scanned here and check if the index is presented
129   // in InterfaceMap: if it is not, we add the correspondence to the map;
130   // otherwise, an aliasing relation is found and we add it to
131   // RetParamRelations.
132 
133   auto AddToRetParamRelations = [&](unsigned InterfaceIndex,
134                                     StratifiedIndex SetIndex) {
135     unsigned Level = 0;
136     while (true) {
137       InterfaceValue CurrValue{InterfaceIndex, Level};
138 
139       auto Itr = InterfaceMap.find(SetIndex);
140       if (Itr != InterfaceMap.end()) {
141         if (CurrValue != Itr->second)
142           Summary.RetParamRelations.push_back(
143               ExternalRelation{CurrValue, Itr->second, UnknownOffset});
144         break;
145       }
146 
147       auto &Link = Sets.getLink(SetIndex);
148       InterfaceMap.insert(std::make_pair(SetIndex, CurrValue));
149       auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs);
150       if (ExternalAttrs.any())
151         Summary.RetParamAttributes.push_back(
152             ExternalAttribute{CurrValue, ExternalAttrs});
153 
154       if (!Link.hasBelow())
155         break;
156 
157       ++Level;
158       SetIndex = Link.Below;
159     }
160   };
161 
162   // Populate RetParamRelations for return values
163   for (auto *RetVal : RetVals) {
164     assert(RetVal != nullptr);
165     assert(RetVal->getType()->isPointerTy());
166     auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0});
167     if (RetInfo.hasValue())
168       AddToRetParamRelations(0, RetInfo->Index);
169   }
170 
171   // Populate RetParamRelations for parameters
172   unsigned I = 0;
173   for (auto &Param : Fn.args()) {
174     if (Param.getType()->isPointerTy()) {
175       auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0});
176       if (ParamInfo.hasValue())
177         AddToRetParamRelations(I + 1, ParamInfo->Index);
178     }
179     ++I;
180   }
181 }
182 
183 // Builds the graph + StratifiedSets for a function.
buildSetsFrom(Function * Fn)184 CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) {
185   CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, TLI, *Fn);
186   StratifiedSetsBuilder<InstantiatedValue> SetBuilder;
187 
188   // Add all CFLGraph nodes and all Dereference edges to StratifiedSets
189   auto &Graph = GraphBuilder.getCFLGraph();
190   for (const auto &Mapping : Graph.value_mappings()) {
191     auto Val = Mapping.first;
192     if (canSkipAddingToSets(Val))
193       continue;
194     auto &ValueInfo = Mapping.second;
195 
196     assert(ValueInfo.getNumLevels() > 0);
197     SetBuilder.add(InstantiatedValue{Val, 0});
198     SetBuilder.noteAttributes(InstantiatedValue{Val, 0},
199                               ValueInfo.getNodeInfoAtLevel(0).Attr);
200     for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) {
201       SetBuilder.add(InstantiatedValue{Val, I + 1});
202       SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1},
203                                 ValueInfo.getNodeInfoAtLevel(I + 1).Attr);
204       SetBuilder.addBelow(InstantiatedValue{Val, I},
205                           InstantiatedValue{Val, I + 1});
206     }
207   }
208 
209   // Add all assign edges to StratifiedSets
210   for (const auto &Mapping : Graph.value_mappings()) {
211     auto Val = Mapping.first;
212     if (canSkipAddingToSets(Val))
213       continue;
214     auto &ValueInfo = Mapping.second;
215 
216     for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
217       auto Src = InstantiatedValue{Val, I};
218       for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges)
219         SetBuilder.addWith(Src, Edge.Other);
220     }
221   }
222 
223   return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build());
224 }
225 
scan(Function * Fn)226 void CFLSteensAAResult::scan(Function *Fn) {
227   auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
228   (void)InsertPair;
229   assert(InsertPair.second &&
230          "Trying to scan a function that has already been cached");
231 
232   // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call
233   // may get evaluated after operator[], potentially triggering a DenseMap
234   // resize and invalidating the reference returned by operator[]
235   auto FunInfo = buildSetsFrom(Fn);
236   Cache[Fn] = std::move(FunInfo);
237 
238   Handles.emplace_front(Fn, this);
239 }
240 
evict(Function * Fn)241 void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); }
242 
243 /// Ensures that the given function is available in the cache, and returns the
244 /// entry.
245 const Optional<CFLSteensAAResult::FunctionInfo> &
ensureCached(Function * Fn)246 CFLSteensAAResult::ensureCached(Function *Fn) {
247   auto Iter = Cache.find(Fn);
248   if (Iter == Cache.end()) {
249     scan(Fn);
250     Iter = Cache.find(Fn);
251     assert(Iter != Cache.end());
252     assert(Iter->second.hasValue());
253   }
254   return Iter->second;
255 }
256 
getAliasSummary(Function & Fn)257 const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) {
258   auto &FunInfo = ensureCached(&Fn);
259   if (FunInfo.hasValue())
260     return &FunInfo->getAliasSummary();
261   else
262     return nullptr;
263 }
264 
query(const MemoryLocation & LocA,const MemoryLocation & LocB)265 AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
266                                      const MemoryLocation &LocB) {
267   auto *ValA = const_cast<Value *>(LocA.Ptr);
268   auto *ValB = const_cast<Value *>(LocB.Ptr);
269 
270   if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy())
271     return NoAlias;
272 
273   Function *Fn = nullptr;
274   Function *MaybeFnA = const_cast<Function *>(parentFunctionOfValue(ValA));
275   Function *MaybeFnB = const_cast<Function *>(parentFunctionOfValue(ValB));
276   if (!MaybeFnA && !MaybeFnB) {
277     // The only times this is known to happen are when globals + InlineAsm are
278     // involved
279     LLVM_DEBUG(
280         dbgs()
281         << "CFLSteensAA: could not extract parent function information.\n");
282     return MayAlias;
283   }
284 
285   if (MaybeFnA) {
286     Fn = MaybeFnA;
287     assert((!MaybeFnB || MaybeFnB == MaybeFnA) &&
288            "Interprocedural queries not supported");
289   } else {
290     Fn = MaybeFnB;
291   }
292 
293   assert(Fn != nullptr);
294   auto &MaybeInfo = ensureCached(Fn);
295   assert(MaybeInfo.hasValue());
296 
297   auto &Sets = MaybeInfo->getStratifiedSets();
298   auto MaybeA = Sets.find(InstantiatedValue{ValA, 0});
299   if (!MaybeA.hasValue())
300     return MayAlias;
301 
302   auto MaybeB = Sets.find(InstantiatedValue{ValB, 0});
303   if (!MaybeB.hasValue())
304     return MayAlias;
305 
306   auto SetA = *MaybeA;
307   auto SetB = *MaybeB;
308   auto AttrsA = Sets.getLink(SetA.Index).Attrs;
309   auto AttrsB = Sets.getLink(SetB.Index).Attrs;
310 
311   // If both values are local (meaning the corresponding set has attribute
312   // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them:
313   // they may-alias each other if and only if they are in the same set.
314   // If at least one value is non-local (meaning it either is global/argument or
315   // it comes from unknown sources like integer cast), the situation becomes a
316   // bit more interesting. We follow three general rules described below:
317   // - Non-local values may alias each other
318   // - AttrNone values do not alias any non-local values
319   // - AttrEscaped do not alias globals/arguments, but they may alias
320   // AttrUnknown values
321   if (SetA.Index == SetB.Index)
322     return MayAlias;
323   if (AttrsA.none() || AttrsB.none())
324     return NoAlias;
325   if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB))
326     return MayAlias;
327   if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB))
328     return MayAlias;
329   return NoAlias;
330 }
331 
332 AnalysisKey CFLSteensAA::Key;
333 
run(Function & F,FunctionAnalysisManager & AM)334 CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) {
335   return CFLSteensAAResult(AM.getResult<TargetLibraryAnalysis>(F));
336 }
337 
338 char CFLSteensAAWrapperPass::ID = 0;
339 INITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa",
340                 "Unification-Based CFL Alias Analysis", false, true)
341 
createCFLSteensAAWrapperPass()342 ImmutablePass *llvm::createCFLSteensAAWrapperPass() {
343   return new CFLSteensAAWrapperPass();
344 }
345 
CFLSteensAAWrapperPass()346 CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) {
347   initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry());
348 }
349 
initializePass()350 void CFLSteensAAWrapperPass::initializePass() {
351   auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
352   Result.reset(new CFLSteensAAResult(TLIWP.getTLI()));
353 }
354 
getAnalysisUsage(AnalysisUsage & AU) const355 void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
356   AU.setPreservesAll();
357   AU.addRequired<TargetLibraryInfoWrapperPass>();
358 }
359